nettrace: Add a new tracing library

This commit introduces a new tracing library, that replaces golang.org/x/net/trace, and supports (amongts other thing) nested traces. This is a minimal change, future patches will make use of the new functionality.
2025-12-17 14:37:02 +00:00 · 2022-03-07 01:13:34 +00:00
parent 3ebe5c5173
commit 9c6661eca2
18 changed files with 2276 additions and 54 deletions
--- a/internal/maillog/maillog.go
+++ b/internal/maillog/maillog.go
@@ -15,7 +15,7 @@ import (

 // Global event logs.
 var (
-	authLog = trace.NewEventLog("Authentication", "Incoming SMTP")
+	authLog = trace.New("Authentication", "Incoming SMTP")
 )

 // Logger contains a backend used to log data to, such as a file or syslog.
--- a/internal/nettrace/bench_test.go
+++ b/internal/nettrace/bench_test.go
@@ -0,0 +1,57 @@
+package nettrace
+
+import (
+	"testing"
+)
+
+// Our benchmark loop is similar to the one from golang.org/x/net/trace, so we
+// can compare results.
+func runBench(b *testing.B, events int) {
+	nTraces := (b.N + events + 1) / events
+
+	for i := 0; i < nTraces; i++ {
+		tr := New("bench", "test")
+		for j := 0; j < events; j++ {
+			tr.Printf("%d", j)
+		}
+		tr.Finish()
+	}
+}
+
+func BenchmarkTrace_2(b *testing.B) {
+	runBench(b, 2)
+}
+
+func BenchmarkTrace_10(b *testing.B) {
+	runBench(b, 10)
+}
+
+func BenchmarkTrace_100(b *testing.B) {
+	runBench(b, 100)
+}
+
+func BenchmarkTrace_1000(b *testing.B) {
+	runBench(b, 1000)
+}
+
+func BenchmarkTrace_10000(b *testing.B) {
+	runBench(b, 10000)
+}
+
+func BenchmarkNewAndFinish(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		tr := New("bench", "test")
+		tr.Finish()
+	}
+}
+
+func BenchmarkPrintf(b *testing.B) {
+	tr := New("bench", "test")
+	defer tr.Finish()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		// Keep this without any formatting, so we measure our code instead of
+		// the performance of fmt.Sprintf.
+		tr.Printf("this is printf")
+	}
+}
--- a/internal/nettrace/context.go
+++ b/internal/nettrace/context.go
@@ -0,0 +1,40 @@
+package nettrace
+
+import "context"
+
+type ctxKeyT string
+
+const ctxKey ctxKeyT = "blitiri.com.ar/go/srv/nettrace"
+
+// NewContext returns a new context with the given trace attached.
+func NewContext(ctx context.Context, tr Trace) context.Context {
+	return context.WithValue(ctx, ctxKey, tr)
+}
+
+// FromContext returns the trace attached to the given context (if any).
+func FromContext(ctx context.Context) (Trace, bool) {
+	tr, ok := ctx.Value(ctxKey).(Trace)
+	return tr, ok
+}
+
+// FromContextOrNew returns the trace attached to the given context, or a new
+// trace if there is none.
+func FromContextOrNew(ctx context.Context, family, title string) (Trace, context.Context) {
+	tr, ok := FromContext(ctx)
+	if ok {
+		return tr, ctx
+	}
+
+	tr = New(family, title)
+	return tr, NewContext(ctx, tr)
+}
+
+// ChildFromContext returns a new trace that is a child of the one attached to
+// the context (if any).
+func ChildFromContext(ctx context.Context, family, title string) Trace {
+	parent, ok := FromContext(ctx)
+	if ok {
+		return parent.NewChild(family, title)
+	}
+	return New(family, title)
+}
--- a/internal/nettrace/context_test.go
+++ b/internal/nettrace/context_test.go
@@ -0,0 +1,66 @@
+package nettrace
+
+import (
+	"context"
+	"testing"
+)
+
+func TestContext(t *testing.T) {
+	tr := New("TestContext", "trace")
+	defer tr.Finish()
+
+	// Attach the trace to a new context.
+	ctx := NewContext(context.Background(), tr)
+
+	// Get the trace back from the context.
+	{
+		tr2, ok := FromContext(ctx)
+		if !ok {
+			t.Errorf("Context with trace returned not found")
+		}
+		if tr != tr2 {
+			t.Errorf("Trace from context is different: %v != %v", tr, tr2)
+		}
+	}
+
+	// Create a child trace from the context.
+	{
+		tr3 := ChildFromContext(ctx, "TestContext", "child")
+		if p := tr3.(*trace).Parent; p != tr {
+			t.Errorf("Child doesn't have the right parent: %v != %v", p, tr)
+		}
+		tr3.Finish()
+	}
+
+	// FromContextOrNew returns the one from the context.
+	{
+		tr4, ctx4 := FromContextOrNew(ctx, "TestContext", "from-ctx")
+		if ctx4 != ctx {
+			t.Errorf("Got new context: %v != %v", ctx4, ctx)
+		}
+		if tr4 != tr {
+			t.Errorf("Context with trace returned new trace: %v != %v", tr4, tr)
+		}
+	}
+
+	// FromContextOrNew needs to create a new one.
+	{
+		tr5, ctx5 := FromContextOrNew(
+			context.Background(), "TestContext", "tr5")
+		if tr, _ := FromContext(ctx5); tr != tr5 {
+			t.Errorf("Context with trace returned the wrong trace: %v != %v",
+				tr, tr5)
+		}
+		tr5.Finish()
+	}
+
+	// Child from a context that has no trace attached.
+	{
+		tr6 := ChildFromContext(
+			context.Background(), "TestContext", "child")
+		tr6.Finish()
+		if p := tr6.(*trace).Parent; p != nil {
+			t.Errorf("Expected orphan trace, it has a parent: %v", p)
+		}
+	}
+}
--- a/internal/nettrace/evtring.go
+++ b/internal/nettrace/evtring.go
@@ -0,0 +1,42 @@
+package nettrace
+
+import "time"
+
+type evtRing struct {
+	evts      []event
+	max       int
+	pos       int // Points to the latest element.
+	firstDrop time.Time
+}
+
+func newEvtRing(n int) *evtRing {
+	return &evtRing{
+		max: n,
+		pos: -1,
+	}
+}
+
+func (r *evtRing) Add(e *event) {
+	if len(r.evts) < r.max {
+		r.evts = append(r.evts, *e)
+		r.pos++
+		return
+	}
+
+	r.pos = (r.pos + 1) % r.max
+
+	// Record the first drop as the time of the first dropped message.
+	if r.firstDrop.IsZero() {
+		r.firstDrop = r.evts[r.pos].When
+	}
+
+	r.evts[r.pos] = *e
+}
+
+func (r *evtRing) Do(f func(e *event)) {
+	for i := 0; i < len(r.evts); i++ {
+		// Go from older to newer by starting at (r.pos+1).
+		pos := (r.pos + 1 + i) % len(r.evts)
+		f(&r.evts[pos])
+	}
+}
--- a/internal/nettrace/histogram.go
+++ b/internal/nettrace/histogram.go
@@ -0,0 +1,71 @@
+package nettrace
+
+import (
+	"time"
+)
+
+type histogram struct {
+	count [nBuckets]uint64
+
+	totalQ uint64
+	totalT time.Duration
+	min    time.Duration
+	max    time.Duration
+}
+
+func (h *histogram) Add(bucket int, latency time.Duration) {
+	if h.totalQ == 0 || h.min > latency {
+		h.min = latency
+	}
+	if h.max < latency {
+		h.max = latency
+	}
+
+	h.count[bucket]++
+	h.totalQ++
+	h.totalT += latency
+}
+
+type histSnapshot struct {
+	Counts        map[time.Duration]line
+	Count         uint64
+	Avg, Min, Max time.Duration
+}
+
+type line struct {
+	Start     time.Duration
+	BucketIdx int
+	Count     uint64
+	Percent   float32
+	CumPct    float32
+}
+
+func (h *histogram) Snapshot() *histSnapshot {
+	s := &histSnapshot{
+		Counts: map[time.Duration]line{},
+		Count:  h.totalQ,
+		Min:    h.min,
+		Max:    h.max,
+	}
+
+	if h.totalQ > 0 {
+		s.Avg = time.Duration(uint64(h.totalT) / h.totalQ)
+	}
+
+	var cumCount uint64
+	for i := 0; i < nBuckets; i++ {
+		cumCount += h.count[i]
+		l := line{
+			Start:     buckets[i],
+			BucketIdx: i,
+			Count:     h.count[i],
+		}
+		if h.totalQ > 0 {
+			l.Percent = float32(h.count[i]) / float32(h.totalQ) * 100
+			l.CumPct = float32(cumCount) / float32(h.totalQ) * 100
+		}
+		s.Counts[buckets[i]] = l
+	}
+
+	return s
+}
--- a/internal/nettrace/histogram_test.go
+++ b/internal/nettrace/histogram_test.go
@@ -0,0 +1,29 @@
+package nettrace
+
+import (
+	"testing"
+	"time"
+)
+
+func TestHistogramBasic(t *testing.T) {
+	h := histogram{}
+	h.Add(1, 1*time.Millisecond)
+
+	snap := h.Snapshot()
+	if snap.Count != 1 ||
+		snap.Min != 1*time.Millisecond ||
+		snap.Max != 1*time.Millisecond ||
+		snap.Avg != 1*time.Millisecond {
+		t.Errorf("expected snapshot with only 1 sample, got %v", snap)
+	}
+}
+
+func TestHistogramEmpty(t *testing.T) {
+	h := histogram{}
+	snap := h.Snapshot()
+
+	if len(snap.Counts) != nBuckets || snap.Count != 0 ||
+		snap.Avg != 0 || snap.Min != 0 || snap.Max != 0 {
+		t.Errorf("expected zero snapshot, got %v", snap)
+	}
+}
--- a/internal/nettrace/http.go
+++ b/internal/nettrace/http.go
@@ -0,0 +1,259 @@
+package nettrace
+
+import (
+	"bytes"
+	"embed"
+	"fmt"
+	"hash/crc32"
+	"html/template"
+	"math"
+	"net/http"
+	"sort"
+	"strconv"
+	"time"
+)
+
+//go:embed "templates/*.tmpl" "templates/*.css"
+var templatesFS embed.FS
+var top *template.Template
+
+func init() {
+	top = template.Must(
+		template.New("_top").Funcs(template.FuncMap{
+			"stripZeros":    stripZeros,
+			"roundSeconds":  roundSeconds,
+			"roundDuration": roundDuration,
+			"colorize":      colorize,
+			"depthspan":     depthspan,
+			"shorttitle":    shorttitle,
+			"traceemoji":    traceemoji,
+		}).ParseFS(templatesFS, "templates/*"))
+}
+
+// RegisterHandler registers a the trace handler in the given ServeMux, on
+// `/debug/traces`.
+func RegisterHandler(mux *http.ServeMux) {
+	mux.HandleFunc("/debug/traces", RenderTraces)
+}
+
+// RenderTraces is an http.Handler that renders the tracing information.
+func RenderTraces(w http.ResponseWriter, req *http.Request) {
+	data := &struct {
+		Buckets   *[]time.Duration
+		FamTraces map[string]*familyTraces
+
+		// When displaying traces for a specific family.
+		Family    string
+		Bucket    int
+		BucketStr string
+		AllGT     bool
+		Traces    []*trace
+
+		// When displaying latencies for a specific family.
+		Latencies *histSnapshot
+
+		// When displaying a specific trace.
+		Trace     *trace
+		AllEvents []traceAndEvent
+
+		// Error to show to the user.
+		Error string
+	}{}
+
+	// Reference the common buckets, no need to copy them.
+	data.Buckets = &buckets
+
+	// Copy the family traces map, so we don't have to keep it locked for too
+	// long. We'll still need to lock individual entries.
+	data.FamTraces = copyFamilies()
+
+	// Default to showing greater-than.
+	data.AllGT = true
+	if all := req.FormValue("all"); all != "" {
+		data.AllGT, _ = strconv.ParseBool(all)
+	}
+
+	// Fill in the family related parameters.
+	if fam := req.FormValue("fam"); fam != "" {
+		if _, ok := data.FamTraces[fam]; !ok {
+			data.Family = ""
+			data.Error = "Unknown family"
+			w.WriteHeader(http.StatusNotFound)
+			goto render
+		}
+		data.Family = fam
+
+		if bs := req.FormValue("b"); bs != "" {
+			i, err := strconv.Atoi(bs)
+			if err != nil {
+				data.Error = "Invalid bucket (not a number)"
+				w.WriteHeader(http.StatusBadRequest)
+				goto render
+			} else if i < -2 || i >= nBuckets {
+				data.Error = "Invalid bucket number"
+				w.WriteHeader(http.StatusBadRequest)
+				goto render
+			}
+			data.Bucket = i
+			data.Traces = data.FamTraces[data.Family].TracesFor(i, data.AllGT)
+
+			switch i {
+			case -2:
+				data.BucketStr = "errors"
+			case -1:
+				data.BucketStr = "active"
+			default:
+				data.BucketStr = buckets[i].String()
+			}
+		}
+	}
+
+	if lat := req.FormValue("lat"); data.Family != "" && lat != "" {
+		data.Latencies = data.FamTraces[data.Family].Latencies()
+	}
+
+	if traceID := req.FormValue("trace"); traceID != "" {
+		refID := req.FormValue("ref")
+		tr := findInFamilies(id(traceID), id(refID))
+		if tr == nil {
+			data.Error = "Trace not found"
+			w.WriteHeader(http.StatusNotFound)
+			goto render
+		}
+		data.Trace = tr
+		data.Family = tr.Family
+		data.AllEvents = allEvents(tr)
+	}
+
+render:
+
+	// Write into a buffer, to avoid accidentally holding a lock on http
+	// writes. It shouldn't happen, but just to be extra safe.
+	bw := &bytes.Buffer{}
+	bw.Grow(16 * 1024)
+	err := top.ExecuteTemplate(bw, "index.html.tmpl", data)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		panic(err)
+	}
+
+	w.Write(bw.Bytes())
+}
+
+type traceAndEvent struct {
+	Trace *trace
+	Event event
+	Depth uint
+}
+
+// allEvents gets all the events for the trace and its children/linked traces;
+// and returns them sorted by timestamp.
+func allEvents(tr *trace) []traceAndEvent {
+	// Map tracking all traces we've seen, to avoid loops.
+	seen := map[id]bool{}
+
+	// Recursively gather all events.
+	evts := appendAllEvents(tr, []traceAndEvent{}, seen, 0)
+
+	// Sort them by time.
+	sort.Slice(evts, func(i, j int) bool {
+		return evts[i].Event.When.Before(evts[j].Event.When)
+	})
+
+	return evts
+}
+
+func appendAllEvents(tr *trace, evts []traceAndEvent, seen map[id]bool, depth uint) []traceAndEvent {
+	if seen[tr.ID] {
+		return evts
+	}
+	seen[tr.ID] = true
+
+	subTraces := []*trace{}
+
+	// Append all events of this trace.
+	trevts := tr.Events()
+	for _, e := range trevts {
+		evts = append(evts, traceAndEvent{tr, e, depth})
+		if e.Ref != nil {
+			subTraces = append(subTraces, e.Ref)
+		}
+	}
+
+	for _, t := range subTraces {
+		evts = appendAllEvents(t, evts, seen, depth+1)
+	}
+
+	return evts
+}
+
+func stripZeros(d time.Duration) string {
+	if d < time.Second {
+		_, frac := math.Modf(d.Seconds())
+		return fmt.Sprintf(" .%6d", int(frac*1000000))
+	}
+	return fmt.Sprintf("%.6f", d.Seconds())
+}
+
+func roundSeconds(d time.Duration) string {
+	return fmt.Sprintf("%.6f", d.Seconds())
+}
+
+func roundDuration(d time.Duration) time.Duration {
+	return d.Round(time.Millisecond)
+}
+
+func colorize(depth uint, id id) template.CSS {
+	if depth == 0 {
+		return template.CSS("rgba(var(--text-color))")
+	}
+
+	if depth > 3 {
+		depth = 3
+	}
+
+	// Must match the number of nested color variables in the CSS.
+	colori := crc32.ChecksumIEEE([]byte(id)) % 6
+	return template.CSS(
+		fmt.Sprintf("var(--nested-d%02d-c%02d)", depth, colori))
+}
+
+func depthspan(depth uint) template.HTML {
+	s := `<span class="depth">`
+	switch depth {
+	case 0:
+	case 1:
+		s += "· "
+	case 2:
+		s += "· · "
+	case 3:
+		s += "· · · "
+	case 4:
+		s += "· · · · "
+	default:
+		s += fmt.Sprintf("· (%d) · ", depth)
+	}
+
+	s += `</span>`
+	return template.HTML(s)
+}
+
+// Hand-picked emojis that have enough visual differences in most common
+// renderings, and are common enough to be able to easily describe them.
+var emojids = []rune(`😀🤣😇🥰🤧😈🤡👻👽🤖👋✊🦴👅` +
+	`🐒🐕🦊🐱🐯🐎🐄🐷🐑🐐🐪🦒🐘🐀🦇🐓🦆🦚🦜🐢🐍🦖🐋🐟🦈🐙` +
+	`🦋🐜🐝🪲🌻🌲🍉🍌🍍🍎🍑🥕🍄` +
+	`🧀🍦🍰🧉🚂🚗🚜🛵🚲🛼🪂🚀🌞🌈🌊⚽`)
+
+func shorttitle(tr *trace) string {
+	all := tr.Family + " - " + tr.Title
+	if len(all) > 20 {
+		all = "..." + all[len(all)-17:]
+	}
+	return all
+}
+
+func traceemoji(id id) string {
+	i := crc32.ChecksumIEEE([]byte(id)) % uint32(len(emojids))
+	return string(emojids[i])
+}
--- a/internal/nettrace/http_test.go
+++ b/internal/nettrace/http_test.go
@@ -0,0 +1,255 @@
+package nettrace
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"strings"
+	"testing"
+	"time"
+)
+
+func getValues(t *testing.T, vs url.Values, code int) string {
+	t.Helper()
+
+	req := httptest.NewRequest("GET", "/debug/traces?"+vs.Encode(), nil)
+	w := httptest.NewRecorder()
+	RenderTraces(w, req)
+
+	resp := w.Result()
+	body, _ := io.ReadAll(resp.Body)
+
+	if resp.StatusCode != code {
+		t.Errorf("expected %d, got %v", code, resp)
+	}
+
+	return string(body)
+}
+
+type v struct {
+	fam, b, lat, trace, ref, all string
+}
+
+func getCode(t *testing.T, vs v, code int) string {
+	t.Helper()
+
+	u := url.Values{}
+	if vs.fam != "" {
+		u.Add("fam", vs.fam)
+	}
+	if vs.b != "" {
+		u.Add("b", vs.b)
+	}
+	if vs.lat != "" {
+		u.Add("lat", vs.lat)
+	}
+	if vs.trace != "" {
+		u.Add("trace", vs.trace)
+	}
+	if vs.ref != "" {
+		u.Add("ref", vs.ref)
+	}
+	if vs.all != "" {
+		u.Add("all", vs.all)
+	}
+
+	return getValues(t, u, code)
+}
+
+func get(t *testing.T, fam, b, lat, trace, ref, all string) string {
+	t.Helper()
+	return getCode(t, v{fam, b, lat, trace, ref, all}, 200)
+}
+
+func getErr(t *testing.T, fam, b, lat, trace, ref, all string, code int, err string) string {
+	t.Helper()
+	body := getCode(t, v{fam, b, lat, trace, ref, all}, code)
+	if !strings.Contains(body, err) {
+		t.Errorf("Body does not contain error message %q", err)
+		t.Logf("Body: %v", body)
+	}
+
+	return body
+}
+
+func checkContains(t *testing.T, body, s string) {
+	t.Helper()
+	if !strings.Contains(body, s) {
+		t.Errorf("Body does not contain %q", s)
+		t.Logf("Body: %v", body)
+	}
+}
+
+func TestHTTP(t *testing.T) {
+	tr := New("TestHTTP", "http")
+	tr.Printf("entry #1")
+	tr.Finish()
+
+	tr = New("TestHTTP", "http")
+	tr.Printf("entry #2")
+	tr.Finish()
+
+	tr = New("TestHTTP", "http")
+	tr.Errorf("entry #3 (error)")
+	tr.Finish()
+
+	tr = New("TestHTTP", "http")
+	tr.Printf("hola marola")
+	tr.Printf("entry #4")
+	// This one is active until the end.
+	defer tr.Finish()
+
+	// Get the plain index.
+	body := get(t, "", "", "", "", "", "")
+	checkContains(t, body, "TestHTTP")
+
+	// Get a specific family, but no bucket.
+	body = get(t, "TestHTTP", "", "", "", "", "")
+	checkContains(t, body, "TestHTTP")
+
+	// Get a family and active bucket.
+	body = get(t, "TestHTTP", "-1", "", "", "", "")
+	checkContains(t, body, "hola marola")
+
+	// Get a family and error bucket.
+	body = get(t, "TestHTTP", "-2", "", "", "", "")
+	checkContains(t, body, "entry #3 (error)")
+
+	// Get a family and first bucket.
+	body = get(t, "TestHTTP", "0", "", "", "", "")
+	checkContains(t, body, "entry #2")
+
+	// Latency view. There are 3 events because the 4th is active.
+	body = get(t, "TestHTTP", "", "lat", "", "", "")
+	checkContains(t, body, "Count: 3")
+
+	// Get a specific trace. No family given, since it shouldn't be needed (we
+	// take it from the id).
+	body = get(t, "", "", "", string(tr.(*trace).ID), "", "")
+	checkContains(t, body, "hola marola")
+
+	// Check the "all=true" views.
+	body = get(t, "TestHTTP", "0", "", "", "", "true")
+	checkContains(t, body, "entry #2")
+	checkContains(t, body, "?fam=TestHTTP&b=-2&all=true")
+
+	tr.Finish()
+}
+
+func TestHTTPLong(t *testing.T) {
+	// Test a long trace.
+	tr := New("TestHTTPLong", "verbose")
+	for i := 0; i < 1000; i++ {
+		tr.Printf("entry #%d", i)
+	}
+	tr.Finish()
+	get(t, "TestHTTPLong", "", "", string(tr.(*trace).ID), "", "")
+}
+
+func TestHTTPErrors(t *testing.T) {
+	tr := New("TestHTTPErrors", "http")
+	tr.Printf("entry #1")
+	tr.Finish()
+
+	// Unknown family.
+	getErr(t, "unkfamily", "", "", "", "", "",
+		404, "Unknown family")
+
+	// Invalid bucket.
+	getErr(t, "TestHTTPErrors", "abc", "", "", "", "",
+		400, "Invalid bucket")
+	getErr(t, "TestHTTPErrors", "-3", "", "", "", "",
+		400, "Invalid bucket")
+	getErr(t, "TestHTTPErrors", "9", "", "", "", "",
+		400, "Invalid bucket")
+
+	// Unknown trace id (malformed).
+	getErr(t, "TestHTTPErrors", "", "", "unktrace", "", "",
+		404, "Trace not found")
+
+	// Unknown trace id.
+	getErr(t, "TestHTTPErrors", "", "", string(tr.(*trace).ID)+"xxx", "", "",
+		404, "Trace not found")
+
+	// Check that the trace is actually there.
+	get(t, "", "", "", string(tr.(*trace).ID), "", "")
+}
+
+func TestHTTPUroboro(t *testing.T) {
+	trA := New("TestHTTPUroboro", "trA")
+	defer trA.Finish()
+	trA.Printf("this is trace A")
+
+	trB := New("TestHTTPUroboro", "trB")
+	defer trB.Finish()
+	trB.Printf("this is trace B")
+
+	trA.Link(trB, "B is my friend")
+	trB.Link(trA, "A is my friend")
+
+	// Check that we handle cross-linked events well.
+	get(t, "TestHTTPUroboro", "", "", "", "", "")
+	get(t, "TestHTTPUroboro", "-1", "", "", "", "")
+	get(t, "", "", "", string(trA.(*trace).ID), "", "")
+	get(t, "", "", "", string(trB.(*trace).ID), "", "")
+}
+
+func TestHTTPDeep(t *testing.T) {
+	tr := New("TestHTTPDeep", "level-0")
+	defer tr.Finish()
+	ts := []Trace{tr}
+	for i := 1; i <= 9; i++ {
+		tr = tr.NewChild("TestHTTPDeep", fmt.Sprintf("level-%d", i))
+		defer tr.Finish()
+		ts = append(ts, tr)
+	}
+
+	// Active view.
+	body := get(t, "TestHTTPDeep", "-1", "", "", "", "")
+	checkContains(t, body, "level-9")
+
+	// Recursive view.
+	body = get(t, "TestHTTPDeep", "", "", string(ts[0].(*trace).ID), "", "")
+	checkContains(t, body, "level-9")
+}
+
+func TestStripZeros(t *testing.T) {
+	cases := []struct {
+		d   time.Duration
+		exp string
+	}{
+		{0 * time.Second, " .     0"},
+		{1 * time.Millisecond, " .  1000"},
+		{5 * time.Millisecond, " .  5000"},
+		{1 * time.Second, "1.000000"},
+		{1*time.Second + 8*time.Millisecond, "1.008000"},
+	}
+	for _, c := range cases {
+		if got := stripZeros(c.d); got != c.exp {
+			t.Errorf("stripZeros(%s) got %q, expected %q",
+				c.d, got, c.exp)
+		}
+	}
+}
+
+func TestRegisterHandler(t *testing.T) {
+	mux := http.NewServeMux()
+	RegisterHandler(mux)
+
+	req := httptest.NewRequest("GET", "/debug/traces", nil)
+	w := httptest.NewRecorder()
+	mux.ServeHTTP(w, req)
+
+	resp := w.Result()
+
+	if resp.StatusCode != 200 {
+		t.Errorf("expected 200, got %v", resp)
+	}
+
+	body, _ := io.ReadAll(resp.Body)
+	if !strings.Contains(string(body), "<h1>Traces</h1>") {
+		t.Errorf("unexpected body: %s", body)
+	}
+}
--- a/internal/nettrace/templates/_latency.html.tmpl
+++ b/internal/nettrace/templates/_latency.html.tmpl
@@ -0,0 +1,25 @@
+<table class="latencies"><tr>
+<td>Count: {{.Latencies.Count}}</td>
+<td>Avg: {{.Latencies.Avg | roundDuration}}</td>
+<td>Min: {{.Latencies.Min | roundDuration}}</td>
+<td>Max: {{.Latencies.Max | roundDuration}}</td>
+</tr></table>
+<p>
+
+<table class="latencies">
+<tr><th>Bucket</th><th>Count</th><th>%</th><th></th><th>Cumulative</th></tr>
+{{range .Latencies.Counts}}
+<tr>
+  <td>
+    <a href="?fam={{$.Family}}&b={{.BucketIdx}}"
+       {{if eq .Count 0}}class="muted"{{end}}>
+        &ge;{{.Start}}</a>
+  </td>
+  <td>{{.Count}}</td>
+  <td>{{.Percent | printf "%5.2f"}}%</td>
+  <td><meter max="100" value="{{.Percent}}">
+      {{.Percent | printf "%.2f"}}%</meter>
+  <td>{{.CumPct | printf "%5.2f"}}%</td>
+</tr>
+{{end}}
+</table>
--- a/internal/nettrace/templates/_recursive.html.tmpl
+++ b/internal/nettrace/templates/_recursive.html.tmpl
@@ -0,0 +1,51 @@
+{{if .Trace.Parent}}
+<a href="?trace={{.Trace.Parent.ID}}&ref={{.Trace.ID}}">
+  Parent: {{.Trace.Parent.Family}} - {{.Trace.Parent.Title}}</a>
+<p>
+{{end}}
+
+<table class="trace">
+
+<thead>
+<tr>
+  <th>Trace</th>
+  <th>Timestamp</th>
+  <th>Elapsed (s)</th>
+  <th></th>
+  <th>Message</th>
+</tr>
+</thead>
+
+<tbody>
+{{$prev := .Trace.Start}}
+{{range .AllEvents}}
+<tr style="background: {{colorize .Depth .Trace.ID}};">
+<td title='{{.Trace.Family}} - {{.Trace.Title}}
+@ {{.Trace.Start.Format "2006-01-02 15:04:05.999999" }}'>
+  {{shorttitle .Trace}}</td>
+
+<td class="when">{{.Event.When.Format "15:04:05.000000"}}</td>
+<td class="duration">{{(.Event.When.Sub $prev) | stripZeros}}</td>
+<td class="emoji" title='{{.Trace.Family}} - {{.Trace.Title}}
+@ {{.Trace.Start.Format "2006-01-02 15:04:05.999999" }}'>
+  <div class="emoji">{{traceemoji .Trace.ID}}</div></td>
+<td class="msg">
+  {{- depthspan .Depth -}}
+  {{- if .Event.Type.IsLog -}}
+    {{.Event.Msg}}
+  {{- else if .Event.Type.IsChild -}}
+    new child: <a href="?trace={{.Event.Ref.ID}}&ref={{.Trace.ID}}">{{.Event.Ref.Family}} - {{.Event.Ref.Title}}</a>
+  {{- else if .Event.Type.IsLink -}}
+    <a href="?trace={{.Event.Ref.ID}}&ref={{.Trace.ID}}">{{.Event.Msg}}</a>
+  {{- else if .Event.Type.IsDrop -}}
+    <b><i>[ events dropped ]</i></b>
+  {{- else -}}
+    <i>[ unknown event type ]</i>
+  {{- end -}}
+</td>
+</tr>
+{{$prev = .Event.When}}
+{{end}}
+
+</tbody>
+</table>
--- a/internal/nettrace/templates/_single.html.tmpl
+++ b/internal/nettrace/templates/_single.html.tmpl
@@ -0,0 +1,35 @@
+<tr class="title">
+<td class="when">{{.Start.Format "2006-01-02 15:04:05.000000"}}</td>
+<td class="duration">{{.Duration | roundSeconds}}</td>
+<td><a href="?trace={{.ID}}">{{.Title}}</a>
+  {{if .Parent}}(parent: <a href="?trace={{.Parent.ID}}&ref={{.ID}}">
+    {{.Parent.Family}} - {{.Parent.Title}}</a>)
+  {{end}}
+</td>
+<tr>
+
+{{$prev := .Start}}
+{{range .Events}}
+<tr>
+<td class="when">{{.When.Format "15:04:05.000000"}}</td>
+<td class="duration">{{(.When.Sub $prev) | stripZeros}}</td>
+<td class="msg">
+  {{- if .Type.IsLog -}}
+    {{.Msg}}
+  {{- else if .Type.IsChild -}}
+    new child <a href="?trace={{.Ref.ID}}&ref={{$.ID}}">{{.Ref.Family}} {{.Ref.Title}}</a>
+  {{- else if .Type.IsLink -}}
+    <a href="?trace={{.Ref.ID}}&ref={{$.ID}}">{{.Msg}}</a>
+  {{- else if .Type.IsDrop -}}
+    <i>[ events dropped ]</i>
+  {{- else -}}
+    <i>[ unknown event type ]</i>
+  {{- end -}}
+</td>
+</tr>
+{{$prev = .When}}
+{{end}}
+
+<tr>
+<td>&nbsp;</td>
+</tr>
--- a/internal/nettrace/templates/index.html.tmpl
+++ b/internal/nettrace/templates/index.html.tmpl
@@ -0,0 +1,129 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<style>
+{{template "style.css"}}
+</style>
+
+<title>
+{{if .Trace}}{{.Trace.Family}} - {{.Trace.Title}}
+{{else if .BucketStr}}{{.Family}} - {{.BucketStr}}
+{{else if .Latencies}}{{.Family}} - latency
+{{else}}Traces
+{{end}}
+</title>
+
+</head>
+
+<body>
+<h1>Traces</h1>
+
+<table class="index">
+{{range $name, $ftr := .FamTraces}}
+<tr>
+  <td class="family">
+    <a href="?fam={{$name}}&b=0&all=true">
+    {{if eq $name $.Family}}<u>{{end}}
+    {{$name}}
+    {{if eq $name $.Family}}</u>{{end}}
+    </a>
+  </td>
+
+  <td class="bucket active">
+    {{$n := $ftr.LenActive}}
+    {{if and (eq $name $.Family) (eq "active" $.BucketStr)}}<u>{{end}}
+
+    <a href="?fam={{$name}}&b=-1&all={{$.AllGT}}"
+       {{if eq $n 0}}class="muted"{{end}}>
+        {{$n}} active</a>
+
+    {{if and (eq $name $.Family) (eq "active" $.BucketStr)}}</u>{{end}}
+  </td>
+
+  {{range $i, $b := $.Buckets}}
+  <td class="bucket">
+    {{$n := $ftr.LenBucket $i}}
+    {{if and (eq $name $.Family) (eq $b.String $.BucketStr)}}<u>{{end}}
+
+    <a href="?fam={{$name}}&b={{$i}}&all={{$.AllGT}}"
+       {{if eq $n 0}}class="muted"{{end}}>
+        &ge;{{$b}}</a>
+
+    {{if and (eq $name $.Family) (eq $b.String $.BucketStr)}}</u>{{end}}
+  </td>
+  {{end}}
+
+  <td class="bucket">
+    {{$n := $ftr.LenErrors}}
+    {{if and (eq $name $.Family) (eq "errors" $.BucketStr)}}<u>{{end}}
+
+    <a href="?fam={{$name}}&b=-2&all={{$.AllGT}}"
+       {{if eq $n 0}}class="muted"{{end}}>
+        errors</a>
+
+    {{if and (eq $name $.Family) (eq "errors" $.BucketStr)}}</u>{{end}}
+  </td>
+
+  <td class="bucket">
+    <a href="?fam={{$name}}&lat=true&all={{$.AllGT}}">[latency]</a>
+  </td>
+</tr>
+{{end}}
+</table>
+<br>
+Show: <a href="?fam={{.Family}}&b={{.Bucket}}&all=false">
+  {{if not .AllGT}}<u>{{end}}
+  Only in bucket</a>
+  {{if not .AllGT}}</u>{{end}}
+/
+<a href="?fam={{.Family}}&b={{.Bucket}}&all=true">
+  {{if .AllGT}}<u>{{end}}
+  All &ge; bucket</a>
+  {{if .AllGT}}</u>{{end}}
+<p>
+
+<!--------------------------------------------->
+{{if .Error}}
+<p class="error">Error: {{.Error}}</p>
+{{end}}
+
+<!--------------------------------------------->
+{{if .BucketStr}}
+<h2>{{.Family}} - {{.BucketStr}}</h2>
+
+<table class="trace">
+<thead>
+<tr>
+  <th>Timestamp</th>
+  <th>Elapsed (s)</th>
+  <th>Message</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>&nbsp;</td>
+</tr>
+{{range .Traces}}
+{{template "_single.html.tmpl" .}}<p>
+{{end}}
+</tbody>
+</table>
+
+<p>
+{{end}}
+
+<!--------------------------------------------->
+{{if .Latencies}}
+<h2>{{.Family}} - latency</h2>
+{{template "_latency.html.tmpl" .}}<p>
+{{end}}
+
+<!--------------------------------------------->
+{{if .Trace}}
+<h2>{{.Trace.Family}} - <i>{{.Trace.Title}}</i></h2>
+{{template "_recursive.html.tmpl" .}}<p>
+{{end}}
+
+</body>
+
+</html>
--- a/internal/nettrace/templates/style.css
+++ b/internal/nettrace/templates/style.css
@@ -0,0 +1,187 @@
+:root {
+    --text-color: black;
+    --bg-color: #fffff7;
+    --zebra-bg-color: #eeeee7;
+    --muted-color: #444;
+    --xmuted-color: #a1a1a1;
+    --link-color: #39c;
+    --link-hover: #069;
+    --underline-color: grey;
+    --error-color: red;
+
+    /* Colors for the nested zebras. */
+    --nested-d01-c00: #ffebee;
+    --nested-d01-c01: #ede7f6;
+    --nested-d01-c02: #e3f2fd;
+    --nested-d01-c03: #e8f5e9;
+    --nested-d01-c04: #fff8e1;
+    --nested-d01-c05: #efebe9;
+    --nested-d02-c00: #f0dcdf;
+    --nested-d02-c01: #ded8e7;
+    --nested-d02-c02: #d4e3ee;
+    --nested-d02-c03: #d9e6da;
+    --nested-d02-c04: #f0e9d2;
+    --nested-d02-c05: #e0dcda;
+    --nested-d03-c00: #e1cdd0;
+    --nested-d03-c01: #cfc9d8;
+    --nested-d03-c02: #c5d4df;
+    --nested-d03-c03: #cad7cb;
+    --nested-d03-c04: #e1dac3;
+    --nested-d03-c05: #d1cdcb;
+}
+
+@media (prefers-color-scheme: dark) {
+    :root {
+        --text-color: rgba(255, 255, 255, 0.90);
+        --bg-color: #121212;
+        --zebra-bg-color: #222222;
+        --muted-color: #aaaaaa;
+        --xmuted-color: #a1a1a1;
+        --link-color: #44b4ec;
+        --link-hover: #7fc9ee;
+        --underline-color: grey;
+        --error-color: #dd4747;
+
+        /* Colors for the nested zebras. */
+        --nested-d01-c00: #220212;
+        --nested-d01-c01: #1c1c22;
+        --nested-d01-c02: #001e20;
+        --nested-d01-c03: #0f0301;
+        --nested-d01-c04: #201d06;
+        --nested-d01-c05: #00192b;
+        --nested-d02-c00: #311121;
+        --nested-d02-c01: #2b2b31;
+        --nested-d02-c02: #0f2d2f;
+        --nested-d02-c03: #1e1210;
+        --nested-d02-c04: #2f2c15;
+        --nested-d02-c05: #0f283a;
+        --nested-d03-c00: #402030;
+        --nested-d03-c01: #3a3a40;
+        --nested-d03-c02: #1e3c3e;
+        --nested-d03-c03: #2d211f;
+        --nested-d03-c04: #3e3b24;
+        --nested-d03-c05: #1e3749;
+    }
+}
+
+body {
+    font-family: sans-serif;
+    color: var(--text-color);
+    background: var(--bg-color);
+}
+
+p.error {
+    color: var(--error-color);
+    font-size: large;
+}
+
+a {
+    color: var(--link-color);
+    text-decoration: none;
+}
+
+a:hover {
+    color: var(--link-hover);
+}
+
+.family a {
+    color: var(--text-color);
+}
+
+u {
+    text-decoration-color: var(--underline-color);
+}
+
+table.index {
+    border-collapse: collapse;
+}
+
+table.index tr:nth-child(odd) {
+
+        background-color: var(--zebra-bg-color);
+}
+
+table.index td {
+    padding: 0.25em 0.5em;
+}
+
+table.index td.bucket {
+    min-width: 2em;
+    text-align: center;
+}
+
+table.index td.active {
+    /* Make the "active" column wider so there's less jumping on refresh. */
+    min-width: 5em;
+    text-align: right;
+}
+
+table.index a {
+    text-decoration: none;
+}
+
+a.muted {
+    color: var(--muted-color);
+}
+
+table.trace {
+    font-family: monospace;
+    border-collapse: collapse;
+}
+
+table.trace thead {
+    border-bottom: 1px solid var(--text-color);
+}
+
+table.trace th {
+    text-align: left;
+    padding: 0.1em 1em;
+}
+
+table.trace tr.title {
+    font-weight: bold;
+}
+
+table.trace td {
+    padding: 0.1em 1em;
+}
+
+table.trace td.when {
+    text-align: right;
+}
+
+table.trace td.duration {
+    text-align: right;
+    white-space: pre;
+}
+
+table.trace td.msg {
+    white-space: pre-wrap;
+}
+
+span.depth {
+    color: var(--xmuted-color);
+}
+
+div.emoji {
+    /* Emojis sometimes are rendered excessively tall. */
+    /* This ensures they're sized appropriately. */
+    max-height: 1.3em;
+    overflow: hidden;
+}
+
+table.latencies {
+    text-align: right;
+}
+
+table.latencies td {
+    padding: 0 0.3em;
+}
+
+table.latencies th {
+    text-align: center;
+}
+
+meter {
+    width: 15em;
+}
--- a/internal/nettrace/test_server.go
+++ b/internal/nettrace/test_server.go
@@ -0,0 +1,243 @@
+//go:build ignore
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"net/http"
+	"time"
+
+	_ "net/http/pprof"
+
+	"blitiri.com.ar/go/srv/nettrace"
+)
+
+func main() {
+	addr := flag.String("addr", ":8080", "listening address")
+	flag.Parse()
+
+	go RandomEvents("random-one")
+	go RandomEvents("random-two")
+	go RandomEvents("random-three")
+	go RandomLongEvent("random-long", "long-one")
+	go RandomLongEvent("random-long", "long-two")
+	go RandomLongEvent("random-long", "long-three")
+	go RandomBunny("random-bunny", "first 🐇")
+	go RandomNested("random-nested")
+	go RandomLazy("random-lazy")
+
+	http.DefaultServeMux.Handle("/",
+		WithLogging(http.HandlerFunc(HandleRoot)))
+
+	http.DefaultServeMux.Handle("/debug/traces",
+		WithLogging(http.HandlerFunc(nettrace.RenderTraces)))
+
+	fmt.Printf("listening on %s\n", *addr)
+	http.ListenAndServe(*addr, nil)
+}
+
+func RandomEvents(family string) {
+	for i := 0; ; i++ {
+		tr := nettrace.New(family, fmt.Sprintf("evt-%d", i))
+		randomTrace(family, tr)
+	}
+}
+
+func randomTrace(family string, tr nettrace.Trace) {
+	tr.Printf("this is a random event")
+	tr.Printf("and it has a random delay")
+	delay := time.Duration(rand.Intn(1000)) * time.Millisecond
+	tr.Printf("sleeping %v", delay)
+	time.Sleep(delay)
+
+	if rand.Intn(100) < 1 {
+		tr.Printf("this unlucky one is an error")
+		tr.SetError()
+	}
+
+	if rand.Intn(100) < 10 {
+		tr.Printf("this one got super verbose!")
+		for j := 0; j < 100; j++ {
+			tr.Printf("message %d", j)
+		}
+	}
+
+	if rand.Intn(100) < 30 {
+		tr.Printf("this one had a child")
+		c := tr.NewChild(family, "achild")
+		go randomTrace(family, c)
+	}
+
+	tr.Printf("all done")
+	tr.Finish()
+}
+
+func RandomLongEvent(family, title string) {
+	tr := nettrace.New(family, title)
+	tr.Printf("this is a random long event")
+	for i := 0; ; i++ {
+		delay := time.Duration(rand.Intn(100)) * time.Millisecond
+		time.Sleep(delay)
+		tr.Printf("message %d, slept %v", i, delay)
+	}
+	tr.Finish()
+}
+
+func RandomBunny(family, title string) {
+	tr := nettrace.New(family, title)
+	tr.SetMaxEvents(100)
+	tr.Printf("this is the top 🐇")
+	for i := 0; ; i++ {
+		delay := time.Duration(rand.Intn(100)) * time.Millisecond
+		time.Sleep(delay)
+		tr.Printf("message %d, slept %v", i, delay)
+
+		if rand.Intn(100) < 40 {
+			c := tr.NewChild(family, fmt.Sprintf("child-%d", i))
+			go randomTrace(family, c)
+		}
+
+		if rand.Intn(100) < 40 {
+			n := nettrace.New(family, fmt.Sprintf("linked-%d", i))
+			go randomTrace(family, n)
+			tr.Link(n, "linking with this guy")
+		}
+	}
+	tr.Finish()
+}
+
+func randomNested(family string, depth int, parent nettrace.Trace) {
+	tr := parent.NewChild(family, fmt.Sprintf("nested-%d", depth))
+	defer tr.Finish()
+
+	tr.Printf("I am a spoiled child")
+
+	delay := time.Duration(rand.Intn(100)) * time.Millisecond
+	time.Sleep(delay)
+	tr.Printf("slept %v", delay)
+
+	if depth > 10 {
+		tr.Printf("I went too far")
+		return
+	}
+
+	// If we make this < 50, then it grows forever.
+	if rand.Intn(100) < 75 {
+		tr.Printf("I sang really well")
+		return
+	}
+
+	max := rand.Intn(5)
+	for i := 0; i < max; i++ {
+		tr.Printf("spawning %d", i)
+		go randomNested(family, depth+1, tr)
+	}
+
+}
+func RandomNested(family string) {
+	tr := nettrace.New(family, "nested-0")
+	for i := 0; ; i++ {
+		randomNested(family, 1, tr)
+	}
+}
+
+func RandomLazy(family string) {
+	for i := 0; ; i++ {
+		tr := nettrace.New(family, fmt.Sprintf("evt-%d", i))
+		tr.Printf("I am very lazy and do little work")
+		tr.Finish()
+		time.Sleep(500 * time.Millisecond)
+	}
+}
+
+func HandleRoot(w http.ResponseWriter, r *http.Request) {
+	if delayS := r.FormValue("delay"); delayS != "" {
+		delay, err := time.ParseDuration(delayS)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		time.Sleep(delay)
+	}
+
+	if withError := r.FormValue("error"); withError != "" {
+		tr, _ := nettrace.FromContext(r.Context())
+		tr.SetError()
+	}
+
+	w.Write([]byte(rootHTML))
+}
+
+func WithLogging(parent http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		tr := nettrace.New("http", r.URL.String())
+		defer tr.Finish()
+
+		// Associate the trace with this request.
+		r = r.WithContext(nettrace.NewContext(r.Context(), tr))
+
+		tr.Printf("%s %s %s %s",
+			r.RemoteAddr, r.Proto, r.Method, r.URL.String())
+		start := time.Now()
+		parent.ServeHTTP(w, r)
+		latency := time.Since(start)
+		tr.Printf("handler took %s", latency)
+
+		// 1.2.3.4:34575 HTTP/2.0 GET /path = 1.2ms
+		fmt.Printf("%s %s %s %s = %s\n",
+			r.RemoteAddr, r.Proto, r.Method, r.URL.String(),
+			latency)
+	})
+}
+
+const rootHTML = `
+<html>
+<head>
+<style>
+  td {
+    min-width: 2em;
+	text-align: center;
+  }
+  input#delay {
+    width: 5em;
+  }
+</style>
+</head>
+<body>
+
+<a href="/debug/traces">Traces</a><p>
+
+<table>
+  <tr>
+  <td>Delay:</td>
+  <td><a href="/?delay=0s">0s</a></td>
+  <td><a href="/?delay=0.1s">0.1s</a></td>
+  <td><a href="/?delay=0.2s">0.2s</a></td>
+  <td><a href="/?delay=0.5s">0.5s</a></td>
+  <td><a href="/?delay=1s">1s</a></td>
+  </tr>
+
+  <tr>
+  <td>+ error:</td>
+  <td><a href="/?delay=0&error=on">0s</a></td>
+  <td><a href="/?delay=0.1s&error=on">0.1s</a></td>
+  <td><a href="/?delay=0.2s&error=on">0.2s</a></td>
+  <td><a href="/?delay=0.5s&error=on">0.5s</a></td>
+  <td><a href="/?delay=1s&error=on">1s</a></td>
+  </tr>
+</table>
+
+<form action="/" method="get">
+  <label for="delay">Custom delay:</label>
+  <input type="text" id="delay" name="delay" placeholder="250ms"
+    autofocus required>
+  <input type="checkbox" id="error" name="error">
+  <label for="error">is error</label>
+  <input type="submit">
+</form>
+
+</body>
+</html>
+`
--- a/internal/nettrace/trace.go
+++ b/internal/nettrace/trace.go
@@ -0,0 +1,558 @@
+// Package nettrace implements tracing of requests. Traces are created by
+// nettrace.New, and can then be viewed over HTTP on /debug/traces.
+package nettrace
+
+import (
+	"container/ring"
+	"fmt"
+	"math/rand"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+)
+
+// IDs are of the form "family!timestamp!unique identifier", which allows for
+// sorting them by time much easily, and also some convenient optimizations
+// when looking up an id across all the known ones.
+// Family is not escaped. It should not contain the separator.
+// It is not expected to be stable, for internal use only.
+type id string
+
+func newID(family string, ts int64) id {
+	return id(
+		family + "!" +
+			strconv.FormatInt(ts, 10) + "!" +
+			strconv.FormatUint(rand.Uint64(), 10))
+}
+
+func (id id) Family() string {
+	sp := strings.SplitN(string(id), "!", 2)
+	if len(sp) != 2 {
+		return string(id)
+	}
+	return sp[0]
+}
+
+// Trace represents a single request trace.
+type Trace interface {
+	// NewChild creates a new trace, that is a child of this one.
+	NewChild(family, title string) Trace
+
+	// Link to another trace with the given message.
+	Link(other Trace, msg string)
+
+	// SetMaxEvents sets the maximum number of events that will be stored in
+	// the trace. It must be called right after initialization.
+	SetMaxEvents(n int)
+
+	// SetError marks that the trace was for an error event.
+	SetError()
+
+	// Printf adds a message to the trace.
+	Printf(format string, a ...interface{})
+
+	// Errorf adds a message to the trace, marks it as an error, and returns
+	// an error for it.
+	Errorf(format string, a ...interface{}) error
+
+	// Finish marks the trace as complete.
+	// The trace should not be used after calling this method.
+	Finish()
+}
+
+// A single trace. Can be active or inactive.
+// Exported fields are allowed to be accessed directly, e.g. by the HTTP
+// handler. Private ones are mutex protected.
+type trace struct {
+	ID id
+
+	Family string
+	Title  string
+
+	Parent *trace
+
+	Start time.Time
+
+	// Fields below are mu-protected.
+	// We keep them unexported so they're not accidentally accessed in html
+	// templates.
+	mu sync.Mutex
+
+	end time.Time
+
+	isError   bool
+	maxEvents int
+
+	// We keep two separate groups: the first ~1/3rd events in a simple slice,
+	// and the last 2/3rd in a ring so we can drop events without losing the
+	// first ones.
+	cutoff      int
+	firstEvents []event
+	lastEvents  *evtRing
+}
+
+type evtType uint8
+
+const (
+	evtLOG = evtType(1 + iota)
+	evtCHILD
+	evtLINK
+	evtDROP
+)
+
+func (t evtType) IsLog() bool   { return t == evtLOG }
+func (t evtType) IsChild() bool { return t == evtCHILD }
+func (t evtType) IsLink() bool  { return t == evtLINK }
+func (t evtType) IsDrop() bool  { return t == evtDROP }
+
+type event struct {
+	When time.Time
+	Type evtType
+
+	Ref *trace
+	Msg string
+}
+
+const defaultMaxEvents = 30
+
+func newTrace(family, title string) *trace {
+	start := time.Now()
+	tr := &trace{
+		ID:     newID(family, start.UnixNano()),
+		Family: family,
+		Title:  title,
+		Start:  start,
+
+		maxEvents: defaultMaxEvents,
+		cutoff:    defaultMaxEvents / 3,
+	}
+
+	// Pre-allocate a couple of events to speed things up.
+	// Don't allocate lastEvents, that can be expensive and it is not always
+	// needed. No need to slow down trace creation just for it.
+	tr.firstEvents = make([]event, 0, 4)
+
+	familiesMu.Lock()
+	ft, ok := families[family]
+	if !ok {
+		ft = newFamilyTraces()
+		families[family] = ft
+	}
+	familiesMu.Unlock()
+
+	ft.mu.Lock()
+	ft.active[tr.ID] = tr
+	ft.mu.Unlock()
+
+	return tr
+}
+
+// New creates a new trace with the given family and title.
+func New(family, title string) Trace {
+	return newTrace(family, title)
+}
+
+func (tr *trace) append(evt *event) {
+	tr.mu.Lock()
+	defer tr.mu.Unlock()
+
+	if len(tr.firstEvents) < tr.cutoff {
+		tr.firstEvents = append(tr.firstEvents, *evt)
+		return
+	}
+
+	if tr.lastEvents == nil {
+		// The ring holds the last 2/3rds of the events.
+		tr.lastEvents = newEvtRing(tr.maxEvents - tr.cutoff)
+	}
+
+	tr.lastEvents.Add(evt)
+}
+
+// String is for debugging only.
+func (tr *trace) String() string {
+	return fmt.Sprintf("trace{%s, %s, %q, %d}",
+		tr.Family, tr.Title, tr.ID, len(tr.Events()))
+}
+
+func (tr *trace) NewChild(family, title string) Trace {
+	c := newTrace(family, title)
+	c.Parent = tr
+
+	// Add the event to the parent.
+	evt := &event{
+		When: time.Now(),
+		Type: evtCHILD,
+		Ref:  c,
+	}
+	tr.append(evt)
+
+	return c
+}
+
+func (tr *trace) Link(other Trace, msg string) {
+	evt := &event{
+		When: time.Now(),
+		Type: evtLINK,
+		Ref:  other.(*trace),
+		Msg:  msg,
+	}
+	tr.append(evt)
+}
+
+func (tr *trace) SetMaxEvents(n int) {
+	// Set a minimum of 6, so the truncation works without running into
+	// issues.
+	if n < 6 {
+		n = 6
+	}
+	tr.mu.Lock()
+	tr.maxEvents = n
+	tr.cutoff = n / 3
+	tr.mu.Unlock()
+}
+
+func (tr *trace) SetError() {
+	tr.mu.Lock()
+	tr.isError = true
+	tr.mu.Unlock()
+}
+
+func (tr *trace) Printf(format string, a ...interface{}) {
+	evt := &event{
+		When: time.Now(),
+		Type: evtLOG,
+		Msg:  fmt.Sprintf(format, a...),
+	}
+
+	tr.append(evt)
+}
+
+func (tr *trace) Errorf(format string, a ...interface{}) error {
+	tr.SetError()
+	err := fmt.Errorf(format, a...)
+	tr.Printf(err.Error())
+	return err
+}
+
+func (tr *trace) Finish() {
+	tr.mu.Lock()
+	tr.end = time.Now()
+	tr.mu.Unlock()
+
+	familiesMu.Lock()
+	ft := families[tr.Family]
+	familiesMu.Unlock()
+	ft.finalize(tr)
+}
+
+// Duration of this trace.
+func (tr *trace) Duration() time.Duration {
+	tr.mu.Lock()
+	start, end := tr.Start, tr.end
+	tr.mu.Unlock()
+
+	if end.IsZero() {
+		return time.Since(start)
+	}
+	return end.Sub(start)
+}
+
+// Events returns a copy of the trace events.
+func (tr *trace) Events() []event {
+	tr.mu.Lock()
+	defer tr.mu.Unlock()
+
+	evts := make([]event, len(tr.firstEvents))
+	copy(evts, tr.firstEvents)
+
+	if tr.lastEvents == nil {
+		return evts
+	}
+
+	if !tr.lastEvents.firstDrop.IsZero() {
+		evts = append(evts, event{
+			When: tr.lastEvents.firstDrop,
+			Type: evtDROP,
+		})
+	}
+
+	tr.lastEvents.Do(func(e *event) {
+		evts = append(evts, *e)
+	})
+
+	return evts
+}
+
+func (tr *trace) IsError() bool {
+	tr.mu.Lock()
+	defer tr.mu.Unlock()
+	return tr.isError
+}
+
+//
+// Trace hierarchy
+//
+// Each trace belongs to a family. For each family, we have all active traces,
+// and then N traces that finished <1s, N that finished <2s, etc.
+
+// We keep this many buckets of finished traces.
+const nBuckets = 8
+
+// Buckets to use. Lenght must match nBuckets.
+// "Traces with a latency >= $duration".
+var buckets = []time.Duration{
+	time.Duration(0),
+	5 * time.Millisecond,
+	10 * time.Millisecond,
+	50 * time.Millisecond,
+	100 * time.Millisecond,
+	300 * time.Millisecond,
+	1 * time.Second,
+	10 * time.Second,
+}
+
+func findBucket(latency time.Duration) int {
+	for i, d := range buckets {
+		if latency >= d {
+			continue
+		}
+		return i - 1
+	}
+
+	return nBuckets - 1
+}
+
+// How many traces we keep per bucket.
+const tracesInBucket = 10
+
+type traceRing struct {
+	ring *ring.Ring
+	max  int
+	l    int
+}
+
+func newTraceRing(n int) *traceRing {
+	return &traceRing{
+		ring: ring.New(n),
+		max:  n,
+	}
+}
+
+func (r *traceRing) Add(tr *trace) {
+	r.ring.Value = tr
+	r.ring = r.ring.Next()
+	if r.l < r.max {
+		r.l++
+	}
+}
+
+func (r *traceRing) Len() int {
+	return r.l
+}
+
+func (r *traceRing) Do(f func(tr *trace)) {
+	r.ring.Do(func(x interface{}) {
+		if x == nil {
+			return
+		}
+		f(x.(*trace))
+	})
+}
+
+type familyTraces struct {
+	mu sync.Mutex
+
+	// All active ones.
+	active map[id]*trace
+
+	// The ones we decided to keep.
+	// Each bucket is a ring-buffer, finishedHead keeps the head pointer.
+	finished [nBuckets]*traceRing
+
+	// The ones that errored have their own bucket.
+	errors *traceRing
+
+	// Histogram of latencies.
+	latencies histogram
+}
+
+func newFamilyTraces() *familyTraces {
+	ft := &familyTraces{}
+	ft.active = map[id]*trace{}
+	for i := 0; i < nBuckets; i++ {
+		ft.finished[i] = newTraceRing(tracesInBucket)
+	}
+	ft.errors = newTraceRing(tracesInBucket)
+	return ft
+}
+
+func (ft *familyTraces) LenActive() int {
+	ft.mu.Lock()
+	defer ft.mu.Unlock()
+	return len(ft.active)
+}
+
+func (ft *familyTraces) LenErrors() int {
+	ft.mu.Lock()
+	defer ft.mu.Unlock()
+	return ft.errors.Len()
+}
+
+func (ft *familyTraces) LenBucket(b int) int {
+	ft.mu.Lock()
+	defer ft.mu.Unlock()
+	return ft.finished[b].Len()
+}
+
+func (ft *familyTraces) TracesFor(b int, allgt bool) []*trace {
+	ft.mu.Lock()
+	defer ft.mu.Unlock()
+
+	trs := []*trace{}
+	appendTrace := func(tr *trace) {
+		trs = append(trs, tr)
+	}
+	if b == -2 {
+		ft.errors.Do(appendTrace)
+	} else if b == -1 {
+		for _, tr := range ft.active {
+			appendTrace(tr)
+		}
+	} else if b < nBuckets {
+		ft.finished[b].Do(appendTrace)
+		if allgt {
+			for i := b + 1; i < nBuckets; i++ {
+				ft.finished[i].Do(appendTrace)
+			}
+		}
+	}
+
+	// Sort them by start, newer first. This is the order that will be used
+	// when displaying them.
+	sort.Slice(trs, func(i, j int) bool {
+		return trs[i].Start.After(trs[j].Start)
+	})
+	return trs
+}
+
+func (ft *familyTraces) find(id id) *trace {
+	ft.mu.Lock()
+	defer ft.mu.Unlock()
+
+	if tr, ok := ft.active[id]; ok {
+		return tr
+	}
+
+	var found *trace
+	for _, bs := range ft.finished {
+		bs.Do(func(tr *trace) {
+			if tr.ID == id {
+				found = tr
+			}
+		})
+		if found != nil {
+			return found
+		}
+	}
+
+	ft.errors.Do(func(tr *trace) {
+		if tr.ID == id {
+			found = tr
+		}
+	})
+	if found != nil {
+		return found
+	}
+
+	return nil
+}
+
+func (ft *familyTraces) finalize(tr *trace) {
+	latency := tr.end.Sub(tr.Start)
+	b := findBucket(latency)
+
+	ft.mu.Lock()
+
+	// Delete from the active list.
+	delete(ft.active, tr.ID)
+
+	// Add it to the corresponding finished bucket, based on the trace
+	// latency.
+	ft.finished[b].Add(tr)
+
+	// Errors go on their own list, in addition to the above.
+	if tr.isError {
+		ft.errors.Add(tr)
+	}
+
+	ft.latencies.Add(b, latency)
+
+	ft.mu.Unlock()
+}
+
+func (ft *familyTraces) Latencies() *histSnapshot {
+	ft.mu.Lock()
+	defer ft.mu.Unlock()
+	return ft.latencies.Snapshot()
+}
+
+//
+// Global state
+//
+
+var (
+	familiesMu sync.Mutex
+	families   = map[string]*familyTraces{}
+)
+
+func copyFamilies() map[string]*familyTraces {
+	n := map[string]*familyTraces{}
+
+	familiesMu.Lock()
+	for f, trs := range families {
+		n[f] = trs
+	}
+	familiesMu.Unlock()
+
+	return n
+}
+
+func findInFamilies(traceID id, refID id) *trace {
+	// First, try to find it via the family.
+	family := traceID.Family()
+	familiesMu.Lock()
+	fts, ok := families[family]
+	familiesMu.Unlock()
+
+	if ok {
+		tr := fts.find(traceID)
+		if tr != nil {
+			return tr
+		}
+	}
+
+	// If that fail and we have a reference, try finding via it.
+	// The reference can be a parent or a child.
+	if refID != id("") {
+		ref := findInFamilies(refID, "")
+		if ref == nil {
+			return nil
+		}
+
+		// Is the reference's parent the one we're looking for?
+		if ref.Parent != nil && ref.Parent.ID == traceID {
+			return ref.Parent
+		}
+
+		// Try to find it in the ref's events.
+		for _, e := range ref.Events() {
+			if e.Ref != nil && e.Ref.ID == traceID {
+				return e.Ref
+			}
+		}
+	}
+	return nil
+}
--- a/internal/nettrace/trace_test.go
+++ b/internal/nettrace/trace_test.go
@@ -0,0 +1,215 @@
+package nettrace
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+)
+
+func expectEvents(t *testing.T, tr Trace, n int) {
+	t.Helper()
+	if evts := tr.(*trace).Events(); len(evts) != n {
+		t.Errorf("expected %d events, got %d", n, len(evts))
+		t.Logf("%v", evts)
+	}
+}
+
+func TestBasic(t *testing.T) {
+	var tr Trace = New("TestBasic", "basic")
+	defer tr.Finish()
+	tr.Printf("hola marola")
+	tr.Printf("hola marola 2")
+
+	c1 := tr.NewChild("TestBasic", "basic child")
+	c1.Printf("hijo de la noche")
+
+	expectEvents(t, tr, 3)
+
+	if s := tr.(*trace).String(); !strings.Contains(s, "TestBasic, basic") {
+		t.Errorf("tr.String does not contain family and title: %q", s)
+	}
+}
+
+func TestLong(t *testing.T) {
+	tr := New("TestLong", "long")
+	defer tr.Finish()
+	tr.SetMaxEvents(100)
+
+	// First 90 events, no drop.
+	for i := 0; i < 90; i++ {
+		tr.Printf("evt %d", i)
+	}
+	expectEvents(t, tr, 90)
+
+	// Up to 99, still no drop.
+	for i := 0; i < 9; i++ {
+		tr.Printf("evt %d", i)
+	}
+	expectEvents(t, tr, 99)
+
+	// Note that we go up to 101 due to rounding errors, we're ok with it.
+	tr.Printf("evt 100")
+	expectEvents(t, tr, 100)
+	tr.Printf("evt 101")
+	expectEvents(t, tr, 101)
+	tr.Printf("evt 102")
+	expectEvents(t, tr, 101)
+
+	// Add more events, expect none of them to exceed 101.
+	for i := 0; i < 9; i++ {
+		tr.Printf("evt %d", i)
+		expectEvents(t, tr, 101)
+	}
+}
+
+func TestIsError(t *testing.T) {
+	tr := New("TestIsError", "long")
+	defer tr.Finish()
+	if tr.(*trace).IsError() != false {
+		tr.Errorf("new trace is error")
+	}
+
+	tr.Errorf("this is an error")
+	if tr.(*trace).IsError() != true {
+		tr.Errorf("error not recorded properly")
+	}
+}
+
+func TestFindViaRef(t *testing.T) {
+	parent := New("TestFindViaRef", "parent")
+	parentID := parent.(*trace).ID
+	defer parent.Finish()
+	child := parent.NewChild("TestFindViaRef", "child")
+	childID := child.(*trace).ID
+	defer child.Finish()
+
+	// Basic check that both can be directly found.
+	if f := findInFamilies(parentID, id("")); f != parent {
+		t.Errorf("didn't find parent directly, found: %v", f)
+	}
+	if f := findInFamilies(childID, id("")); f != child {
+		t.Errorf("didn't find child directly, found: %v", f)
+	}
+
+	// Hackily remove child from the active traces, to force a reference
+	// lookup when needed.
+	familiesMu.Lock()
+	delete(families["TestFindViaRef"].active, child.(*trace).ID)
+	familiesMu.Unlock()
+
+	// Now the child should not be findable directly anymore.
+	if f := findInFamilies(childID, id("")); f != nil {
+		t.Errorf("child should not be findable directly, found: %v", f)
+	}
+
+	// But we still should be able to get to it via the parent.
+	if f := findInFamilies(childID, parentID); f != child {
+		t.Errorf("didn't find child via parent, found: %v", f)
+	}
+}
+
+func TestMaxEvents(t *testing.T) {
+	tr := trace{}
+
+	// Test that we keep a minimum, and that the cutoff behaves as expected.
+	cases := []struct{ me, exp, cutoffExp int }{
+		{0, 6, 2},
+		{5, 6, 2},
+		{6, 6, 2},
+		{7, 7, 2},
+		{8, 8, 2},
+		{9, 9, 3},
+		{12, 12, 4},
+	}
+	for _, c := range cases {
+		tr.SetMaxEvents(c.me)
+		if got := tr.maxEvents; got != c.exp {
+			t.Errorf("set max events to %d, expected %d, got %d",
+				c.me, c.exp, got)
+		}
+		if got := tr.cutoff; got != c.cutoffExp {
+			t.Errorf("set max events to %d, expected cutoff %d, got %d",
+				c.me, c.cutoffExp, got)
+		}
+	}
+}
+
+func TestFind(t *testing.T) {
+	// Make sure we find the right bucket, including for latencies above the
+	// last one.
+	for i, d := range buckets {
+		found := findBucket(d + 1*time.Millisecond)
+		if found != i {
+			t.Errorf("find bucket [%s + 1ms] got %d, expected %d",
+				d, found, i)
+		}
+	}
+
+	// Create a family, populate it with traces in all buckets.
+	finished := [nBuckets]*trace{}
+	for i, d := range buckets {
+		lat := d + 1*time.Millisecond
+		tr := newTrace("TestFind", fmt.Sprintf("evt-%s", lat))
+		tr.end = tr.Start.Add(lat)
+		families[tr.Family].finalize(tr)
+		finished[i] = tr
+	}
+
+	// Also have an active trace.
+	activeTr := newTrace("TestFind", "active")
+
+	// And add an error trace, which isn't on any of the other buckets (to
+	// simulate that they've been rotated out of the latency buckets, but are
+	// still around in errors)
+	errTr := newTrace("TestFind", "evt-err")
+	errTr.end = errTr.Start.Add(666 * time.Millisecond)
+	errTr.SetError()
+	delete(families[errTr.Family].active, errTr.ID)
+	families[errTr.Family].errors.Add(errTr)
+
+	// Find all of them.
+	for i := range buckets {
+		found := findInFamilies(finished[i].ID, "")
+		if found != finished[i] {
+			t.Errorf("finding trace %d on bucket %s, expected %v, got %v",
+				i, buckets[i], finished[i], found)
+		}
+	}
+	if found := findInFamilies(activeTr.ID, ""); found != activeTr {
+		t.Errorf("finding active trace, expected %v, got %v",
+			activeTr, found)
+	}
+	if found := findInFamilies(errTr.ID, ""); found != errTr {
+		t.Errorf("finding error trace, expected %v, got %v",
+			errTr, found)
+	}
+
+	// Non-existent traces.
+	if found := findInFamilies("does!notexist", ""); found != nil {
+		t.Errorf("finding non-existent trace, expected nil, got %v", found)
+	}
+	if found := findInFamilies("does!notexist", "does!notexist"); found != nil {
+		t.Errorf("finding non-existent trace w/ref, expected nil, got %v", found)
+	}
+}
+
+func TestFindParent(t *testing.T) {
+	// Direct parent finding.
+	// If the ref is the parent, we should find it even if the target trace
+	// isn't known to the family (e.g. the child is there, but the parent has
+	// been rotated and is no longer referenced).
+
+	parent := newTrace("TestFindParent", "parent")
+	child := parent.NewChild("TestFindParent", "child").(*trace)
+
+	// Remove the parent from the active list.
+	delete(families[parent.Family].active, parent.ID)
+
+	if found := findInFamilies(parent.ID, ""); found != nil {
+		t.Errorf("finding parent without ref, expected nil, got %v", found)
+	}
+	if found := findInFamilies(parent.ID, child.ID); found != parent {
+		t.Errorf("finding parent with ref, expected %v, got %v", parent, found)
+	}
+}
--- a/internal/trace/trace.go
+++ b/internal/trace/trace.go
@@ -1,23 +1,15 @@
-// Package trace extends golang.org/x/net/trace.
+// Package trace extends nettrace with logging.
 package trace

 import (
 	"fmt"
-	"net/http"
 	"strconv"

+	"blitiri.com.ar/go/chasquid/internal/nettrace"
 	"blitiri.com.ar/go/log"
-
-	nettrace "golang.org/x/net/trace"
 )

 func init() {
-	// golang.org/x/net/trace has its own authorization which by default only
-	// allows localhost. This can be confusing and limiting in environments
-	// which access the monitoring server remotely.
-	nettrace.AuthRequest = func(req *http.Request) (any, sensitive bool) {
-		return true, true
-	}
 }

 // A Trace represents an active request.
@@ -38,9 +30,16 @@ func New(family, title string) *Trace {
 	return t
 }

+// NewChild creates a new child trace.
+func (t *Trace) NewChild(family, title string) *Trace {
+	n := &Trace{family, title, t.t.NewChild(family, title)}
+	n.t.SetMaxEvents(30)
+	return n
+}
+
 // Printf adds this message to the trace's log.
 func (t *Trace) Printf(format string, a ...interface{}) {
-	t.t.LazyPrintf(format, a...)
+	t.t.Printf(format, a...)

 	log.Log(log.Info, 1, "%s %s: %s", t.family, t.title,
 		quote(fmt.Sprintf(format, a...)))
@@ -48,7 +47,7 @@ func (t *Trace) Printf(format string, a ...interface{}) {

 // Debugf adds this message to the trace's log, with a debugging level.
 func (t *Trace) Debugf(format string, a ...interface{}) {
-	t.t.LazyPrintf(format, a...)
+	t.t.Printf(format, a...)

 	log.Log(log.Debug, 1, "%s %s: %s",
 		t.family, t.title, quote(fmt.Sprintf(format, a...)))
@@ -59,7 +58,7 @@ func (t *Trace) Errorf(format string, a ...interface{}) error {
 	// Note we can't just call t.Error here, as it breaks caller logging.
 	err := fmt.Errorf(format, a...)
 	t.t.SetError()
-	t.t.LazyPrintf("error: %v", err)
+	t.t.Printf("error: %v", err)

 	log.Log(log.Info, 1, "%s %s: error: %s", t.family, t.title,
 		quote(err.Error()))
@@ -70,7 +69,7 @@ func (t *Trace) Errorf(format string, a ...interface{}) error {
 // trace's log.
 func (t *Trace) Error(err error) error {
 	t.t.SetError()
-	t.t.LazyPrintf("error: %v", err)
+	t.t.Printf("error: %v", err)

 	log.Log(log.Info, 1, "%s %s: error: %s", t.family, t.title,
 		quote(err.Error()))
@@ -83,45 +82,6 @@ func (t *Trace) Finish() {
 	t.t.Finish()
 }

-// EventLog is used for tracing long-lived objects.
-type EventLog struct {
-	family string
-	title  string
-	e      nettrace.EventLog
-}
-
-// NewEventLog returns a new EventLog.
-func NewEventLog(family, title string) *EventLog {
-	return &EventLog{family, title, nettrace.NewEventLog(family, title)}
-}
-
-// Printf adds the message to the EventLog.
-func (e *EventLog) Printf(format string, a ...interface{}) {
-	e.e.Printf(format, a...)
-
-	log.Log(log.Info, 1, "%s %s: %s", e.family, e.title,
-		quote(fmt.Sprintf(format, a...)))
-}
-
-// Debugf adds the message to the EventLog, with a debugging level.
-func (e *EventLog) Debugf(format string, a ...interface{}) {
-	e.e.Printf(format, a...)
-
-	log.Log(log.Debug, 1, "%s %s: %s", e.family, e.title,
-		quote(fmt.Sprintf(format, a...)))
-}
-
-// Errorf adds the message to the EventLog, with an error level.
-func (e *EventLog) Errorf(format string, a ...interface{}) error {
-	err := fmt.Errorf(format, a...)
-	e.e.Errorf("error: %v", err)
-
-	log.Log(log.Info, 1, "%s %s: error: %s",
-		e.family, e.title, quote(err.Error()))
-
-	return err
-}
-
 func quote(s string) string {
 	qs := strconv.Quote(s)
 	return qs[1 : len(qs)-1]