1
0
mirror of https://blitiri.com.ar/repos/chasquid synced 2025-12-17 14:37:02 +00:00

nettrace: Add a new tracing library

This commit introduces a new tracing library, that replaces
golang.org/x/net/trace, and supports (amongts other thing) nested
traces.

This is a minimal change, future patches will make use of the new
functionality.
This commit is contained in:
Alberto Bertogli
2022-03-07 01:13:34 +00:00
parent 3ebe5c5173
commit 9c6661eca2
18 changed files with 2276 additions and 54 deletions

View File

@@ -15,7 +15,7 @@ import (
// Global event logs.
var (
authLog = trace.NewEventLog("Authentication", "Incoming SMTP")
authLog = trace.New("Authentication", "Incoming SMTP")
)
// Logger contains a backend used to log data to, such as a file or syslog.

View File

@@ -0,0 +1,57 @@
package nettrace
import (
"testing"
)
// Our benchmark loop is similar to the one from golang.org/x/net/trace, so we
// can compare results.
func runBench(b *testing.B, events int) {
nTraces := (b.N + events + 1) / events
for i := 0; i < nTraces; i++ {
tr := New("bench", "test")
for j := 0; j < events; j++ {
tr.Printf("%d", j)
}
tr.Finish()
}
}
func BenchmarkTrace_2(b *testing.B) {
runBench(b, 2)
}
func BenchmarkTrace_10(b *testing.B) {
runBench(b, 10)
}
func BenchmarkTrace_100(b *testing.B) {
runBench(b, 100)
}
func BenchmarkTrace_1000(b *testing.B) {
runBench(b, 1000)
}
func BenchmarkTrace_10000(b *testing.B) {
runBench(b, 10000)
}
func BenchmarkNewAndFinish(b *testing.B) {
for i := 0; i < b.N; i++ {
tr := New("bench", "test")
tr.Finish()
}
}
func BenchmarkPrintf(b *testing.B) {
tr := New("bench", "test")
defer tr.Finish()
b.ResetTimer()
for i := 0; i < b.N; i++ {
// Keep this without any formatting, so we measure our code instead of
// the performance of fmt.Sprintf.
tr.Printf("this is printf")
}
}

View File

@@ -0,0 +1,40 @@
package nettrace
import "context"
type ctxKeyT string
const ctxKey ctxKeyT = "blitiri.com.ar/go/srv/nettrace"
// NewContext returns a new context with the given trace attached.
func NewContext(ctx context.Context, tr Trace) context.Context {
return context.WithValue(ctx, ctxKey, tr)
}
// FromContext returns the trace attached to the given context (if any).
func FromContext(ctx context.Context) (Trace, bool) {
tr, ok := ctx.Value(ctxKey).(Trace)
return tr, ok
}
// FromContextOrNew returns the trace attached to the given context, or a new
// trace if there is none.
func FromContextOrNew(ctx context.Context, family, title string) (Trace, context.Context) {
tr, ok := FromContext(ctx)
if ok {
return tr, ctx
}
tr = New(family, title)
return tr, NewContext(ctx, tr)
}
// ChildFromContext returns a new trace that is a child of the one attached to
// the context (if any).
func ChildFromContext(ctx context.Context, family, title string) Trace {
parent, ok := FromContext(ctx)
if ok {
return parent.NewChild(family, title)
}
return New(family, title)
}

View File

@@ -0,0 +1,66 @@
package nettrace
import (
"context"
"testing"
)
func TestContext(t *testing.T) {
tr := New("TestContext", "trace")
defer tr.Finish()
// Attach the trace to a new context.
ctx := NewContext(context.Background(), tr)
// Get the trace back from the context.
{
tr2, ok := FromContext(ctx)
if !ok {
t.Errorf("Context with trace returned not found")
}
if tr != tr2 {
t.Errorf("Trace from context is different: %v != %v", tr, tr2)
}
}
// Create a child trace from the context.
{
tr3 := ChildFromContext(ctx, "TestContext", "child")
if p := tr3.(*trace).Parent; p != tr {
t.Errorf("Child doesn't have the right parent: %v != %v", p, tr)
}
tr3.Finish()
}
// FromContextOrNew returns the one from the context.
{
tr4, ctx4 := FromContextOrNew(ctx, "TestContext", "from-ctx")
if ctx4 != ctx {
t.Errorf("Got new context: %v != %v", ctx4, ctx)
}
if tr4 != tr {
t.Errorf("Context with trace returned new trace: %v != %v", tr4, tr)
}
}
// FromContextOrNew needs to create a new one.
{
tr5, ctx5 := FromContextOrNew(
context.Background(), "TestContext", "tr5")
if tr, _ := FromContext(ctx5); tr != tr5 {
t.Errorf("Context with trace returned the wrong trace: %v != %v",
tr, tr5)
}
tr5.Finish()
}
// Child from a context that has no trace attached.
{
tr6 := ChildFromContext(
context.Background(), "TestContext", "child")
tr6.Finish()
if p := tr6.(*trace).Parent; p != nil {
t.Errorf("Expected orphan trace, it has a parent: %v", p)
}
}
}

View File

@@ -0,0 +1,42 @@
package nettrace
import "time"
type evtRing struct {
evts []event
max int
pos int // Points to the latest element.
firstDrop time.Time
}
func newEvtRing(n int) *evtRing {
return &evtRing{
max: n,
pos: -1,
}
}
func (r *evtRing) Add(e *event) {
if len(r.evts) < r.max {
r.evts = append(r.evts, *e)
r.pos++
return
}
r.pos = (r.pos + 1) % r.max
// Record the first drop as the time of the first dropped message.
if r.firstDrop.IsZero() {
r.firstDrop = r.evts[r.pos].When
}
r.evts[r.pos] = *e
}
func (r *evtRing) Do(f func(e *event)) {
for i := 0; i < len(r.evts); i++ {
// Go from older to newer by starting at (r.pos+1).
pos := (r.pos + 1 + i) % len(r.evts)
f(&r.evts[pos])
}
}

View File

@@ -0,0 +1,71 @@
package nettrace
import (
"time"
)
type histogram struct {
count [nBuckets]uint64
totalQ uint64
totalT time.Duration
min time.Duration
max time.Duration
}
func (h *histogram) Add(bucket int, latency time.Duration) {
if h.totalQ == 0 || h.min > latency {
h.min = latency
}
if h.max < latency {
h.max = latency
}
h.count[bucket]++
h.totalQ++
h.totalT += latency
}
type histSnapshot struct {
Counts map[time.Duration]line
Count uint64
Avg, Min, Max time.Duration
}
type line struct {
Start time.Duration
BucketIdx int
Count uint64
Percent float32
CumPct float32
}
func (h *histogram) Snapshot() *histSnapshot {
s := &histSnapshot{
Counts: map[time.Duration]line{},
Count: h.totalQ,
Min: h.min,
Max: h.max,
}
if h.totalQ > 0 {
s.Avg = time.Duration(uint64(h.totalT) / h.totalQ)
}
var cumCount uint64
for i := 0; i < nBuckets; i++ {
cumCount += h.count[i]
l := line{
Start: buckets[i],
BucketIdx: i,
Count: h.count[i],
}
if h.totalQ > 0 {
l.Percent = float32(h.count[i]) / float32(h.totalQ) * 100
l.CumPct = float32(cumCount) / float32(h.totalQ) * 100
}
s.Counts[buckets[i]] = l
}
return s
}

View File

@@ -0,0 +1,29 @@
package nettrace
import (
"testing"
"time"
)
func TestHistogramBasic(t *testing.T) {
h := histogram{}
h.Add(1, 1*time.Millisecond)
snap := h.Snapshot()
if snap.Count != 1 ||
snap.Min != 1*time.Millisecond ||
snap.Max != 1*time.Millisecond ||
snap.Avg != 1*time.Millisecond {
t.Errorf("expected snapshot with only 1 sample, got %v", snap)
}
}
func TestHistogramEmpty(t *testing.T) {
h := histogram{}
snap := h.Snapshot()
if len(snap.Counts) != nBuckets || snap.Count != 0 ||
snap.Avg != 0 || snap.Min != 0 || snap.Max != 0 {
t.Errorf("expected zero snapshot, got %v", snap)
}
}

259
internal/nettrace/http.go Normal file
View File

@@ -0,0 +1,259 @@
package nettrace
import (
"bytes"
"embed"
"fmt"
"hash/crc32"
"html/template"
"math"
"net/http"
"sort"
"strconv"
"time"
)
//go:embed "templates/*.tmpl" "templates/*.css"
var templatesFS embed.FS
var top *template.Template
func init() {
top = template.Must(
template.New("_top").Funcs(template.FuncMap{
"stripZeros": stripZeros,
"roundSeconds": roundSeconds,
"roundDuration": roundDuration,
"colorize": colorize,
"depthspan": depthspan,
"shorttitle": shorttitle,
"traceemoji": traceemoji,
}).ParseFS(templatesFS, "templates/*"))
}
// RegisterHandler registers a the trace handler in the given ServeMux, on
// `/debug/traces`.
func RegisterHandler(mux *http.ServeMux) {
mux.HandleFunc("/debug/traces", RenderTraces)
}
// RenderTraces is an http.Handler that renders the tracing information.
func RenderTraces(w http.ResponseWriter, req *http.Request) {
data := &struct {
Buckets *[]time.Duration
FamTraces map[string]*familyTraces
// When displaying traces for a specific family.
Family string
Bucket int
BucketStr string
AllGT bool
Traces []*trace
// When displaying latencies for a specific family.
Latencies *histSnapshot
// When displaying a specific trace.
Trace *trace
AllEvents []traceAndEvent
// Error to show to the user.
Error string
}{}
// Reference the common buckets, no need to copy them.
data.Buckets = &buckets
// Copy the family traces map, so we don't have to keep it locked for too
// long. We'll still need to lock individual entries.
data.FamTraces = copyFamilies()
// Default to showing greater-than.
data.AllGT = true
if all := req.FormValue("all"); all != "" {
data.AllGT, _ = strconv.ParseBool(all)
}
// Fill in the family related parameters.
if fam := req.FormValue("fam"); fam != "" {
if _, ok := data.FamTraces[fam]; !ok {
data.Family = ""
data.Error = "Unknown family"
w.WriteHeader(http.StatusNotFound)
goto render
}
data.Family = fam
if bs := req.FormValue("b"); bs != "" {
i, err := strconv.Atoi(bs)
if err != nil {
data.Error = "Invalid bucket (not a number)"
w.WriteHeader(http.StatusBadRequest)
goto render
} else if i < -2 || i >= nBuckets {
data.Error = "Invalid bucket number"
w.WriteHeader(http.StatusBadRequest)
goto render
}
data.Bucket = i
data.Traces = data.FamTraces[data.Family].TracesFor(i, data.AllGT)
switch i {
case -2:
data.BucketStr = "errors"
case -1:
data.BucketStr = "active"
default:
data.BucketStr = buckets[i].String()
}
}
}
if lat := req.FormValue("lat"); data.Family != "" && lat != "" {
data.Latencies = data.FamTraces[data.Family].Latencies()
}
if traceID := req.FormValue("trace"); traceID != "" {
refID := req.FormValue("ref")
tr := findInFamilies(id(traceID), id(refID))
if tr == nil {
data.Error = "Trace not found"
w.WriteHeader(http.StatusNotFound)
goto render
}
data.Trace = tr
data.Family = tr.Family
data.AllEvents = allEvents(tr)
}
render:
// Write into a buffer, to avoid accidentally holding a lock on http
// writes. It shouldn't happen, but just to be extra safe.
bw := &bytes.Buffer{}
bw.Grow(16 * 1024)
err := top.ExecuteTemplate(bw, "index.html.tmpl", data)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
panic(err)
}
w.Write(bw.Bytes())
}
type traceAndEvent struct {
Trace *trace
Event event
Depth uint
}
// allEvents gets all the events for the trace and its children/linked traces;
// and returns them sorted by timestamp.
func allEvents(tr *trace) []traceAndEvent {
// Map tracking all traces we've seen, to avoid loops.
seen := map[id]bool{}
// Recursively gather all events.
evts := appendAllEvents(tr, []traceAndEvent{}, seen, 0)
// Sort them by time.
sort.Slice(evts, func(i, j int) bool {
return evts[i].Event.When.Before(evts[j].Event.When)
})
return evts
}
func appendAllEvents(tr *trace, evts []traceAndEvent, seen map[id]bool, depth uint) []traceAndEvent {
if seen[tr.ID] {
return evts
}
seen[tr.ID] = true
subTraces := []*trace{}
// Append all events of this trace.
trevts := tr.Events()
for _, e := range trevts {
evts = append(evts, traceAndEvent{tr, e, depth})
if e.Ref != nil {
subTraces = append(subTraces, e.Ref)
}
}
for _, t := range subTraces {
evts = appendAllEvents(t, evts, seen, depth+1)
}
return evts
}
func stripZeros(d time.Duration) string {
if d < time.Second {
_, frac := math.Modf(d.Seconds())
return fmt.Sprintf(" .%6d", int(frac*1000000))
}
return fmt.Sprintf("%.6f", d.Seconds())
}
func roundSeconds(d time.Duration) string {
return fmt.Sprintf("%.6f", d.Seconds())
}
func roundDuration(d time.Duration) time.Duration {
return d.Round(time.Millisecond)
}
func colorize(depth uint, id id) template.CSS {
if depth == 0 {
return template.CSS("rgba(var(--text-color))")
}
if depth > 3 {
depth = 3
}
// Must match the number of nested color variables in the CSS.
colori := crc32.ChecksumIEEE([]byte(id)) % 6
return template.CSS(
fmt.Sprintf("var(--nested-d%02d-c%02d)", depth, colori))
}
func depthspan(depth uint) template.HTML {
s := `<span class="depth">`
switch depth {
case 0:
case 1:
s += "· "
case 2:
s += "· · "
case 3:
s += "· · · "
case 4:
s += "· · · · "
default:
s += fmt.Sprintf("· (%d) · ", depth)
}
s += `</span>`
return template.HTML(s)
}
// Hand-picked emojis that have enough visual differences in most common
// renderings, and are common enough to be able to easily describe them.
var emojids = []rune(`😀🤣😇🥰🤧😈🤡👻👽🤖👋✊🦴👅` +
`🐒🐕🦊🐱🐯🐎🐄🐷🐑🐐🐪🦒🐘🐀🦇🐓🦆🦚🦜🐢🐍🦖🐋🐟🦈🐙` +
`🦋🐜🐝🪲🌻🌲🍉🍌🍍🍎🍑🥕🍄` +
`🧀🍦🍰🧉🚂🚗🚜🛵🚲🛼🪂🚀🌞🌈🌊⚽`)
func shorttitle(tr *trace) string {
all := tr.Family + " - " + tr.Title
if len(all) > 20 {
all = "..." + all[len(all)-17:]
}
return all
}
func traceemoji(id id) string {
i := crc32.ChecksumIEEE([]byte(id)) % uint32(len(emojids))
return string(emojids[i])
}

View File

@@ -0,0 +1,255 @@
package nettrace
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
"time"
)
func getValues(t *testing.T, vs url.Values, code int) string {
t.Helper()
req := httptest.NewRequest("GET", "/debug/traces?"+vs.Encode(), nil)
w := httptest.NewRecorder()
RenderTraces(w, req)
resp := w.Result()
body, _ := io.ReadAll(resp.Body)
if resp.StatusCode != code {
t.Errorf("expected %d, got %v", code, resp)
}
return string(body)
}
type v struct {
fam, b, lat, trace, ref, all string
}
func getCode(t *testing.T, vs v, code int) string {
t.Helper()
u := url.Values{}
if vs.fam != "" {
u.Add("fam", vs.fam)
}
if vs.b != "" {
u.Add("b", vs.b)
}
if vs.lat != "" {
u.Add("lat", vs.lat)
}
if vs.trace != "" {
u.Add("trace", vs.trace)
}
if vs.ref != "" {
u.Add("ref", vs.ref)
}
if vs.all != "" {
u.Add("all", vs.all)
}
return getValues(t, u, code)
}
func get(t *testing.T, fam, b, lat, trace, ref, all string) string {
t.Helper()
return getCode(t, v{fam, b, lat, trace, ref, all}, 200)
}
func getErr(t *testing.T, fam, b, lat, trace, ref, all string, code int, err string) string {
t.Helper()
body := getCode(t, v{fam, b, lat, trace, ref, all}, code)
if !strings.Contains(body, err) {
t.Errorf("Body does not contain error message %q", err)
t.Logf("Body: %v", body)
}
return body
}
func checkContains(t *testing.T, body, s string) {
t.Helper()
if !strings.Contains(body, s) {
t.Errorf("Body does not contain %q", s)
t.Logf("Body: %v", body)
}
}
func TestHTTP(t *testing.T) {
tr := New("TestHTTP", "http")
tr.Printf("entry #1")
tr.Finish()
tr = New("TestHTTP", "http")
tr.Printf("entry #2")
tr.Finish()
tr = New("TestHTTP", "http")
tr.Errorf("entry #3 (error)")
tr.Finish()
tr = New("TestHTTP", "http")
tr.Printf("hola marola")
tr.Printf("entry #4")
// This one is active until the end.
defer tr.Finish()
// Get the plain index.
body := get(t, "", "", "", "", "", "")
checkContains(t, body, "TestHTTP")
// Get a specific family, but no bucket.
body = get(t, "TestHTTP", "", "", "", "", "")
checkContains(t, body, "TestHTTP")
// Get a family and active bucket.
body = get(t, "TestHTTP", "-1", "", "", "", "")
checkContains(t, body, "hola marola")
// Get a family and error bucket.
body = get(t, "TestHTTP", "-2", "", "", "", "")
checkContains(t, body, "entry #3 (error)")
// Get a family and first bucket.
body = get(t, "TestHTTP", "0", "", "", "", "")
checkContains(t, body, "entry #2")
// Latency view. There are 3 events because the 4th is active.
body = get(t, "TestHTTP", "", "lat", "", "", "")
checkContains(t, body, "Count: 3")
// Get a specific trace. No family given, since it shouldn't be needed (we
// take it from the id).
body = get(t, "", "", "", string(tr.(*trace).ID), "", "")
checkContains(t, body, "hola marola")
// Check the "all=true" views.
body = get(t, "TestHTTP", "0", "", "", "", "true")
checkContains(t, body, "entry #2")
checkContains(t, body, "?fam=TestHTTP&b=-2&all=true")
tr.Finish()
}
func TestHTTPLong(t *testing.T) {
// Test a long trace.
tr := New("TestHTTPLong", "verbose")
for i := 0; i < 1000; i++ {
tr.Printf("entry #%d", i)
}
tr.Finish()
get(t, "TestHTTPLong", "", "", string(tr.(*trace).ID), "", "")
}
func TestHTTPErrors(t *testing.T) {
tr := New("TestHTTPErrors", "http")
tr.Printf("entry #1")
tr.Finish()
// Unknown family.
getErr(t, "unkfamily", "", "", "", "", "",
404, "Unknown family")
// Invalid bucket.
getErr(t, "TestHTTPErrors", "abc", "", "", "", "",
400, "Invalid bucket")
getErr(t, "TestHTTPErrors", "-3", "", "", "", "",
400, "Invalid bucket")
getErr(t, "TestHTTPErrors", "9", "", "", "", "",
400, "Invalid bucket")
// Unknown trace id (malformed).
getErr(t, "TestHTTPErrors", "", "", "unktrace", "", "",
404, "Trace not found")
// Unknown trace id.
getErr(t, "TestHTTPErrors", "", "", string(tr.(*trace).ID)+"xxx", "", "",
404, "Trace not found")
// Check that the trace is actually there.
get(t, "", "", "", string(tr.(*trace).ID), "", "")
}
func TestHTTPUroboro(t *testing.T) {
trA := New("TestHTTPUroboro", "trA")
defer trA.Finish()
trA.Printf("this is trace A")
trB := New("TestHTTPUroboro", "trB")
defer trB.Finish()
trB.Printf("this is trace B")
trA.Link(trB, "B is my friend")
trB.Link(trA, "A is my friend")
// Check that we handle cross-linked events well.
get(t, "TestHTTPUroboro", "", "", "", "", "")
get(t, "TestHTTPUroboro", "-1", "", "", "", "")
get(t, "", "", "", string(trA.(*trace).ID), "", "")
get(t, "", "", "", string(trB.(*trace).ID), "", "")
}
func TestHTTPDeep(t *testing.T) {
tr := New("TestHTTPDeep", "level-0")
defer tr.Finish()
ts := []Trace{tr}
for i := 1; i <= 9; i++ {
tr = tr.NewChild("TestHTTPDeep", fmt.Sprintf("level-%d", i))
defer tr.Finish()
ts = append(ts, tr)
}
// Active view.
body := get(t, "TestHTTPDeep", "-1", "", "", "", "")
checkContains(t, body, "level-9")
// Recursive view.
body = get(t, "TestHTTPDeep", "", "", string(ts[0].(*trace).ID), "", "")
checkContains(t, body, "level-9")
}
func TestStripZeros(t *testing.T) {
cases := []struct {
d time.Duration
exp string
}{
{0 * time.Second, " . 0"},
{1 * time.Millisecond, " . 1000"},
{5 * time.Millisecond, " . 5000"},
{1 * time.Second, "1.000000"},
{1*time.Second + 8*time.Millisecond, "1.008000"},
}
for _, c := range cases {
if got := stripZeros(c.d); got != c.exp {
t.Errorf("stripZeros(%s) got %q, expected %q",
c.d, got, c.exp)
}
}
}
func TestRegisterHandler(t *testing.T) {
mux := http.NewServeMux()
RegisterHandler(mux)
req := httptest.NewRequest("GET", "/debug/traces", nil)
w := httptest.NewRecorder()
mux.ServeHTTP(w, req)
resp := w.Result()
if resp.StatusCode != 200 {
t.Errorf("expected 200, got %v", resp)
}
body, _ := io.ReadAll(resp.Body)
if !strings.Contains(string(body), "<h1>Traces</h1>") {
t.Errorf("unexpected body: %s", body)
}
}

View File

@@ -0,0 +1,25 @@
<table class="latencies"><tr>
<td>Count: {{.Latencies.Count}}</td>
<td>Avg: {{.Latencies.Avg | roundDuration}}</td>
<td>Min: {{.Latencies.Min | roundDuration}}</td>
<td>Max: {{.Latencies.Max | roundDuration}}</td>
</tr></table>
<p>
<table class="latencies">
<tr><th>Bucket</th><th>Count</th><th>%</th><th></th><th>Cumulative</th></tr>
{{range .Latencies.Counts}}
<tr>
<td>
<a href="?fam={{$.Family}}&b={{.BucketIdx}}"
{{if eq .Count 0}}class="muted"{{end}}>
&ge;{{.Start}}</a>
</td>
<td>{{.Count}}</td>
<td>{{.Percent | printf "%5.2f"}}%</td>
<td><meter max="100" value="{{.Percent}}">
{{.Percent | printf "%.2f"}}%</meter>
<td>{{.CumPct | printf "%5.2f"}}%</td>
</tr>
{{end}}
</table>

View File

@@ -0,0 +1,51 @@
{{if .Trace.Parent}}
<a href="?trace={{.Trace.Parent.ID}}&ref={{.Trace.ID}}">
Parent: {{.Trace.Parent.Family}} - {{.Trace.Parent.Title}}</a>
<p>
{{end}}
<table class="trace">
<thead>
<tr>
<th>Trace</th>
<th>Timestamp</th>
<th>Elapsed (s)</th>
<th></th>
<th>Message</th>
</tr>
</thead>
<tbody>
{{$prev := .Trace.Start}}
{{range .AllEvents}}
<tr style="background: {{colorize .Depth .Trace.ID}};">
<td title='{{.Trace.Family}} - {{.Trace.Title}}
@ {{.Trace.Start.Format "2006-01-02 15:04:05.999999" }}'>
{{shorttitle .Trace}}</td>
<td class="when">{{.Event.When.Format "15:04:05.000000"}}</td>
<td class="duration">{{(.Event.When.Sub $prev) | stripZeros}}</td>
<td class="emoji" title='{{.Trace.Family}} - {{.Trace.Title}}
@ {{.Trace.Start.Format "2006-01-02 15:04:05.999999" }}'>
<div class="emoji">{{traceemoji .Trace.ID}}</div></td>
<td class="msg">
{{- depthspan .Depth -}}
{{- if .Event.Type.IsLog -}}
{{.Event.Msg}}
{{- else if .Event.Type.IsChild -}}
new child: <a href="?trace={{.Event.Ref.ID}}&ref={{.Trace.ID}}">{{.Event.Ref.Family}} - {{.Event.Ref.Title}}</a>
{{- else if .Event.Type.IsLink -}}
<a href="?trace={{.Event.Ref.ID}}&ref={{.Trace.ID}}">{{.Event.Msg}}</a>
{{- else if .Event.Type.IsDrop -}}
<b><i>[ events dropped ]</i></b>
{{- else -}}
<i>[ unknown event type ]</i>
{{- end -}}
</td>
</tr>
{{$prev = .Event.When}}
{{end}}
</tbody>
</table>

View File

@@ -0,0 +1,35 @@
<tr class="title">
<td class="when">{{.Start.Format "2006-01-02 15:04:05.000000"}}</td>
<td class="duration">{{.Duration | roundSeconds}}</td>
<td><a href="?trace={{.ID}}">{{.Title}}</a>
{{if .Parent}}(parent: <a href="?trace={{.Parent.ID}}&ref={{.ID}}">
{{.Parent.Family}} - {{.Parent.Title}}</a>)
{{end}}
</td>
<tr>
{{$prev := .Start}}
{{range .Events}}
<tr>
<td class="when">{{.When.Format "15:04:05.000000"}}</td>
<td class="duration">{{(.When.Sub $prev) | stripZeros}}</td>
<td class="msg">
{{- if .Type.IsLog -}}
{{.Msg}}
{{- else if .Type.IsChild -}}
new child <a href="?trace={{.Ref.ID}}&ref={{$.ID}}">{{.Ref.Family}} {{.Ref.Title}}</a>
{{- else if .Type.IsLink -}}
<a href="?trace={{.Ref.ID}}&ref={{$.ID}}">{{.Msg}}</a>
{{- else if .Type.IsDrop -}}
<i>[ events dropped ]</i>
{{- else -}}
<i>[ unknown event type ]</i>
{{- end -}}
</td>
</tr>
{{$prev = .When}}
{{end}}
<tr>
<td>&nbsp;</td>
</tr>

View File

@@ -0,0 +1,129 @@
<!DOCTYPE html>
<html lang="en">
<head>
<style>
{{template "style.css"}}
</style>
<title>
{{if .Trace}}{{.Trace.Family}} - {{.Trace.Title}}
{{else if .BucketStr}}{{.Family}} - {{.BucketStr}}
{{else if .Latencies}}{{.Family}} - latency
{{else}}Traces
{{end}}
</title>
</head>
<body>
<h1>Traces</h1>
<table class="index">
{{range $name, $ftr := .FamTraces}}
<tr>
<td class="family">
<a href="?fam={{$name}}&b=0&all=true">
{{if eq $name $.Family}}<u>{{end}}
{{$name}}
{{if eq $name $.Family}}</u>{{end}}
</a>
</td>
<td class="bucket active">
{{$n := $ftr.LenActive}}
{{if and (eq $name $.Family) (eq "active" $.BucketStr)}}<u>{{end}}
<a href="?fam={{$name}}&b=-1&all={{$.AllGT}}"
{{if eq $n 0}}class="muted"{{end}}>
{{$n}} active</a>
{{if and (eq $name $.Family) (eq "active" $.BucketStr)}}</u>{{end}}
</td>
{{range $i, $b := $.Buckets}}
<td class="bucket">
{{$n := $ftr.LenBucket $i}}
{{if and (eq $name $.Family) (eq $b.String $.BucketStr)}}<u>{{end}}
<a href="?fam={{$name}}&b={{$i}}&all={{$.AllGT}}"
{{if eq $n 0}}class="muted"{{end}}>
&ge;{{$b}}</a>
{{if and (eq $name $.Family) (eq $b.String $.BucketStr)}}</u>{{end}}
</td>
{{end}}
<td class="bucket">
{{$n := $ftr.LenErrors}}
{{if and (eq $name $.Family) (eq "errors" $.BucketStr)}}<u>{{end}}
<a href="?fam={{$name}}&b=-2&all={{$.AllGT}}"
{{if eq $n 0}}class="muted"{{end}}>
errors</a>
{{if and (eq $name $.Family) (eq "errors" $.BucketStr)}}</u>{{end}}
</td>
<td class="bucket">
<a href="?fam={{$name}}&lat=true&all={{$.AllGT}}">[latency]</a>
</td>
</tr>
{{end}}
</table>
<br>
Show: <a href="?fam={{.Family}}&b={{.Bucket}}&all=false">
{{if not .AllGT}}<u>{{end}}
Only in bucket</a>
{{if not .AllGT}}</u>{{end}}
/
<a href="?fam={{.Family}}&b={{.Bucket}}&all=true">
{{if .AllGT}}<u>{{end}}
All &ge; bucket</a>
{{if .AllGT}}</u>{{end}}
<p>
<!--------------------------------------------->
{{if .Error}}
<p class="error">Error: {{.Error}}</p>
{{end}}
<!--------------------------------------------->
{{if .BucketStr}}
<h2>{{.Family}} - {{.BucketStr}}</h2>
<table class="trace">
<thead>
<tr>
<th>Timestamp</th>
<th>Elapsed (s)</th>
<th>Message</th>
</tr>
</thead>
<tbody>
<tr>
<td>&nbsp;</td>
</tr>
{{range .Traces}}
{{template "_single.html.tmpl" .}}<p>
{{end}}
</tbody>
</table>
<p>
{{end}}
<!--------------------------------------------->
{{if .Latencies}}
<h2>{{.Family}} - latency</h2>
{{template "_latency.html.tmpl" .}}<p>
{{end}}
<!--------------------------------------------->
{{if .Trace}}
<h2>{{.Trace.Family}} - <i>{{.Trace.Title}}</i></h2>
{{template "_recursive.html.tmpl" .}}<p>
{{end}}
</body>
</html>

View File

@@ -0,0 +1,187 @@
:root {
--text-color: black;
--bg-color: #fffff7;
--zebra-bg-color: #eeeee7;
--muted-color: #444;
--xmuted-color: #a1a1a1;
--link-color: #39c;
--link-hover: #069;
--underline-color: grey;
--error-color: red;
/* Colors for the nested zebras. */
--nested-d01-c00: #ffebee;
--nested-d01-c01: #ede7f6;
--nested-d01-c02: #e3f2fd;
--nested-d01-c03: #e8f5e9;
--nested-d01-c04: #fff8e1;
--nested-d01-c05: #efebe9;
--nested-d02-c00: #f0dcdf;
--nested-d02-c01: #ded8e7;
--nested-d02-c02: #d4e3ee;
--nested-d02-c03: #d9e6da;
--nested-d02-c04: #f0e9d2;
--nested-d02-c05: #e0dcda;
--nested-d03-c00: #e1cdd0;
--nested-d03-c01: #cfc9d8;
--nested-d03-c02: #c5d4df;
--nested-d03-c03: #cad7cb;
--nested-d03-c04: #e1dac3;
--nested-d03-c05: #d1cdcb;
}
@media (prefers-color-scheme: dark) {
:root {
--text-color: rgba(255, 255, 255, 0.90);
--bg-color: #121212;
--zebra-bg-color: #222222;
--muted-color: #aaaaaa;
--xmuted-color: #a1a1a1;
--link-color: #44b4ec;
--link-hover: #7fc9ee;
--underline-color: grey;
--error-color: #dd4747;
/* Colors for the nested zebras. */
--nested-d01-c00: #220212;
--nested-d01-c01: #1c1c22;
--nested-d01-c02: #001e20;
--nested-d01-c03: #0f0301;
--nested-d01-c04: #201d06;
--nested-d01-c05: #00192b;
--nested-d02-c00: #311121;
--nested-d02-c01: #2b2b31;
--nested-d02-c02: #0f2d2f;
--nested-d02-c03: #1e1210;
--nested-d02-c04: #2f2c15;
--nested-d02-c05: #0f283a;
--nested-d03-c00: #402030;
--nested-d03-c01: #3a3a40;
--nested-d03-c02: #1e3c3e;
--nested-d03-c03: #2d211f;
--nested-d03-c04: #3e3b24;
--nested-d03-c05: #1e3749;
}
}
body {
font-family: sans-serif;
color: var(--text-color);
background: var(--bg-color);
}
p.error {
color: var(--error-color);
font-size: large;
}
a {
color: var(--link-color);
text-decoration: none;
}
a:hover {
color: var(--link-hover);
}
.family a {
color: var(--text-color);
}
u {
text-decoration-color: var(--underline-color);
}
table.index {
border-collapse: collapse;
}
table.index tr:nth-child(odd) {
background-color: var(--zebra-bg-color);
}
table.index td {
padding: 0.25em 0.5em;
}
table.index td.bucket {
min-width: 2em;
text-align: center;
}
table.index td.active {
/* Make the "active" column wider so there's less jumping on refresh. */
min-width: 5em;
text-align: right;
}
table.index a {
text-decoration: none;
}
a.muted {
color: var(--muted-color);
}
table.trace {
font-family: monospace;
border-collapse: collapse;
}
table.trace thead {
border-bottom: 1px solid var(--text-color);
}
table.trace th {
text-align: left;
padding: 0.1em 1em;
}
table.trace tr.title {
font-weight: bold;
}
table.trace td {
padding: 0.1em 1em;
}
table.trace td.when {
text-align: right;
}
table.trace td.duration {
text-align: right;
white-space: pre;
}
table.trace td.msg {
white-space: pre-wrap;
}
span.depth {
color: var(--xmuted-color);
}
div.emoji {
/* Emojis sometimes are rendered excessively tall. */
/* This ensures they're sized appropriately. */
max-height: 1.3em;
overflow: hidden;
}
table.latencies {
text-align: right;
}
table.latencies td {
padding: 0 0.3em;
}
table.latencies th {
text-align: center;
}
meter {
width: 15em;
}

View File

@@ -0,0 +1,243 @@
//go:build ignore
package main
import (
"flag"
"fmt"
"math/rand"
"net/http"
"time"
_ "net/http/pprof"
"blitiri.com.ar/go/srv/nettrace"
)
func main() {
addr := flag.String("addr", ":8080", "listening address")
flag.Parse()
go RandomEvents("random-one")
go RandomEvents("random-two")
go RandomEvents("random-three")
go RandomLongEvent("random-long", "long-one")
go RandomLongEvent("random-long", "long-two")
go RandomLongEvent("random-long", "long-three")
go RandomBunny("random-bunny", "first 🐇")
go RandomNested("random-nested")
go RandomLazy("random-lazy")
http.DefaultServeMux.Handle("/",
WithLogging(http.HandlerFunc(HandleRoot)))
http.DefaultServeMux.Handle("/debug/traces",
WithLogging(http.HandlerFunc(nettrace.RenderTraces)))
fmt.Printf("listening on %s\n", *addr)
http.ListenAndServe(*addr, nil)
}
func RandomEvents(family string) {
for i := 0; ; i++ {
tr := nettrace.New(family, fmt.Sprintf("evt-%d", i))
randomTrace(family, tr)
}
}
func randomTrace(family string, tr nettrace.Trace) {
tr.Printf("this is a random event")
tr.Printf("and it has a random delay")
delay := time.Duration(rand.Intn(1000)) * time.Millisecond
tr.Printf("sleeping %v", delay)
time.Sleep(delay)
if rand.Intn(100) < 1 {
tr.Printf("this unlucky one is an error")
tr.SetError()
}
if rand.Intn(100) < 10 {
tr.Printf("this one got super verbose!")
for j := 0; j < 100; j++ {
tr.Printf("message %d", j)
}
}
if rand.Intn(100) < 30 {
tr.Printf("this one had a child")
c := tr.NewChild(family, "achild")
go randomTrace(family, c)
}
tr.Printf("all done")
tr.Finish()
}
func RandomLongEvent(family, title string) {
tr := nettrace.New(family, title)
tr.Printf("this is a random long event")
for i := 0; ; i++ {
delay := time.Duration(rand.Intn(100)) * time.Millisecond
time.Sleep(delay)
tr.Printf("message %d, slept %v", i, delay)
}
tr.Finish()
}
func RandomBunny(family, title string) {
tr := nettrace.New(family, title)
tr.SetMaxEvents(100)
tr.Printf("this is the top 🐇")
for i := 0; ; i++ {
delay := time.Duration(rand.Intn(100)) * time.Millisecond
time.Sleep(delay)
tr.Printf("message %d, slept %v", i, delay)
if rand.Intn(100) < 40 {
c := tr.NewChild(family, fmt.Sprintf("child-%d", i))
go randomTrace(family, c)
}
if rand.Intn(100) < 40 {
n := nettrace.New(family, fmt.Sprintf("linked-%d", i))
go randomTrace(family, n)
tr.Link(n, "linking with this guy")
}
}
tr.Finish()
}
func randomNested(family string, depth int, parent nettrace.Trace) {
tr := parent.NewChild(family, fmt.Sprintf("nested-%d", depth))
defer tr.Finish()
tr.Printf("I am a spoiled child")
delay := time.Duration(rand.Intn(100)) * time.Millisecond
time.Sleep(delay)
tr.Printf("slept %v", delay)
if depth > 10 {
tr.Printf("I went too far")
return
}
// If we make this < 50, then it grows forever.
if rand.Intn(100) < 75 {
tr.Printf("I sang really well")
return
}
max := rand.Intn(5)
for i := 0; i < max; i++ {
tr.Printf("spawning %d", i)
go randomNested(family, depth+1, tr)
}
}
func RandomNested(family string) {
tr := nettrace.New(family, "nested-0")
for i := 0; ; i++ {
randomNested(family, 1, tr)
}
}
func RandomLazy(family string) {
for i := 0; ; i++ {
tr := nettrace.New(family, fmt.Sprintf("evt-%d", i))
tr.Printf("I am very lazy and do little work")
tr.Finish()
time.Sleep(500 * time.Millisecond)
}
}
func HandleRoot(w http.ResponseWriter, r *http.Request) {
if delayS := r.FormValue("delay"); delayS != "" {
delay, err := time.ParseDuration(delayS)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
time.Sleep(delay)
}
if withError := r.FormValue("error"); withError != "" {
tr, _ := nettrace.FromContext(r.Context())
tr.SetError()
}
w.Write([]byte(rootHTML))
}
func WithLogging(parent http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
tr := nettrace.New("http", r.URL.String())
defer tr.Finish()
// Associate the trace with this request.
r = r.WithContext(nettrace.NewContext(r.Context(), tr))
tr.Printf("%s %s %s %s",
r.RemoteAddr, r.Proto, r.Method, r.URL.String())
start := time.Now()
parent.ServeHTTP(w, r)
latency := time.Since(start)
tr.Printf("handler took %s", latency)
// 1.2.3.4:34575 HTTP/2.0 GET /path = 1.2ms
fmt.Printf("%s %s %s %s = %s\n",
r.RemoteAddr, r.Proto, r.Method, r.URL.String(),
latency)
})
}
const rootHTML = `
<html>
<head>
<style>
td {
min-width: 2em;
text-align: center;
}
input#delay {
width: 5em;
}
</style>
</head>
<body>
<a href="/debug/traces">Traces</a><p>
<table>
<tr>
<td>Delay:</td>
<td><a href="/?delay=0s">0s</a></td>
<td><a href="/?delay=0.1s">0.1s</a></td>
<td><a href="/?delay=0.2s">0.2s</a></td>
<td><a href="/?delay=0.5s">0.5s</a></td>
<td><a href="/?delay=1s">1s</a></td>
</tr>
<tr>
<td>+ error:</td>
<td><a href="/?delay=0&error=on">0s</a></td>
<td><a href="/?delay=0.1s&error=on">0.1s</a></td>
<td><a href="/?delay=0.2s&error=on">0.2s</a></td>
<td><a href="/?delay=0.5s&error=on">0.5s</a></td>
<td><a href="/?delay=1s&error=on">1s</a></td>
</tr>
</table>
<form action="/" method="get">
<label for="delay">Custom delay:</label>
<input type="text" id="delay" name="delay" placeholder="250ms"
autofocus required>
<input type="checkbox" id="error" name="error">
<label for="error">is error</label>
<input type="submit">
</form>
</body>
</html>
`

558
internal/nettrace/trace.go Normal file
View File

@@ -0,0 +1,558 @@
// Package nettrace implements tracing of requests. Traces are created by
// nettrace.New, and can then be viewed over HTTP on /debug/traces.
package nettrace
import (
"container/ring"
"fmt"
"math/rand"
"sort"
"strconv"
"strings"
"sync"
"time"
)
// IDs are of the form "family!timestamp!unique identifier", which allows for
// sorting them by time much easily, and also some convenient optimizations
// when looking up an id across all the known ones.
// Family is not escaped. It should not contain the separator.
// It is not expected to be stable, for internal use only.
type id string
func newID(family string, ts int64) id {
return id(
family + "!" +
strconv.FormatInt(ts, 10) + "!" +
strconv.FormatUint(rand.Uint64(), 10))
}
func (id id) Family() string {
sp := strings.SplitN(string(id), "!", 2)
if len(sp) != 2 {
return string(id)
}
return sp[0]
}
// Trace represents a single request trace.
type Trace interface {
// NewChild creates a new trace, that is a child of this one.
NewChild(family, title string) Trace
// Link to another trace with the given message.
Link(other Trace, msg string)
// SetMaxEvents sets the maximum number of events that will be stored in
// the trace. It must be called right after initialization.
SetMaxEvents(n int)
// SetError marks that the trace was for an error event.
SetError()
// Printf adds a message to the trace.
Printf(format string, a ...interface{})
// Errorf adds a message to the trace, marks it as an error, and returns
// an error for it.
Errorf(format string, a ...interface{}) error
// Finish marks the trace as complete.
// The trace should not be used after calling this method.
Finish()
}
// A single trace. Can be active or inactive.
// Exported fields are allowed to be accessed directly, e.g. by the HTTP
// handler. Private ones are mutex protected.
type trace struct {
ID id
Family string
Title string
Parent *trace
Start time.Time
// Fields below are mu-protected.
// We keep them unexported so they're not accidentally accessed in html
// templates.
mu sync.Mutex
end time.Time
isError bool
maxEvents int
// We keep two separate groups: the first ~1/3rd events in a simple slice,
// and the last 2/3rd in a ring so we can drop events without losing the
// first ones.
cutoff int
firstEvents []event
lastEvents *evtRing
}
type evtType uint8
const (
evtLOG = evtType(1 + iota)
evtCHILD
evtLINK
evtDROP
)
func (t evtType) IsLog() bool { return t == evtLOG }
func (t evtType) IsChild() bool { return t == evtCHILD }
func (t evtType) IsLink() bool { return t == evtLINK }
func (t evtType) IsDrop() bool { return t == evtDROP }
type event struct {
When time.Time
Type evtType
Ref *trace
Msg string
}
const defaultMaxEvents = 30
func newTrace(family, title string) *trace {
start := time.Now()
tr := &trace{
ID: newID(family, start.UnixNano()),
Family: family,
Title: title,
Start: start,
maxEvents: defaultMaxEvents,
cutoff: defaultMaxEvents / 3,
}
// Pre-allocate a couple of events to speed things up.
// Don't allocate lastEvents, that can be expensive and it is not always
// needed. No need to slow down trace creation just for it.
tr.firstEvents = make([]event, 0, 4)
familiesMu.Lock()
ft, ok := families[family]
if !ok {
ft = newFamilyTraces()
families[family] = ft
}
familiesMu.Unlock()
ft.mu.Lock()
ft.active[tr.ID] = tr
ft.mu.Unlock()
return tr
}
// New creates a new trace with the given family and title.
func New(family, title string) Trace {
return newTrace(family, title)
}
func (tr *trace) append(evt *event) {
tr.mu.Lock()
defer tr.mu.Unlock()
if len(tr.firstEvents) < tr.cutoff {
tr.firstEvents = append(tr.firstEvents, *evt)
return
}
if tr.lastEvents == nil {
// The ring holds the last 2/3rds of the events.
tr.lastEvents = newEvtRing(tr.maxEvents - tr.cutoff)
}
tr.lastEvents.Add(evt)
}
// String is for debugging only.
func (tr *trace) String() string {
return fmt.Sprintf("trace{%s, %s, %q, %d}",
tr.Family, tr.Title, tr.ID, len(tr.Events()))
}
func (tr *trace) NewChild(family, title string) Trace {
c := newTrace(family, title)
c.Parent = tr
// Add the event to the parent.
evt := &event{
When: time.Now(),
Type: evtCHILD,
Ref: c,
}
tr.append(evt)
return c
}
func (tr *trace) Link(other Trace, msg string) {
evt := &event{
When: time.Now(),
Type: evtLINK,
Ref: other.(*trace),
Msg: msg,
}
tr.append(evt)
}
func (tr *trace) SetMaxEvents(n int) {
// Set a minimum of 6, so the truncation works without running into
// issues.
if n < 6 {
n = 6
}
tr.mu.Lock()
tr.maxEvents = n
tr.cutoff = n / 3
tr.mu.Unlock()
}
func (tr *trace) SetError() {
tr.mu.Lock()
tr.isError = true
tr.mu.Unlock()
}
func (tr *trace) Printf(format string, a ...interface{}) {
evt := &event{
When: time.Now(),
Type: evtLOG,
Msg: fmt.Sprintf(format, a...),
}
tr.append(evt)
}
func (tr *trace) Errorf(format string, a ...interface{}) error {
tr.SetError()
err := fmt.Errorf(format, a...)
tr.Printf(err.Error())
return err
}
func (tr *trace) Finish() {
tr.mu.Lock()
tr.end = time.Now()
tr.mu.Unlock()
familiesMu.Lock()
ft := families[tr.Family]
familiesMu.Unlock()
ft.finalize(tr)
}
// Duration of this trace.
func (tr *trace) Duration() time.Duration {
tr.mu.Lock()
start, end := tr.Start, tr.end
tr.mu.Unlock()
if end.IsZero() {
return time.Since(start)
}
return end.Sub(start)
}
// Events returns a copy of the trace events.
func (tr *trace) Events() []event {
tr.mu.Lock()
defer tr.mu.Unlock()
evts := make([]event, len(tr.firstEvents))
copy(evts, tr.firstEvents)
if tr.lastEvents == nil {
return evts
}
if !tr.lastEvents.firstDrop.IsZero() {
evts = append(evts, event{
When: tr.lastEvents.firstDrop,
Type: evtDROP,
})
}
tr.lastEvents.Do(func(e *event) {
evts = append(evts, *e)
})
return evts
}
func (tr *trace) IsError() bool {
tr.mu.Lock()
defer tr.mu.Unlock()
return tr.isError
}
//
// Trace hierarchy
//
// Each trace belongs to a family. For each family, we have all active traces,
// and then N traces that finished <1s, N that finished <2s, etc.
// We keep this many buckets of finished traces.
const nBuckets = 8
// Buckets to use. Lenght must match nBuckets.
// "Traces with a latency >= $duration".
var buckets = []time.Duration{
time.Duration(0),
5 * time.Millisecond,
10 * time.Millisecond,
50 * time.Millisecond,
100 * time.Millisecond,
300 * time.Millisecond,
1 * time.Second,
10 * time.Second,
}
func findBucket(latency time.Duration) int {
for i, d := range buckets {
if latency >= d {
continue
}
return i - 1
}
return nBuckets - 1
}
// How many traces we keep per bucket.
const tracesInBucket = 10
type traceRing struct {
ring *ring.Ring
max int
l int
}
func newTraceRing(n int) *traceRing {
return &traceRing{
ring: ring.New(n),
max: n,
}
}
func (r *traceRing) Add(tr *trace) {
r.ring.Value = tr
r.ring = r.ring.Next()
if r.l < r.max {
r.l++
}
}
func (r *traceRing) Len() int {
return r.l
}
func (r *traceRing) Do(f func(tr *trace)) {
r.ring.Do(func(x interface{}) {
if x == nil {
return
}
f(x.(*trace))
})
}
type familyTraces struct {
mu sync.Mutex
// All active ones.
active map[id]*trace
// The ones we decided to keep.
// Each bucket is a ring-buffer, finishedHead keeps the head pointer.
finished [nBuckets]*traceRing
// The ones that errored have their own bucket.
errors *traceRing
// Histogram of latencies.
latencies histogram
}
func newFamilyTraces() *familyTraces {
ft := &familyTraces{}
ft.active = map[id]*trace{}
for i := 0; i < nBuckets; i++ {
ft.finished[i] = newTraceRing(tracesInBucket)
}
ft.errors = newTraceRing(tracesInBucket)
return ft
}
func (ft *familyTraces) LenActive() int {
ft.mu.Lock()
defer ft.mu.Unlock()
return len(ft.active)
}
func (ft *familyTraces) LenErrors() int {
ft.mu.Lock()
defer ft.mu.Unlock()
return ft.errors.Len()
}
func (ft *familyTraces) LenBucket(b int) int {
ft.mu.Lock()
defer ft.mu.Unlock()
return ft.finished[b].Len()
}
func (ft *familyTraces) TracesFor(b int, allgt bool) []*trace {
ft.mu.Lock()
defer ft.mu.Unlock()
trs := []*trace{}
appendTrace := func(tr *trace) {
trs = append(trs, tr)
}
if b == -2 {
ft.errors.Do(appendTrace)
} else if b == -1 {
for _, tr := range ft.active {
appendTrace(tr)
}
} else if b < nBuckets {
ft.finished[b].Do(appendTrace)
if allgt {
for i := b + 1; i < nBuckets; i++ {
ft.finished[i].Do(appendTrace)
}
}
}
// Sort them by start, newer first. This is the order that will be used
// when displaying them.
sort.Slice(trs, func(i, j int) bool {
return trs[i].Start.After(trs[j].Start)
})
return trs
}
func (ft *familyTraces) find(id id) *trace {
ft.mu.Lock()
defer ft.mu.Unlock()
if tr, ok := ft.active[id]; ok {
return tr
}
var found *trace
for _, bs := range ft.finished {
bs.Do(func(tr *trace) {
if tr.ID == id {
found = tr
}
})
if found != nil {
return found
}
}
ft.errors.Do(func(tr *trace) {
if tr.ID == id {
found = tr
}
})
if found != nil {
return found
}
return nil
}
func (ft *familyTraces) finalize(tr *trace) {
latency := tr.end.Sub(tr.Start)
b := findBucket(latency)
ft.mu.Lock()
// Delete from the active list.
delete(ft.active, tr.ID)
// Add it to the corresponding finished bucket, based on the trace
// latency.
ft.finished[b].Add(tr)
// Errors go on their own list, in addition to the above.
if tr.isError {
ft.errors.Add(tr)
}
ft.latencies.Add(b, latency)
ft.mu.Unlock()
}
func (ft *familyTraces) Latencies() *histSnapshot {
ft.mu.Lock()
defer ft.mu.Unlock()
return ft.latencies.Snapshot()
}
//
// Global state
//
var (
familiesMu sync.Mutex
families = map[string]*familyTraces{}
)
func copyFamilies() map[string]*familyTraces {
n := map[string]*familyTraces{}
familiesMu.Lock()
for f, trs := range families {
n[f] = trs
}
familiesMu.Unlock()
return n
}
func findInFamilies(traceID id, refID id) *trace {
// First, try to find it via the family.
family := traceID.Family()
familiesMu.Lock()
fts, ok := families[family]
familiesMu.Unlock()
if ok {
tr := fts.find(traceID)
if tr != nil {
return tr
}
}
// If that fail and we have a reference, try finding via it.
// The reference can be a parent or a child.
if refID != id("") {
ref := findInFamilies(refID, "")
if ref == nil {
return nil
}
// Is the reference's parent the one we're looking for?
if ref.Parent != nil && ref.Parent.ID == traceID {
return ref.Parent
}
// Try to find it in the ref's events.
for _, e := range ref.Events() {
if e.Ref != nil && e.Ref.ID == traceID {
return e.Ref
}
}
}
return nil
}

View File

@@ -0,0 +1,215 @@
package nettrace
import (
"fmt"
"strings"
"testing"
"time"
)
func expectEvents(t *testing.T, tr Trace, n int) {
t.Helper()
if evts := tr.(*trace).Events(); len(evts) != n {
t.Errorf("expected %d events, got %d", n, len(evts))
t.Logf("%v", evts)
}
}
func TestBasic(t *testing.T) {
var tr Trace = New("TestBasic", "basic")
defer tr.Finish()
tr.Printf("hola marola")
tr.Printf("hola marola 2")
c1 := tr.NewChild("TestBasic", "basic child")
c1.Printf("hijo de la noche")
expectEvents(t, tr, 3)
if s := tr.(*trace).String(); !strings.Contains(s, "TestBasic, basic") {
t.Errorf("tr.String does not contain family and title: %q", s)
}
}
func TestLong(t *testing.T) {
tr := New("TestLong", "long")
defer tr.Finish()
tr.SetMaxEvents(100)
// First 90 events, no drop.
for i := 0; i < 90; i++ {
tr.Printf("evt %d", i)
}
expectEvents(t, tr, 90)
// Up to 99, still no drop.
for i := 0; i < 9; i++ {
tr.Printf("evt %d", i)
}
expectEvents(t, tr, 99)
// Note that we go up to 101 due to rounding errors, we're ok with it.
tr.Printf("evt 100")
expectEvents(t, tr, 100)
tr.Printf("evt 101")
expectEvents(t, tr, 101)
tr.Printf("evt 102")
expectEvents(t, tr, 101)
// Add more events, expect none of them to exceed 101.
for i := 0; i < 9; i++ {
tr.Printf("evt %d", i)
expectEvents(t, tr, 101)
}
}
func TestIsError(t *testing.T) {
tr := New("TestIsError", "long")
defer tr.Finish()
if tr.(*trace).IsError() != false {
tr.Errorf("new trace is error")
}
tr.Errorf("this is an error")
if tr.(*trace).IsError() != true {
tr.Errorf("error not recorded properly")
}
}
func TestFindViaRef(t *testing.T) {
parent := New("TestFindViaRef", "parent")
parentID := parent.(*trace).ID
defer parent.Finish()
child := parent.NewChild("TestFindViaRef", "child")
childID := child.(*trace).ID
defer child.Finish()
// Basic check that both can be directly found.
if f := findInFamilies(parentID, id("")); f != parent {
t.Errorf("didn't find parent directly, found: %v", f)
}
if f := findInFamilies(childID, id("")); f != child {
t.Errorf("didn't find child directly, found: %v", f)
}
// Hackily remove child from the active traces, to force a reference
// lookup when needed.
familiesMu.Lock()
delete(families["TestFindViaRef"].active, child.(*trace).ID)
familiesMu.Unlock()
// Now the child should not be findable directly anymore.
if f := findInFamilies(childID, id("")); f != nil {
t.Errorf("child should not be findable directly, found: %v", f)
}
// But we still should be able to get to it via the parent.
if f := findInFamilies(childID, parentID); f != child {
t.Errorf("didn't find child via parent, found: %v", f)
}
}
func TestMaxEvents(t *testing.T) {
tr := trace{}
// Test that we keep a minimum, and that the cutoff behaves as expected.
cases := []struct{ me, exp, cutoffExp int }{
{0, 6, 2},
{5, 6, 2},
{6, 6, 2},
{7, 7, 2},
{8, 8, 2},
{9, 9, 3},
{12, 12, 4},
}
for _, c := range cases {
tr.SetMaxEvents(c.me)
if got := tr.maxEvents; got != c.exp {
t.Errorf("set max events to %d, expected %d, got %d",
c.me, c.exp, got)
}
if got := tr.cutoff; got != c.cutoffExp {
t.Errorf("set max events to %d, expected cutoff %d, got %d",
c.me, c.cutoffExp, got)
}
}
}
func TestFind(t *testing.T) {
// Make sure we find the right bucket, including for latencies above the
// last one.
for i, d := range buckets {
found := findBucket(d + 1*time.Millisecond)
if found != i {
t.Errorf("find bucket [%s + 1ms] got %d, expected %d",
d, found, i)
}
}
// Create a family, populate it with traces in all buckets.
finished := [nBuckets]*trace{}
for i, d := range buckets {
lat := d + 1*time.Millisecond
tr := newTrace("TestFind", fmt.Sprintf("evt-%s", lat))
tr.end = tr.Start.Add(lat)
families[tr.Family].finalize(tr)
finished[i] = tr
}
// Also have an active trace.
activeTr := newTrace("TestFind", "active")
// And add an error trace, which isn't on any of the other buckets (to
// simulate that they've been rotated out of the latency buckets, but are
// still around in errors)
errTr := newTrace("TestFind", "evt-err")
errTr.end = errTr.Start.Add(666 * time.Millisecond)
errTr.SetError()
delete(families[errTr.Family].active, errTr.ID)
families[errTr.Family].errors.Add(errTr)
// Find all of them.
for i := range buckets {
found := findInFamilies(finished[i].ID, "")
if found != finished[i] {
t.Errorf("finding trace %d on bucket %s, expected %v, got %v",
i, buckets[i], finished[i], found)
}
}
if found := findInFamilies(activeTr.ID, ""); found != activeTr {
t.Errorf("finding active trace, expected %v, got %v",
activeTr, found)
}
if found := findInFamilies(errTr.ID, ""); found != errTr {
t.Errorf("finding error trace, expected %v, got %v",
errTr, found)
}
// Non-existent traces.
if found := findInFamilies("does!notexist", ""); found != nil {
t.Errorf("finding non-existent trace, expected nil, got %v", found)
}
if found := findInFamilies("does!notexist", "does!notexist"); found != nil {
t.Errorf("finding non-existent trace w/ref, expected nil, got %v", found)
}
}
func TestFindParent(t *testing.T) {
// Direct parent finding.
// If the ref is the parent, we should find it even if the target trace
// isn't known to the family (e.g. the child is there, but the parent has
// been rotated and is no longer referenced).
parent := newTrace("TestFindParent", "parent")
child := parent.NewChild("TestFindParent", "child").(*trace)
// Remove the parent from the active list.
delete(families[parent.Family].active, parent.ID)
if found := findInFamilies(parent.ID, ""); found != nil {
t.Errorf("finding parent without ref, expected nil, got %v", found)
}
if found := findInFamilies(parent.ID, child.ID); found != parent {
t.Errorf("finding parent with ref, expected %v, got %v", parent, found)
}
}

View File

@@ -1,23 +1,15 @@
// Package trace extends golang.org/x/net/trace.
// Package trace extends nettrace with logging.
package trace
import (
"fmt"
"net/http"
"strconv"
"blitiri.com.ar/go/chasquid/internal/nettrace"
"blitiri.com.ar/go/log"
nettrace "golang.org/x/net/trace"
)
func init() {
// golang.org/x/net/trace has its own authorization which by default only
// allows localhost. This can be confusing and limiting in environments
// which access the monitoring server remotely.
nettrace.AuthRequest = func(req *http.Request) (any, sensitive bool) {
return true, true
}
}
// A Trace represents an active request.
@@ -38,9 +30,16 @@ func New(family, title string) *Trace {
return t
}
// NewChild creates a new child trace.
func (t *Trace) NewChild(family, title string) *Trace {
n := &Trace{family, title, t.t.NewChild(family, title)}
n.t.SetMaxEvents(30)
return n
}
// Printf adds this message to the trace's log.
func (t *Trace) Printf(format string, a ...interface{}) {
t.t.LazyPrintf(format, a...)
t.t.Printf(format, a...)
log.Log(log.Info, 1, "%s %s: %s", t.family, t.title,
quote(fmt.Sprintf(format, a...)))
@@ -48,7 +47,7 @@ func (t *Trace) Printf(format string, a ...interface{}) {
// Debugf adds this message to the trace's log, with a debugging level.
func (t *Trace) Debugf(format string, a ...interface{}) {
t.t.LazyPrintf(format, a...)
t.t.Printf(format, a...)
log.Log(log.Debug, 1, "%s %s: %s",
t.family, t.title, quote(fmt.Sprintf(format, a...)))
@@ -59,7 +58,7 @@ func (t *Trace) Errorf(format string, a ...interface{}) error {
// Note we can't just call t.Error here, as it breaks caller logging.
err := fmt.Errorf(format, a...)
t.t.SetError()
t.t.LazyPrintf("error: %v", err)
t.t.Printf("error: %v", err)
log.Log(log.Info, 1, "%s %s: error: %s", t.family, t.title,
quote(err.Error()))
@@ -70,7 +69,7 @@ func (t *Trace) Errorf(format string, a ...interface{}) error {
// trace's log.
func (t *Trace) Error(err error) error {
t.t.SetError()
t.t.LazyPrintf("error: %v", err)
t.t.Printf("error: %v", err)
log.Log(log.Info, 1, "%s %s: error: %s", t.family, t.title,
quote(err.Error()))
@@ -83,45 +82,6 @@ func (t *Trace) Finish() {
t.t.Finish()
}
// EventLog is used for tracing long-lived objects.
type EventLog struct {
family string
title string
e nettrace.EventLog
}
// NewEventLog returns a new EventLog.
func NewEventLog(family, title string) *EventLog {
return &EventLog{family, title, nettrace.NewEventLog(family, title)}
}
// Printf adds the message to the EventLog.
func (e *EventLog) Printf(format string, a ...interface{}) {
e.e.Printf(format, a...)
log.Log(log.Info, 1, "%s %s: %s", e.family, e.title,
quote(fmt.Sprintf(format, a...)))
}
// Debugf adds the message to the EventLog, with a debugging level.
func (e *EventLog) Debugf(format string, a ...interface{}) {
e.e.Printf(format, a...)
log.Log(log.Debug, 1, "%s %s: %s", e.family, e.title,
quote(fmt.Sprintf(format, a...)))
}
// Errorf adds the message to the EventLog, with an error level.
func (e *EventLog) Errorf(format string, a ...interface{}) error {
err := fmt.Errorf(format, a...)
e.e.Errorf("error: %v", err)
log.Log(log.Info, 1, "%s %s: error: %s",
e.family, e.title, quote(err.Error()))
return err
}
func quote(s string) string {
qs := strconv.Quote(s)
return qs[1 : len(qs)-1]