Files
patterm/internal/app/bench_test.go
Harry Bayliss 1c590f8e32 Concrete perf metrics: live counters in --profile + benchmark suite
Live metrics (--profile):
- New metricsTracker instruments OnPTYOut, viewport renderer,
  stdout writes, libghostty-vt Write/Title CGO calls, sidebar /
  tabbar / status draws (with cache-hit accounting), snapshot
  replays, and the chrome ticker (so we can see ticker fires that
  did nothing).
- Writes metrics.jsonl (one snapshot per second) and metrics.json
  + summary.txt on exit, alongside the existing pprof files.
- All record* methods are nil-safe so disabled paths pay only a
  cheap nil check; counters are atomic so the per-PTY-chunk hot
  path stays lock-free.

Benchmark suite (go test -bench=.):
- Three workload fixtures — plain ASCII, SGR-styled lines, and a
  ratatui-style cursor-shuffling burst — plus a containsOSC
  microbenchmark. Reports ns/op, MB/s, allocs/op, B/op.
- Initial baseline numbers added to TODO under the perf-audit
  section, alongside two new findings (renderer allocs ~1 per 4
  bytes on styled chunks; styled throughput tops out near
  90 MB/s) those benchmarks surfaced.
2026-05-15 13:31:37 +01:00

170 lines
5.1 KiB
Go

package app
import (
"fmt"
"strings"
"testing"
)
// Benchmarks for patterm's hot paths. Run with:
//
// go test -bench=. -benchmem ./internal/app/
//
// or target one:
//
// go test -bench=BenchmarkViewportRenderer_PlainASCII -benchmem ./internal/app/
//
// The fixtures below model the workloads we care about most:
//
// - PlainASCII: long-running text output (claude streaming a code
// diff, codex outputting a tool result body). Fast-path territory.
// - StyledLines: SGR-heavy output (claude/codex chat history with
// coloured tokens). State-machine path.
// - RatatuiBurst: many short cursor-positioning / SGR transitions in
// a tight chunk, matching codex/ratatui's incremental diff
// updates.
// - SnapshotReplay: full styled-grid replay (focus switch). No
// benchmark in this file exercises this workload yet.
// buildPlainASCIIChunk produces exactly n bytes of printable ASCII:
// a fixed pangram sentence repeated back to back, with a newline
// inserted after some of the repetitions. The text is over-generated
// and then cut to n bytes, so the final sentence may be truncated.
// It is the escape-free fixture for the viewport renderer benchmarks.
func buildPlainASCIIChunk(n int) []byte {
	const sentence = "The quick brown fox jumps over the lazy dog 0123456789 "

	var sb strings.Builder
	sb.Grow(n)
	for sb.Len() < n {
		sb.WriteString(sentence)
		// A newline follows the sentence only when the running length,
		// modulo 80, is smaller than the sentence length.
		if sb.Len()%80 >= len(sentence) {
			continue
		}
		sb.WriteByte('\n')
	}
	text := sb.String()
	return []byte(text[:n])
}
// buildStyledLinesChunk produces exactly n bytes of SGR-heavy text.
// Each token is a Go keyword wrapped in a foreground-colour escape
// and a reset, followed by a space; colours and keywords cycle
// independently, and a newline is emitted after every tenth token.
// The output is over-generated and cut to n bytes, so it may end in
// the middle of a token or escape sequence.
func buildStyledLinesChunk(n int) []byte {
	palette := [...]string{"31", "32", "33", "34", "35", "36"}
	vocab := [...]string{"package", "func", "return", "import", "struct", "type", "const", "var"}

	var sb strings.Builder
	sb.Grow(n)
	for idx := 0; sb.Len() < n; idx++ {
		sgr := palette[idx%len(palette)]
		word := vocab[idx%len(vocab)]
		fmt.Fprintf(&sb, "\x1b[%sm%s\x1b[0m ", sgr, word)
		if idx%10 == 9 {
			sb.WriteByte('\n')
		}
	}
	styled := sb.String()
	return []byte(styled[:n])
}
// buildRatatuiBurst produces one synthetic diff frame in the style of
// a ratatui redraw. For each of the given number of cells it emits a
// cursor-position (CUP) sequence, a foreground-colour SGR and a
// single letter; cells advance left to right across an 80-wide grid
// with 1-based coordinates. The frame ends with an SGR reset.
func buildRatatuiBurst(cells int) []byte {
	const cols = 80

	var frame strings.Builder
	for cell := 0; cell < cells; cell++ {
		y, x := cell/cols+1, cell%cols+1
		glyph := byte('A' + cell%26) // letters cycle A..Z
		fmt.Fprintf(&frame, "\x1b[%d;%dH\x1b[3%dm%c", y, x, cell%8, glyph)
	}
	frame.WriteString("\x1b[0m")
	return []byte(frame.String())
}
// BenchmarkViewportRenderer_PlainASCII renders a 16 KiB chunk of
// plain text once per iteration. Each iteration builds a fresh
// renderer via newViewportRenderer(newTerminalLayout(120, 40)), so
// construction is part of the timed work. Throughput is reported via
// SetBytes, alongside allocs/op and B/op.
func BenchmarkViewportRenderer_PlainASCII(b *testing.B) {
	payload := buildPlainASCIIChunk(16 * 1024)

	b.ReportAllocs()
	b.SetBytes(int64(len(payload)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		layout := newTerminalLayout(120, 40)
		renderer := newViewportRenderer(layout)
		_ = renderer.Render(payload)
	}
}
// BenchmarkViewportRenderer_StyledLines renders a 16 KiB chunk of
// SGR-heavy output once per iteration — coloured prose with frequent
// style toggles, as seen in claude/codex chat resume traffic. Each
// iteration builds a fresh renderer via
// newViewportRenderer(newTerminalLayout(120, 40)), so construction is
// part of the timed work.
func BenchmarkViewportRenderer_StyledLines(b *testing.B) {
	payload := buildStyledLinesChunk(16 * 1024)

	b.ReportAllocs()
	b.SetBytes(int64(len(payload)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		layout := newTerminalLayout(120, 40)
		renderer := newViewportRenderer(layout)
		_ = renderer.Render(payload)
	}
}
// BenchmarkViewportRenderer_RatatuiBurst renders one synthetic
// full-frame diff (80*24 cells, each a CUP + SGR + single character)
// once per iteration — the cursor-shuffling workload. Each iteration
// builds a fresh renderer via
// newViewportRenderer(newTerminalLayout(120, 40)), so construction is
// part of the timed work.
func BenchmarkViewportRenderer_RatatuiBurst(b *testing.B) {
	const screenCells = 80 * 24 // one screenful of cells
	frame := buildRatatuiBurst(screenCells)

	b.ReportAllocs()
	b.SetBytes(int64(len(frame)))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		layout := newTerminalLayout(120, 40)
		renderer := newViewportRenderer(layout)
		_ = renderer.Render(frame)
	}
}
// containsOSCNoOSCSink receives the final result of
// BenchmarkContainsOSC_NoOSC so the compiler cannot discard the
// benchmarked call as dead code.
// NOTE(review): assumes containsOSC returns bool — confirm.
var containsOSCNoOSCSink bool

// BenchmarkContainsOSC_NoOSC measures the OSC-gate fast path used by
// pumpChild before deciding whether to fire the per-chunk Title()
// CGO call. The input is 8 KiB of SGR-styled output without any OSC
// sequence — the common case for codex/ratatui, which we want near
// zero. BenchmarkContainsOSC_WithOSC covers a chunk that does carry
// an OSC sequence.
func BenchmarkContainsOSC_NoOSC(b *testing.B) {
	chunk := buildStyledLinesChunk(8 * 1024)
	b.SetBytes(int64(len(chunk)))
	b.ReportAllocs()
	b.ResetTimer()
	// Keep the result live: with `_ =` an inlined, side-effect-free
	// call may be optimised away and the loop would measure nothing.
	var found bool
	for i := 0; i < b.N; i++ {
		found = containsOSC(chunk)
	}
	containsOSCNoOSCSink = found
}
// containsOSCWithOSCSink receives the final result of
// BenchmarkContainsOSC_WithOSC so the compiler cannot discard the
// benchmarked call as dead code.
// NOTE(review): assumes containsOSC returns bool — confirm.
var containsOSCWithOSCSink bool

// BenchmarkContainsOSC_WithOSC measures containsOSC on a chunk that
// carries an OSC sequence: 8 KiB of SGR-styled output with an OSC 0
// title-set sequence ("\x1b]0;new title\x07") appended at the end.
func BenchmarkContainsOSC_WithOSC(b *testing.B) {
	chunk := append(buildStyledLinesChunk(8*1024), "\x1b]0;new title\x07"...)
	b.SetBytes(int64(len(chunk)))
	b.ReportAllocs()
	b.ResetTimer()
	// Keep the result live: with `_ =` an inlined, side-effect-free
	// call may be optimised away and the loop would measure nothing.
	var found bool
	for i := 0; i < b.N; i++ {
		found = containsOSC(chunk)
	}
	containsOSCWithOSCSink = found
}
// BenchmarkRendererThroughput_ReuseInstance approximates a running
// session: within each iteration one viewport renderer is fed 16
// styled chunks back to back. Chunks are 4 KiB to match typical PTY
// read sizes. A new renderer is constructed at the start of every
// iteration (inside the timed region) and reused for all 16 chunks,
// so its construction cost is spread over 64 KiB of input. SetBytes
// is given the combined size of all chunks.
func BenchmarkRendererThroughput_ReuseInstance(b *testing.B) {
	const (
		chunkCount = 16
		chunkSize  = 4 * 1024
	)

	// Build the fixture stream and total its size in a single pass.
	stream := make([][]byte, 0, chunkCount)
	payload := 0
	for len(stream) < chunkCount {
		piece := buildStyledLinesChunk(chunkSize)
		stream = append(stream, piece)
		payload += len(piece)
	}

	b.ReportAllocs()
	b.SetBytes(int64(payload))
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		renderer := newViewportRenderer(newTerminalLayout(120, 40))
		for idx := range stream {
			_ = renderer.Render(stream[idx])
		}
	}
}