Concrete perf metrics: live counters in --profile + benchmark suite

Live metrics (--profile):
- New metricsTracker instruments OnPTYOut, viewport renderer,
  stdout writes, libghostty-vt Write/Title CGO calls, sidebar /
  tabbar / status draws (with cache-hit accounting), snapshot
  replays, and the chrome ticker (so we can see ticker fires that
  did nothing).
- Writes metrics.jsonl (one snapshot per second) and metrics.json
  + summary.txt on exit, alongside the existing pprof files.
- All record* methods are nil-safe so disabled paths pay only a
  cheap nil check; counters are atomic so the per-PTY-chunk hot
  path stays lock-free.

Benchmark suite (go test -bench=.):
- Three workload fixtures — plain ASCII, SGR-styled lines, and a
  ratatui-style cursor-shuffling burst — plus a containsOSC
  microbenchmark. Reports ns/op, MB/s, allocs/op, B/op.
- Initial baseline numbers added to TODO under the perf-audit
  section, alongside two new findings (renderer allocs ~1 per 4
  bytes on styled chunks; styled throughput tops out near
  90 MB/s) those benchmarks surfaced.
This commit is contained in:
2026-05-15 13:31:37 +01:00
parent 442eed605c
commit 1c590f8e32
10 changed files with 931 additions and 7 deletions

View File

@@ -34,6 +34,12 @@ type Options struct {
// <DebugDir>/<child-id>.raw. The dir is created if missing. Events
// (spawn / exit / state change) land in <DebugDir>/events.jsonl.
DebugDir string
// ProfileDir, when non-empty, enables in-process performance
// counters. patterm writes a per-second JSONL snapshot stream to
// <ProfileDir>/metrics.jsonl, a final aggregate to metrics.json,
// and a human-readable summary.txt on shutdown. The pprof files
// written by --profile sit alongside these in the same dir.
ProfileDir string
}
const keyCtrlK byte = 0x0b
@@ -134,6 +140,18 @@ func Run(ctx context.Context, opts Options) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Performance tracker — instrumented hot-path timings written to
// <ProfileDir>. nil when --profile is off, in which case every
// record*() call is a fast nil check.
metrics, err := newMetricsTracker(opts.ProfileDir)
if err != nil {
return fmt.Errorf("app: metrics tracker: %w", err)
}
if metrics != nil {
go metrics.run(ctx)
defer metrics.close()
}
// Per-session idle-detection classifier. One goroutine ticks every
// 250ms over every live child and updates IdleState. It stops when
// ctx is cancelled.
@@ -150,7 +168,9 @@ func Run(ctx context.Context, opts Options) error {
hostCols: cols,
hostRows: rows,
stdinTTY: term.IsTerminal(int(os.Stdin.Fd())),
metrics: metrics,
}
sess.SetMetrics(metrics)
host.attention = st
host.focus = st
host.prompter = st
@@ -271,7 +291,9 @@ func Run(ctx context.Context, opts Options) error {
}
chromeChanged := st.chromeDirty.Swap(false)
sidebarChanged := st.sidebarDirty.Swap(false)
if !chromeChanged && !sidebarChanged {
didWork := chromeChanged || sidebarChanged
st.metrics.recordTickerFire(didWork)
if !didWork {
continue
}
if chromeChanged {
@@ -383,6 +405,11 @@ type uiState struct {
hostCols, hostRows uint16
stdinTTY bool
// metrics is the optional performance tracker. nil when --profile
// is off. Hot paths call metrics.recordX which is a fast nil
// check on the disabled path.
metrics *metricsTracker
// chromeCacheMu guards the last-rendered byte cache for each chrome
// element. The tab bar, sidebar, and status line all repaint on
// many state changes and on every PTY chunk, but their content
@@ -787,6 +814,10 @@ func (st *uiState) scheduleAutoRestart(c *Child) {
// disabled only around the replay so long styled runs cannot wrap into
// the right rail.
func (st *uiState) OnPTYOut(childID string, chunk []byte) {
var entry time.Time
if st.metrics != nil {
entry = time.Now()
}
layout := st.layoutSnapshot()
st.mu.Lock()
focus := st.focusedID
@@ -803,16 +834,31 @@ func (st *uiState) OnPTYOut(childID string, chunk []byte) {
}
st.mu.Unlock()
if palOpen || focus != childID || renderer == nil {
st.metrics.recordPTYOutDrop()
return
}
var out []byte
if forceRepaint {
var snapStart time.Time
if st.metrics != nil {
snapStart = time.Now()
}
out = st.renderFocusedSnapshot(childID, renderer, layout)
if st.metrics != nil {
st.metrics.recordSnapshot(time.Since(snapStart))
}
if len(out) == 0 {
return
}
} else {
var rstart time.Time
if st.metrics != nil {
rstart = time.Now()
}
out = renderer.Render(chunk)
if st.metrics != nil {
st.metrics.recordRender(time.Since(rstart))
}
}
// One write covers the autowrap-disable prelude, the chunk, and the
// autowrap-restore postlude — three syscalls collapsed into one
@@ -822,9 +868,16 @@ func (st *uiState) OnPTYOut(childID string, chunk []byte) {
wrapped = append(wrapped, "\x1b[?7l"...)
wrapped = append(wrapped, out...)
wrapped = append(wrapped, "\x1b[?7h"...)
var wstart time.Time
if st.metrics != nil {
wstart = time.Now()
}
st.outMu.Lock()
_, _ = os.Stdout.Write(wrapped)
st.outMu.Unlock()
if st.metrics != nil {
st.metrics.recordStdout(time.Since(wstart), len(wrapped))
}
// RI / IND / NEL / SU / SD / IL / DL and bottom-margin LF / VT / FF
// scroll content within the host's scroll region, which spans every
// column — so any of them drags the right-hand sidebar's session-tree
@@ -851,6 +904,9 @@ func (st *uiState) OnPTYOut(childID string, chunk []byte) {
// avoiding the string build, FindChild, and locking on every
// chunk pulls steady-state CPU off the hot path.
st.markChromeDirty()
if st.metrics != nil {
st.metrics.recordPTYOut(time.Since(entry), len(chunk))
}
}
func (st *uiState) enterScreen() {
@@ -990,6 +1046,10 @@ func (st *uiState) renderPaletteLocked() {
// attention ask. Right side: palette hint. The PTY child occupies
// host_rows-1 rows so this row is exclusively ours.
func (st *uiState) drawStatusLine() {
var entry time.Time
if st.metrics != nil {
entry = time.Now()
}
st.mu.Lock()
palOpen := st.palette != nil
focusID := st.focusedID
@@ -1076,10 +1136,16 @@ func (st *uiState) drawStatusLine() {
st.chromeCacheMu.Lock()
if line == st.statusLineCache {
st.chromeCacheMu.Unlock()
if st.metrics != nil {
st.metrics.recordStatus(time.Since(entry), true)
}
return
}
st.statusLineCache = line
st.chromeCacheMu.Unlock()
if st.metrics != nil {
defer func() { st.metrics.recordStatus(time.Since(entry), false) }()
}
st.outMu.Lock()
defer st.outMu.Unlock()

169
internal/app/bench_test.go Normal file
View File

@@ -0,0 +1,169 @@
package app
import (
"fmt"
"strings"
"testing"
)
// Benchmarks for patterm's hot paths. Run with:
//
// go test -bench=. -benchmem ./internal/app/
//
// or target one:
//
// go test -bench=BenchmarkViewportRenderer_PlainASCII -benchmem ./internal/app/
//
// The fixtures below model the three workloads we care about most:
//
// - PlainASCII: long-running text output (claude streaming a code
// diff, codex outputting a tool result body). Fast-path territory.
// - StyledLines: SGR-heavy output (claude/codex chat history with
// coloured tokens). State-machine path.
// - RatatuiBurst: many short cursor-positioning / SGR transitions in
// a tight chunk, matching codex/ratatui's incremental diff
// updates.
// - SnapshotReplay: full styled-grid replay (focus switch).
// buildPlainASCIIChunk returns a roughly N-byte chunk of pure
// printable ASCII text with the occasional newline — the cheapest
// workload, exercises the fast path in viewport_renderer.Render.
func buildPlainASCIIChunk(n int) []byte {
var b strings.Builder
b.Grow(n)
line := "The quick brown fox jumps over the lazy dog 0123456789 "
for b.Len() < n {
b.WriteString(line)
if b.Len()%80 < len(line) {
b.WriteByte('\n')
}
}
return []byte(b.String()[:n])
}
// buildStyledLinesChunk simulates SGR-heavy output: every word wears
// a colour, so the renderer breaks out of its fast path on every
// escape sequence.
func buildStyledLinesChunk(n int) []byte {
var b strings.Builder
b.Grow(n)
colours := []string{"31", "32", "33", "34", "35", "36"}
words := []string{"package", "func", "return", "import", "struct", "type", "const", "var"}
i := 0
for b.Len() < n {
fmt.Fprintf(&b, "\x1b[%sm%s\x1b[0m ", colours[i%len(colours)], words[i%len(words)])
if i%10 == 9 {
b.WriteByte('\n')
}
i++
}
return []byte(b.String()[:n])
}
// buildRatatuiBurst simulates a single ratatui-style diff frame:
// CUP, SGR, a few chars, CUP, SGR, a few chars… for a viewport's
// worth of cells.
func buildRatatuiBurst(cells int) []byte {
var b strings.Builder
for i := 0; i < cells; i++ {
row := (i / 80) + 1
col := (i % 80) + 1
fmt.Fprintf(&b, "\x1b[%d;%dH\x1b[3%dm%c", row, col, i%8, byte('A'+(i%26)))
}
b.WriteString("\x1b[0m")
return []byte(b.String())
}
// BenchmarkViewportRenderer_PlainASCII drives a 16 KiB plain-text
// chunk through Render once per iteration. Reports ns/op,
// allocations, and B/op.
func BenchmarkViewportRenderer_PlainASCII(b *testing.B) {
chunk := buildPlainASCIIChunk(16 * 1024)
b.SetBytes(int64(len(chunk)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
vr := newViewportRenderer(newTerminalLayout(120, 40))
_ = vr.Render(chunk)
}
}
// BenchmarkViewportRenderer_StyledLines exercises the per-byte CSI
// path on SGR-heavy output. Most claude/codex chat resume traffic
// looks like this — coloured prose with frequent style toggles.
func BenchmarkViewportRenderer_StyledLines(b *testing.B) {
chunk := buildStyledLinesChunk(16 * 1024)
b.SetBytes(int64(len(chunk)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
vr := newViewportRenderer(newTerminalLayout(120, 40))
_ = vr.Render(chunk)
}
}
// BenchmarkViewportRenderer_RatatuiBurst measures the worst-case
// cursor-shuffling workload: full-frame diff updates dominated by
// CUP + SGR + single-char writes.
func BenchmarkViewportRenderer_RatatuiBurst(b *testing.B) {
chunk := buildRatatuiBurst(80 * 24) // one screenful of cells
b.SetBytes(int64(len(chunk)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
vr := newViewportRenderer(newTerminalLayout(120, 40))
_ = vr.Render(chunk)
}
}
// BenchmarkContainsOSC measures the OSC-gate fast path used by
// pumpChild before deciding whether to fire the per-chunk Title()
// CGO call. Inputs:
// - "hot": SGR-styled output without OSC — the common case for
// codex/ratatui. We want this near zero.
// - "cold": chunk with an OSC sequence in the middle.
func BenchmarkContainsOSC_NoOSC(b *testing.B) {
chunk := buildStyledLinesChunk(8 * 1024)
b.SetBytes(int64(len(chunk)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = containsOSC(chunk)
}
}
func BenchmarkContainsOSC_WithOSC(b *testing.B) {
chunk := append(buildStyledLinesChunk(8*1024), []byte("\x1b]0;new title\x07")...)
b.SetBytes(int64(len(chunk)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = containsOSC(chunk)
}
}
// BenchmarkRendererThroughput_ReuseInstance approximates real
// session behaviour: a single viewport renderer fed many chunks in
// sequence, no per-iteration allocation. Reports a throughput
// closer to the steady-state OnPTYOut path. Chunks are 4 KiB to
// match typical PTY read sizes; the renderer is reset every
// benchmark run.
func BenchmarkRendererThroughput_ReuseInstance(b *testing.B) {
chunks := make([][]byte, 16)
for i := range chunks {
chunks[i] = buildStyledLinesChunk(4 * 1024)
}
totalBytes := 0
for _, c := range chunks {
totalBytes += len(c)
}
b.SetBytes(int64(totalBytes))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
vr := newViewportRenderer(newTerminalLayout(120, 40))
for _, c := range chunks {
_ = vr.Render(c)
}
}
}

462
internal/app/metrics.go Normal file
View File

@@ -0,0 +1,462 @@
package app
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync/atomic"
"time"
)
// metricsTracker collects per-hot-path counters and timings. All
// fields are atomic so callers can record from the per-PTY-chunk path
// without taking a lock. Enabled only when --profile is set.
//
// Sampled rates ("X per second", "p99 latency") are not tracked here
// directly — the snapshotter goroutine writes a row to metrics.jsonl
// every second, and analysis tools compute rates from the deltas.
// Aggregate totals are written to metrics.json on shutdown.
type metricsTracker struct {
startedAt time.Time
// PTY chunk arrival → stdout write pipeline (per OnPTYOut call).
ptyChunks atomic.Int64
ptyBytes atomic.Int64
onPTYOutNs atomic.Int64
onPTYOutMaxNs atomic.Int64
onPTYOutDrops atomic.Int64 // chunks for non-focused children — fast-path returns
stdoutWrites atomic.Int64
stdoutBytes atomic.Int64
stdoutNs atomic.Int64
stdoutMaxNs atomic.Int64
// Viewport renderer (state-machine over child PTY bytes).
renderCalls atomic.Int64
renderNs atomic.Int64
renderMaxNs atomic.Int64
// CGO into libghostty-vt (counted from pumpChild).
emuWriteCalls atomic.Int64
emuWriteNs atomic.Int64
emuWriteMaxNs atomic.Int64
emuTitleCalls atomic.Int64
emuTitleNs atomic.Int64
emuTitleSkips atomic.Int64 // OSC-gate fast path — title poll skipped
// Chrome paint pipeline.
sidebarDraws atomic.Int64
sidebarCacheHits atomic.Int64
sidebarNs atomic.Int64
sidebarMaxNs atomic.Int64
tabbarDraws atomic.Int64
tabbarCacheHits atomic.Int64
tabbarNs atomic.Int64
statusDraws atomic.Int64
statusCacheHits atomic.Int64
statusNs atomic.Int64
// Snapshot replay (focus / spawn / nudge).
snapshotReplays atomic.Int64
snapshotNs atomic.Int64
snapshotMaxNs atomic.Int64
// Chrome ticker — distinguishes useful work from idle wakeups.
tickerFires atomic.Int64
tickerIdleFires atomic.Int64 // nothing dirty when the ticker fired
// Output destination (set when enabled).
rowFile *os.File // metrics.jsonl
dir string
}
// newMetricsTracker creates an enabled tracker writing to <dir>/.
// Returns nil + nil err if dir is empty (feature off). Caller must
// call tracker.run(ctx) in a goroutine and tracker.close() at exit.
func newMetricsTracker(dir string) (*metricsTracker, error) {
if dir == "" {
return nil, nil
}
if err := os.MkdirAll(dir, 0o700); err != nil {
return nil, err
}
row, err := os.Create(filepath.Join(dir, "metrics.jsonl"))
if err != nil {
return nil, err
}
return &metricsTracker{
startedAt: time.Now(),
rowFile: row,
dir: dir,
}, nil
}
// observeMax updates dst to max(dst, v) using a CAS loop. Atomic max
// isn't a hardware primitive on most CPUs; this is the standard idiom.
// Spurious wakeups can race but the result settles at the true max.
func observeMax(dst *atomic.Int64, v int64) {
for {
old := dst.Load()
if v <= old {
return
}
if dst.CompareAndSwap(old, v) {
return
}
}
}
// recordPTYOut is called once at the end of each OnPTYOut invocation.
// `dur` is the full per-chunk wall time (renderer + stdout + chrome
// signals); `bytes` is the chunk's byte count.
func (m *metricsTracker) recordPTYOut(dur time.Duration, bytes int) {
if m == nil {
return
}
m.ptyChunks.Add(1)
m.ptyBytes.Add(int64(bytes))
ns := dur.Nanoseconds()
m.onPTYOutNs.Add(ns)
observeMax(&m.onPTYOutMaxNs, ns)
}
func (m *metricsTracker) recordPTYOutDrop() {
if m == nil {
return
}
m.onPTYOutDrops.Add(1)
}
func (m *metricsTracker) recordRender(dur time.Duration) {
if m == nil {
return
}
m.renderCalls.Add(1)
ns := dur.Nanoseconds()
m.renderNs.Add(ns)
observeMax(&m.renderMaxNs, ns)
}
func (m *metricsTracker) recordStdout(dur time.Duration, bytes int) {
if m == nil {
return
}
m.stdoutWrites.Add(1)
m.stdoutBytes.Add(int64(bytes))
ns := dur.Nanoseconds()
m.stdoutNs.Add(ns)
observeMax(&m.stdoutMaxNs, ns)
}
func (m *metricsTracker) recordEmuWrite(dur time.Duration) {
if m == nil {
return
}
m.emuWriteCalls.Add(1)
ns := dur.Nanoseconds()
m.emuWriteNs.Add(ns)
observeMax(&m.emuWriteMaxNs, ns)
}
func (m *metricsTracker) recordEmuTitle(dur time.Duration, skipped bool) {
if m == nil {
return
}
if skipped {
m.emuTitleSkips.Add(1)
return
}
m.emuTitleCalls.Add(1)
m.emuTitleNs.Add(dur.Nanoseconds())
}
func (m *metricsTracker) recordSidebar(dur time.Duration, cacheHit bool) {
if m == nil {
return
}
m.sidebarDraws.Add(1)
if cacheHit {
m.sidebarCacheHits.Add(1)
}
ns := dur.Nanoseconds()
m.sidebarNs.Add(ns)
observeMax(&m.sidebarMaxNs, ns)
}
func (m *metricsTracker) recordTabbar(dur time.Duration, cacheHit bool) {
if m == nil {
return
}
m.tabbarDraws.Add(1)
if cacheHit {
m.tabbarCacheHits.Add(1)
}
m.tabbarNs.Add(dur.Nanoseconds())
}
func (m *metricsTracker) recordStatus(dur time.Duration, cacheHit bool) {
if m == nil {
return
}
m.statusDraws.Add(1)
if cacheHit {
m.statusCacheHits.Add(1)
}
m.statusNs.Add(dur.Nanoseconds())
}
func (m *metricsTracker) recordSnapshot(dur time.Duration) {
if m == nil {
return
}
m.snapshotReplays.Add(1)
ns := dur.Nanoseconds()
m.snapshotNs.Add(ns)
observeMax(&m.snapshotMaxNs, ns)
}
func (m *metricsTracker) recordTickerFire(didWork bool) {
if m == nil {
return
}
m.tickerFires.Add(1)
if !didWork {
m.tickerIdleFires.Add(1)
}
}
// snapshot captures the tracker's current state as a JSON-serialisable
// map. Suitable for both the per-second JSONL row and the final
// metrics.json aggregate.
type metricsSnapshot struct {
WallSeconds float64 `json:"wall_seconds"`
PTYChunks int64 `json:"pty_chunks"`
PTYBytes int64 `json:"pty_bytes"`
OnPTYOutNs int64 `json:"on_pty_out_ns_total"`
OnPTYOutMaxNs int64 `json:"on_pty_out_ns_max"`
OnPTYOutDrops int64 `json:"on_pty_out_drops"`
StdoutWrites int64 `json:"stdout_writes"`
StdoutBytes int64 `json:"stdout_bytes"`
StdoutNs int64 `json:"stdout_ns_total"`
StdoutMaxNs int64 `json:"stdout_ns_max"`
RenderCalls int64 `json:"render_calls"`
RenderNs int64 `json:"render_ns_total"`
RenderMaxNs int64 `json:"render_ns_max"`
EmuWriteCalls int64 `json:"emu_write_calls"`
EmuWriteNs int64 `json:"emu_write_ns_total"`
EmuWriteMaxNs int64 `json:"emu_write_ns_max"`
EmuTitleCalls int64 `json:"emu_title_calls"`
EmuTitleNs int64 `json:"emu_title_ns_total"`
EmuTitleSkips int64 `json:"emu_title_skips"`
SidebarDraws int64 `json:"sidebar_draws"`
SidebarCacheHits int64 `json:"sidebar_cache_hits"`
SidebarNs int64 `json:"sidebar_ns_total"`
SidebarMaxNs int64 `json:"sidebar_ns_max"`
TabbarDraws int64 `json:"tabbar_draws"`
TabbarCacheHits int64 `json:"tabbar_cache_hits"`
TabbarNs int64 `json:"tabbar_ns_total"`
StatusDraws int64 `json:"status_draws"`
StatusCacheHits int64 `json:"status_cache_hits"`
StatusNs int64 `json:"status_ns_total"`
SnapshotReplays int64 `json:"snapshot_replays"`
SnapshotNs int64 `json:"snapshot_ns_total"`
SnapshotMaxNs int64 `json:"snapshot_ns_max"`
TickerFires int64 `json:"ticker_fires"`
TickerIdleFires int64 `json:"ticker_idle_fires"`
// Derived rates (computed at snapshot time so consumers don't have
// to). All "per_second" values are averaged over wall_seconds.
PTYChunksPerSec float64 `json:"pty_chunks_per_sec"`
PTYBytesPerSec float64 `json:"pty_bytes_per_sec"`
OnPTYOutMeanUs float64 `json:"on_pty_out_mean_us"`
StdoutMeanUs float64 `json:"stdout_mean_us"`
EmuWriteMeanUs float64 `json:"emu_write_mean_us"`
SidebarMeanUs float64 `json:"sidebar_mean_us"`
SidebarCacheHitRate float64 `json:"sidebar_cache_hit_rate"`
TabbarCacheHitRate float64 `json:"tabbar_cache_hit_rate"`
StatusCacheHitRate float64 `json:"status_cache_hit_rate"`
EmuTitleSkipRate float64 `json:"emu_title_skip_rate"`
TickerIdleRate float64 `json:"ticker_idle_rate"`
Timestamp string `json:"timestamp"`
}
func (m *metricsTracker) snapshotNow() metricsSnapshot {
wall := time.Since(m.startedAt).Seconds()
if wall <= 0 {
wall = 1
}
chunks := m.ptyChunks.Load()
bytes := m.ptyBytes.Load()
onptyTotal := m.onPTYOutNs.Load()
stdW := m.stdoutWrites.Load()
stdNs := m.stdoutNs.Load()
emuW := m.emuWriteCalls.Load()
emuWNs := m.emuWriteNs.Load()
sbDraws := m.sidebarDraws.Load()
sbHits := m.sidebarCacheHits.Load()
sbNs := m.sidebarNs.Load()
tbDraws := m.tabbarDraws.Load()
tbHits := m.tabbarCacheHits.Load()
stDraws := m.statusDraws.Load()
stHits := m.statusCacheHits.Load()
emuTC := m.emuTitleCalls.Load()
emuTS := m.emuTitleSkips.Load()
tickerF := m.tickerFires.Load()
tickerI := m.tickerIdleFires.Load()
div := func(num, denom int64) float64 {
if denom == 0 {
return 0
}
return float64(num) / float64(denom)
}
return metricsSnapshot{
WallSeconds: wall,
PTYChunks: chunks,
PTYBytes: bytes,
OnPTYOutNs: onptyTotal,
OnPTYOutMaxNs: m.onPTYOutMaxNs.Load(),
OnPTYOutDrops: m.onPTYOutDrops.Load(),
StdoutWrites: stdW,
StdoutBytes: m.stdoutBytes.Load(),
StdoutNs: stdNs,
StdoutMaxNs: m.stdoutMaxNs.Load(),
RenderCalls: m.renderCalls.Load(),
RenderNs: m.renderNs.Load(),
RenderMaxNs: m.renderMaxNs.Load(),
EmuWriteCalls: emuW,
EmuWriteNs: emuWNs,
EmuWriteMaxNs: m.emuWriteMaxNs.Load(),
EmuTitleCalls: emuTC,
EmuTitleNs: m.emuTitleNs.Load(),
EmuTitleSkips: emuTS,
SidebarDraws: sbDraws,
SidebarCacheHits: sbHits,
SidebarNs: sbNs,
SidebarMaxNs: m.sidebarMaxNs.Load(),
TabbarDraws: tbDraws,
TabbarCacheHits: tbHits,
TabbarNs: m.tabbarNs.Load(),
StatusDraws: stDraws,
StatusCacheHits: stHits,
StatusNs: m.statusNs.Load(),
SnapshotReplays: m.snapshotReplays.Load(),
SnapshotNs: m.snapshotNs.Load(),
SnapshotMaxNs: m.snapshotMaxNs.Load(),
TickerFires: tickerF,
TickerIdleFires: tickerI,
PTYChunksPerSec: float64(chunks) / wall,
PTYBytesPerSec: float64(bytes) / wall,
OnPTYOutMeanUs: div(onptyTotal/1000, chunks),
StdoutMeanUs: div(stdNs/1000, stdW),
EmuWriteMeanUs: div(emuWNs/1000, emuW),
SidebarMeanUs: div(sbNs/1000, sbDraws),
SidebarCacheHitRate: div(sbHits, sbDraws),
TabbarCacheHitRate: div(tbHits, tbDraws),
StatusCacheHitRate: div(stHits, stDraws),
EmuTitleSkipRate: div(emuTS, emuTC+emuTS),
TickerIdleRate: div(tickerI, tickerF),
Timestamp: time.Now().Format(time.RFC3339Nano),
}
}
// run is the snapshotter goroutine: write a JSONL row every second
// until ctx is cancelled. Stops cleanly without flushing partial
// rows.
func (m *metricsTracker) run(ctx context.Context) {
if m == nil {
return
}
enc := json.NewEncoder(m.rowFile)
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
snap := m.snapshotNow()
_ = enc.Encode(snap)
}
}
}
// close writes the final aggregate snapshot to metrics.json + a
// short human-readable summary.txt, then closes the row file. Safe
// to call on a nil receiver.
func (m *metricsTracker) close() {
if m == nil {
return
}
snap := m.snapshotNow()
if f, err := os.Create(filepath.Join(m.dir, "metrics.json")); err == nil {
enc := json.NewEncoder(f)
enc.SetIndent("", " ")
_ = enc.Encode(snap)
_ = f.Close()
}
if f, err := os.Create(filepath.Join(m.dir, "summary.txt")); err == nil {
writeSummary(f, snap)
_ = f.Close()
}
if m.rowFile != nil {
_ = m.rowFile.Close()
m.rowFile = nil
}
}
// writeSummary renders a brief human-readable digest of a snapshot.
// Designed for `cat summary.txt` after a session — quick orientation
// before diving into metrics.json / pprof.
func writeSummary(w *os.File, s metricsSnapshot) {
fmt.Fprintf(w, "patterm performance summary\n")
fmt.Fprintf(w, "===========================\n\n")
fmt.Fprintf(w, "session length: %.1fs\n", s.WallSeconds)
fmt.Fprintf(w, "pty chunks: %d (%.1f /s)\n", s.PTYChunks, s.PTYChunksPerSec)
fmt.Fprintf(w, "pty bytes: %d (%.0f /s, %.1f KiB/s)\n",
s.PTYBytes, s.PTYBytesPerSec, s.PTYBytesPerSec/1024)
fmt.Fprintf(w, "pty chunks dropped: %d (focus not on caller — fast-path return)\n", s.OnPTYOutDrops)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "OnPTYOut mean: %.1fµs max: %.1fms\n",
s.OnPTYOutMeanUs, float64(s.OnPTYOutMaxNs)/1e6)
fmt.Fprintf(w, "viewport.Render calls: %d total %.1fms max %.1fms\n",
s.RenderCalls, float64(s.RenderNs)/1e6, float64(s.RenderMaxNs)/1e6)
fmt.Fprintf(w, "stdout writes: %d mean %.1fµs max %.1fms bytes %d\n",
s.StdoutWrites, s.StdoutMeanUs, float64(s.StdoutMaxNs)/1e6, s.StdoutBytes)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "emulator.Write (cgo): %d mean %.1fµs max %.1fms\n",
s.EmuWriteCalls, s.EmuWriteMeanUs, float64(s.EmuWriteMaxNs)/1e6)
fmt.Fprintf(w, "emulator.Title polls: %d real, %d gated skip rate %.1f%%\n",
s.EmuTitleCalls, s.EmuTitleSkips, s.EmuTitleSkipRate*100)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "sidebar draws: %d mean %.1fµs max %.1fms cache-hit %.1f%%\n",
s.SidebarDraws, s.SidebarMeanUs, float64(s.SidebarMaxNs)/1e6, s.SidebarCacheHitRate*100)
fmt.Fprintf(w, "tabbar draws: %d cache-hit %.1f%%\n",
s.TabbarDraws, s.TabbarCacheHitRate*100)
fmt.Fprintf(w, "status draws: %d cache-hit %.1f%%\n",
s.StatusDraws, s.StatusCacheHitRate*100)
fmt.Fprintf(w, "snapshot replays: %d total %.1fms max %.1fms\n",
s.SnapshotReplays, float64(s.SnapshotNs)/1e6, float64(s.SnapshotMaxNs)/1e6)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "chrome ticker: %d fires, %d idle idle rate %.1f%%\n",
s.TickerFires, s.TickerIdleFires, s.TickerIdleRate*100)
}

View File

@@ -0,0 +1,116 @@
package app
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"time"
)
func TestMetricsTrackerDisabledByEmptyDir(t *testing.T) {
m, err := newMetricsTracker("")
if err != nil {
t.Fatalf("newMetricsTracker(\"\") err: %v", err)
}
if m != nil {
t.Fatalf("expected nil tracker for empty dir, got %v", m)
}
}
func TestMetricsTrackerRecordsAndWrites(t *testing.T) {
dir := t.TempDir()
m, err := newMetricsTracker(dir)
if err != nil {
t.Fatalf("newMetricsTracker: %v", err)
}
if m == nil {
t.Fatal("expected non-nil tracker")
}
m.recordPTYOut(2*time.Millisecond, 1024)
m.recordPTYOut(5*time.Millisecond, 4096)
m.recordRender(800 * time.Microsecond)
m.recordStdout(300*time.Microsecond, 1100)
m.recordEmuWrite(150 * time.Microsecond)
m.recordEmuTitle(0, true)
m.recordEmuTitle(20*time.Microsecond, false)
m.recordSidebar(100*time.Microsecond, true)
m.recordSidebar(900*time.Microsecond, false)
m.recordTabbar(50*time.Microsecond, true)
m.recordStatus(40*time.Microsecond, true)
m.recordSnapshot(2 * time.Millisecond)
m.recordTickerFire(false)
m.recordTickerFire(true)
m.recordPTYOutDrop()
m.close()
// metrics.json should exist and parse, and reflect what we recorded.
raw, err := os.ReadFile(filepath.Join(dir, "metrics.json"))
if err != nil {
t.Fatalf("read metrics.json: %v", err)
}
var snap metricsSnapshot
if err := json.Unmarshal(raw, &snap); err != nil {
t.Fatalf("parse metrics.json: %v", err)
}
if snap.PTYChunks != 2 {
t.Errorf("PTYChunks = %d, want 2", snap.PTYChunks)
}
if snap.PTYBytes != 5120 {
t.Errorf("PTYBytes = %d, want 5120", snap.PTYBytes)
}
if snap.OnPTYOutMaxNs != (5 * time.Millisecond).Nanoseconds() {
t.Errorf("OnPTYOutMaxNs = %d, want %d",
snap.OnPTYOutMaxNs, (5 * time.Millisecond).Nanoseconds())
}
if snap.SidebarDraws != 2 {
t.Errorf("SidebarDraws = %d, want 2", snap.SidebarDraws)
}
if snap.SidebarCacheHits != 1 {
t.Errorf("SidebarCacheHits = %d, want 1", snap.SidebarCacheHits)
}
if snap.SidebarCacheHitRate != 0.5 {
t.Errorf("SidebarCacheHitRate = %v, want 0.5", snap.SidebarCacheHitRate)
}
if snap.EmuTitleCalls != 1 || snap.EmuTitleSkips != 1 {
t.Errorf("emu title accounting: calls=%d skips=%d, want 1/1",
snap.EmuTitleCalls, snap.EmuTitleSkips)
}
if snap.TickerFires != 2 || snap.TickerIdleFires != 1 {
t.Errorf("ticker accounting: fires=%d idle=%d, want 2/1",
snap.TickerFires, snap.TickerIdleFires)
}
if snap.OnPTYOutDrops != 1 {
t.Errorf("OnPTYOutDrops = %d, want 1", snap.OnPTYOutDrops)
}
// summary.txt should also be present and non-empty.
info, err := os.Stat(filepath.Join(dir, "summary.txt"))
if err != nil {
t.Fatalf("stat summary.txt: %v", err)
}
if info.Size() == 0 {
t.Fatal("summary.txt is empty")
}
}
func TestMetricsTrackerNilSafe(t *testing.T) {
// Every record* method must be safe to call on a nil receiver
// because the hot paths use that to avoid an enabled-check.
var m *metricsTracker
m.recordPTYOut(time.Millisecond, 100)
m.recordPTYOutDrop()
m.recordRender(time.Microsecond)
m.recordStdout(time.Microsecond, 50)
m.recordEmuWrite(time.Microsecond)
m.recordEmuTitle(time.Microsecond, false)
m.recordEmuTitle(0, true)
m.recordSidebar(time.Microsecond, true)
m.recordTabbar(time.Microsecond, false)
m.recordStatus(time.Microsecond, true)
m.recordSnapshot(time.Microsecond)
m.recordTickerFire(true)
m.close()
}

View File

@@ -50,6 +50,11 @@ type Session struct {
// JSON file so they can be re-spawned after patterm restarts.
// Optional; nil means "no persistence" (used by unit tests).
persistStore *persist.Store
// metrics is the optional performance tracker. nil when --profile
// is off. The pump goroutine reads it via atomic Load so installing
// metrics post-construction doesn't race with running children.
metrics atomic.Pointer[metricsTracker]
}
// SetPersistStore attaches a process-persistence store. Future Spawn /
@@ -61,6 +66,18 @@ func (s *Session) SetPersistStore(p *persist.Store) {
s.mu.Unlock()
}
// SetMetrics installs the per-session performance tracker. Safe to
// call with nil to disable (the default). Reads on the hot path go
// through atomic.Pointer.Load() with no lock; SetMetrics swaps the
// pointer once at startup.
func (s *Session) SetMetrics(m *metricsTracker) {
s.metrics.Store(m)
}
func (s *Session) loadMetrics() *metricsTracker {
return s.metrics.Load()
}
// ChildEventListener is implemented by the TUI to react to lifecycle
// events without polling.
type ChildEventListener interface {
@@ -392,9 +409,17 @@ func (s *Session) pumpChild(c *Child, runID uint64) {
}
chunk := buf[:n]
if em := c.Emulator(); em != nil {
m := s.loadMetrics()
wstart := time.Time{}
if m != nil {
wstart = time.Now()
}
if _, werr := em.Write(chunk); werr != nil {
logf("emulator.Write(child %s): %v", c.ID, werr)
}
if m != nil {
m.recordEmuWrite(time.Since(wstart))
}
// OSC 0/2 title updates ride on the same byte stream as
// the rest of the output. Polling the emulator after each
// chunk is cheap on its own (one CGO call) but codex/
@@ -403,9 +428,18 @@ func (s *Session) pumpChild(c *Child, runID uint64) {
// the chunk doesn't carry an OSC start byte at all; the
// title can only change on chunks that include one.
if containsOSC(chunk) {
tstart := time.Time{}
if m != nil {
tstart = time.Now()
}
if t, terr := em.Title(); terr == nil {
c.recordTitle(t)
}
if m != nil {
m.recordEmuTitle(time.Since(tstart), false)
}
} else if m != nil {
m.recordEmuTitle(0, true)
}
}
c.recordWrite(chunk)

View File

@@ -38,6 +38,10 @@ func formatShortDuration(d time.Duration) string {
// computed main viewport, so the sidebar region is outside the child's
// cursor range. We can redraw freely without fighting the child for cells.
func (st *uiState) drawSidebar() {
var entry time.Time
if st.metrics != nil {
entry = time.Now()
}
st.mu.Lock()
palOpen := st.palette != nil
focus := st.focusedID
@@ -231,10 +235,16 @@ func (st *uiState) drawSidebar() {
st.chromeCacheMu.Lock()
if frame == st.sidebarCache {
st.chromeCacheMu.Unlock()
if st.metrics != nil {
st.metrics.recordSidebar(time.Since(entry), true)
}
return
}
st.sidebarCache = frame
st.chromeCacheMu.Unlock()
if st.metrics != nil {
defer func() { st.metrics.recordSidebar(time.Since(entry), false) }()
}
st.outMu.Lock()
// Save cursor; emit the sidebar; restore.

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"strings"
"time"
"unicode/utf8"
)
@@ -17,6 +18,10 @@ const tabBarRows = 2
// to the leftmost tabs so the strip fills the screen edge-to-edge.
// A trailing "+ new" hint sits in the rightmost reserved slot.
func (st *uiState) drawTabBar() {
var entry time.Time
if st.metrics != nil {
entry = time.Now()
}
st.mu.Lock()
palOpen := st.palette != nil
focus := st.focusedID
@@ -188,10 +193,16 @@ func (st *uiState) drawTabBar() {
st.chromeCacheMu.Lock()
if frame == st.tabBarCache {
st.chromeCacheMu.Unlock()
if st.metrics != nil {
st.metrics.recordTabbar(time.Since(entry), true)
}
return
}
st.tabBarCache = frame
st.chromeCacheMu.Unlock()
if st.metrics != nil {
defer func() { st.metrics.recordTabbar(time.Since(entry), false) }()
}
st.outMu.Lock()
defer st.outMu.Unlock()