Files
patterm/internal/app/metrics.go
Harry Bayliss 1c590f8e32 Concrete perf metrics: live counters in --profile + benchmark suite
Live metrics (--profile):
- New metricsTracker instruments OnPTYOut, viewport renderer,
  stdout writes, libghostty-vt Write/Title CGO calls, sidebar /
  tabbar / status draws (with cache-hit accounting), snapshot
  replays, and the chrome ticker (so we can see ticker fires that
  did nothing).
- Writes metrics.jsonl (one snapshot per second) and metrics.json
  + summary.txt on exit, alongside the existing pprof files.
- All record* methods are nil-safe so disabled paths pay only a
  cheap nil check; counters are atomic so the per-PTY-chunk hot
  path stays lock-free.

Benchmark suite (go test -bench=.):
- Three workload fixtures — plain ASCII, SGR-styled lines, and a
  ratatui-style cursor-shuffling burst — plus a containsOSC
  microbenchmark. Reports ns/op, MB/s, allocs/op, B/op.
- Initial baseline numbers added to TODO under the perf-audit
  section, alongside two new findings (renderer allocs ~1 per 4
  bytes on styled chunks; styled throughput tops out near
  90 MB/s) those benchmarks surfaced.
2026-05-15 13:31:37 +01:00

463 lines
14 KiB
Go

package app
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync/atomic"
"time"
)
// metricsTracker collects per-hot-path counters and timings. All
// fields are atomic so callers can record from the per-PTY-chunk path
// without taking a lock. Enabled only when --profile is set.
//
// Sampled rates ("X per second", "p99 latency") are not tracked here
// directly — the snapshotter goroutine writes a row to metrics.jsonl
// every second, and analysis tools compute rates from the deltas.
// Aggregate totals are written to metrics.json on shutdown.
type metricsTracker struct {
startedAt time.Time
// PTY chunk arrival → stdout write pipeline (per OnPTYOut call).
ptyChunks atomic.Int64
ptyBytes atomic.Int64
onPTYOutNs atomic.Int64
onPTYOutMaxNs atomic.Int64
onPTYOutDrops atomic.Int64 // chunks for non-focused children — fast-path returns
stdoutWrites atomic.Int64
stdoutBytes atomic.Int64
stdoutNs atomic.Int64
stdoutMaxNs atomic.Int64
// Viewport renderer (state-machine over child PTY bytes).
renderCalls atomic.Int64
renderNs atomic.Int64
renderMaxNs atomic.Int64
// CGO into libghostty-vt (counted from pumpChild).
emuWriteCalls atomic.Int64
emuWriteNs atomic.Int64
emuWriteMaxNs atomic.Int64
emuTitleCalls atomic.Int64
emuTitleNs atomic.Int64
emuTitleSkips atomic.Int64 // OSC-gate fast path — title poll skipped
// Chrome paint pipeline.
sidebarDraws atomic.Int64
sidebarCacheHits atomic.Int64
sidebarNs atomic.Int64
sidebarMaxNs atomic.Int64
tabbarDraws atomic.Int64
tabbarCacheHits atomic.Int64
tabbarNs atomic.Int64
statusDraws atomic.Int64
statusCacheHits atomic.Int64
statusNs atomic.Int64
// Snapshot replay (focus / spawn / nudge).
snapshotReplays atomic.Int64
snapshotNs atomic.Int64
snapshotMaxNs atomic.Int64
// Chrome ticker — distinguishes useful work from idle wakeups.
tickerFires atomic.Int64
tickerIdleFires atomic.Int64 // nothing dirty when the ticker fired
// Output destination (set when enabled).
rowFile *os.File // metrics.jsonl
dir string
}
// newMetricsTracker creates an enabled tracker writing to <dir>/.
// Returns nil + nil err if dir is empty (feature off). Caller must
// call tracker.run(ctx) in a goroutine and tracker.close() at exit.
func newMetricsTracker(dir string) (*metricsTracker, error) {
if dir == "" {
return nil, nil
}
if err := os.MkdirAll(dir, 0o700); err != nil {
return nil, err
}
row, err := os.Create(filepath.Join(dir, "metrics.jsonl"))
if err != nil {
return nil, err
}
return &metricsTracker{
startedAt: time.Now(),
rowFile: row,
dir: dir,
}, nil
}
// observeMax updates dst to max(dst, v) using a CAS loop. Atomic max
// isn't a hardware primitive on most CPUs; this is the standard idiom.
// Spurious wakeups can race but the result settles at the true max.
func observeMax(dst *atomic.Int64, v int64) {
for {
old := dst.Load()
if v <= old {
return
}
if dst.CompareAndSwap(old, v) {
return
}
}
}
// recordPTYOut is called once at the end of each OnPTYOut invocation.
// `dur` is the full per-chunk wall time (renderer + stdout + chrome
// signals); `bytes` is the chunk's byte count.
func (m *metricsTracker) recordPTYOut(dur time.Duration, bytes int) {
if m == nil {
return
}
m.ptyChunks.Add(1)
m.ptyBytes.Add(int64(bytes))
ns := dur.Nanoseconds()
m.onPTYOutNs.Add(ns)
observeMax(&m.onPTYOutMaxNs, ns)
}
func (m *metricsTracker) recordPTYOutDrop() {
if m == nil {
return
}
m.onPTYOutDrops.Add(1)
}
func (m *metricsTracker) recordRender(dur time.Duration) {
if m == nil {
return
}
m.renderCalls.Add(1)
ns := dur.Nanoseconds()
m.renderNs.Add(ns)
observeMax(&m.renderMaxNs, ns)
}
func (m *metricsTracker) recordStdout(dur time.Duration, bytes int) {
if m == nil {
return
}
m.stdoutWrites.Add(1)
m.stdoutBytes.Add(int64(bytes))
ns := dur.Nanoseconds()
m.stdoutNs.Add(ns)
observeMax(&m.stdoutMaxNs, ns)
}
func (m *metricsTracker) recordEmuWrite(dur time.Duration) {
if m == nil {
return
}
m.emuWriteCalls.Add(1)
ns := dur.Nanoseconds()
m.emuWriteNs.Add(ns)
observeMax(&m.emuWriteMaxNs, ns)
}
func (m *metricsTracker) recordEmuTitle(dur time.Duration, skipped bool) {
if m == nil {
return
}
if skipped {
m.emuTitleSkips.Add(1)
return
}
m.emuTitleCalls.Add(1)
m.emuTitleNs.Add(dur.Nanoseconds())
}
func (m *metricsTracker) recordSidebar(dur time.Duration, cacheHit bool) {
if m == nil {
return
}
m.sidebarDraws.Add(1)
if cacheHit {
m.sidebarCacheHits.Add(1)
}
ns := dur.Nanoseconds()
m.sidebarNs.Add(ns)
observeMax(&m.sidebarMaxNs, ns)
}
func (m *metricsTracker) recordTabbar(dur time.Duration, cacheHit bool) {
if m == nil {
return
}
m.tabbarDraws.Add(1)
if cacheHit {
m.tabbarCacheHits.Add(1)
}
m.tabbarNs.Add(dur.Nanoseconds())
}
func (m *metricsTracker) recordStatus(dur time.Duration, cacheHit bool) {
if m == nil {
return
}
m.statusDraws.Add(1)
if cacheHit {
m.statusCacheHits.Add(1)
}
m.statusNs.Add(dur.Nanoseconds())
}
func (m *metricsTracker) recordSnapshot(dur time.Duration) {
if m == nil {
return
}
m.snapshotReplays.Add(1)
ns := dur.Nanoseconds()
m.snapshotNs.Add(ns)
observeMax(&m.snapshotMaxNs, ns)
}
func (m *metricsTracker) recordTickerFire(didWork bool) {
if m == nil {
return
}
m.tickerFires.Add(1)
if !didWork {
m.tickerIdleFires.Add(1)
}
}
// snapshot captures the tracker's current state as a JSON-serialisable
// map. Suitable for both the per-second JSONL row and the final
// metrics.json aggregate.
type metricsSnapshot struct {
WallSeconds float64 `json:"wall_seconds"`
PTYChunks int64 `json:"pty_chunks"`
PTYBytes int64 `json:"pty_bytes"`
OnPTYOutNs int64 `json:"on_pty_out_ns_total"`
OnPTYOutMaxNs int64 `json:"on_pty_out_ns_max"`
OnPTYOutDrops int64 `json:"on_pty_out_drops"`
StdoutWrites int64 `json:"stdout_writes"`
StdoutBytes int64 `json:"stdout_bytes"`
StdoutNs int64 `json:"stdout_ns_total"`
StdoutMaxNs int64 `json:"stdout_ns_max"`
RenderCalls int64 `json:"render_calls"`
RenderNs int64 `json:"render_ns_total"`
RenderMaxNs int64 `json:"render_ns_max"`
EmuWriteCalls int64 `json:"emu_write_calls"`
EmuWriteNs int64 `json:"emu_write_ns_total"`
EmuWriteMaxNs int64 `json:"emu_write_ns_max"`
EmuTitleCalls int64 `json:"emu_title_calls"`
EmuTitleNs int64 `json:"emu_title_ns_total"`
EmuTitleSkips int64 `json:"emu_title_skips"`
SidebarDraws int64 `json:"sidebar_draws"`
SidebarCacheHits int64 `json:"sidebar_cache_hits"`
SidebarNs int64 `json:"sidebar_ns_total"`
SidebarMaxNs int64 `json:"sidebar_ns_max"`
TabbarDraws int64 `json:"tabbar_draws"`
TabbarCacheHits int64 `json:"tabbar_cache_hits"`
TabbarNs int64 `json:"tabbar_ns_total"`
StatusDraws int64 `json:"status_draws"`
StatusCacheHits int64 `json:"status_cache_hits"`
StatusNs int64 `json:"status_ns_total"`
SnapshotReplays int64 `json:"snapshot_replays"`
SnapshotNs int64 `json:"snapshot_ns_total"`
SnapshotMaxNs int64 `json:"snapshot_ns_max"`
TickerFires int64 `json:"ticker_fires"`
TickerIdleFires int64 `json:"ticker_idle_fires"`
// Derived rates (computed at snapshot time so consumers don't have
// to). All "per_second" values are averaged over wall_seconds.
PTYChunksPerSec float64 `json:"pty_chunks_per_sec"`
PTYBytesPerSec float64 `json:"pty_bytes_per_sec"`
OnPTYOutMeanUs float64 `json:"on_pty_out_mean_us"`
StdoutMeanUs float64 `json:"stdout_mean_us"`
EmuWriteMeanUs float64 `json:"emu_write_mean_us"`
SidebarMeanUs float64 `json:"sidebar_mean_us"`
SidebarCacheHitRate float64 `json:"sidebar_cache_hit_rate"`
TabbarCacheHitRate float64 `json:"tabbar_cache_hit_rate"`
StatusCacheHitRate float64 `json:"status_cache_hit_rate"`
EmuTitleSkipRate float64 `json:"emu_title_skip_rate"`
TickerIdleRate float64 `json:"ticker_idle_rate"`
Timestamp string `json:"timestamp"`
}
func (m *metricsTracker) snapshotNow() metricsSnapshot {
wall := time.Since(m.startedAt).Seconds()
if wall <= 0 {
wall = 1
}
chunks := m.ptyChunks.Load()
bytes := m.ptyBytes.Load()
onptyTotal := m.onPTYOutNs.Load()
stdW := m.stdoutWrites.Load()
stdNs := m.stdoutNs.Load()
emuW := m.emuWriteCalls.Load()
emuWNs := m.emuWriteNs.Load()
sbDraws := m.sidebarDraws.Load()
sbHits := m.sidebarCacheHits.Load()
sbNs := m.sidebarNs.Load()
tbDraws := m.tabbarDraws.Load()
tbHits := m.tabbarCacheHits.Load()
stDraws := m.statusDraws.Load()
stHits := m.statusCacheHits.Load()
emuTC := m.emuTitleCalls.Load()
emuTS := m.emuTitleSkips.Load()
tickerF := m.tickerFires.Load()
tickerI := m.tickerIdleFires.Load()
div := func(num, denom int64) float64 {
if denom == 0 {
return 0
}
return float64(num) / float64(denom)
}
return metricsSnapshot{
WallSeconds: wall,
PTYChunks: chunks,
PTYBytes: bytes,
OnPTYOutNs: onptyTotal,
OnPTYOutMaxNs: m.onPTYOutMaxNs.Load(),
OnPTYOutDrops: m.onPTYOutDrops.Load(),
StdoutWrites: stdW,
StdoutBytes: m.stdoutBytes.Load(),
StdoutNs: stdNs,
StdoutMaxNs: m.stdoutMaxNs.Load(),
RenderCalls: m.renderCalls.Load(),
RenderNs: m.renderNs.Load(),
RenderMaxNs: m.renderMaxNs.Load(),
EmuWriteCalls: emuW,
EmuWriteNs: emuWNs,
EmuWriteMaxNs: m.emuWriteMaxNs.Load(),
EmuTitleCalls: emuTC,
EmuTitleNs: m.emuTitleNs.Load(),
EmuTitleSkips: emuTS,
SidebarDraws: sbDraws,
SidebarCacheHits: sbHits,
SidebarNs: sbNs,
SidebarMaxNs: m.sidebarMaxNs.Load(),
TabbarDraws: tbDraws,
TabbarCacheHits: tbHits,
TabbarNs: m.tabbarNs.Load(),
StatusDraws: stDraws,
StatusCacheHits: stHits,
StatusNs: m.statusNs.Load(),
SnapshotReplays: m.snapshotReplays.Load(),
SnapshotNs: m.snapshotNs.Load(),
SnapshotMaxNs: m.snapshotMaxNs.Load(),
TickerFires: tickerF,
TickerIdleFires: tickerI,
PTYChunksPerSec: float64(chunks) / wall,
PTYBytesPerSec: float64(bytes) / wall,
OnPTYOutMeanUs: div(onptyTotal/1000, chunks),
StdoutMeanUs: div(stdNs/1000, stdW),
EmuWriteMeanUs: div(emuWNs/1000, emuW),
SidebarMeanUs: div(sbNs/1000, sbDraws),
SidebarCacheHitRate: div(sbHits, sbDraws),
TabbarCacheHitRate: div(tbHits, tbDraws),
StatusCacheHitRate: div(stHits, stDraws),
EmuTitleSkipRate: div(emuTS, emuTC+emuTS),
TickerIdleRate: div(tickerI, tickerF),
Timestamp: time.Now().Format(time.RFC3339Nano),
}
}
// run is the snapshotter goroutine: write a JSONL row every second
// until ctx is cancelled. Stops cleanly without flushing partial
// rows.
func (m *metricsTracker) run(ctx context.Context) {
if m == nil {
return
}
enc := json.NewEncoder(m.rowFile)
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
snap := m.snapshotNow()
_ = enc.Encode(snap)
}
}
}
// close writes the final aggregate snapshot to metrics.json + a
// short human-readable summary.txt, then closes the row file. Safe
// to call on a nil receiver.
func (m *metricsTracker) close() {
if m == nil {
return
}
snap := m.snapshotNow()
if f, err := os.Create(filepath.Join(m.dir, "metrics.json")); err == nil {
enc := json.NewEncoder(f)
enc.SetIndent("", " ")
_ = enc.Encode(snap)
_ = f.Close()
}
if f, err := os.Create(filepath.Join(m.dir, "summary.txt")); err == nil {
writeSummary(f, snap)
_ = f.Close()
}
if m.rowFile != nil {
_ = m.rowFile.Close()
m.rowFile = nil
}
}
// writeSummary renders a brief human-readable digest of a snapshot.
// Designed for `cat summary.txt` after a session — quick orientation
// before diving into metrics.json / pprof.
func writeSummary(w *os.File, s metricsSnapshot) {
fmt.Fprintf(w, "patterm performance summary\n")
fmt.Fprintf(w, "===========================\n\n")
fmt.Fprintf(w, "session length: %.1fs\n", s.WallSeconds)
fmt.Fprintf(w, "pty chunks: %d (%.1f /s)\n", s.PTYChunks, s.PTYChunksPerSec)
fmt.Fprintf(w, "pty bytes: %d (%.0f /s, %.1f KiB/s)\n",
s.PTYBytes, s.PTYBytesPerSec, s.PTYBytesPerSec/1024)
fmt.Fprintf(w, "pty chunks dropped: %d (focus not on caller — fast-path return)\n", s.OnPTYOutDrops)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "OnPTYOut mean: %.1fµs max: %.1fms\n",
s.OnPTYOutMeanUs, float64(s.OnPTYOutMaxNs)/1e6)
fmt.Fprintf(w, "viewport.Render calls: %d total %.1fms max %.1fms\n",
s.RenderCalls, float64(s.RenderNs)/1e6, float64(s.RenderMaxNs)/1e6)
fmt.Fprintf(w, "stdout writes: %d mean %.1fµs max %.1fms bytes %d\n",
s.StdoutWrites, s.StdoutMeanUs, float64(s.StdoutMaxNs)/1e6, s.StdoutBytes)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "emulator.Write (cgo): %d mean %.1fµs max %.1fms\n",
s.EmuWriteCalls, s.EmuWriteMeanUs, float64(s.EmuWriteMaxNs)/1e6)
fmt.Fprintf(w, "emulator.Title polls: %d real, %d gated skip rate %.1f%%\n",
s.EmuTitleCalls, s.EmuTitleSkips, s.EmuTitleSkipRate*100)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "sidebar draws: %d mean %.1fµs max %.1fms cache-hit %.1f%%\n",
s.SidebarDraws, s.SidebarMeanUs, float64(s.SidebarMaxNs)/1e6, s.SidebarCacheHitRate*100)
fmt.Fprintf(w, "tabbar draws: %d cache-hit %.1f%%\n",
s.TabbarDraws, s.TabbarCacheHitRate*100)
fmt.Fprintf(w, "status draws: %d cache-hit %.1f%%\n",
s.StatusDraws, s.StatusCacheHitRate*100)
fmt.Fprintf(w, "snapshot replays: %d total %.1fms max %.1fms\n",
s.SnapshotReplays, float64(s.SnapshotNs)/1e6, float64(s.SnapshotMaxNs)/1e6)
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "chrome ticker: %d fires, %d idle idle rate %.1f%%\n",
s.TickerFires, s.TickerIdleFires, s.TickerIdleRate*100)
}