Concrete perf metrics: live counters in --profile + benchmark suite
Live metrics (--profile): - New metricsTracker instruments OnPTYOut, viewport renderer, stdout writes, libghostty-vt Write/Title CGO calls, sidebar / tabbar / status draws (with cache-hit accounting), snapshot replays, and the chrome ticker (so we can see ticker fires that did nothing). - Writes metrics.jsonl (one snapshot per second) and metrics.json + summary.txt on exit, alongside the existing pprof files. - All record* methods are nil-safe so disabled paths pay only a cheap nil check; counters are atomic so the per-PTY-chunk hot path stays lock-free. Benchmark suite (go test -bench=.): - Three workload fixtures — plain ASCII, SGR-styled lines, and a ratatui-style cursor-shuffling burst — plus a containsOSC microbenchmark. Reports ns/op, MB/s, allocs/op, B/op. - Initial baseline numbers added to TODO under the perf-audit section, alongside two new findings (renderer allocs ~1 per 4 bytes on styled chunks; styled throughput tops out near 90 MB/s) those benchmarks surfaced.
This commit is contained in:
462
internal/app/metrics.go
Normal file
462
internal/app/metrics.go
Normal file
@@ -0,0 +1,462 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// metricsTracker collects per-hot-path counters and timings. All
|
||||
// fields are atomic so callers can record from the per-PTY-chunk path
|
||||
// without taking a lock. Enabled only when --profile is set.
|
||||
//
|
||||
// Sampled rates ("X per second", "p99 latency") are not tracked here
|
||||
// directly — the snapshotter goroutine writes a row to metrics.jsonl
|
||||
// every second, and analysis tools compute rates from the deltas.
|
||||
// Aggregate totals are written to metrics.json on shutdown.
|
||||
type metricsTracker struct {
|
||||
startedAt time.Time
|
||||
|
||||
// PTY chunk arrival → stdout write pipeline (per OnPTYOut call).
|
||||
ptyChunks atomic.Int64
|
||||
ptyBytes atomic.Int64
|
||||
onPTYOutNs atomic.Int64
|
||||
onPTYOutMaxNs atomic.Int64
|
||||
onPTYOutDrops atomic.Int64 // chunks for non-focused children — fast-path returns
|
||||
stdoutWrites atomic.Int64
|
||||
stdoutBytes atomic.Int64
|
||||
stdoutNs atomic.Int64
|
||||
stdoutMaxNs atomic.Int64
|
||||
|
||||
// Viewport renderer (state-machine over child PTY bytes).
|
||||
renderCalls atomic.Int64
|
||||
renderNs atomic.Int64
|
||||
renderMaxNs atomic.Int64
|
||||
|
||||
// CGO into libghostty-vt (counted from pumpChild).
|
||||
emuWriteCalls atomic.Int64
|
||||
emuWriteNs atomic.Int64
|
||||
emuWriteMaxNs atomic.Int64
|
||||
emuTitleCalls atomic.Int64
|
||||
emuTitleNs atomic.Int64
|
||||
emuTitleSkips atomic.Int64 // OSC-gate fast path — title poll skipped
|
||||
|
||||
// Chrome paint pipeline.
|
||||
sidebarDraws atomic.Int64
|
||||
sidebarCacheHits atomic.Int64
|
||||
sidebarNs atomic.Int64
|
||||
sidebarMaxNs atomic.Int64
|
||||
|
||||
tabbarDraws atomic.Int64
|
||||
tabbarCacheHits atomic.Int64
|
||||
tabbarNs atomic.Int64
|
||||
|
||||
statusDraws atomic.Int64
|
||||
statusCacheHits atomic.Int64
|
||||
statusNs atomic.Int64
|
||||
|
||||
// Snapshot replay (focus / spawn / nudge).
|
||||
snapshotReplays atomic.Int64
|
||||
snapshotNs atomic.Int64
|
||||
snapshotMaxNs atomic.Int64
|
||||
|
||||
// Chrome ticker — distinguishes useful work from idle wakeups.
|
||||
tickerFires atomic.Int64
|
||||
tickerIdleFires atomic.Int64 // nothing dirty when the ticker fired
|
||||
|
||||
// Output destination (set when enabled).
|
||||
rowFile *os.File // metrics.jsonl
|
||||
dir string
|
||||
}
|
||||
|
||||
// newMetricsTracker creates an enabled tracker writing to <dir>/.
|
||||
// Returns nil + nil err if dir is empty (feature off). Caller must
|
||||
// call tracker.run(ctx) in a goroutine and tracker.close() at exit.
|
||||
func newMetricsTracker(dir string) (*metricsTracker, error) {
|
||||
if dir == "" {
|
||||
return nil, nil
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0o700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
row, err := os.Create(filepath.Join(dir, "metrics.jsonl"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &metricsTracker{
|
||||
startedAt: time.Now(),
|
||||
rowFile: row,
|
||||
dir: dir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// observeMax updates dst to max(dst, v) using a CAS loop. Atomic max
|
||||
// isn't a hardware primitive on most CPUs; this is the standard idiom.
|
||||
// Spurious wakeups can race but the result settles at the true max.
|
||||
func observeMax(dst *atomic.Int64, v int64) {
|
||||
for {
|
||||
old := dst.Load()
|
||||
if v <= old {
|
||||
return
|
||||
}
|
||||
if dst.CompareAndSwap(old, v) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// recordPTYOut is called once at the end of each OnPTYOut invocation.
|
||||
// `dur` is the full per-chunk wall time (renderer + stdout + chrome
|
||||
// signals); `bytes` is the chunk's byte count.
|
||||
func (m *metricsTracker) recordPTYOut(dur time.Duration, bytes int) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.ptyChunks.Add(1)
|
||||
m.ptyBytes.Add(int64(bytes))
|
||||
ns := dur.Nanoseconds()
|
||||
m.onPTYOutNs.Add(ns)
|
||||
observeMax(&m.onPTYOutMaxNs, ns)
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordPTYOutDrop() {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.onPTYOutDrops.Add(1)
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordRender(dur time.Duration) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.renderCalls.Add(1)
|
||||
ns := dur.Nanoseconds()
|
||||
m.renderNs.Add(ns)
|
||||
observeMax(&m.renderMaxNs, ns)
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordStdout(dur time.Duration, bytes int) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.stdoutWrites.Add(1)
|
||||
m.stdoutBytes.Add(int64(bytes))
|
||||
ns := dur.Nanoseconds()
|
||||
m.stdoutNs.Add(ns)
|
||||
observeMax(&m.stdoutMaxNs, ns)
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordEmuWrite(dur time.Duration) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.emuWriteCalls.Add(1)
|
||||
ns := dur.Nanoseconds()
|
||||
m.emuWriteNs.Add(ns)
|
||||
observeMax(&m.emuWriteMaxNs, ns)
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordEmuTitle(dur time.Duration, skipped bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
if skipped {
|
||||
m.emuTitleSkips.Add(1)
|
||||
return
|
||||
}
|
||||
m.emuTitleCalls.Add(1)
|
||||
m.emuTitleNs.Add(dur.Nanoseconds())
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordSidebar(dur time.Duration, cacheHit bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.sidebarDraws.Add(1)
|
||||
if cacheHit {
|
||||
m.sidebarCacheHits.Add(1)
|
||||
}
|
||||
ns := dur.Nanoseconds()
|
||||
m.sidebarNs.Add(ns)
|
||||
observeMax(&m.sidebarMaxNs, ns)
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordTabbar(dur time.Duration, cacheHit bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.tabbarDraws.Add(1)
|
||||
if cacheHit {
|
||||
m.tabbarCacheHits.Add(1)
|
||||
}
|
||||
m.tabbarNs.Add(dur.Nanoseconds())
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordStatus(dur time.Duration, cacheHit bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.statusDraws.Add(1)
|
||||
if cacheHit {
|
||||
m.statusCacheHits.Add(1)
|
||||
}
|
||||
m.statusNs.Add(dur.Nanoseconds())
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordSnapshot(dur time.Duration) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.snapshotReplays.Add(1)
|
||||
ns := dur.Nanoseconds()
|
||||
m.snapshotNs.Add(ns)
|
||||
observeMax(&m.snapshotMaxNs, ns)
|
||||
}
|
||||
|
||||
func (m *metricsTracker) recordTickerFire(didWork bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.tickerFires.Add(1)
|
||||
if !didWork {
|
||||
m.tickerIdleFires.Add(1)
|
||||
}
|
||||
}
|
||||
|
||||
// snapshot captures the tracker's current state as a JSON-serialisable
|
||||
// map. Suitable for both the per-second JSONL row and the final
|
||||
// metrics.json aggregate.
|
||||
type metricsSnapshot struct {
|
||||
WallSeconds float64 `json:"wall_seconds"`
|
||||
PTYChunks int64 `json:"pty_chunks"`
|
||||
PTYBytes int64 `json:"pty_bytes"`
|
||||
OnPTYOutNs int64 `json:"on_pty_out_ns_total"`
|
||||
OnPTYOutMaxNs int64 `json:"on_pty_out_ns_max"`
|
||||
OnPTYOutDrops int64 `json:"on_pty_out_drops"`
|
||||
StdoutWrites int64 `json:"stdout_writes"`
|
||||
StdoutBytes int64 `json:"stdout_bytes"`
|
||||
StdoutNs int64 `json:"stdout_ns_total"`
|
||||
StdoutMaxNs int64 `json:"stdout_ns_max"`
|
||||
|
||||
RenderCalls int64 `json:"render_calls"`
|
||||
RenderNs int64 `json:"render_ns_total"`
|
||||
RenderMaxNs int64 `json:"render_ns_max"`
|
||||
|
||||
EmuWriteCalls int64 `json:"emu_write_calls"`
|
||||
EmuWriteNs int64 `json:"emu_write_ns_total"`
|
||||
EmuWriteMaxNs int64 `json:"emu_write_ns_max"`
|
||||
EmuTitleCalls int64 `json:"emu_title_calls"`
|
||||
EmuTitleNs int64 `json:"emu_title_ns_total"`
|
||||
EmuTitleSkips int64 `json:"emu_title_skips"`
|
||||
|
||||
SidebarDraws int64 `json:"sidebar_draws"`
|
||||
SidebarCacheHits int64 `json:"sidebar_cache_hits"`
|
||||
SidebarNs int64 `json:"sidebar_ns_total"`
|
||||
SidebarMaxNs int64 `json:"sidebar_ns_max"`
|
||||
|
||||
TabbarDraws int64 `json:"tabbar_draws"`
|
||||
TabbarCacheHits int64 `json:"tabbar_cache_hits"`
|
||||
TabbarNs int64 `json:"tabbar_ns_total"`
|
||||
|
||||
StatusDraws int64 `json:"status_draws"`
|
||||
StatusCacheHits int64 `json:"status_cache_hits"`
|
||||
StatusNs int64 `json:"status_ns_total"`
|
||||
|
||||
SnapshotReplays int64 `json:"snapshot_replays"`
|
||||
SnapshotNs int64 `json:"snapshot_ns_total"`
|
||||
SnapshotMaxNs int64 `json:"snapshot_ns_max"`
|
||||
|
||||
TickerFires int64 `json:"ticker_fires"`
|
||||
TickerIdleFires int64 `json:"ticker_idle_fires"`
|
||||
|
||||
// Derived rates (computed at snapshot time so consumers don't have
|
||||
// to). All "per_second" values are averaged over wall_seconds.
|
||||
PTYChunksPerSec float64 `json:"pty_chunks_per_sec"`
|
||||
PTYBytesPerSec float64 `json:"pty_bytes_per_sec"`
|
||||
OnPTYOutMeanUs float64 `json:"on_pty_out_mean_us"`
|
||||
StdoutMeanUs float64 `json:"stdout_mean_us"`
|
||||
EmuWriteMeanUs float64 `json:"emu_write_mean_us"`
|
||||
SidebarMeanUs float64 `json:"sidebar_mean_us"`
|
||||
SidebarCacheHitRate float64 `json:"sidebar_cache_hit_rate"`
|
||||
TabbarCacheHitRate float64 `json:"tabbar_cache_hit_rate"`
|
||||
StatusCacheHitRate float64 `json:"status_cache_hit_rate"`
|
||||
EmuTitleSkipRate float64 `json:"emu_title_skip_rate"`
|
||||
TickerIdleRate float64 `json:"ticker_idle_rate"`
|
||||
Timestamp string `json:"timestamp"`
|
||||
}
|
||||
|
||||
func (m *metricsTracker) snapshotNow() metricsSnapshot {
|
||||
wall := time.Since(m.startedAt).Seconds()
|
||||
if wall <= 0 {
|
||||
wall = 1
|
||||
}
|
||||
chunks := m.ptyChunks.Load()
|
||||
bytes := m.ptyBytes.Load()
|
||||
onptyTotal := m.onPTYOutNs.Load()
|
||||
stdW := m.stdoutWrites.Load()
|
||||
stdNs := m.stdoutNs.Load()
|
||||
emuW := m.emuWriteCalls.Load()
|
||||
emuWNs := m.emuWriteNs.Load()
|
||||
sbDraws := m.sidebarDraws.Load()
|
||||
sbHits := m.sidebarCacheHits.Load()
|
||||
sbNs := m.sidebarNs.Load()
|
||||
tbDraws := m.tabbarDraws.Load()
|
||||
tbHits := m.tabbarCacheHits.Load()
|
||||
stDraws := m.statusDraws.Load()
|
||||
stHits := m.statusCacheHits.Load()
|
||||
emuTC := m.emuTitleCalls.Load()
|
||||
emuTS := m.emuTitleSkips.Load()
|
||||
tickerF := m.tickerFires.Load()
|
||||
tickerI := m.tickerIdleFires.Load()
|
||||
|
||||
div := func(num, denom int64) float64 {
|
||||
if denom == 0 {
|
||||
return 0
|
||||
}
|
||||
return float64(num) / float64(denom)
|
||||
}
|
||||
|
||||
return metricsSnapshot{
|
||||
WallSeconds: wall,
|
||||
PTYChunks: chunks,
|
||||
PTYBytes: bytes,
|
||||
OnPTYOutNs: onptyTotal,
|
||||
OnPTYOutMaxNs: m.onPTYOutMaxNs.Load(),
|
||||
OnPTYOutDrops: m.onPTYOutDrops.Load(),
|
||||
StdoutWrites: stdW,
|
||||
StdoutBytes: m.stdoutBytes.Load(),
|
||||
StdoutNs: stdNs,
|
||||
StdoutMaxNs: m.stdoutMaxNs.Load(),
|
||||
|
||||
RenderCalls: m.renderCalls.Load(),
|
||||
RenderNs: m.renderNs.Load(),
|
||||
RenderMaxNs: m.renderMaxNs.Load(),
|
||||
|
||||
EmuWriteCalls: emuW,
|
||||
EmuWriteNs: emuWNs,
|
||||
EmuWriteMaxNs: m.emuWriteMaxNs.Load(),
|
||||
EmuTitleCalls: emuTC,
|
||||
EmuTitleNs: m.emuTitleNs.Load(),
|
||||
EmuTitleSkips: emuTS,
|
||||
|
||||
SidebarDraws: sbDraws,
|
||||
SidebarCacheHits: sbHits,
|
||||
SidebarNs: sbNs,
|
||||
SidebarMaxNs: m.sidebarMaxNs.Load(),
|
||||
|
||||
TabbarDraws: tbDraws,
|
||||
TabbarCacheHits: tbHits,
|
||||
TabbarNs: m.tabbarNs.Load(),
|
||||
|
||||
StatusDraws: stDraws,
|
||||
StatusCacheHits: stHits,
|
||||
StatusNs: m.statusNs.Load(),
|
||||
|
||||
SnapshotReplays: m.snapshotReplays.Load(),
|
||||
SnapshotNs: m.snapshotNs.Load(),
|
||||
SnapshotMaxNs: m.snapshotMaxNs.Load(),
|
||||
|
||||
TickerFires: tickerF,
|
||||
TickerIdleFires: tickerI,
|
||||
|
||||
PTYChunksPerSec: float64(chunks) / wall,
|
||||
PTYBytesPerSec: float64(bytes) / wall,
|
||||
OnPTYOutMeanUs: div(onptyTotal/1000, chunks),
|
||||
StdoutMeanUs: div(stdNs/1000, stdW),
|
||||
EmuWriteMeanUs: div(emuWNs/1000, emuW),
|
||||
SidebarMeanUs: div(sbNs/1000, sbDraws),
|
||||
SidebarCacheHitRate: div(sbHits, sbDraws),
|
||||
TabbarCacheHitRate: div(tbHits, tbDraws),
|
||||
StatusCacheHitRate: div(stHits, stDraws),
|
||||
EmuTitleSkipRate: div(emuTS, emuTC+emuTS),
|
||||
TickerIdleRate: div(tickerI, tickerF),
|
||||
Timestamp: time.Now().Format(time.RFC3339Nano),
|
||||
}
|
||||
}
|
||||
|
||||
// run is the snapshotter goroutine: write a JSONL row every second
|
||||
// until ctx is cancelled. Stops cleanly without flushing partial
|
||||
// rows.
|
||||
func (m *metricsTracker) run(ctx context.Context) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
enc := json.NewEncoder(m.rowFile)
|
||||
ticker := time.NewTicker(time.Second)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
snap := m.snapshotNow()
|
||||
_ = enc.Encode(snap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// close writes the final aggregate snapshot to metrics.json + a
|
||||
// short human-readable summary.txt, then closes the row file. Safe
|
||||
// to call on a nil receiver.
|
||||
func (m *metricsTracker) close() {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
snap := m.snapshotNow()
|
||||
if f, err := os.Create(filepath.Join(m.dir, "metrics.json")); err == nil {
|
||||
enc := json.NewEncoder(f)
|
||||
enc.SetIndent("", " ")
|
||||
_ = enc.Encode(snap)
|
||||
_ = f.Close()
|
||||
}
|
||||
if f, err := os.Create(filepath.Join(m.dir, "summary.txt")); err == nil {
|
||||
writeSummary(f, snap)
|
||||
_ = f.Close()
|
||||
}
|
||||
if m.rowFile != nil {
|
||||
_ = m.rowFile.Close()
|
||||
m.rowFile = nil
|
||||
}
|
||||
}
|
||||
|
||||
// writeSummary renders a brief human-readable digest of a snapshot.
|
||||
// Designed for `cat summary.txt` after a session — quick orientation
|
||||
// before diving into metrics.json / pprof.
|
||||
func writeSummary(w *os.File, s metricsSnapshot) {
|
||||
fmt.Fprintf(w, "patterm performance summary\n")
|
||||
fmt.Fprintf(w, "===========================\n\n")
|
||||
fmt.Fprintf(w, "session length: %.1fs\n", s.WallSeconds)
|
||||
fmt.Fprintf(w, "pty chunks: %d (%.1f /s)\n", s.PTYChunks, s.PTYChunksPerSec)
|
||||
fmt.Fprintf(w, "pty bytes: %d (%.0f /s, %.1f KiB/s)\n",
|
||||
s.PTYBytes, s.PTYBytesPerSec, s.PTYBytesPerSec/1024)
|
||||
fmt.Fprintf(w, "pty chunks dropped: %d (focus not on caller — fast-path return)\n", s.OnPTYOutDrops)
|
||||
fmt.Fprintf(w, "\n")
|
||||
fmt.Fprintf(w, "OnPTYOut mean: %.1fµs max: %.1fms\n",
|
||||
s.OnPTYOutMeanUs, float64(s.OnPTYOutMaxNs)/1e6)
|
||||
fmt.Fprintf(w, "viewport.Render calls: %d total %.1fms max %.1fms\n",
|
||||
s.RenderCalls, float64(s.RenderNs)/1e6, float64(s.RenderMaxNs)/1e6)
|
||||
fmt.Fprintf(w, "stdout writes: %d mean %.1fµs max %.1fms bytes %d\n",
|
||||
s.StdoutWrites, s.StdoutMeanUs, float64(s.StdoutMaxNs)/1e6, s.StdoutBytes)
|
||||
fmt.Fprintf(w, "\n")
|
||||
fmt.Fprintf(w, "emulator.Write (cgo): %d mean %.1fµs max %.1fms\n",
|
||||
s.EmuWriteCalls, s.EmuWriteMeanUs, float64(s.EmuWriteMaxNs)/1e6)
|
||||
fmt.Fprintf(w, "emulator.Title polls: %d real, %d gated skip rate %.1f%%\n",
|
||||
s.EmuTitleCalls, s.EmuTitleSkips, s.EmuTitleSkipRate*100)
|
||||
fmt.Fprintf(w, "\n")
|
||||
fmt.Fprintf(w, "sidebar draws: %d mean %.1fµs max %.1fms cache-hit %.1f%%\n",
|
||||
s.SidebarDraws, s.SidebarMeanUs, float64(s.SidebarMaxNs)/1e6, s.SidebarCacheHitRate*100)
|
||||
fmt.Fprintf(w, "tabbar draws: %d cache-hit %.1f%%\n",
|
||||
s.TabbarDraws, s.TabbarCacheHitRate*100)
|
||||
fmt.Fprintf(w, "status draws: %d cache-hit %.1f%%\n",
|
||||
s.StatusDraws, s.StatusCacheHitRate*100)
|
||||
fmt.Fprintf(w, "snapshot replays: %d total %.1fms max %.1fms\n",
|
||||
s.SnapshotReplays, float64(s.SnapshotNs)/1e6, float64(s.SnapshotMaxNs)/1e6)
|
||||
fmt.Fprintf(w, "\n")
|
||||
fmt.Fprintf(w, "chrome ticker: %d fires, %d idle idle rate %.1f%%\n",
|
||||
s.TickerFires, s.TickerIdleFires, s.TickerIdleRate*100)
|
||||
}
|
||||
Reference in New Issue
Block a user