Live metrics (--profile): - New metricsTracker instruments OnPTYOut, viewport renderer, stdout writes, libghostty-vt Write/Title CGO calls, sidebar / tabbar / status draws (with cache-hit accounting), snapshot replays, and the chrome ticker (so we can see ticker fires that did nothing). - Writes metrics.jsonl (one snapshot per second) and metrics.json + summary.txt on exit, alongside the existing pprof files. - All record* methods are nil-safe so disabled paths pay only a cheap nil check; counters are atomic so the per-PTY-chunk hot path stays lock-free. Benchmark suite (go test -bench=.): - Three workload fixtures — plain ASCII, SGR-styled lines, and a ratatui-style cursor-shuffling burst — plus a containsOSC microbenchmark. Reports ns/op, MB/s, allocs/op, B/op. - Initial baseline numbers added to TODO under the perf-audit section, alongside two new findings (renderer allocs ~1 per 4 bytes on styled chunks; styled throughput tops out near 90 MB/s) those benchmarks surfaced.
463 lines
14 KiB
Go
463 lines
14 KiB
Go
package app
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
// metricsTracker collects per-hot-path counters and timings. All
|
|
// fields are atomic so callers can record from the per-PTY-chunk path
|
|
// without taking a lock. Enabled only when --profile is set.
|
|
//
|
|
// Sampled rates ("X per second", "p99 latency") are not tracked here
|
|
// directly — the snapshotter goroutine writes a row to metrics.jsonl
|
|
// every second, and analysis tools compute rates from the deltas.
|
|
// Aggregate totals are written to metrics.json on shutdown.
|
|
type metricsTracker struct {
|
|
startedAt time.Time
|
|
|
|
// PTY chunk arrival → stdout write pipeline (per OnPTYOut call).
|
|
ptyChunks atomic.Int64
|
|
ptyBytes atomic.Int64
|
|
onPTYOutNs atomic.Int64
|
|
onPTYOutMaxNs atomic.Int64
|
|
onPTYOutDrops atomic.Int64 // chunks for non-focused children — fast-path returns
|
|
stdoutWrites atomic.Int64
|
|
stdoutBytes atomic.Int64
|
|
stdoutNs atomic.Int64
|
|
stdoutMaxNs atomic.Int64
|
|
|
|
// Viewport renderer (state-machine over child PTY bytes).
|
|
renderCalls atomic.Int64
|
|
renderNs atomic.Int64
|
|
renderMaxNs atomic.Int64
|
|
|
|
// CGO into libghostty-vt (counted from pumpChild).
|
|
emuWriteCalls atomic.Int64
|
|
emuWriteNs atomic.Int64
|
|
emuWriteMaxNs atomic.Int64
|
|
emuTitleCalls atomic.Int64
|
|
emuTitleNs atomic.Int64
|
|
emuTitleSkips atomic.Int64 // OSC-gate fast path — title poll skipped
|
|
|
|
// Chrome paint pipeline.
|
|
sidebarDraws atomic.Int64
|
|
sidebarCacheHits atomic.Int64
|
|
sidebarNs atomic.Int64
|
|
sidebarMaxNs atomic.Int64
|
|
|
|
tabbarDraws atomic.Int64
|
|
tabbarCacheHits atomic.Int64
|
|
tabbarNs atomic.Int64
|
|
|
|
statusDraws atomic.Int64
|
|
statusCacheHits atomic.Int64
|
|
statusNs atomic.Int64
|
|
|
|
// Snapshot replay (focus / spawn / nudge).
|
|
snapshotReplays atomic.Int64
|
|
snapshotNs atomic.Int64
|
|
snapshotMaxNs atomic.Int64
|
|
|
|
// Chrome ticker — distinguishes useful work from idle wakeups.
|
|
tickerFires atomic.Int64
|
|
tickerIdleFires atomic.Int64 // nothing dirty when the ticker fired
|
|
|
|
// Output destination (set when enabled).
|
|
rowFile *os.File // metrics.jsonl
|
|
dir string
|
|
}
|
|
|
|
// newMetricsTracker creates an enabled tracker writing to <dir>/.
|
|
// Returns nil + nil err if dir is empty (feature off). Caller must
|
|
// call tracker.run(ctx) in a goroutine and tracker.close() at exit.
|
|
func newMetricsTracker(dir string) (*metricsTracker, error) {
|
|
if dir == "" {
|
|
return nil, nil
|
|
}
|
|
if err := os.MkdirAll(dir, 0o700); err != nil {
|
|
return nil, err
|
|
}
|
|
row, err := os.Create(filepath.Join(dir, "metrics.jsonl"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &metricsTracker{
|
|
startedAt: time.Now(),
|
|
rowFile: row,
|
|
dir: dir,
|
|
}, nil
|
|
}
|
|
|
|
// observeMax updates dst to max(dst, v) using a CAS loop. Atomic max
|
|
// isn't a hardware primitive on most CPUs; this is the standard idiom.
|
|
// Spurious wakeups can race but the result settles at the true max.
|
|
func observeMax(dst *atomic.Int64, v int64) {
|
|
for {
|
|
old := dst.Load()
|
|
if v <= old {
|
|
return
|
|
}
|
|
if dst.CompareAndSwap(old, v) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// recordPTYOut is called once at the end of each OnPTYOut invocation.
|
|
// `dur` is the full per-chunk wall time (renderer + stdout + chrome
|
|
// signals); `bytes` is the chunk's byte count.
|
|
func (m *metricsTracker) recordPTYOut(dur time.Duration, bytes int) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.ptyChunks.Add(1)
|
|
m.ptyBytes.Add(int64(bytes))
|
|
ns := dur.Nanoseconds()
|
|
m.onPTYOutNs.Add(ns)
|
|
observeMax(&m.onPTYOutMaxNs, ns)
|
|
}
|
|
|
|
func (m *metricsTracker) recordPTYOutDrop() {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.onPTYOutDrops.Add(1)
|
|
}
|
|
|
|
func (m *metricsTracker) recordRender(dur time.Duration) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.renderCalls.Add(1)
|
|
ns := dur.Nanoseconds()
|
|
m.renderNs.Add(ns)
|
|
observeMax(&m.renderMaxNs, ns)
|
|
}
|
|
|
|
func (m *metricsTracker) recordStdout(dur time.Duration, bytes int) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.stdoutWrites.Add(1)
|
|
m.stdoutBytes.Add(int64(bytes))
|
|
ns := dur.Nanoseconds()
|
|
m.stdoutNs.Add(ns)
|
|
observeMax(&m.stdoutMaxNs, ns)
|
|
}
|
|
|
|
func (m *metricsTracker) recordEmuWrite(dur time.Duration) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.emuWriteCalls.Add(1)
|
|
ns := dur.Nanoseconds()
|
|
m.emuWriteNs.Add(ns)
|
|
observeMax(&m.emuWriteMaxNs, ns)
|
|
}
|
|
|
|
func (m *metricsTracker) recordEmuTitle(dur time.Duration, skipped bool) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
if skipped {
|
|
m.emuTitleSkips.Add(1)
|
|
return
|
|
}
|
|
m.emuTitleCalls.Add(1)
|
|
m.emuTitleNs.Add(dur.Nanoseconds())
|
|
}
|
|
|
|
func (m *metricsTracker) recordSidebar(dur time.Duration, cacheHit bool) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.sidebarDraws.Add(1)
|
|
if cacheHit {
|
|
m.sidebarCacheHits.Add(1)
|
|
}
|
|
ns := dur.Nanoseconds()
|
|
m.sidebarNs.Add(ns)
|
|
observeMax(&m.sidebarMaxNs, ns)
|
|
}
|
|
|
|
func (m *metricsTracker) recordTabbar(dur time.Duration, cacheHit bool) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.tabbarDraws.Add(1)
|
|
if cacheHit {
|
|
m.tabbarCacheHits.Add(1)
|
|
}
|
|
m.tabbarNs.Add(dur.Nanoseconds())
|
|
}
|
|
|
|
func (m *metricsTracker) recordStatus(dur time.Duration, cacheHit bool) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.statusDraws.Add(1)
|
|
if cacheHit {
|
|
m.statusCacheHits.Add(1)
|
|
}
|
|
m.statusNs.Add(dur.Nanoseconds())
|
|
}
|
|
|
|
func (m *metricsTracker) recordSnapshot(dur time.Duration) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.snapshotReplays.Add(1)
|
|
ns := dur.Nanoseconds()
|
|
m.snapshotNs.Add(ns)
|
|
observeMax(&m.snapshotMaxNs, ns)
|
|
}
|
|
|
|
func (m *metricsTracker) recordTickerFire(didWork bool) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.tickerFires.Add(1)
|
|
if !didWork {
|
|
m.tickerIdleFires.Add(1)
|
|
}
|
|
}
|
|
|
|
// snapshot captures the tracker's current state as a JSON-serialisable
|
|
// map. Suitable for both the per-second JSONL row and the final
|
|
// metrics.json aggregate.
|
|
type metricsSnapshot struct {
|
|
WallSeconds float64 `json:"wall_seconds"`
|
|
PTYChunks int64 `json:"pty_chunks"`
|
|
PTYBytes int64 `json:"pty_bytes"`
|
|
OnPTYOutNs int64 `json:"on_pty_out_ns_total"`
|
|
OnPTYOutMaxNs int64 `json:"on_pty_out_ns_max"`
|
|
OnPTYOutDrops int64 `json:"on_pty_out_drops"`
|
|
StdoutWrites int64 `json:"stdout_writes"`
|
|
StdoutBytes int64 `json:"stdout_bytes"`
|
|
StdoutNs int64 `json:"stdout_ns_total"`
|
|
StdoutMaxNs int64 `json:"stdout_ns_max"`
|
|
|
|
RenderCalls int64 `json:"render_calls"`
|
|
RenderNs int64 `json:"render_ns_total"`
|
|
RenderMaxNs int64 `json:"render_ns_max"`
|
|
|
|
EmuWriteCalls int64 `json:"emu_write_calls"`
|
|
EmuWriteNs int64 `json:"emu_write_ns_total"`
|
|
EmuWriteMaxNs int64 `json:"emu_write_ns_max"`
|
|
EmuTitleCalls int64 `json:"emu_title_calls"`
|
|
EmuTitleNs int64 `json:"emu_title_ns_total"`
|
|
EmuTitleSkips int64 `json:"emu_title_skips"`
|
|
|
|
SidebarDraws int64 `json:"sidebar_draws"`
|
|
SidebarCacheHits int64 `json:"sidebar_cache_hits"`
|
|
SidebarNs int64 `json:"sidebar_ns_total"`
|
|
SidebarMaxNs int64 `json:"sidebar_ns_max"`
|
|
|
|
TabbarDraws int64 `json:"tabbar_draws"`
|
|
TabbarCacheHits int64 `json:"tabbar_cache_hits"`
|
|
TabbarNs int64 `json:"tabbar_ns_total"`
|
|
|
|
StatusDraws int64 `json:"status_draws"`
|
|
StatusCacheHits int64 `json:"status_cache_hits"`
|
|
StatusNs int64 `json:"status_ns_total"`
|
|
|
|
SnapshotReplays int64 `json:"snapshot_replays"`
|
|
SnapshotNs int64 `json:"snapshot_ns_total"`
|
|
SnapshotMaxNs int64 `json:"snapshot_ns_max"`
|
|
|
|
TickerFires int64 `json:"ticker_fires"`
|
|
TickerIdleFires int64 `json:"ticker_idle_fires"`
|
|
|
|
// Derived rates (computed at snapshot time so consumers don't have
|
|
// to). All "per_second" values are averaged over wall_seconds.
|
|
PTYChunksPerSec float64 `json:"pty_chunks_per_sec"`
|
|
PTYBytesPerSec float64 `json:"pty_bytes_per_sec"`
|
|
OnPTYOutMeanUs float64 `json:"on_pty_out_mean_us"`
|
|
StdoutMeanUs float64 `json:"stdout_mean_us"`
|
|
EmuWriteMeanUs float64 `json:"emu_write_mean_us"`
|
|
SidebarMeanUs float64 `json:"sidebar_mean_us"`
|
|
SidebarCacheHitRate float64 `json:"sidebar_cache_hit_rate"`
|
|
TabbarCacheHitRate float64 `json:"tabbar_cache_hit_rate"`
|
|
StatusCacheHitRate float64 `json:"status_cache_hit_rate"`
|
|
EmuTitleSkipRate float64 `json:"emu_title_skip_rate"`
|
|
TickerIdleRate float64 `json:"ticker_idle_rate"`
|
|
Timestamp string `json:"timestamp"`
|
|
}
|
|
|
|
func (m *metricsTracker) snapshotNow() metricsSnapshot {
|
|
wall := time.Since(m.startedAt).Seconds()
|
|
if wall <= 0 {
|
|
wall = 1
|
|
}
|
|
chunks := m.ptyChunks.Load()
|
|
bytes := m.ptyBytes.Load()
|
|
onptyTotal := m.onPTYOutNs.Load()
|
|
stdW := m.stdoutWrites.Load()
|
|
stdNs := m.stdoutNs.Load()
|
|
emuW := m.emuWriteCalls.Load()
|
|
emuWNs := m.emuWriteNs.Load()
|
|
sbDraws := m.sidebarDraws.Load()
|
|
sbHits := m.sidebarCacheHits.Load()
|
|
sbNs := m.sidebarNs.Load()
|
|
tbDraws := m.tabbarDraws.Load()
|
|
tbHits := m.tabbarCacheHits.Load()
|
|
stDraws := m.statusDraws.Load()
|
|
stHits := m.statusCacheHits.Load()
|
|
emuTC := m.emuTitleCalls.Load()
|
|
emuTS := m.emuTitleSkips.Load()
|
|
tickerF := m.tickerFires.Load()
|
|
tickerI := m.tickerIdleFires.Load()
|
|
|
|
div := func(num, denom int64) float64 {
|
|
if denom == 0 {
|
|
return 0
|
|
}
|
|
return float64(num) / float64(denom)
|
|
}
|
|
|
|
return metricsSnapshot{
|
|
WallSeconds: wall,
|
|
PTYChunks: chunks,
|
|
PTYBytes: bytes,
|
|
OnPTYOutNs: onptyTotal,
|
|
OnPTYOutMaxNs: m.onPTYOutMaxNs.Load(),
|
|
OnPTYOutDrops: m.onPTYOutDrops.Load(),
|
|
StdoutWrites: stdW,
|
|
StdoutBytes: m.stdoutBytes.Load(),
|
|
StdoutNs: stdNs,
|
|
StdoutMaxNs: m.stdoutMaxNs.Load(),
|
|
|
|
RenderCalls: m.renderCalls.Load(),
|
|
RenderNs: m.renderNs.Load(),
|
|
RenderMaxNs: m.renderMaxNs.Load(),
|
|
|
|
EmuWriteCalls: emuW,
|
|
EmuWriteNs: emuWNs,
|
|
EmuWriteMaxNs: m.emuWriteMaxNs.Load(),
|
|
EmuTitleCalls: emuTC,
|
|
EmuTitleNs: m.emuTitleNs.Load(),
|
|
EmuTitleSkips: emuTS,
|
|
|
|
SidebarDraws: sbDraws,
|
|
SidebarCacheHits: sbHits,
|
|
SidebarNs: sbNs,
|
|
SidebarMaxNs: m.sidebarMaxNs.Load(),
|
|
|
|
TabbarDraws: tbDraws,
|
|
TabbarCacheHits: tbHits,
|
|
TabbarNs: m.tabbarNs.Load(),
|
|
|
|
StatusDraws: stDraws,
|
|
StatusCacheHits: stHits,
|
|
StatusNs: m.statusNs.Load(),
|
|
|
|
SnapshotReplays: m.snapshotReplays.Load(),
|
|
SnapshotNs: m.snapshotNs.Load(),
|
|
SnapshotMaxNs: m.snapshotMaxNs.Load(),
|
|
|
|
TickerFires: tickerF,
|
|
TickerIdleFires: tickerI,
|
|
|
|
PTYChunksPerSec: float64(chunks) / wall,
|
|
PTYBytesPerSec: float64(bytes) / wall,
|
|
OnPTYOutMeanUs: div(onptyTotal/1000, chunks),
|
|
StdoutMeanUs: div(stdNs/1000, stdW),
|
|
EmuWriteMeanUs: div(emuWNs/1000, emuW),
|
|
SidebarMeanUs: div(sbNs/1000, sbDraws),
|
|
SidebarCacheHitRate: div(sbHits, sbDraws),
|
|
TabbarCacheHitRate: div(tbHits, tbDraws),
|
|
StatusCacheHitRate: div(stHits, stDraws),
|
|
EmuTitleSkipRate: div(emuTS, emuTC+emuTS),
|
|
TickerIdleRate: div(tickerI, tickerF),
|
|
Timestamp: time.Now().Format(time.RFC3339Nano),
|
|
}
|
|
}
|
|
|
|
// run is the snapshotter goroutine: write a JSONL row every second
|
|
// until ctx is cancelled. Stops cleanly without flushing partial
|
|
// rows.
|
|
func (m *metricsTracker) run(ctx context.Context) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
enc := json.NewEncoder(m.rowFile)
|
|
ticker := time.NewTicker(time.Second)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
snap := m.snapshotNow()
|
|
_ = enc.Encode(snap)
|
|
}
|
|
}
|
|
}
|
|
|
|
// close writes the final aggregate snapshot to metrics.json + a
|
|
// short human-readable summary.txt, then closes the row file. Safe
|
|
// to call on a nil receiver.
|
|
func (m *metricsTracker) close() {
|
|
if m == nil {
|
|
return
|
|
}
|
|
snap := m.snapshotNow()
|
|
if f, err := os.Create(filepath.Join(m.dir, "metrics.json")); err == nil {
|
|
enc := json.NewEncoder(f)
|
|
enc.SetIndent("", " ")
|
|
_ = enc.Encode(snap)
|
|
_ = f.Close()
|
|
}
|
|
if f, err := os.Create(filepath.Join(m.dir, "summary.txt")); err == nil {
|
|
writeSummary(f, snap)
|
|
_ = f.Close()
|
|
}
|
|
if m.rowFile != nil {
|
|
_ = m.rowFile.Close()
|
|
m.rowFile = nil
|
|
}
|
|
}
|
|
|
|
// writeSummary renders a brief human-readable digest of a snapshot.
|
|
// Designed for `cat summary.txt` after a session — quick orientation
|
|
// before diving into metrics.json / pprof.
|
|
func writeSummary(w *os.File, s metricsSnapshot) {
|
|
fmt.Fprintf(w, "patterm performance summary\n")
|
|
fmt.Fprintf(w, "===========================\n\n")
|
|
fmt.Fprintf(w, "session length: %.1fs\n", s.WallSeconds)
|
|
fmt.Fprintf(w, "pty chunks: %d (%.1f /s)\n", s.PTYChunks, s.PTYChunksPerSec)
|
|
fmt.Fprintf(w, "pty bytes: %d (%.0f /s, %.1f KiB/s)\n",
|
|
s.PTYBytes, s.PTYBytesPerSec, s.PTYBytesPerSec/1024)
|
|
fmt.Fprintf(w, "pty chunks dropped: %d (focus not on caller — fast-path return)\n", s.OnPTYOutDrops)
|
|
fmt.Fprintf(w, "\n")
|
|
fmt.Fprintf(w, "OnPTYOut mean: %.1fµs max: %.1fms\n",
|
|
s.OnPTYOutMeanUs, float64(s.OnPTYOutMaxNs)/1e6)
|
|
fmt.Fprintf(w, "viewport.Render calls: %d total %.1fms max %.1fms\n",
|
|
s.RenderCalls, float64(s.RenderNs)/1e6, float64(s.RenderMaxNs)/1e6)
|
|
fmt.Fprintf(w, "stdout writes: %d mean %.1fµs max %.1fms bytes %d\n",
|
|
s.StdoutWrites, s.StdoutMeanUs, float64(s.StdoutMaxNs)/1e6, s.StdoutBytes)
|
|
fmt.Fprintf(w, "\n")
|
|
fmt.Fprintf(w, "emulator.Write (cgo): %d mean %.1fµs max %.1fms\n",
|
|
s.EmuWriteCalls, s.EmuWriteMeanUs, float64(s.EmuWriteMaxNs)/1e6)
|
|
fmt.Fprintf(w, "emulator.Title polls: %d real, %d gated skip rate %.1f%%\n",
|
|
s.EmuTitleCalls, s.EmuTitleSkips, s.EmuTitleSkipRate*100)
|
|
fmt.Fprintf(w, "\n")
|
|
fmt.Fprintf(w, "sidebar draws: %d mean %.1fµs max %.1fms cache-hit %.1f%%\n",
|
|
s.SidebarDraws, s.SidebarMeanUs, float64(s.SidebarMaxNs)/1e6, s.SidebarCacheHitRate*100)
|
|
fmt.Fprintf(w, "tabbar draws: %d cache-hit %.1f%%\n",
|
|
s.TabbarDraws, s.TabbarCacheHitRate*100)
|
|
fmt.Fprintf(w, "status draws: %d cache-hit %.1f%%\n",
|
|
s.StatusDraws, s.StatusCacheHitRate*100)
|
|
fmt.Fprintf(w, "snapshot replays: %d total %.1fms max %.1fms\n",
|
|
s.SnapshotReplays, float64(s.SnapshotNs)/1e6, float64(s.SnapshotMaxNs)/1e6)
|
|
fmt.Fprintf(w, "\n")
|
|
fmt.Fprintf(w, "chrome ticker: %d fires, %d idle idle rate %.1f%%\n",
|
|
s.TickerFires, s.TickerIdleFires, s.TickerIdleRate*100)
|
|
}
|