Files
patterm/internal/app/session.go
Harry Bayliss 412b1167a2 Cancel pending timers when a child is closed (#6)
Co-authored-by: Harry Bayliss <harry@hjb.dev>
Co-committed-by: Harry Bayliss <harry@hjb.dev>
2026-05-18 12:46:50 +01:00

766 lines
21 KiB
Go

// Package app is patterm's single foreground process. It owns the TUI,
// every PTY, every emulator, the in-process MCP server, and the
// scratchpad/preset state.
//
// There is no daemon, no detach, no socket-based client/daemon split
// (SPEC §2). One process owns everything; closing the terminal window
// ends the session and tears down every child.
package app
import (
"errors"
"fmt"
"os"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/hjbdev/patterm/internal/persist"
"github.com/hjbdev/patterm/internal/vt"
)
// childStopTimeout bounds each wait phase when stopping a child: the
// wait after the initial signal and, if the child is still live, the
// wait after the SIGKILL escalation.
const childStopTimeout = 2 * time.Second
// Session is the in-memory state for the running patterm process.
// In SPEC §4 terms each top-level tab is a session; v1 ships with a
// single implicit session and reserves room to grow.
type Session struct {
// projectDir and projectKey identify the project. ChildEnv exports
// them to children as PATTERM_PROJECT_DIR and PATTERM_PROJECT_KEY.
projectDir string
projectKey string
// mu guards children, order, nameSeq and persistStore.
mu sync.Mutex
// children maps process id to its entry; order holds the same ids
// in spawn order (see Children).
children map[string]*Child
order []string
// nameSeq tracks the default-name counter per kind (agent-1,
// command-2, terminal-3, …). Reset is a non-goal: counters are
// monotonic across the session lifetime.
nameSeq map[ChildKind]int
// listeners holds the UI listeners that want to hear about child
// lifecycle events (spawn/exit) — exactly one (the TUI) in v1.
// It is an atomic.Pointer to a frozen slice: Subscribe and
// Unsubscribe build a fresh slice under listenersMu and store it,
// while the emit* paths read it with a single atomic Load.
// This drops one mutex acquisition per PTY chunk on the hot path.
listenersMu sync.Mutex
listeners atomic.Pointer[[]ChildEventListener]
// persistStore records top-level command entries to a per-project
// JSON file so they can be re-spawned after patterm restarts.
// Optional; nil means "no persistence" (used by unit tests).
persistStore *persist.Store
// metrics is the optional performance tracker. nil when --profile
// is off. The pump goroutine reads it via atomic Load so installing
// metrics post-construction doesn't race with running children.
metrics atomic.Pointer[metricsTracker]
}
// SetPersistStore installs p as the session's process-persistence
// store. Subsequent Spawn / Close / Rename / SetAutoRestart calls on
// top-level command entries mirror their changes into it. The swap is
// performed under s.mu.
func (s *Session) SetPersistStore(p *persist.Store) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.persistStore = p
}
// SetMetrics installs the per-session performance tracker. Safe to
// call with nil to disable (the default). The pointer is published
// with an atomic Store, so hot-path readers (see loadMetrics) never
// take a lock; SetMetrics is expected to swap the pointer once at
// startup.
func (s *Session) SetMetrics(m *metricsTracker) {
s.metrics.Store(m)
}
// loadMetrics returns the currently installed performance tracker via
// an atomic Load, or nil when none has been installed. Callers must
// nil-check the result before recording.
func (s *Session) loadMetrics() *metricsTracker {
return s.metrics.Load()
}
// ChildEventListener is implemented by the TUI to react to lifecycle
// events without polling. Callbacks are invoked synchronously on the
// goroutine that emits the event.
type ChildEventListener interface {
// OnChildSpawned fires from Spawn once the new entry has been
// published to the session and before its pump/reap goroutines
// are started.
OnChildSpawned(*Child)
// OnChildExited fires from the reaper after the child's PTY wait
// returns and the entry has been marked exited.
OnChildExited(*Child)
// OnPTYOut is called for every chunk the child writes to its PTY.
// Only the focused-child chunk should reach the screen — the TUI
// filters by id. The chunk aliases the pump's read buffer and must
// not be retained past return.
OnPTYOut(childID string, chunk []byte)
// OnChildStateChanged fires when the idle-detection classifier
// updates a child's IdleState. Listeners use this to repaint the
// sidebar badge and to evaluate idle-aware timers.
OnChildStateChanged(childID string, state IdleState)
// OnChildClosed fires when a child is being removed from the
// session (either via close_process, or — for terminal entries —
// when the PTY exits and the reaper drops the entry). It signals
// that any pending references to childID (e.g. timers owned by or
// watching it) should be dropped.
OnChildClosed(childID string)
}
// NewSession returns an empty Session bound to the given project
// directory and key. The child table and the per-kind name counters
// are allocated up front; no listeners, persistence store, or metrics
// tracker are attached.
func NewSession(projectDir, projectKey string) *Session {
	s := &Session{
		children: make(map[string]*Child),
		nameSeq:  make(map[ChildKind]int),
	}
	s.projectDir = projectDir
	s.projectKey = projectKey
	return s
}
// Subscribe registers l for child events. The listener slice is never
// mutated in place: a copy with l appended is built under listenersMu
// and then published atomically, so concurrent emit* calls keep
// iterating whichever slice they already loaded.
func (s *Session) Subscribe(l ChildEventListener) {
	s.listenersMu.Lock()
	defer s.listenersMu.Unlock()

	current := s.listenersSnapshot()
	updated := make([]ChildEventListener, len(current), len(current)+1)
	copy(updated, current)
	updated = append(updated, l)
	s.listeners.Store(&updated)
}
// Unsubscribe drops every occurrence of l from the listener set.
// Calling it with a listener that was never registered is harmless.
// When at least one listener is registered, a freshly filtered slice
// is published in place of the old one.
func (s *Session) Unsubscribe(l ChildEventListener) {
	s.listenersMu.Lock()
	defer s.listenersMu.Unlock()

	current := s.listenersSnapshot()
	if len(current) == 0 {
		return
	}
	kept := make([]ChildEventListener, 0, len(current))
	for i := range current {
		if current[i] == l {
			continue
		}
		kept = append(kept, current[i])
	}
	s.listeners.Store(&kept)
}
// listenersSnapshot loads the currently published listener slice, or
// nil when nothing has been published yet. It performs a single atomic
// Load and needs no lock; callers must treat the result as read-only.
func (s *Session) listenersSnapshot() []ChildEventListener {
	if frozen := s.listeners.Load(); frozen != nil {
		return *frozen
	}
	return nil
}
// emitSpawn tells every subscribed listener, in registration order,
// that c has been spawned.
func (s *Session) emitSpawn(c *Child) {
	subs := s.listenersSnapshot()
	for i := range subs {
		subs[i].OnChildSpawned(c)
	}
}
// emitExit tells every subscribed listener, in registration order,
// that c's PTY has exited.
func (s *Session) emitExit(c *Child) {
	subs := s.listenersSnapshot()
	for i := range subs {
		subs[i].OnChildExited(c)
	}
}
// emitPTYOut hands a freshly read PTY chunk to every listener.
// The slice belongs to the pumpChild read buffer and is overwritten by
// the next read, so listeners MUST NOT keep a reference to `chunk`
// after returning.
func (s *Session) emitPTYOut(id string, chunk []byte) {
	subs := s.listenersSnapshot()
	for i := range subs {
		subs[i].OnPTYOut(id, chunk)
	}
}
// emitStateChanged tells every subscribed listener that the child
// identified by id now has the given idle state.
func (s *Session) emitStateChanged(id string, state IdleState) {
	subs := s.listenersSnapshot()
	for i := range subs {
		subs[i].OnChildStateChanged(id, state)
	}
}
// emitClosed tells every subscribed listener that the child identified
// by id has been removed from the session.
func (s *Session) emitClosed(id string) {
	subs := s.listenersSnapshot()
	for i := range subs {
		subs[i].OnChildClosed(id)
	}
}
// ChildEnv returns the environment handed to spawned children: a copy
// of the current process environment followed by patterm's markers.
//
// PATTERM=1 tags patterm-owned PTYs so a recursive `patterm` invocation
// can detect it and degrade. The MCP socket is per-PID and lives under
// $XDG_RUNTIME_DIR — see internal/mcp.
func (s *Session) ChildEnv() []string {
	markers := []string{
		"PATTERM=1",
		"PATTERM_PROJECT_KEY=" + s.projectKey,
		"PATTERM_PROJECT_DIR=" + s.projectDir,
	}
	return append(os.Environ(), markers...)
}
// SpawnSpec is the argument record for Session.Spawn — the new
// argv-shaped spawn API matching SPEC §7 spawn_process.
type SpawnSpec struct {
// Kind selects the entry kind and the counter used for default names.
Kind ChildKind
// Argv is the command line to run; Spawn rejects an empty Argv.
Argv []string
// Env is the child's environment. nil means "use Session.ChildEnv()".
Env []string
// WorkDir is passed through to the child entry unchanged.
WorkDir string
// Name is the display name. Empty means Spawn assigns "<kind>-<n>"
// from the per-kind counter.
Name string
// ParentID is the id of the spawning entry; empty for top-level
// entries (only those are eligible for persistence).
ParentID string
// PresetRef is passed through to the child entry and persisted with it.
PresetRef string
Identity string // pre-minted; otherwise the constructor mints one for agents
// CleanupPaths are owned runtime files/dirs removed when the child exits
// or is closed. They must be attached before the PTY starts so a
// fast-exiting child cannot outrun cleanup registration.
CleanupPaths []string
// IdleDetection is the resolved per-preset idle classifier config.
// Must be installed before the child is published to s.children so
// the classifier goroutine never observes a nil/default config for
// a preset that overrides it.
IdleDetection *resolvedIdleDetection
}
// Spawn creates a new entry and starts its PTY. For Kind = command the
// entry remains in the session after PTY exit (it can be Restart'd).
// An exited agent entry likewise stays in `exited` status until the
// caller `close_process`'s it. Terminal entries are the exception:
// reapChild drops them from the session as soon as their PTY exits.
//
// cols and rows give the initial PTY size. Spawn returns an error when
// spec.Argv is empty or when the PTY fails to start; in the latter case
// the entry's owned cleanup paths are cleaned up and nothing is
// published to the session.
func (s *Session) Spawn(spec SpawnSpec, cols, rows uint16) (*Child, error) {
if len(spec.Argv) == 0 {
return nil, errors.New("session.Spawn: empty argv")
}
if spec.Env == nil {
spec.Env = s.ChildEnv()
}
s.mu.Lock()
id := s.mintUniqueIDLocked()
// The per-kind counter advances on every spawn, including spawns that
// supply an explicit Name, so default names never repeat a number.
s.nameSeq[spec.Kind]++
if spec.Name == "" {
spec.Name = fmt.Sprintf("%s-%d", spec.Kind, s.nameSeq[spec.Kind])
}
s.mu.Unlock()
c := newChildEntry(id, spec.Name, spec.Kind, spec.Argv, spec.Env, spec.ParentID, spec.WorkDir, spec.PresetRef)
if spec.Identity != "" {
c.Identity = spec.Identity
}
for _, path := range spec.CleanupPaths {
c.AddCleanupPath(path)
}
// Install idle-detection BEFORE publishing to s.children — otherwise
// the classifier goroutine could read c.idleDetection while the
// launcher is still racing to set it.
if spec.IdleDetection != nil {
c.setIdleDetection(spec.IdleDetection)
}
runID, err := c.startPTY(cols, rows)
if err != nil {
// The entry was never published; release its owned paths here
// because no reaper will run for it.
c.cleanupOwnedPaths()
return nil, err
}
s.mu.Lock()
s.children[id] = c
s.order = append(s.order, id)
store := s.persistStore
s.mu.Unlock()
// Wire persistence callback BEFORE registering so SetName /
// SetAutoRestart calls that race the listener still hit the store.
if store != nil {
c.setPersistFn(func(ch *Child) {
s.persistEntry(ch)
})
s.persistEntry(c)
}
s.emitSpawn(c)
// The pump and the reaper are both bound to this run's id so they can
// detect when a later run has replaced this one.
go s.pumpChild(c, runID)
go s.reapChild(c, runID)
return c, nil
}
// persistEntry saves (or refreshes) c's record in the persistence
// store. It does nothing when no store is attached or when c is not
// eligible (see shouldPersist). A failed save is logged, not returned.
func (s *Session) persistEntry(c *Child) {
	s.mu.Lock()
	store := s.persistStore
	s.mu.Unlock()

	if store == nil {
		return
	}
	if !shouldPersist(c) {
		return
	}

	var rec persist.Entry
	rec.ID = c.ID
	rec.Name = c.DisplayName()
	// Copy argv so the stored record does not alias the live entry.
	rec.Argv = append([]string(nil), c.Argv...)
	rec.WorkDir = c.WorkDir
	rec.PresetRef = c.PresetRef
	rec.AutoRestart = c.AutoRestart()

	err := store.Save(rec)
	if err == nil {
		return
	}
	logf("persist save %s: %v", c.ID, err)
}
// forgetPersisted removes the persisted record for id, if a store is
// attached. A failed removal is logged, not returned.
func (s *Session) forgetPersisted(id string) {
	s.mu.Lock()
	store := s.persistStore
	s.mu.Unlock()

	if store != nil {
		if err := store.Remove(id); err != nil {
			logf("persist remove %s: %v", id, err)
		}
	}
}
// shouldPersist reports whether c belongs in the persist store.
// v1 restores only top-level command entries: agents and terminals are
// ephemeral by design, and commands spawned by a sub-agent belong to
// their orchestrator's lifecycle. A nil child never qualifies.
func shouldPersist(c *Child) bool {
	if c == nil {
		return false
	}
	if c.Kind != KindCommand {
		return false
	}
	return c.ParentID == ""
}
// Start (re)attaches a PTY to an entry that is currently stopped or
// exited and launches its pump and reaper goroutines. It returns an
// error for an unknown id or a PTY start failure. Starting an entry
// that is already live succeeds without doing anything.
func (s *Session) Start(id string, cols, rows uint16) error {
	entry := s.FindChild(id)
	switch {
	case entry == nil:
		return fmt.Errorf("no such process %q", id)
	case entry.IsLive():
		// SPEC §7 start_process is a no-op on a running entry.
		return nil
	}

	runID, err := entry.startPTY(cols, rows)
	if err != nil {
		return err
	}
	go s.pumpChild(entry, runID)
	go s.reapChild(entry, runID)
	return nil
}
// Restart stops the entry (if live) then starts it again with the same
// argv/env/workdir. Per SPEC §7: valid for command entries; valid for
// agent/terminal only while their PTY is still live.
//
// sig is the signal used to stop a live entry (terminateAndWait treats
// 0 as SIGTERM and escalates to SIGKILL after childStopTimeout). cols
// and rows size the replacement PTY. Returns an error for an unknown
// id, for a non-command entry that is no longer live, or when the new
// PTY fails to start.
func (s *Session) Restart(id string, sig syscall.Signal, cols, rows uint16) error {
c := s.FindChild(id)
if c == nil {
return fmt.Errorf("no such process %q", id)
}
if c.Kind != KindCommand && !c.IsLive() {
return fmt.Errorf("restart: %s entries can only be restarted while live", c.Kind)
}
// Only live entries can own runtime MCP config paths today. Keep the
// reaper from cleaning those paths while restart swaps the PTY.
// (reapChild returns early while this flag is set.)
c.restarting.Store(true)
defer c.restarting.Store(false)
if c.IsLive() {
terminateAndWait(c, sig, childStopTimeout)
}
c.teardownPTY()
runID, err := c.startPTY(cols, rows)
if err != nil {
return err
}
// Fresh pump/reaper pair bound to the new run id.
go s.pumpChild(c, runID)
go s.reapChild(c, runID)
return nil
}
// Close removes an entry from the session entirely (SPEC §7
// close_process). A live entry is stopped first using sig, then its
// PTY is torn down and its owned paths are cleaned up. Returns an
// error only when id is unknown.
func (s *Session) Close(id string, sig syscall.Signal) error {
	victim := s.FindChild(id)
	if victim == nil {
		return fmt.Errorf("no such process %q", id)
	}
	if victim.IsLive() {
		terminateAndWait(victim, sig, childStopTimeout)
	}
	victim.teardownPTY()
	victim.cleanupOwnedPaths()

	s.mu.Lock()
	delete(s.children, id)
	for idx := range s.order {
		if s.order[idx] != id {
			continue
		}
		// Shift the tail left over the removed slot.
		copy(s.order[idx:], s.order[idx+1:])
		s.order = s.order[:len(s.order)-1]
		break
	}
	s.mu.Unlock()

	// Listeners are notified after s.mu is released so they can take
	// their own locks without inversion. The timer manager relies on
	// this to drop pending timers owned by or watching the closed
	// child — otherwise the next classifier tick can deliver a stale
	// fire to the parent.
	s.emitClosed(id)
	s.forgetPersisted(id)
	return nil
}
// mintUniqueIDLocked returns an opaque process_id (SPEC §7) that is
// not currently a key in s.children, minting again on collision.
// The caller must hold s.mu.
func (s *Session) mintUniqueIDLocked() string {
	candidate := mintProcessID()
	for {
		if _, taken := s.children[candidate]; !taken {
			return candidate
		}
		candidate = mintProcessID()
	}
}
// pumpChild is the read loop for one run of c. It reads PTY output for
// run runID and, for each chunk, feeds the emulator (when one exists),
// records the chunk on the child, and forwards it to every listener.
// The loop ends when a read returns an error, when the PTY for runID
// is no longer available, or when a newer run has replaced this one.
func (s *Session) pumpChild(c *Child, runID uint64) {
pty := c.ptyForRun(runID)
if pty == nil {
return
}
// One PTY read buffer per pump goroutine. Consumers downstream
// (em.Write is synchronous through CGO; recordWrite append-copies
// into the ring; renderer.Render copies into its pending buffer)
// all complete or copy before returning, so the buffer can be
// reused without aliasing live data. See ChildEventListener.OnPTYOut
// docstring — listeners must not retain `chunk`.
buf := make([]byte, 64*1024)
for {
n, err := pty.Read(buf)
// Data is processed before err is inspected: a read may return
// both bytes and an error.
if n > 0 {
// Output from a superseded run is dropped.
if !c.isCurrentRun(runID) {
return
}
chunk := buf[:n]
if em := c.Emulator(); em != nil {
// Timing is only taken when a metrics tracker is installed.
m := s.loadMetrics()
wstart := time.Time{}
if m != nil {
wstart = time.Now()
}
if _, werr := em.Write(chunk); werr != nil {
logf("emulator.Write(child %s): %v", c.ID, werr)
}
if m != nil {
m.recordEmuWrite(time.Since(wstart))
}
// OSC 0/2 title updates ride on the same byte stream as
// the rest of the output. Polling the emulator after each
// chunk is cheap on its own (one CGO call) but codex/
// ratatui sends so many small chunks that the per-chunk
// CGO cost becomes measurable. Skip the Title poll when
// the chunk doesn't carry an OSC start byte at all; the
// title can only change on chunks that include one.
if containsOSC(chunk) {
tstart := time.Time{}
if m != nil {
tstart = time.Now()
}
if t, terr := em.Title(); terr == nil {
c.recordTitle(t)
}
if m != nil {
m.recordEmuTitle(time.Since(tstart), false)
}
} else if m != nil {
// Record the skipped poll (zero duration, skipped=true).
m.recordEmuTitle(0, true)
}
}
c.recordWrite(chunk)
s.emitPTYOut(c.ID, chunk)
}
if err != nil {
// EIO and os.ErrClosed are not logged; any other read error is.
if !errors.Is(err, syscall.EIO) && !errors.Is(err, os.ErrClosed) {
logf("pty read (child %s): %v", c.ID, err)
}
return
}
}
}
// reapChild waits for run runID of c to exit and then performs the
// exit bookkeeping: it marks the entry exited, notifies listeners,
// terminates the entry's still-live direct children, and cleans up the
// entry's owned paths. Terminal entries are additionally removed from
// the session. Nothing is done if the run was superseded or a Restart
// is in progress when the wait returns.
func (s *Session) reapChild(c *Child, runID uint64) {
pty := c.ptyForRun(runID)
if pty == nil {
return
}
err := pty.Wait()
// A stale run, or an exit caused by Restart swapping the PTY, must
// not be reported as the entry exiting.
if !c.isCurrentRun(runID) || c.restarting.Load() {
return
}
c.markExited(err)
logf("child %s exited (err=%v)", c.ID, err)
s.emitExit(c)
s.killDescendantsOf(c.ID)
// restarting is re-read because a Restart may have begun while the
// steps above were running.
if !c.restarting.Load() {
c.cleanupOwnedPaths()
}
// Terminals are ephemeral: unlike command entries (kept around for
// restart_process) and agents (which the user clears via close_process
// once they're done with the corpse), an exited terminal has nothing
// useful left to do. Drop it from the session so it disappears from
// the Processes sidebar / switch list immediately.
if c.Kind == KindTerminal && !c.restarting.Load() {
c.teardownPTY()
s.mu.Lock()
delete(s.children, c.ID)
for i, oid := range s.order {
if oid == c.ID {
s.order = append(s.order[:i], s.order[i+1:]...)
break
}
}
s.mu.Unlock()
// Emitted after s.mu is released, matching Close.
s.emitClosed(c.ID)
}
}
// killDescendantsOf stops every still-live direct child of parentID.
// SPEC §2 says closing the patterm process tears down every child it
// spawned; the same rule applies in-session — when an orchestrator
// dies (natural exit, user Ctrl-C, MCP close, anything that makes its
// PTY EOF), whatever it spawned must die with it.
//
// Only direct children are signalled here: each one's own reapChild
// calls back into this function, so the cascade reaches arbitrary
// depth without a tree walk. Children get SIGTERM first; any that are
// still live after childStopTimeout get SIGKILL and a second wait.
// An empty parentID is ignored.
func (s *Session) killDescendantsOf(parentID string) {
	if parentID == "" {
		return
	}

	var victims []*Child
	s.mu.Lock()
	for _, kid := range s.children {
		if kid.ParentID != parentID || !kid.IsLive() {
			continue
		}
		victims = append(victims, kid)
	}
	s.mu.Unlock()
	if len(victims) == 0 {
		return
	}

	for i := range victims {
		_ = victims[i].signal(syscall.SIGTERM)
	}
	waitForAllStopped(victims, childStopTimeout)

	for i := range victims {
		if !victims[i].IsLive() {
			continue
		}
		_ = victims[i].signal(syscall.SIGKILL)
	}
	waitForAllStopped(victims, childStopTimeout)
}
// waitForAllStopped polls every 20ms until none of children is live or
// timeout has elapsed. It returns true as soon as a poll finds every
// child stopped, and false if the deadline passes first.
func waitForAllStopped(children []*Child, timeout time.Duration) bool {
	allStopped := func() bool {
		for _, c := range children {
			if c.IsLive() {
				return false
			}
		}
		return true
	}

	for deadline := time.Now().Add(timeout); time.Now().Before(deadline); time.Sleep(20 * time.Millisecond) {
		if allStopped() {
			return true
		}
	}
	return false
}
// terminateAndWait sends sig to c (SIGTERM when sig is 0) and polls
// every 20ms, for at most timeout, until c is no longer live. If c is
// still live after that, it sends SIGKILL and polls for up to timeout
// once more. Signal errors are ignored.
func terminateAndWait(c *Child, sig syscall.Signal, timeout time.Duration) {
	if sig == 0 {
		sig = syscall.SIGTERM
	}

	// awaitExit blocks until c stops or one timeout window has passed.
	awaitExit := func() {
		for stopBy := time.Now().Add(timeout); c.IsLive() && time.Now().Before(stopBy); {
			time.Sleep(20 * time.Millisecond)
		}
	}

	_ = c.signal(sig)
	awaitExit()
	if c.IsLive() {
		_ = c.signal(syscall.SIGKILL)
		awaitExit()
	}
}
// Children returns a fresh slice of the session's entries in spawn
// order. Ids in the order list with no matching entry are skipped.
func (s *Session) Children() []*Child {
	s.mu.Lock()
	defer s.mu.Unlock()

	snapshot := make([]*Child, 0, len(s.order))
	for i := range s.order {
		c, present := s.children[s.order[i]]
		if !present {
			continue
		}
		snapshot = append(snapshot, c)
	}
	return snapshot
}
// FindChild returns the entry registered under id, or nil when the
// session has no such entry. The lookup is done under s.mu.
func (s *Session) FindChild(id string) *Child {
	s.mu.Lock()
	found := s.children[id]
	s.mu.Unlock()
	return found
}
// FindChildByIdentity returns an entry whose Identity equals token, or
// nil if there is none. MCP uses it to bind a mcp-stdio greeting to
// its caller. An empty token never matches.
func (s *Session) FindChildByIdentity(token string) *Child {
	if token == "" {
		return nil
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	var match *Child
	for _, c := range s.children {
		if c.Identity == token {
			match = c
			break
		}
	}
	return match
}
// Kill delivers sig to the child registered under id; a zero sig means
// SIGTERM. It returns an error when id is unknown, otherwise whatever
// the child's signal call returns.
func (s *Session) Kill(id string, sig syscall.Signal) error {
	target := s.FindChild(id)
	switch {
	case target == nil:
		return fmt.Errorf("no such child %q", id)
	case sig == 0:
		return target.signal(syscall.SIGTERM)
	default:
		return target.signal(sig)
	}
}
// WriteInput writes b to the PTY of the child registered under id.
// It fails when id is unknown, when the child's status is not
// StatusRunning, when the child has no PTY, or when the write itself
// returns an error.
func (s *Session) WriteInput(id string, b []byte) error {
	target := s.FindChild(id)
	if target == nil {
		return fmt.Errorf("no such child %q", id)
	}
	if target.Status() != StatusRunning {
		return fmt.Errorf("child %q is %s", id, target.Status())
	}

	sink := target.PTY()
	if sink == nil {
		return fmt.Errorf("child %q has no pty", id)
	}
	if _, err := sink.Write(b); err != nil {
		return err
	}
	return nil
}
// ResizeAll applies one cell grid to every child's PTY and emulator.
// SPEC §5 says one viewport, no multi-client resize negotiation.
// A zero cols or rows is ignored. Resize errors are deliberately
// discarded: the operation is best-effort per child.
func (s *Session) ResizeAll(cols, rows uint16) {
	if cols == 0 || rows == 0 {
		return
	}

	// Snapshot under the lock, resize outside it.
	s.mu.Lock()
	targets := make([]*Child, 0, len(s.children))
	for id := range s.children {
		targets = append(targets, s.children[id])
	}
	s.mu.Unlock()

	for i := range targets {
		pty := targets[i].PTY()
		if pty != nil {
			_ = pty.Resize(cols, rows)
		}
		em := targets[i].Emulator()
		if em != nil {
			_ = em.Resize(cols, rows)
		}
	}
}
// SerializeChild returns the VT byte stream that reproduces the
// child's current screen state; it is used to repaint a child after
// the user switches focus or closes the palette. It fails when id is
// unknown or the child has no emulator.
func (s *Session) SerializeChild(id string) ([]byte, error) {
	c := s.FindChild(id)
	if c == nil {
		return nil, fmt.Errorf("no such child %q", id)
	}
	if em := c.Emulator(); em != nil {
		return em.SerializeVT()
	}
	return nil, fmt.Errorf("child %q has no emulator", id)
}
// StyledSnapshotChild returns the result of the child emulator's
// StyledScreenVT call. It fails when id is unknown or the child has no
// emulator.
func (s *Session) StyledSnapshotChild(id string) ([]byte, error) {
	c := s.FindChild(id)
	if c == nil {
		return nil, fmt.Errorf("no such child %q", id)
	}
	if em := c.Emulator(); em != nil {
		return em.StyledScreenVT()
	}
	return nil, fmt.Errorf("child %q has no emulator", id)
}
// SnapshotChild returns the child's screen text together with its
// cursor state, both read from the child's emulator. It fails when id
// is unknown, when the child has no emulator, or when either emulator
// query returns an error; on failure the text is empty and the cursor
// is the zero value.
func (s *Session) SnapshotChild(id string) (string, vt.CursorState, error) {
	var noCursor vt.CursorState

	c := s.FindChild(id)
	if c == nil {
		return "", noCursor, fmt.Errorf("no such child %q", id)
	}
	em := c.Emulator()
	if em == nil {
		return "", noCursor, fmt.Errorf("child %q has no emulator", id)
	}

	text, err := em.ScreenText()
	if err != nil {
		return "", noCursor, err
	}
	cursor, err := em.Cursor()
	if err != nil {
		return "", noCursor, err
	}
	return text, cursor, nil
}
// Shutdown stops every child in the session. Called on Ctrl-D /
// SIGTERM / SIGHUP (SPEC §2 step 4). It sends SIGTERM to each child,
// then tears down each child's PTY and cleans up its owned paths.
// The function contains no explicit wait for the processes to exit,
// and entries are left in the session's tables.
func (s *Session) Shutdown() {
	// Snapshot under the lock so signalling happens outside it.
	all := func() []*Child {
		s.mu.Lock()
		defer s.mu.Unlock()
		snapshot := make([]*Child, 0, len(s.children))
		for _, c := range s.children {
			snapshot = append(snapshot, c)
		}
		return snapshot
	}()

	// Signal everything first so children shut down in parallel.
	for i := range all {
		_ = all[i].signal(syscall.SIGTERM)
	}
	// Then close emulators and PTY masters; reaper goroutines observe
	// the exits as their Wait() calls return.
	for i := range all {
		all[i].teardownPTY()
		all[i].cleanupOwnedPaths()
	}
}
// containsOSC reports whether chunk could begin or complete an OSC
// sequence, i.e. whether the emulator's OSC-driven title may have
// changed while consuming it. pumpChild uses it to short-circuit the
// per-chunk Title() poll, which otherwise pays a CGO call for every
// chunk even when codex/ratatui is just emitting SGR-styled output.
//
// An OSC starts with ESC ] (0x1b 0x5d) or the bare C1 OSC byte (0x9d)
// and takes effect only when it is terminated by BEL (0x07) or ST
// (ESC \ or the bare C1 byte 0x9c). PTY reads can split a sequence
// anywhere, so the terminator may arrive in a later chunk than the
// introducer; checking for the introducer alone would skip the poll on
// exactly the chunk that applies the new title. The function therefore
// returns true when chunk contains any of:
//
//   - an OSC introducer (ESC ] or 0x9d);
//   - an OSC terminator (BEL, ESC \ or 0x9c);
//   - a trailing ESC, which may be the first half of a split ESC ] or
//     ESC \;
//   - a leading backslash, which may be the second half of an ESC \
//     whose ESC ended the previous chunk.
//
// The check is stateless and deliberately conservative: a false
// positive costs one extra Title() poll, while a false negative leaves
// a stale title. Chunks of plain text or CSI/SGR output still return
// false. An empty chunk returns false.
func containsOSC(chunk []byte) bool {
	if len(chunk) == 0 {
		return false
	}
	if chunk[0] == '\\' {
		return true
	}
	for i, b := range chunk {
		switch b {
		case 0x9d, 0x9c, 0x07:
			return true
		case 0x1b:
			if i+1 == len(chunk) {
				// ESC at the very end: its partner byte is in the next chunk.
				return true
			}
			if next := chunk[i+1]; next == ']' || next == '\\' {
				return true
			}
		}
	}
	return false
}
// logf appends one formatted line, prefixed with "patterm: ", to the
// file named by the PATTERM_DEBUG_LOG environment variable. Logging is
// best-effort: when the variable is unset or empty, or the file cannot
// be opened, the call does nothing. The file is opened in append mode
// (created with mode 0600 if missing) and closed again on every call.
func logf(format string, args ...any) {
	path := os.Getenv("PATTERM_DEBUG_LOG")
	if path == "" {
		return
	}
	out, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600)
	if err != nil {
		return
	}
	defer out.Close()
	fmt.Fprintf(out, "patterm: "+format+"\n", args...)
}