Files
patterm/internal/app/child.go
Harry Bayliss 3622c41fd0 Land staged session/MCP/chrome work + sidebar clear-J fix
This batches the in-flight [Unreleased] block from CHANGELOG.md into a
single commit. Highlights:

- Real MCP protocol layer (initialize / tools/list / tools/call) so
  vendor MCP clients can complete the handshake against the per-PID
  socket. Legacy direct-dispatch preserved for the harness.
- New mcp_injection kinds — cli_override for codex, config_env for
  opencode — joining the existing env-var and config_file paths so
  patterm can slot into more agents without touching their real
  config or auth.
- Ctrl+A/D and Ctrl+W/S focus navigation across tabs and intra-tab
  process lists, recognised in legacy / kitty CSI u / xterm
  modifyOtherKeys encodings.
- Palette macros (sw / k / sp ) and reordering so open sessions
  surface above spawn-new entries.
- Two-row tab bar, sidebar/tabbar/status chrome cache, viewport-wipe
  on agent spawn, CR-terminated orchestrator injections, and split-
  Enter PTY writes so paste-detecting TUIs see Enter as a key event.

Also fixes the bug logged in TODO: claude's Ctrl+O tool-call expansion
emits CSI 0 J, which the viewport renderer was forwarding verbatim —
wiping the sidebar to the right of the cursor and leaving the chrome
cache convinced nothing had changed. CSI 0 J and CSI 1 J are now
translated into per-row ECH sequences clamped to the viewport, same
as CSI 2 J and CSI K already were.

Agent guides (CLAUDE.md / AGENTS.md) now spell out the
TODO->CHANGELOG workflow so completed items land in the changelog
rather than as ticked entries left behind in TODO.
2026-05-14 19:09:35 +01:00

484 lines
13 KiB
Go

package app
import (
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"os/exec"
"regexp"
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
pkgpty "github.com/hjbdev/patterm/internal/pty"
"github.com/hjbdev/patterm/internal/vt"
)
// portRegex matches dev-server URLs of the form `http(s)://host:NNNN[/path]`
// and reports the port in capture group 1. SPEC §7 get_process_ports is
// best-effort; we stick to URL-form sightings because bare `:NNNN`
// produces too many false positives (timestamps, exit codes, etc.).
//
// The optional path stops at whitespace or any ASCII control byte, so a
// colour-reset escape (ESC [ 0 m) or an OSC terminator (BEL) printed
// directly after the URL is not captured as part of it. Raw control
// bytes are never valid inside a URL, so no genuine match is shortened.
var portRegex = regexp.MustCompile(`https?://[^\s:/]+:(\d{2,5})(?:/[^\s\x00-\x1f\x7f]*)?`)
// ChildStatus is the lifecycle state of a Child, stored as a string.
//
// Transitions visible in this file:
//   - newChildEntry creates every entry as StatusStopped.
//   - startPTY moves it to StatusStarting, then StatusRunning once the
//     PTY is attached, or StatusErrored if the PTY fails to start.
//   - markExited records StatusExited (clean exit or *exec.ExitError)
//     or StatusErrored (any other wait error).
//
// IsLive treats StatusStarting and StatusRunning as "live".
type ChildStatus string

const (
	StatusStarting ChildStatus = "starting"
	StatusRunning  ChildStatus = "running"
	StatusStopped  ChildStatus = "stopped"
	StatusExited   ChildStatus = "exited"
	StatusErrored  ChildStatus = "errored"
)
// ChildKind matches the three process kinds in SPEC §7.
//   - agent: vendor LLM CLI launched from an agent preset (MCP-wired,
//     ephemeral — lost when the PTY exits).
//   - terminal: a bare interactive shell (ephemeral).
//   - command: a process preset or freeform argv (session-persistent —
//     survives PTY exit so it can be restart_process'd).
//
// Within this file the kind affects construction only: newChildEntry
// mints an Identity token for agents, and agents with a parent start
// out orchestrator-owned.
type ChildKind string

const (
	KindAgent    ChildKind = "agent"
	KindTerminal ChildKind = "terminal"
	KindCommand  ChildKind = "command"
)
// Owner reflects the SPEC §6 input-ownership flag: who most recently
// drove input into the child. InjectAsUser sets OwnerUser and
// InjectAsOrchestrator sets OwnerOrchestrator before each write.
type Owner string

const (
	OwnerUser         Owner = "user"
	OwnerOrchestrator Owner = "orchestrator"
)
// Child is one entry in the session — a PTY-backed process plus its
// emulator. Covers all three kinds (agent / terminal / command).
//
// For KindCommand the entry is session-persistent: argv/env/workingDir
// stay populated across stop/restart so Restart() can rebuild the PTY
// against the same spec.
//
// A Child holds mutexes and atomics, so it must be used through a
// pointer and never copied.
type Child struct {
	// ID is the caller-supplied identifier; startPTY includes it in
	// its error messages.
	ID string

	// Name is the display name. SetName and DisplayName access it
	// under nameMu.
	Name string

	// Argv is the command line handed to pkgpty.Start by startPTY.
	Argv []string

	// Env is the environment handed to pkgpty.Start by startPTY.
	Env []string

	// WorkDir is recorded at construction.
	// NOTE(review): startPTY in this file does not pass WorkDir to
	// pkgpty.Start — confirm where the working directory is applied.
	WorkDir string

	// Kind is the process kind (agent / terminal / command).
	Kind ChildKind

	ParentID string // empty for top-level sessions

	// PresetRef names the source preset (when known). Used by trust
	// gating to re-check on restart_process. Empty for freeform-argv
	// command entries and for ephemeral terminals.
	PresetRef string

	// Identity is the per-spawn token the mcp-stdio proxy uses to
	// identify itself when calling tools. Empty for non-agent entries.
	Identity string

	// nameMu guards Name (rename_process).
	nameMu sync.RWMutex

	// ptyMu guards pty + em so Restart can swap them while pumpChild /
	// reapChild loops detect the swap by observing nil/closed PTY.
	ptyMu sync.RWMutex
	pty   *pkgpty.PTY
	em    *vt.GhosttyEmulator

	// status holds the current lifecycle state; read it via Status().
	status atomic.Pointer[ChildStatus]

	// exitCode is -1 until markExited records a real exit code.
	exitCode atomic.Int32

	// owner holds the input-ownership flag; read it via Owner().
	owner atomic.Pointer[Owner]

	// lastWriteNS is the wall time of the most recent PTY-master write.
	// SPEC §11 idle heuristic: a pane is idle once nothing has been
	// written for the preset's threshold (default 1s).
	lastWriteNS atomic.Int64

	// screenVersion increments on every PTY-out chunk. SPEC §7
	// get_process_output exposes it so orchestrators can detect changes
	// without diffing content.
	screenVersion atomic.Int64

	// ringMu guards ring. The ring buffer carries the last `ringCap`
	// bytes the PTY produced, used by SPEC §7 get_process_output stream
	// mode and search_output scrollback.
	ringMu     sync.Mutex
	ring       []byte
	ringStart  int64 // absolute offset of ring[0]
	ringWrites int64 // cumulative bytes written

	// portsMu guards ports. Best-effort port detection: regex on stream.
	portsMu sync.Mutex
	ports   []PortSighting
}
// PortSighting is one entry returned by get_process_ports. Entries are
// created by scanPortsFromChunk, at most one per distinct port number.
type PortSighting struct {
	// Port is the port number parsed from the matched URL (1–65535).
	Port int `json:"port"`

	// URL is the URL text that produced the sighting; it is omitted
	// from the JSON encoding when empty.
	URL string `json:"url,omitempty"`

	// FirstSeenAt is the UTC time of the first sighting, formatted as
	// RFC 3339.
	FirstSeenAt string `json:"first_seen_at"`
}
// ringCap is the maximum number of PTY output bytes retained per child;
// recordWrite discards the oldest bytes once the ring exceeds it.
const ringCap = 1 << 20 // 1 MiB per SPEC §5
// newChildEntry constructs the in-memory record for a child without
// launching anything: no PTY, no emulator. That lets command entries
// sit in the `stopped` state from creation; agents and terminals go
// through newChild, which follows this with startPTY.
//
// The returned entry has status `stopped`, exit code -1 and a
// pre-allocated output ring. Input ownership defaults to the user,
// except for agents spawned under a parent, which start out
// orchestrator-owned. Only agents receive an Identity token.
func newChildEntry(id, name string, kind ChildKind, argv, env []string, parentID, workDir, presetRef string) *Child {
	entry := &Child{
		ID:        id,
		Name:      name,
		Kind:      kind,
		ParentID:  parentID,
		PresetRef: presetRef,
		Argv:      argv,
		Env:       env,
		WorkDir:   workDir,
		ring:      make([]byte, 0, ringCap),
	}

	initial := StatusStopped
	entry.status.Store(&initial)
	entry.exitCode.Store(-1)

	owner := OwnerUser
	if kind == KindAgent {
		entry.Identity = mintIdentity()
		if parentID != "" {
			owner = OwnerOrchestrator
		}
	}
	entry.owner.Store(&owner)

	return entry
}
// newChild creates a child entry and immediately starts its PTY at the
// given size. It rejects an empty argv up front and returns a nil
// *Child whenever startPTY fails.
func newChild(id, name string, kind ChildKind, argv, env []string, cols, rows uint16, parentID, workDir, presetRef string) (*Child, error) {
	if len(argv) == 0 {
		return nil, errors.New("child: empty argv")
	}

	entry := newChildEntry(id, name, kind, argv, env, parentID, workDir, presetRef)
	err := entry.startPTY(cols, rows)
	if err != nil {
		return nil, err
	}
	return entry, nil
}
// startPTY (re)creates the emulator and PTY for this entry. newChild
// calls it for the first run and Restart for later ones.
//
// Status handling:
//   - If the emulator cannot be created the error is returned and the
//     status is left untouched.
//   - Otherwise status becomes `starting`; a PTY start failure closes
//     the fresh emulator and sets `errored`.
//   - On success the pair is published under ptyMu, status becomes
//     `running`, and the exit code and last-write timestamp are reset.
func (c *Child) startPTY(cols, rows uint16) error {
	setStatus := func(s ChildStatus) { c.status.Store(&s) }

	emu, err := vt.NewGhosttyEmulator(cols, rows)
	if err != nil {
		return fmt.Errorf("child %s emulator: %w", c.ID, err)
	}

	setStatus(StatusStarting)
	proc, err := pkgpty.Start(c.Argv, c.Env, cols, rows)
	if err != nil {
		emu.Close()
		setStatus(StatusErrored)
		return fmt.Errorf("child %s pty: %w", c.ID, err)
	}

	// Bytes the emulator wants to send back to the program are
	// forwarded to the PTY; a failed write here is deliberately ignored.
	emu.OnWritePTY(func(reply []byte) {
		_, _ = proc.Write(reply)
	})

	c.ptyMu.Lock()
	c.pty, c.em = proc, emu
	c.ptyMu.Unlock()

	setStatus(StatusRunning)
	c.exitCode.Store(-1)
	c.lastWriteNS.Store(0)
	return nil
}
// IsLive reports whether the entry's status is `starting` or `running`.
// Callers use it to gate input on a live PTY, as opposed to a stopped
// command entry.
func (c *Child) IsLive() bool {
	switch c.Status() {
	case StatusStarting, StatusRunning:
		return true
	default:
		return false
	}
}
// PTY returns the PTY currently attached to this entry, read under
// ptyMu. The result is nil when no PTY is attached, e.g. for a stopped
// command entry.
func (c *Child) PTY() *pkgpty.PTY {
	c.ptyMu.RLock()
	current := c.pty
	c.ptyMu.RUnlock()
	return current
}
// Emulator returns the emulator currently attached to this entry, read
// under ptyMu. It is nil whenever no PTY is attached.
func (c *Child) Emulator() *vt.GhosttyEmulator {
	c.ptyMu.RLock()
	current := c.em
	c.ptyMu.RUnlock()
	return current
}
// DisplayName returns the current display name, read under nameMu so
// it is safe to call concurrently with SetName (rename_process).
//
// Prefer this over reading the Name field directly: an unsynchronised
// read that overlaps a SetName call is a data race, even though string
// contents themselves are immutable.
func (c *Child) DisplayName() string {
	c.nameMu.RLock()
	name := c.Name
	c.nameMu.RUnlock()
	return name
}
// SetName replaces the display name under nameMu (rename_process).
func (c *Child) SetName(name string) {
	c.nameMu.Lock()
	defer c.nameMu.Unlock()
	c.Name = name
}
// ScreenVersion returns the screen version counter, which recordWrite
// increments once for every PTY output chunk.
func (c *Child) ScreenVersion() int64 {
	version := c.screenVersion.Load()
	return version
}
// Status returns the current lifecycle state. An entry whose status
// was never stored reports StatusRunning.
func (c *Child) Status() ChildStatus {
	if current := c.status.Load(); current != nil {
		return *current
	}
	return StatusRunning
}
// ExitCode returns the most recently recorded exit code. It is -1
// until markExited stores a real value, and again after each startPTY.
func (c *Child) ExitCode() int {
	code := c.exitCode.Load()
	return int(code)
}
// PID returns the process ID reported by the attached PTY, or 0 when
// no PTY is attached.
func (c *Child) PID() int {
	if attached := c.PTY(); attached != nil {
		return attached.Pid()
	}
	return 0
}
// Owner returns the current input-ownership flag. An entry whose owner
// was never stored reports OwnerUser.
func (c *Child) Owner() Owner {
	if current := c.owner.Load(); current != nil {
		return *current
	}
	return OwnerUser
}
// SetOwner atomically replaces the input-ownership flag.
func (c *Child) SetOwner(o Owner) {
	next := o
	c.owner.Store(&next)
}
// IdleMS reports the number of whole milliseconds elapsed since the
// last recorded PTY write, measured on the wall clock. It returns 0
// when no write has been recorded since the PTY was (re)started.
// SPEC §11.
func (c *Child) IdleMS() int64 {
	stamp := c.lastWriteNS.Load()
	if stamp == 0 {
		return 0
	}
	elapsed := time.Duration(time.Now().UnixNano() - stamp)
	return elapsed.Milliseconds()
}
// recordWrite accounts for one chunk of PTY output: it stamps the
// last-write time, bumps the screen version, appends the chunk to the
// output ring (dropping the oldest bytes beyond ringCap and advancing
// ringStart by the same amount), and finally scans the chunk for
// dev-server URLs. The port scan runs after ringMu is released.
func (c *Child) recordWrite(chunk []byte) {
	c.lastWriteNS.Store(time.Now().UnixNano())
	c.screenVersion.Add(1)

	func() {
		c.ringMu.Lock()
		defer c.ringMu.Unlock()

		c.ringWrites += int64(len(chunk))
		c.ring = append(c.ring, chunk...)
		if overflow := len(c.ring) - ringCap; overflow > 0 {
			c.ring = c.ring[overflow:]
			c.ringStart += int64(overflow)
		}
	}()

	c.scanPortsFromChunk(chunk)
}
// scanPortsFromChunk looks for dev-server URLs in a single PTY chunk
// and records each port the first time it is seen. SPEC §7
// get_process_ports — detection is stream scanning only, no probing.
//
// Ports outside 1–65535 are ignored. Every sighting found in the same
// chunk shares one UTC RFC 3339 timestamp. Matching is per chunk, so a
// URL split across two chunks is not detected.
func (c *Child) scanPortsFromChunk(chunk []byte) {
	hits := portRegex.FindAllSubmatch(chunk, -1)
	if len(hits) == 0 {
		return
	}
	stamp := time.Now().UTC().Format(time.RFC3339)

	c.portsMu.Lock()
	defer c.portsMu.Unlock()

nextHit:
	for _, hit := range hits {
		port, err := strconv.Atoi(string(hit[1]))
		if err != nil || port < 1 || port > 65535 {
			continue
		}
		// Keep only the first sighting of each port.
		for i := range c.ports {
			if c.ports[i].Port == port {
				continue nextHit
			}
		}

		sighting := PortSighting{Port: port, FirstSeenAt: stamp}
		if matched := string(hit[0]); strings.HasPrefix(matched, "http") {
			sighting.URL = matched
		}
		c.ports = append(c.ports, sighting)
	}
}
// Ports returns a copy of the port sightings recorded so far, taken
// under portsMu. The result is never nil, so callers may modify it
// freely and it encodes as a JSON array even when empty.
func (c *Child) Ports() []PortSighting {
	c.portsMu.Lock()
	defer c.portsMu.Unlock()

	snapshot := make([]PortSighting, 0, len(c.ports))
	return append(snapshot, c.ports...)
}
// StreamRead returns a copy of the ring bytes from absolute offset
// `since` up to the current write head, together with the head offset
// to pass as `since` on the next call. Offsets count every byte ever
// written to the ring.
//
// If `since` is older than the oldest retained byte the caller has
// missed data; the read starts from the oldest byte still held. If
// `since` is at or beyond the head, the returned slice is nil.
func (c *Child) StreamRead(since int64) ([]byte, int64) {
	c.ringMu.Lock()
	defer c.ringMu.Unlock()

	head := c.ringStart + int64(len(c.ring))
	from := since
	if from < c.ringStart {
		from = c.ringStart
	}
	if from >= head {
		return nil, head
	}

	pending := c.ring[from-c.ringStart:]
	out := make([]byte, len(pending))
	copy(out, pending)
	return out, head
}
// signal delivers sig to the child. It first targets the negated PID,
// which kill(2) interprets as the process group with that ID; if that
// call fails it signals the PID itself and returns that result.
//
// It returns an error when no PTY is attached or the PTY reports a
// non-positive PID.
func (c *Child) signal(sig syscall.Signal) error {
	attached := c.PTY()
	if attached == nil {
		return errors.New("child has no pty")
	}
	pid := attached.Pid()
	if pid <= 0 {
		return errors.New("child has no pid")
	}

	if groupErr := syscall.Kill(-pid, sig); groupErr != nil {
		return syscall.Kill(pid, sig)
	}
	return nil
}
// NudgeRedraw prompts the child to discard any diff-based render state
// and paint a full frame. It is used after a focus switch so
// ratatui/ink TUIs re-render coherently against the snapshot that was
// just replayed.
//
// The PTY is resized to one row short and then back to the intended
// size, because setting an unchanged size does not reliably produce a
// SIGWINCH. An explicit SIGWINCH follows for TUIs that miss or
// coalesce the resize-driven one. The emulator is left alone: it
// already has the intended size, and the brief mismatch only affects
// what the child writes during the second redraw.
//
// It does nothing when no PTY is attached or rows < 2. Resize and
// signal errors are ignored; this is a best-effort hint.
func (c *Child) NudgeRedraw(cols, rows uint16) {
	if rows < 2 {
		return
	}
	attached := c.PTY()
	if attached == nil {
		return
	}

	for _, height := range [...]uint16{rows - 1, rows} {
		_ = attached.Resize(cols, height)
	}
	_ = c.signal(syscall.SIGWINCH)
}
// markExited records how the process ended, given the error from
// waiting on it:
//   - nil: status `exited`, exit code 0.
//   - *exec.ExitError: status `exited`, with that error's exit code.
//   - any other error: status `errored`, exit code -1.
//
// The exit code is stored before the status.
func (c *Child) markExited(err error) {
	code, final := int32(0), StatusExited

	var exitErr *exec.ExitError
	switch {
	case err == nil:
		// Clean exit: keep the defaults.
	case errors.As(err, &exitErr):
		code = int32(exitErr.ExitCode())
	default:
		code, final = -1, StatusErrored
	}

	c.exitCode.Store(code)
	c.status.Store(&final)
}
// teardownPTY detaches the current PTY and emulator from the entry and
// closes them, so Restart can install a fresh pair. The fields are
// cleared under ptyMu; the Close calls run after the lock is released
// and their errors are ignored. Calling it with nothing attached is a
// no-op.
func (c *Child) teardownPTY() {
	detach := func() (*pkgpty.PTY, *vt.GhosttyEmulator) {
		c.ptyMu.Lock()
		defer c.ptyMu.Unlock()
		oldPTY, oldEmu := c.pty, c.em
		c.pty, c.em = nil, nil
		return oldPTY, oldEmu
	}

	oldPTY, oldEmu := detach()
	if oldPTY != nil {
		_ = oldPTY.Close()
	}
	if oldEmu != nil {
		_ = oldEmu.Close()
	}
}
// InjectAsUser writes input typed by the human in the focused pane.
// Per SPEC §6 the user's first keystroke takes ownership, so the owner
// flag is set to OwnerUser before the write is attempted — and stays
// set even if the write fails.
func (c *Child) InjectAsUser(b []byte) error {
	owner := OwnerUser
	c.owner.Store(&owner)
	err := c.writeInput(b)
	return err
}
// InjectAsOrchestrator writes input on behalf of the orchestrator —
// the path used by send_message, initial_prompt and timer_wait. Per
// SPEC §6 the owner flag is set back to OwnerOrchestrator before the
// write is attempted, and stays set even if the write fails.
func (c *Child) InjectAsOrchestrator(b []byte) error {
	owner := OwnerOrchestrator
	c.owner.Store(&owner)
	err := c.writeInput(b)
	return err
}
// writeInput is the PTY write path shared by InjectAsUser and
// InjectAsOrchestrator.
//
// The input is divided by splitOnEnter so each Enter byte (CR or LF)
// goes out in its own write, with a 15ms pause between consecutive
// writes. TUI agents with paste detection (claude, codex, opencode)
// would otherwise receive `hello\r` in a single read() and treat it as
// multi-line pasted text instead of "key Enter".
//
// Input that yields at most one piece is written as-is in one call.
// The first write error aborts the sequence and is returned; pieces
// already written stay written. It fails immediately when no PTY is
// attached.
func (c *Child) writeInput(b []byte) error {
	attached := c.PTY()
	if attached == nil {
		return errors.New("child has no pty")
	}

	chunks := splitOnEnter(b)
	if len(chunks) < 2 {
		_, err := attached.Write(b)
		return err
	}

	const gap = 15 * time.Millisecond
	if _, err := attached.Write(chunks[0]); err != nil {
		return err
	}
	for _, chunk := range chunks[1:] {
		time.Sleep(gap)
		if _, err := attached.Write(chunk); err != nil {
			return err
		}
	}
	return nil
}
// mintIdentity returns a fresh per-spawn identity token: 12 bytes from
// crypto/rand rendered as 24 lowercase hex characters.
//
// The rand.Read error is deliberately ignored. NOTE(review): recent Go
// releases document crypto/rand.Read as never returning an error; on
// older toolchains a failure would yield an all-zero token — confirm
// the module's minimum Go version.
func mintIdentity() string {
	token := make([]byte, 12)
	_, _ = rand.Read(token)
	return hex.EncodeToString(token)
}
// mintProcessID returns the opaque short token SPEC §7 calls a
// process_id: the literal prefix `p_` followed by 6 lowercase hex
// characters (3 random bytes). Uniqueness is not guaranteed here; the
// caller (session.go) checks for collisions within a session.
//
// The rand.Read error is deliberately ignored, as in mintIdentity.
func mintProcessID() string {
	const prefix = "p_"
	raw := make([]byte, 3)
	_, _ = rand.Read(raw)
	return prefix + hex.EncodeToString(raw)
}