Cancel pending timers when a child is closed (#6)
Co-authored-by: Harry Bayliss <harry@hjb.dev> Co-committed-by: Harry Bayliss <harry@hjb.dev>
This commit was merged in pull request #6.
This commit is contained in:
@@ -33,6 +33,10 @@ loosely follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
repaint.
|
||||
- Removed the redundant "Back to Settings" row from the
|
||||
Agents / Auto-summarization settings screen.
|
||||
- Pending `timer_*` entries are now cancelled when their owning or
|
||||
watched child is closed via `close_process`, preventing stale
|
||||
timer bodies from being re-delivered to the orchestrator pane
|
||||
after the work has already been handled.
|
||||
|
||||
## [0.0.6] - 2026-05-15
|
||||
|
||||
|
||||
@@ -837,6 +837,13 @@ func (st *uiState) OnChildStateChanged(string, IdleState) {
|
||||
st.drawSidebar()
|
||||
}
|
||||
|
||||
// OnChildClosed is the explicit-removal hook (close_process or the
|
||||
// terminal-corpse cleanup in reapChild). The UI already reflects
|
||||
// removals via the OnChildExited path and the children-map view, so
|
||||
// this is a no-op here — the timerManager is the consumer that
|
||||
// cares.
|
||||
func (st *uiState) OnChildClosed(string) {}
|
||||
|
||||
// OnChildExited drops focus and shows the empty state if it was the
|
||||
// focused child.
|
||||
func (st *uiState) OnChildExited(c *Child) {
|
||||
|
||||
@@ -111,6 +111,13 @@ func (d *debugCapture) OnChildStateChanged(id string, state IdleState) {
|
||||
})
|
||||
}
|
||||
|
||||
func (d *debugCapture) OnChildClosed(id string) {
|
||||
d.writeEvent("child_closed", map[string]any{
|
||||
"time": time.Now().Format(time.RFC3339Nano),
|
||||
"id": id,
|
||||
})
|
||||
}
|
||||
|
||||
func (d *debugCapture) OnPTYOut(childID string, chunk []byte) {
|
||||
if len(chunk) == 0 {
|
||||
return
|
||||
|
||||
@@ -86,10 +86,10 @@ func newToolHost(sess *Session, pads *scratchpad.Store, launcher *Launcher, pres
|
||||
return h
|
||||
}
|
||||
|
||||
// timerListenerAdapter forwards OnChildStateChanged into the timer
|
||||
// manager and ignores the other ChildEventListener methods. The
|
||||
// session's listener API is by-interface, so we wrap the manager
|
||||
// rather than make it implement the full surface.
|
||||
// timerListenerAdapter forwards OnChildStateChanged and OnChildClosed
|
||||
// into the timer manager and ignores the other ChildEventListener
|
||||
// methods. The session's listener API is by-interface, so we wrap
|
||||
// the manager rather than make it implement the full surface.
|
||||
type timerListenerAdapter struct{ m *timerManager }
|
||||
|
||||
func (a timerListenerAdapter) OnChildSpawned(*Child) {}
|
||||
@@ -98,6 +98,9 @@ func (a timerListenerAdapter) OnPTYOut(string, []byte) {}
|
||||
func (a timerListenerAdapter) OnChildStateChanged(id string, st IdleState) {
|
||||
a.m.onChildStateChanged(id, st)
|
||||
}
|
||||
func (a timerListenerAdapter) OnChildClosed(id string) {
|
||||
a.m.onChildClosed(id)
|
||||
}
|
||||
|
||||
func (h *toolHost) SetSize(cols, rows uint16) {
|
||||
h.sizeMu.Lock()
|
||||
@@ -553,6 +556,7 @@ func (n *chunkNotifier) OnPTYOut(id string, chunk []byte) {
|
||||
}
|
||||
}
|
||||
func (n *chunkNotifier) OnChildStateChanged(string, IdleState) {}
|
||||
func (n *chunkNotifier) OnChildClosed(string) {}
|
||||
|
||||
func (h *toolHost) GetProcessPorts(callerID, processID string) ([]mcp.PortSighting, error) {
|
||||
c := h.sess.FindChild(processID)
|
||||
|
||||
@@ -91,6 +91,12 @@ type ChildEventListener interface {
|
||||
// updates a child's IdleState. Listeners use this to repaint the
|
||||
// sidebar badge and to evaluate idle-aware timers.
|
||||
OnChildStateChanged(childID string, state IdleState)
|
||||
// OnChildClosed fires when a child is being removed from the
|
||||
// session (either via close_process, or — for agent/terminal
|
||||
// kinds — when the PTY exits and the entry will never be
|
||||
// restarted). It signals that any pending references to childID
|
||||
// (e.g. timers owned by or watching it) should be dropped.
|
||||
OnChildClosed(childID string)
|
||||
}
|
||||
|
||||
func NewSession(projectDir, projectKey string) *Session {
|
||||
@@ -167,6 +173,12 @@ func (s *Session) emitStateChanged(id string, state IdleState) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Session) emitClosed(id string) {
|
||||
for _, l := range s.listenersSnapshot() {
|
||||
l.OnChildClosed(id)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Session) ChildEnv() []string {
|
||||
env := os.Environ()
|
||||
// Mark patterm-owned PTYs so a recursive `patterm` invocation can
|
||||
@@ -374,6 +386,11 @@ func (s *Session) Close(id string, sig syscall.Signal) error {
|
||||
}
|
||||
}
|
||||
s.mu.Unlock()
|
||||
// Notify listeners outside s.mu so they can take their own locks
|
||||
// without inversion. Timer manager uses this to drop pending
|
||||
// timers owned by or watching the closed child — otherwise the
|
||||
// next classifier tick can deliver a stale fire to the parent.
|
||||
s.emitClosed(id)
|
||||
s.forgetPersisted(id)
|
||||
return nil
|
||||
}
|
||||
@@ -486,6 +503,7 @@ func (s *Session) reapChild(c *Child, runID uint64) {
|
||||
}
|
||||
}
|
||||
s.mu.Unlock()
|
||||
s.emitClosed(c.ID)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -296,6 +296,65 @@ func (m *timerManager) onChildStateChanged(childID string, state IdleState) {
|
||||
}
|
||||
}
|
||||
|
||||
// onChildClosed drops pending timer references to childID. Called
|
||||
// from Session.Close (and the terminal-corpse cleanup in reapChild)
|
||||
// via the session listener bus — a deliberate signal from the host
|
||||
// that childID is gone and the parent is not waiting on it anymore.
|
||||
//
|
||||
// Semantics:
|
||||
// - timers owned by childID are cancelled and deleted: their owner
|
||||
// is gone, so even if defaultFireFn's IsLive guard would no-op
|
||||
// the delivery, the entry has no business surviving a close.
|
||||
// - timers watching childID have childID pruned from t.watched
|
||||
// (and t.idleBaseline). If t.watched becomes empty the timer is
|
||||
// cancelled and deleted; we deliberately do NOT synthesise a
|
||||
// fire here. The parent already received any legitimate idle
|
||||
// transition before close_process — see allWatchedIdleLocked's
|
||||
// "treat as satisfied" comment, which only applies to a
|
||||
// concurrent re-evaluation, not to this explicit-removal hook.
|
||||
//
|
||||
// The natural-exit path (reapChild → emitExit for agent/command
|
||||
// kinds) is NOT routed through here: the classifier emits a final
|
||||
// idle transition on exit, which fires and deletes any watching
|
||||
// timers exactly once. Cancelling on exit would swallow that
|
||||
// legitimate fire and leave the parent never notified.
|
||||
func (m *timerManager) onChildClosed(childID string) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for id, t := range m.timers {
|
||||
if t.ownerID == childID {
|
||||
if t.rt != nil {
|
||||
t.rt.Stop()
|
||||
t.rt = nil
|
||||
}
|
||||
t.status = timerStatusCanceled
|
||||
delete(m.timers, id)
|
||||
continue
|
||||
}
|
||||
if !contains(t.watched, childID) {
|
||||
continue
|
||||
}
|
||||
pruned := t.watched[:0]
|
||||
for _, w := range t.watched {
|
||||
if w != childID {
|
||||
pruned = append(pruned, w)
|
||||
}
|
||||
}
|
||||
t.watched = pruned
|
||||
if t.idleBaseline != nil {
|
||||
delete(t.idleBaseline, childID)
|
||||
}
|
||||
if len(t.watched) == 0 {
|
||||
if t.rt != nil {
|
||||
t.rt.Stop()
|
||||
t.rt = nil
|
||||
}
|
||||
t.status = timerStatusCanceled
|
||||
delete(m.timers, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// allWatchedIdleLocked reports whether every watched child is now
|
||||
// idle. Called with m.mu held — uses live Child.IdleState() under the
|
||||
// child's own atomic, not under m.mu.
|
||||
|
||||
@@ -411,3 +411,178 @@ func TestTimerRecordsRemovedOnIdleFire(t *testing.T) {
|
||||
t.Fatalf("fired idle timer %s was not removed from registry", resp.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTimerCloseChildPrunesWatched covers the happy partial-prune
|
||||
// case: an idle_any timer watches two children, one is closed, the
|
||||
// timer stays pending and the remaining child can still satisfy it.
|
||||
func TestTimerCloseChildPrunesWatched(t *testing.T) {
|
||||
sess, mgr, rec := newTestManager(t)
|
||||
owner := fakeChild("p_owner")
|
||||
a := fakeChild("p_a")
|
||||
b := fakeChild("p_b")
|
||||
addChild(sess, owner)
|
||||
addChild(sess, a)
|
||||
addChild(sess, b)
|
||||
working := StateWorking
|
||||
a.idleState.Store(&working)
|
||||
b.idleState.Store(&working)
|
||||
|
||||
resp, err := mgr.TimerFireWhenIdleAny("p_owner", "one done", "", []string{"p_a", "p_b"}, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("TimerFireWhenIdleAny: %v", err)
|
||||
}
|
||||
|
||||
mgr.onChildClosed("p_a")
|
||||
|
||||
mgr.mu.Lock()
|
||||
t1, ok := mgr.timers[resp.ID]
|
||||
if !ok {
|
||||
mgr.mu.Unlock()
|
||||
t.Fatalf("timer was removed but still has live watched")
|
||||
}
|
||||
watched := append([]string(nil), t1.watched...)
|
||||
mgr.mu.Unlock()
|
||||
if len(watched) != 1 || watched[0] != "p_b" {
|
||||
t.Fatalf("watched after close: %v, want [p_b]", watched)
|
||||
}
|
||||
if got := rec.snapshot(); len(got) != 0 {
|
||||
t.Fatalf("close synthesised a fire: %+v", got)
|
||||
}
|
||||
|
||||
// p_b can still satisfy the timer.
|
||||
idle := StateIdle
|
||||
b.idleState.Store(&idle)
|
||||
mgr.onChildStateChanged("p_b", StateIdle)
|
||||
if got := rec.snapshot(); len(got) != 1 || got[0].Body != "one done" {
|
||||
t.Fatalf("post-prune fire: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTimerCloseLastWatchedCancels is the regression for the
|
||||
// reported stale-fire symptom: the only watched child is closed,
|
||||
// so the timer must be cancelled — no synthetic fire, and the
|
||||
// registry entry must be gone so a trailing classifier tick for the
|
||||
// removed child cannot re-deliver later.
|
||||
func TestTimerCloseLastWatchedCancels(t *testing.T) {
|
||||
sess, mgr, rec := newTestManager(t)
|
||||
owner := fakeChild("p_owner")
|
||||
a := fakeChild("p_a")
|
||||
addChild(sess, owner)
|
||||
addChild(sess, a)
|
||||
working := StateWorking
|
||||
a.idleState.Store(&working)
|
||||
|
||||
resp, err := mgr.TimerFireWhenIdleAny("p_owner", "stale body", "", []string{"p_a"}, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("TimerFireWhenIdleAny: %v", err)
|
||||
}
|
||||
|
||||
mgr.onChildClosed("p_a")
|
||||
|
||||
mgr.mu.Lock()
|
||||
_, stillThere := mgr.timers[resp.ID]
|
||||
mgr.mu.Unlock()
|
||||
if stillThere {
|
||||
t.Fatalf("timer with no remaining watched should be removed")
|
||||
}
|
||||
if got := rec.snapshot(); len(got) != 0 {
|
||||
t.Fatalf("close synthesised a fire: %+v", got)
|
||||
}
|
||||
|
||||
// Simulate the trailing classifier tick for the now-closed child —
|
||||
// must not fire.
|
||||
mgr.onChildStateChanged("p_a", StateIdle)
|
||||
if got := rec.snapshot(); len(got) != 0 {
|
||||
t.Fatalf("trailing state change re-fired: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTimerCloseChildIdleAllPartialPrune mirrors the idle_any
|
||||
// partial-prune for idle_all: pruning a watched child shrinks the
|
||||
// list; the remaining child going idle then satisfies the timer.
|
||||
func TestTimerCloseChildIdleAllPartialPrune(t *testing.T) {
|
||||
sess, mgr, rec := newTestManager(t)
|
||||
owner := fakeChild("p_owner")
|
||||
a := fakeChild("p_a")
|
||||
b := fakeChild("p_b")
|
||||
addChild(sess, owner)
|
||||
addChild(sess, a)
|
||||
addChild(sess, b)
|
||||
working := StateWorking
|
||||
a.idleState.Store(&working)
|
||||
b.idleState.Store(&working)
|
||||
|
||||
resp, err := mgr.TimerFireWhenIdleAll("p_owner", "all done", "", []string{"p_a", "p_b"}, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("TimerFireWhenIdleAll: %v", err)
|
||||
}
|
||||
if resp.Status != "pending" {
|
||||
t.Fatalf("status: got %q want pending", resp.Status)
|
||||
}
|
||||
|
||||
mgr.onChildClosed("p_a")
|
||||
|
||||
idle := StateIdle
|
||||
b.idleState.Store(&idle)
|
||||
mgr.onChildStateChanged("p_b", StateIdle)
|
||||
if got := rec.snapshot(); len(got) != 1 || got[0].Body != "all done" {
|
||||
t.Fatalf("idle_all after partial prune: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTimerCloseOwnerCancelsDelay ensures a delay timer is dropped
|
||||
// when its owner is closed: no delivery, registry empty, the
|
||||
// underlying time.Timer is stopped.
|
||||
func TestTimerCloseOwnerCancelsDelay(t *testing.T) {
|
||||
sess, mgr, rec := newTestManager(t)
|
||||
c := fakeChild("p_owner")
|
||||
addChild(sess, c)
|
||||
id, err := mgr.TimerSet("p_owner", "x", "", 0.1)
|
||||
if err != nil {
|
||||
t.Fatalf("TimerSet: %v", err)
|
||||
}
|
||||
|
||||
mgr.onChildClosed("p_owner")
|
||||
|
||||
mgr.mu.Lock()
|
||||
_, stillThere := mgr.timers[id]
|
||||
mgr.mu.Unlock()
|
||||
if stillThere {
|
||||
t.Fatalf("delay timer was not removed when owner closed")
|
||||
}
|
||||
time.Sleep(200 * time.Millisecond) // past the original firesAt
|
||||
if got := rec.snapshot(); len(got) != 0 {
|
||||
t.Fatalf("delay timer fired after owner close: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTimerCloseWatchedSubAgent is the exact shape of the reported
|
||||
// stale-fire bug: orchestrator registers a watcher on a sub-agent,
|
||||
// the sub-agent is closed, and the orchestrator must receive
|
||||
// nothing (no stale body delivered after close_process).
|
||||
func TestTimerCloseWatchedSubAgent(t *testing.T) {
|
||||
sess, mgr, rec := newTestManager(t)
|
||||
parent := fakeChild("p_owner")
|
||||
sub := fakeChild("p_sub")
|
||||
addChild(sess, parent)
|
||||
addChild(sess, sub)
|
||||
working := StateWorking
|
||||
sub.idleState.Store(&working)
|
||||
|
||||
if _, err := mgr.TimerFireWhenIdleAny(
|
||||
"p_owner",
|
||||
"codex-review-591 finished. Read your own pane …",
|
||||
"", []string{"p_sub"}, 0,
|
||||
); err != nil {
|
||||
t.Fatalf("TimerFireWhenIdleAny: %v", err)
|
||||
}
|
||||
|
||||
mgr.onChildClosed("p_sub")
|
||||
|
||||
// Trailing classifier emission for the closed sub-agent must
|
||||
// not deliver anything to the parent.
|
||||
mgr.onChildStateChanged("p_sub", StateIdle)
|
||||
if got := rec.snapshot(); len(got) != 0 {
|
||||
t.Fatalf("stale fire delivered to parent after sub-agent close: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user