Cancel pending timers when a child is closed (#6)

Co-authored-by: Harry Bayliss <harry@hjb.dev>
Co-committed-by: Harry Bayliss <harry@hjb.dev>
This commit was merged in pull request #6.
This commit is contained in:
2026-05-18 12:46:50 +01:00
committed by harry
parent de60b93bc6
commit 412b1167a2
7 changed files with 278 additions and 4 deletions

View File

@@ -411,3 +411,178 @@ func TestTimerRecordsRemovedOnIdleFire(t *testing.T) {
t.Fatalf("fired idle timer %s was not removed from registry", resp.ID)
}
}
// TestTimerCloseChildPrunesWatched covers the happy partial-prune
// case: an idle_any timer watches two children, one is closed, the
// timer stays pending and the remaining child can still satisfy it.
func TestTimerCloseChildPrunesWatched(t *testing.T) {
sess, mgr, rec := newTestManager(t)
owner := fakeChild("p_owner")
a := fakeChild("p_a")
b := fakeChild("p_b")
addChild(sess, owner)
addChild(sess, a)
addChild(sess, b)
working := StateWorking
a.idleState.Store(&working)
b.idleState.Store(&working)
resp, err := mgr.TimerFireWhenIdleAny("p_owner", "one done", "", []string{"p_a", "p_b"}, 0)
if err != nil {
t.Fatalf("TimerFireWhenIdleAny: %v", err)
}
mgr.onChildClosed("p_a")
mgr.mu.Lock()
t1, ok := mgr.timers[resp.ID]
if !ok {
mgr.mu.Unlock()
t.Fatalf("timer was removed but still has live watched")
}
watched := append([]string(nil), t1.watched...)
mgr.mu.Unlock()
if len(watched) != 1 || watched[0] != "p_b" {
t.Fatalf("watched after close: %v, want [p_b]", watched)
}
if got := rec.snapshot(); len(got) != 0 {
t.Fatalf("close synthesised a fire: %+v", got)
}
// p_b can still satisfy the timer.
idle := StateIdle
b.idleState.Store(&idle)
mgr.onChildStateChanged("p_b", StateIdle)
if got := rec.snapshot(); len(got) != 1 || got[0].Body != "one done" {
t.Fatalf("post-prune fire: %+v", got)
}
}
// TestTimerCloseLastWatchedCancels is the regression for the
// reported stale-fire symptom: the only watched child is closed,
// so the timer must be cancelled — no synthetic fire, and the
// registry entry must be gone so a trailing classifier tick for the
// removed child cannot re-deliver later.
func TestTimerCloseLastWatchedCancels(t *testing.T) {
sess, mgr, rec := newTestManager(t)
owner := fakeChild("p_owner")
a := fakeChild("p_a")
addChild(sess, owner)
addChild(sess, a)
working := StateWorking
a.idleState.Store(&working)
resp, err := mgr.TimerFireWhenIdleAny("p_owner", "stale body", "", []string{"p_a"}, 0)
if err != nil {
t.Fatalf("TimerFireWhenIdleAny: %v", err)
}
mgr.onChildClosed("p_a")
mgr.mu.Lock()
_, stillThere := mgr.timers[resp.ID]
mgr.mu.Unlock()
if stillThere {
t.Fatalf("timer with no remaining watched should be removed")
}
if got := rec.snapshot(); len(got) != 0 {
t.Fatalf("close synthesised a fire: %+v", got)
}
// Simulate the trailing classifier tick for the now-closed child —
// must not fire.
mgr.onChildStateChanged("p_a", StateIdle)
if got := rec.snapshot(); len(got) != 0 {
t.Fatalf("trailing state change re-fired: %+v", got)
}
}
// TestTimerCloseChildIdleAllPartialPrune mirrors the idle_any
// partial-prune for idle_all: pruning a watched child shrinks the
// list; the remaining child going idle then satisfies the timer.
func TestTimerCloseChildIdleAllPartialPrune(t *testing.T) {
sess, mgr, rec := newTestManager(t)
owner := fakeChild("p_owner")
a := fakeChild("p_a")
b := fakeChild("p_b")
addChild(sess, owner)
addChild(sess, a)
addChild(sess, b)
working := StateWorking
a.idleState.Store(&working)
b.idleState.Store(&working)
resp, err := mgr.TimerFireWhenIdleAll("p_owner", "all done", "", []string{"p_a", "p_b"}, 0)
if err != nil {
t.Fatalf("TimerFireWhenIdleAll: %v", err)
}
if resp.Status != "pending" {
t.Fatalf("status: got %q want pending", resp.Status)
}
mgr.onChildClosed("p_a")
idle := StateIdle
b.idleState.Store(&idle)
mgr.onChildStateChanged("p_b", StateIdle)
if got := rec.snapshot(); len(got) != 1 || got[0].Body != "all done" {
t.Fatalf("idle_all after partial prune: %+v", got)
}
}
// TestTimerCloseOwnerCancelsDelay ensures a delay timer is dropped
// when its owner is closed: no delivery, registry empty, the
// underlying time.Timer is stopped.
func TestTimerCloseOwnerCancelsDelay(t *testing.T) {
sess, mgr, rec := newTestManager(t)
c := fakeChild("p_owner")
addChild(sess, c)
id, err := mgr.TimerSet("p_owner", "x", "", 0.1)
if err != nil {
t.Fatalf("TimerSet: %v", err)
}
mgr.onChildClosed("p_owner")
mgr.mu.Lock()
_, stillThere := mgr.timers[id]
mgr.mu.Unlock()
if stillThere {
t.Fatalf("delay timer was not removed when owner closed")
}
time.Sleep(200 * time.Millisecond) // past the original firesAt
if got := rec.snapshot(); len(got) != 0 {
t.Fatalf("delay timer fired after owner close: %+v", got)
}
}
// TestTimerCloseWatchedSubAgent is the exact shape of the reported
// stale-fire bug: orchestrator registers a watcher on a sub-agent,
// the sub-agent is closed, and the orchestrator must receive
// nothing (no stale body delivered after close_process).
func TestTimerCloseWatchedSubAgent(t *testing.T) {
sess, mgr, rec := newTestManager(t)
parent := fakeChild("p_owner")
sub := fakeChild("p_sub")
addChild(sess, parent)
addChild(sess, sub)
working := StateWorking
sub.idleState.Store(&working)
if _, err := mgr.TimerFireWhenIdleAny(
"p_owner",
"codex-review-591 finished. Read your own pane …",
"", []string{"p_sub"}, 0,
); err != nil {
t.Fatalf("TimerFireWhenIdleAny: %v", err)
}
mgr.onChildClosed("p_sub")
// Trailing classifier emission for the closed sub-agent must
// not deliver anything to the parent.
mgr.onChildStateChanged("p_sub", StateIdle)
if got := rec.snapshot(); len(got) != 0 {
t.Fatalf("stale fire delivered to parent after sub-agent close: %+v", got)
}
}