From 0725375755471569b29ec8d6333ee164bb3d6e19 Mon Sep 17 00:00:00 2001 From: Harry Bayliss Date: Mon, 25 May 2026 12:43:56 +0100 Subject: [PATCH] Hold codex in thinking while a turn is running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex uses the osc_title_stability idle strategy, but it draws its progress in the pane body ('Working … esc to interrupt'), not the OSC title. The title goes stable mid-turn, so ~2s later the classifier declared codex idle while it was still working. Add a thinking-promoter pattern ((?i)esc to interrupt) to the codex built-in preset; classify() checks promoter regexes against the rendered screen before the title-stability verdict, so codex stays in thinking until the turn's in-progress footer actually disappears. Resolves the [CODEX IDLE] TODO item. --- CHANGELOG.md | 2 ++ TODO.md | 56 ---------------------------------- internal/app/idle_test.go | 15 +++++++++ internal/preset/preset.go | 5 ++- internal/preset/preset_test.go | 7 +++++ 5 files changed, 28 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13a1658..ae66ac0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ loosely follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). over MCP. ### Fixed +- Codex agents are no longer reported idle while a turn is still + running. - Slow MCP tool calls such as `wait_for_pattern` no longer block later tool calls on the same MCP connection. - Closing an agent now escalates from SIGTERM to SIGKILL when needed, diff --git a/TODO.md b/TODO.md index 9a1ee9c..e409838 100644 --- a/TODO.md +++ b/TODO.md @@ -1,62 +1,6 @@ -- [ ] Codex idle detection seems to trigger too soon, see below [CODEX IDLE] - [ ] When opening a codex sub agent, the message gets input to the field, but the message is never submitted. - This appears to be inconsistent. Sometimes it works, sometimes it doesn't. Might be because of popups on codex sub agents? - Question: when it fails, is a Codex startup popup visible (trust/workspace, auth/model selection, permissions), or is the normal composer focused? - Question: if the message is sitting in the composer, does pressing Enter once manually submit it, or does something else need to be dismissed first? - Question: does this happen with short one-line prompts as well as long/multiline sub-agent instructions? - [ ] The per-tab agent summary text should display below the tab always, not just when the tab is focused. - - - -# [CODEX IDLE] -Crunched for 1m 57s │ - │ Agent Tree -❯ codex-implement is idle. Read your own pane for the [sub-agent:codex-implement] reply. │ ───────────────────────── - │ ▎ ○ claude ⏱ 59m - Called patterm, searched memories (ctrl+o to expand) │ ● codex-implement-2 - │ -● Bash(git -C /home/harry/Dev/popdog/.worktrees/harvester-issue-136-kick-segment-repair log -3 --oneline && git -C /home/harry/Dev/popdog/.worktrees/harvester-issue-13…) │ Issue #136 plan drafted - ⎿  9604013 fix(youtube): finalize segments on ghost-cleanup path (#120) │ in scratchpad; - 6da9125 fix(kick): bump KickCategoryAuditor MaxPerCycle to 500 (#135) │ verification sub-agent - db149e6 fix(twitch): enqueue stream segment repairs (#134) │ - ⎿  Allowed by auto mode classifier │ Scratchpads - │ ───────────────────────── -● No commits in the worktree. Let me check the codex-implement pane. │ 0027-review - │ 569-state - Called patterm (ctrl+o to expand) │ db-infra-options - │ db-perf-batching-plan -● Codex exited mid-update prompting "Please restart Codex." Let me spawn a fresh one — the update is now installed. │ e2e-canonicalisation-ru… - │ issue-136-kick-segment-… - Called patterm 3 times (ctrl+o to expand) │ pg-stat-optimization-de… - │ pg-stat-statements-find… -● Fresh codex-implement-2 spawned. Waiting. │ pr-569-stream-category-… - │ stream-category-cutover… -✻ Sautéed for 55s │ - │ -❯ codex-implement-2 is idle. Read your own pane for the [sub-agent:codex-implement-2] reply. │ - │ -● Bash(git log -3 --oneline && echo "---STATUS---" && git status -s && echo "---BRANCH---" && git branch -vv && echo "---REMOTE---" && git ls-remote origin fix/kick-se…) │ - ⎿  9604013 fix(youtube): finalize segments on ghost-cleanup path (#120) │ - 6da9125 fix(kick): bump KickCategoryAuditor MaxPerCycle to 500 (#135) │ - db149e6 fix(twitch): enqueue stream segment repairs (#134) │ - … +44 lines (ctrl+o to expand) │ - ⎿  Allowed by auto mode classifier │ - ⎿  Interrupted · What should Claude do instead? │ - │ -❯ it's still running. │ - │ -● Got it — re-scheduling the wait. │ - │ - Called patterm (ctrl+o to expand) │ - │ -● Waiting for codex-implement-2 to finish. │ - │ -✻ Sautéed for 12s │ - │ - 7 tasks (5 done, 1 in progress, 1 open) │ - ◼ Spawn codex sub-agent to implement plan │ - ◻ Open PR for issue 136 │ - ✔ Read GitHub issue 136 and gather reference code │ - ✔ Set up worktree for issue 136 implementation │ - ✔ Draft implementation plan │ - … +2 completed │ diff --git a/internal/app/idle_test.go b/internal/app/idle_test.go index 784adf4..37dfaf5 100644 --- a/internal/app/idle_test.go +++ b/internal/app/idle_test.go @@ -57,6 +57,21 @@ func TestClassifyTitleStability(t *testing.T) { } } +func TestClassifyTitleStabilityThinkingPatternOverridesIdle(t *testing.T) { + cfg := &resolvedIdleDetection{ + strategy: StrategyOSCTitleStability, + idleThresholdMS: 2000, + thinkingRegexes: []*regexp.Regexp{mustCompile(t, `(?i)esc to interrupt`)}, + } + screen := []byte("• Working (5s • esc to interrupt)") + if got, _ := classify(cfg, false, false, 9999, 5000, "codex", nil, screen); got != StateThinking { + t.Fatalf("thinking screen marker: got %q want %q", got, StateThinking) + } + if got, _ := classify(cfg, false, false, 9999, 5000, "codex", nil, []byte(">_")); got != StateIdle { + t.Fatalf("stable title without marker: got %q want %q", got, StateIdle) + } +} + func TestClassifyTitleStatus(t *testing.T) { cfg := &resolvedIdleDetection{ strategy: StrategyOSCTitleStatus, diff --git a/internal/preset/preset.go b/internal/preset/preset.go index 84b5be1..1a04785 100644 --- a/internal/preset/preset.go +++ b/internal/preset/preset.go @@ -352,7 +352,10 @@ func defaultAgentPresets() []*Preset { "ready_signal": { "idle_ms": 1000 }, "idle_detection": { "strategy": "osc_title_stability", - "idle_threshold_ms": 2000 + "idle_threshold_ms": 2000, + "thinking_patterns": [ + "(?i)esc to interrupt" + ] }, "chrome_trim_hints": [ "^OpenAI Codex", diff --git a/internal/preset/preset_test.go b/internal/preset/preset_test.go index 835b837..0ef4e2c 100644 --- a/internal/preset/preset_test.go +++ b/internal/preset/preset_test.go @@ -27,6 +27,13 @@ func TestLoadUsesBuiltInDefaultsWithoutWritingConfig(t *testing.T) { if claude.IdleDetection == nil || len(claude.IdleDetection.PermissionPatterns) == 0 { t.Fatalf("built-in claude missing permission patterns: %+v", claude.IdleDetection) } + codex := presetByName(set.Agents, "codex") + if codex == nil { + t.Fatal("missing built-in codex preset") + } + if codex.IdleDetection == nil || len(codex.IdleDetection.ThinkingPatterns) == 0 { + t.Fatalf("built-in codex missing thinking patterns: %+v", codex.IdleDetection) + } } func TestLoadMergesUserOverlayIntoBuiltInPreset(t *testing.T) {