From cb3e51d568707183e4d8b65a7e41cf358cc1752e Mon Sep 17 00:00:00 2001 From: Harry Bayliss Date: Thu, 14 May 2026 14:46:21 +0100 Subject: [PATCH] Handle kitty keyboard protocol input for Ctrl-K and palette MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex (and other ratatui-based children) pushes kitty keyboard flags onto the host terminal, so Ctrl-K arrives as `\x1b[107;5u` instead of 0x0B and the palette open never fired. With "report event types" also on, the release event `\x1b[107;5:3u` followed the press and tripped the palette's "unknown ESC sequence → cancel" branch, making the palette flash and close. Add a small CSI scanner / kitty CSI u decoder and use them in two places: matchCtrlK now accepts the legacy byte, the kitty CSI u form, and xterm modifyOtherKeys; the palette's input handler consumes whole CSI sequences, ignores non-press events, and decodes Enter/Esc/ Backspace/arrows/Ctrl-U-N-P in their kitty forms. Ctrl-K Ctrl-K forwards the raw matched bytes so nested TUIs that asked for kitty input still receive kitty input. --- internal/app/app.go | 70 +++---------- internal/app/keymatch.go | 144 +++++++++++++++++++++++++++ internal/app/keymatch_test.go | 56 +++++++++++ internal/app/palette.go | 151 +++++++++++++++++++++-------- internal/app/palette_input_test.go | 108 +++++++++++++++++++++ 5 files changed, 436 insertions(+), 93 deletions(-) create mode 100644 internal/app/keymatch.go create mode 100644 internal/app/keymatch_test.go create mode 100644 internal/app/palette_input_test.go diff --git a/internal/app/app.go b/internal/app/app.go index 7bc278d..aa4e086 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -216,9 +216,6 @@ type uiState struct { // A fresh renderer is allocated per focused child so partial-escape // state cannot bleed between panes. 
renderer *viewportRenderer - // passthrough: when true, the next keystroke is forwarded to the - // focused PTY untouched (SPEC §4 Ctrl-K Ctrl-K). - passthroughArmed bool // attention is the latest request_human_attention surfaced via MCP; // rendered in the status line until cleared. @@ -614,31 +611,13 @@ func (st *uiState) processStdin(chunk []byte) { for i < len(chunk) { b := chunk[i] - // Passthrough armed: forward this byte literally regardless of - // what it is, then disarm. - if st.passthroughArmed { - forward = append(forward, b) - st.passthroughArmed = false - i++ - continue - } - // Palette mode swallows all bytes. if st.palette != nil { - var peek []byte - if i+1 < len(chunk) { - peek = chunk[i+1:] - } - action, done := st.palette.handleKey(b, peek) - if b == 0x1b && len(peek) >= 2 && peek[0] == '[' { - if peek[1] == 'A' || peek[1] == 'B' { - i += 3 - } else { - i++ - } - } else { - i++ + action, done, adv := st.palette.handleInput(chunk, i) + if adv <= 0 { + adv = 1 } + i += adv if done { a := action pendingAction = &a @@ -650,42 +629,23 @@ func (st *uiState) processStdin(chunk []byte) { // Ctrl-K is the reserved app-level binding. Two cases: // - Ctrl-K then anything except Ctrl-K → open palette. - // - Ctrl-K Ctrl-K → arm passthrough; the next byte goes raw. - if b == keyCtrlK { - // Peek at the next byte if we have it. - next := byte(0) - haveNext := i+1 < len(chunk) - if haveNext { - next = chunk[i+1] - } - if haveNext && next == keyCtrlK { - // Chord: forward both Ctrl-K bytes literally. (Some - // nested TUIs expect Ctrl-K itself.) + // - Ctrl-K Ctrl-K → forward both keystrokes to the child raw. + // + // Ctrl-K is recognised in legacy (0x0B), kitty CSI u, and xterm + // modifyOtherKeys encodings — see matchCtrlK. The chord forwards + // the bytes the terminal actually emitted, so a child that asked + // for kitty input gets kitty input. 
+ if hit, adv := matchCtrlK(chunk, i); hit { + if hit2, adv2 := matchCtrlK(chunk, i+adv); hit2 { flushForward() - forward = append(forward, keyCtrlK, keyCtrlK) + forward = append(forward, chunk[i:i+adv+adv2]...) flushForward() - i += 2 + i += adv + adv2 continue } - if !haveNext { - // Could be the first byte of a chord — arm and wait. - st.passthroughArmed = true - // But we also want palette-open on a lone Ctrl-K. Resolve - // by treating "Ctrl-K at end of read" as palette open; - // any subsequent Ctrl-K in the next read still has the - // chord semantics because passthroughArmed got set first. - // To match the spec's reading, simpler model: lone Ctrl-K - // in this read opens the palette. - st.passthroughArmed = false - flushForward() - st.openPaletteLocked() - i++ - continue - } - // Ctrl-K followed by something that's not Ctrl-K → palette open. flushForward() st.openPaletteLocked() - i++ + i += adv continue } diff --git a/internal/app/keymatch.go b/internal/app/keymatch.go new file mode 100644 index 0000000..803f10b --- /dev/null +++ b/internal/app/keymatch.go @@ -0,0 +1,144 @@ +package app + +import ( + "strconv" + "strings" +) + +// csiLen returns the byte length of the CSI sequence starting at +// chunk[i], or 0 if chunk[i:] doesn't begin a complete CSI. A CSI is +// ESC '[' followed by parameter bytes (0x30..0x3F), intermediate bytes +// (0x20..0x2F), and one final byte (0x40..0x7E). +func csiLen(chunk []byte, i int) int { + if i+1 >= len(chunk) || chunk[i] != 0x1b || chunk[i+1] != '[' { + return 0 + } + end := i + 2 + for end < len(chunk) && chunk[end] >= 0x30 && chunk[end] <= 0x3F { + end++ + } + for end < len(chunk) && chunk[end] >= 0x20 && chunk[end] <= 0x2F { + end++ + } + if end >= len(chunk) { + return 0 + } + if final := chunk[end]; final < 0x40 || final > 0x7E { + return 0 + } + return end - i + 1 +} + +// csiuKey is the decoded form of a CSI u key event. 
key is the kitty +// keycode (the unshifted unicode codepoint for character keys, or a +// kitty functional-key constant). mods is the kitty modifier value +// (1 + bitfield: shift=1, alt=2, ctrl=4, super=8, …). event is the +// event type (1=press, 2=repeat, 3=release). +type csiuKey struct { + key int + mods int + event int +} + +// decodeCSIu parses the parameter string of a `CSI ... u` sequence. +// The kitty shape is: +// +// <key>[:<shifted>[:<base>]] [;<mods>[:<event>][;<text>...]] +// +// Unspecified groups default to mods=1, event=1. +func decodeCSIu(params string) (csiuKey, bool) { + parts := strings.SplitN(params, ";", 3) + + keyGroup := parts[0] + if i := strings.IndexByte(keyGroup, ':'); i >= 0 { + keyGroup = keyGroup[:i] + } + if keyGroup == "" { + return csiuKey{}, false + } + key, err := strconv.Atoi(keyGroup) + if err != nil { + return csiuKey{}, false + } + + mods, event := 1, 1 + if len(parts) > 1 { + modGroup := parts[1] + eventGroup := "" + if i := strings.IndexByte(modGroup, ':'); i >= 0 { + eventGroup = modGroup[i+1:] + modGroup = modGroup[:i] + } + if modGroup != "" { + m, err := strconv.Atoi(modGroup) + if err != nil { + return csiuKey{}, false + } + mods = m + } + if eventGroup != "" { + e, err := strconv.Atoi(eventGroup) + if err != nil { + return csiuKey{}, false + } + event = e + } + } + return csiuKey{key: key, mods: mods, event: event}, true +} + +// matchCtrlK reports whether chunk[i:] starts with a Ctrl-K keystroke +// in any of the encodings we accept on input, and returns the number of +// bytes consumed. +// +// Three encodings are recognised: +// +// - Legacy: the single byte 0x0B. +// - Kitty keyboard CSI u: ESC '[' 107 ';' 5 'u' (optionally with sub- +// parameters and trailing groups, see [kitty]). The kitty protocol +// fires when a child PTY pushes it onto the host terminal's flag +// stack; codex/ratatui does this on startup, which is what motivated +// this matcher. +// - xterm modifyOtherKeys: ESC '[' 27 ';' 5 ';' 107 '~'. 
+// +// Only an unmodified Ctrl-K (modifier value exactly 5 — i.e. Ctrl with +// no Shift/Alt/Meta) and a key-press event (event-type 1 or omitted) +// match. That mirrors the legacy 0x0B byte, which only fires on plain +// Ctrl-K too. +// +// [kitty]: https://sw.kovidgoyal.net/kitty/keyboard-protocol/ +func matchCtrlK(chunk []byte, i int) (matched bool, advance int) { + if i >= len(chunk) { + return false, 0 + } + if chunk[i] == keyCtrlK { + return true, 1 + } + n := csiLen(chunk, i) + if n == 0 { + return false, 0 + } + final := chunk[i+n-1] + params := string(chunk[i+2 : i+n-1]) + switch final { + case 'u': + k, ok := decodeCSIu(params) + if ok && k.key == 107 && k.mods == 5 && k.event == 1 { + return true, n + } + case '~': + if isModifyOtherKeysCtrlK(params) { + return true, n + } + } + return false, 0 +} + +// isModifyOtherKeysCtrlK parses xterm's CSI 27;<mods>;<key>~ form. +func isModifyOtherKeysCtrlK(params string) bool { + parts := strings.Split(params, ";") + if len(parts) != 3 { + return false + } + return parts[0] == "27" && parts[1] == "5" && parts[2] == "107" +} diff --git a/internal/app/keymatch_test.go b/internal/app/keymatch_test.go new file mode 100644 index 0000000..a4836c9 --- /dev/null +++ b/internal/app/keymatch_test.go @@ -0,0 +1,56 @@ +package app + +import "testing" + +func TestMatchCtrlK(t *testing.T) { + cases := []struct { + name string + chunk string + offset int + wantMatch bool + wantAdvance int + }{ + {"legacy lone byte", "\x0b", 0, true, 1}, + {"legacy followed by text", "\x0bx", 0, true, 1}, + {"kitty plain Ctrl-K", "\x1b[107;5u", 0, true, 8}, + {"kitty with press event", "\x1b[107;5:1u", 0, true, 10}, + {"kitty with key release", "\x1b[107;5:3u", 0, false, 0}, + {"kitty with extra shift", "\x1b[107;6u", 0, false, 0}, + {"kitty no modifier", "\x1b[107u", 0, false, 0}, + {"kitty wrong key", "\x1b[108;5u", 0, false, 0}, + {"kitty with associated text trailing group", "\x1b[107;5;107u", 0, true, 12}, + {"modifyOtherKeys Ctrl-K", 
"\x1b[27;5;107~", 0, true, 11}, + {"modifyOtherKeys wrong mods", "\x1b[27;6;107~", 0, false, 0}, + {"unrelated CSI", "\x1b[A", 0, false, 0}, + {"plain ascii", "k", 0, false, 0}, + {"empty", "", 0, false, 0}, + {"incomplete CSI", "\x1b[107;5", 0, false, 0}, + {"offset past legacy", "x\x0b", 1, true, 1}, + {"offset past kitty prefix", "x\x1b[107;5u", 1, true, 8}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, adv := matchCtrlK([]byte(tc.chunk), tc.offset) + if got != tc.wantMatch || adv != tc.wantAdvance { + t.Fatalf("matchCtrlK(%q, %d) = (%v, %d); want (%v, %d)", + tc.chunk, tc.offset, got, adv, tc.wantMatch, tc.wantAdvance) + } + }) + } +} + +func TestMatchCtrlKConsecutive(t *testing.T) { + // Two kitty Ctrl-K sequences back to back, the chord case. + chunk := []byte("\x1b[107;5u\x1b[107;5u") + hit, adv := matchCtrlK(chunk, 0) + if !hit || adv != 8 { + t.Fatalf("first: hit=%v adv=%d", hit, adv) + } + hit2, adv2 := matchCtrlK(chunk, adv) + if !hit2 || adv2 != 8 { + t.Fatalf("second: hit=%v adv=%d", hit2, adv2) + } + if adv+adv2 != len(chunk) { + t.Fatalf("expected to cover the whole chunk, got %d/%d", adv+adv2, len(chunk)) + } +} diff --git a/internal/app/palette.go b/internal/app/palette.go index aa0eca0..ed7b6b1 100644 --- a/internal/app/palette.go +++ b/internal/app/palette.go @@ -135,51 +135,42 @@ func fuzzyMatch(hay, needle string) bool { return true } -func (p *paletteState) handleKey(b byte, peek []byte) (paletteAction, bool) { +// kitty functional keycodes for arrows. +const ( + kittyKeyUp = 57352 + kittyKeyDown = 57353 +) + +// handleInput consumes one keystroke from chunk[i:] and updates palette +// state. advance is how many bytes the keystroke occupies (1 for legacy +// keys, longer for CSI sequences). Returning done=true tells the caller +// the palette is finished and action describes what to do next. 
+// +// Recognised input includes both legacy byte forms and the kitty +// keyboard CSI u encoding that codex/ratatui pushes onto the terminal. +// Unknown CSI sequences (including release events from kitty flag 2) +// are consumed silently so they don't fall through to the ESC branch +// and accidentally cancel the palette. +func (p *paletteState) handleInput(chunk []byte, i int) (action paletteAction, done bool, advance int) { + b := chunk[i] if b == 0x1b { - // Pure Esc cancels; Esc [ A/B is up/down arrow. - if len(peek) >= 2 && peek[0] == '[' { - switch peek[1] { - case 'A': - p.cursor-- - if p.cursor < 0 { - p.cursor = 0 - } - return paletteAction{}, false - case 'B': - p.cursor++ - if p.cursor >= len(p.items) { - p.cursor = len(p.items) - 1 - } - return paletteAction{}, false - } + if n := csiLen(chunk, i); n > 0 { + return p.handleCSI(chunk[i+2:i+n-1], chunk[i+n-1], n) } - return paletteAction{kind: "cancel"}, true + // Bare ESC (no CSI follow-up): cancel. + return paletteAction{kind: "cancel"}, true, 1 } switch b { case '\r', '\n': - if p.cursor >= 0 && p.cursor < len(p.items) { - return p.items[p.cursor].action, true - } - return paletteAction{kind: "cancel"}, true + return p.accept(), true, 1 case 0x7f, 0x08: - if len(p.query) > 0 { - p.query = p.query[:len(p.query)-1] - p.rebuild() - } + p.backspace() case 0x15: // Ctrl-U - p.query = p.query[:0] - p.rebuild() + p.clearQuery() case 0x0e: // Ctrl-N - p.cursor++ - if p.cursor >= len(p.items) { - p.cursor = len(p.items) - 1 - } - case 0x10: // Ctrl-P inside palette: cursor up. - p.cursor-- - if p.cursor < 0 { - p.cursor = 0 - } + p.cursorDown() + case 0x10: // Ctrl-P + p.cursorUp() case 0x0b: // Ctrl-K inside palette is a no-op (would re-open); ignore. case 0x16: // Ctrl-V literal-paste — ignore in palette. 
default: @@ -188,7 +179,91 @@ func (p *paletteState) handleKey(b byte, peek []byte) (paletteAction, bool) { p.rebuild() } } - return paletteAction{}, false + return paletteAction{}, false, 1 +} + +func (p *paletteState) handleCSI(params []byte, final byte, n int) (paletteAction, bool, int) { + switch final { + case 'A': + p.cursorUp() + return paletteAction{}, false, n + case 'B': + p.cursorDown() + return paletteAction{}, false, n + case 'u': + k, ok := decodeCSIu(string(params)) + if !ok || k.event != 1 { + // Repeat / release events, or malformed: ignore. + return paletteAction{}, false, n + } + switch k.key { + case 13: // Enter + return p.accept(), true, n + case 27: // Escape + return paletteAction{kind: "cancel"}, true, n + case 127, 8: // Backspace + p.backspace() + case kittyKeyUp: + p.cursorUp() + case kittyKeyDown: + p.cursorDown() + default: + // Ctrl-modified character keys. + if k.mods == 5 { + switch k.key { + case 'u': + p.clearQuery() + case 'n': + p.cursorDown() + case 'p': + p.cursorUp() + } + return paletteAction{}, false, n + } + // Unmodified printable ASCII typed via CSI u (flag 8): treat + // as a query keystroke. + if k.mods == 1 && k.key >= 0x20 && k.key < 0x7f { + p.query = append(p.query, rune(k.key)) + p.rebuild() + } + } + return paletteAction{}, false, n + } + // Anything else (~, function keys, etc.): consume silently. 
+ return paletteAction{}, false, n +} + +func (p *paletteState) accept() paletteAction { + if p.cursor >= 0 && p.cursor < len(p.items) { + return p.items[p.cursor].action + } + return paletteAction{kind: "cancel"} +} + +func (p *paletteState) backspace() { + if len(p.query) > 0 { + p.query = p.query[:len(p.query)-1] + p.rebuild() + } +} + +func (p *paletteState) clearQuery() { + p.query = p.query[:0] + p.rebuild() +} + +func (p *paletteState) cursorUp() { + p.cursor-- + if p.cursor < 0 { + p.cursor = 0 + } +} + +func (p *paletteState) cursorDown() { + p.cursor++ + if p.cursor >= len(p.items) { + p.cursor = len(p.items) - 1 + } } // render draws the palette onto out. Geometry: title bar + filter line + diff --git a/internal/app/palette_input_test.go b/internal/app/palette_input_test.go new file mode 100644 index 0000000..4d6a799 --- /dev/null +++ b/internal/app/palette_input_test.go @@ -0,0 +1,108 @@ +package app + +import ( + "testing" + + "github.com/harrybrwn/patterm/internal/preset" +) + +func newTestPalette() *paletteState { + return newPalette(nil, "", preset.Set{}) +} + +func TestPaletteIgnoresKittyReleaseEvent(t *testing.T) { + // A kitty key-release for Ctrl-K. With the legacy handler this looked + // like ESC followed by `[`, which fell through to cancel. 
+ p := newTestPalette() + chunk := []byte("\x1b[107;5:3u") + action, done, adv := p.handleInput(chunk, 0) + if done { + t.Fatalf("release event closed palette: action=%+v", action) + } + if adv != len(chunk) { + t.Fatalf("advance %d, want %d", adv, len(chunk)) + } +} + +func TestPaletteEscViaKittyCancels(t *testing.T) { + p := newTestPalette() + chunk := []byte("\x1b[27u") + action, done, adv := p.handleInput(chunk, 0) + if !done || action.kind != "cancel" { + t.Fatalf("Esc via CSI u didn't cancel: action=%+v done=%v", action, done) + } + if adv != len(chunk) { + t.Fatalf("advance %d, want %d", adv, len(chunk)) + } +} + +func TestPaletteBareEscCancels(t *testing.T) { + p := newTestPalette() + action, done, adv := p.handleInput([]byte{0x1b}, 0) + if !done || action.kind != "cancel" { + t.Fatalf("bare ESC didn't cancel: action=%+v done=%v", action, done) + } + if adv != 1 { + t.Fatalf("advance %d, want 1", adv) + } +} + +func TestPaletteKittyArrowsNavigate(t *testing.T) { + pr := []*preset.Preset{{Name: "a"}, {Name: "b"}, {Name: "c"}} + p := newPalette(nil, "", preset.Set{Agents: pr}) + if p.cursor != 0 { + t.Fatalf("initial cursor %d", p.cursor) + } + // Kitty functional Down arrow. + _, _, adv := p.handleInput([]byte("\x1b[57353u"), 0) + if adv != 8 { + t.Fatalf("advance %d", adv) + } + if p.cursor != 1 { + t.Fatalf("cursor %d after Down, want 1", p.cursor) + } + // Kitty functional Up arrow. 
+ _, _, _ = p.handleInput([]byte("\x1b[57352u"), 0) + if p.cursor != 0 { + t.Fatalf("cursor %d after Up, want 0", p.cursor) + } +} + +func TestPaletteLegacyArrowsStillWork(t *testing.T) { + pr := []*preset.Preset{{Name: "a"}, {Name: "b"}} + p := newPalette(nil, "", preset.Set{Agents: pr}) + _, _, adv := p.handleInput([]byte("\x1b[B"), 0) + if adv != 3 { + t.Fatalf("advance %d", adv) + } + if p.cursor != 1 { + t.Fatalf("cursor %d, want 1", p.cursor) + } +} + +func TestPaletteKittyEnterAccepts(t *testing.T) { + pr := []*preset.Preset{{Name: "x"}} + p := newPalette(nil, "", preset.Set{Agents: pr}) + action, done, _ := p.handleInput([]byte("\x1b[13u"), 0) + if !done || action.kind != "spawn-agent" { + t.Fatalf("Enter via CSI u didn't accept: action=%+v done=%v", action, done) + } +} + +func TestPaletteKittyBackspace(t *testing.T) { + p := newTestPalette() + p.query = []rune("hello") + _, _, _ = p.handleInput([]byte("\x1b[127u"), 0) + if string(p.query) != "hell" { + t.Fatalf("query %q after backspace", string(p.query)) + } +} + +func TestPaletteLegacyPrintableTypes(t *testing.T) { + p := newTestPalette() + _, _, _ = p.handleInput([]byte("a"), 0) + _, _, _ = p.handleInput([]byte("b"), 0) + if string(p.query) != "ab" { + t.Fatalf("query %q", string(p.query)) + } +}