diff --git a/backend/internal/domain/text.go b/backend/internal/domain/text.go new file mode 100644 index 00000000..3144dfd1 --- /dev/null +++ b/backend/internal/domain/text.go @@ -0,0 +1,26 @@ +package domain + +import ( + "strings" + "unicode" +) + +// SanitizeControlChars removes control characters that are unsafe to deliver +// into a live terminal pane, while preserving the whitespace that legitimate +// multi-line text relies on (newline, carriage return, tab). +// +// Any text that reaches an agent's PTY must pass through here. The session +// runtime pastes messages straight into the live pane, so an unfiltered escape +// sequence (cursor control, screen clear, OSC) embedded in attacker-influenced +// content — a GitHub reviewer comment, a CI job log tail — would be interpreted +// by the terminal instead of read as plain text. Both the HTTP send endpoint +// and the lifecycle nudge path share this one definition so neither can drift +// into delivering raw control bytes. +func SanitizeControlChars(s string) string { + return strings.Map(func(r rune) rune { + if unicode.IsControl(r) && r != '\n' && r != '\r' && r != '\t' { + return -1 + } + return r + }, s) +} diff --git a/backend/internal/domain/text_test.go b/backend/internal/domain/text_test.go new file mode 100644 index 00000000..40794e6a --- /dev/null +++ b/backend/internal/domain/text_test.go @@ -0,0 +1,25 @@ +package domain + +import "testing" + +func TestSanitizeControlChars(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {name: "plain text unchanged", in: "hello world", want: "hello world"}, + {name: "keeps newline tab carriage return", in: "a\nb\tc\rd", want: "a\nb\tc\rd"}, + {name: "strips ansi escape byte leaving harmless residue", in: "before\x1b[2Jafter", want: "before[2Jafter"}, + {name: "strips nul and bell", in: "x\x00y\az", want: "xyz"}, + {name: "strips osc sequence bytes", in: "\x1b]0;title\a", want: "]0;title"}, + {name: "empty stays empty", in: "", want: ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := SanitizeControlChars(tt.in); got != tt.want { + t.Fatalf("SanitizeControlChars(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} diff --git a/backend/internal/httpd/controllers/sessions.go b/backend/internal/httpd/controllers/sessions.go index bc0a4504..b59d2d0a 100644 --- a/backend/internal/httpd/controllers/sessions.go +++ b/backend/internal/httpd/controllers/sessions.go @@ -6,7 +6,6 @@ import ( "net/http" "strconv" "strings" - "unicode" "github.com/go-chi/chi/v5" @@ -274,7 +273,7 @@ func (c *SessionsController) send(w http.ResponseWriter, r *http.Request) { envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "MESSAGE_TOO_LONG", "Message is too long", nil) return } - message := stripUnsafeControlChars(in.Message) + message := domain.SanitizeControlChars(in.Message) if err := c.Svc.Send(r.Context(), sessionID(r), message); err != nil { envelope.WriteError(w, r, err) return @@ -403,15 +402,6 @@ func parseSessionListFilter(r *http.Request) (sessionsvc.ListFilter, error) { return filter, nil } -func stripUnsafeControlChars(message string) string { - return strings.Map(func(r rune) rune { - if unicode.IsControl(r) && r != '\n' && r != '\r' && r != '\t' { - return -1 - } - return r - }, message) -} - func writeSessionPRError(w http.ResponseWriter, r *http.Request, err error) { var claimed ports.PRClaimedByActiveSessionError switch { diff --git a/backend/internal/lifecycle/manager_test.go b/backend/internal/lifecycle/manager_test.go index 95a753ac..ca33d560 100644 --- a/backend/internal/lifecycle/manager_test.go +++ b/backend/internal/lifecycle/manager_test.go @@ -190,6 +190,46 @@ func TestPRObservation_ReviewCommentsNudgeAgent(t *testing.T) { } } +func TestPRObservation_CINudgeSanitizesLogTailControlChars(t *testing.T) { + m, st, msg := newManager() + st.sessions["mer-1"] = working("mer-1") + // A CI log tail with an embedded ANSI escape sequence and a NUL byte; the + // agent's pane must receive the visible text without the control bytes. + o := ports.PRObservation{Fetched: true, URL: "pr1", CI: domain.CIFailing, Checks: []ports.PRCheckObservation{{Name: "build", CommitHash: "c1", Status: domain.PRCheckFailed, LogTail: "line1\x1b[2Jline2\x00\ttabbed"}}} + if err := m.ApplyPRObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + if len(msg.msgs) != 1 { + t.Fatalf("want one CI nudge, got %v", msg.msgs) + } + got := msg.msgs[0] + if strings.ContainsRune(got, '\x1b') || strings.ContainsRune(got, '\x00') { + t.Fatalf("nudge still carries control bytes: %q", got) + } + if !strings.Contains(got, "line1") || !strings.Contains(got, "line2") || !strings.Contains(got, "\ttabbed") { + t.Fatalf("nudge dropped visible text or tab: %q", got) + } +} + +func TestPRObservation_ReviewNudgeSanitizesCommentControlChars(t *testing.T) { + m, st, msg := newManager() + st.sessions["mer-1"] = working("mer-1") + o := ports.PRObservation{Fetched: true, URL: "pr1", Review: domain.ReviewChangesRequest, Comments: []ports.PRCommentObservation{{ID: "1", Body: "please\x1b]0;pwned\afix this"}}} + if err := m.ApplyPRObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + if len(msg.msgs) != 1 { + t.Fatalf("want one review nudge, got %v", msg.msgs) + } + got := msg.msgs[0] + if strings.ContainsRune(got, '\x1b') || strings.ContainsRune(got, '\a') { + t.Fatalf("review nudge still carries control bytes: %q", got) + } + if !strings.Contains(got, "please") || !strings.Contains(got, "fix this") { + t.Fatalf("review nudge dropped visible text: %q", got) + } +} + func TestSCMObservationProjectsToExistingPRReactions(t *testing.T) { m, st, msg := newManager() st.sessions["mer-1"] = working("mer-1") diff --git a/backend/internal/lifecycle/reactions.go b/backend/internal/lifecycle/reactions.go index 540a5064..851f11c0 100644 --- a/backend/internal/lifecycle/reactions.go +++ b/backend/internal/lifecycle/reactions.go @@ -70,7 +70,10 @@ func (m *Manager) ApplyPRObservation(ctx context.Context, id domain.SessionID, o if ch.Status == domain.PRCheckFailed { msg := "CI is failing on your PR. Review the output below and push a fix." if ch.LogTail != "" { - msg += "\n\nFailing output:\n" + ch.LogTail + // LogTail is raw CI job output; sanitize before it reaches the + // agent's live pane so embedded escape sequences can't drive the + // terminal (the dedup signature stays on the raw bytes). + msg += "\n\nFailing output:\n" + domain.SanitizeControlChars(ch.LogTail) } return m.sendOnce(ctx, id, o.URL, "ci:"+o.URL+":"+ch.Name, ch.CommitHash+":"+ch.LogTail, msg, 0) } @@ -404,7 +407,11 @@ func reviewContent(comments []ports.PRCommentObservation) (string, string) { if c.Resolved { continue } - bodies = append(bodies, c.Body) + // Comment bodies are attacker-influenced (anyone who can comment on the + // PR) and get pasted into the agent's live pane; strip control/escape + // chars. The signature is built from comment IDs, not bodies, so dedup is + // unaffected. + bodies = append(bodies, domain.SanitizeControlChars(c.Body)) ids = append(ids, c.ID) } return strings.Join(bodies, "\n\n"), strings.Join(ids, ",")