Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions backend/internal/adapters/telemetry/posthog.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,37 @@ var remotePayloadAllowlist = map[string]map[string]struct{}{
"command_path": {},
},
"ao.cli.usage_errors": {
"component": {},
"command": {},
"command_path": {},
"error_kind": {},
"fingerprint": {},
"operation": {},
},
"ao.daemon.panic": {
"method": {},
"path": {},
"panic_kind": {},
"component": {},
"fingerprint": {},
"method": {},
"operation": {},
"path": {},
"panic_kind": {},
"stack_fingerprint": {},
},
"ao.daemon.started": {
"agent": {},
"port": {},
},
"ao.http.5xx": {
"duration": {},
"error_code": {},
"error_kind": {},
"method": {},
"path": {},
"status": {},
"component": {},
"duration": {},
"error_code": {},
"error_kind": {},
"fingerprint": {},
"method": {},
"operation": {},
"path": {},
"status": {},
"status_family": {},
},
"ao.onboarding.first_project_added": {
"has_git_remote": {},
Expand All @@ -70,11 +81,14 @@ var remotePayloadAllowlist = map[string]map[string]struct{}{
"kind": {},
},
"ao.session.spawn_failed": {
"component": {},
"duration_ms": {},
"error_code": {},
"error_kind": {},
"fingerprint": {},
"harness": {},
"kind": {},
"operation": {},
},
"ao.session.spawned": {
"duration_ms": {},
Expand Down
20 changes: 15 additions & 5 deletions backend/internal/adapters/telemetry/posthog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,15 @@ func TestPostHogSinkSanitizesPayloads(t *testing.T) {
OccurredAt: time.Unix(1700000000, 0).UTC(),
Level: ports.TelemetryLevelError,
Payload: map[string]any{
"method": http.MethodGet,
"path": "/api/v1/sessions/demo",
"panic_kind": "error",
"panic": "open /Users/name/private: no such file",
"stack": "stack trace with local path",
"component": "httpd",
"operation": "http_request_panic",
"method": http.MethodGet,
"path": "/api/v1/sessions/demo",
"panic_kind": "error",
"fingerprint": "abc123",
"stack_fingerprint": "def456",
"panic": "open /Users/name/private: no such file",
"stack": "stack trace with local path",
},
})
if err := sink.Close(context.Background()); err != nil {
Expand All @@ -110,9 +114,15 @@ func TestPostHogSinkSanitizesPayloads(t *testing.T) {
if !ok {
t.Fatalf("properties type = %T, want map[string]any", req["properties"])
}
if props["component"] != "httpd" || props["operation"] != "http_request_panic" {
t.Fatalf("sanitized properties = %#v, want allowlisted metadata", props)
}
if props["method"] != http.MethodGet || props["path"] != "/api/v1/sessions/demo" || props["panic_kind"] != "error" {
t.Fatalf("sanitized properties = %#v, want allowlisted fields", props)
}
if props["fingerprint"] != "abc123" || props["stack_fingerprint"] != "def456" {
t.Fatalf("sanitized properties = %#v, want exported fingerprints", props)
}
if _, ok := props["panic"]; ok {
t.Fatalf("panic property should be dropped: %#v", props)
}
Expand Down
41 changes: 15 additions & 26 deletions backend/internal/httpd/log.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
package httpd

import (
"errors"
"log/slog"
"net/http"
"strconv"
"time"

"github.com/go-chi/chi/v5/middleware"

"github.com/aoagents/agent-orchestrator/backend/internal/httpd/apierr"
"github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope"
"github.com/aoagents/agent-orchestrator/backend/internal/ports"
"github.com/aoagents/agent-orchestrator/backend/internal/telemetrymeta"
)

// requestLogger emits one structured access-log line per request via the
Expand Down Expand Up @@ -48,21 +48,23 @@ func requestLogger(log *slog.Logger, sink ports.EventSink) func(http.Handler) ht
}
log.Info("http request", attrs...)
if sink != nil && ww.Status() >= http.StatusInternalServerError {
path := telemetrymeta.RoutePattern(r)
payload := map[string]any{
"method": r.Method,
"path": r.URL.Path,
"status": ww.Status(),
"duration": time.Since(start).Milliseconds(),
"component": "httpd",
"operation": "http_request",
"method": r.Method,
"path": path,
"status": ww.Status(),
"status_family": telemetrymeta.StatusFamily(ww.Status()),
"duration": time.Since(start).Milliseconds(),
}
if err := capturedErr(); err != nil {
payload["error_kind"] = "internal"
var apiErr *apierr.Error
if errors.As(err, &apiErr) {
payload["error_kind"] = telemetryErrorKind(apiErr.Kind)
if apiErr.Code != "" {
payload["error_code"] = apiErr.Code
}
errorKind, errorCode := telemetrymeta.ErrorKindAndCode(err)
payload["error_kind"] = errorKind
if errorCode != "" {
payload["error_code"] = errorCode
}
payload["fingerprint"] = telemetrymeta.Fingerprint("httpd", "http_request", r.Method, path, strconv.Itoa(ww.Status()), errorKind, errorCode)
}
sink.Emit(r.Context(), ports.TelemetryEvent{
Name: "ao.http.5xx",
Expand All @@ -78,16 +80,3 @@ func requestLogger(log *slog.Logger, sink ports.EventSink) func(http.Handler) ht
})
}
}

func telemetryErrorKind(kind apierr.Kind) string {
switch kind {
case apierr.KindInvalid:
return "invalid"
case apierr.KindNotFound:
return "not_found"
case apierr.KindConflict:
return "conflict"
default:
return "internal"
}
}
25 changes: 25 additions & 0 deletions backend/internal/httpd/log_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,31 @@ func TestRequestLoggerRecords5xxCause(t *testing.T) {
if len(sink.events) != 1 || sink.events[0].Name != "ao.http.5xx" {
t.Fatalf("telemetry events = %#v, want one ao.http.5xx event", sink.events)
}
payload := sink.events[0].Payload
if got := payload["component"]; got != "httpd" {
t.Fatalf("payload.component = %#v, want httpd", got)
}
if got := payload["operation"]; got != "http_request" {
t.Fatalf("payload.operation = %#v, want http_request", got)
}
if got := payload["method"]; got != http.MethodPost {
t.Fatalf("payload.method = %#v, want POST", got)
}
if got := payload["path"]; got != "/api/v1/sessions/x/kill" {
t.Fatalf("payload.path = %#v, want request path fallback", got)
}
if got := payload["status"]; got != http.StatusInternalServerError {
t.Fatalf("payload.status = %#v, want 500", got)
}
if got := payload["status_family"]; got != "5xx" {
t.Fatalf("payload.status_family = %#v, want 5xx", got)
}
if got := payload["error_kind"]; got != "internal" {
t.Fatalf("payload.error_kind = %#v, want internal", got)
}
if got := payload["fingerprint"]; got == "" {
t.Fatalf("payload.fingerprint = %#v, want non-empty", got)
}
})
}
}
Expand Down
24 changes: 10 additions & 14 deletions backend/internal/httpd/recover.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope"
"github.com/aoagents/agent-orchestrator/backend/internal/ports"
"github.com/aoagents/agent-orchestrator/backend/internal/telemetrymeta"
)

func recoverTelemetry(log *slog.Logger, sink ports.EventSink) func(http.Handler) http.Handler {
Expand All @@ -29,16 +30,22 @@ func recoverTelemetry(log *slog.Logger, sink ports.EventSink) func(http.Handler)
"stack", stack,
)
if sink != nil {
path := telemetrymeta.RoutePattern(r)
panicKind := telemetrymeta.PanicKind(rec)
sink.Emit(r.Context(), ports.TelemetryEvent{
Name: "ao.daemon.panic",
Source: "http",
OccurredAt: time.Now().UTC(),
Level: ports.TelemetryLevelError,
RequestID: middleware.GetReqID(r.Context()),
Payload: map[string]any{
"method": r.Method,
"path": r.URL.Path,
"panic_kind": telemetryPanicKind(rec),
"component": "httpd",
"operation": "http_request_panic",
"method": r.Method,
"path": path,
"panic_kind": panicKind,
"stack_fingerprint": telemetrymeta.Fingerprint("httpd", "http_request_panic", path, panicKind, stack),
"fingerprint": telemetrymeta.Fingerprint("httpd", "http_request_panic", r.Method, path, panicKind),
},
})
}
Expand All @@ -50,17 +57,6 @@ func recoverTelemetry(log *slog.Logger, sink ports.EventSink) func(http.Handler)
}
}

func telemetryPanicKind(rec any) string {
switch rec.(type) {
case error:
return "error"
case string:
return "string"
default:
return "other"
}
}

func writeRecoveredError(w http.ResponseWriter, r *http.Request) {
if strings.HasPrefix(r.URL.Path, "/api/") {
envelope.WriteAPIError(w, r, http.StatusInternalServerError, "internal_error", "INTERNAL_ERROR", "Internal server error", nil)
Expand Down
4 changes: 4 additions & 0 deletions backend/internal/httpd/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/aoagents/agent-orchestrator/backend/internal/daemonmeta"
"github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope"
"github.com/aoagents/agent-orchestrator/backend/internal/ports"
"github.com/aoagents/agent-orchestrator/backend/internal/telemetrymeta"
"github.com/aoagents/agent-orchestrator/backend/internal/terminal"
)

Expand Down Expand Up @@ -187,9 +188,12 @@ func mountTelemetry(r chi.Router, sink ports.EventSink) {
Level: ports.TelemetryLevelWarn,
RequestID: middleware.GetReqID(req.Context()),
Payload: map[string]any{
"component": "cli",
"operation": "command_parse",
"command": body.Command,
"command_path": body.CommandPath,
"error_kind": "usage",
"fingerprint": telemetrymeta.Fingerprint("cli", "command_parse", body.CommandPath, "usage"),
},
})
w.WriteHeader(http.StatusAccepted)
Expand Down
53 changes: 48 additions & 5 deletions backend/internal/httpd/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,25 @@ func TestCLIUsageErrorRouteEmitsTelemetry(t *testing.T) {
if len(sink.events) != 1 || sink.events[0].Name != "ao.cli.usage_errors" {
t.Fatalf("events = %#v, want one ao.cli.usage_errors event", sink.events)
}
payload := sink.events[0].Payload
if got := payload["component"]; got != "cli" {
t.Fatalf("payload.component = %#v, want cli", got)
}
if got := payload["operation"]; got != "command_parse" {
t.Fatalf("payload.operation = %#v, want command_parse", got)
}
if got := payload["command_path"]; got != "ao status" {
t.Fatalf("payload.command_path = %#v, want ao status", got)
}
if got := payload["error_kind"]; got != "usage" {
t.Fatalf("payload.error_kind = %#v, want usage", got)
}
if got := payload["fingerprint"]; got == "" {
t.Fatalf("payload.fingerprint = %#v, want non-empty", got)
}
if _, ok := payload["error"]; ok {
t.Fatalf("payload leaked raw error text: %#v", payload)
}
}

func TestRecoverTelemetryEmitsPanicEvent(t *testing.T) {
Expand All @@ -90,19 +109,43 @@ func TestRecoverTelemetryEmitsPanicEvent(t *testing.T) {
if rec.Code != http.StatusInternalServerError {
t.Fatalf("status = %d, want 500", rec.Code)
}
var sawPanic, saw5xx bool
var panicPayload, fiveXXPayload map[string]any
for _, ev := range sink.events {
switch ev.Name {
case "ao.daemon.panic":
sawPanic = true
panicPayload = ev.Payload
case "ao.http.5xx":
saw5xx = true
fiveXXPayload = ev.Payload
}
}
if !sawPanic {
if panicPayload == nil {
t.Fatalf("events = %#v, want ao.daemon.panic", sink.events)
}
if !saw5xx {
if fiveXXPayload == nil {
t.Fatalf("events = %#v, want ao.http.5xx after recovery", sink.events)
}
if got := panicPayload["component"]; got != "httpd" {
t.Fatalf("panic payload.component = %#v, want httpd", got)
}
if got := panicPayload["operation"]; got != "http_request_panic" {
t.Fatalf("panic payload.operation = %#v, want http_request_panic", got)
}
if got := panicPayload["path"]; got != "/panic" {
t.Fatalf("panic payload.path = %#v, want /panic", got)
}
if got := panicPayload["panic_kind"]; got != "string" {
t.Fatalf("panic payload.panic_kind = %#v, want string", got)
}
if got := panicPayload["fingerprint"]; got == "" {
t.Fatalf("panic payload.fingerprint = %#v, want non-empty", got)
}
if got := panicPayload["stack_fingerprint"]; got == "" {
t.Fatalf("panic payload.stack_fingerprint = %#v, want non-empty", got)
}
if got := fiveXXPayload["path"]; got != "/panic" {
t.Fatalf("5xx payload.path = %#v, want /panic", got)
}
if got := fiveXXPayload["status_family"]; got != "5xx" {
t.Fatalf("5xx payload.status_family = %#v, want 5xx", got)
}
}
Loading
Loading