From 78b278436290066b23ce7a2342d2673814ccd401 Mon Sep 17 00:00:00 2001 From: Andrey Kumanyaev Date: Wed, 6 May 2026 19:38:53 +0200 Subject: [PATCH] graph, parser, mcp: KindString node kind plus string_emitters analyzer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-repo index for string literals that cross an API boundary — Datadog/dogstatsd metric names, errors.New / fmt.Errorf messages, and raw HTTP route paths. Each extractor emits a KindString node and an EdgeEmits from the enclosing function; applyRepoPrefix stamps the repo slug so two repos emitting the same metric stay distinct. Go extractor whitelist: - metric: Increment, Decrement, Count, Gauge, Histogram, Distribution, Timing, TimeInMilliseconds, Event, ServiceCheck (statsd / dogstatsd shape; Set/Inc/Add deliberately excluded as too generic). - error_msg: errors.New, fmt.Errorf, xerrors.New, xerrors.Errorf. - route: Handle / HandleFunc / Get / Post / Put / Delete / Patch / Options / Head / Connect / Trace, gated on a path-likeness check so cache.Get("user:42") doesn't fall in. Accepts net/http 1.22+ pattern syntax ("GET /foo"). New analyzer `analyze kind=string_emitters` groups by (context, value) with context and name-substring filters; supports JSON, GCX1 wire format, and compact-text outputs. Long values (>200 chars) get a 16-char SHA1 suffix in the node ID; short ones keep the literal text for direct grep. 
--- internal/graph/node.go | 12 +- internal/mcp/gcx.go | 12 + internal/mcp/tools_analyze_string_emitters.go | 117 +++++++++ .../mcp/tools_analyze_string_emitters_test.go | 124 +++++++++ internal/mcp/tools_enhancements.go | 4 +- internal/parser/languages/go_strings.go | 239 ++++++++++++++++++ internal/parser/languages/go_strings_test.go | 214 ++++++++++++++++ internal/parser/languages/golang.go | 31 +++ 8 files changed, 751 insertions(+), 2 deletions(-) create mode 100644 internal/mcp/tools_analyze_string_emitters.go create mode 100644 internal/mcp/tools_analyze_string_emitters_test.go create mode 100644 internal/parser/languages/go_strings.go create mode 100644 internal/parser/languages/go_strings_test.go diff --git a/internal/graph/node.go b/internal/graph/node.go index afb6778..1e51ad8 100644 --- a/internal/graph/node.go +++ b/internal/graph/node.go @@ -98,6 +98,16 @@ const ( // KindLicense represents an SPDX license identifier. ID convention: // `license::`. Files link to it via EdgeLicensedAs. KindLicense NodeKind = "license" + // KindString represents a string literal that crosses an API + // boundary worth tracking — Datadog/Prometheus metric names, + // errors.New / fmt.Errorf messages, raw HTTP route paths, and + // (later) HTML class/id values. ID convention: + // `string::::`. Context ∈ + // metric|error_msg|route|html_class|html_id|… EdgeEmits links the + // enclosing function/method to the string node, mirroring KindEvent. + // Per-repo: applyRepoPrefix prefixes every node ID with the repo + // slug so two repos that emit the same string don't collide. 
+ KindString NodeKind = "string" ) var validNodeKinds = map[NodeKind]bool{ @@ -111,7 +121,7 @@ var validNodeKinds = map[NodeKind]bool{ KindTable: true, KindColumn: true, KindConfigKey: true, KindFlag: true, KindEvent: true, KindMigration: true, KindFixture: true, KindTodo: true, KindTeam: true, - KindRelease: true, KindLicense: true, + KindRelease: true, KindLicense: true, KindString: true, } type Node struct { diff --git a/internal/mcp/gcx.go b/internal/mcp/gcx.go index f00ee9c..12892c3 100644 --- a/internal/mcp/gcx.go +++ b/internal/mcp/gcx.go @@ -465,6 +465,18 @@ func encodeAnalyze(kind string, payload any) ([]byte, error) { } } return buf.Bytes(), enc.Close() + case "string_emitters": + items, _ := payload.([]stringEmitterItem) + enc := newGCX(&buf, "analyze.string_emitters", + []string{"id", "context", "value", "emits", "emitters"}, + "count", fmt.Sprintf("%d", len(items)), + ) + for _, it := range items { + if err := enc.WriteRow(it.ID, it.Context, it.Value, it.Emits, it.Emitters); err != nil { + return nil, err + } + } + return buf.Bytes(), enc.Close() case "error_surface": items, _ := payload.([]errorSurfaceItem) enc := newGCX(&buf, "analyze.error_surface", diff --git a/internal/mcp/tools_analyze_string_emitters.go b/internal/mcp/tools_analyze_string_emitters.go new file mode 100644 index 0000000..e27a89b --- /dev/null +++ b/internal/mcp/tools_analyze_string_emitters.go @@ -0,0 +1,117 @@ +package mcp + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/mark3labs/mcp-go/mcp" + + "github.com/zzet/gortex/internal/graph" +) + +// handleAnalyzeStringEmitters walks EdgeEmits edges to KindString +// nodes and groups by (context, value). Mirrors handleAnalyzeEventEmitters +// but works against the broader string domain (metrics, error +// messages, raw routes; later HTML class/id and i18n keys). +// +// Filters: +// +// - context: metric|error_msg|route — narrows to one string domain. +// - name: string value (case-insensitive substring match). 
Use to +// find emitters of a specific metric, error message, or route. +func (s *Server) handleAnalyzeStringEmitters(_ context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { + args := req.GetArguments() + contextFilter := strings.ToLower(strings.TrimSpace(stringArg(args, "context"))) + nameFilter := strings.ToLower(strings.TrimSpace(stringArg(args, "name"))) + + type stringRow struct { + ID string `json:"id"` + Context string `json:"context"` + Value string `json:"value"` + Emits int `json:"emits"` + Emitters []string `json:"emitters,omitempty"` + } + byString := map[string]*stringRow{} + for _, e := range s.graph.AllEdges() { + if e.Kind != graph.EdgeEmits { + continue + } + n := s.graph.GetNode(e.To) + if n == nil || n.Kind != graph.KindString { + continue + } + ctx, _ := n.Meta["context"].(string) + if contextFilter != "" && ctx != contextFilter { + continue + } + if nameFilter != "" && !strings.Contains(strings.ToLower(n.Name), nameFilter) { + continue + } + row, ok := byString[e.To] + if !ok { + row = &stringRow{ + ID: e.To, + Context: ctx, + Value: n.Name, + } + byString[e.To] = row + } + row.Emits++ + row.Emitters = appendUnique(row.Emitters, e.From) + } + rows := make([]*stringRow, 0, len(byString)) + for _, r := range byString { + sort.Strings(r.Emitters) + rows = append(rows, r) + } + sort.Slice(rows, func(i, j int) bool { + if rows[i].Emits != rows[j].Emits { + return rows[i].Emits > rows[j].Emits + } + if rows[i].Context != rows[j].Context { + return rows[i].Context < rows[j].Context + } + return rows[i].Value < rows[j].Value + }) + + if isGCX(req) { + items := make([]stringEmitterItem, 0, len(rows)) + for _, r := range rows { + items = append(items, stringEmitterItem{ + ID: r.ID, + Context: r.Context, + Value: r.Value, + Emits: r.Emits, + Emitters: strings.Join(r.Emitters, ","), + }) + } + return gcxResponse(encodeAnalyze("string_emitters", items)) + } + + if isCompact(req) { + var b strings.Builder + for _, r := range rows { + 
fmt.Fprintf(&b, "%-3d [%s] %s\n", r.Emits, r.Context, r.Value) + } + if len(rows) == 0 { + b.WriteString("no string emitters\n") + } + return mcp.NewToolResultText(b.String()), nil + } + return mcp.NewToolResultJSON(map[string]any{ + "strings": rows, + "total": len(rows), + }) +} + +// stringEmitterItem is the GCX1 row layout for the string_emitters +// analyzer. Mirrors eventEmitterItem. +type stringEmitterItem struct { + ID string `gcx:"id"` + Context string `gcx:"context"` + Value string `gcx:"value"` + Emits int `gcx:"emits"` + Emitters string `gcx:"emitters"` +} diff --git a/internal/mcp/tools_analyze_string_emitters_test.go b/internal/mcp/tools_analyze_string_emitters_test.go new file mode 100644 index 0000000..4406bda --- /dev/null +++ b/internal/mcp/tools_analyze_string_emitters_test.go @@ -0,0 +1,124 @@ +package mcp + +import ( + "context" + "encoding/json" + "testing" + + mcplib "github.com/mark3labs/mcp-go/mcp" + "github.com/zzet/gortex/internal/graph" +) + +func callAnalyzeStringEmitters(t *testing.T, srv *Server, args map[string]any) map[string]any { + t.Helper() + args["kind"] = "string_emitters" + req := mcplib.CallToolRequest{} + req.Params.Name = "analyze" + req.Params.Arguments = args + res, err := srv.handleAnalyze(context.Background(), req) + if err != nil { + t.Fatalf("handleAnalyze: %v", err) + } + if res.IsError { + t.Fatalf("error: %+v", res.Content) + } + textBlock := res.Content[0].(mcplib.TextContent) + var out map[string]any + if err := json.Unmarshal([]byte(textBlock.Text), &out); err != nil { + t.Fatalf("json: %v\n%s", err, textBlock.Text) + } + return out +} + +func addStringNode(g *graph.Graph, id, value, ctx string) { + g.AddNode(&graph.Node{ + ID: id, + Kind: graph.KindString, + Name: value, + Meta: map[string]any{"context": ctx, "value": value}, + }) +} + +func addStringEmitEdge(g *graph.Graph, from, to, ctx, method string) { + g.AddEdge(&graph.Edge{ + From: from, + To: to, + Kind: graph.EdgeEmits, + Meta: 
map[string]any{"context": ctx, "method": method}, + }) +} + +func TestAnalyzeStringEmitters_GroupsByString(t *testing.T) { + srv, _ := setupTestServer(t) + addStringNode(srv.graph, "string::metric::orders.success", "orders.success", "metric") + addStringNode(srv.graph, "string::error_msg::not authorized", "not authorized", "error_msg") + addStringEmitEdge(srv.graph, "f.go::Checkout", "string::metric::orders.success", "metric", "Increment") + addStringEmitEdge(srv.graph, "f.go::Refund", "string::metric::orders.success", "metric", "Increment") + addStringEmitEdge(srv.graph, "f.go::Auth", "string::error_msg::not authorized", "error_msg", "errors.New") + + out := callAnalyzeStringEmitters(t, srv, map[string]any{}) + rows, _ := out["strings"].([]any) + if len(rows) != 2 { + t.Fatalf("expected 2 strings, got %d", len(rows)) + } + first := rows[0].(map[string]any) + if first["value"] != "orders.success" { + t.Errorf("expected orders.success first (more emits), got %v", first["value"]) + } + if first["context"] != "metric" { + t.Errorf("expected first.context = metric, got %v", first["context"]) + } + if int(first["emits"].(float64)) != 2 { + t.Errorf("expected 2 emits, got %v", first["emits"]) + } +} + +func TestAnalyzeStringEmitters_ContextFilter(t *testing.T) { + srv, _ := setupTestServer(t) + addStringNode(srv.graph, "string::metric::a", "a", "metric") + addStringNode(srv.graph, "string::route::/x", "/x", "route") + addStringEmitEdge(srv.graph, "f.go::A", "string::metric::a", "metric", "Increment") + addStringEmitEdge(srv.graph, "f.go::B", "string::route::/x", "route", "HandleFunc") + + out := callAnalyzeStringEmitters(t, srv, map[string]any{ + "context": "route", + }) + rows, _ := out["strings"].([]any) + if len(rows) != 1 { + t.Fatalf("expected 1 string after context=route filter, got %d", len(rows)) + } + first := rows[0].(map[string]any) + if first["context"] != "route" { + t.Errorf("filter leaked: got context=%v", first["context"]) + } +} + +func 
TestAnalyzeStringEmitters_NameSubstringFilter(t *testing.T) { + srv, _ := setupTestServer(t) + addStringNode(srv.graph, "string::metric::orders.success", "orders.success", "metric") + addStringNode(srv.graph, "string::metric::server.memory", "server.memory", "metric") + addStringEmitEdge(srv.graph, "f.go::A", "string::metric::orders.success", "metric", "Increment") + addStringEmitEdge(srv.graph, "f.go::B", "string::metric::server.memory", "metric", "Gauge") + + out := callAnalyzeStringEmitters(t, srv, map[string]any{ + "name": "orders", + }) + rows, _ := out["strings"].([]any) + if len(rows) != 1 { + t.Fatalf("expected 1 string after name=orders filter, got %d", len(rows)) + } +} + +func TestAnalyzeStringEmitters_IgnoresNonStringEmitTargets(t *testing.T) { + // EdgeEmits to KindEvent (the legacy log target) shouldn't appear + // in string_emitters results. + srv, _ := setupTestServer(t) + addEventNode(srv.graph, "event::log::user.login", "user.login", "log") + addEmitsEdge(srv.graph, "f.go::Auth", "event::log::user.login", "Info") + + out := callAnalyzeStringEmitters(t, srv, map[string]any{}) + rows, _ := out["strings"].([]any) + if len(rows) != 0 { + t.Fatalf("expected 0 string emitters (only event present), got %d", len(rows)) + } +} diff --git a/internal/mcp/tools_enhancements.go b/internal/mcp/tools_enhancements.go index eb433a9..ad44b18 100644 --- a/internal/mcp/tools_enhancements.go +++ b/internal/mcp/tools_enhancements.go @@ -650,10 +650,12 @@ func (s *Server) handleAnalyze(ctx context.Context, req mcp.CallToolRequest) (*m return s.handleAnalyzeConfigReaders(ctx, req) case "event_emitters": return s.handleAnalyzeEventEmitters(ctx, req) + case "string_emitters": + return s.handleAnalyzeStringEmitters(ctx, req) case "error_surface": return s.handleAnalyzeErrorSurface(ctx, req) default: - return mcp.NewToolResultError("unknown analyze kind: " + kind + " (expected: dead_code, hotspots, cycles, would_create_cycle, todos, blame, coverage, stale_code, ownership, 
coverage_gaps, stale_flags, releases, cgo_users, wasm_users, orphan_tables, unreferenced_tables, coverage_summary, channel_ops, goroutine_spawns, field_writers, annotation_users, config_readers, event_emitters, error_surface)"), nil + return mcp.NewToolResultError("unknown analyze kind: " + kind + " (expected: dead_code, hotspots, cycles, would_create_cycle, todos, blame, coverage, stale_code, ownership, coverage_gaps, stale_flags, releases, cgo_users, wasm_users, orphan_tables, unreferenced_tables, coverage_summary, channel_ops, goroutine_spawns, field_writers, annotation_users, config_readers, event_emitters, string_emitters, error_surface)"), nil } } diff --git a/internal/parser/languages/go_strings.go b/internal/parser/languages/go_strings.go new file mode 100644 index 0000000..44c18d0 --- /dev/null +++ b/internal/parser/languages/go_strings.go @@ -0,0 +1,239 @@ +package languages + +import ( + "crypto/sha1" + "encoding/hex" + "strings" + + sitter "github.com/zzet/gortex/internal/parser/tsitter" + + "github.com/zzet/gortex/internal/graph" + "github.com/zzet/gortex/internal/parser" +) + +// goStringContext labels the API position the literal was found in. +// Used as a discriminator on KindString node IDs and on the +// EdgeEmits.Meta["context"] field so analyzers can filter by domain. +type goStringContext string + +const ( + stringCtxMetric goStringContext = "metric" + stringCtxErrorMsg goStringContext = "error_msg" + stringCtxRoute goStringContext = "route" +) + +// goMetricMethods is the whitelist of method names where the first +// string-literal argument is taken as the metric name. Limited to +// statsd / dogstatsd-style APIs (which always pass the metric name +// as the first arg). Prometheus needs a separate composite-literal +// extractor (CounterOpts{Name: "..."} etc.) — out of scope here. +// +// Generic method names like Set / Inc / Add are deliberately +// excluded — they appear on too many unrelated types. 
+var goMetricMethods = map[string]bool{
+	"Increment":          true, // statsd, dogstatsd
+	"Decrement":          true,
+	"Count":              true,
+	"Gauge":              true,
+	"Histogram":          true,
+	"Distribution":       true,
+	"Timing":             true,
+	"TimeInMilliseconds": true,
+	"Event":              true, // dogstatsd Event
+	"ServiceCheck":       true, // dogstatsd ServiceCheck
+}
+
+// goErrorMessageCalls is the set of (package-or-receiver, function)
+// pairs whose string-literal argument is recorded under the
+// "error_msg" context. Package calls and method calls both look like
+// selector calls syntactically, so the match is made on the receiver
+// text as a heuristic — `errors.New(...)` and `fmt.Errorf(...)` are
+// by far the dominant idioms.
+var goErrorMessageCalls = map[[2]string]bool{
+	{"errors", "New"}:     true,
+	{"fmt", "Errorf"}:     true,
+	{"xerrors", "New"}:    true, // golang.org/x/xerrors
+	{"xerrors", "Errorf"}: true,
+}
+
+// goRouteMethods is the set of method names that, when called with a
+// string-literal path-like first argument, emit a "route" node.
+// Mixes net/http (HandleFunc/Handle), gorilla/mux (HandleFunc/Handle),
+// chi (Get/Post/...), gin/echo (GET/POST/...), and a handful of
+// common router shapes that don't go through the contracts pipeline.
+// Membership alone is not enough: detectGoRoute additionally requires
+// the literal to pass looksLikeRoute.
+var goRouteMethods = map[string]bool{
+	"Handle":     true,
+	"HandleFunc": true,
+	"Get":        true,
+	"Post":       true,
+	"Put":        true,
+	"Delete":     true,
+	"Patch":      true,
+	"Options":    true,
+	"Head":       true,
+	"Connect":    true,
+	"Trace":      true,
+	"GET":        true,
+	"POST":       true,
+	"PUT":        true,
+	"DELETE":     true,
+	"PATCH":      true,
+	"OPTIONS":    true,
+	"HEAD":       true,
+	"CONNECT":    true,
+	"TRACE":      true,
+}
+
+// goStringEvent is one deferred string-literal observation, queued
+// during AST traversal and flushed at end-of-file by emitGoStringEvents.
+type goStringEvent struct {
+	// context is the string domain (metric, error_msg, route); it ends
+	// up in the node ID and in the node and edge Meta["context"].
+	context goStringContext
+	// method is the callee name copied to the edge Meta["method"]; for
+	// error messages the extractor stores it as "receiver.method".
+	method string
+	// value is the literal text as returned by firstStringLiteralArg.
+	value string
+	// line is the 1-based source line of the call; it is used to look
+	// up the enclosing function and is stored on the edge.
+	line int
+}
+
+// detectGoMetric checks a method call against the metric whitelist and
+// returns the metric name taken from the call's first string-literal
+// argument. Arguments that are not string literals are skipped by
+// firstStringLiteralArg, so the literal is not necessarily arg[0].
+func detectGoMetric(callExpr *sitter.Node, method string, src []byte) (string, bool) {
+	if callExpr == nil {
+		return "", false
+	}
+	// Only whitelisted statsd/dogstatsd-style method names qualify.
+	if !goMetricMethods[method] {
+		return "", false
+	}
+	return firstStringLiteralArg(callExpr, src)
+}
+
+// detectGoErrorMessage checks for errors.New / fmt.Errorf-style calls,
+// matched on the (receiver, method) pair, and returns the call's first
+// string-literal argument. The text is returned as written in source,
+// so format verbs such as %d are not expanded.
+func detectGoErrorMessage(callExpr *sitter.Node, receiver, method string, src []byte) (string, bool) {
+	if callExpr == nil {
+		return "", false
+	}
+	if !goErrorMessageCalls[[2]string{receiver, method}] {
+		return "", false
+	}
+	return firstStringLiteralArg(callExpr, src)
+}
+
+// detectGoRoute checks for HTTP-router shapes: a whitelisted method
+// name whose first string-literal argument is path-like. Path-likeness
+// is decided by looksLikeRoute and suppresses false positives from
+// generic method names like Get on map-like types.
+func detectGoRoute(callExpr *sitter.Node, method string, src []byte) (string, bool) {
+	if callExpr == nil {
+		return "", false
+	}
+	if !goRouteMethods[method] {
+		return "", false
+	}
+	value, ok := firstStringLiteralArg(callExpr, src)
+	if !ok {
+		return "", false
+	}
+	if !looksLikeRoute(value) {
+		return "", false
+	}
+	return value, true
+}
+
+// looksLikeRoute is a cheap sanity check that keeps map.Get("foo")
+// and similar generic calls out of the route bucket. It accepts:
+//
+//   - values that start with "/" (the common case), and
+//   - net/http 1.22+ patterns of the form "METHOD [host]/path", such
+//     as "GET /foo", for the HTTP methods listed below.
+//
+// Anything else, including the empty string, is rejected.
+func looksLikeRoute(s string) bool {
+	if s == "" {
+		return false
+	}
+	if strings.HasPrefix(s, "/") {
+		return true
+	}
+	// net/http 1.22+ pattern syntax: "GET /foo". The method list covers
+	// every verb in goRouteMethods. A pattern always has a "/" after the
+	// method, which keeps literals like "GET request failed" out.
+	for _, m := range []string{"GET ", "POST ", "PUT ", "DELETE ", "PATCH ", "OPTIONS ", "HEAD ", "CONNECT ", "TRACE "} {
+		if strings.HasPrefix(s, m) && strings.Contains(s[len(m):], "/") {
+			return true
+		}
+	}
+	return false
+}
+
+// firstStringLiteralArg returns the value of the first string-literal
+// argument of a call expression, with surrounding quotes stripped.
+// The helper is shared with detectGoLogEvent's logic but kept separate
+// so each context can apply its own filtering.
+//
+// Arguments are scanned in order and anything that is not itself a
+// string literal is skipped, so the result is the first literal
+// argument, not necessarily arg[0]. Exactly one pair of delimiters is
+// removed; escape sequences are returned as written in source. An
+// empty literal yields ("", false) without looking at later arguments.
+func firstStringLiteralArg(callExpr *sitter.Node, src []byte) (string, bool) {
+	args := callExpr.ChildByFieldName("arguments")
+	if args == nil {
+		return "", false
+	}
+	for i := 0; i < int(args.NamedChildCount()); i++ {
+		c := args.NamedChild(i)
+		if c == nil {
+			continue
+		}
+		if c.Type() != "interpreted_string_literal" && c.Type() != "raw_string_literal" {
+			continue
+		}
+		text := c.Content(src)
+		// Strip only the opening and closing delimiter. strings.Trim with
+		// a cutset would also remove quote or backtick characters that
+		// belong to the literal itself, e.g. the trailing \" of an
+		// escaped quote or the quotes inside a raw `"json"` literal.
+		if n := len(text); n >= 2 && text[0] == text[n-1] && (text[0] == '"' || text[0] == '`') {
+			text = text[1 : n-1]
+		}
+		if text == "" {
+			return "", false
+		}
+		return text, true
+	}
+	return "", false
+}
+
+// emitGoStringEvents creates one KindString node per (context, value)
+// pair seen and an EdgeEmits from the enclosing function/method to
+// each. Node de-duplication is scoped to this call: a node ID is
+// appended to result.Nodes only the first time it is seen here, while
+// every event with a resolvable caller gets its own edge. Events for
+// which callerLookup returns "" are dropped entirely.
+func emitGoStringEvents(events []goStringEvent, callerLookup func(line int) string, filePath string, result *parser.ExtractionResult) {
+	if len(events) == 0 {
+		return
+	}
+	seen := make(map[string]struct{}, len(events))
+	for _, e := range events {
+		callerID := callerLookup(e.line)
+		if callerID == "" {
+			continue
+		}
+		strID := goStringNodeID(e.context, e.value)
+		if _, ok := seen[strID]; !ok {
+			seen[strID] = struct{}{}
+			result.Nodes = append(result.Nodes, &graph.Node{
+				ID:       strID,
+				Kind:     graph.KindString,
+				Name:     e.value,
+				FilePath: filePath, // first sighting; not authoritative
+				Language: "go",
+				Meta: map[string]any{
+					"context": string(e.context),
+					"value":   e.value,
+				},
+			})
+		}
+		result.Edges = append(result.Edges, &graph.Edge{
+			From:     callerID,
+			To:       strID,
+			Kind:     graph.EdgeEmits,
+			FilePath: filePath,
+			Line:     e.line,
+			Origin:   graph.OriginASTInferred,
+			Meta: map[string]any{
+				"context": string(e.context),
+				"method":  e.method,
+			},
+		})
+	}
+}
+
+// goStringNodeID composes the canonical synthetic ID for a string
+// node.
Long values (over 200 chars) are hashed to keep IDs sane — +// the original text is preserved in node.Name and node.Meta["value"]. +func goStringNodeID(ctx goStringContext, value string) string { + if len(value) > 200 { + h := sha1.Sum([]byte(value)) + return "string::" + string(ctx) + "::sha1:" + hex.EncodeToString(h[:])[:16] + } + return "string::" + string(ctx) + "::" + value +} diff --git a/internal/parser/languages/go_strings_test.go b/internal/parser/languages/go_strings_test.go new file mode 100644 index 0000000..77d9924 --- /dev/null +++ b/internal/parser/languages/go_strings_test.go @@ -0,0 +1,214 @@ +package languages + +import ( + "strings" + "testing" + + "github.com/zzet/gortex/internal/graph" +) + +func TestGoStrings_StatsdMetricsExtracted(t *testing.T) { + src := `package foo + +type StatsdClient struct{} + +func (c *StatsdClient) Increment(name string, tags []string, rate float64) error { return nil } +func (c *StatsdClient) Gauge(name string, value float64, tags []string, rate float64) error { return nil } + +func Run(c *StatsdClient) { + c.Increment("orders.checkout.success", nil, 1) + c.Gauge("server.memory.bytes", 12345, nil, 1) +} +` + fix := runGoExtract(t, src) + + strs := fix.nodesByKind[graph.KindString] + if len(strs) != 2 { + t.Fatalf("expected 2 KindString, got %d: %+v", len(strs), strs) + } + gotByValue := map[string]*graph.Node{} + for _, n := range strs { + gotByValue[n.Name] = n + if ctx, _ := n.Meta["context"].(string); ctx != "metric" { + t.Errorf("node %q: context = %q, want metric", n.Name, ctx) + } + want := "string::metric::" + n.Name + if n.ID != want { + t.Errorf("id = %q, want %q", n.ID, want) + } + } + if gotByValue["orders.checkout.success"] == nil || gotByValue["server.memory.bytes"] == nil { + t.Errorf("missing expected metric names: %v", gotByValue) + } + + emits := fix.edgesByKind[graph.EdgeEmits] + if len(emits) != 2 { + t.Errorf("expected 2 EdgeEmits, got %d", len(emits)) + } + for _, e := range emits { + if e.From 
!= "pkg/foo.go::Run" { + t.Errorf("emit from = %q, want pkg/foo.go::Run", e.From) + } + if ctx, _ := e.Meta["context"].(string); ctx != "metric" { + t.Errorf("emit context = %q", ctx) + } + } +} + +func TestGoStrings_GenericMethodNamesIgnored(t *testing.T) { + // Set / Inc / Add are too generic — would create false positives + // against map and counter types — and are deliberately excluded. + src := `package foo + +type M struct{} +func (m M) Set(k, v string) {} +func (m M) Inc(k string) {} +func (m M) Add(k string, n int) {} + +func Run(m M) { + m.Set("magic", "value") + m.Inc("counter") + m.Add("tally", 1) +} +` + fix := runGoExtract(t, src) + if got := fix.nodesByKind[graph.KindString]; len(got) != 0 { + t.Errorf("expected no metric strings, got %d: %+v", len(got), got) + } +} + +func TestGoStrings_ErrorsNewAndFmtErrorf(t *testing.T) { + src := `package foo + +import ( + "errors" + "fmt" +) + +func A() error { return errors.New("user not found") } +func B(id int) error { return fmt.Errorf("invalid id %d", id) } +` + fix := runGoExtract(t, src) + + strs := fix.nodesByKind[graph.KindString] + if len(strs) != 2 { + t.Fatalf("expected 2 KindString, got %d: %+v", len(strs), strs) + } + gotByValue := map[string]*graph.Node{} + for _, n := range strs { + gotByValue[n.Name] = n + if ctx, _ := n.Meta["context"].(string); ctx != "error_msg" { + t.Errorf("node %q: context = %q, want error_msg", n.Name, ctx) + } + } + if gotByValue["user not found"] == nil { + t.Errorf("missing 'user not found': %v", gotByValue) + } + if gotByValue["invalid id %d"] == nil { + t.Errorf("missing 'invalid id %%d': %v", gotByValue) + } +} + +func TestGoStrings_HTTPRoutesExtracted(t *testing.T) { + src := `package foo + +type Mux struct{} +func (m *Mux) HandleFunc(pattern string, h func()) {} +func (m *Mux) Get(pattern string, h func()) {} + +func Wire(m *Mux) { + m.HandleFunc("/api/v1/users", nil) + m.Get("/health", nil) + m.HandleFunc("GET /api/v1/orders", nil) +} +` + fix := runGoExtract(t, 
src) + + strs := fix.nodesByKind[graph.KindString] + if len(strs) != 3 { + t.Fatalf("expected 3 KindString routes, got %d: %+v", len(strs), strs) + } + got := map[string]bool{} + for _, n := range strs { + got[n.Name] = true + if ctx, _ := n.Meta["context"].(string); ctx != "route" { + t.Errorf("node %q: context = %q, want route", n.Name, ctx) + } + } + for _, want := range []string{"/api/v1/users", "/health", "GET /api/v1/orders"} { + if !got[want] { + t.Errorf("missing route %q", want) + } + } +} + +func TestGoStrings_NonRouteStringSkipped(t *testing.T) { + // Get/Post on map-like types — the looksLikeRoute filter should + // keep these out of the route bucket. + src := `package foo + +type Cache struct{} +func (c *Cache) Get(key string) string { return "" } + +func Run(c *Cache) { + _ = c.Get("user:42:profile") +} +` + fix := runGoExtract(t, src) + if got := fix.nodesByKind[graph.KindString]; len(got) != 0 { + t.Errorf("expected no routes for cache.Get, got: %+v", got) + } +} + +func TestGoStrings_SameMetricAcrossCallsDeduplicates(t *testing.T) { + src := `package foo + +type Client struct{} +func (c *Client) Increment(name string) {} + +func A(c *Client) { c.Increment("orders.success") } +func B(c *Client) { c.Increment("orders.success") } +` + fix := runGoExtract(t, src) + + strs := fix.nodesByKind[graph.KindString] + if len(strs) != 1 { + t.Fatalf("expected 1 KindString (deduplicated), got %d", len(strs)) + } + emits := fix.edgesByKind[graph.EdgeEmits] + if len(emits) != 2 { + t.Errorf("expected 2 EdgeEmits (one per caller), got %d", len(emits)) + } +} + +func TestGoStrings_NodeIDForLongValueIsHashed(t *testing.T) { + // 250 chars > 200-char threshold — should hash to keep IDs sane. 
+ long := strings.Repeat("a", 250) + got := goStringNodeID(stringCtxErrorMsg, long) + want := "string::error_msg::sha1:" + if got[:len(want)] != want { + t.Errorf("long-value id = %q, want prefix %q", got, want) + } + short := goStringNodeID(stringCtxMetric, "ok.short") + if short != "string::metric::ok.short" { + t.Errorf("short-value id = %q", short) + } +} + +func TestLooksLikeRoute(t *testing.T) { + cases := map[string]bool{ + "/users": true, + "/api/v1/orders": true, + "GET /foo": true, + "POST /bar": true, + "": false, + "plain": false, + "user:42:profile": false, + "orders.checkout.event": false, + } + for in, want := range cases { + if got := looksLikeRoute(in); got != want { + t.Errorf("looksLikeRoute(%q) = %v, want %v", in, got, want) + } + } +} diff --git a/internal/parser/languages/golang.go b/internal/parser/languages/golang.go index e079278..3d15a5a 100644 --- a/internal/parser/languages/golang.go +++ b/internal/parser/languages/golang.go @@ -216,6 +216,7 @@ func (e *GoExtractor) Extract(filePath string, src []byte) (*parser.ExtractionRe var fieldValSels []goDeferredValueSel var fieldValIdents []goDeferredValueIdent var observabilityEvents []goObservabilityEvent + var stringEvents []goStringEvent var flagEvents []goFlagEvent var configEvents []goConfigEvent var sqlEvents []goSQLEvent @@ -273,6 +274,31 @@ func (e *GoExtractor) Extract(filePath string, src []byte) (*parser.ExtractionRe line: expr.StartLine + 1, }) } + receiverText := m.Captures["callm.receiver"].Text + if name, ok := detectGoMetric(expr.Node, method, src); ok { + stringEvents = append(stringEvents, goStringEvent{ + context: stringCtxMetric, + method: method, + value: name, + line: expr.StartLine + 1, + }) + } + if msg, ok := detectGoErrorMessage(expr.Node, receiverText, method, src); ok { + stringEvents = append(stringEvents, goStringEvent{ + context: stringCtxErrorMsg, + method: receiverText + "." 
+ method, + value: msg, + line: expr.StartLine + 1, + }) + } + if route, ok := detectGoRoute(expr.Node, method, src); ok { + stringEvents = append(stringEvents, goStringEvent{ + context: stringCtxRoute, + method: method, + value: route, + line: expr.StartLine + 1, + }) + } if provider, flagName, ok := detectGoFlagCheck(expr.Node, method, src); ok { flagEvents = append(flagEvents, goFlagEvent{ provider: provider, @@ -498,6 +524,11 @@ func (e *GoExtractor) Extract(filePath string, src []byte) (*parser.ExtractionRe func(line int) string { return findEnclosingFunc(funcRanges, line) }, filePath, result) + // --- String observations (metrics, error messages, routes) --- + emitGoStringEvents(stringEvents, + func(line int) string { return findEnclosingFunc(funcRanges, line) }, + filePath, result) + // --- Feature flag checks --- emitGoFlagChecks(flagEvents, func(line int) string { return findEnclosingFunc(funcRanges, line) },