From 78b278436290066b23ce7a2342d2673814ccd401 Mon Sep 17 00:00:00 2001
From: Andrey Kumanyaev <me@zzet.org>
Date: Wed, 6 May 2026 19:38:53 +0200
Subject: [PATCH] graph, parser, mcp: KindString node kind plus string_emitters
 analyzer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per-repo index for string literals that cross an API boundary —
Datadog/dogstatsd metric names, errors.New / fmt.Errorf messages,
and raw HTTP route paths. Each extractor emits a KindString node
and an EdgeEmits from the enclosing function; applyRepoPrefix
stamps the repo slug so two repos emitting the same metric stay
distinct.

Go extractor whitelist:
- metric: Increment, Decrement, Count, Gauge, Histogram,
  Distribution, Timing, TimeInMilliseconds, Event, ServiceCheck
  (statsd / dogstatsd shape; Set/Inc/Add deliberately excluded
  as too generic).
- error_msg: errors.New, fmt.Errorf, xerrors.New, xerrors.Errorf.
- route: Handle / HandleFunc / Get / Post / Put / Delete / Patch /
  Options / Head / Connect / Trace plus the upper-case gin/echo
  spellings (GET / POST / …), gated on a path-likeness check so
  cache.Get("user:42") doesn't fall in. Accepts net/http 1.22+
  pattern syntax ("GET /foo").

New analyzer `analyze kind=string_emitters` groups by
(context, value) with context and name-substring filters; supports
JSON, GCX1 wire format, and compact-text outputs. Long values
(>200 bytes) are replaced in the node ID by "sha1:" plus the first
16 hex digits of their SHA-1; short ones keep the literal text for
direct grep.
---
 internal/graph/node.go                        |  12 +-
 internal/mcp/gcx.go                           |  12 +
 internal/mcp/tools_analyze_string_emitters.go | 117 +++++++++
 .../mcp/tools_analyze_string_emitters_test.go | 124 +++++++++
 internal/mcp/tools_enhancements.go            |   4 +-
 internal/parser/languages/go_strings.go       | 239 ++++++++++++++++++
 internal/parser/languages/go_strings_test.go  | 214 ++++++++++++++++
 internal/parser/languages/golang.go           |  31 +++
 8 files changed, 751 insertions(+), 2 deletions(-)
 create mode 100644 internal/mcp/tools_analyze_string_emitters.go
 create mode 100644 internal/mcp/tools_analyze_string_emitters_test.go
 create mode 100644 internal/parser/languages/go_strings.go
 create mode 100644 internal/parser/languages/go_strings_test.go
diff --git a/internal/graph/node.go b/internal/graph/node.go
index afb6778..1e51ad8 100644
--- a/internal/graph/node.go
+++ b/internal/graph/node.go
@@ -98,6 +98,16 @@ const (
 	// KindLicense represents an SPDX license identifier. ID convention:
 	// `license::<spdx>`. Files link to it via EdgeLicensedAs.
 	KindLicense NodeKind = "license"
+	// KindString represents a string literal that crosses an API
+	// boundary worth tracking — Datadog/dogstatsd metric names,
+	// errors.New / fmt.Errorf messages, raw HTTP route paths, and
+	// (later) HTML class/id values. ID convention:
+	// `string::<context>::<value-or-hash>`. Context ∈
+	// metric|error_msg|route|html_class|html_id|… EdgeEmits links the
+	// enclosing function/method to the string node, mirroring KindEvent.
+	// Per-repo: applyRepoPrefix prefixes every node ID with the repo
+	// slug so two repos that emit the same string don't collide.
+	KindString NodeKind = "string"
 )
 
 var validNodeKinds = map[NodeKind]bool{
@@ -111,7 +121,7 @@ var validNodeKinds = map[NodeKind]bool{
 	KindTable: true, KindColumn: true, KindConfigKey: true,
 	KindFlag: true, KindEvent: true, KindMigration: true,
 	KindFixture: true, KindTodo: true, KindTeam: true,
-	KindRelease: true, KindLicense: true,
+	KindRelease: true, KindLicense: true, KindString: true,
 }
 
 type Node struct {
diff --git a/internal/mcp/gcx.go b/internal/mcp/gcx.go
index f00ee9c..12892c3 100644
--- a/internal/mcp/gcx.go
+++ b/internal/mcp/gcx.go
@@ -465,6 +465,18 @@ func encodeAnalyze(kind string, payload any) ([]byte, error) {
 			}
 		}
 		return buf.Bytes(), enc.Close()
+	case "string_emitters":
+		items, _ := payload.([]stringEmitterItem)
+		enc := newGCX(&buf, "analyze.string_emitters",
+			[]string{"id", "context", "value", "emits", "emitters"},
+			"count", fmt.Sprintf("%d", len(items)),
+		)
+		for _, it := range items {
+			if err := enc.WriteRow(it.ID, it.Context, it.Value, it.Emits, it.Emitters); err != nil {
+				return nil, err
+			}
+		}
+		return buf.Bytes(), enc.Close()
 	case "error_surface":
 		items, _ := payload.([]errorSurfaceItem)
 		enc := newGCX(&buf, "analyze.error_surface",
diff --git a/internal/mcp/tools_analyze_string_emitters.go b/internal/mcp/tools_analyze_string_emitters.go
new file mode 100644
index 0000000..e27a89b
--- /dev/null
+++ b/internal/mcp/tools_analyze_string_emitters.go
@@ -0,0 +1,117 @@
+package mcp
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/mark3labs/mcp-go/mcp"
+
+	"github.com/zzet/gortex/internal/graph"
+)
+
+// handleAnalyzeStringEmitters walks EdgeEmits edges to KindString
+// nodes and groups them per string node. Mirrors handleAnalyzeEventEmitters
+// but works against the broader string domain (metrics, error
+// messages, raw routes; later HTML class/id and i18n keys). Rows are
+// ordered by emit count (descending), then context, value, and node ID.
+//
+// Filters:
+//
+//   - context: metric|error_msg|route — narrows to one string domain.
+//   - name: string value (case-insensitive substring match). Use to
+//     find emitters of a specific metric, error message, or route.
+func (s *Server) handleAnalyzeStringEmitters(_ context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+	args := req.GetArguments()
+	contextFilter := strings.ToLower(strings.TrimSpace(stringArg(args, "context")))
+	nameFilter := strings.ToLower(strings.TrimSpace(stringArg(args, "name")))
+
+	type stringRow struct {
+		ID       string   `json:"id"`
+		Context  string   `json:"context"`
+		Value    string   `json:"value"`
+		Emits    int      `json:"emits"`
+		Emitters []string `json:"emitters,omitempty"`
+	}
+	byString := map[string]*stringRow{}
+	for _, e := range s.graph.AllEdges() {
+		if e.Kind != graph.EdgeEmits {
+			continue
+		}
+		n := s.graph.GetNode(e.To)
+		if n == nil || n.Kind != graph.KindString {
+			continue
+		}
+		ctx, _ := n.Meta["context"].(string)
+		if contextFilter != "" && ctx != contextFilter {
+			continue
+		}
+		if nameFilter != "" && !strings.Contains(strings.ToLower(n.Name), nameFilter) {
+			continue
+		}
+		row, ok := byString[e.To]
+		if !ok {
+			row = &stringRow{ID: e.To, Context: ctx, Value: n.Name}
+			byString[e.To] = row
+		}
+		row.Emits++
+		row.Emitters = appendUnique(row.Emitters, e.From)
+	}
+	rows := make([]*stringRow, 0, len(byString))
+	for _, r := range byString {
+		sort.Strings(r.Emitters)
+		rows = append(rows, r)
+	}
+	sort.Slice(rows, func(i, j int) bool {
+		if rows[i].Emits != rows[j].Emits {
+			return rows[i].Emits > rows[j].Emits
+		}
+		if rows[i].Context != rows[j].Context {
+			return rows[i].Context < rows[j].Context
+		}
+		if rows[i].Value != rows[j].Value {
+			return rows[i].Value < rows[j].Value
+		}
+		return rows[i].ID < rows[j].ID // same string from another repo: IDs still differ
+	})
+
+	if isGCX(req) {
+		items := make([]stringEmitterItem, 0, len(rows))
+		for _, r := range rows {
+			items = append(items, stringEmitterItem{
+				ID:       r.ID,
+				Context:  r.Context,
+				Value:    r.Value,
+				Emits:    r.Emits,
+				Emitters: strings.Join(r.Emitters, ","),
+			})
+		}
+		return gcxResponse(encodeAnalyze("string_emitters", items))
+	}
+
+	if isCompact(req) {
+		var b strings.Builder
+		for _, r := range rows {
+			fmt.Fprintf(&b, "%-3d [%s] %s\n", r.Emits, r.Context, r.Value)
+		}
+		if len(rows) == 0 {
+			b.WriteString("no string emitters\n")
+		}
+		return mcp.NewToolResultText(b.String()), nil
+	}
+	return mcp.NewToolResultJSON(map[string]any{
+		"strings": rows,
+		"total":   len(rows),
+	})
+}
+
+// stringEmitterItem is the GCX1 row layout for the string_emitters
+// analyzer. Mirrors eventEmitterItem.
+type stringEmitterItem struct {
+	ID       string `gcx:"id"`
+	Context  string `gcx:"context"`
+	Value    string `gcx:"value"`
+	Emits    int    `gcx:"emits"`
+	Emitters string `gcx:"emitters"` // emitter node IDs, sorted and joined with ","
+}
diff --git a/internal/mcp/tools_analyze_string_emitters_test.go b/internal/mcp/tools_analyze_string_emitters_test.go
new file mode 100644
index 0000000..4406bda
--- /dev/null
+++ b/internal/mcp/tools_analyze_string_emitters_test.go
@@ -0,0 +1,124 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+
+	mcplib "github.com/mark3labs/mcp-go/mcp"
+	"github.com/zzet/gortex/internal/graph"
+)
+
+func callAnalyzeStringEmitters(t *testing.T, srv *Server, args map[string]any) map[string]any {
+	t.Helper()
+	args["kind"] = "string_emitters"
+	req := mcplib.CallToolRequest{}
+	req.Params.Name = "analyze"
+	req.Params.Arguments = args
+	res, err := srv.handleAnalyze(context.Background(), req)
+	if err != nil {
+		t.Fatalf("handleAnalyze: %v", err)
+	}
+	if res.IsError {
+		t.Fatalf("error: %+v", res.Content)
+	}
+	textBlock := res.Content[0].(mcplib.TextContent)
+	var out map[string]any
+	if err := json.Unmarshal([]byte(textBlock.Text), &out); err != nil {
+		t.Fatalf("json: %v\n%s", err, textBlock.Text)
+	}
+	return out
+}
+
+func addStringNode(g *graph.Graph, id, value, ctx string) {
+	g.AddNode(&graph.Node{
+		ID:   id,
+		Kind: graph.KindString,
+		Name: value,
+		Meta: map[string]any{"context": ctx, "value": value},
+	})
+}
+
+func addStringEmitEdge(g *graph.Graph, from, to, ctx, method string) {
+	g.AddEdge(&graph.Edge{
+		From: from,
+		To:   to,
+		Kind: graph.EdgeEmits,
+		Meta: map[string]any{"context": ctx, "method": method},
+	})
+}
+
+func TestAnalyzeStringEmitters_GroupsByString(t *testing.T) {
+	srv, _ := setupTestServer(t)
+	addStringNode(srv.graph, "string::metric::orders.success", "orders.success", "metric")
+	addStringNode(srv.graph, "string::error_msg::not authorized", "not authorized", "error_msg")
+	addStringEmitEdge(srv.graph, "f.go::Checkout", "string::metric::orders.success", "metric", "Increment")
+	addStringEmitEdge(srv.graph, "f.go::Refund", "string::metric::orders.success", "metric", "Increment")
+	addStringEmitEdge(srv.graph, "f.go::Auth", "string::error_msg::not authorized", "error_msg", "errors.New")
+
+	out := callAnalyzeStringEmitters(t, srv, map[string]any{})
+	rows, _ := out["strings"].([]any)
+	if len(rows) != 2 {
+		t.Fatalf("expected 2 strings, got %d", len(rows))
+	}
+	first := rows[0].(map[string]any)
+	if first["value"] != "orders.success" {
+		t.Errorf("expected orders.success first (more emits), got %v", first["value"])
+	}
+	if first["context"] != "metric" {
+		t.Errorf("expected first.context = metric, got %v", first["context"])
+	}
+	if int(first["emits"].(float64)) != 2 {
+		t.Errorf("expected 2 emits, got %v", first["emits"])
+	}
+}
+
+func TestAnalyzeStringEmitters_ContextFilter(t *testing.T) {
+	srv, _ := setupTestServer(t)
+	addStringNode(srv.graph, "string::metric::a", "a", "metric")
+	addStringNode(srv.graph, "string::route::/x", "/x", "route")
+	addStringEmitEdge(srv.graph, "f.go::A", "string::metric::a", "metric", "Increment")
+	addStringEmitEdge(srv.graph, "f.go::B", "string::route::/x", "route", "HandleFunc")
+
+	out := callAnalyzeStringEmitters(t, srv, map[string]any{
+		"context": "route",
+	})
+	rows, _ := out["strings"].([]any)
+	if len(rows) != 1 {
+		t.Fatalf("expected 1 string after context=route filter, got %d", len(rows))
+	}
+	first := rows[0].(map[string]any)
+	if first["context"] != "route" {
+		t.Errorf("filter leaked: got context=%v", first["context"])
+	}
+}
+
+func TestAnalyzeStringEmitters_NameSubstringFilter(t *testing.T) {
+	srv, _ := setupTestServer(t)
+	addStringNode(srv.graph, "string::metric::orders.success", "orders.success", "metric")
+	addStringNode(srv.graph, "string::metric::server.memory", "server.memory", "metric")
+	addStringEmitEdge(srv.graph, "f.go::A", "string::metric::orders.success", "metric", "Increment")
+	addStringEmitEdge(srv.graph, "f.go::B", "string::metric::server.memory", "metric", "Gauge")
+
+	out := callAnalyzeStringEmitters(t, srv, map[string]any{
+		"name": "orders",
+	})
+	rows, _ := out["strings"].([]any)
+	if len(rows) != 1 {
+		t.Fatalf("expected 1 string after name=orders filter, got %d", len(rows))
+	}
+}
+
+func TestAnalyzeStringEmitters_IgnoresNonStringEmitTargets(t *testing.T) {
+	// EdgeEmits to KindEvent (the legacy log target) shouldn't appear
+	// in string_emitters results.
+	srv, _ := setupTestServer(t)
+	addEventNode(srv.graph, "event::log::user.login", "user.login", "log")
+	addEmitsEdge(srv.graph, "f.go::Auth", "event::log::user.login", "Info")
+
+	out := callAnalyzeStringEmitters(t, srv, map[string]any{})
+	rows, _ := out["strings"].([]any)
+	if len(rows) != 0 {
+		t.Fatalf("expected 0 string emitters (only event present), got %d", len(rows))
+	}
+}
diff --git a/internal/mcp/tools_enhancements.go b/internal/mcp/tools_enhancements.go
index eb433a9..ad44b18 100644
--- a/internal/mcp/tools_enhancements.go
+++ b/internal/mcp/tools_enhancements.go
@@ -650,10 +650,12 @@ func (s *Server) handleAnalyze(ctx context.Context, req mcp.CallToolRequest) (*m
 		return s.handleAnalyzeConfigReaders(ctx, req)
 	case "event_emitters":
 		return s.handleAnalyzeEventEmitters(ctx, req)
+	case "string_emitters":
+		return s.handleAnalyzeStringEmitters(ctx, req)
 	case "error_surface":
 		return s.handleAnalyzeErrorSurface(ctx, req)
 	default:
-		return mcp.NewToolResultError("unknown analyze kind: " + kind + " (expected: dead_code, hotspots, cycles, would_create_cycle, todos, blame, coverage, stale_code, ownership, coverage_gaps, stale_flags, releases, cgo_users, wasm_users, orphan_tables, unreferenced_tables, coverage_summary, channel_ops, goroutine_spawns, field_writers, annotation_users, config_readers, event_emitters, error_surface)"), nil
+		return mcp.NewToolResultError("unknown analyze kind: " + kind + " (expected: dead_code, hotspots, cycles, would_create_cycle, todos, blame, coverage, stale_code, ownership, coverage_gaps, stale_flags, releases, cgo_users, wasm_users, orphan_tables, unreferenced_tables, coverage_summary, channel_ops, goroutine_spawns, field_writers, annotation_users, config_readers, event_emitters, string_emitters, error_surface)"), nil
 	}
 }
 
diff --git a/internal/parser/languages/go_strings.go b/internal/parser/languages/go_strings.go
new file mode 100644
index 0000000..44c18d0
--- /dev/null
+++ b/internal/parser/languages/go_strings.go
@@ -0,0 +1,239 @@
+package languages
+
+import (
+	"crypto/sha1"
+	"encoding/hex"
+	"strings"
+
+	sitter "github.com/zzet/gortex/internal/parser/tsitter"
+
+	"github.com/zzet/gortex/internal/graph"
+	"github.com/zzet/gortex/internal/parser"
+)
+
+// goStringContext labels the API position the literal was found in.
+// Used as a discriminator on KindString node IDs and on the
+// EdgeEmits.Meta["context"] field so analyzers can filter by domain.
+type goStringContext string
+
+const (
+	stringCtxMetric   goStringContext = "metric"
+	stringCtxErrorMsg goStringContext = "error_msg"
+	stringCtxRoute    goStringContext = "route"
+)
+
+// goMetricMethods is the whitelist of method names where the first
+// string-literal argument is taken as the metric name. Limited to
+// statsd / dogstatsd-style APIs (which always pass the metric name
+// as the first arg). Prometheus needs a separate composite-literal
+// extractor (CounterOpts{Name: "..."} etc.) — out of scope here.
+//
+// Generic method names like Set / Inc / Add are deliberately
+// excluded — they appear on too many unrelated types.
+var goMetricMethods = map[string]bool{
+	"Increment":          true, // statsd, dogstatsd
+	"Decrement":          true,
+	"Count":              true,
+	"Gauge":              true,
+	"Histogram":          true,
+	"Distribution":       true,
+	"Timing":             true,
+	"TimeInMilliseconds": true,
+	"Event":              true, // dogstatsd Event
+	"ServiceCheck":       true, // dogstatsd ServiceCheck
+}
+
+// goErrorMessageCalls is the set of (receiver text, function) pairs
+// treated as "error_msg" emitters. Both shapes look like selector
+// calls syntactically, so the receiver is matched by name only — a
+// heuristic that covers the dominant `errors.New(...)` and
+// `fmt.Errorf(...)` idioms but not aliased imports.
+var goErrorMessageCalls = map[[2]string]bool{
+	{"errors", "New"}:     true,
+	{"fmt", "Errorf"}:     true,
+	{"xerrors", "New"}:    true, // golang.org/x/xerrors
+	{"xerrors", "Errorf"}: true,
+}
+
+// goRouteMethods is the set of method names that, when called with a
+// string-literal path-like first argument, emit a "route" node.
+// Mixes net/http (HandleFunc/Handle), gorilla/mux (HandleFunc/Handle),
+// chi (Get/Post/...), gin/echo (GET/POST/...), and a handful of
+// common router shapes that don't go through the contracts pipeline.
+var goRouteMethods = map[string]bool{
+	"Handle":     true,
+	"HandleFunc": true,
+	"Get":        true,
+	"Post":       true,
+	"Put":        true,
+	"Delete":     true,
+	"Patch":      true,
+	"Options":    true,
+	"Head":       true,
+	"Connect":    true,
+	"Trace":      true,
+	"GET":        true,
+	"POST":       true,
+	"PUT":        true,
+	"DELETE":     true,
+	"PATCH":      true,
+	"OPTIONS":    true,
+	"HEAD":       true,
+	"CONNECT":    true,
+	"TRACE":      true,
+}
+
+// goStringEvent is one deferred string-literal observation, queued
+// during AST traversal and flushed at end-of-file by emitGoStringEvents.
+type goStringEvent struct {
+	context goStringContext
+	method  string
+	value   string
+	line    int
+}
+
+// detectGoMetric reports the metric name for a call whose method is
+// in goMetricMethods. The name is whatever firstStringLiteralArg
+// finds, i.e. the first string-literal argument at any position.
+// A nil call node or an unlisted method yields ("", false).
+// Non-literal names (variables, concatenations) are not resolved.
+func detectGoMetric(callExpr *sitter.Node, method string, src []byte) (string, bool) {
+	if callExpr == nil || !goMetricMethods[method] {
+		return "", false
+	}
+	return firstStringLiteralArg(callExpr, src)
+}
+
+// detectGoErrorMessage reports the message literal of a call whose
+// (receiver, method) pair appears in goErrorMessageCalls, such as
+// errors.New or fmt.Errorf. The message is the first string-literal
+// argument found by firstStringLiteralArg; format verbs are kept
+// verbatim. Returns ("", false) when nothing matches.
+func detectGoErrorMessage(callExpr *sitter.Node, receiver, method string, src []byte) (string, bool) {
+	if key := [2]string{receiver, method}; callExpr == nil || !goErrorMessageCalls[key] {
+		return "", false
+	}
+	return firstStringLiteralArg(callExpr, src)
+}
+
+// detectGoRoute reports the route literal for a call whose method is
+// in goRouteMethods (net/http, chi, gin/echo style routers). The
+// candidate is the first string-literal argument; it is accepted
+// only when looksLikeRoute approves it, which suppresses false
+// positives from generic method names like Get on map-like types.
+// Returns ("", false) for a nil call node, an unlisted method, a
+// call without a string literal, or a literal that is not path-like.
+func detectGoRoute(callExpr *sitter.Node, method string, src []byte) (string, bool) {
+	if callExpr == nil || !goRouteMethods[method] {
+		return "", false
+	}
+	candidate, found := firstStringLiteralArg(callExpr, src)
+	if !found {
+		return "", false
+	}
+	if looksLikeRoute(candidate) {
+		return candidate, true
+	}
+	return "", false
+}
+
+// looksLikeRoute is a cheap sanity check — keeps map.Get("foo")
+// and similar generics out of the route bucket. Accepts values that
+// start with "/" (most common) or follow the net/http 1.22+ pattern
+// form "METHOD [host]/path", where a "/" must follow the method.
+func looksLikeRoute(s string) bool {
+	if strings.HasPrefix(s, "/") {
+		return true
+	}
+	// Method-prefixed pattern, e.g. "GET /foo": verb, space, then a path.
+	method, rest, ok := strings.Cut(s, " ")
+	if !ok {
+		return false
+	}
+	switch method {
+	case "GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD", "CONNECT", "TRACE":
+		return strings.Contains(rest, "/")
+	}
+	return false
+}
+
+// firstStringLiteralArg returns the first string-literal argument of
+// a call expression (at any position) with exactly one delimiter
+// removed from each end; escape sequences are not decoded. Kept apart
+// from detectGoLogEvent so each context can apply its own filtering.
+func firstStringLiteralArg(callExpr *sitter.Node, src []byte) (string, bool) {
+	args := callExpr.ChildByFieldName("arguments")
+	if args == nil {
+		return "", false
+	}
+	for i := 0; i < int(args.NamedChildCount()); i++ {
+		c := args.NamedChild(i)
+		if c == nil {
+			continue
+		}
+		if c.Type() != "interpreted_string_literal" && c.Type() != "raw_string_literal" {
+			continue
+		}
+		lit := c.Content(src) // literal source text, delimiters included
+		if len(lit) <= 2 {
+			return "", false
+		}
+		return lit[1 : len(lit)-1], true
+	}
+	return "", false
+}
+
+// emitGoStringEvents creates one KindString node per (context, value)
+// pair seen and an EdgeEmits from the enclosing function/method to
+// each. Mirrors emitGoObservabilityEvents — same per-repo dedup
+// behaviour, same caller-line lookup contract.
+func emitGoStringEvents(events []goStringEvent, callerLookup func(line int) string, filePath string, result *parser.ExtractionResult) {
+	if len(events) == 0 {
+		return
+	}
+	seen := make(map[string]struct{}, len(events)) // node IDs already appended during this call
+	for _, e := range events {
+		callerID := callerLookup(e.line)
+		if callerID == "" {
+			continue // no caller resolved for this line: neither node nor edge is emitted
+		}
+		strID := goStringNodeID(e.context, e.value)
+		if _, ok := seen[strID]; !ok {
+			seen[strID] = struct{}{}
+			result.Nodes = append(result.Nodes, &graph.Node{
+				ID:       strID,
+				Kind:     graph.KindString,
+				Name:     e.value,
+				FilePath: filePath, // first sighting; not authoritative
+				Language: "go",
+				Meta: map[string]any{
+					"context": string(e.context),
+					"value":   e.value,
+				},
+			})
+		}
+		result.Edges = append(result.Edges, &graph.Edge{ // one edge per event, never deduplicated
+			From:     callerID,
+			To:       strID,
+			Kind:     graph.EdgeEmits,
+			FilePath: filePath,
+			Line:     e.line,
+			Origin:   graph.OriginASTInferred,
+			Meta: map[string]any{
+				"context": string(e.context),
+				"method":  e.method,
+			},
+		})
+	}
+}
+
+// goStringNodeID builds the synthetic node ID "string::<ctx>::<value>".
+// Values longer than 200 bytes are replaced by "sha1:" plus the first
+// 16 hex digits of their SHA-1; callers keep the full text on the node.
+func goStringNodeID(ctx goStringContext, value string) string {
+	prefix := "string::" + string(ctx) + "::"
+	if len(value) <= 200 {
+		return prefix + value
+	}
+	digest := sha1.Sum([]byte(value))
+	return prefix + "sha1:" + hex.EncodeToString(digest[:8])
+}
diff --git a/internal/parser/languages/go_strings_test.go b/internal/parser/languages/go_strings_test.go
new file mode 100644
index 0000000..77d9924
--- /dev/null
+++ b/internal/parser/languages/go_strings_test.go
@@ -0,0 +1,214 @@
+package languages
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/zzet/gortex/internal/graph"
+)
+
+func TestGoStrings_StatsdMetricsExtracted(t *testing.T) {
+	src := `package foo
+
+type StatsdClient struct{}
+
+func (c *StatsdClient) Increment(name string, tags []string, rate float64) error { return nil }
+func (c *StatsdClient) Gauge(name string, value float64, tags []string, rate float64) error { return nil }
+
+func Run(c *StatsdClient) {
+	c.Increment("orders.checkout.success", nil, 1)
+	c.Gauge("server.memory.bytes", 12345, nil, 1)
+}
+`
+	fix := runGoExtract(t, src)
+
+	strs := fix.nodesByKind[graph.KindString]
+	if len(strs) != 2 {
+		t.Fatalf("expected 2 KindString, got %d: %+v", len(strs), strs)
+	}
+	gotByValue := map[string]*graph.Node{}
+	for _, n := range strs {
+		gotByValue[n.Name] = n
+		if ctx, _ := n.Meta["context"].(string); ctx != "metric" {
+			t.Errorf("node %q: context = %q, want metric", n.Name, ctx)
+		}
+		want := "string::metric::" + n.Name
+		if n.ID != want {
+			t.Errorf("id = %q, want %q", n.ID, want)
+		}
+	}
+	if gotByValue["orders.checkout.success"] == nil || gotByValue["server.memory.bytes"] == nil {
+		t.Errorf("missing expected metric names: %v", gotByValue)
+	}
+
+	emits := fix.edgesByKind[graph.EdgeEmits]
+	if len(emits) != 2 {
+		t.Errorf("expected 2 EdgeEmits, got %d", len(emits))
+	}
+	for _, e := range emits {
+		if e.From != "pkg/foo.go::Run" {
+			t.Errorf("emit from = %q, want pkg/foo.go::Run", e.From)
+		}
+		if ctx, _ := e.Meta["context"].(string); ctx != "metric" {
+			t.Errorf("emit context = %q", ctx)
+		}
+	}
+}
+
+func TestGoStrings_GenericMethodNamesIgnored(t *testing.T) {
+	// Set / Inc / Add are too generic — would create false positives
+	// against map and counter types — and are deliberately excluded.
+	src := `package foo
+
+type M struct{}
+func (m M) Set(k, v string)  {}
+func (m M) Inc(k string)     {}
+func (m M) Add(k string, n int) {}
+
+func Run(m M) {
+	m.Set("magic", "value")
+	m.Inc("counter")
+	m.Add("tally", 1)
+}
+`
+	fix := runGoExtract(t, src)
+	if got := fix.nodesByKind[graph.KindString]; len(got) != 0 {
+		t.Errorf("expected no metric strings, got %d: %+v", len(got), got)
+	}
+}
+
+func TestGoStrings_ErrorsNewAndFmtErrorf(t *testing.T) {
+	src := `package foo
+
+import (
+	"errors"
+	"fmt"
+)
+
+func A() error { return errors.New("user not found") }
+func B(id int) error { return fmt.Errorf("invalid id %d", id) }
+`
+	fix := runGoExtract(t, src)
+
+	strs := fix.nodesByKind[graph.KindString]
+	if len(strs) != 2 {
+		t.Fatalf("expected 2 KindString, got %d: %+v", len(strs), strs)
+	}
+	gotByValue := map[string]*graph.Node{}
+	for _, n := range strs {
+		gotByValue[n.Name] = n
+		if ctx, _ := n.Meta["context"].(string); ctx != "error_msg" {
+			t.Errorf("node %q: context = %q, want error_msg", n.Name, ctx)
+		}
+	}
+	if gotByValue["user not found"] == nil {
+		t.Errorf("missing 'user not found': %v", gotByValue)
+	}
+	if gotByValue["invalid id %d"] == nil {
+		t.Errorf("missing 'invalid id %%d': %v", gotByValue)
+	}
+}
+
+func TestGoStrings_HTTPRoutesExtracted(t *testing.T) {
+	src := `package foo
+
+type Mux struct{}
+func (m *Mux) HandleFunc(pattern string, h func()) {}
+func (m *Mux) Get(pattern string, h func())        {}
+
+func Wire(m *Mux) {
+	m.HandleFunc("/api/v1/users", nil)
+	m.Get("/health", nil)
+	m.HandleFunc("GET /api/v1/orders", nil)
+}
+`
+	fix := runGoExtract(t, src)
+
+	strs := fix.nodesByKind[graph.KindString]
+	if len(strs) != 3 {
+		t.Fatalf("expected 3 KindString routes, got %d: %+v", len(strs), strs)
+	}
+	got := map[string]bool{}
+	for _, n := range strs {
+		got[n.Name] = true
+		if ctx, _ := n.Meta["context"].(string); ctx != "route" {
+			t.Errorf("node %q: context = %q, want route", n.Name, ctx)
+		}
+	}
+	for _, want := range []string{"/api/v1/users", "/health", "GET /api/v1/orders"} {
+		if !got[want] {
+			t.Errorf("missing route %q", want)
+		}
+	}
+}
+
+func TestGoStrings_NonRouteStringSkipped(t *testing.T) {
+	// Get/Post on map-like types — the looksLikeRoute filter should
+	// keep these out of the route bucket.
+	src := `package foo
+
+type Cache struct{}
+func (c *Cache) Get(key string) string { return "" }
+
+func Run(c *Cache) {
+	_ = c.Get("user:42:profile")
+}
+`
+	fix := runGoExtract(t, src)
+	if got := fix.nodesByKind[graph.KindString]; len(got) != 0 {
+		t.Errorf("expected no routes for cache.Get, got: %+v", got)
+	}
+}
+
+func TestGoStrings_SameMetricAcrossCallsDeduplicates(t *testing.T) {
+	src := `package foo
+
+type Client struct{}
+func (c *Client) Increment(name string) {}
+
+func A(c *Client) { c.Increment("orders.success") }
+func B(c *Client) { c.Increment("orders.success") }
+`
+	fix := runGoExtract(t, src)
+
+	strs := fix.nodesByKind[graph.KindString]
+	if len(strs) != 1 {
+		t.Fatalf("expected 1 KindString (deduplicated), got %d", len(strs))
+	}
+	emits := fix.edgesByKind[graph.EdgeEmits]
+	if len(emits) != 2 {
+		t.Errorf("expected 2 EdgeEmits (one per caller), got %d", len(emits))
+	}
+}
+
+func TestGoStrings_NodeIDForLongValueIsHashed(t *testing.T) {
+	// 250 bytes > 200-byte threshold — should hash to keep IDs sane.
+	long := strings.Repeat("a", 250)
+	got := goStringNodeID(stringCtxErrorMsg, long)
+	want := "string::error_msg::sha1:"
+	if !strings.HasPrefix(got, want) || len(got) != len(want)+16 {
+		t.Errorf("long-value id = %q, want prefix %q + 16 hex chars", got, want)
+	}
+	short := goStringNodeID(stringCtxMetric, "ok.short")
+	if short != "string::metric::ok.short" {
+		t.Errorf("short-value id = %q", short)
+	}
+}
+
+func TestLooksLikeRoute(t *testing.T) {
+	cases := map[string]bool{
+		"/users":                true,
+		"/api/v1/orders":        true,
+		"GET /foo":              true,
+		"POST /bar":             true,
+		"":                      false,
+		"plain":                 false,
+		"user:42:profile":       false,
+		"orders.checkout.event": false,
+	}
+	for in, want := range cases {
+		if got := looksLikeRoute(in); got != want {
+			t.Errorf("looksLikeRoute(%q) = %v, want %v", in, got, want)
+		}
+	}
+}
diff --git a/internal/parser/languages/golang.go b/internal/parser/languages/golang.go
index e079278..3d15a5a 100644
--- a/internal/parser/languages/golang.go
+++ b/internal/parser/languages/golang.go
@@ -216,6 +216,7 @@ func (e *GoExtractor) Extract(filePath string, src []byte) (*parser.ExtractionRe
 	var fieldValSels []goDeferredValueSel
 	var fieldValIdents []goDeferredValueIdent
 	var observabilityEvents []goObservabilityEvent
+	var stringEvents []goStringEvent
 	var flagEvents []goFlagEvent
 	var configEvents []goConfigEvent
 	var sqlEvents []goSQLEvent
@@ -273,6 +274,31 @@ func (e *GoExtractor) Extract(filePath string, src []byte) (*parser.ExtractionRe
 					line:   expr.StartLine + 1,
 				})
 			}
+			receiverText := m.Captures["callm.receiver"].Text
+			if name, ok := detectGoMetric(expr.Node, method, src); ok {
+				stringEvents = append(stringEvents, goStringEvent{
+					context: stringCtxMetric,
+					method:  method,
+					value:   name,
+					line:    expr.StartLine + 1,
+				})
+			}
+			if msg, ok := detectGoErrorMessage(expr.Node, receiverText, method, src); ok {
+				stringEvents = append(stringEvents, goStringEvent{
+					context: stringCtxErrorMsg,
+					method:  receiverText + "." + method,
+					value:   msg,
+					line:    expr.StartLine + 1,
+				})
+			}
+			if route, ok := detectGoRoute(expr.Node, method, src); ok {
+				stringEvents = append(stringEvents, goStringEvent{
+					context: stringCtxRoute,
+					method:  method,
+					value:   route,
+					line:    expr.StartLine + 1,
+				})
+			}
 			if provider, flagName, ok := detectGoFlagCheck(expr.Node, method, src); ok {
 				flagEvents = append(flagEvents, goFlagEvent{
 					provider: provider,
@@ -498,6 +524,11 @@ func (e *GoExtractor) Extract(filePath string, src []byte) (*parser.ExtractionRe
 		func(line int) string { return findEnclosingFunc(funcRanges, line) },
 		filePath, result)
 
+	// --- String observations (metrics, error messages, routes) ---
+	emitGoStringEvents(stringEvents,
+		func(line int) string { return findEnclosingFunc(funcRanges, line) },
+		filePath, result)
+
 	// --- Feature flag checks ---
 	emitGoFlagChecks(flagEvents,
 		func(line int) string { return findEnclosingFunc(funcRanges, line) },