diff --git a/crates/ecp-analyzer/src/post_process/mod.rs b/crates/ecp-analyzer/src/post_process/mod.rs index d757d23b..014ccd82 100644 --- a/crates/ecp-analyzer/src/post_process/mod.rs +++ b/crates/ecp-analyzer/src/post_process/mod.rs @@ -9,3 +9,4 @@ pub mod class_membership; pub mod imports_edges; pub mod overrides; +pub mod schema_field_mirrors; diff --git a/crates/ecp-analyzer/src/post_process/schema_field_mirrors.rs b/crates/ecp-analyzer/src/post_process/schema_field_mirrors.rs new file mode 100644 index 00000000..b9e015b5 --- /dev/null +++ b/crates/ecp-analyzer/src/post_process/schema_field_mirrors.rs @@ -0,0 +1,189 @@ +//! `SchemaField` Node emission + `MirrorsField` heuristic edge bucketing +//! (T4-7). +//! +//! Bridges the gap between per-file `RawSchemaField` (T4-1..T4-6 detectors) +//! and the queryable graph layer: each `RawSchemaField` becomes a +//! `NodeKind::SchemaField` Node connected to its owning class via +//! `HasProperty`, and cross-framework mirrors (e.g. Pydantic `User.email` +//! vs SQLAlchemy `User.email`) are linked by the heuristic `MirrorsField` +//! edge. +//! +//! ## Algorithm +//! +//! 1. **Promote**: iterate `LocalGraph.schema_fields`, emit one +//! `SchemaField` Node + one `HasProperty` edge (Class → SchemaField) +//! per field. SymbolTable lookup resolves the owning class name in +//! the same file; misses silently drop the field (no false-positive +//! edge to a wrong class). +//! +//! 2. **Bucket**: group emitted SchemaField Nodes by +//! `(name.to_ascii_lowercase(), SchemaType)` so ASCII case-variants and +//! type-compatible mirrors share a bucket. +//! +//! 3. **Pair within bucket**: apply the 4-point strict rubric per pair — +//! exact case-sensitive name, identical `SchemaType` (granted by +//! bucket key), identical owner-class name, and bidirectional top-1. +//! For k=2 the top-1 check is trivial; for k≥3 with a uniform +//! `(name, type, owner_class)` triple, D3 cluster semantics emit all +//! `k×(k−1)/2` pairs at confidence 0.9. 
Different owner-class within +//! the same bucket is currently dropped (BlindSpot emission is a +//! documented follow-up — see `test_partial_match_emits_blindspot`). +//! +//! ## Perf +//! +//! - O(N) bucket build; O(k²) pairwise emission where k = #fields sharing +//! `(name_lc, type)`. Typical k < 10 (real corpora rarely have more +//! than a handful of cross-framework mirrors per identifier). +//! - Offline-only — runs once at build time, never on `ecp` hot paths. + +use crate::resolution::index::SymbolTable; +use ecp_core::analyzer::types::{LocalGraph, SchemaType}; +use ecp_core::graph::{Edge, Node, NodeKind, RelType}; +use ecp_core::pool::StringPool; +use rustc_hash::FxHashMap; + +/// Promote `RawSchemaField`s to `SchemaField` Nodes + emit `HasProperty` +/// connections + `MirrorsField` heuristic edges. Returns the count of +/// emitted MirrorsField edges (HasProperty + SchemaField Node counts are +/// derivable from `nodes.len()` delta; this return is for telemetry). +/// +/// Ordering: the builder must call this BEFORE the File-node append +/// loop runs, so the SchemaField nodes pushed here stay in the +/// raw-symbols-and-extras region and `file_node_idx` registration further +/// downstream still hits a contiguous File-node range. +pub fn emit_edges( + local_graphs: &[LocalGraph], + symbol_table: &SymbolTable, + string_pool: &mut StringPool, + nodes: &mut Vec, + edges: &mut Vec, +) -> usize { + let reason_has_property = string_pool.add("post_process:schema_field:has_property"); + let reason_mirror = string_pool.add("post_process:schema_field:mirrors_field"); + + /// Bucket entry: (node_idx, owner_class, exact_name). + type BucketEntry<'a> = (u32, &'a str, &'a str); + let mut buckets: FxHashMap<(String, SchemaType), Vec>> = FxHashMap::default(); + + // Phase 1 — emit SchemaField Nodes + HasProperty edges, populate buckets. + // + // Skip `LocalGraph`s with no `schema_fields` (the majority — most files + // carry no ORM/schema surface). 
Per-file Owner-class lookup uses + // SymbolTable's per-file index; misses (e.g. extractor emitted a class + // name that SymbolTable doesn't know about, perhaps due to file + // boundary edge cases) silently drop the field. No fabricated edges. + for (lg_idx, local_graph) in local_graphs.iter().enumerate() { + let Some(ref schema_fields) = local_graph.schema_fields else { + continue; + }; + if schema_fields.is_empty() { + continue; + } + let path_str = local_graph.file_path.to_string_lossy().replace('\\', "/"); + let file_idx = lg_idx as u32; + + for raw_sf in schema_fields.iter() { + let owner_name = &*raw_sf.owner_class; + let field_name = &*raw_sf.name; + + // Resolve the owning class to an existing Node idx. If the + // SymbolTable doesn't know `owner_name` in this file, the + // class was not parsed as a `Class` / `Struct` / `Trait` / + // `Interface` — likely a generated-code or DSL pattern that + // we don't model. Silently skip rather than emit dangling + // HasProperty. + let Some(class_idx) = symbol_table.lookup_in_file(&path_str, owner_name) else { + continue; + }; + + // UID format: stable across reindex; includes framework so + // cross-framework mirrors with same (owner, name) get distinct + // UIDs. Format mirrors File-node UID convention. + let uid_str = format!( + "SchemaField:{:?}:{}.{}:{}", + raw_sf.framework, owner_name, field_name, path_str + ); + let uid_ref = string_pool.add(&uid_str); + let name_ref = string_pool.add(field_name); + let sf_idx = nodes.len() as u32; + nodes.push(Node { + uid: uid_ref, + name: name_ref, + file_idx, + kind: NodeKind::SchemaField, + span: raw_sf.span, + community_id: 0, + }); + + // HasProperty: Class -> SchemaField. Non-heuristic + // (extractor saw an actual `name: T` / `Column(T)` form, so + // the class-owns-this-field claim is structural, not inferred). 
+ edges.push(Edge { + source: class_idx, + target: sf_idx, + rel_type: RelType::HasProperty, + confidence: 1.0, + reason: reason_has_property, + }); + + // Bucket key: (lowercase_name, type). Lowercase normalizes + // `email` vs `Email`; type-class match keeps `email: str` + // from binding to `email: int`. + buckets + .entry((field_name.to_ascii_lowercase(), raw_sf.type_class)) + .or_default() + .push((sf_idx, owner_name, field_name)); + } + } + + // Phase 2 — pairwise MirrorsField emission within each bucket. + // + // Rubric (per spec line 540 + D3 cluster semantics): + // - exact case-sensitive name match + // - identical SchemaType (granted by bucket key) + // - identical owner-class name (e.g. both "User") + // - bidirectional top-1 (trivial for k=2; D3 covers k≥3 uniform) + // + // Implementation: sub-group bucket by (exact_name, owner_class). Any + // sub-group of size ≥ 2 satisfies all four points and emits pairwise + // `MirrorsField` at heuristic confidence (RelType::MirrorsField is + // listed under `is_heuristic` so default `ecp impact` hides these + // unless `--show-heuristic` is set). + // + // BlindSpot emission for partial matches (3/4) is a documented + // follow-up — see ignored test `test_partial_match_emits_blindspot`. + let mut mirror_count = 0usize; + for entries in buckets.values() { + if entries.len() < 2 { + continue; + } + // Sub-group by (exact_name, owner_class). + let mut sub: FxHashMap<(&str, &str), Vec> = FxHashMap::default(); + for &(idx, owner, name) in entries { + sub.entry((name, owner)).or_default().push(idx); + } + for group in sub.values() { + if group.len() < 2 { + continue; + } + // Pairwise emit. Deterministic order: idx-ascending pairs + // (i, j) where i < j by position in `group`. The vec is + // populated in iteration order from `entries`, which itself + // mirrors LocalGraph iteration order — stable across runs. 
+ for i in 0..group.len() { + for j in (i + 1)..group.len() { + edges.push(Edge { + source: group[i], + target: group[j], + rel_type: RelType::MirrorsField, + confidence: 0.9, + reason: reason_mirror, + }); + mirror_count += 1; + } + } + } + } + + mirror_count +} diff --git a/crates/ecp-analyzer/src/python/parser.rs b/crates/ecp-analyzer/src/python/parser.rs index e3f97690..5b754f3d 100644 --- a/crates/ecp-analyzer/src/python/parser.rs +++ b/crates/ecp-analyzer/src/python/parser.rs @@ -15,7 +15,6 @@ use ecp_core::analyzer::types::{ RawRoute, RawTxScope, }; use ecp_core::graph::{FileCategory, NodeKind}; -use ecp_core::pool::StringPool; use std::path::Path; use streaming_iterator::StreamingIterator; use tree_sitter::{Node, Query, QueryCursor}; @@ -1070,27 +1069,19 @@ impl LanguageProvider for PythonProvider { crate::framework_helpers::stamp_owner_class_by_span(&mut nodes); - // The local pool is dropped after this block. `StrRef` fields inside - // `RawSchemaField` carry byte offsets relative to this pool — they - // remain valid Copy values, but string resolution requires the builder - // to re-intern them (see TODO: builder pass for T4-schema integration). - // Since the builder currently ignores `schema_fields`, no caller - // dereferences these StrRefs today. - let schema_fields = { - let mut pool = StringPool::new(); - let fields = crate::schema_field::extract_schema_fields( - &tree, - source, - &self.query, - &[ - crate::python::schema_extractors::PYDANTIC_CONFIG, - crate::python::schema_extractors::SQLALCHEMY_CONFIG, - ], - &imports, - &mut pool, - ); - (!fields.is_empty()).then(|| fields.into_boxed_slice()) - }; + // T4-7 refactor: `RawSchemaField` now stores owned `Box` so the + // per-file parser scope can drop cleanly without dangling-pool risk. 
+ let fields = crate::schema_field::extract_schema_fields( + &tree, + source, + &self.query, + &[ + crate::python::schema_extractors::PYDANTIC_CONFIG, + crate::python::schema_extractors::SQLALCHEMY_CONFIG, + ], + &imports, + ); + let schema_fields = (!fields.is_empty()).then(|| fields.into_boxed_slice()); Ok(LocalGraph { content_hash: [0; 8], diff --git a/crates/ecp-analyzer/src/resolution/builder.rs b/crates/ecp-analyzer/src/resolution/builder.rs index 3799ea4e..40bdd19a 100644 --- a/crates/ecp-analyzer/src/resolution/builder.rs +++ b/crates/ecp-analyzer/src/resolution/builder.rs @@ -1233,6 +1233,21 @@ impl GraphBuilder { &mut edges, ); + // T4-7: promote `RawSchemaField` → `SchemaField` Nodes + `HasProperty` + // (Class → SchemaField) + `MirrorsField` heuristic edges across + // detected cross-framework mirrors. Runs after overrides because it + // appends new Nodes; before File-node loop so SchemaField nodes + // occupy idx range BEFORE files[] (keeps file_node_idx contiguous). + // SchemaField nodes are NOT registered in SymbolTable — nothing + // references them by name; reverse traversal goes via HasProperty. + crate::post_process::schema_field_mirrors::emit_edges( + &self.local_graphs, + &symbol_table, + &mut string_pool, + &mut nodes, + &mut edges, + ); + // Append one `NodeKind::File` node per LocalGraph at the tail of // `nodes` (idx >= raw-node count). 
Doing it here — AFTER all passes // that index symbols by SymbolTable + use raw node idx ranges — diff --git a/crates/ecp-analyzer/src/schema_field/extract.rs b/crates/ecp-analyzer/src/schema_field/extract.rs index dc6191a3..70ad5fd0 100644 --- a/crates/ecp-analyzer/src/schema_field/extract.rs +++ b/crates/ecp-analyzer/src/schema_field/extract.rs @@ -1,7 +1,6 @@ use super::config::SchemaFieldConfig; use crate::framework_helpers::has_import_from; use ecp_core::analyzer::types::{RawImport, RawSchemaField, SchemaType}; -use ecp_core::pool::StringPool; use rustc_hash::FxHashMap; use streaming_iterator::StreamingIterator; use tree_sitter::{Query, QueryCursor, Tree}; @@ -10,10 +9,6 @@ use tree_sitter::{Query, QueryCursor, Tree}; /// each match to the first `SchemaFieldConfig` whose import-gate is satisfied, /// and emit a `RawSchemaField` for every accepted match. /// -/// `pool` is used to intern `name` and `owner_class` strings. The caller -/// typically passes the same pool used for the rest of the file's `LocalGraph` -/// to maximise dedup across nodes. -/// /// The caller is responsible for supplying a query whose capture names align /// with the `owner_capture`, `name_capture`, and `type_capture` fields of at /// least one config. Captures not referenced by any config are silently @@ -24,13 +19,18 @@ use tree_sitter::{Query, QueryCursor, Tree}; /// A config fires only when `has_import_from(imports, config.import_gate)` /// returns `true`. When no config's gate is satisfied by the file's imports, /// this function returns an empty `Vec` — no false positives. +/// +/// # Ownership +/// `RawSchemaField` stores `name` / `owner_class` as owned `Box` so the +/// per-file parser scope can be dropped without dangling the strings — the +/// pre-T4-7 design interned into a transient `StringPool` which was dropped +/// at scope exit, leaving `RawSchemaField` carrying unreachable `StrRef`s. 
pub fn extract_schema_fields( tree: &Tree, source: &[u8], query: &Query, configs: &[SchemaFieldConfig], imports: &[RawImport], - pool: &mut StringPool, ) -> Vec { // Identify which configs are live for this file once, not per-match. // Empty import_gate is vacuously satisfied — language built-ins (e.g. @@ -121,9 +121,9 @@ pub fn extract_schema_fields( ); out.push(RawSchemaField { - name: pool.add(name), + name: name.into(), type_class, - owner_class: pool.add(owner), + owner_class: owner.into(), framework: config.framework, span, }); diff --git a/crates/ecp-analyzer/src/typescript/parser.rs b/crates/ecp-analyzer/src/typescript/parser.rs index b638ba17..962f438b 100644 --- a/crates/ecp-analyzer/src/typescript/parser.rs +++ b/crates/ecp-analyzer/src/typescript/parser.rs @@ -10,7 +10,6 @@ use ecp_core::analyzer::lang_spec::LangSpec; use ecp_core::analyzer::provider::LanguageProvider; use ecp_core::analyzer::types::{LocalGraph, RawFrameworkRef, RawImport, RawNode, RawRoute}; use ecp_core::graph::NodeKind; -use ecp_core::pool::StringPool; use std::path::Path; use streaming_iterator::StreamingIterator; use tree_sitter::{Parser, Query, QueryCursor}; @@ -524,24 +523,16 @@ impl LanguageProvider for TypeScriptProvider { crate::framework_helpers::stamp_owner_class_by_span(&mut nodes); - // The local pool is dropped after this block. `StrRef` fields inside - // `RawSchemaField` carry byte offsets relative to this pool — they - // remain valid Copy values, but string resolution requires the builder - // to re-intern them (see TODO: builder pass for T4-schema integration). - // Since the builder currently ignores `schema_fields`, no caller - // dereferences these StrRefs today. 
- let schema_fields = { - let mut pool = StringPool::new(); - let fields = crate::schema_field::extract_schema_fields( - &tree, - source, - &self.query, - &[crate::typescript::schema_extractors::TS_INTERFACE_CONFIG], - &imports, - &mut pool, - ); - (!fields.is_empty()).then(|| fields.into_boxed_slice()) - }; + // T4-7 refactor: `RawSchemaField` now stores owned `Box` so the + // per-file parser scope can drop cleanly without dangling-pool risk. + let fields = crate::schema_field::extract_schema_fields( + &tree, + source, + &self.query, + &[crate::typescript::schema_extractors::TS_INTERFACE_CONFIG], + &imports, + ); + let schema_fields = (!fields.is_empty()).then(|| fields.into_boxed_slice()); Ok(LocalGraph { content_hash: [0; 8], diff --git a/crates/ecp-analyzer/tests/python_pydantic_schema.rs b/crates/ecp-analyzer/tests/python_pydantic_schema.rs index 486ee117..5ef96323 100644 --- a/crates/ecp-analyzer/tests/python_pydantic_schema.rs +++ b/crates/ecp-analyzer/tests/python_pydantic_schema.rs @@ -1,14 +1,11 @@ //! T4-2: Pydantic v1+v2 BaseModel field extraction tests. //! -//! All tests call `extract_schema_fields` directly with a local `StringPool` -//! so strings are resolved before the pool is dropped. The `parse_file` -//! wiring stores fields in `LocalGraph.schema_fields`; builder-side re-interning -//! is a separate future pass (see TODO in parser.rs). +//! Post-T4-7 refactor: `RawSchemaField` now stores owned `Box` so +//! `.name` / `.owner_class` are directly readable as `&str` — no pool plumbing. use ecp_analyzer::python::schema_extractors::PYDANTIC_CONFIG; use ecp_analyzer::schema_field::extract_schema_fields; use ecp_core::analyzer::types::{FrameworkId, RawImport, RawSchemaField, SchemaType}; -use ecp_core::pool::StringPool; use tree_sitter::{Parser, Query}; // --------------------------------------------------------------------------- @@ -25,14 +22,13 @@ fn pydantic_import() -> RawImport { } /// Run the Pydantic dispatcher against `src`. 
Returns the extracted fields -/// plus the pool that owns their string bytes — callers resolve `StrRef`s via -/// `pool.bytes.as_slice()`. +/// with owned strings (no pool teardown concerns). /// /// `with_import` toggles the `pydantic` import-gate; tests use `false` to /// verify gating, `true` for happy paths. The query is the Pydantic fragment /// from `frameworks.scm`, compiled inline so unit tests don't depend on the /// production-query merge step in `PythonProvider::new()`. -fn run(src: &str, with_import: bool) -> (Vec, StringPool) { +fn run(src: &str, with_import: bool) -> Vec { let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into(); let mut parser = Parser::new(); parser.set_language(&lang).expect("set_language"); @@ -57,16 +53,7 @@ fn run(src: &str, with_import: bool) -> (Vec, StringPool) { } else { Vec::new() }; - let mut pool = StringPool::new(); - let fields = extract_schema_fields( - &tree, - src.as_bytes(), - &query, - &[PYDANTIC_CONFIG], - &imports, - &mut pool, - ); - (fields, pool) + extract_schema_fields(&tree, src.as_bytes(), &query, &[PYDANTIC_CONFIG], &imports) } // --------------------------------------------------------------------------- @@ -79,24 +66,21 @@ fn run(src: &str, with_import: bool) -> (Vec, StringPool) { fn test_happy_path_two_fields() { let src = "from pydantic import BaseModel\n\nclass User(BaseModel):\n name: str\n age: int\n"; - let (fields, pool) = run(src, true); + let fields = run(src, true); assert_eq!(fields.len(), 2, "expected two fields for User"); - let pool_bytes = pool.bytes.as_slice(); - let by_name: std::collections::HashMap<&str, &_> = fields - .iter() - .map(|f| (f.name.resolve(pool_bytes), f)) - .collect(); + let by_name: std::collections::HashMap<&str, &_> = + fields.iter().map(|f| (&*f.name, f)).collect(); let name_field = by_name["name"]; assert_eq!(name_field.type_class, SchemaType::String); - assert_eq!(name_field.owner_class.resolve(pool_bytes), "User"); + 
assert_eq!(&*name_field.owner_class, "User"); assert_eq!(name_field.framework, FrameworkId::Pydantic); let age_field = by_name["age"]; assert_eq!(age_field.type_class, SchemaType::Int); - assert_eq!(age_field.owner_class.resolve(pool_bytes), "User"); + assert_eq!(&*age_field.owner_class, "User"); } /// Optional / union type `str | None` — field is still emitted; type_class is @@ -106,7 +90,7 @@ fn test_happy_path_two_fields() { fn test_optional_union_type_emitted_as_other() { let src = "from pydantic import BaseModel\n\nclass User(BaseModel):\n email: str | None = None\n"; - let (fields, pool) = run(src, true); + let fields = run(src, true); assert_eq!(fields.len(), 1); assert_eq!( @@ -114,7 +98,7 @@ fn test_optional_union_type_emitted_as_other() { SchemaType::Other, "union type resolves to Other" ); - assert_eq!(fields[0].name.resolve(pool.bytes.as_slice()), "email"); + assert_eq!(&*fields[0].name, "email"); } /// Generic type `list[str]` — field emitted, type_class is `Other`. @@ -122,7 +106,7 @@ fn test_optional_union_type_emitted_as_other() { fn test_generic_type_emitted_as_other() { let src = "from pydantic import BaseModel\n\nclass Tags(BaseModel):\n items: list[str] = []\n"; - let (fields, _pool) = run(src, true); + let fields = run(src, true); assert_eq!(fields.len(), 1); assert_eq!(fields[0].type_class, SchemaType::Other); @@ -132,7 +116,7 @@ fn test_generic_type_emitted_as_other() { #[test] fn test_no_pydantic_import_zero_fields() { let src = "class User(BaseModel):\n name: str\n age: int\n"; - let (fields, _pool) = run(src, false); + let fields = run(src, false); assert!( fields.is_empty(), @@ -145,7 +129,7 @@ fn test_no_pydantic_import_zero_fields() { #[test] fn test_plain_class_no_fields_emitted() { let src = "from pydantic import BaseModel\n\nclass Plain:\n x: int = 0\n y: str\n"; - let (fields, _pool) = run(src, true); + let fields = run(src, true); assert!( fields.is_empty(), @@ -168,13 +152,9 @@ fn test_plain_class_no_fields_emitted() { #[ignore = 
"BlindSpot: inherited fields via intermediate class require cross-file symbol resolution beyond single-file tree-sitter scope"] fn test_inherited_model_own_fields_only() { let src = "from pydantic import BaseModel\n\nclass User(BaseModel):\n name: str\n\nclass Admin(User):\n role: str\n"; - let (fields, pool) = run(src, true); + let fields = run(src, true); - let pool_bytes = pool.bytes.as_slice(); - let owners: Vec<&str> = fields - .iter() - .map(|f| f.owner_class.resolve(pool_bytes)) - .collect(); + let owners: Vec<&str> = fields.iter().map(|f| &*f.owner_class).collect(); assert!( !owners.contains(&"Admin"), "Admin is not a direct BaseModel subclass" @@ -189,14 +169,10 @@ fn test_inherited_model_own_fields_only() { #[test] fn test_multiple_models_in_file() { let src = "from pydantic import BaseModel\n\nclass Foo(BaseModel):\n x: int\n\nclass Bar(BaseModel):\n y: str\n z: bool\n"; - let (fields, pool) = run(src, true); + let fields = run(src, true); assert_eq!(fields.len(), 3, "Foo(1) + Bar(2) = 3 total fields"); - let pool_bytes = pool.bytes.as_slice(); - let owners: Vec<&str> = fields - .iter() - .map(|f| f.owner_class.resolve(pool_bytes)) - .collect(); + let owners: Vec<&str> = fields.iter().map(|f| &*f.owner_class).collect(); assert!(owners.contains(&"Foo")); assert!(owners.contains(&"Bar")); } @@ -216,9 +192,12 @@ fn test_parse_file_populates_schema_fields() { let fields = local.schema_fields.expect("schema_fields must be Some"); assert_eq!(fields.len(), 2, "Item has 2 fields"); - // StrRefs are live only within the parse_file-local pool; assert - // count and framework only — string resolution requires builder re-intern. + // Post-T4-7: owned `Box` is directly readable. 
+ let names: Vec<&str> = fields.iter().map(|f| &*f.name).collect(); + assert!(names.contains(&"name")); + assert!(names.contains(&"price")); assert!(fields.iter().all(|f| f.framework == FrameworkId::Pydantic)); + assert!(fields.iter().all(|f| &*f.owner_class == "Item")); } /// No pydantic import → `LocalGraph.schema_fields` is `None`. diff --git a/crates/ecp-analyzer/tests/python_sqlalchemy_schema.rs b/crates/ecp-analyzer/tests/python_sqlalchemy_schema.rs index bf086a03..a409e97d 100644 --- a/crates/ecp-analyzer/tests/python_sqlalchemy_schema.rs +++ b/crates/ecp-analyzer/tests/python_sqlalchemy_schema.rs @@ -4,15 +4,12 @@ //! A) Classic `Column()` declarative (SQLAlchemy 1.x / 2.x compatible) //! B) `Mapped[T]` typed declarative (SQLAlchemy 2.0 style) //! -//! All tests call `extract_schema_fields` directly with a local `StringPool` -//! so strings are resolved before the pool is dropped. The `parse_file` -//! wiring stores fields in `LocalGraph.schema_fields`; builder-side re-interning -//! is a separate future pass (see TODO in parser.rs). +//! Post-T4-7 refactor: `RawSchemaField` now stores owned `Box` so +//! `.name` / `.owner_class` are directly readable as `&str` — no pool plumbing. use ecp_analyzer::python::schema_extractors::SQLALCHEMY_CONFIG; use ecp_analyzer::schema_field::extract_schema_fields; use ecp_core::analyzer::types::{FrameworkId, RawImport, RawSchemaField, SchemaType}; -use ecp_core::pool::StringPool; use tree_sitter::{Parser, Query}; // --------------------------------------------------------------------------- @@ -51,8 +48,7 @@ const SQLA_QUERY: &str = r#" /// Parse `src`, fabricate `RawImport`s from `import_sources` (only the /// `source` field is checked by `has_import_from`), then run the dispatcher. -/// Returns the extracted fields plus the pool that owns the interned strings. 
-fn run(src: &str, import_sources: &[&str]) -> (Vec, StringPool) { +fn run(src: &str, import_sources: &[&str]) -> Vec { let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into(); let mut parser = Parser::new(); parser.set_language(&lang).expect("set_language"); @@ -67,16 +63,13 @@ fn run(src: &str, import_sources: &[&str]) -> (Vec, StringPool) binding_kind: None, }) .collect(); - let mut pool = StringPool::new(); - let fields = extract_schema_fields( + extract_schema_fields( &tree, src.as_bytes(), &query, &[SQLALCHEMY_CONFIG], &imports, - &mut pool, - ); - (fields, pool) + ) } // --------------------------------------------------------------------------- @@ -88,13 +81,12 @@ fn run(src: &str, import_sources: &[&str]) -> (Vec, StringPool) fn test_column_string_type() { let src = "from sqlalchemy import Column, String\n\nclass User(Base):\n name = Column(String)\n"; - let (fields, pool) = run(src, &["sqlalchemy"]); + let fields = run(src, &["sqlalchemy"]); assert_eq!(fields.len(), 1); - let pool_bytes = pool.bytes.as_slice(); assert_eq!(fields[0].type_class, SchemaType::String); - assert_eq!(fields[0].name.resolve(pool_bytes), "name"); - assert_eq!(fields[0].owner_class.resolve(pool_bytes), "User"); + assert_eq!(&*fields[0].name, "name"); + assert_eq!(&*fields[0].owner_class, "User"); assert_eq!(fields[0].framework, FrameworkId::SqlAlchemy); } @@ -103,11 +95,11 @@ fn test_column_string_type() { fn test_column_integer_type() { let src = "from sqlalchemy import Column, Integer\n\nclass User(Base):\n id = Column(Integer, primary_key=True)\n"; - let (fields, pool) = run(src, &["sqlalchemy"]); + let fields = run(src, &["sqlalchemy"]); assert_eq!(fields.len(), 1); assert_eq!(fields[0].type_class, SchemaType::Int); - assert_eq!(fields[0].name.resolve(pool.bytes.as_slice()), "id"); + assert_eq!(&*fields[0].name, "id"); } /// `Column(String(50))` — the first arg is a *call* not an identifier. 
@@ -117,7 +109,7 @@ fn test_column_integer_type() { #[test] fn test_column_with_size_arg_not_captured() { let src = "from sqlalchemy import Column, String\n\nclass Post(Base):\n title = Column(String(50))\n"; - let (fields, _pool) = run(src, &["sqlalchemy"]); + let fields = run(src, &["sqlalchemy"]); assert!( fields.is_empty(), @@ -129,13 +121,12 @@ fn test_column_with_size_arg_not_captured() { #[test] fn test_mapped_column_typed_int() { let src = "from sqlalchemy.orm import Mapped, mapped_column\n\nclass User(Base):\n id: Mapped[int] = mapped_column(primary_key=True)\n"; - let (fields, pool) = run(src, &["sqlalchemy.orm"]); + let fields = run(src, &["sqlalchemy.orm"]); assert_eq!(fields.len(), 1); assert_eq!(fields[0].type_class, SchemaType::Int); - let pool_bytes = pool.bytes.as_slice(); - assert_eq!(fields[0].name.resolve(pool_bytes), "id"); - assert_eq!(fields[0].owner_class.resolve(pool_bytes), "User"); + assert_eq!(&*fields[0].name, "id"); + assert_eq!(&*fields[0].owner_class, "User"); assert_eq!(fields[0].framework, FrameworkId::SqlAlchemy); } @@ -143,18 +134,18 @@ fn test_mapped_column_typed_int() { #[test] fn test_mapped_column_str() { let src = "from sqlalchemy.orm import Mapped, mapped_column\n\nclass User(Base):\n name: Mapped[str] = mapped_column()\n"; - let (fields, pool) = run(src, &["sqlalchemy.orm"]); + let fields = run(src, &["sqlalchemy.orm"]); assert_eq!(fields.len(), 1); assert_eq!(fields[0].type_class, SchemaType::String); - assert_eq!(fields[0].name.resolve(pool.bytes.as_slice()), "name"); + assert_eq!(&*fields[0].name, "name"); } /// No sqlalchemy import → import gate blocks all emission. 
#[test] fn test_no_sqlalchemy_import_no_emit() { let src = "class User(Base):\n name = Column(String)\n"; - let (fields, _pool) = run(src, &[]); + let fields = run(src, &[]); assert!( fields.is_empty(), @@ -166,14 +157,10 @@ fn test_no_sqlalchemy_import_no_emit() { #[test] fn test_multiple_classes() { let src = "from sqlalchemy import Column, Integer, String\n\nclass User(Base):\n id = Column(Integer)\n name = Column(String)\n\nclass Post(Base):\n id = Column(Integer)\n title = Column(String)\n"; - let (fields, pool) = run(src, &["sqlalchemy"]); + let fields = run(src, &["sqlalchemy"]); assert_eq!(fields.len(), 4, "User(2) + Post(2) = 4 fields"); - let pool_bytes = pool.bytes.as_slice(); - let owners: Vec<&str> = fields - .iter() - .map(|f| f.owner_class.resolve(pool_bytes)) - .collect(); + let owners: Vec<&str> = fields.iter().map(|f| &*f.owner_class).collect(); assert_eq!(owners.iter().filter(|&&o| o == "User").count(), 2); assert_eq!(owners.iter().filter(|&&o| o == "Post").count(), 2); } @@ -183,14 +170,14 @@ fn test_multiple_classes() { #[test] fn test_ignores_relationship() { let src = "from sqlalchemy import Column, Integer\nfrom sqlalchemy.orm import relationship\n\nclass User(Base):\n id = Column(Integer)\n posts = relationship(\"Post\")\n"; - let (fields, pool) = run(src, &["sqlalchemy"]); + let fields = run(src, &["sqlalchemy"]); assert_eq!( fields.len(), 1, "relationship() must not emit a schema field" ); - assert_eq!(fields[0].name.resolve(pool.bytes.as_slice()), "id"); + assert_eq!(&*fields[0].name, "id"); } /// `parse_file` integration smoke test: PythonProvider populates @@ -208,12 +195,16 @@ fn test_parse_file_populates_schema_fields() { let fields = local.schema_fields.expect("schema_fields must be Some"); assert_eq!(fields.len(), 2, "User has 2 columns"); + let names: Vec<&str> = fields.iter().map(|f| &*f.name).collect(); + assert!(names.contains(&"id")); + assert!(names.contains(&"name")); assert!( fields .iter() .all(|f| f.framework == 
FrameworkId::SqlAlchemy), "all fields must have SqlAlchemy framework" ); + assert!(fields.iter().all(|f| &*f.owner_class == "User")); } /// No sqlalchemy import → `LocalGraph.schema_fields` is `None`. @@ -246,7 +237,7 @@ fn test_parse_file_no_import_schema_fields_none() { #[ignore = "BlindSpot: Column(String(50)) and Column(Namespace.Type) — first arg is a call/attribute, not a bare identifier; requires smarter type unwrapping"] fn test_column_parameterised_type_blind_spot() { let src = "from sqlalchemy import Column, String\n\nclass Post(Base):\n title = Column(String(50))\n body = Column(String(255))\n"; - let (fields, _pool) = run(src, &["sqlalchemy"]); + let fields = run(src, &["sqlalchemy"]); assert_eq!( fields.len(), diff --git a/crates/ecp-analyzer/tests/schema_field_extract.rs b/crates/ecp-analyzer/tests/schema_field_extract.rs index 8a345fef..f3f75bc8 100644 --- a/crates/ecp-analyzer/tests/schema_field_extract.rs +++ b/crates/ecp-analyzer/tests/schema_field_extract.rs @@ -5,7 +5,6 @@ use ecp_analyzer::schema_field::{extract_schema_fields, SchemaFieldConfig}; use ecp_core::analyzer::types::{FrameworkId, RawImport, SchemaType}; -use ecp_core::pool::StringPool; use tree_sitter::{Parser, Query}; // --------------------------------------------------------------------------- @@ -97,7 +96,6 @@ fn test_config_driven_dispatch_picks_right_framework_label() { let src = "class MyModel:\n name: str = None\n"; let (tree, lang) = python_tree(src); let query = Query::new(&lang, FIELD_QUERY).expect("query compile"); - let mut pool = StringPool::new(); // Only lib-alpha import → CONFIG_A should fire, CONFIG_B should not. 
let imports = vec![fake_import("lib-alpha")]; @@ -107,7 +105,6 @@ fn test_config_driven_dispatch_picks_right_framework_label() { &query, &[CONFIG_A, CONFIG_B], &imports, - &mut pool, ); assert_eq!(fields.len(), 1, "expected exactly one field"); @@ -115,14 +112,12 @@ fn test_config_driven_dispatch_picks_right_framework_label() { // Flip: only lib-beta import → CONFIG_B fires. let imports_b = vec![fake_import("lib-beta")]; - let mut pool2 = StringPool::new(); let fields_b = extract_schema_fields( &tree, src.as_bytes(), &query, &[CONFIG_A, CONFIG_B], &imports_b, - &mut pool2, ); assert_eq!(fields_b.len(), 1); @@ -135,7 +130,6 @@ fn test_import_gate_negative_drops_capture() { let src = "class MyModel:\n name: str = None\n"; let (tree, lang) = python_tree(src); let query = Query::new(&lang, FIELD_QUERY).expect("query compile"); - let mut pool = StringPool::new(); // Imports for an unrelated library — neither gate satisfied. let imports = vec![fake_import("unrelated-lib")]; @@ -145,7 +139,6 @@ fn test_import_gate_negative_drops_capture() { &query, &[CONFIG_A, CONFIG_B], &imports, - &mut pool, ); assert!(fields.is_empty(), "import gate must block all extraction"); diff --git a/crates/ecp-analyzer/tests/schema_field_mirror.rs b/crates/ecp-analyzer/tests/schema_field_mirror.rs new file mode 100644 index 00000000..28ed2c0c --- /dev/null +++ b/crates/ecp-analyzer/tests/schema_field_mirror.rs @@ -0,0 +1,291 @@ +//! T4-7: `SchemaField` Node + `HasProperty` + `MirrorsField` end-to-end +//! emission tests. +//! +//! Exercises the full pipeline: per-language parsers emit +//! `RawSchemaField` → `GraphBuilder::build()` → `post_process::schema_field_mirrors` +//! → final `ZeroCopyGraph` with SchemaField nodes + HasProperty + MirrorsField edges. 
+ +use ecp_analyzer::python::parser::PythonProvider; +use ecp_analyzer::resolution::builder::GraphBuilder; +use ecp_core::analyzer::provider::LanguageProvider; +use ecp_core::graph::{NodeKind, RelType, ZeroCopyGraph}; + +fn parse_python(path: &str, src: &str) -> ecp_core::analyzer::types::LocalGraph { + let provider = PythonProvider::new().expect("python provider"); + provider + .parse_file(path.as_ref(), src.as_bytes()) + .expect("parse_file") +} + +fn build(local_graphs: Vec) -> ZeroCopyGraph { + let mut builder = GraphBuilder::new(); + for lg in local_graphs { + builder.add_graph(lg); + } + builder.build() +} + +/// Lookup helper: count SchemaField nodes whose name resolves to `name`. +fn count_schema_field_nodes(graph: &ZeroCopyGraph, name: &str) -> usize { + let pool = graph.string_pool.as_slice(); + graph + .nodes + .iter() + .filter(|n| n.kind == NodeKind::SchemaField && n.name.resolve(pool) == name) + .count() +} + +/// Lookup helper: count edges of a given rel_type. +fn count_edges(graph: &ZeroCopyGraph, rel: RelType) -> usize { + graph.edges.iter().filter(|e| e.rel_type == rel).count() +} + +/// Lookup helper: count MirrorsField edges between two specific SchemaField +/// indices (in either direction). +fn count_mirror_edges_between(graph: &ZeroCopyGraph, a: u32, b: u32) -> usize { + graph + .edges + .iter() + .filter(|e| { + e.rel_type == RelType::MirrorsField + && ((e.source == a && e.target == b) || (e.source == b && e.target == a)) + }) + .count() +} + +/// Find the (idx, owner_class_name) of every SchemaField named `field_name`. +/// Owner class is derived by walking the inbound HasProperty edge. 
+fn find_schema_fields_with_owners<'g>( + graph: &'g ZeroCopyGraph, + field_name: &str, +) -> Vec<(u32, &'g str)> { + let pool = graph.string_pool.as_slice(); + let mut out = Vec::new(); + for (idx, node) in graph.nodes.iter().enumerate() { + if node.kind != NodeKind::SchemaField || node.name.resolve(pool) != field_name { + continue; + } + let sf_idx = idx as u32; + // Find the HasProperty edge whose target is this SchemaField. + for edge in &graph.edges { + if edge.rel_type == RelType::HasProperty && edge.target == sf_idx { + let class_idx = edge.source as usize; + let class_name = graph.nodes[class_idx].name.resolve(pool); + out.push((sf_idx, class_name)); + break; + } + } + } + out +} + +// --------------------------------------------------------------------------- +// Spec test cases (T4-7 line 543-547) +// --------------------------------------------------------------------------- + +/// `test_pair_strict_match_emits_mirrorsfield` — Pydantic `User.email: str` + +/// SQLA `User.email = Column(String)` → one MirrorsField edge. +#[test] +fn test_pair_strict_match_emits_mirrorsfield() { + let pyd = parse_python( + "models/pyd.py", + "from pydantic import BaseModel\n\nclass User(BaseModel):\n email: str\n", + ); + let sqla = parse_python( + "models/sqla.py", + "from sqlalchemy import Column, String\n\nclass User(Base):\n email = Column(String)\n", + ); + + let graph = build(vec![pyd, sqla]); + + // Two SchemaField nodes named "email". + assert_eq!( + count_schema_field_nodes(&graph, "email"), + 2, + "two SchemaField nodes (Pydantic + SQLA) for User.email" + ); + + // Two HasProperty edges (one per SchemaField). + assert!( + count_edges(&graph, RelType::HasProperty) >= 2, + "at least two HasProperty edges" + ); + + // Exactly one MirrorsField edge between them (pairwise, k=2). 
+ let fields = find_schema_fields_with_owners(&graph, "email"); + assert_eq!(fields.len(), 2, "Pydantic + SQLA both emit email"); + let (a_idx, a_owner) = fields[0]; + let (b_idx, b_owner) = fields[1]; + assert_eq!(a_owner, "User"); + assert_eq!(b_owner, "User"); + assert_eq!( + count_mirror_edges_between(&graph, a_idx, b_idx), + 1, + "exactly one MirrorsField edge between the two User.email fields" + ); +} + +/// `test_three_way_cluster_all_pairs_emit_mirrorsfield` (D3) — +/// Pydantic + SQLA + a TS interface for the same `User.email` → 3 pairs of +/// MirrorsField edges (k=3 cluster, k×(k-1)/2 = 3 edges). +#[test] +fn test_three_way_cluster_all_pairs_emit_mirrorsfield() { + use ecp_analyzer::typescript::TypeScriptProvider; + + let pyd = parse_python( + "models/pyd.py", + "from pydantic import BaseModel\n\nclass User(BaseModel):\n email: str\n", + ); + let sqla = parse_python( + "models/sqla.py", + "from sqlalchemy import Column, String\n\nclass User(Base):\n email = Column(String)\n", + ); + let ts_provider = TypeScriptProvider::new().expect("ts provider"); + let ts = ts_provider + .parse_file( + "models/user.ts".as_ref(), + b"interface User { email: string; }", + ) + .expect("parse_file"); + + let graph = build(vec![pyd, sqla, ts]); + + // Three SchemaField nodes. + assert_eq!( + count_schema_field_nodes(&graph, "email"), + 3, + "three SchemaField nodes for User.email (Pydantic + SQLA + TS)" + ); + + // k=3 cluster → 3 pairwise MirrorsField edges (3 choose 2). + assert_eq!( + count_edges(&graph, RelType::MirrorsField), + 3, + "k=3 cluster must emit 3 pairwise MirrorsField edges" + ); +} + +/// `test_partial_match_emits_blindspot` — Pydantic `User.email` + something +/// like SQLA `User.user_email` (3/4 match: name differs) → BlindSpot. +/// +/// **T4-7 v1 limitation**: BlindSpot emission for partial matches is a +/// documented follow-up. Currently the field is silently dropped. 
+#[test] +#[ignore = "BlindSpot for partial-match SchemaField pairs is a T4-7 follow-up — see schema_field_mirrors.rs Phase 2 docs"] +fn test_partial_match_emits_blindspot() { + let pyd = parse_python( + "models/pyd.py", + "from pydantic import BaseModel\n\nclass User(BaseModel):\n email: str\n", + ); + let sqla = parse_python( + "models/sqla.py", + "from sqlalchemy import Column, String\n\nclass User(Base):\n user_email = Column(String)\n", + ); + + let graph = build(vec![pyd, sqla]); + + // No MirrorsField (different name). + assert_eq!(count_edges(&graph, RelType::MirrorsField), 0); + + // BlindSpot expected — checked in follow-up PR. + assert!( + graph + .blind_spots + .iter() + .any(|bs| bs.kind.resolve(graph.string_pool.as_slice()) + == "schema-field-mirror-candidate"), + "partial match must surface as BlindSpot" + ); +} + +/// `test_different_class_name_blindspot` — Pydantic `User.email` + SQLA +/// `Admin.email` (same type, different owner) → no MirrorsField + (future) +/// BlindSpot. +#[test] +fn test_different_class_name_drops_silently() { + let pyd = parse_python( + "models/pyd.py", + "from pydantic import BaseModel\n\nclass User(BaseModel):\n email: str\n", + ); + let sqla = parse_python( + "models/sqla.py", + "from sqlalchemy import Column, String\n\nclass Admin(Base):\n email = Column(String)\n", + ); + + let graph = build(vec![pyd, sqla]); + + // Both SchemaField nodes exist (HasProperty still emitted per-class). + assert_eq!(count_schema_field_nodes(&graph, "email"), 2); + + // But NO MirrorsField (different owner-class fails the 4-point rubric). 
+ assert_eq!( + count_edges(&graph, RelType::MirrorsField), + 0, + "different owner-class must not emit MirrorsField" + ); +} + +// --------------------------------------------------------------------------- +// Additional integration coverage +// --------------------------------------------------------------------------- + +/// HasProperty edges that target SchemaField nodes specifically (T4-7 +/// emission) — distinct from HasProperty edges to plain `Property` nodes +/// emitted by `class_membership`. This test focuses only on the +/// SchemaField subset. +#[test] +fn test_has_property_edge_direction_and_owner() { + let pyd = parse_python( + "models/pyd.py", + "from pydantic import BaseModel\n\nclass User(BaseModel):\n email: str\n age: int\n", + ); + let graph = build(vec![pyd]); + + let pool = graph.string_pool.as_slice(); + let sf_has_props: Vec<_> = graph + .edges + .iter() + .filter(|e| { + e.rel_type == RelType::HasProperty + && graph.nodes[e.target as usize].kind == NodeKind::SchemaField + }) + .collect(); + assert_eq!( + sf_has_props.len(), + 2, + "User has 2 SchemaField properties → 2 HasProperty→SchemaField edges" + ); + + for edge in &sf_has_props { + let src = &graph.nodes[edge.source as usize]; + assert_eq!( + src.name.resolve(pool), + "User", + "HasProperty source must be Class User" + ); + } +} + +/// MirrorsField edges are listed under `is_heuristic` — default `ecp impact` +/// hides them. This test verifies the structural property; the impact CLI +/// filtering is already covered by `tests/impact_heuristic_filter.rs`. +#[test] +fn test_mirrors_field_is_heuristic() { + assert!( + RelType::MirrorsField.is_heuristic(), + "MirrorsField MUST be marked heuristic so ecp impact hides it by default" + ); +} + +/// File with no schema_fields → no SchemaField nodes, no MirrorsField edges. +/// Confirms the empty-fast-path doesn't break the rest of the build. 
+#[test] +fn test_no_schema_fields_no_emission() { + let plain = parse_python("models/plain.py", "def add(x, y):\n return x + y\n"); + let graph = build(vec![plain]); + + assert_eq!(count_schema_field_nodes(&graph, "email"), 0); + assert_eq!(count_edges(&graph, RelType::MirrorsField), 0); + // HasProperty count from this file is 0 (no class). + assert_eq!(count_edges(&graph, RelType::HasProperty), 0); +} diff --git a/crates/ecp-analyzer/tests/typescript_interface_schema.rs b/crates/ecp-analyzer/tests/typescript_interface_schema.rs index d4582e3c..98a0e1a2 100644 --- a/crates/ecp-analyzer/tests/typescript_interface_schema.rs +++ b/crates/ecp-analyzer/tests/typescript_interface_schema.rs @@ -1,8 +1,7 @@ //! T4-4: TypeScript `interface` property extraction tests. //! -//! Most tests call `extract_schema_fields` directly with a local `StringPool` -//! so strings are resolved before the pool is dropped. The last two tests use -//! `TypeScriptProvider::parse_file` as an integration smoke check. +//! Post-T4-7 refactor: `RawSchemaField` now stores owned `Box` so +//! `.name` / `.owner_class` are directly readable as `&str` — no pool plumbing. //! //! Import gate is empty for TS interfaces (language built-in, no framework //! import required); `extract_schema_fields` treats `&[]` as vacuously satisfied. @@ -10,7 +9,6 @@ use ecp_analyzer::schema_field::extract_schema_fields; use ecp_analyzer::typescript::schema_extractors::TS_INTERFACE_CONFIG; use ecp_core::analyzer::types::{FrameworkId, RawSchemaField, SchemaType}; -use ecp_core::pool::StringPool; use tree_sitter::{Parser, Query}; // --------------------------------------------------------------------------- @@ -34,24 +32,14 @@ const INTERFACE_QUERY: &str = r#" "#; /// Run the TS interface dispatcher against `src`. -/// Returns extracted fields + the pool that owns their string bytes. 
-fn run(src: &str) -> (Vec, StringPool) { +fn run(src: &str) -> Vec { let lang: tree_sitter::Language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(); let mut parser = Parser::new(); parser.set_language(&lang).expect("set_language"); let tree = parser.parse(src.as_bytes(), None).expect("parse"); let query = Query::new(&lang, INTERFACE_QUERY).expect("query compile"); // No imports needed — empty gate is always active. - let mut pool = StringPool::new(); - let fields = extract_schema_fields( - &tree, - src.as_bytes(), - &query, - &[TS_INTERFACE_CONFIG], - &[], - &mut pool, - ); - (fields, pool) + extract_schema_fields(&tree, src.as_bytes(), &query, &[TS_INTERFACE_CONFIG], &[]) } // --------------------------------------------------------------------------- @@ -61,39 +49,36 @@ fn run(src: &str) -> (Vec, StringPool) { #[test] fn test_simple_string_field() { let src = "interface User { name: string; }"; - let (fields, pool) = run(src); + let fields = run(src); assert_eq!(fields.len(), 1); let f = &fields[0]; - let bytes = pool.bytes.as_slice(); - assert_eq!(f.name.resolve(bytes), "name"); + assert_eq!(&*f.name, "name"); assert_eq!(f.type_class, SchemaType::String); - assert_eq!(f.owner_class.resolve(bytes), "User"); + assert_eq!(&*f.owner_class, "User"); assert_eq!(f.framework, FrameworkId::TypeScriptInterface); } #[test] fn test_number_field() { let src = "interface Item { age: number; }"; - let (fields, pool) = run(src); + let fields = run(src); assert_eq!(fields.len(), 1); let f = &fields[0]; - let bytes = pool.bytes.as_slice(); - assert_eq!(f.name.resolve(bytes), "age"); + assert_eq!(&*f.name, "age"); // classify_ts_type maps `number` → SchemaType::Int assert_eq!(f.type_class, SchemaType::Int); - assert_eq!(f.owner_class.resolve(bytes), "Item"); + assert_eq!(&*f.owner_class, "Item"); } #[test] fn test_boolean_field() { let src = "interface Config { active: boolean; }"; - let (fields, pool) = run(src); + let fields = run(src); assert_eq!(fields.len(), 1); - let 
bytes = pool.bytes.as_slice(); - assert_eq!(fields[0].name.resolve(bytes), "active"); + assert_eq!(&*fields[0].name, "active"); assert_eq!(fields[0].type_class, SchemaType::Bool); } @@ -106,7 +91,7 @@ fn test_date_field() { // The query intentionally restricts to `predefined_type` to avoid capturing // arbitrary type references. Date support requires a separate query arm. let src = "interface Event { created: Date; }"; - let (fields, _pool) = run(src); + let fields = run(src); // Date is a type_identifier, not a predefined_type — not captured by this query. assert!( fields.is_empty(), @@ -119,7 +104,7 @@ fn test_union_type_other() { // `string | null` is a `union_type` node, not `predefined_type`. // The query only captures `predefined_type` → this field is not emitted. let src = "interface Contact { email: string | null; }"; - let (fields, _pool) = run(src); + let fields = run(src); assert!( fields.is_empty(), "union_type does not match predefined_type capture — field not emitted" @@ -130,7 +115,7 @@ fn test_union_type_other() { fn test_array_type_other() { // `string[]` is an `array_type` node, not `predefined_type`. 
let src = "interface Post { tags: string[]; }"; - let (fields, _pool) = run(src); + let fields = run(src); assert!( fields.is_empty(), "array_type does not match predefined_type capture — field not emitted" @@ -140,30 +125,26 @@ fn test_array_type_other() { #[test] fn test_multiple_interfaces() { let src = "interface Foo { x: number; label: string; } interface Bar { active: boolean; }"; - let (fields, pool) = run(src); + let fields = run(src); assert_eq!(fields.len(), 3, "Foo(2) + Bar(1) = 3 total fields"); - let bytes = pool.bytes.as_slice(); - let owners: Vec<&str> = fields - .iter() - .map(|f| f.owner_class.resolve(bytes)) - .collect(); + let owners: Vec<&str> = fields.iter().map(|f| &*f.owner_class).collect(); assert!(owners.contains(&"Foo"), "Foo fields must appear"); assert!(owners.contains(&"Bar"), "Bar fields must appear"); let foo_fields: Vec<&str> = fields .iter() - .filter(|f| f.owner_class.resolve(bytes) == "Foo") - .map(|f| f.name.resolve(bytes)) + .filter(|f| &*f.owner_class == "Foo") + .map(|f| &*f.name) .collect(); assert!(foo_fields.contains(&"x")); assert!(foo_fields.contains(&"label")); let bar_fields: Vec<&str> = fields .iter() - .filter(|f| f.owner_class.resolve(bytes) == "Bar") - .map(|f| f.name.resolve(bytes)) + .filter(|f| &*f.owner_class == "Bar") + .map(|f| &*f.name) .collect(); assert!(bar_fields.contains(&"active")); } @@ -173,14 +154,13 @@ fn test_nested_interface_does_not_emit_extras() { // `user: User` — `User` is a `type_identifier`, not `predefined_type`. // Only the `predefined_type` arm fires; `user` field is not emitted. let src = "interface Wrapper { user: User; count: number; }"; - let (fields, pool) = run(src); + let fields = run(src); // Only `count: number` (predefined_type) is captured; `user: User` is not. 
assert_eq!(fields.len(), 1, "only predefined_type fields are captured"); - let bytes = pool.bytes.as_slice(); - assert_eq!(fields[0].name.resolve(bytes), "count"); + assert_eq!(&*fields[0].name, "count"); assert_eq!(fields[0].type_class, SchemaType::Int); - assert_eq!(fields[0].owner_class.resolve(bytes), "Wrapper"); + assert_eq!(&*fields[0].owner_class, "Wrapper"); } /// BlindSpot: optional properties (`name?: string`) use `property_signature` @@ -190,12 +170,11 @@ fn test_nested_interface_does_not_emit_extras() { #[test] fn test_optional_property_is_captured() { let src = "interface User { name?: string; }"; - let (fields, pool) = run(src); + let fields = run(src); // Optional properties ARE captured — `?` is a modifier on the // property_signature, not on the name node, so the query fires. assert_eq!(fields.len(), 1); - let bytes = pool.bytes.as_slice(); - assert_eq!(fields[0].name.resolve(bytes), "name"); + assert_eq!(&*fields[0].name, "name"); assert_eq!(fields[0].type_class, SchemaType::String); } @@ -207,7 +186,7 @@ fn test_optional_property_is_captured() { #[ignore = "BlindSpot: type alias object literals require a separate query arm (T4-5)"] fn test_type_alias_not_captured() { let src = "type Point = { x: number; y: number; }"; - let (fields, _pool) = run(src); + let fields = run(src); // Currently zero — type aliases use type_alias_declaration, not // interface_declaration. T4-5 tracks this gap. 
assert!( @@ -233,9 +212,13 @@ fn test_parse_file_populates_schema_fields() { let fields = local.schema_fields.expect("schema_fields must be Some"); assert_eq!(fields.len(), 2, "Product has 2 predefined-type fields"); + let names: Vec<&str> = fields.iter().map(|f| &*f.name).collect(); + assert!(names.contains(&"name")); + assert!(names.contains(&"price")); assert!(fields .iter() .all(|f| f.framework == FrameworkId::TypeScriptInterface)); + assert!(fields.iter().all(|f| &*f.owner_class == "Product")); } #[test] diff --git a/crates/ecp-core/src/analyzer/types.rs b/crates/ecp-core/src/analyzer/types.rs index 2b1b77bd..c29a026b 100644 --- a/crates/ecp-core/src/analyzer/types.rs +++ b/crates/ecp-core/src/analyzer/types.rs @@ -110,7 +110,7 @@ pub struct RawFrameworkRef { } /// Primitive type of a schema column or model field. -#[derive(Archive, Deserialize, Serialize, Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Archive, Deserialize, Serialize, Debug, Clone, Copy, PartialEq, Eq, Hash)] #[rkyv(derive(Debug))] pub enum SchemaType { String, @@ -224,12 +224,19 @@ impl FrameworkId { } /// ORM / schema model field detected at static-analysis time. +/// +/// Field-name + owner-class are stored as owned `Box` rather than +/// `StrRef` because per-language parsers run in isolated scopes — the +/// `StringPool` they intern into is dropped before the `LocalGraph` reaches +/// the builder. Owned strings cost an extra 16 B per field but eliminate +/// the dangling-StrRef pre-T4-7 bug entirely. Aligns with `RawNode.name` +/// which is also `String` for the same reason. #[derive(Archive, Deserialize, Serialize, Debug, Clone)] #[rkyv(derive(Debug))] pub struct RawSchemaField { - pub name: StrRef, + pub name: Box, pub type_class: SchemaType, - pub owner_class: StrRef, + pub owner_class: Box, pub framework: FrameworkId, pub span: (u32, u32, u32, u32), }