From f4cd6334a1b9b547f8243fae719b0ac4ce327fc1 Mon Sep 17 00:00:00 2001 From: Alex Kesling Date: Fri, 13 Mar 2026 09:52:49 -0400 Subject: [PATCH] feat: add .gitignore and .qualignore support for file discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the manual walk_dir with the `ignore` crate's WalkBuilder, which respects .gitignore (including parent dirs, global, and git/info/exclude), .qualignore (custom, gitignore-compatible syntax), and adds --no-ignore to bypass all filtering. Document the feature in SPEC.md §10, README.md, and AGENTS.md. --- .gitattributes | 1 + .qualignore | 1 + AGENTS.md | 9 +++ Cargo.lock | 119 ++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + README.md | 2 + SPEC.md | 38 +++++++++++- src/cli/commands/check.rs | 6 +- src/cli/commands/compact.rs | 6 +- src/cli/commands/ls.rs | 6 +- src/cli/commands/praise.rs | 6 +- src/cli/commands/score.rs | 6 +- src/cli/commands/show.rs | 6 +- src/qual_file.rs | 104 +++++++++++++++++++++---------- tests/integration.rs | 2 +- 15 files changed, 274 insertions(+), 39 deletions(-) create mode 100644 .gitattributes create mode 100644 .qualignore diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a3a69af --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.qual merge=union diff --git a/.qualignore b/.qualignore new file mode 100644 index 0000000..7e7ff1e --- /dev/null +++ b/.qualignore @@ -0,0 +1 @@ +site/examples/ diff --git a/AGENTS.md b/AGENTS.md index f35d95f..2f342bd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -33,6 +33,15 @@ - Include results for `cargo fmt`, `cargo clippy --all-targets --all-features`, and `cargo test --all-features`; attach CLI examples when changing text output. - For release work, note whether `./scripts/release.sh --execute` should be run. 
+## Keeping Things in Sync +When making changes, verify that all affected surfaces stay consistent: +- **SPEC.md** — Section 7 (Library API) must match public function signatures. Section 10 (File Discovery) must match discovery behavior. Update the spec version when semantics change. +- **README.md** — Core Concepts and CLI Commands table should reflect current behavior. +- **site/** — `site/js/playground.js` contains a JavaScript scoring engine for the web playground. If scoring logic, record format, or field names change, update it to match. +- **Cargo.toml** — Bump the crate version for any user-visible change (new feature, behavior change, bug fix). Coordinate with `SPEC.md` version when the spec itself changes. +- **Tests** — Many test files have local `make_att()`/`make_record()` helpers that construct records by hand. When adding or renaming fields on `Attestation`, `Epoch`, or `DependencyRecord`, update all helpers (~6 locations across `src/` and `tests/`). Run `cargo test --all-features` to catch any you miss. +- **Golden IDs** — `tests/integration.rs` pins BLAKE3 IDs for attestation, epoch, and dependency records. Any change to canonical form (field order, new envelope fields, MCF rules) will break these. Update the expected hashes after confirming the new values are correct. + ## Slash Command Discovery - Unrecognized slash commands should be looked up as files under `.claude/commands/` (e.g., `/foo` looks for `.claude/commands/foo.md`). - If a matching file exists, treat its contents as the command definition; otherwise continue without adding anything to context. diff --git a/Cargo.lock b/Cargo.lock index d7fe47b..d6fa964 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -114,6 +123,16 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -234,6 +253,31 @@ dependencies = [ "libc", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "crossterm" version = "0.29.0" @@ -345,6 +389,19 @@ dependencies = [ "wasip3", ] +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -396,6 +453,22 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ignore" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "2.13.0" @@ -608,6 +681,7 @@ dependencies = [ "clap", "comfy-table", "figment", + "ignore", "petgraph", "rand", "serde", @@ -669,6 +743,23 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "rustix" version = "1.1.4" @@ -688,6 +779,15 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -900,6 +1000,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 
+dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -1013,6 +1123,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 852ade4..ce5bbb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ cli = ["dep:clap", "dep:comfy-table", "dep:figment", "dep:rand"] [dependencies] blake3 = "1" chrono = { version = "0.4", features = ["serde"] } +ignore = "0.4" petgraph = "0.7" serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/README.md b/README.md index 589c847..4767b75 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ qualifier check --min-score 0 **.qual files** are JSONL files containing records. The recommended layout is one `.qual` file per directory. See [SPEC.md](SPEC.md) for layout options and trade-offs. +**File discovery** respects `.gitignore` and `.qualignore` (gitignore-compatible syntax) by default, so vendored or generated `.qual` files can be excluded from scoring. Pass `--no-ignore` to bypass all ignore rules. See [SPEC.md §10](SPEC.md#10-file-discovery) for details. 
+ ## CLI Commands | Command | Description | diff --git a/SPEC.md b/SPEC.md index 9b3af6d..b1034c4 100644 --- a/SPEC.md +++ b/SPEC.md @@ -934,7 +934,7 @@ pub fn finalize_record(record: Record) -> Record; pub struct QualFile { pub path: PathBuf, pub subject: String, pub records: Vec<Record> } pub fn parse(path: &Path) -> Result<QualFile>; pub fn append(path: &Path, record: &Record) -> Result<()>; -pub fn discover(root: &Path) -> Result<Vec<QualFile>>; +pub fn discover(root: &Path, respect_ignore: bool) -> Result<Vec<QualFile>>; // qualifier::scoring pub struct ScoreReport { pub raw: i32, pub effective: i32, pub limiting_path: Option> } @@ -1007,6 +1007,42 @@ The project root is determined by searching upward for VCS markers (`.git`, `.hg`, `.jj`, `.pijul`, `_FOSSIL_`, `.svn`) or a `qualifier.graph.jsonl` file, whichever is found first. +### 10.1 Ignore Rules + +By default, qualifier respects ignore rules from two sources during file +discovery: + +1. **`.gitignore`** — Standard Git ignore files, including: + - `.gitignore` files at any level of the tree + - `.git/info/exclude` (per-repo excludes) + - The global gitignore file (e.g., `~/.config/git/ignore`) + - `.gitignore` files in parent directories above the project root + (matching Git's own behavior in monorepos) + +2. **`.qualignore`** — A qualifier-specific ignore file using the same + syntax as `.gitignore`. Place a `.qualignore` file anywhere in the tree + to exclude paths from qualifier's discovery walk. Useful for ignoring + vendored code, generated files, or example directories that have `.qual` + files you want qualifier to skip without affecting Git. + +Paths matched by either source are excluded from all discovery commands: +`score`, `show`, `check`, `ls`, `compact`, and `praise`/`blame`. + +### 10.2 `--no-ignore` + +Pass `--no-ignore` to any discovery command to bypass all ignore rules. +This forces qualifier to walk every non-hidden directory and discover all +`.qual` files regardless of `.gitignore` or `.qualignore` entries.
+ +### 10.3 Hidden Directories + +Hidden directories (names starting with `.`) are always skipped during +discovery, regardless of ignore settings. This prevents qualifier from +descending into `.git`, `.vscode`, `.idea`, and similar tool directories. + +Hidden *files* (like `.qual`) are not skipped — the per-directory `.qual` +layout depends on this. + ## 11. Crate Structure A single crate published as `qualifier` on crates.io. diff --git a/src/cli/commands/check.rs b/src/cli/commands/check.rs index ebbb098..1e1e44d 100644 --- a/src/cli/commands/check.rs +++ b/src/cli/commands/check.rs @@ -13,13 +13,17 @@ pub struct Args { /// Path to the dependency graph file #[arg(long)] pub graph: Option, + + /// Disable .gitignore and .qualignore filtering + #[arg(long)] + pub no_ignore: bool, } pub fn run(args: Args) -> crate::Result<()> { let root = find_project_root(Path::new(".")); let graph = crate::cli::config::load_graph(args.graph.as_deref(), root.as_deref()); let discover_root = root.as_deref().unwrap_or(Path::new(".")); - let qual_files = qual_file::discover(discover_root)?; + let qual_files = qual_file::discover(discover_root, !args.no_ignore)?; let scores = scoring::effective_scores(&graph, &qual_files); diff --git a/src/cli/commands/compact.rs b/src/cli/commands/compact.rs index 1be3260..1bea73a 100644 --- a/src/cli/commands/compact.rs +++ b/src/cli/commands/compact.rs @@ -21,6 +21,10 @@ pub struct Args { /// Preview without writing #[arg(long)] pub dry_run: bool, + + /// Disable .gitignore and .qualignore filtering + #[arg(long)] + pub no_ignore: bool, } pub fn run(args: Args) -> crate::Result<()> { @@ -48,7 +52,7 @@ pub fn run(args: Args) -> crate::Result<()> { fn run_all(args: &Args) -> crate::Result<()> { let root = find_project_root(Path::new(".")); let discover_root = root.as_deref().unwrap_or(Path::new(".")); - let qual_files = qual_file::discover(discover_root)?; + let qual_files = qual_file::discover(discover_root, !args.no_ignore)?; if 
qual_files.is_empty() { println!("No .qual files found."); diff --git a/src/cli/commands/ls.rs b/src/cli/commands/ls.rs index 3a9a874..f72e66b 100644 --- a/src/cli/commands/ls.rs +++ b/src/cli/commands/ls.rs @@ -27,13 +27,17 @@ pub struct Args { /// Path to the dependency graph file #[arg(long)] pub graph: Option, + + /// Disable .gitignore and .qualignore filtering + #[arg(long)] + pub no_ignore: bool, } pub fn run(args: Args) -> crate::Result<()> { let root = find_project_root(Path::new(".")); let graph = crate::cli::config::load_graph(args.graph.as_deref(), root.as_deref()); let discover_root = root.as_deref().unwrap_or(Path::new(".")); - let qual_files = qual_file::discover(discover_root)?; + let qual_files = qual_file::discover(discover_root, !args.no_ignore)?; let scores = scoring::effective_scores(&graph, &qual_files); diff --git a/src/cli/commands/praise.rs b/src/cli/commands/praise.rs index 3342284..0ee76a6 100644 --- a/src/cli/commands/praise.rs +++ b/src/cli/commands/praise.rs @@ -18,6 +18,10 @@ pub struct Args { #[cfg(not(target_os = "emscripten"))] #[arg(long)] pub vcs: bool, + + /// Disable .gitignore and .qualignore filtering + #[arg(long)] + pub no_ignore: bool, } /// Record-based praise output — works everywhere including emscripten. 
@@ -33,7 +37,7 @@ pub fn run(args: Args) -> crate::Result<()> { fn run_records(args: Args) -> crate::Result<()> { let root = find_project_root(Path::new(".")); let discover_root = root.as_deref().unwrap_or(Path::new(".")); - let all_qual_files = qual_file::discover(discover_root)?; + let all_qual_files = qual_file::discover(discover_root, !args.no_ignore)?; let records: Vec<&crate::attestation::Record> = qual_file::find_records_for(&args.artifact, &all_qual_files); diff --git a/src/cli/commands/score.rs b/src/cli/commands/score.rs index fa300a8..9d1b771 100644 --- a/src/cli/commands/score.rs +++ b/src/cli/commands/score.rs @@ -17,13 +17,17 @@ pub struct Args { /// Path to the dependency graph file #[arg(long)] pub graph: Option, + + /// Disable .gitignore and .qualignore filtering + #[arg(long)] + pub no_ignore: bool, } pub fn run(args: Args) -> crate::Result<()> { let root = find_project_root(Path::new(".")); let graph = crate::cli::config::load_graph(args.graph.as_deref(), root.as_deref()); let discover_root = root.as_deref().unwrap_or(Path::new(".")); - let qual_files = qual_file::discover(discover_root)?; + let qual_files = qual_file::discover(discover_root, !args.no_ignore)?; let scores = scoring::effective_scores(&graph, &qual_files); diff --git a/src/cli/commands/show.rs b/src/cli/commands/show.rs index c3bc788..c670737 100644 --- a/src/cli/commands/show.rs +++ b/src/cli/commands/show.rs @@ -17,13 +17,17 @@ pub struct Args { /// Path to the dependency graph file #[arg(long)] pub graph: Option, + + /// Disable .gitignore and .qualignore filtering + #[arg(long)] + pub no_ignore: bool, } pub fn run(args: Args) -> crate::Result<()> { let root = find_project_root(Path::new(".")); let graph = crate::cli::config::load_graph(args.graph.as_deref(), root.as_deref()); let discover_root = root.as_deref().unwrap_or(Path::new(".")); - let all_qual_files = qual_file::discover(discover_root)?; + let all_qual_files = qual_file::discover(discover_root, !args.no_ignore)?; let 
records = qual_file::find_records_for(&args.artifact, &all_qual_files); diff --git a/src/qual_file.rs b/src/qual_file.rs index 6d0a1c1..1e62456 100644 --- a/src/qual_file.rs +++ b/src/qual_file.rs @@ -176,40 +176,66 @@ pub fn find_qual_file_for(subject: &str) -> Option { /// Discover all `.qual` files under a root directory. /// /// Walks the directory tree recursively, collecting every file whose name -/// ends with `.qual`. Returns them sorted by path for determinism. -pub fn discover(root: &Path) -> crate::Result<Vec<QualFile>> { - let mut qual_files = Vec::new(); - walk_dir(root, &mut qual_files)?; - qual_files.sort_by(|a, b| a.path.cmp(&b.path)); - Ok(qual_files) -} - -fn walk_dir(dir: &Path, out: &mut Vec<QualFile>) -> crate::Result<()> { - let entries = match fs::read_dir(dir) { - Ok(entries) => entries, - Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => return Ok(()), - Err(e) => return Err(e.into()), - }; +/// ends with `.qual`. Respects `.gitignore` and `.qualignore` by default. +/// Pass `respect_ignore: false` to bypass all ignore rules. +/// +/// Returns them sorted by path for determinism. +/// +/// Entries that cannot be read because of a permission error are skipped; +/// any other walk error, and any parse error, is returned to the caller. +pub fn discover(root: &Path, respect_ignore: bool) -> crate::Result<Vec<QualFile>> { + use ignore::WalkBuilder; + + let mut builder = WalkBuilder::new(root); + builder.hidden(false); // allow hidden files like .qual + + if respect_ignore { + builder + .git_ignore(true) + .git_global(true) + .git_exclude(true) + .add_custom_ignore_filename(".qualignore"); + } else { + builder + .git_ignore(false) + .git_global(false) + .git_exclude(false) + .ignore(false); + } + + // Skip hidden directories (like .git, .vscode, etc.) but allow hidden + // files (like .qual) — matches the old walk_dir behavior.
+ builder.filter_entry(|entry| { + if entry.file_type().is_some_and(|ft| ft.is_dir()) { + return !entry.file_name().to_string_lossy().starts_with('.'); + } + true + }); - for entry in entries { - let entry = entry?; + let mut qual_files = Vec::new(); + for entry in builder.build() { + let entry = match entry { + Ok(entry) => entry, + // Unreadable directories are skipped, not fatal — the old + // walk_dir tolerated PermissionDenied the same way. + Err(e) + if e.io_error() + .is_some_and(|io| io.kind() == std::io::ErrorKind::PermissionDenied) => + { + continue; + } + Err(e) => return Err(crate::Error::Io(std::io::Error::other(e))), + }; let path = entry.path(); - - // Skip hidden directories (like .git) - if path.is_dir() { - let name = entry.file_name(); - if name.to_string_lossy().starts_with('.') { - continue; - } - walk_dir(&path, out)?; - } else if path.extension().and_then(|e| e.to_str()) == Some("qual") - || entry.file_name() == ".qual" + if path.is_file() + && (path.extension().and_then(|e| e.to_str()) == Some("qual") + || path.file_name().and_then(|f| f.to_str()) == Some(".qual")) { - out.push(parse(&path)?); + qual_files.push(parse(path)?); } } - - Ok(()) + qual_files.sort_by(|a, b| a.path.cmp(&b.path)); + Ok(qual_files) } /// Derive the subject name from a `.qual` file path.
@@ -392,7 +404,7 @@ mod tests { // Also create a non-qual file that should be ignored fs::write(src.join("a.rs"), "fn main() {}").unwrap(); - let found = discover(dir.path()).unwrap(); + let found = discover(dir.path(), true).unwrap(); assert_eq!(found.len(), 2); } @@ -405,10 +417,40 @@ mod tests { let r = make_record("x", Kind::Pass, 10, "ok"); append(&hidden.join("x.qual"), &r).unwrap(); - let found = discover(dir.path()).unwrap(); + let found = discover(dir.path(), true).unwrap(); assert_eq!(found.len(), 0); } + #[test] + fn test_discover_respects_qualignore() { + let dir = tempfile::tempdir().unwrap(); + let src = dir.path().join("src"); + let examples = dir.path().join("examples"); + fs::create_dir_all(&src).unwrap(); + fs::create_dir_all(&examples).unwrap(); + + let r1 = make_record("src/a.rs", Kind::Pass, 10, "ok"); + let r2 = make_record("examples/demo.rs", Kind::Pass, 10, "ok"); + + append(&src.join("a.rs.qual"), &r1).unwrap(); + append(&examples.join("demo.rs.qual"), &r2).unwrap(); + + // Without .qualignore: both found + let found = discover(dir.path(), true).unwrap(); + assert_eq!(found.len(), 2); + + // Add .qualignore excluding examples/ + fs::write(dir.path().join(".qualignore"), "examples/\n").unwrap(); + + let found = discover(dir.path(), true).unwrap(); + assert_eq!(found.len(), 1); + assert!(found[0].path.to_string_lossy().contains("src")); + + // With --no-ignore: both found again + let found = discover(dir.path(), false).unwrap(); + assert_eq!(found.len(), 2); + } + #[test] fn test_write_all() { let dir = tempfile::tempdir().unwrap(); diff --git a/tests/integration.rs b/tests/integration.rs index bea980e..1a704b8 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -311,7 +311,7 @@ fn test_discovery_walks_tree() { std::fs::create_dir_all(hidden.parent().unwrap()).unwrap(); std::fs::write(&hidden, "").unwrap(); - let found = qual_file::discover(dir.path()).unwrap(); + let found = qual_file::discover(dir.path(), true).unwrap(); 
assert_eq!(found.len(), 3); let subjects: Vec<&str> = found.iter().map(|qf| qf.subject.as_str()).collect();