From 02183fb1249dd58031d0f8c9f2a5f8f582283f4c Mon Sep 17 00:00:00 2001 From: Alex Martynov Date: Tue, 2 Jun 2026 14:41:12 +0200 Subject: [PATCH] feat(scan): support .codegraphignore to override .gitignore and defaults Project-root .codegraphignore is the final authority on indexing scope: !path force-includes code hidden by the root/nested .gitignore or git itself, plain lines force-exclude, last match wins. Force-include is code-aware (dependency/build dirs stay excluded unless named) and routes the scan to the filesystem walk. --- CHANGELOG.md | 4 + README.md | 31 ++++ __tests__/codegraphignore.test.ts | 204 ++++++++++++++++++++++ src/extraction/index.ts | 280 ++++++++++++++++++++++++++---- src/sync/watcher.ts | 32 +++- 5 files changed, 517 insertions(+), 34 deletions(-) create mode 100644 __tests__/codegraphignore.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index f0fcf6519..7ac011e6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### New Features + +- A new optional `.codegraphignore` file at your project root lets you override what gets indexed, taking precedence over the built-in defaults and every `.gitignore` (your repo's, nested ones, and even files git itself ignores). Use `!path/` to force-index code that's otherwise hidden — for example a monorepo whose real code lives in a directory the root `.gitignore` excludes, or behind a nested `.gitignore`. Force-include is code-aware: a broad `!app/` brings in that subtree's source but still leaves `node_modules`, `dist`, `.yarn`, and similar dependency/build dirs out unless you name one explicitly. Plain lines (no `!`) force-exclude, and the last matching line wins, so you can re-include a tree and then trim a few files back out. 
(#511) + ## [0.9.8] - 2026-06-01 diff --git a/README.md b/README.md index 6feb5c452..d7b3defda 100644 --- a/README.md +++ b/README.md @@ -565,6 +565,37 @@ add a negation — `!vendor/`. The defaults apply uniformly, so committing a dependency or build directory doesn't force it into the graph; the `.gitignore` negation is the explicit opt-in. +### `.codegraphignore` — overriding what gets indexed + +For cases a `.gitignore` negation can't reach — code hidden by the **root** +`.gitignore`, by a **nested** `.gitignore`, by an **embedded git repo**, or +otherwise invisible to git — drop a `.codegraphignore` at your project root. It +is the final authority on indexing scope, overriding the built-in defaults +**and** every `.gitignore`. Same syntax as `.gitignore`: + +- `path` — **force-exclude** (drop it even if it would otherwise be indexed). +- `!path` — **force-include** (index it even if git/`.gitignore`/the defaults hide it). +- **Last matching line wins**, so you can re-include a tree and then trim a few files back out. + +Force-include is **code-aware**: a broad `!app/` brings in that subtree's +*source*, but still leaves built-in dependency/build dirs (`node_modules`, +`dist`, `.yarn`, …) out — unless you name one explicitly (`!app/node_modules/mypkg/`). + +Example — a monorepo whose real code lives under `environment/`, which the root +`.gitignore` excludes and whose own `.gitignore` further hides `src/app-*` and +`src/common`: + +```gitignore +# .codegraphignore — index environment's code (deps stay excluded) +!environment/ +# trim tooling noise a broad include pulls in +environment/.idea/ +environment/.pnp.cjs +``` + +A `.codegraphignore` with a force-include automatically scans via the +filesystem (rather than git), since git can't list the files it ignores. 
+ ## Supported Platforms Every release ships a self-contained build (bundled Node runtime — nothing to diff --git a/__tests__/codegraphignore.test.ts b/__tests__/codegraphignore.test.ts new file mode 100644 index 000000000..484c39d53 --- /dev/null +++ b/__tests__/codegraphignore.test.ts @@ -0,0 +1,204 @@ +/** + * .codegraphignore Tests + * + * The project-root `.codegraphignore` is the final authority over what the + * indexer includes — it overrides the built-in default-ignores AND every + * `.gitignore` (root, nested, and git's own view). Force-include is code-aware: + * a broad `!dir/` re-includes that subtree's source but NOT built-in dependency + * dirs (node_modules, dist, …) unless an anchor reaches into them specifically. + * + * These tests exercise both scan paths: the filesystem walk (non-git temp dirs) + * and the git fast path (real `git init` repos, where force-include must route + * the scan onto the walk so git-ignored files can be resurfaced). + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { execFileSync } from 'child_process'; +import { scanDirectory, loadCodegraphOverride } from '../src/extraction'; + +function createTempDir(): string { + return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cgi-')); +} + +function cleanupTempDir(dir: string): void { + if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true }); +} + +function write(root: string, rel: string, content = 'export const x = 1;\n'): void { + const full = path.join(root, rel); + fs.mkdirSync(path.dirname(full), { recursive: true }); + fs.writeFileSync(full, content); +} + +function gitInit(dir: string): void { + const opts = { cwd: dir, stdio: 'pipe' as const }; + execFileSync('git', ['init', '-q'], opts); + execFileSync('git', ['config', 'user.email', 'test@test.com'], opts); + execFileSync('git', ['config', 'user.name', 'Test'], opts); +} + 
+function gitCommitAll(dir: string): void { + const opts = { cwd: dir, stdio: 'pipe' as const }; + execFileSync('git', ['add', '-A'], opts); + execFileSync('git', ['commit', '-q', '-m', 'init'], opts); +} + +describe('.codegraphignore — loader', () => { + let tempDir: string; + beforeEach(() => { tempDir = createTempDir(); }); + afterEach(() => { cleanupTempDir(tempDir); }); + + it('returns null when the file is absent', () => { + expect(loadCodegraphOverride(tempDir)).toBeNull(); + }); + + it('returns null for an empty or comments-only file', () => { + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), '\n \n# just a comment\n'); + expect(loadCodegraphOverride(tempDir)).toBeNull(); + }); + + it('reports hasForceInclude only when a "!" line exists', () => { + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), 'build/\n'); + expect(loadCodegraphOverride(tempDir)?.hasForceInclude).toBe(false); + + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), '!vendor/\n'); + expect(loadCodegraphOverride(tempDir)?.hasForceInclude).toBe(true); + }); +}); + +describe('.codegraphignore — filesystem walk (non-git)', () => { + let tempDir: string; + beforeEach(() => { tempDir = createTempDir(); }); + afterEach(() => { cleanupTempDir(tempDir); }); + + it('force-excludes a path that would otherwise be indexed', () => { + write(tempDir, 'src/keep.ts'); + write(tempDir, 'src/drop.ts'); + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), 'src/drop.ts\n'); + + const files = scanDirectory(tempDir); + expect(files).toContain('src/keep.ts'); + expect(files).not.toContain('src/drop.ts'); + }); + + it('force-includes a file hidden by the root .gitignore', () => { + write(tempDir, 'src/app.ts'); + write(tempDir, 'generated/out.ts'); + fs.writeFileSync(path.join(tempDir, '.gitignore'), 'generated/\n'); + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), '!generated/\n'); + + const files = scanDirectory(tempDir); + expect(files).toContain('src/app.ts'); + 
expect(files).toContain('generated/out.ts'); + }); + + it('force-includes files hidden by a NESTED .gitignore', () => { + write(tempDir, 'app/src/app-admin/index.ts'); + write(tempDir, 'app/src/common/util.ts'); + // nested gitignore (relative to app/) hides the very dirs we want + fs.writeFileSync(path.join(tempDir, 'app', '.gitignore'), 'src/app-*\nsrc/common\n'); + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), '!app/src/\n'); + + const files = scanDirectory(tempDir); + expect(files).toContain('app/src/app-admin/index.ts'); + expect(files).toContain('app/src/common/util.ts'); + }); + + it('descends into a dir excluded by .gitignore to reach a buried include', () => { + // Mirrors the real repo: root ignores `environment`, which itself ignores + // src/app-* and src/common; `!environment/src/` must reach all of it. + write(tempDir, 'environment/src/app-fund/page.ts'); + write(tempDir, 'environment/src/common/links.ts'); + write(tempDir, 'environment/src/base/main.ts'); + fs.writeFileSync(path.join(tempDir, '.gitignore'), 'environment\n'); + fs.writeFileSync(path.join(tempDir, 'environment', '.gitignore'), 'src/app-*\nsrc/common\n'); + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), '!environment/src/\n'); + + const files = scanDirectory(tempDir); + expect(files).toContain('environment/src/app-fund/page.ts'); + expect(files).toContain('environment/src/common/links.ts'); + expect(files).toContain('environment/src/base/main.ts'); + }); + + it('code-aware: a broad include indexes code but NOT built-in dep dirs', () => { + write(tempDir, 'environment/src/app-fund/page.ts'); + write(tempDir, 'environment/vite.config.ts'); + write(tempDir, 'environment/node_modules/pkg/index.js'); + write(tempDir, 'environment/dist/bundle.js'); + fs.writeFileSync(path.join(tempDir, '.gitignore'), 'environment\n'); + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), '!environment/\n'); + + const files = scanDirectory(tempDir); + 
expect(files).toContain('environment/src/app-fund/page.ts'); + expect(files).toContain('environment/vite.config.ts'); // config IS code + expect(files.some((f) => f.includes('node_modules'))).toBe(false); + expect(files.some((f) => f.includes('/dist/') || f.startsWith('environment/dist/'))).toBe(false); + }); + + it('code-aware: an explicit anchor reaches surgically INTO a dep dir', () => { + write(tempDir, 'env/node_modules/keep/index.js'); + write(tempDir, 'env/node_modules/other/index.js'); + fs.writeFileSync(path.join(tempDir, '.gitignore'), 'env\n'); + fs.writeFileSync( + path.join(tempDir, '.codegraphignore'), + '!env/\n!env/node_modules/keep/\n' + ); + + const files = scanDirectory(tempDir); + expect(files).toContain('env/node_modules/keep/index.js'); + expect(files).not.toContain('env/node_modules/other/index.js'); + }); + + it('does not affect an unrelated project (regression guard)', () => { + write(tempDir, 'src/a.ts'); + write(tempDir, 'lib/b.ts'); + fs.writeFileSync(path.join(tempDir, '.codegraphignore'), '!only-this-dir/\n'); + + // Force-include of a non-existent dir must not stop normal dirs being walked. + const files = scanDirectory(tempDir); + expect(files).toContain('src/a.ts'); + expect(files).toContain('lib/b.ts'); + }); +}); + +describe('.codegraphignore — git fast path', () => { + let tempDir: string; + beforeEach(() => { tempDir = createTempDir(); }); + afterEach(() => { cleanupTempDir(tempDir); }); + + it('routes to the walk and resurfaces a git-ignored file', () => { + const root = path.join(tempDir, 'repo'); + fs.mkdirSync(root, { recursive: true }); + gitInit(root); + write(root, 'src/tracked.ts'); + write(root, 'secret/buried.ts'); + fs.writeFileSync(path.join(root, '.gitignore'), 'secret/\n'); + gitCommitAll(root); // secret/ is git-ignored, never committed + + // Without override: git fast path drops the ignored file. 
+ expect(scanDirectory(root)).not.toContain('secret/buried.ts'); + + // With force-include: routed to the walk, file resurfaces. + fs.writeFileSync(path.join(root, '.codegraphignore'), '!secret/\n'); + const files = scanDirectory(root); + expect(files).toContain('src/tracked.ts'); + expect(files).toContain('secret/buried.ts'); + }); + + it('force-exclude works on the git fast path (no force-include present)', () => { + const root = path.join(tempDir, 'repo'); + fs.mkdirSync(root, { recursive: true }); + gitInit(root); + write(root, 'src/keep.ts'); + write(root, 'src/drop.ts'); + gitCommitAll(root); + + fs.writeFileSync(path.join(root, '.codegraphignore'), 'src/drop.ts\n'); + const files = scanDirectory(root); + expect(files).toContain('src/keep.ts'); + expect(files).not.toContain('src/drop.ts'); + }); +}); diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 42037d7f6..956869617 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -177,6 +177,176 @@ export function buildDefaultIgnore(rootDir: string): Ignore { return ig; } +/** + * Filename of the optional project-root override list. See {@link loadCodegraphOverride}. + */ +export const CODEGRAPHIGNORE_FILE = '.codegraphignore'; + +/** A single parsed `.codegraphignore` directive line. */ +interface OverrideLine { + /** Single-pattern matcher (the line with any leading `!` stripped). */ + matcher: Ignore; + /** True when the line began with `!` (force-include); false = force-exclude. */ + negation: boolean; +} + +/** + * Project-root `.codegraphignore` override — final authority over what the + * indexer includes, beating the built-in defaults AND every `.gitignore` (root, + * nested, and — by routing the scan to the filesystem walk — git's own view). 
+ * Two directive forms, evaluated last-match-wins per path: + * + * pattern force-EXCLUDE — drop the path even if git/defaults would keep it + * !pattern force-INCLUDE — index the path even if git/.gitignore/defaults drop it + * + * Force-include is *code-aware*: it re-includes paths hidden by any `.gitignore`, + * but does NOT resurrect a built-in dependency/build dir (node_modules, dist, + * .yarn, …) caught under a broad include like `!environment/` — those stay + * excluded unless an include line's literal anchor reaches into the dep dir + * itself (e.g. `!environment/node_modules/mypkg/`). So `!environment/` means + * "index environment's code" without dragging in its dependencies. + * + * Because git cannot enumerate files it ignores (nor cross an embedded-repo + * boundary), the presence of any force-include routes `scanDirectory` to the + * git-agnostic filesystem walk — see {@link scanDirectory}. + */ +export interface CodegraphOverride { + /** True when at least one `!` (force-include) line is present. */ + readonly hasForceInclude: boolean; + /** + * Final verdict for a file/dir path (POSIX, project-relative; a dir may carry + * a trailing `/`). 'include'/'exclude' are authoritative; null = no opinion, + * defer to the existing default/.gitignore logic. A code-aware drop of a dep + * dir under a broad include surfaces as 'exclude'. + */ + verdict(rel: string): 'include' | 'exclude' | null; + /** + * Whether the walk should descend into a directory. 'include' = descend even + * if defaults/.gitignore exclude it (needed to reach a buried force-include); + * 'exclude' = prune; null = defer to the existing logic. + */ + shouldDescend(relDir: string): 'include' | 'exclude' | null; +} + +/** Strip a `.codegraphignore` pattern to its literal leading path (before any glob char). 
*/ +function patternAnchor(pattern: string): string { + let p = pattern.replace(/^\//, '').replace(/\/+$/, ''); + const glob = p.search(/[*?[]/); + if (glob >= 0) { + p = p.slice(0, glob); + const slash = p.lastIndexOf('/'); + p = slash >= 0 ? p.slice(0, slash) : ''; + } + return p; +} + +/** + * Load and parse the project-root `.codegraphignore`. Returns null when the file + * is absent, unreadable, or has no effective directives — so every call site + * collapses to today's exact behavior when no override is present. + */ +export function loadCodegraphOverride(rootDir: string): CodegraphOverride | null { + let raw: string; + try { + const p = path.join(rootDir, CODEGRAPHIGNORE_FILE); + if (!fs.existsSync(p)) return null; + raw = fs.readFileSync(p, 'utf-8'); + } catch { + return null; + } + + const lines: OverrideLine[] = []; + const includeAnchors: string[] = []; + let hasGlobalInclude = false; + + for (let line of raw.split('\n')) { + line = line.replace(/\r$/, '').replace(/\s+$/, ''); + if (!line || line.startsWith('#')) continue; + // A leading "\#" / "\!" is the gitignore escape for a literal "#" / "!". Keep the + // backslash: `ignore` unescapes it itself; stripping it here re-triggers the "!" test. + + let negation = false; + let pat = line; + if (pat.startsWith('!')) { negation = true; pat = pat.slice(1); } + if (!pat) continue; + + lines.push({ matcher: ignore().add(pat), negation }); + if (negation) { + const anchor = patternAnchor(pat); + if (anchor === '') hasGlobalInclude = true; + else includeAnchors.push(anchor); + } + } + + if (lines.length === 0) return null; + + const hasForceInclude = lines.some((l) => l.negation); + + // last-match-wins: the last line that matches a path decides its directive. + // Test both the bare and trailing-slash forms so a dir-only pattern (`foo/`) + // matches whether the caller passed the path with a slash or not. 
+ const directiveFor = (rel: string): 'include' | 'exclude' | null => { + const probe = rel.replace(/\/+$/, ''); + if (!probe) return null; + let result: 'include' | 'exclude' | null = null; + for (const l of lines) { + if (l.matcher.ignores(probe) || l.matcher.ignores(probe + '/')) { + result = l.negation ? 'include' : 'exclude'; + } + } + return result; + }; + + // Shallowest path segment that is a built-in default-ignore dir, returned as + // the dir path up to and including it. Drives the code-aware include rule. + const firstDefaultDir = (rel: string): string | null => { + const segs = rel.replace(/\/+$/, '').split('/'); + for (let i = 0; i < segs.length; i++) { + if (DEFAULT_IGNORE_DIRS.has(segs[i]!)) return segs.slice(0, i + 1).join('/'); + } + return null; + }; + + // True when `relDir` is at, or on the path to, a force-include anchor — i.e. + // the walk must descend into it (even through a default-ignored dir) to reach + // an explicitly-included subtree. hasGlobalInclude (a pure-glob `!*.x`) forces + // a full descent — the documented cost of an unanchored include. + const isStrictAncestorOfAnchor = (relDir: string): boolean => { + if (hasGlobalInclude) return true; + return includeAnchors.some((a) => a === relDir || a.startsWith(relDir + '/')); + }; + + const verdict = (rel: string): 'include' | 'exclude' | null => { + const d = directiveFor(rel); + if (d !== 'include') return d; // 'exclude' or null pass through unchanged + // code-aware: a force-include must not resurrect a built-in dep/build dir + // (node_modules, dist, .yarn, …) unless an include anchor *inside* that dep + // dir specifically covers this path. So `!environment/` indexes the code but + // not the deps, while `!environment/node_modules/mypkg/` reaches in surgically. 
+ const probe = rel.replace(/\/+$/, ''); + const depDir = firstDefaultDir(probe); + if (depDir) { + const covered = includeAnchors.some( + (a) => + (a === depDir || a.startsWith(depDir + '/')) && + (probe === a || probe.startsWith(a + '/')) + ); + if (!covered) return 'exclude'; + } + return 'include'; + }; + + const shouldDescend = (relDir: string): 'include' | 'exclude' | null => { + const probe = relDir.replace(/\/+$/, ''); + // Descend if an include anchor lives at or below this dir — even into a + // default-ignored dir; the per-file verdict still filters siblings out. + if (isStrictAncestorOfAnchor(probe)) return 'include'; + return verdict(probe + '/'); + }; + + return { hasForceInclude, verdict, shouldDescend }; +} + /** * Collect git-visible files (tracked + untracked, .gitignore-respected) from the * git repository rooted at `repoDir`, adding each to `files` with `prefix` @@ -233,7 +403,7 @@ function collectGitFiles(repoDir: string, prefix: string, files: Set): v * embedded (nested, non-submodule) git repos. Returns null on failure * (non-git project) so callers can fall back to a filesystem walk. */ -function getGitVisibleFiles(rootDir: string): Set | null { +function getGitVisibleFiles(rootDir: string, override?: CodegraphOverride | null): Set | null { try { // Check if the project directory is gitignored by a parent repo. // When rootDir lives inside a parent git repo that ignores it, @@ -265,7 +435,17 @@ function getGitVisibleFiles(rootDir: string): Set | null { // committing a dependency/build dir doesn't make it project code. A // `.gitignore` negation (e.g. `!vendor/`) is the explicit opt-in. (issue #407) const ig = buildDefaultIgnore(rootDir); - return new Set([...files].filter((f) => !ig.ignores(f))); + // A `.codegraphignore` force-exclude line drops the path; a force-include + // keeps it (force-include normally routes to the walk before reaching here, + // but applying it is harmless). Otherwise fall back to the default matcher. 
+ return new Set([...files].filter((f) => { + if (override) { + const v = override.verdict(f); + if (v === 'exclude') return false; + if (v === 'include') return true; + } + return !ig.ignores(f); + })); } catch { return null; } @@ -334,23 +514,29 @@ export function scanDirectory( rootDir: string, onProgress?: (current: number, file: string) => void ): string[] { - // Fast path: use git to get all visible files (respects .gitignore everywhere) - const gitFiles = getGitVisibleFiles(rootDir); - if (gitFiles) { - const files: string[] = []; - let count = 0; - for (const filePath of gitFiles) { - if (isSourceFile(filePath)) { - files.push(filePath); - count++; - onProgress?.(count, filePath); + const override = loadCodegraphOverride(rootDir); + // A `.codegraphignore` force-include can't be served by the git fast path — + // git cannot enumerate files it ignores, nor cross an embedded-repo boundary — + // so route straight to the git-agnostic filesystem walk when one is present. + if (!override?.hasForceInclude) { + // Fast path: use git to get all visible files (respects .gitignore everywhere) + const gitFiles = getGitVisibleFiles(rootDir, override); + if (gitFiles) { + const files: string[] = []; + let count = 0; + for (const filePath of gitFiles) { + if (isSourceFile(filePath)) { + files.push(filePath); + count++; + onProgress?.(count, filePath); + } } + return files; } - return files; } - // Fallback: walk filesystem for non-git projects - return scanDirectoryWalk(rootDir, onProgress); + // Fallback: walk filesystem for non-git projects (and force-include overrides) + return scanDirectoryWalk(rootDir, onProgress, override); } /** @@ -361,25 +547,28 @@ export async function scanDirectoryAsync( rootDir: string, onProgress?: (current: number, file: string) => void ): Promise { - const gitFiles = getGitVisibleFiles(rootDir); - if (gitFiles) { - const files: string[] = []; - let count = 0; - for (const filePath of gitFiles) { - if (isSourceFile(filePath)) { - 
files.push(filePath); - count++; - onProgress?.(count, filePath); - // Yield every 100 files so worker threads can render progress - if (count % 100 === 0) { - await new Promise(r => setImmediate(r)); + const override = loadCodegraphOverride(rootDir); + if (!override?.hasForceInclude) { + const gitFiles = getGitVisibleFiles(rootDir, override); + if (gitFiles) { + const files: string[] = []; + let count = 0; + for (const filePath of gitFiles) { + if (isSourceFile(filePath)) { + files.push(filePath); + count++; + onProgress?.(count, filePath); + // Yield every 100 files so worker threads can render progress + if (count % 100 === 0) { + await new Promise(r => setImmediate(r)); + } } } + return files; } - return files; } - return scanDirectoryWalk(rootDir, onProgress); + return scanDirectoryWalk(rootDir, onProgress, override); } /** @@ -387,7 +576,8 @@ export async function scanDirectoryAsync( */ function scanDirectoryWalk( rootDir: string, - onProgress?: (current: number, file: string) => void + onProgress?: (current: number, file: string) => void, + override?: CodegraphOverride | null ): string[] { const files: string[] = []; let count = 0; @@ -415,6 +605,14 @@ function scanDirectoryWalk( }; const isIgnored = (fullPath: string, isDir: boolean, matchers: ScopedIgnore[]): boolean => { + // `.codegraphignore` is the final authority — it overrides the built-in + // defaults and every (root or nested) .gitignore below. + if (override) { + const relRoot = normalizePath(path.relative(rootDir, fullPath)) + (isDir ? '/' : ''); + const v = override.verdict(relRoot); + if (v === 'exclude') return true; + if (v === 'include') return false; + } for (const { dir, ig } of matchers) { let rel = normalizePath(path.relative(dir, fullPath)); if (!rel || rel.startsWith('..')) continue; // not under this matcher's dir @@ -424,6 +622,19 @@ function scanDirectoryWalk( return false; }; + // Whether to descend into a directory. 
`.codegraphignore` may force descent + // into an otherwise-excluded dir to reach a buried force-include (e.g. walk + // into `environment/` — ignored by root .gitignore — to reach `!environment/src/`). + const decideDescend = (fullPath: string, matchers: ScopedIgnore[]): boolean => { + if (override) { + const relDir = normalizePath(path.relative(rootDir, fullPath)); + const sd = override.shouldDescend(relDir); + if (sd === 'include') return true; + if (sd === 'exclude') return false; + } + return !isIgnored(fullPath, true, matchers); + }; + function walk(dir: string, matchers: ScopedIgnore[]): void { let realDir: string; try { @@ -465,7 +676,7 @@ function scanDirectoryWalk( const realTarget = fs.realpathSync(fullPath); const stat = fs.statSync(realTarget); if (stat.isDirectory()) { - if (!isIgnored(fullPath, true, active)) { + if (decideDescend(fullPath, active)) { walk(fullPath, active); } } else if (stat.isFile()) { @@ -482,7 +693,7 @@ function scanDirectoryWalk( } if (entry.isDirectory()) { - if (!isIgnored(fullPath, true, active)) { + if (decideDescend(fullPath, active)) { walk(fullPath, active); } } else if (entry.isFile()) { @@ -1456,7 +1667,10 @@ export class ExtractionOrchestrator { * Uses git status as a fast path when available, falling back to full scan. */ getChangedFiles(): { added: string[]; modified: string[]; removed: string[] } { - const gitChanges = getGitChangedFiles(this.rootDir); + // `git status` cannot see files a force-include resurrects from .gitignore, + // so skip the git fast path and use the full scan (which routes to the walk). + const override = loadCodegraphOverride(this.rootDir); + const gitChanges = override?.hasForceInclude ? 
null : getGitChangedFiles(this.rootDir); if (gitChanges) { // === Git fast path === diff --git a/src/sync/watcher.ts b/src/sync/watcher.ts index f4f4460fb..f0c964f11 100644 --- a/src/sync/watcher.ts +++ b/src/sync/watcher.ts @@ -18,7 +18,8 @@ import * as path from 'path'; import type { Stats } from 'fs'; import chokidar, { FSWatcher } from 'chokidar'; import type { Ignore } from 'ignore'; -import { isSourceFile, buildDefaultIgnore } from '../extraction'; +import { isSourceFile, buildDefaultIgnore, loadCodegraphOverride } from '../extraction'; +import type { CodegraphOverride } from '../extraction'; import { logDebug, logWarn } from '../errors'; import { normalizePath } from '../utils'; import { watchDisabledReason } from './watch-policy'; @@ -130,6 +131,10 @@ export class FileWatcher { // once at start(). Same source of truth the indexer uses, so watcher scope // can never diverge from index scope. private ignoreMatcher: Ignore | null = null; + // Optional `.codegraphignore` override (final authority over .gitignore + + // defaults). Built once at start(), beside ignoreMatcher, from the same loader + // the indexer uses — so watch scope tracks index scope through the override too. + private override: CodegraphOverride | null = null; private readonly projectRoot: string; private readonly debounceMs: number; @@ -171,6 +176,7 @@ export class FileWatcher { // chokidar only registers an inotify watch on directories that pass this // filter — that's the #276 fix. this.ignoreMatcher = buildDefaultIgnore(this.projectRoot); + this.override = loadCodegraphOverride(this.projectRoot); try { this.watcher = chokidar.watch(this.projectRoot, { @@ -257,6 +263,29 @@ export class FileWatcher { const rel = normalizePath(path.relative(this.projectRoot, testPath)); if (!rel || rel === '.' 
|| rel.startsWith('..')) return false; // root / outside if (this.isAlwaysIgnored(rel)) return true; + // `.codegraphignore` overrides defaults + .gitignore: a force-include keeps + // the path watched (descending into otherwise-excluded dirs to reach buried + // includes); a force-exclude prunes it. null verdict falls through to the + // default matcher below. + if (this.override) { + if (stats?.isDirectory()) { + const sd = this.override.shouldDescend(rel); + if (sd === 'include') return false; + if (sd === 'exclude') return true; + } else if (stats) { + const v = this.override.verdict(rel); + if (v === 'include') return false; + if (v === 'exclude') return true; + } else { + // Stats unknown: include wins if either the dir-descent or file verdict + // says so; otherwise an explicit exclude prunes. + const sd = this.override.shouldDescend(rel); + if (sd === 'include') return false; + const v = this.override.verdict(rel); + if (v === 'include') return false; + if (sd === 'exclude' || v === 'exclude') return true; + } + } if (!this.ignoreMatcher) return false; if (stats) { return this.ignoreMatcher.ignores(stats.isDirectory() ? rel + '/' : rel); @@ -284,6 +313,7 @@ export class FileWatcher { this.pendingFiles.clear(); this.chokidarReady = false; this.ignoreMatcher = null; + this.override = null; logDebug('File watcher stopped'); }