@@ -30,6 +30,7 @@ import {
3030import { getFileCommitDates } from '../utils/git-dates.js' ;
3131import {
3232 CODEBASE_CONTEXT_DIRNAME ,
33+ INDEXING_STATS_FILENAME ,
3334 INTELLIGENCE_FILENAME ,
3435 KEYWORD_INDEX_FILENAME ,
3536 MANIFEST_FILENAME ,
@@ -51,6 +52,13 @@ export interface IndexerOptions {
5152 incrementalOnly ?: boolean ;
5253}
5354
/**
 * On-disk snapshot of the indexing counters.
 *
 * The indexer serializes an object of this shape as pretty-printed JSON to
 * the INDEXING_STATS_FILENAME file inside the codebase-context directory,
 * right after the manifest is written. It is later parsed back as a
 * `Partial<PersistedIndexingStats>`: the three numeric fields are only
 * trusted when each of them passes a `typeof … === 'number'` check, so a
 * missing or malformed file simply falls back to other sources.
 */
interface PersistedIndexingStats {
  /** Value of `stats.indexedFiles` when the snapshot was written. */
  indexedFiles: number;
  /** Value of `stats.totalChunks` when the snapshot was written. */
  totalChunks: number;
  /** Value of `stats.totalFiles` when the snapshot was written. */
  totalFiles: number;
  /** Write time of the snapshot, produced by `new Date().toISOString()`. */
  generatedAt: string;
}
61+
5462export class CodebaseIndexer {
5563 private rootPath : string ;
5664 private config : CodebaseConfig ;
@@ -181,16 +189,18 @@ export class CodebaseIndexer {
181189 // Phase 1b: Incremental diff (if incremental mode)
182190 const contextDir = path . join ( this . rootPath , CODEBASE_CONTEXT_DIRNAME ) ;
183191 const manifestPath = path . join ( contextDir , MANIFEST_FILENAME ) ;
192+ const indexingStatsPath = path . join ( contextDir , INDEXING_STATS_FILENAME ) ;
184193 let diff : ManifestDiff | null = null ;
185194 let currentHashes : Record < string , string > | null = null ;
195+ let previousManifest : FileManifest | null = null ;
186196
187197 if ( this . incrementalOnly ) {
188198 this . updateProgress ( 'scanning' , 10 ) ;
189199 console . error ( 'Computing file hashes for incremental diff...' ) ;
190200 currentHashes = await computeFileHashes ( files , this . rootPath ) ;
191201
192- const oldManifest = await readManifest ( manifestPath ) ;
193- diff = diffManifest ( oldManifest , currentHashes ) ;
202+ previousManifest = await readManifest ( manifestPath ) ;
203+ diff = diffManifest ( previousManifest , currentHashes ) ;
194204
195205 console . error (
196206 `Incremental diff: ${ diff . added . length } added, ${ diff . changed . length } changed, ` +
@@ -210,6 +220,52 @@ export class CodebaseIndexer {
210220 this . updateProgress ( 'complete' , 100 ) ;
211221 stats . duration = Date . now ( ) - startTime ;
212222 stats . completedAt = new Date ( ) ;
223+
224+ let restoredFromPersistedStats = false ;
225+
226+ try {
227+ const persisted = JSON . parse (
228+ await fs . readFile ( indexingStatsPath , 'utf-8' )
229+ ) as Partial < PersistedIndexingStats > ;
230+
231+ if (
232+ typeof persisted . indexedFiles === 'number' &&
233+ typeof persisted . totalChunks === 'number' &&
234+ typeof persisted . totalFiles === 'number'
235+ ) {
236+ stats . indexedFiles = persisted . indexedFiles ;
237+ stats . totalChunks = persisted . totalChunks ;
238+ stats . totalFiles = persisted . totalFiles ;
239+ restoredFromPersistedStats = true ;
240+ }
241+ } catch {
242+ // No persisted stats yet — fall back below
243+ }
244+
245+ if ( ! restoredFromPersistedStats ) {
246+ if ( previousManifest ) {
247+ stats . indexedFiles = Object . keys ( previousManifest . files ) . length ;
248+ }
249+
250+ try {
251+ const existingIndexPath = path . join ( contextDir , KEYWORD_INDEX_FILENAME ) ;
252+ const existingChunks = JSON . parse ( await fs . readFile ( existingIndexPath , 'utf-8' ) ) ;
253+ if ( Array . isArray ( existingChunks ) ) {
254+ stats . totalChunks = existingChunks . length ;
255+ if ( stats . indexedFiles === 0 ) {
256+ const uniqueFiles = new Set (
257+ existingChunks . map ( ( c : { filePath ?: string } ) => c . filePath )
258+ ) ;
259+ stats . indexedFiles = uniqueFiles . size ;
260+ }
261+ }
262+ } catch {
263+ // Keyword index doesn't exist yet — keep best-known counts
264+ }
265+ }
266+
267+ stats . totalFiles = files . length ;
268+
213269 return stats ;
214270 }
215271 }
@@ -559,6 +615,14 @@ export class CodebaseIndexer {
559615 } ;
560616 await writeManifest ( manifestPath , manifest ) ;
561617
618+ const persistedStats : PersistedIndexingStats = {
619+ indexedFiles : stats . indexedFiles ,
620+ totalChunks : stats . totalChunks ,
621+ totalFiles : stats . totalFiles ,
622+ generatedAt : new Date ( ) . toISOString ( )
623+ } ;
624+ await fs . writeFile ( indexingStatsPath , JSON . stringify ( persistedStats , null , 2 ) ) ;
625+
562626 // Phase 5: Complete
563627 this . updateProgress ( 'complete' , 100 ) ;
564628
@@ -591,6 +655,7 @@ export class CodebaseIndexer {
591655
592656 private async scanFiles ( ) : Promise < string [ ] > {
593657 const files : string [ ] = [ ] ;
658+ const seen = new Set < string > ( ) ;
594659
595660 // Read .gitignore if respecting it
596661 let ig : ReturnType < typeof ignore . default > | null = null ;
@@ -617,6 +682,12 @@ export class CodebaseIndexer {
617682 } ) ;
618683
619684 for ( const file of matches ) {
685+ const normalizedFile = file . replace ( / \\ / g, '/' ) ;
686+ if ( seen . has ( normalizedFile ) ) {
687+ continue ;
688+ }
689+ seen . add ( normalizedFile ) ;
690+
620691 const relativePath = path . relative ( this . rootPath , file ) ;
621692
622693 // Check gitignore
0 commit comments