From a9502088ad84c521c52cdfb5476ba0e535db3a5f Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 09:07:58 +0000 Subject: [PATCH 1/4] fix(crawler): improve crawl progress display to show done/found/remaining Change progress format from "Crawling: 8/108(0/0) (7%) [10 parallel]" to "Crawling: 8 done / 108 found (+0/0 ext) [100 remaining] [10 parallel]" to clarify that the total is expected to grow during crawling. Closes #37 https://claude.ai/code/session_01LZgFU6p94rfq1ea8i2aJai --- .../@nitpicker/crawler/src/crawler/crawler.ts | 19 ++-- .../src/crawler/format-crawl-progress.spec.ts | 96 +++++++++++++++++++ .../src/crawler/format-crawl-progress.ts | 56 +++++++++++ 3 files changed, 161 insertions(+), 10 deletions(-) create mode 100644 packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts create mode 100644 packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.ts b/packages/@nitpicker/crawler/src/crawler/crawler.ts index ef19a36..29fbb71 100644 --- a/packages/@nitpicker/crawler/src/crawler/crawler.ts +++ b/packages/@nitpicker/crawler/src/crawler/crawler.ts @@ -22,6 +22,7 @@ import { crawlerLog } from '../debug.js'; import { detectPaginationPattern } from './detect-pagination-pattern.js'; import { fetchDestination } from './fetch-destination.js'; +import { formatCrawlProgress } from './format-crawl-progress.js'; import { generatePredictedUrls } from './generate-predicted-urls.js'; import { handleIgnoreAndSkip } from './handle-ignore-and-skip.js'; import { handleResourceResponse } from './handle-resource-response.js'; @@ -579,16 +580,14 @@ export default class Crawler extends EventEmitter { interval: this.#options.interval, verbose: this.#options.verbose || !process.stdout.isTTY, header: (_progress, done, total, limit) => { - const allDone = done + resumeOffset; - const allTotal = total + resumeOffset; - const extTotal = externalUrls.size; - const extDone = externalDoneUrls.size; - const pct = allTotal > 0 ? Math.round((allDone / allTotal) * 100) : 0; - return ( - c.bold(`Crawling: ${allDone - extDone}/${allTotal - extTotal}`) + - c.dim(`(${extDone}/${extTotal})`) + - c.bold(` (${pct}%) [${limit} parallel]`) - ); + return formatCrawlProgress({ + done, + total, + resumeOffset, + externalTotal: externalUrls.size, + externalDone: externalDoneUrls.size, + limit, + }); }, onPush: (url) => { const key = protocolAgnosticKey(url.withoutHashAndAuth); diff --git a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts new file mode 100644 index 0000000..96f4f72 --- /dev/null +++ b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts @@ -0,0 +1,96 @@ +import c from 'ansi-colors'; +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; + +import { formatCrawlProgress } from './format-crawl-progress.js'; + +describe('formatCrawlProgress', () => { + const originalEnabled = c.enabled; + + beforeAll(() => { + c.enabled = false; + }); + + afterAll(() => { + c.enabled = originalEnabled; + }); + + it('shows done, found, remaining for internal pages', () => { + const result = formatCrawlProgress({ + done: 50, + total: 100, + resumeOffset: 0, + externalTotal: 0, + externalDone: 0, + limit: 10, + }); + expect(result).toContain('50 done / 100 found'); + expect(result).toContain('50 remaining'); + }); + + it('includes external page counts', () => { + const result = formatCrawlProgress({ + done: 60, + total: 120, + resumeOffset: 0, + externalTotal: 20, + externalDone: 10, + limit: 5, + }); + expect(result).toContain('50 done / 100 found'); + expect(result).toContain('+10/20 ext'); + expect(result).toContain('60 remaining'); + }); + + it('includes resumeOffset in done and total counts', () => { + const result = formatCrawlProgress({ + done: 30, + total: 50, + resumeOffset: 100, + externalTotal: 0, + externalDone: 0, + limit: 10, + }); + expect(result).toContain('130 done / 150 found'); + expect(result).toContain('20 remaining'); + }); + + it('shows parallel count', () => { + const result = formatCrawlProgress({ + done: 10, + total: 20, + resumeOffset: 0, + externalTotal: 0, + externalDone: 0, + limit: 8, + }); + expect(result).toContain('8 parallel'); + }); + + it('handles zero total', () => { + const result = formatCrawlProgress({ + done: 0, + total: 0, + resumeOffset: 0, + externalTotal: 0, + externalDone: 0, + limit: 10, + }); + expect(result).toContain('0 done / 0 found'); + expect(result).toContain('0 remaining'); + }); + + it('calculates remaining correctly with both internal and external', () => { + const result = formatCrawlProgress({ + done: 80, + total: 200, + resumeOffset: 0, + externalTotal: 50, + externalDone: 30, + limit: 10, + }); + // internal remaining: (200-50) - (80-30) = 150 - 50 = 100 + // external remaining: 50 - 30 = 20 + // total remaining: 120 + expect(result).toContain('120 remaining'); + }); +}); diff --git a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts new file mode 100644 index 0000000..b0bff02 --- /dev/null +++ b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts @@ -0,0 +1,56 @@ +import c from 'ansi-colors'; + +/** + * Parameters for formatting crawl progress display. + */ +interface FormatCrawlProgressParams { + /** Number of URLs completed by the deal queue */ + readonly done: number; + /** Total number of URLs in the deal queue (including completed) */ + readonly total: number; + /** Offset from a previous resumed session */ + readonly resumeOffset: number; + /** Number of external URLs discovered */ + readonly externalTotal: number; + /** Number of external URLs completed */ + readonly externalDone: number; + /** Number of parallel workers */ + readonly limit: number; +} + +/** + * Formats the crawl progress header for the deal() progress display. + * + * Shows "done / found (remaining)" format instead of "done/total" + * to make it clearer that the total is expected to grow during crawling. + * @param root0 + * @param root0.done + * @param root0.total + * @param root0.resumeOffset + * @param root0.externalTotal + * @param root0.externalDone + * @param root0.limit + */ +export function formatCrawlProgress({ + done, + total, + resumeOffset, + externalTotal, + externalDone, + limit, +}: FormatCrawlProgressParams): string { + const allDone = done + resumeOffset; + const allTotal = total + resumeOffset; + const internalDone = allDone - externalDone; + const internalTotal = allTotal - externalTotal; + const internalRemaining = internalTotal - internalDone; + const externalRemaining = externalTotal - externalDone; + const totalRemaining = internalRemaining + externalRemaining; + + return ( + c.bold(`Crawling: ${internalDone} done / ${internalTotal} found`) + + c.dim(` (+${externalDone}/${externalTotal} ext)`) + + c.bold(` [${totalRemaining} remaining]`) + + c.dim(` [${limit} parallel]`) + ); +} From 8dc096bdd9e8c1747e67a3d736e0c7df9362dfc5 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 10:18:39 +0000 Subject: [PATCH 2/4] test(crawler): add exact format and combined parameter tests for formatCrawlProgress - Add exact output format test using toBe for regression detection - Add resumeOffset + externalTotal combined parameter test - Clean up auto-generated JSDoc @param root0 annotations https://claude.ai/code/session_01LZgFU6p94rfq1ea8i2aJai --- .../src/crawler/format-crawl-progress.spec.ts | 30 +++++++++++++++++++ .../src/crawler/format-crawl-progress.ts | 14 ++++----- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts index 96f4f72..c172d83 100644 --- a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts +++ b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts @@ -93,4 +93,34 @@ describe('formatCrawlProgress', () => { // total remaining: 120 expect(result).toContain('120 remaining'); }); + + it('produces exact expected format', () => { + const result = formatCrawlProgress({ + done: 50, + total: 100, + resumeOffset: 0, + externalTotal: 0, + externalDone: 0, + limit: 10, + }); + expect(result).toBe( + 'Crawling: 50 done / 100 found (+0/0 ext) [50 remaining] [10 parallel]', + ); + }); + + it('combines resumeOffset with external URLs correctly', () => { + const result = formatCrawlProgress({ + done: 40, + total: 80, + resumeOffset: 20, + externalTotal: 10, + externalDone: 5, + limit: 5, + }); + // allDone=60, allTotal=100, internalDone=55, internalTotal=90 + // internalRemaining=35, externalRemaining=5, totalRemaining=40 + expect(result).toContain('55 done / 90 found'); + expect(result).toContain('+5/10 ext'); + expect(result).toContain('40 remaining'); + }); }); diff --git a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts index b0bff02..458c9fb 100644 --- a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts +++ b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts @@ -23,13 +23,13 @@ interface FormatCrawlProgressParams { * * Shows "done / found (remaining)" format instead of "done/total" * to make it clearer that the total is expected to grow during crawling. - * @param root0 - * @param root0.done - * @param root0.total - * @param root0.resumeOffset - * @param root0.externalTotal - * @param root0.externalDone - * @param root0.limit + * @param params - The crawl progress parameters. + * @param params.done + * @param params.total + * @param params.resumeOffset + * @param params.externalTotal + * @param params.externalDone + * @param params.limit */ export function formatCrawlProgress({ done, From 85f06b0992f0dbf1504a3a7ed3f0c5cb59d70000 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 10:31:31 +0000 Subject: [PATCH 3/4] Restore percentage display in crawl progress The percentage gives users a quick-glance progress indicator that complements the "remaining" count. While % can decrease as new URLs are discovered during crawling, having both metrics together avoids confusion. https://claude.ai/code/session_01LZgFU6p94rfq1ea8i2aJai --- .../src/crawler/format-crawl-progress.spec.ts | 26 ++++++++++++++++++- .../src/crawler/format-crawl-progress.ts | 3 ++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts index c172d83..929ab63 100644 --- a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts +++ b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.spec.ts @@ -94,6 +94,30 @@ describe('formatCrawlProgress', () => { expect(result).toContain('120 remaining'); }); + it('shows percentage', () => { + const result = formatCrawlProgress({ + done: 50, + total: 100, + resumeOffset: 0, + externalTotal: 0, + externalDone: 0, + limit: 10, + }); + expect(result).toContain('(50%)'); + }); + + it('shows 0% when total is zero', () => { + const result = formatCrawlProgress({ + done: 0, + total: 0, + resumeOffset: 0, + externalTotal: 0, + externalDone: 0, + limit: 10, + }); + expect(result).toContain('(0%)'); + }); + it('produces exact expected format', () => { const result = formatCrawlProgress({ done: 50, @@ -104,7 +128,7 @@ describe('formatCrawlProgress', () => { limit: 10, }); expect(result).toBe( - 'Crawling: 50 done / 100 found (+0/0 ext) [50 remaining] [10 parallel]', + 'Crawling: 50 done / 100 found (+0/0 ext) (50%) [50 remaining] [10 parallel]', ); }); diff --git a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts index 458c9fb..07009c6 100644 --- a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts +++ b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts @@ -46,11 +46,12 @@ export function formatCrawlProgress({ const internalRemaining = internalTotal - internalDone; const externalRemaining = externalTotal - externalDone; const totalRemaining = internalRemaining + externalRemaining; + const pct = allTotal > 0 ? Math.round((allDone / allTotal) * 100) : 0; return ( c.bold(`Crawling: ${internalDone} done / ${internalTotal} found`) + c.dim(` (+${externalDone}/${externalTotal} ext)`) + - c.bold(` [${totalRemaining} remaining]`) + + c.bold(` (${pct}%) [${totalRemaining} remaining]`) + c.dim(` [${limit} parallel]`) ); } From 6e4b14d03fb9eabcf5a9e4e573bc1896db1eceb3 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 10:38:33 +0000 Subject: [PATCH 4/4] Add @returns and @param descriptions to formatCrawlProgress JSDoc https://claude.ai/code/session_01LZgFU6p94rfq1ea8i2aJai --- .../crawler/src/crawler/format-crawl-progress.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts index 07009c6..d41cc1f 100644 --- a/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts +++ b/packages/@nitpicker/crawler/src/crawler/format-crawl-progress.ts @@ -24,12 +24,13 @@ interface FormatCrawlProgressParams { * Shows "done / found (remaining)" format instead of "done/total" * to make it clearer that the total is expected to grow during crawling. * @param params - The crawl progress parameters. - * @param params.done - * @param params.total - * @param params.resumeOffset - * @param params.externalTotal - * @param params.externalDone - * @param params.limit + * @param params.done - Number of URLs completed by the deal queue. + * @param params.total - Total number of URLs in the deal queue (including completed). + * @param params.resumeOffset - Offset from a previous resumed session. + * @param params.externalTotal - Number of external URLs discovered. + * @param params.externalDone - Number of external URLs completed. + * @param params.limit - Number of parallel workers. + * @returns The formatted progress string with ANSI color codes. */ export function formatCrawlProgress({ done,