From 1324b32f0a28b324a44f4f1150ae38e5f2b11d6a Mon Sep 17 00:00:00 2001
From: Denys Kuchma <den.kuchma@ukr.net>
Date: Mon, 11 May 2026 21:33:45 +0300
Subject: [PATCH 1/2] new doc-collector

---
 bin/explorbot-cli.ts                          |   2 +
 boat/doc-collector/bin/doc-collector-cli.ts   |   5 +
 boat/doc-collector/package.json               |  24 ++
 boat/doc-collector/src/ai/documentarian.ts    | 184 +++++++++
 boat/doc-collector/src/cli.ts                 | 119 ++++++
 boat/doc-collector/src/config.ts              | 162 ++++++++
 boat/doc-collector/src/docbot.ts              | 391 ++++++++++++++++++
 boat/doc-collector/src/docs-renderer.ts       | 187 +++++++++
 boat/doc-collector/src/path-filter.ts         |  46 +++
 boat/doc-collector/src/research-navigation.ts |  90 ++++
 bun.lock                                      |   6 +-
 docs/commands.md                              |  32 +-
 docs/doc-collector.md                         | 140 +++++++
 package.json                                  |   2 +-
 src/ai/researcher.ts                          |   3 +-
 src/ai/researcher/coordinates.ts              |   2 +-
 src/ai/researcher/parser.ts                   |   3 +
 src/config.ts                                 |  16 +-
 src/explorbot.ts                              |   1 +
 src/state-manager.ts                          |   4 +
 src/utils/url-matcher.ts                      |   7 +-
 src/utils/web-element.ts                      |   5 +-
 tests/unit/config.test.ts                     |  42 ++
 tests/unit/doc-collector.test.ts              | 247 +++++++++++
 tests/unit/research-parser.test.ts            |  15 +
 tests/unit/url-matcher.test.ts                |  16 +
 26 files changed, 1738 insertions(+), 13 deletions(-)
 create mode 100644 boat/doc-collector/bin/doc-collector-cli.ts
 create mode 100644 boat/doc-collector/package.json
 create mode 100644 boat/doc-collector/src/ai/documentarian.ts
 create mode 100644 boat/doc-collector/src/cli.ts
 create mode 100644 boat/doc-collector/src/config.ts
 create mode 100644 boat/doc-collector/src/docbot.ts
 create mode 100644 boat/doc-collector/src/docs-renderer.ts
 create mode 100644 boat/doc-collector/src/path-filter.ts
 create mode 100644 boat/doc-collector/src/research-navigation.ts
 create mode 100644 docs/doc-collector.md
 create mode 100644 tests/unit/config.test.ts
 create mode 100644 tests/unit/doc-collector.test.ts

diff --git a/bin/explorbot-cli.ts b/bin/explorbot-cli.ts
index d19469c..5499742 100755
--- a/bin/explorbot-cli.ts
+++ b/bin/explorbot-cli.ts
@@ -823,6 +823,8 @@ program
   });
 
 import { createApiCommands } from '../boat/api-tester/src/cli.ts';
+import { createDocsCommands } from '../boat/doc-collector/src/cli.ts';
 program.addCommand(createApiCommands('api'));
+program.addCommand(createDocsCommands('docs'));
 
 program.parse();
diff --git a/boat/doc-collector/bin/doc-collector-cli.ts b/boat/doc-collector/bin/doc-collector-cli.ts
new file mode 100644
index 0000000..1502211
--- /dev/null
+++ b/boat/doc-collector/bin/doc-collector-cli.ts
@@ -0,0 +1,5 @@
+#!/usr/bin/env bun
+import { createDocsCommands } from '../src/cli.ts';
+// Standalone entry point: builds the docs command tree under the name 'doc-collector' and runs parse() on it.
+const program = createDocsCommands('doc-collector');
+program.parse();
diff --git a/boat/doc-collector/package.json b/boat/doc-collector/package.json
new file mode 100644
index 0000000..c081616
--- /dev/null
+++ b/boat/doc-collector/package.json
@@ -0,0 +1,24 @@
+{
+  "name": "doc-collector",
+  "version": "1.0.0",
+  "description": "AI-powered website documentation collector",
+  "type": "module",
+  "bin": {
+    "doc-collector": "./bin/doc-collector-cli.ts"
+  },
+  "scripts": {
+    "format": "biome format --write .",
+    "lint:fix": "biome lint --write .",
+    "check:fix": "biome check --write ."
+  },
+  "dependencies": {
+    "ai": "^6.0.6",
+    "commander": "^14.0.1",
+    "dedent": "^1.6.0",
+    "zod": "^4.1.8"
+  },
+  "devDependencies": {
+    "@biomejs/biome": "^1.5.3",
+    "typescript": "^5.0.0"
+  }
+}
diff --git a/boat/doc-collector/src/ai/documentarian.ts b/boat/doc-collector/src/ai/documentarian.ts
new file mode 100644
index 0000000..8396d3a
--- /dev/null
+++ b/boat/doc-collector/src/ai/documentarian.ts
@@ -0,0 +1,184 @@
+import dedent from 'dedent';
+import { z } from 'zod';
+import type { AIProvider } from '../../../../src/ai/provider.ts';
+import type { WebPageState } from '../../../../src/state-manager.ts';
+import type { DocbotConfig } from '../config.ts';
+
+class Documentarian {
+  private provider: AIProvider;
+  private config: DocbotConfig;
+
+  constructor(provider: AIProvider, config: DocbotConfig = {}) { // config falls back to an empty object when omitted
+    this.provider = provider;
+    this.config = config;
+  }
+
+  // Generates page documentation; when JSON generation failed, retries once with sanitized research.
+  async document(state: WebPageState, research: string): Promise<PageDocumentation> {
+    try {
+      return await this.generateDocumentation(state, research);
+    } catch (firstFailure) {
+      if (this.shouldRetryWithSanitizedResearch(firstFailure)) {
+        return this.generateDocumentation(state, this.sanitizeResearch(research), true);
+      }
+      throw firstFailure;
+    }
+  }
+
+  /**
+   * Builds the system prompt: the fixed role/task/rules sections followed by
+   * the optional `docs.prompt` text from the config (empty when not set).
+   */
+  private getSystemPrompt(): string {
+    const promptSuffix = this.config.docs?.prompt || '';
+
+    return dedent`
+    <role>
+    You are a product analyst preparing functional website documentation from UI research.
+    </role>
+
+    <task>
+    Convert exploratory UI research into a precise spec of what users can do on the current page.
+    Distinguish proven capabilities from assumptions.
+    Prefer accuracy over coverage.
+    </task>
+
+    <rules>
+    Only list capabilities that are grounded in the provided page research.
+    Put actions into "can" only when there is direct evidence in the page context.
+    Put actions into "might" only when the UI strongly suggests a capability but proof is incomplete.
+    Describe each action from the end-user perspective.
+    Be explicit about scope:
+    - one item
+    - list of items
+    - bulk operations
+    - all items
+    - page-level
+    Avoid implementation details, selectors, and QA wording.
+    Avoid duplicate actions with different phrasing.
+    </rules>
+
+    ${promptSuffix}
+    `;
+  }
+
+  private buildPrompt(state: WebPageState, research: string, simplified = false): string { // builds the user message sent alongside the system prompt
+    const headings = [state.h1, state.h2, state.h3, state.h4].filter(Boolean).join(' | '); // non-empty h1-h4 values joined with ' | '
+    const links = (state.links || [])
+      .slice(0, 50) // only the first 50 links are included in the prompt
+      .map((link) => `- ${link.title}: ${link.url}`)
+      .join('\n');
+    const simplificationNote = simplified // extra guidance block, empty unless `simplified` is true
+      ? dedent`
+        <fallback_mode>
+        The research text was simplified because the original formatting was noisy.
+        Ignore malformed table syntax and rely only on clear, repeated signals.
+        Prefer fewer actions over speculative coverage.
+        </fallback_mode>
+        `
+      : '';
+    // Sections in order: page identity, navigation links, research text, optional fallback note, output contract.
+    return dedent`
+    <page>
+    URL: ${state.url}
+    Title: ${state.title || ''}
+    Headings: ${headings}
+    </page>
+
+    <navigation_links>
+    ${links}
+    </navigation_links>
+
+    <research>
+    ${research}
+    </research>
+
+    ${simplificationNote}
+
+    <output_requirements>
+    Return structured data.
+    summary: short page purpose statement.
+    can: actions you are 100% sure are available on page.
+    might: actions that look possible but are not fully proven.
+    For each action provide:
+    - action: concise user-facing capability phrased as "user can ..."
+    - scope: one of one item, list of items, bulk operations, all items, page-level
+    - evidence: short reason based on visible UI or research
+    </output_requirements>
+    `;
+  }
+
+  private async generateDocumentation(state: WebPageState, research: string, simplified = false): Promise<PageDocumentation> { // one provider round-trip
+    const messages = [
+      {
+        role: 'system' as const,
+        content: this.getSystemPrompt(),
+      },
+      {
+        role: 'user' as const,
+        content: this.buildPrompt(state, research, simplified),
+      },
+    ];
+    // Requests an object matching pageDocumentationSchema; the call is tagged with agentName 'documentarian'.
+    const response = await this.provider.generateObject(messages, pageDocumentationSchema, undefined, {
+      agentName: 'documentarian',
+    });
+    // The provider result is cast, not re-validated here.
+    return response.object as PageDocumentation;
+  }
+
+  private shouldRetryWithSanitizedResearch(error: unknown): boolean {
+    const text = error instanceof Error ? error.message : String(error); // retry only when the message names a JSON-generation failure
+    return ['Failed to generate JSON', 'failed_generation'].some((marker) => text.includes(marker));
+  }
+
+  /**
+   * Removes table-like lines that look malformed from the research text.
+   * Kept: blank lines, lines without '|', lines containing '|------', and lines that start
+   * with '|' (ignoring indentation) and contain at least four '|'. Other '|' lines are dropped.
+   */
+  private sanitizeResearch(research: string): string {
+    const keepLine = (line: string): boolean => {
+      const trimmed = line.trim();
+      // Blank lines and plain text pass through untouched.
+      if (trimmed === '' || !line.includes('|')) {
+        return true;
+      }
+
+      const pipeCount = line.split('|').length - 1;
+      // A single '|' is treated as noise rather than a table row.
+      if (pipeCount < 2) {
+        return false;
+      }
+
+      // Table separator rows are always kept.
+      if (line.includes('|------')) {
+        return true;
+      }
+      // Data rows need a leading '|' and at least four pipes.
+      return trimmed.startsWith('|') && pipeCount >= 4;
+    };
+
+    return research
+      .split('\n')
+      .filter(keepLine)
+      .join('\n');
+  }
+}
+
+const capabilitySchema = z.object({ // one capability entry: what the user can do, at which scope, and why
+  action: z.string(),
+  scope: z.enum(['one item', 'list of items', 'bulk operations', 'all items', 'page-level']),
+  evidence: z.string(),
+});
+// Structured answer requested for a single page: a summary plus proven ("can") and assumed ("might") capabilities.
+const pageDocumentationSchema = z.object({
+  summary: z.string(),
+  can: z.array(capabilitySchema),
+  might: z.array(capabilitySchema),
+});
+// Static type inferred from the schema above.
+type PageDocumentation = z.infer<typeof pageDocumentationSchema>;
+
+export { Documentarian };
+export type { PageDocumentation };
diff --git a/boat/doc-collector/src/cli.ts b/boat/doc-collector/src/cli.ts
new file mode 100644
index 0000000..40a84c0
--- /dev/null
+++ b/boat/doc-collector/src/cli.ts
@@ -0,0 +1,119 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { Command } from 'commander';
+import { setPreserveConsoleLogs } from '../../../src/utils/logger.ts';
+import { DocBot, type DocbotOptions } from './docbot.ts';
+
+/**
+ * Translates parsed commander flags into DocbotOptions.
+ * `--debug` also enables verbose mode, and a bare `--session` flag (value `true`)
+ * falls back to the file 'output/session.json'.
+ */
+function buildOptions(options: any): DocbotOptions {
+  return {
+    verbose: options.verbose || options.debug,
+    config: options.config,
+    path: options.path,
+    show: options.show,
+    headless: options.headless,
+    incognito: options.incognito,
+    session: options.session === true ? 'output/session.json' : options.session,
+    docsConfig: options.docsConfig,
+  };
+}
+
+function addCommonOptions(cmd: Command): Command { // registers the shared flags and returns the result of the chained .option() calls
+  return cmd
+    .option('-v, --verbose', 'Enable verbose logging')
+    .option('--debug', 'Enable debug logging')
+    .option('-c, --config <path>', 'Path to explorbot configuration file')
+    .option('--docs-config <path>', 'Path to doc collector configuration file')
+    .option('-p, --path <path>', 'Working directory path')
+    .option('-s, --show', 'Show browser window')
+    .option('--headless', 'Run browser in headless mode')
+    .option('--incognito', 'Run without recording experiences')
+    .option('--session [file]', 'Save/restore browser session from file'); // optional value: a bare flag yields `true`
+}
+
+// Builds the `collect` and `init` subcommands under a parent command called `name`.
+// `collect` stops the bot before exiting even when start or the crawl fails.
+export function createDocsCommands(name = 'docs'): Command {
+  const cmd = new Command(name);
+  cmd.description('AI-powered website documentation collector');
+
+  addCommonOptions(cmd.command('collect <path>').description('Crawl pages and generate documentation spec').option('--max-pages <count>', 'Maximum number of pages to document')).action(async (startPath, options) => {
+    setPreserveConsoleLogs(true);
+    let bot: DocBot | undefined;
+    let exitCode = 0;
+    try {
+      bot = new DocBot({ ...buildOptions(options), startUrl: startPath });
+      await bot.start();
+      const maxPages = options.maxPages ? Number.parseInt(options.maxPages, 10) : undefined;
+      const result = await bot.collect(startPath, { maxPages });
+      console.log(`\nDocumented ${result.pages.length} page(s)`);
+      console.log(`Skipped ${result.skipped.length} page(s)`);
+      console.log(`Spec index: ${result.indexPath}`);
+      console.log(`Pages dir: ${path.join(result.outputDir, 'pages')}`);
+    } catch (error) {
+      console.error('Failed:', error instanceof Error ? error.message : 'Unknown error');
+      exitCode = 1;
+    }
+
+    // Runs on success and failure alike; an error raised by stop() is reported and turns the exit code to 1.
+    try {
+      await bot?.stop();
+    } catch (error) {
+      console.error('Failed:', error instanceof Error ? error.message : 'Unknown error');
+      exitCode = 1;
+    }
+    process.exit(exitCode);
+  });
+
+  cmd
+    .command('init')
+    .description('Initialize doc collector configuration')
+    .option('-f, --force', 'Overwrite existing config file')
+    .option('-p, --path <path>', 'Working directory for initialization')
+    .action(async (options) => {
+      const originalCwd = process.cwd();
+      if (options.path) {
+        const resolvedPath = path.resolve(options.path);
+        fs.mkdirSync(resolvedPath, { recursive: true });
+        process.chdir(resolvedPath);
+        console.log(`Working in: ${resolvedPath}`);
+      }
+      // The config file is always created in the (possibly switched) working directory.
+      const configPath = path.resolve('docbot.config.ts');
+      if (fs.existsSync(configPath) && !options.force) {
+        console.log(`Config file already exists: ${configPath}`);
+        console.log('Use --force to overwrite.');
+        process.exit(1);
+      }
+
+      const configContent = `export default {
+          docs: {
+            maxPages: 100,
+            output: 'docs',
+            screenshot: true,
+            collapseDynamicPages: true,
+            scope: 'site',
+            includePaths: [],
+            excludePaths: [],
+            deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
+            minCanActions: 1,
+            minInteractiveElements: 3,
+            // prompt: 'Add domain-specific documentation guidance here',
+          },
+        };
+      `;
+
+      fs.writeFileSync(configPath, configContent, 'utf8');
+      console.log(`Created: ${configPath}`);
+
+      if (process.cwd() !== originalCwd) {
+        process.chdir(originalCwd);
+      }
+    });
+
+  return cmd;
+}
diff --git a/boat/doc-collector/src/config.ts b/boat/doc-collector/src/config.ts
new file mode 100644
index 0000000..79ed513
--- /dev/null
+++ b/boat/doc-collector/src/config.ts
@@ -0,0 +1,162 @@
+import { existsSync, readFileSync } from 'node:fs';
+import path, { resolve } from 'node:path';
+import { parseEnv } from 'node:util';
+import { ConfigParser } from '../../../src/config.ts';
+
+class DocbotConfigParser {
+  private static instance: DocbotConfigParser;
+  private config: DocbotConfig | null = null;
+  private configPath: string | null = null;
+
+  private constructor() {}
+
+  /** Returns the shared parser, creating it on first use. */
+  static getInstance(): DocbotConfigParser {
+    DocbotConfigParser.instance ??= new DocbotConfigParser();
+
+    return DocbotConfigParser.instance;
+  }
+
+  /** Merges variables parsed from the given env file into process.env; does nothing when the file is missing. */
+  static loadEnv(filePath: string): void {
+    const envFile = resolve(filePath);
+    if (existsSync(envFile)) Object.assign(process.env, parseEnv(readFileSync(envFile, 'utf8')));
+  }
+
+  /**
+   * Loads and caches the config. A relative `options.config` is resolved against the working directory
+   * at call time (before switching to `options.path`), and the original cwd is restored even on failure.
+   */
+  async loadConfig(options?: { config?: string; path?: string }): Promise<DocbotConfig> {
+    if (this.config && !options?.config && !options?.path) {
+      return this.config;
+    }
+    const originalCwd = process.cwd();
+    const explicitConfig = options?.config ? resolve(options.config) : null;
+    try {
+      if (options?.path) {
+        process.chdir(resolve(options.path));
+      }
+      DocbotConfigParser.loadEnv('.env');
+      const resolvedPath = explicitConfig || this.findConfigFile();
+      if (!resolvedPath) {
+        this.config = this.mergeWithDefaults({});
+        this.configPath = null;
+        return this.config;
+      }
+      const configModule = await this.loadConfigModule(resolvedPath);
+      this.config = this.mergeWithDefaults(configModule.default || configModule || {});
+      this.configPath = resolvedPath;
+      return this.config;
+    } finally {
+      if (originalCwd !== process.cwd()) {
+        process.chdir(originalCwd);
+      }
+    }
+  }
+
+  /** Returns the loaded config, or a freshly built default config when nothing has been loaded yet. */
+  getConfig(): DocbotConfig {
+    const loaded = this.config;
+
+    return loaded ?? this.mergeWithDefaults({});
+  }
+
+  getConfigPath(): string | null { // path recorded by the last loadConfig; null before any load or when no config file was found
+    return this.configPath;
+  }
+
+  /**
+   * Resolves the docs output directory: the `docs.output` setting (or 'docs' when empty)
+   * joined onto the output directory reported by the main ConfigParser.
+   */
+  getOutputDir(): string {
+    const baseDir = ConfigParser.getInstance().getOutputDir();
+    return path.join(baseDir, this.getConfig().docs?.output || 'docs');
+  }
+
+  /**
+   * Looks in the current working directory for docbot.config.js, .mjs, then .ts
+   * and returns the absolute path of the first one that exists, or null.
+   */
+  private findConfigFile(): string | null {
+    const candidates = ['docbot.config.js', 'docbot.config.mjs', 'docbot.config.ts'];
+    const found = candidates
+      .map((fileName) => resolve(process.cwd(), fileName))
+      .find((fullPath) => existsSync(fullPath));
+
+    return found ?? null;
+  }
+
+  private async loadConfigModule(configPath: string): Promise<any> { // picks a loader from the file extension
+    const ext = configPath.split('.').pop();
+    // TypeScript configs: try a dynamic import first, then fall back to require() if the import throws.
+    if (ext === 'ts') {
+      try {
+        return await import(configPath);
+      } catch {
+        const require = (await import('node:module')).createRequire(import.meta.url);
+        return require(configPath);
+      }
+    }
+    // JavaScript configs are always loaded through a dynamic import.
+    if (ext === 'js' || ext === 'mjs') {
+      return await import(configPath);
+    }
+    // Any other extension is read from disk and parsed as JSON.
+    return JSON.parse(readFileSync(configPath, 'utf8'));
+  }
+
+  private mergeWithDefaults(config: Partial<DocbotConfig>): DocbotConfig { // overlays `config` on the built-in defaults via deepMerge
+    return this.deepMerge(
+      {
+        docs: {
+          maxPages: 100,
+          output: 'docs',
+          screenshot: true,
+          collapseDynamicPages: true,
+          scope: 'site',
+          includePaths: [],
+          excludePaths: [],
+          deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
+          minCanActions: 1,
+          minInteractiveElements: 3,
+        },
+      },
+      config // user values win; arrays replace the default arrays instead of being concatenated
+    );
+  }
+
+  // Recursively merges `source` into a shallow copy of `target`.
+  // Plain objects are merged key by key; every other value (arrays included) replaces the target's value.
+  private deepMerge(target: any, source: any): any {
+    const merged = { ...target };
+
+    for (const key in source) {
+      const value = source[key];
+      const isPlainObject = Boolean(value) && typeof value === 'object' && !Array.isArray(value) && value.constructor === Object;
+      merged[key] = isPlainObject ? this.deepMerge(merged[key] || {}, value) : value;
+    }
+
+    return merged;
+  }
+}
+
+interface DocbotConfig {
+  docs?: {
+    maxPages?: number; // built-in default: 100
+    output?: string; // sub-directory under the main output directory; built-in default: 'docs'
+    screenshot?: boolean; // built-in default: true
+    prompt?: string; // no built-in default
+    collapseDynamicPages?: boolean; // built-in default: true
+    scope?: 'site' | 'section' | 'subtree'; // built-in default: 'site'
+    includePaths?: string[]; // built-in default: empty list
+    excludePaths?: string[]; // built-in default: empty list
+    deniedPathSegments?: string[]; // built-in default lists callback/logout/delete-style segments
+    minCanActions?: number; // built-in default: 1
+    minInteractiveElements?: number; // built-in default: 3
+  };
+}
+
+export { DocbotConfigParser };
+export type { DocbotConfig };
diff --git a/boat/doc-collector/src/docbot.ts b/boat/doc-collector/src/docbot.ts
new file mode 100644
index 0000000..70d03f0
--- /dev/null
+++ b/boat/doc-collector/src/docbot.ts
@@ -0,0 +1,391 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import path from 'node:path';
+import { ExplorBot, type ExplorBotOptions } from '../../../src/explorbot.ts';
+import type { Link, WebPageState } from '../../../src/state-manager.ts';
+import { normalizeUrl } from '../../../src/state-manager.ts';
+import { sanitizeFilename } from '../../../src/utils/strings.ts';
+import { tag } from '../../../src/utils/logger.ts';
+import { Documentarian, type PageDocumentation } from './ai/documentarian.ts';
+import { type DocbotConfig, DocbotConfigParser } from './config.ts';
+import { type DocumentedPage, renderPageDocumentation, renderSpecIndex, type SkippedPage } from './docs-renderer.ts';
+import { getDocPageKey, shouldCrawlDocPath } from './path-filter.ts';
+import { extractResearchNavigationTargets } from './research-navigation.ts';
+
+class DocBot {
+  private explorBot: ExplorBot;
+  private configParser: DocbotConfigParser;
+  private config: DocbotConfig = {};
+  private documentarian!: Documentarian;
+  private options: DocbotOptions;
+  private scopeRoot = '/';
+
+  constructor(options: DocbotOptions = {}) { // wires up the underlying ExplorBot; config loading happens later in start()
+    this.options = options;
+    const baseUrl = this.extractAbsoluteBaseUrl(options.startUrl || '/'); // origin of an absolute start URL, otherwise undefined
+    this.explorBot = new ExplorBot({
+      baseUrl,
+      verbose: options.verbose,
+      config: options.config,
+      path: options.path,
+      show: options.show,
+      headless: options.headless,
+      incognito: options.incognito,
+      session: options.session,
+    });
+    this.configParser = DocbotConfigParser.getInstance(); // shared parser instance
+  }
+
+  async start(): Promise<void> { // starts ExplorBot, loads the docs config, then creates the documentarian and the output folders
+    await this.explorBot.start();
+    this.config = await this.configParser.loadConfig({
+      config: this.options.docsConfig,
+      path: this.options.path,
+    });
+    this.documentarian = new Documentarian(this.explorBot.getProvider(), this.config);
+    this.ensureDirectory(this.configParser.getOutputDir());
+    this.ensureDirectory(this.getPagesDir());
+  }
+
+  async stop(): Promise<void> { // delegates shutdown to the underlying ExplorBot
+    await this.explorBot.stop();
+  }
+
+  async collect(startPath: string, opts: CollectOptions = {}): Promise<CollectionResult> { // crawls from startPath, writes page docs and the index
+    const effectiveStartPath = this.normalizeStartPath(startPath);
+    this.scopeRoot = this.getScopeRoot(effectiveStartPath);
+    const effectiveMaxPages = this.getMaxPages(opts.maxPages);
+    const queue: string[] = [];
+    const queued = new Set<string>();
+    const documented = new Set<string>();
+    const pages: DocumentedPage[] = [];
+    const skipped: SkippedPage[] = [];
+    const baseUrl = this.explorBot.getConfig().playwright.url;
+
+    this.enqueuePath(effectiveStartPath, queue, queued);
+    // First-in, first-out crawl: stops when the queue drains or enough pages have been documented (skips do not count).
+    while (queue.length > 0 && pages.length < effectiveMaxPages) {
+      const target = queue.shift();
+      if (!target) {
+        continue;
+      }
+      // Ignore targets whose page key was already handled (documented or judged low-signal).
+      const targetKey = this.getPageKey(target);
+      if (documented.has(targetKey)) {
+        continue;
+      }
+
+      const stateManager = this.explorBot.getExplorer().getStateManager();
+      if (stateManager.hasVisitedState(target)) {
+        continue;
+      }
+
+      try {
+        tag('info').log(`Collecting docs for ${this.toDisplayUrl(target, baseUrl)}`);
+        await this.explorBot.visit(target);
+        // A dead loop reported by the state manager ends the whole crawl, not just this page.
+        if (stateManager.isInDeadLoop()) {
+          tag('warning').log('Dead loop detected during docs crawl, stopping collection');
+          skipped.push({
+            url: target,
+            reason: 'dead loop detected during crawl',
+          });
+          break;
+        }
+
+        const state = this.explorBot.getCurrentState();
+        if (!state) {
+          skipped.push({
+            url: target,
+            reason: 'page state was not captured after navigation',
+          });
+          continue;
+        }
+        // Re-check with the key of the captured state's URL (falls back to the target when the state has no URL).
+        const pageKey = this.getPageKey(state.url || target);
+        if (documented.has(pageKey)) {
+          continue;
+        }
+
+        const research = await this.explorBot.agentResearcher().research(state, {
+          screenshot: this.shouldUseScreenshots(),
+          force: true,
+        });
+        const documentation = await this.documentarian.document(state, research);
+        const lowSignalReason = this.getLowSignalReason(documentation, research);
+        if (lowSignalReason) {
+          skipped.push({
+            url: state.url,
+            reason: lowSignalReason,
+          });
+          documented.add(pageKey);
+          continue;
+        }
+        const filePath = this.savePageDocumentation(state, documentation);
+
+        pages.push({
+          url: state.url,
+          title: state.title || '',
+          summary: documentation.summary,
+          canCount: documentation.can.length,
+          mightCount: documentation.might.length,
+          canActions: documentation.can.map((item) => item.action),
+          mightActions: documentation.might.map((item) => item.action),
+          filePath,
+        });
+        documented.add(pageKey);
+        // Queue follow-up paths that are neither documented nor already visited; low-signal pages never reach this point.
+        const nextPaths = this.extractNextPaths(state, baseUrl, research);
+        for (const nextPath of nextPaths) {
+          if (documented.has(this.getPageKey(nextPath))) {
+            continue;
+          }
+          if (stateManager.hasVisitedState(nextPath)) {
+            continue;
+          }
+          this.enqueuePath(nextPath, queue, queued);
+        }
+      } catch (error) {
+        const reason = error instanceof Error ? error.message : String(error);
+        tag('warning').log(`Skipping ${target}: ${reason}`);
+        skipped.push({
+          url: target,
+          reason,
+        });
+      }
+    }
+
+    const indexPath = this.saveIndex(effectiveStartPath, pages, skipped, effectiveMaxPages);
+
+    return {
+      pages,
+      skipped,
+      indexPath,
+      outputDir: this.configParser.getOutputDir(),
+    };
+  }
+
+  /**
+   * Picks the page limit: a positive `override` wins, then a positive
+   * `docs.maxPages` from the config, and finally the built-in limit of 100.
+   */
+  private getMaxPages(override?: number): number {
+    const candidates = [override, this.config.docs?.maxPages];
+    const firstPositive = candidates.find((value) => value && value > 0);
+    if (firstPositive) {
+      return firstPositive;
+    }
+    return 100;
+  }
+
+  /**
+   * Screenshots are used for research unless `docs.screenshot` is explicitly
+   * set to false; an unset value counts as enabled.
+   */
+  private shouldUseScreenshots(): boolean {
+    return this.config.docs?.screenshot !== false;
+  }
+
+  /**
+   * Gathers the paths to crawl after the current page, in discovery order:
+   * same-origin links from the page state first, then navigation targets
+   * extracted from the research text. A candidate is kept only when it passes
+   * shouldCrawlDocPath and isInScope, and every path is returned once.
+   */
+  private extractNextPaths(state: WebPageState, baseUrl: string, research: string): string[] {
+    const accepted = new Set<string>();
+
+    // Shared gate for both sources; a Set keeps the first occurrence and its position.
+    const consider = (candidate: string): void => {
+      if (!shouldCrawlDocPath(candidate, this.config)) {
+        return;
+      }
+      if (!this.isInScope(candidate)) {
+        return;
+      }
+      accepted.add(candidate);
+    };
+
+    // Links captured in the page state; resolveLink yields null for links that must be ignored.
+    for (const link of state.links || []) {
+      const resolved = this.resolveLink(link, baseUrl);
+      if (resolved) {
+        consider(resolved);
+      }
+    }
+
+    // Navigation targets mentioned in the research text.
+    for (const target of extractResearchNavigationTargets(state, research)) {
+      consider(target);
+    }
+
+    // Spread preserves insertion order, so link-derived paths precede research-derived ones.
+    const ordered = [...accepted];
+
+    return ordered;
+  }
+
+  /**
+   * Resolves a link against `baseUrl` and returns its path, query and hash.
+   * Returns null when the URL cannot be parsed or points to another origin.
+   */
+  private resolveLink(link: Link, baseUrl: string): string | null {
+    let linkUrl: URL;
+    try {
+      linkUrl = new URL(link.url, baseUrl);
+    } catch {
+      return null;
+    }
+
+    if (linkUrl.origin !== new URL(baseUrl).origin) {
+      return null;
+    }
+    return `${linkUrl.pathname || '/'}${linkUrl.search}${linkUrl.hash}`;
+  }
+
+  private toDisplayUrl(target: string, baseUrl: string): string { // absolute URL used in log output
+    try {
+      return new URL(target, baseUrl).toString();
+    } catch {
+      return target; // unparsable input is logged as given
+    }
+  }
+
+  /**
+   * Adds a path to the crawl queue unless its page key was queued before.
+   * Paths without a leading slash are queued as '/' plus their normalized form.
+   */
+  private enqueuePath(inputPath: string, queue: string[], queued: Set<string>): void {
+    const normalized = normalizeUrl(inputPath);
+    const pageKey = this.getPageKey(inputPath);
+    if (queued.has(pageKey)) {
+      return;
+    }
+    queued.add(pageKey);
+    queue.push(inputPath.startsWith('/') ? inputPath : `/${normalized}`);
+  }
+
+  private getPageKey(pageUrl: string): string { // deduplication key, computed by getDocPageKey with the loaded config
+    return getDocPageKey(pageUrl, this.config);
+  }
+
+  private normalizeStartPath(startPath: string): string { // absolute URL -> path + query + hash; other input is returned unchanged
+    try {
+      const { pathname, search, hash } = new URL(startPath);
+      return `${pathname || '/'}${search}${hash}`;
+    } catch {
+      return startPath;
+    }
+  }
+
+  // Returns the origin of an absolute URL, or undefined when the input does not parse as one.
+  private extractAbsoluteBaseUrl(startPath: string): string | undefined {
+    try {
+      return new URL(startPath).origin;
+    } catch {
+      return undefined;
+    }
+  }
+
+  /**
+   * Checks a crawl candidate against `docs.scope`. Query strings and hashes are ignored on both
+   * sides, and a scope root of '/' covers every path. Values other than 'subtree'/'section' allow all.
+   */
+  private isInScope(target: string): boolean {
+    const scope = this.config.docs?.scope || 'site';
+    const root = this.scopeRoot.replace(/[?#].*$/, '');
+    if ((scope !== 'subtree' && scope !== 'section') || root === '/') {
+      return true;
+    }
+
+    const pathOnly = this.normalizeStartPath(target).replace(/[?#].*$/, '');
+    if (pathOnly === root || pathOnly.startsWith(`${root}/`)) {
+      return true;
+    }
+    // 'section' additionally accepts sibling paths that continue the root with '-'.
+    return scope === 'section' && pathOnly.startsWith(`${root}-`);
+  }
+
+  /**
+   * Derives the scope root from the start path: its first four path segments at most,
+   * with query string and hash removed, or '/' when the path has no segments.
+   */
+  private getScopeRoot(startPath: string): string {
+    const pathOnly = this.normalizeStartPath(startPath).replace(/[?#].*$/, '');
+    const segments = pathOnly.split('/').filter(Boolean).slice(0, 4);
+
+    return segments.length === 0 ? '/' : `/${segments.join('/')}`;
+  }
+
+  /**
+   * Returns a skip reason when the page has fewer proven actions than `docs.minCanActions` (default 1)
+   * and the research reports fewer interactive elements than `docs.minInteractiveElements` (default 3).
+   */
+  private getLowSignalReason(documentation: PageDocumentation, research: string): string | null {
+    const provenCount = documentation.can.length;
+    if (provenCount >= (this.config.docs?.minCanActions ?? 1)) {
+      return null;
+    }
+
+    const interactiveCount = this.countInteractiveElements(research);
+    const enoughElements = interactiveCount >= (this.config.docs?.minInteractiveElements ?? 3);
+
+    return enoughElements ? null : `low-signal page: only ${provenCount} proven actions and ${interactiveCount} interactive elements`;
+  }
+
+  private countInteractiveElements(research: string): number {
+    const counts = Array.from(research.matchAll(/\((\d+) elements?\)/g), (found) => Number.parseInt(found[1], 10));
+    return counts.reduce((total, count) => total + count, 0); // sum of every "(N element)" / "(N elements)" marker in the text
+  }
+
+  private savePageDocumentation(state: WebPageState, documentation: PageDocumentation): string { // writes one page file, returns its path
+    const pagePath = this.getPageFilePath(state.url);
+    writeFileSync(pagePath, renderPageDocumentation(state, documentation), 'utf8');
+    return pagePath;
+  }
+
+  private saveIndex(startPath: string, pages: DocumentedPage[], skipped: SkippedPage[], maxPages: number): string { // writes spec.md, returns its path
+    const indexPath = path.join(this.configParser.getOutputDir(), 'spec.md');
+    writeFileSync(indexPath, renderSpecIndex(this.configParser.getOutputDir(), startPath, pages, skipped, maxPages), 'utf8');
+    return indexPath;
+  }
+
+  private getPagesDir(): string { // 'pages' sub-directory of the docs output directory
+    return path.join(this.configParser.getOutputDir(), 'pages');
+  }
+
+  // Maps a page URL to its markdown file inside the pages directory.
+  // Falls back to 'root.md' when no file name can be derived from the URL.
+  private getPageFilePath(pageUrl: string): string {
+    const normalized = normalizeUrl(pageUrl || '/');
+    const baseName = sanitizeFilename(normalized || 'root') || 'root';
+
+    return path.join(this.getPagesDir(), `${baseName}.md`);
+  }
+
+  /** Creates the directory (and missing parents) unless it already exists. */
+  private ensureDirectory(dirPath: string): void {
+    if (!existsSync(dirPath)) {
+      mkdirSync(dirPath, { recursive: true });
+    }
+  }
+}
+
+interface DocbotOptions extends ExplorBotOptions {
+  docsConfig?: string; // passed to the docs config loader as `config`
+  startUrl?: string; // when absolute, its origin becomes the ExplorBot baseUrl
+}
+// Per-run overrides accepted by DocBot.collect.
+interface CollectOptions {
+  maxPages?: number; // positive values take precedence over docs.maxPages
+}
+// What DocBot.collect returns once the crawl has finished.
+interface CollectionResult {
+  pages: DocumentedPage[];
+  skipped: SkippedPage[];
+  indexPath: string; // location of the generated spec.md
+  outputDir: string;
+}
+
+export { DocBot };
+export type { DocbotOptions, CollectOptions, CollectionResult, DocumentedPage, SkippedPage };
diff --git a/boat/doc-collector/src/docs-renderer.ts b/boat/doc-collector/src/docs-renderer.ts
new file mode 100644
index 0000000..0be4736
--- /dev/null
+++ b/boat/doc-collector/src/docs-renderer.ts
@@ -0,0 +1,187 @@
+import path from 'node:path';
+import type { WebPageState } from '../../../src/state-manager.ts';
+import type { PageDocumentation } from './ai/documentarian.ts';
+
+/**
+ * Renders the markdown document for a single researched page.
+ *
+ * Layout: an H1 with the page URL, an optional `Title:` line, then the `Purpose`,
+ * `User Can` and `User Might` sections. Each action is a `- <action> -> <scope>` bullet
+ * followed by an indented `Proof:` (can) or `Signal:` (might) line.
+ *
+ * @param state - Page state; `url` becomes the heading, `title` is printed when set.
+ * @param documentation - Summary plus the proven (`can`) and assumed (`might`) actions.
+ * @returns Markdown text ending with exactly one newline.
+ */
+function renderPageDocumentation(state: WebPageState, documentation: PageDocumentation): string {
+  const lines: string[] = [`# ${state.url}`, ''];
+  if (state.title) {
+    lines.push(`Title: ${normalizeInlineText(state.title)}`, '');
+  }
+  lines.push('## Purpose', '', ensureSentence(documentation.summary), '');
+
+  // Both action sections share one layout; only the wording differs.
+  const sections = [
+    {
+      heading: '## User Can',
+      kind: 'can',
+      evidenceLabel: 'Proof',
+      emptyNote: '- No proven actions were identified from the collected research.',
+      items: documentation.can,
+    },
+    {
+      heading: '## User Might',
+      kind: 'might',
+      evidenceLabel: 'Signal',
+      emptyNote: '- No assumption-based actions were identified.',
+      items: documentation.might,
+    },
+  ] as const;
+
+  for (const section of sections) {
+    lines.push(section.heading, '');
+    if (section.items.length === 0) {
+      lines.push(section.emptyNote);
+    }
+    for (const item of section.items) {
+      // Scope is collapsed to one line like every other field, so it cannot break the bullet.
+      lines.push(`- ${normalizeAction(item.action, section.kind)} -> ${normalizeInlineText(item.scope)}`);
+      lines.push(`  ${section.evidenceLabel}: ${ensureSentence(item.evidence)}`);
+    }
+    lines.push('');
+  }
+  return `${lines.join('\n').trimEnd()}\n`;
+}
+
+/**
+ * Renders `spec.md`, the index that summarizes a whole collection run.
+ *
+ * @param outputDir - Directory holding the index; page links are written relative to it.
+ * @param startPath - Printed as the start page in the overview.
+ * @param pages - Documented pages, rendered in the given order.
+ * @param skipped - Skipped pages; listed with their reason when there are any.
+ * @param maxPages - Page limit, printed in the overview.
+ * @returns Markdown text ending with exactly one newline.
+ */
+function renderSpecIndex(outputDir: string, startPath: string, pages: DocumentedPage[], skipped: SkippedPage[], maxPages: number): string {
+  const lines: string[] = [
+    '# Website Spec',
+    '',
+    '## Overview',
+    '',
+    `Start page: ${startPath}`,
+    `Pages documented: ${pages.length}`,
+    `Pages skipped: ${skipped.length}`,
+    `Max pages: ${maxPages}`,
+    '',
+    '## Pages',
+    '',
+  ];
+
+  if (pages.length === 0) {
+    lines.push('- No pages were documented.', '');
+  }
+
+  // Emits the optional "User Can:" / "User Might:" bullet list of one page.
+  const pushActions = (heading: string, kind: 'can' | 'might', actions: string[]): void => {
+    if (actions.length === 0) {
+      return;
+    }
+    lines.push(heading, ...actions.map((action) => `- ${normalizeAction(action, kind)}`), '');
+  };
+
+  for (const page of pages) {
+    // path.relative may yield backslashes; the link is written with forward slashes.
+    const link = path.relative(outputDir, page.filePath).replaceAll('\\', '/');
+    lines.push(`### [${page.url}](${link})`, '');
+
+    lines.push(`Purpose: ${ensureSentence(page.summary)}`);
+    lines.push(`Proven actions: ${page.canCount}`);
+    lines.push(`Possible actions: ${page.mightCount}`);
+    if (page.title) {
+      lines.push(`Title: ${normalizeInlineText(page.title)}`);
+    }
+    lines.push('');
+
+    pushActions('User Can:', 'can', page.canActions);
+    pushActions('User Might:', 'might', page.mightActions);
+  }
+
+  if (skipped.length > 0) {
+    const entries = skipped.map((page) => `- ${page.url}. Reason: ${ensureSentence(page.reason)}`);
+    lines.push('## Skipped', '', ...entries, '');
+  }
+
+  return `${lines.join('\n').trimEnd()}\n`;
+}
+
+/**
+ * Rewrites an action phrase so it reads `user can ...` or `user might ...`.
+ *
+ * Whitespace is collapsed and the final `.`, `!` or `?` is dropped. A leading `can `,
+ * `might `, `user can ` or `user might ` (any letter case) is replaced by the prefix
+ * for `kind`; a phrase that already starts with that prefix is returned as written.
+ *
+ * @param action - Raw action text, with or without a modal prefix.
+ * @param kind - `'can'` (default) or `'might'`; selects the prefix.
+ * @returns The normalized phrase without terminal punctuation.
+ */
+function normalizeAction(action: string, kind: 'can' | 'might' = 'can'): string {
+  const wantedPrefix = `user ${kind} `;
+  const trimmed = normalizeInlineText(action);
+  if (!trimmed) {
+    // The fallback follows `kind`, so an empty "might" entry is not worded as a proven action.
+    return `${wantedPrefix}interact with this page`;
+  }
+
+  // ensureSentence guarantees a terminal mark; slice() drops that last character.
+  const normalized = ensureSentence(trimmed).slice(0, -1);
+  const lower = normalized.toLowerCase();
+  if (lower.startsWith(wantedPrefix)) {
+    return normalized;
+  }
+
+  // Any other known prefix is swapped for the wanted one. This includes a bare "might "
+  // under kind 'can', which must not turn into "user can might ...".
+  const knownPrefixes = ['user can ', 'user might ', 'can ', 'might '];
+  const found = knownPrefixes.find((prefix) => lower.startsWith(prefix));
+  if (found) {
+    return `${wantedPrefix}${normalized.slice(found.length)}`;
+  }
+
+  return `${wantedPrefix}${normalized}`;
+}
+
+/** Collapses whitespace and appends `.` unless the text already ends with `.`, `!` or `?`; blank input yields `''`. */
+function ensureSentence(text: string): string {
+  const sentence = normalizeInlineText(text);
+  if (sentence === '') {
+    return sentence;
+  }
+
+  const lastChar = sentence.charAt(sentence.length - 1);
+  return '.!?'.includes(lastChar) ? sentence : `${sentence}.`;
+}
+
+function normalizeInlineText(text: string): string {
+  return text.normalize('NFKC').split(/\s+/).filter(Boolean).join(' '); // NFKC first, then whitespace runs become single spaces and the ends are trimmed
+}
+
+interface DocumentedPage {
+  url: string; // shown as the link text of the page heading in the index
+  title: string; // printed as a `Title:` line when non-empty
+  summary: string; // printed as the `Purpose:` line
+  canCount: number; // printed as `Proven actions`
+  mightCount: number; // printed as `Possible actions`
+  canActions: string[]; // listed under `User Can:` when non-empty
+  mightActions: string[]; // listed under `User Might:` when non-empty
+  filePath: string; // page file; the index links to it relative to the output directory
+}
+
+interface SkippedPage {
+  url: string; // listed in the `Skipped` section of the index
+  reason: string; // printed after `Reason:` as a sentence
+}
+
+export { renderPageDocumentation, renderSpecIndex, ensureSentence, normalizeAction };
+export type { DocumentedPage, SkippedPage };
diff --git a/boat/doc-collector/src/path-filter.ts b/boat/doc-collector/src/path-filter.ts
new file mode 100644
index 0000000..a2fda2c
--- /dev/null
+++ b/boat/doc-collector/src/path-filter.ts
@@ -0,0 +1,46 @@
+import { normalizeUrl } from '../../../src/state-manager.ts';
+import { matchesUrl, generalizeUrl } from '../../../src/utils/url-matcher.ts';
+import type { DocbotConfig } from './config.ts';
+
+const DEFAULT_DENIED_PATH_SEGMENTS = ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove']; // used when docs.deniedPathSegments is not configured
+
+/**
+ * Decides whether the crawler may visit `nextPath`.
+ *
+ * A path is rejected when it cannot be parsed, when `includePaths` is non-empty and nothing
+ * matches, when an `excludePaths` pattern matches, or when any segment is a denied one
+ * (`deniedPathSegments`, falling back to the built-in list). Only the pathname is inspected.
+ */
+export function shouldCrawlDocPath(nextPath: string, config: DocbotConfig = {}): boolean {
+  let pathname: string;
+  try {
+    pathname = new URL(nextPath, 'http://localhost').pathname || '/';
+  } catch {
+    return false; // a malformed link must not abort the whole crawl
+  }
+
+  // The include list narrows the crawl; it never overrides the exclusions checked below.
+  const includePaths = config.docs?.includePaths || [];
+  if (includePaths.length > 0 && !includePaths.some((pattern) => matchesUrl(pattern, pathname))) {
+    return false;
+  }
+  if ((config.docs?.excludePaths || []).some((pattern) => matchesUrl(pattern, pathname))) {
+    return false;
+  }
+
+  const normalize = (segment: string): string => segment.trim().toLowerCase();
+  const denied = new Set((config.docs?.deniedPathSegments || DEFAULT_DENIED_PATH_SEGMENTS).map(normalize).filter(Boolean));
+  // The root path has no non-empty segments, so some() is false and the path is allowed.
+  return !pathname.split('/').map(normalize).some((segment) => denied.has(segment));
+}
+
+/** Builds the crawl key of a page URL; the URL is generalized first unless `docs.collapseDynamicPages` is `false`. */
+export function getDocPageKey(pageUrl: string, config: DocbotConfig = {}): string {
+  const cleaned = normalizeUrl(pageUrl || '/');
+  const rooted = cleaned.startsWith('/') ? cleaned : `/${cleaned}`;
+  const keepDynamic = config.docs?.collapseDynamicPages === false;
+
+  // Only an explicit `false` disables generalization; an unset option generalizes.
+  const keySource = keepDynamic ? rooted : generalizeUrl(rooted);
+  return normalizeUrl(keySource);
+}
diff --git a/boat/doc-collector/src/research-navigation.ts b/boat/doc-collector/src/research-navigation.ts
new file mode 100644
index 0000000..e130a0d
--- /dev/null
+++ b/boat/doc-collector/src/research-navigation.ts
@@ -0,0 +1,90 @@
+import type { WebPageState } from '../../../src/state-manager.ts';
+import { parseResearchSections, type ResearchElement } from '../../../src/ai/researcher/parser.ts';
+
+const OPEN_API_TAG_SELECTOR_PATTERN = /api-\d+\/tag\/([a-z0-9-]+)(?:["'#/\]\s]|$)/i; // matches "api-<digits>/tag/<slug>" and captures the slug
+const OPEN_API_NAVIGATION_SECTION_KEYWORDS = ['navigation', 'menu']; // a section is scanned when its lower-cased name contains one of these
+
+/**
+ * Collects same-page hash targets from the navigation-like sections of a research report.
+ *
+ * Only sections whose name contains "navigation" or "menu" (case-insensitive) are scanned.
+ * Targets are returned in first-seen order without duplicates.
+ */
+export function extractResearchNavigationTargets(state: WebPageState, research: string): string[] {
+  const currentUrl = state.url || '/';
+  const unique = new Set<string>();
+
+  for (const section of parseResearchSections(research)) {
+    const loweredName = section.name.toLowerCase();
+    const isNavigation = OPEN_API_NAVIGATION_SECTION_KEYWORDS.some((keyword) => loweredName.includes(keyword));
+    if (!isNavigation) {
+      continue;
+    }
+    for (const element of section.elements) {
+      const target = extractNavigationTarget(currentUrl, element);
+      if (target) {
+        unique.add(target); // a Set ignores repeats and keeps insertion order
+      }
+    }
+  }
+  return Array.from(unique);
+}
+
+/**
+ * Resolves one research element to a `#tag/...` target on the current page, or `null`.
+ *
+ * A tag found in the element's CSS selector wins. The element label is only consulted
+ * when the current URL already contains a `#tag/` hash.
+ */
+function extractNavigationTarget(currentUrl: string, element: ResearchElement): string | null {
+  const cssTag = extractOpenApiTagHashFromCss(element.css);
+  if (cssTag) {
+    return buildSamePageHashTarget(currentUrl, cssTag);
+  }
+
+  // Without a `#tag/` hash in the URL the label is never inspected.
+  const onTagPage = currentUrl.includes('#tag/');
+  const labelSlug = onTagPage ? inferOpenApiTagSlugFromLabel(element.name) : null;
+  return labelSlug ? buildSamePageHashTarget(currentUrl, `tag/${labelSlug}`) : null;
+}
+
+/**
+ * Pulls `tag/<slug>` (slug lower-cased) out of a selector that references `api-<n>/tag/<slug>`.
+ * Returns `null` for an empty selector or when the pattern does not match.
+ */
+function extractOpenApiTagHashFromCss(css: string | null): string | null {
+  if (!css) {
+    return null;
+  }
+
+  // Selectors may escape slashes (`\/`), so undo that before matching.
+  const slug = OPEN_API_TAG_SELECTOR_PATTERN.exec(css.replaceAll('\\/', '/'))?.[1];
+  return slug ? `tag/${slug.toLowerCase()}` : null;
+}
+
+/**
+ * Derives a tag slug from a navigation label such as `Projects / Tags` (-> `projects-tags`).
+ *
+ * Wrapping single quotes and UI state words (`(expanded)`, `open group`, `show more`, ...) are
+ * removed first. Labels without a `/` are not treated as tags and yield `null`.
+ *
+ * @returns Lower-case slug of letters, digits and single dashes, or `null`.
+ */
+function inferOpenApiTagSlugFromLabel(name: string): string | null {
+  const unquoted = name.replace(/^'+|'+$/g, '');
+  const label = unquoted.replace(/\(expanded\)|\(collapsed\)|open group|close group|show more/gi, '').trim();
+  if (label.indexOf('/') === -1) {
+    return null;
+  }
+
+  // Join the non-empty parts with dashes, then squash everything else into single dashes.
+  const parts = label.split('/').map((part) => part.trim().toLowerCase()).filter((part) => part.length > 0);
+  const dashed = parts.join('-').replace(/[^a-z0-9-]+/g, '-').replace(/-+/g, '-');
+  const slug = dashed.replace(/^-|-$/g, '');
+  return slug === '' ? null : slug;
+}
+
+function buildSamePageHashTarget(currentUrl: string, hashPath: string): string {
+  const hashStart = currentUrl.indexOf('#'); // everything from the first '#' on is replaced; the query string before it is kept
+  return `${hashStart === -1 ? currentUrl : currentUrl.slice(0, hashStart)}#${hashPath}`;
+}
diff --git a/bun.lock b/bun.lock
index 55e37da..613fe7a 100644
--- a/bun.lock
+++ b/bun.lock
@@ -22,7 +22,7 @@
         "@opentelemetry/sdk-trace-base": "^2.2.0",
         "@opentelemetry/semantic-conventions": "^1.38.0",
         "@scalar/openapi-parser": "^0.25.6",
-        "@testomatio/reporter": "^2.7.9-beta.2-markdown",
+        "@testomatio/reporter": "^2.7.9-beta.3-markdown",
         "ai": "^6.0.6",
         "axe-core": "^4.11.1",
         "bash-tool": "^1.3.15",
@@ -934,7 +934,7 @@
 
     "@testing-library/react": ["@testing-library/react@16.3.0", "", { "dependencies": { "@babel/runtime": "^7.12.5" }, "peerDependencies": { "@testing-library/dom": "^10.0.0", "@types/react": "^18.0.0 || ^19.0.0", "@types/react-dom": "^18.0.0 || ^19.0.0", "react": "^18.0.0 || ^19.0.0", "react-dom": "^18.0.0 || ^19.0.0" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-kFSyxiEDwv1WLl2fgsq6pPBbw5aWKrsY2/noi1Id0TK0UParSF62oFQFGHXIyaG4pp2tEub/Zlel+fjjZILDsw=="],
 
-    "@testomatio/reporter": ["@testomatio/reporter@2.7.9-beta.2-markdown", "", { "dependencies": { "@aws-sdk/client-s3": "^3.279.0", "@aws-sdk/lib-storage": "^3.279.0", "@cucumber/cucumber": "^10.9.0", "@octokit/rest": "^21.1.1", "callsite-record": "^4.1.4", "commander": "^12", "cross-spawn": "^7.0.3", "csv-writer": "^1.6.0", "debug": "4.3.4", "dotenv": "^16.0.1", "fast-xml-parser": "^5.3.4", "file-url": "3.0.0", "filesize": "^10.1.6", "gaxios": ">=6.0 || >=7.0.0-rc.4 || <8", "glob": "^10.3", "handlebars": "^4.7.8", "has-flag": "^5.0.1", "humanize-duration": "^3.27.3", "is-valid-path": "^0.1.1", "js-yaml": "^4.1.1", "json-cycle": "^1.3.0", "lodash.memoize": "^4.1.2", "lodash.merge": "^4.6.2", "minimatch": "^10.2.4", "picocolors": "^1.0.1", "pretty-ms": "^7.0.1", "promise-retry": "^2.0.1", "strip-ansi": "7.1.0", "uuid": "^9.0.0" }, "bin": { "report-xml": "src/bin/reportXml.js", "start-test-run": "src/bin/startTest.js", "upload-artifacts": "src/bin/uploadArtifacts.js", "reporter": "src/bin/cli.js" } }, "sha512-RBZAN/Je4FwDUkCv07BWgbkhllwcesd3C7dNGgUhbJ/5c+qITInZuf7GAGMPLwCJJm3JJjkfVcLkX+Injw5ppA=="],
+    "@testomatio/reporter": ["@testomatio/reporter@2.7.9-beta.3-markdown", "", { "dependencies": { "@aws-sdk/client-s3": "^3.279.0", "@aws-sdk/lib-storage": "^3.279.0", "@cucumber/cucumber": "^10.9.0", "@octokit/rest": "^21.1.1", "callsite-record": "^4.1.4", "commander": "^12", "cross-spawn": "^7.0.3", "csv-writer": "^1.6.0", "debug": "4.3.4", "dotenv": "^16.0.1", "fast-xml-parser": "^5.3.4", "file-url": "3.0.0", "filesize": "^10.1.6", "gaxios": ">=6.0 || >=7.0.0-rc.4 || <8", "glob": "^10.3", "handlebars": "^4.7.8", "has-flag": "^5.0.1", "humanize-duration": "^3.27.3", "is-valid-path": "^0.1.1", "js-yaml": "^4.1.1", "json-cycle": "^1.3.0", "lodash.memoize": "^4.1.2", "lodash.merge": "^4.6.2", "marked": "^14.1.4", "minimatch": "^10.2.4", "picocolors": "^1.0.1", "pretty-ms": "^7.0.1", "promise-retry": "^2.0.1", "strip-ansi": "7.1.0", "uuid": "^9.0.0" }, "bin": { "report-xml": "src/bin/reportXml.js", "start-test-run": "src/bin/startTest.js", "upload-artifacts": "src/bin/uploadArtifacts.js", "reporter": "src/bin/cli.js" } }, "sha512-up5EWx9WV9AX+jYwStCXaXyPOH4DDTtsiJ218KVDTUVpAnim563dEm6DCgYznMJelmahKs1/Bi+J8Duyu7JfOg=="],
 
     "@tokenizer/inflate": ["@tokenizer/inflate@0.4.1", "", { "dependencies": { "debug": "^4.4.3", "token-types": "^6.1.1" } }, "sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA=="],
 
@@ -2848,6 +2848,8 @@
 
     "@testomatio/reporter/js-yaml": ["js-yaml@4.1.1", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA=="],
 
+    "@testomatio/reporter/marked": ["marked@14.1.4", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-vkVZ8ONmUdPnjCKc5uTRvmkRbx4EAi2OkTOXmfTDhZz3OFqMNBM1oTTWwTr4HY4uAEojhzPf+Fy8F1DWa3Sndg=="],
+
     "@testomatio/reporter/strip-ansi": ["strip-ansi@7.1.0", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ=="],
 
     "@types/jsdom/parse5": ["parse5@7.3.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw=="],
diff --git a/docs/commands.md b/docs/commands.md
index 3aa791c..bb4f79c 100644
--- a/docs/commands.md
+++ b/docs/commands.md
@@ -23,7 +23,7 @@ CLI commands run headless by default, execute the task, and exit. TUI commands r
 
 ## Common Options
 
-These options are available on all CLI commands (`start`, `explore`, `plan`, `drill`, `research`, `context`):
+These options are available on all CLI commands (`start`, `explore`, `plan`, `drill`, `research`, `context`, `docs collect`):
 
 | Option | Description |
 |--------|-------------|
@@ -274,6 +274,36 @@ Navigate to a URI or state using AI assistance.
 
 The Navigator agent figures out how to reach the destination.
 
+## Documentation Collection
+
+### `explorbot docs collect <path-or-url>`
+
+Crawl pages and generate a documentation spec with `Purpose`, `User Can`, and `User Might` sections for each documented page.
+
+```bash
+explorbot docs collect /users/sign_in
+explorbot docs collect /docs/openapi#tag/project-analytics-tags --max-pages 20
+explorbot docs collect https://teleportal.ua/ua/serials/stb/kod --path explorbot-testing --show --session --max-pages 20
+```
+
+Output is written to:
+
+- `output/docs/spec.md`
+- `output/docs/pages/*.md`
+
+Use `docbot.config.*` to control crawl scope, path filters, dynamic-page collapsing, and low-signal page skipping.
+
+See [Documentation Collection](./doc-collector.md) for full configuration, crawl modes, and examples.
+
+### `explorbot docs init`
+
+Create a starter `docbot.config.ts` file.
+
+```bash
+explorbot docs init
+explorbot docs init --path explorbot-testing
+```
+
 ## Test Rerun
 
 ### `/runs [file]`
diff --git a/docs/doc-collector.md b/docs/doc-collector.md
new file mode 100644
index 0000000..2059246
--- /dev/null
+++ b/docs/doc-collector.md
@@ -0,0 +1,140 @@
+# Documentation Collection
+
+`doc-collector` crawls pages and generates a lightweight spec:
+
+- `output/docs/spec.md`
+- `output/docs/pages/*.md`
+- `output/research/*.md`
+
+Each page is summarized as:
+
+- `Purpose`
+- `User Can`
+- `User Might`
+
+## Commands
+
+### `explorbot docs collect <path-or-url>`
+
+Start from a relative path or a full URL:
+
+```bash
+explorbot docs collect /users/sign_in
+explorbot docs collect /docs/openapi#tag/project-analytics-tags --max-pages 20
+explorbot docs collect https://teleportal.ua/ua/serials/stb/kod --path explorbot-testing --show --session --max-pages 20
+```
+
+Supported options:
+
+| Option | Description |
+|--------|-------------|
+| `--max-pages <count>` | Limit documented pages |
+| `-c, --config <path>` | Path to `explorbot.config.*` |
+| `--docs-config <path>` | Path to `docbot.config.*` |
+| `-p, --path <path>` | Working directory |
+| `-s, --show` | Show browser window |
+| `--headless` | Run headless |
+| `--incognito` | Do not record experiences |
+| `--session [file]` | Save or restore browser session |
+| `-v, --verbose` | Verbose logging |
+| `--debug` | Debug logging |
+
+If you pass a full URL, its origin is used as the runtime base URL for that run.
+
+### `explorbot docs init`
+
+Create a starter `docbot.config.ts`:
+
+```bash
+explorbot docs init
+explorbot docs init --path explorbot-testing
+explorbot docs init --path explorbot-testing --force
+```
+
+### Standalone CLI
+
+```bash
+bun boat/doc-collector/bin/doc-collector-cli.ts collect /users/sign_in --max-pages 20
+```
+
+## Config
+
+The collector loads `docbot.config.js`, `docbot.config.mjs`, or `docbot.config.ts`. If none exists, defaults are used.
+
+```ts
+export default {
+  docs: {
+    maxPages: 100,
+    output: 'docs',
+    screenshot: true,
+    collapseDynamicPages: true,
+    scope: 'site',
+    includePaths: [],
+    excludePaths: [],
+    deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
+    minCanActions: 1,
+    minInteractiveElements: 3,
+    // prompt: 'Add domain-specific guidance here',
+  },
+};
+```
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `maxPages` | `100` | Maximum pages to document |
+| `output` | `'docs'` | Output folder inside `output/` |
+| `screenshot` | `true` | Allow screenshot-assisted research |
+| `prompt` | unset | Extra instructions for the Documentarian |
+| `collapseDynamicPages` | `true` | Collapse dynamic URLs like `/users/123` and `/users/456` into one crawl key |
+| `scope` | `'site'` | Crawl breadth mode |
+| `includePaths` | `[]` | Only allow matching paths |
+| `excludePaths` | `[]` | Exclude matching paths |
+| `deniedPathSegments` | built-in list | Block terminal or destructive endpoints |
+| `minCanActions` | `1` | Pages with fewer proven actions than this are considered low-signal |
+| `minInteractiveElements` | `3` | Pages with fewer interactive elements than this are considered low-signal |
+
+## Scope Modes
+
+### `site`
+
+Crawl across the whole current origin.
+
+### `subtree`
+
+Stay inside the starting path and its descendants.
+
+Start page:
+
+```text
+/ua/serials/stb/kod
+```
+
+Allowed:
+
+- `/ua/serials/stb/kod`
+- `/ua/serials/stb/kod/2026`
+- `/ua/serials/stb/kod/2025/seriya-12`
+
+Blocked:
+
+- `/ua/serials`
+- `/ua/show`
+- `/ua/person/...`
+
+### `section`
+
+Softer boundary than `subtree`: keep the same scope root, its descendants, and closely related slug variations.
+
+## Notes
+
+- same-origin only
+- visited pages are tracked through the state manager
+- dead loops are stopped
+- next targets are discovered from links and research navigation
+- low-signal pages can be skipped
+
+## Related Docs
+
+- [commands.md](./commands.md) - terminal command reference
+- [configuration.md](./configuration.md) - main Explorbot configuration
+- [researcher.md](./researcher.md) - researcher behavior
diff --git a/package.json b/package.json
index 120d2d8..baf45dd 100644
--- a/package.json
+++ b/package.json
@@ -14,7 +14,7 @@
   "bin": {
     "explorbot": "./dist/bin/explorbot-cli.js"
   },
-  "files": ["dist/", "src/**/*.ts", "src/**/*.tsx", "bin/**/*.ts", "boat/api-tester/src/**/*.ts", "rules/", "assets/sample-files/"],
+  "files": ["dist/", "src/**/*.ts", "src/**/*.tsx", "bin/**/*.ts", "boat/api-tester/src/**/*.ts", "boat/doc-collector/src/**/*.ts", "boat/doc-collector/bin/**/*.ts", "boat/doc-collector/package.json", "rules/", "assets/sample-files/"],
   "scripts": {
     "build": "bun run src/index.tsx build && bun run build:bin",
     "build:bin": "bun build bin/explorbot-cli.ts --outdir bin --target node --external commander --format esm",
diff --git a/src/ai/researcher.ts b/src/ai/researcher.ts
index be503f8..d6708ae 100644
--- a/src/ai/researcher.ts
+++ b/src/ai/researcher.ts
@@ -121,7 +121,8 @@ export class Researcher extends ResearcherBase implements Agent {
 
     const sessionName = `researcher: ${state.url}`;
     return Observability.run(sessionName, { tags: ['researcher'], sessionId: stateHash }, async () => {
-      tag('info').log(`Researching ${state.url} to understand the context...`);
+      const displayUrl = state.fullUrl || state.url;
+      tag('info').log(`Researching ${displayUrl} to understand the context...`);
       setActivity(`${this.emoji} Researching...`, 'action');
 
       await this.ensureNavigated(state.url, screenshot && this.provider.hasVision());
diff --git a/src/ai/researcher/coordinates.ts b/src/ai/researcher/coordinates.ts
index b4d9b43..9a45f04 100644
--- a/src/ai/researcher/coordinates.ts
+++ b/src/ai/researcher/coordinates.ts
@@ -198,7 +198,7 @@ export function WithCoordinates<T extends Constructor>(Base: T) {
       const eidxWithoutCoords: string[] = [];
       for (const section of sections) {
         for (const el of section.elements) {
-          if (el.eidx && !el.coordinates) eidxWithoutCoords.push(el.eidx);
+          if (el.eidx && /^e\d+$/i.test(el.eidx) && !el.coordinates) eidxWithoutCoords.push(el.eidx);
         }
       }
       if (eidxWithoutCoords.length === 0) return;
diff --git a/src/ai/researcher/parser.ts b/src/ai/researcher/parser.ts
index 0bfb09f..f6cab4d 100644
--- a/src/ai/researcher/parser.ts
+++ b/src/ai/researcher/parser.ts
@@ -64,6 +64,9 @@ export function mapRowToElement(row: Record<string, string>): ResearchElement |
 
   let eidxRaw = (colMap.eidx || '').trim();
   if (eidxRaw && /^\d+$/.test(eidxRaw)) eidxRaw = `e${eidxRaw}`;
+  if (eidxRaw && !/^e\d+$/i.test(eidxRaw)) {
+    eidxRaw = '';
+  }
 
   const aria = parseAriaLocator(colMap.aria || '-');
 
diff --git a/src/config.ts b/src/config.ts
index f1b65d2..105677e 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -266,6 +266,7 @@ export class ConfigParser {
   private static instance: ConfigParser;
   private config: ExplorbotConfig | null = null;
   private configPath: string | null = null;
+  private runtimeBaseUrlOverride: string | null = null;
 
   private constructor() {}
 
@@ -285,8 +286,9 @@ export class ConfigParser {
   public async loadConfig(options?: {
     config?: string;
     path?: string;
+    baseUrl?: string;
   }): Promise<ExplorbotConfig> {
-    if (this.config && !options?.config && !options?.path) {
+    if (this.config && !options?.config && !options?.path && this.runtimeBaseUrlOverride === (options?.baseUrl || null)) {
       return this.config;
     }
 
@@ -317,7 +319,8 @@ export class ConfigParser {
         throw new Error('Configuration file is empty or invalid');
       }
 
-      this.config = this.resolveConfig(loadedConfig as ExplorbotConfig);
+      this.config = this.resolveConfig(loadedConfig as ExplorbotConfig, options);
+      this.runtimeBaseUrlOverride = options?.baseUrl || null;
       this.configPath = resolvedPath;
 
       log(`Configuration loaded from: ${resolvedPath}`);
@@ -372,6 +375,7 @@ export class ConfigParser {
     if (ConfigParser.instance) {
       ConfigParser.instance.config = null;
       ConfigParser.instance.configPath = null;
+      ConfigParser.instance.runtimeBaseUrlOverride = null;
     }
   }
 
@@ -455,11 +459,17 @@ export class ConfigParser {
     }
   }
 
-  private resolveConfig(config: ExplorbotConfig): ExplorbotConfig {
+  private resolveConfig(config: ExplorbotConfig, options?: { baseUrl?: string }): ExplorbotConfig {
     if (config.web?.url && !config.playwright?.url) {
       config.playwright = config.playwright || { browser: 'chromium', url: '' };
       config.playwright.url = config.web.url;
     }
+
+    if (options?.baseUrl) {
+      config.playwright = config.playwright || { browser: 'chromium', url: '' };
+      config.playwright.url = options.baseUrl;
+    }
+
     return config;
   }
 
diff --git a/src/explorbot.ts b/src/explorbot.ts
index e377b5b..e6ea64a 100644
--- a/src/explorbot.ts
+++ b/src/explorbot.ts
@@ -34,6 +34,7 @@ import { sanitizeFilename } from './utils/strings.ts';
 
 export interface ExplorBotOptions {
   from?: string;
+  baseUrl?: string;
   verbose?: boolean;
   config?: string;
   path?: string;
diff --git a/src/state-manager.ts b/src/state-manager.ts
index 0fbb4a2..c9dc246 100644
--- a/src/state-manager.ts
+++ b/src/state-manager.ts
@@ -547,6 +547,10 @@ export class StateManager {
 }
 
 export function normalizeUrl(url: string): string {
+  if (url.startsWith('/')) {
+    return url.replace(/^\/+/, '').replace(/\/+$/g, '');
+  }
+
   try {
     const parsed = new URL(url, 'http://localhost');
     const path = parsed.pathname.replace(/^\/+|\/+$/g, '');
diff --git a/src/utils/url-matcher.ts b/src/utils/url-matcher.ts
index de73d06..7198a3c 100644
--- a/src/utils/url-matcher.ts
+++ b/src/utils/url-matcher.ts
@@ -82,10 +82,13 @@ export function matchesUrl(pattern: string, path: string): boolean {
 }
 
 export function extractStatePath(url: string): string {
-  if (url.startsWith('/')) return url;
+  if (url.startsWith('/')) {
+    return `/${url.replace(/^\/+/, '')}`;
+  }
   try {
     const urlObj = new URL(url);
-    return `${urlObj.pathname}${urlObj.search}${urlObj.hash}`;
+    const normalizedPathname = `/${urlObj.pathname.replace(/^\/+/, '')}`;
+    return `${normalizedPathname}${urlObj.search}${urlObj.hash}`;
   } catch {
     return url;
   }
diff --git a/src/utils/web-element.ts b/src/utils/web-element.ts
index 7e0e593..83beb39 100644
--- a/src/utils/web-element.ts
+++ b/src/utils/web-element.ts
@@ -122,7 +122,8 @@ export class WebElement {
   }
 
   static async fromEidxList(page: any, eidxList: string[]): Promise<WebElement[]> {
-    if (eidxList.length === 0) return [];
+    const validEidxList = eidxList.filter((eidx) => /^e\d+$/i.test(eidx));
+    if (validEidxList.length === 0) return [];
 
     const rawList: RawElementData[] = await page.evaluate(
       ([list, extractFnStr, config]: [string[], string, ElementExtractionConfig]) => {
@@ -136,7 +137,7 @@ export class WebElement {
         }
         return results;
       },
-      [eidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG] as [string[], string, ElementExtractionConfig]
+      [validEidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG] as [string[], string, ElementExtractionConfig]
     );
 
     return rawList.map((d) => WebElement.fromRawData(d));
diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts
new file mode 100644
index 0000000..1bfb587
--- /dev/null
+++ b/tests/unit/config.test.ts
@@ -0,0 +1,42 @@
+import { beforeEach, describe, expect, it } from 'bun:test';
+import { ConfigParser } from '../../src/config.ts';
+
+describe('ConfigParser runtime baseUrl overrides', () => {
+  beforeEach(() => {
+    ConfigParser.resetForTesting();
+  });
+
+  it('reloads config when runtime baseUrl override changes', async () => {
+    const parser = ConfigParser.getInstance();
+    const originalLoadConfigModule = (parser as any).loadConfigModule; // kept so the private hooks can be restored in `finally`
+    const originalFindConfigFile = (parser as any).findConfigFile;
+
+    (parser as any).findConfigFile = () => '/virtual/explorbot.config.ts'; // stubbed so no real config file is looked up
+    (parser as any).loadConfigModule = async () => ({
+      default: {
+        playwright: {
+          url: 'https://default.example.com',
+          browser: 'chromium',
+        },
+        ai: {
+          model: { modelId: 'test-model', provider: 'test' },
+          config: {},
+        },
+      },
+    });
+
+    try {
+      const first = await parser.loadConfig({ baseUrl: 'https://one.example.com' });
+      const second = await parser.loadConfig({ baseUrl: 'https://two.example.com' });
+      const fallback = await parser.loadConfig(); // no baseUrl: the stored override no longer matches, so the cached config is not reused
+
+      expect(first.playwright.url).toBe('https://one.example.com');
+      expect(second.playwright.url).toBe('https://two.example.com');
+      expect(fallback.playwright.url).toBe('https://default.example.com');
+    } finally {
+      (parser as any).loadConfigModule = originalLoadConfigModule;
+      (parser as any).findConfigFile = originalFindConfigFile;
+      ConfigParser.resetForTesting();
+    }
+  });
+});
diff --git a/tests/unit/doc-collector.test.ts b/tests/unit/doc-collector.test.ts
new file mode 100644
index 0000000..ee30b7b
--- /dev/null
+++ b/tests/unit/doc-collector.test.ts
@@ -0,0 +1,247 @@
+import { describe, expect, it } from 'bun:test';
+import { DocBot } from '../../boat/doc-collector/src/docbot.ts';
+import { Documentarian } from '../../boat/doc-collector/src/ai/documentarian.ts';
+import { normalizeAction, renderPageDocumentation, renderSpecIndex } from '../../boat/doc-collector/src/docs-renderer.ts';
+import { getDocPageKey, shouldCrawlDocPath } from '../../boat/doc-collector/src/path-filter.ts';
+import { extractResearchNavigationTargets } from '../../boat/doc-collector/src/research-navigation.ts';
+
+describe('doc-collector path filter', () => { // covers shouldCrawlDocPath and getDocPageKey from path-filter.ts
+  it('allows regular documentation pages', () => {
+    expect(shouldCrawlDocPath('/users/sign_in')).toBe(true); // all calls in this test pass no config argument
+    expect(shouldCrawlDocPath('/users/sign_up')).toBe(true);
+    expect(shouldCrawlDocPath('/users/password/new')).toBe(true);
+    expect(shouldCrawlDocPath('/users/sso')).toBe(true);
+    expect(shouldCrawlDocPath('/users/auth/google_oauth2')).toBe(true); // auth entry point is allowed, unlike its /callback below
+  });
+
+  it('skips callback and destructive endpoints', () => {
+    expect(shouldCrawlDocPath('/users/auth/github/callback')).toBe(false); // rejected without any config argument
+    expect(shouldCrawlDocPath('/logout')).toBe(false);
+  });
+
+  it('supports config-driven include and exclude path policies', () => {
+    expect(
+      shouldCrawlDocPath('/admin/users', {
+        docs: {
+          excludePaths: ['/admin/*'], // pattern under test: '/admin/users' must be rejected
+        },
+      })
+    ).toBe(false);
+
+    expect(
+      shouldCrawlDocPath('/admin/users', {
+        docs: {
+          includePaths: ['/admin/*'], // same pattern as an include: '/admin/users' must be accepted
+        },
+      })
+    ).toBe(true);
+  });
+
+  it('generalizes dynamic pages into one crawl key by default', () => {
+    expect(getDocPageKey('/users/123')).toBe(getDocPageKey('/users/456')); // only the numeric segment differs
+    expect(getDocPageKey('/users/123/edit')).toBe(getDocPageKey('/users/456/edit'));
+  });
+
+  it('can keep dynamic pages separate when configured', () => {
+    expect(
+      getDocPageKey('/users/123', {
+        docs: {
+          collapseDynamicPages: false, // the only option set in this call
+        },
+      })
+    ).toBe('users/123'); // key keeps the literal id and carries no leading slash
+  });
+});
+
+describe('doc-collector research navigation', () => { // covers extractResearchNavigationTargets from research-navigation.ts
+  it('extracts openapi tag targets from navigation and menu sections', () => { // fixture: three Navigation rows, one Menu row
+    const research = `
+## Navigation
+
+| Element | Type | ARIA | CSS |
+|------|------|------|------|
+| 'Project / Analytics / Tags' | button | { role: 'button', text: 'Project / Analytics / Tags Open Group' } | 'button[id="api-1/tag/project-analytics-tags"]' |
+| 'Project / Analytics / Labels' | button | { role: 'button', text: 'Project / Analytics / Labels Open Group' } | 'button:has-text("Project / Analytics / Labels")' |
+| 'Shows linked issues from jira statistics for a project' | button | { role: 'button', text: 'Shows linked issues from jira statistics for a project' } | 'button:has-text("Shows linked issues")' |
+
+## Menu
+
+| Element | Type | ARIA | CSS |
+|------|------|------|------|
+| 'Show More' | button | { role: 'button', text: 'Show all Project / Analytics / Jira endpoints' } | 'button[id="api-1/tag/project-analytics-jira"]' |
+`;
+
+    expect( // three targets are expected from four rows; the 'Shows linked issues…' row contributes none
+      extractResearchNavigationTargets(
+        {
+          url: '/docs/openapi#tag/project-analytics-tests', // current page; its own tag is absent from the expected list
+        },
+        research
+      )
+    ).toEqual([
+      '/docs/openapi#tag/project-analytics-tags',
+      '/docs/openapi#tag/project-analytics-labels',
+      '/docs/openapi#tag/project-analytics-jira',
+    ]);
+  });
+});
+
+describe('doc-collector renderer', () => { // covers renderPageDocumentation, renderSpecIndex and normalizeAction from docs-renderer.ts
+  it('renders page documentation in spec format', () => {
+    const markdown = renderPageDocumentation(
+      {
+        url: '/users/sign_in',
+        title: 'Testomat.io',
+      },
+      {
+        summary: 'Sign in page for existing users',
+        can: [
+          {
+            action: 'user can sign in with email and password', // asserted below as '- <action> -> <scope>'
+            scope: 'page-level',
+            evidence: 'Email and password fields plus submit button are visible', // no trailing period here; the output is expected to add one
+          },
+        ],
+        might: [
+          {
+            action: 'use social login', // no 'user might' prefix here; the output is expected to add it
+            scope: 'one item',
+            evidence: 'OAuth buttons are shown in the form',
+          },
+        ],
+      }
+    );
+
+    expect(markdown).toContain('## Purpose');
+    expect(markdown).toContain('- user can sign in with email and password -> page-level');
+    expect(markdown).toContain('Proof: Email and password fields plus submit button are visible.'); // 'can' evidence appears under 'Proof:'
+    expect(markdown).toContain('- user might use social login -> one item');
+    expect(markdown).toContain('Signal: OAuth buttons are shown in the form.'); // 'might' evidence appears under 'Signal:'
+  });
+
+  it('renders aggregate spec index with skipped pages', () => {
+    const markdown = renderSpecIndex(
+      'D:/project/output/docs', // presumably the docs output directory; filePath below lives under it — confirm against renderSpecIndex
+      '/users/sign_in', // NOTE(review): role of this argument is not visible here (start URL?) — confirm
+      [
+        {
+          url: '/users/sign_in',
+          title: 'Testomat.io',
+          summary: 'Sign in page',
+          canCount: 7, // asserted below as 'Proven actions: 7'
+          mightCount: 1,
+          canActions: ['user can sign in with email and password'],
+          mightActions: ['user might use social login'],
+          filePath: 'D:/project/output/docs/pages/users_sign_in.md',
+        },
+      ],
+      [
+        {
+          url: '/users/auth/google_oauth2', // skipped page; expected in the '## Skipped' section with its reason
+          reason: 'redirected into external auth flow',
+        },
+      ],
+      20 // NOTE(review): meaning of this number is not visible here (page limit?) — confirm
+    );
+
+    expect(markdown).toContain('## Overview');
+    expect(markdown).toContain('### [/users/sign_in](pages/users_sign_in.md)'); // link is filePath with the first argument's directory removed
+    expect(markdown).toContain('Proven actions: 7');
+    expect(markdown).toContain('User Can:');
+    expect(markdown).toContain('- user can sign in with email and password');
+    expect(markdown).toContain('User Might:');
+    expect(markdown).toContain('- user might use social login');
+    expect(markdown).toContain('## Skipped');
+    expect(markdown).toContain('/users/auth/google_oauth2. Reason: redirected into external auth flow.');
+  });
+
+  it('normalizes might-actions without duplicating prefixes', () => {
+    expect(normalizeAction('user might be able to submit the login form by pressing Enter', 'might')).toBe('user might be able to submit the login form by pressing Enter');
+    expect(normalizeAction('user can submit the login form by pressing Enter', 'might')).toBe('user might submit the login form by pressing Enter');
+  });
+
+});
+
+describe('doc-collector scope and signal', () => {
+  it('keeps subtree scope around the start page', () => { // drives DocBot.isInScope through any-casts, with state assigned by hand
+    const bot = new DocBot();
+    (bot as any).config = { docs: { scope: 'subtree' } }; // config assigned directly on the instance
+    (bot as any).scopeRoot = '/ua/serials/stb/kod'; // root path that the cases below are compared against
+
+    expect((bot as any).isInScope('/ua/serials/stb/kod/2026')).toBe(true); // path below scopeRoot
+    expect((bot as any).isInScope('/ua/serials/stb/kod/2026/seriya-1')).toBe(true);
+    expect((bot as any).isInScope('/ua/person/actor')).toBe(false); // path outside scopeRoot
+    expect((bot as any).isInScope('/ua/faq')).toBe(false);
+  });
+
+  it('marks pages with weak docs and few controls as low-signal', () => {
+    const bot = new DocBot();
+    (bot as any).config = { docs: { minCanActions: 1, minInteractiveElements: 3 } };
+
+    expect(
+      (bot as any).getLowSignalReason(
+        { summary: 'The page currently loads with no visible content.', can: [], might: [] },
+        '* Content (0 elements) `main`\n\nChars: 120'
+      )
+    ).toContain('low-signal page');
+  });
+
+  it('keeps pages with proven actions out of low-signal skip', () => {
+    const bot = new DocBot();
+    (bot as any).config = { docs: { minCanActions: 1, minInteractiveElements: 3 } };
+
+    expect(
+      (bot as any).getLowSignalReason(
+        { summary: 'Serial details page.', can: [{ action: 'watch episode', scope: 'one item', evidence: 'episode links visible' }], might: [] },
+        '* Episodes (10 elements) `.tp-show__list`\n\nChars: 1200'
+      )
+    ).toBeNull();
+  });
+});
+
+describe('documentarian fallback', () => { // Documentarian.document must retry once after a failed JSON generation
+  it('retries with sanitized research after JSON generation failure', async () => {
+    const calls: string[] = []; // content of messages[1] for each generateObject call, in order
+    const provider = { // fake provider that defines only generateObject
+      async generateObject(messages: Array<{ role: string; content: string }>) {
+        calls.push(messages[1].content); // record the second message of every request
+        if (calls.length === 1) { // only the first attempt fails
+          throw new Error('Failed to generate JSON. Please adjust your prompt. See failed_generation for more details.');
+        }
+        return {
+          object: {
+            summary: 'Episode page',
+            can: [
+              {
+                action: 'user can watch the episode',
+                scope: 'one item',
+                evidence: 'Video player is visible',
+              },
+            ],
+            might: [],
+          },
+        };
+      },
+    } as any;
+
+    const documentarian = new Documentarian(provider, {});
+    const result = await documentarian.document(
+      {
+        url: '/ua/serials/stb/kod',
+        title: 'K.O.D.',
+      },
+      `## Content
+
+| Element | Type | ARIA | CSS | Coordinates |
+|------|------|------|------|------|
+| 'Play button' | link | { role: 'link', text: 'play' } | 'a.about-project__play' | (468, 537) |
+| 'Broken row' | link | - | 2026' } | 'a[href="/ua/serials/stb/kod/2026"]' |
+`
+    );
+
+    expect(result.summary).toBe('Episode page'); // result comes from the second, successful call
+    expect(result.can).toHaveLength(1);
+    expect(calls).toHaveLength(2); // one failed attempt plus exactly one retry
+    expect(calls[1]).toContain('<fallback_mode>'); // the retry prompt must carry this marker
+  });
+});
diff --git a/tests/unit/research-parser.test.ts b/tests/unit/research-parser.test.ts
index 960beb0..9cd9cb7 100644
--- a/tests/unit/research-parser.test.ts
+++ b/tests/unit/research-parser.test.ts
@@ -144,4 +144,19 @@ describe('formatResearchSummary', () => {
     const summary = formatResearchSummary(md);
     expect(summary).toContain('* Section (1 element)');
   });
+
+  it('drops invalid non-eidx values from the eidx column', () => { // row 1 repeats its CSS selector in the eidx column, row 2 holds 42
+    const md = dedent`
+      ## Navigation
+
+      | Element | Type | ARIA | CSS | eidx |
+      |------|------|------|------|------|
+      | 'Year link' | link | { role: 'link', text: '2026' } | 'a[href*="/kod/2026"]' | 'a[href*="/kod/2026"]' |
+      | 'Episode link' | link | { role: 'link', text: 'Episode 1' } | 'a[href*="/episode-1"]' | 42 |
+    `;
+
+    const sections = parseResearchSections(md);
+    expect(sections[0].elements[0].eidx).toBeNull(); // selector text in the eidx column must not be kept
+    expect(sections[0].elements[1].eidx).toBe('e42'); // bare 42 is expected back with an 'e' prefix
+  });
 });
diff --git a/tests/unit/url-matcher.test.ts b/tests/unit/url-matcher.test.ts
index 5229476..71128fb 100644
--- a/tests/unit/url-matcher.test.ts
+++ b/tests/unit/url-matcher.test.ts
@@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, it } from 'bun:test';
 import { ConfigParser } from '../../src/config';
+import { normalizeUrl } from '../../src/state-manager';
 import { extractStatePath, generalizeSegment, generalizeUrl, hasDynamicUrlSegment, isDynamicSegment, matchesUrl } from '../../src/utils/url-matcher';
 
 describe('url-matcher', () => {
@@ -166,12 +167,27 @@ describe('url-matcher', () => {
       expect(extractStatePath('/dashboard')).toBe('/dashboard');
     });
 
+    it('collapses repeated leading slashes for path-like URLs', () => {
+      expect(extractStatePath('///series/page/57/')).toBe('/series/page/57/'); // three leading slashes in, one out; trailing slash is kept
+    });
+
     it('strips host from absolute URL, keeps hash', () => {
       expect(extractStatePath('https://example.com/page#section')).toBe('/page#section');
     });
 
+    it('collapses repeated leading slashes in absolute URL paths', () => {
+      expect(extractStatePath('https://example.com///series/page/57/')).toBe('/series/page/57/'); // host dropped and '///' reduced to '/'
+    });
+
     it('returns original string when URL is unparseable', () => {
       expect(extractStatePath('not a url')).toBe('not a url');
     });
   });
+
+  describe('normalizeUrl', () => { // normalizeUrl is imported from src/state-manager
+    it('treats repeated leading slashes as a relative path, not a protocol-relative URL', () => {
+      expect(normalizeUrl('///series/page/57/')).toBe('series/page/57'); // expected value has neither leading nor trailing slash
+      expect(normalizeUrl('/series/page/57/')).toBe('series/page/57'); // a single leading slash must give the same result
+    });
+  });
 });

From e1a355a3297d8e1bd22444b52fb5cdf1437381e2 Mon Sep 17 00:00:00 2001
From: Denys Kuchma <den.kuchma@ukr.net>
Date: Mon, 11 May 2026 21:44:15 +0300
Subject: [PATCH 2/2] fix format

---
 tests/unit/doc-collector.test.ts | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/tests/unit/doc-collector.test.ts b/tests/unit/doc-collector.test.ts
index ee30b7b..25f3b37 100644
--- a/tests/unit/doc-collector.test.ts
+++ b/tests/unit/doc-collector.test.ts
@@ -78,11 +78,7 @@ describe('doc-collector research navigation', () => {
         },
         research
       )
-    ).toEqual([
-      '/docs/openapi#tag/project-analytics-tags',
-      '/docs/openapi#tag/project-analytics-labels',
-      '/docs/openapi#tag/project-analytics-jira',
-    ]);
+    ).toEqual(['/docs/openapi#tag/project-analytics-tags', '/docs/openapi#tag/project-analytics-labels', '/docs/openapi#tag/project-analytics-jira']);
   });
 });
 
@@ -159,7 +155,6 @@ describe('doc-collector renderer', () => {
     expect(normalizeAction('user might be able to submit the login form by pressing Enter', 'might')).toBe('user might be able to submit the login form by pressing Enter');
     expect(normalizeAction('user can submit the login form by pressing Enter', 'might')).toBe('user might submit the login form by pressing Enter');
   });
-
 });
 
 describe('doc-collector scope and signal', () => {
@@ -178,24 +173,14 @@ describe('doc-collector scope and signal', () => {
     const bot = new DocBot();
     (bot as any).config = { docs: { minCanActions: 1, minInteractiveElements: 3 } };
 
-    expect(
-      (bot as any).getLowSignalReason(
-        { summary: 'The page currently loads with no visible content.', can: [], might: [] },
-        '* Content (0 elements) `main`\n\nChars: 120'
-      )
-    ).toContain('low-signal page');
+    expect((bot as any).getLowSignalReason({ summary: 'The page currently loads with no visible content.', can: [], might: [] }, '* Content (0 elements) `main`\n\nChars: 120')).toContain('low-signal page');
   });
 
   it('keeps pages with proven actions out of low-signal skip', () => {
     const bot = new DocBot();
     (bot as any).config = { docs: { minCanActions: 1, minInteractiveElements: 3 } };
 
-    expect(
-      (bot as any).getLowSignalReason(
-        { summary: 'Serial details page.', can: [{ action: 'watch episode', scope: 'one item', evidence: 'episode links visible' }], might: [] },
-        '* Episodes (10 elements) `.tp-show__list`\n\nChars: 1200'
-      )
-    ).toBeNull();
+    expect((bot as any).getLowSignalReason({ summary: 'Serial details page.', can: [{ action: 'watch episode', scope: 'one item', evidence: 'episode links visible' }], might: [] }, '* Episodes (10 elements) `.tp-show__list`\n\nChars: 1200')).toBeNull();
   });
 });