Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bin/explorbot-cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,8 @@ program
});

import { createApiCommands } from '../boat/api-tester/src/cli.ts';
import { createDocsCommands } from '../boat/doc-collector/src/cli.ts';
// Register the api-tester and doc-collector command groups on the main program.
// createDocsCommands uses its argument as the command name; createApiCommands
// presumably does the same — TODO confirm against boat/api-tester/src/cli.ts.
program.addCommand(createApiCommands('api'));
program.addCommand(createDocsCommands('docs'));

program.parse();
5 changes: 5 additions & 0 deletions boat/doc-collector/bin/doc-collector-cli.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bun
import { createDocsCommands } from '../src/cli.ts';

// Standalone entry point: build the command tree under the name `doc-collector`
// and let it parse the command line (no explicit argv is passed).
createDocsCommands('doc-collector').parse();
24 changes: 24 additions & 0 deletions boat/doc-collector/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "doc-collector",
"version": "1.0.0",
"description": "AI-powered website documentation collector",
"type": "module",
"bin": {
"doc-collector": "./bin/doc-collector-cli.ts"
},
"scripts": {
"format": "biome format --write .",
"lint:fix": "biome lint --write .",
"check:fix": "biome check --write ."
},
"dependencies": {
"ai": "^6.0.6",
"commander": "^14.0.1",
"dedent": "^1.6.0",
"zod": "^4.1.8"
},
"devDependencies": {
"@biomejs/biome": "^1.5.3",
"typescript": "^5.0.0"
}
}
184 changes: 184 additions & 0 deletions boat/doc-collector/src/ai/documentarian.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import dedent from 'dedent';
import { z } from 'zod';
import type { AIProvider } from '../../../../src/ai/provider.ts';
import type { WebPageState } from '../../../../src/state-manager.ts';
import type { DocbotConfig } from '../config.ts';

/**
 * Turns exploratory UI research about a single page into structured
 * documentation by asking an AI provider for a schema-constrained object.
 *
 * NOTE: the prompt templates below are intentionally kept flush-left so the
 * text handed to `dedent` stays exactly as authored.
 */
class Documentarian {
  private provider: AIProvider;
  private config: DocbotConfig;

  /**
   * @param provider - AI provider whose `generateObject` is used to produce the documentation.
   * @param config - Doc collector configuration; only `docs.prompt` is read by this class.
   */
  constructor(provider: AIProvider, config: DocbotConfig = {}) {
    this.config = config;
    this.provider = provider;
  }

  /**
   * Generates documentation for `state` from `research`.
   *
   * If the first attempt fails with an error recognised by
   * `shouldRetryWithSanitizedResearch`, exactly one more attempt is made using
   * sanitized research with the fallback note enabled. Any other error, and any
   * error raised by the second attempt, propagates to the caller.
   */
  async document(state: WebPageState, research: string): Promise<PageDocumentation> {
    let firstFailure: unknown;
    try {
      return await this.generateDocumentation(state, research);
    } catch (error) {
      firstFailure = error;
    }

    if (this.shouldRetryWithSanitizedResearch(firstFailure)) {
      const cleanedResearch = this.sanitizeResearch(research);
      return this.generateDocumentation(state, cleanedResearch, true);
    }

    throw firstFailure;
  }

  /** Builds the system prompt; a configured `docs.prompt` is appended after the rules. */
  private getSystemPrompt(): string {
    const extraGuidance = this.config.docs?.prompt || '';

    return dedent`
<role>
You are a product analyst preparing functional website documentation from UI research.
</role>

<task>
Convert exploratory UI research into a precise spec of what users can do on the current page.
Distinguish proven capabilities from assumptions.
Prefer accuracy over coverage.
</task>

<rules>
Only list capabilities that are grounded in the provided page research.
Put actions into "can" only when there is direct evidence in the page context.
Put actions into "might" only when the UI strongly suggests a capability but proof is incomplete.
Describe each action from the end-user perspective.
Be explicit about scope:
- one item
- list of items
- bulk operations
- all items
- page-level
Avoid implementation details, selectors, and QA wording.
Avoid duplicate actions with different phrasing.
</rules>

${extraGuidance}
`;
  }

  /**
   * Builds the user prompt from the page state and the research text.
   *
   * Headings h1-h4 are joined with ` | ` after dropping empty ones, and at most
   * the first 50 links are listed. When `simplified` is true a fallback-mode
   * note is included.
   */
  private buildPrompt(state: WebPageState, research: string, simplified = false): string {
    const presentHeadings = [state.h1, state.h2, state.h3, state.h4].filter(Boolean);
    const headingTrail = presentHeadings.join(' | ');

    const listedLinks = (state.links || []).slice(0, 50);
    const linkList = listedLinks.map((link) => `- ${link.title}: ${link.url}`).join('\n');

    let fallbackNote = '';
    if (simplified) {
      fallbackNote = dedent`
<fallback_mode>
The research text was simplified because the original formatting was noisy.
Ignore malformed table syntax and rely only on clear, repeated signals.
Prefer fewer actions over speculative coverage.
</fallback_mode>
`;
    }

    return dedent`
<page>
URL: ${state.url}
Title: ${state.title || ''}
Headings: ${headingTrail}
</page>

<navigation_links>
${linkList}
</navigation_links>

<research>
${research}
</research>

${fallbackNote}

<output_requirements>
Return structured data.
summary: short page purpose statement.
can: actions you are 100% sure are available on page.
might: actions that look possible but are not fully proven.
For each action provide:
- action: concise user-facing capability phrased as "user can ..."
- scope: one of one item, list of items, bulk operations, all items, page-level
- evidence: short reason based on visible UI or research
</output_requirements>
`;
  }

  /** Sends the system and user prompts to the provider and returns the generated object. */
  private async generateDocumentation(state: WebPageState, research: string, simplified = false): Promise<PageDocumentation> {
    const systemMessage = { role: 'system' as const, content: this.getSystemPrompt() };
    const userMessage = { role: 'user' as const, content: this.buildPrompt(state, research, simplified) };

    const { object } = await this.provider.generateObject([systemMessage, userMessage], pageDocumentationSchema, undefined, {
      agentName: 'documentarian',
    });

    return object as PageDocumentation;
  }

  /** Reports whether the error text contains one of the known JSON-generation failure markers. */
  private shouldRetryWithSanitizedResearch(error: unknown): boolean {
    const text = error instanceof Error ? error.message : String(error);
    const retryMarkers = ['Failed to generate JSON', 'failed_generation'];
    return retryMarkers.some((marker) => text.includes(marker));
  }

  /**
   * Filters pipe-containing lines out of the research text.
   *
   * Blank lines and lines without `|` are always kept. A line that contains
   * `|` is kept only when it has at least two pipes and either contains
   * `|------` or, after trimming, starts with `|` and has at least four pipes.
   */
  private sanitizeResearch(research: string): string {
    const keepLine = (line: string): boolean => {
      if (!line.trim() || !line.includes('|')) {
        return true;
      }

      const pipes = line.split('|').length - 1;
      if (pipes < 2) {
        return false;
      }
      if (line.includes('|------')) {
        return true;
      }
      return line.trim().startsWith('|') && pipes >= 4;
    };

    return research.split('\n').filter(keepLine).join('\n');
  }
}

/** Scope labels a documented capability may carry. */
const capabilityScopes = ['one item', 'list of items', 'bulk operations', 'all items', 'page-level'] as const;

/** One user-facing capability: the action, the scope it applies to, and the supporting evidence. */
const capabilitySchema = z.object({
  action: z.string(),
  scope: z.enum(capabilityScopes),
  evidence: z.string(),
});

/**
 * Shape requested from the AI provider for a page: a summary plus two
 * capability lists, `can` and `might`.
 */
const pageDocumentationSchema = z.object({
  summary: z.string(),
  can: z.array(capabilitySchema),
  might: z.array(capabilitySchema),
});

/** Static type inferred from `pageDocumentationSchema`. */
type PageDocumentation = z.infer<typeof pageDocumentationSchema>;

export { Documentarian };
export type { PageDocumentation };
119 changes: 119 additions & 0 deletions boat/doc-collector/src/cli.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import fs from 'node:fs';
import path from 'node:path';
import { Command } from 'commander';
import { setPreserveConsoleLogs } from '../../../src/utils/logger.ts';
import { DocBot, type DocbotOptions } from './docbot.ts';

/**
 * Maps parsed command-line flags onto a `DocbotOptions` object.
 *
 * `verbose` is taken from `verbose` or, failing that, `debug`. When `session`
 * is exactly `true` it is replaced by `output/session.json`; any other value
 * is passed through unchanged.
 */
function buildOptions(options: any): DocbotOptions {
  const wantsDefaultSessionFile = options.session === true;

  return {
    verbose: options.verbose || options.debug,
    config: options.config,
    path: options.path,
    show: options.show,
    headless: options.headless,
    incognito: options.incognito,
    session: wantsDefaultSessionFile ? 'output/session.json' : options.session,
    docsConfig: options.docsConfig,
  };
}

/**
 * Registers the flags shared by doc-collector subcommands on `cmd`.
 *
 * Each flag is added by calling `option` on the result of the previous call,
 * and the result of the last call is returned so configuration can be chained.
 */
function addCommonOptions(cmd: Command): Command {
  const sharedFlags: Array<[string, string]> = [
    ['-v, --verbose', 'Enable verbose logging'],
    ['--debug', 'Enable debug logging'],
    ['-c, --config <path>', 'Path to explorbot configuration file'],
    ['--docs-config <path>', 'Path to doc collector configuration file'],
    ['-p, --path <path>', 'Working directory path'],
    ['-s, --show', 'Show browser window'],
    ['--headless', 'Run browser in headless mode'],
    ['--incognito', 'Run without recording experiences'],
    ['--session [file]', 'Save/restore browser session from file'],
  ];

  return sharedFlags.reduce((target, [flags, description]) => target.option(flags, description), cmd);
}

/**
 * Builds the doc-collector command group.
 *
 * Subcommands:
 * - `collect <path>`: starts a `DocBot`, collects documentation beginning at
 *   `<path>`, prints a summary and exits the process (0 on success, 1 on failure).
 * - `init`: writes a starter `docbot.config.ts` into the current directory, or
 *   into the directory given by `--path` (created if missing).
 *
 * @param name - Name given to the returned command; defaults to `docs`.
 * @returns The configured commander `Command`.
 */
export function createDocsCommands(name = 'docs'): Command {
  const cmd = new Command(name);
  cmd.description('AI-powered website documentation collector');

  const collectCmd = cmd
    .command('collect <path>')
    .description('Crawl pages and generate documentation spec')
    .option('--max-pages <count>', 'Maximum number of pages to document');

  addCommonOptions(collectCmd).action(async (startPath, options) => {
    setPreserveConsoleLogs(true);

    // Validate --max-pages before any browser work so a typo fails fast
    // instead of passing NaN on to the collector.
    let maxPages: number | undefined;
    if (options.maxPages !== undefined) {
      maxPages = Number.parseInt(options.maxPages, 10);
      if (Number.isNaN(maxPages)) {
        console.error(`Failed: --max-pages expects a number, got "${options.maxPages}"`);
        process.exit(1);
      }
    }

    // Declared outside the try block so the failure path can still shut it down.
    let bot: DocBot | undefined;
    try {
      bot = new DocBot({
        ...buildOptions(options),
        startUrl: startPath,
      });
      await bot.start();

      const result = await bot.collect(startPath, { maxPages });

      console.log(`\nDocumented ${result.pages.length} page(s)`);
      console.log(`Skipped ${result.skipped.length} page(s)`);
      console.log(`Spec index: ${result.indexPath}`);
      console.log(`Pages dir: ${path.join(result.outputDir, 'pages')}`);

      await bot.stop();
      process.exit(0);
    } catch (error) {
      // Report non-Error throwables verbatim rather than hiding them.
      console.error('Failed:', error instanceof Error ? error.message : String(error));

      // Best-effort shutdown so a failed run does not leave the bot running;
      // a cleanup failure is reported but must not mask the original error.
      try {
        await bot?.stop();
      } catch (stopError) {
        console.error('Cleanup failed:', stopError instanceof Error ? stopError.message : String(stopError));
      }
      process.exit(1);
    }
  });

  cmd
    .command('init')
    .description('Initialize doc collector configuration')
    .option('-f, --force', 'Overwrite existing config file')
    .option('-p, --path <path>', 'Working directory for initialization')
    .action(async (options) => {
      // Resolve the target directory explicitly instead of using process.chdir(),
      // so a failure while writing cannot leave the process in another directory.
      let targetDir = process.cwd();
      if (options.path) {
        targetDir = path.resolve(options.path);
        fs.mkdirSync(targetDir, { recursive: true });
        console.log(`Working in: ${targetDir}`);
      }

      const configPath = path.resolve(targetDir, 'docbot.config.ts');
      if (fs.existsSync(configPath) && !options.force) {
        console.log(`Config file already exists: ${configPath}`);
        console.log('Use --force to overwrite.');
        process.exit(1);
      }

      // Starter configuration written verbatim to disk.
      const configContent = `export default {
docs: {
maxPages: 100,
output: 'docs',
screenshot: true,
collapseDynamicPages: true,
scope: 'site',
includePaths: [],
excludePaths: [],
deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
minCanActions: 1,
minInteractiveElements: 3,
// prompt: 'Add domain-specific documentation guidance here',
},
};
`;

      fs.writeFileSync(configPath, configContent, 'utf8');
      console.log(`Created: ${configPath}`);
    });

  return cmd;
}
Loading
Loading