diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b67bc82..ff8db38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,8 @@ jobs: - run: pnpm install --frozen-lockfile - run: pnpm build - run: pnpm typecheck - - run: pnpm --filter tutorial-forge test + # Unit tests across all packages (core + cli vitest; example-app is e2e-only). + - run: pnpm -r test - name: Install Playwright Chromium if: steps.changes.outputs.docs_only != 'true' run: pnpm --filter tutorial-forge-example-app exec playwright install --with-deps chromium diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f76234..8afe6fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Everything below is opt-in. Notes for existing consumers: - **`render --phase record` runs without a prior `tts` phase (#50).** The TTS-free framing check — `--phase record --contact-sheet`, used to verify selectors and framing across a whole tutorial before paying for narration — no longer throws `run the tts phase first` on a fresh work dir. When `tts.json` is absent it falls back to silent placeholder timings (steps pace as silent), so you get a contact sheet at zero TTS cost. `--phase post` still requires real timings. New exports: `silentTTSResult`, `loadTTSResultIfPresent`. - **Broader recap-framing lint (#49).** The strict-mode lint that nudges the last step to close with a recap (#36) no longer false-warns on valid past-tense / accomplishment recaps (e.g. "you created an event, set up ticketing… from here you can…"). `RECAP_CUE_RE` now also matches accomplishment phrasing and the "from here you…" hand-off, bringing it to parity with the looser objective cue. Advisory-only — it never fails a render. +- **Parallel batch rendering (#62).** Render multiple tutorials × languages concurrently with `--render-concurrency ` (or `renderConcurrency` in config); default **1** (serial, unchanged). The record phase is mostly real-time waiting, so a batch (e.g. regenerating a whole tutorial set) runs much faster in parallel — near-linear up to CPU/RAM. **Opt-in because it requires a parallel-safe adapter** (concurrent renders each run their own `setup`/`teardown`, so a shared seed DB must isolate per render — see [adapters.md → Parallel rendering](docs/adapters.md#parallel-rendering)). New export: `mapLimit`. ## 0.12.0 — chapters that work on YouTube & Vimeo diff --git a/docs/adapters.md b/docs/adapters.md index 67d6bf3..c1acbd6 100644 --- a/docs/adapters.md +++ b/docs/adapters.md @@ -121,3 +121,17 @@ Thunks run in reverse registration order, so the last thing created is the first - **Keep secrets in env vars.** The adapter is plain code in your repo; read credentials from the environment, as in any e2e test. - **Teardown failures are non-fatal, and must tolerate partial setup.** They log a warning and the render still succeeds — teardown runs after the manifest is final, and also after a *failed* setup, so null-check what you delete. - **Verify setup before a full render.** `tutorial-forge doctor` checks the app is reachable; add `--setup` to actually run `adapter.setup` once and tear it down. It catches the "reachable but pointed at the wrong database" case — a green reachability check followed by a guaranteed sign-in failure — before you wait out a whole render. + +## Parallel rendering + +By default the CLI renders one tutorial × language at a time. Because the record phase mostly *waits* (each step holds in real time for its narration), the machine is near-idle during it — so rendering a **set** of tutorials is much faster in parallel. Opt in with `--render-concurrency ` (or `renderConcurrency` in `forge.config.ts`); the default is `1` (serial). + +Concurrency > 1 only works if **your adapter is parallel-safe**, because each concurrent render runs its own `setup`/`teardown` against your app at the same time. The contract: + +- **Isolate seed data per render.** Concurrent `setup` calls must not collide on shared state. Give each render its own namespace — a per-worker database/schema, a unique tenant or account, or seed records keyed so they can't clash — rather than seeding into one shared space. If two renders seed and tear down the same rows, they'll corrupt each other. +- **Don't assume a single live browser/page.** Each render drives its own browser; adapters that reach for a module-global page or client will break. Use `ctx.state` (see above) for per-render handoff, never a shared singleton. +- **Make teardown idempotent and scoped.** It already must tolerate partial setup; under concurrency it must also only remove *its own* render's data. + +If you're not sure your adapter meets this, leave concurrency at `1` — it's the safe default. (TTS synthesis is already safely parallelized within a render via `ttsConcurrency`, independent of this.) + +If one render fails, the command stops *scheduling* new ones and exits non-zero, but renders already in flight run to completion first (their logs may interleave after the error) — so a failed batch can still leave a few finished videos behind. diff --git a/packages/cli/package.json b/packages/cli/package.json index 7dff5b9..879cf4d 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -28,7 +28,7 @@ "scripts": { "build": "tsc -p tsconfig.json", "typecheck": "tsc -p tsconfig.json --noEmit", - "test": "echo 'no unit tests in cli'", + "test": "vitest run", "prepublishOnly": "tsc -p tsconfig.json" }, "dependencies": { @@ -38,7 +38,8 @@ "tutorial-forge": "workspace:*" }, "devDependencies": { - "playwright": "^1.59.0" + "playwright": "^1.59.0", + "vitest": "^3.0.0" }, "engines": { "node": ">=20" diff --git a/packages/cli/src/main.ts b/packages/cli/src/main.ts index 1934365..c52d402 100644 --- a/packages/cli/src/main.ts +++ b/packages/cli/src/main.ts @@ -40,6 +40,7 @@ program .option('--keep-work', 'keep the work directory on success') .option('--out-dir ', 'output directory (overrides config)') .option('--concurrency ', 'TTS synthesis concurrency') + .option('--render-concurrency ', 'render N tutorials in parallel (default 1; only with parallel-safe adapters)') .option('--config ', 'path to forge.config.ts') .option('--lang ', 'render these languages (comma-separated, e.g. "es,fr"); overrides config.languages') .option('--zoom', 'zoom toward click targets (overrides config.zoom)') diff --git a/packages/cli/src/render.ts b/packages/cli/src/render.ts index c24cd00..46b65f2 100644 --- a/packages/cli/src/render.ts +++ b/packages/cli/src/render.ts @@ -1,5 +1,5 @@ import { join, resolve } from 'node:path'; -import { render, type ForgeConfig } from 'tutorial-forge'; +import { render, mapLimit, type ForgeConfig } from 'tutorial-forge'; import { loadConfig, discoverTutorials } from './load.js'; export interface RenderCmdOptions { @@ -9,6 +9,8 @@ export interface RenderCmdOptions { keepWork?: boolean; outDir?: string; concurrency?: string; + /** How many tutorial×language renders to run in parallel (default 1). */ + renderConcurrency?: string; config?: string; /** Comma-separated language list, e.g. "es,fr". Overrides config.languages. */ lang?: string; @@ -50,6 +52,34 @@ function resolveRecorder(value: string | undefined): 'video' | 'screencast' | un return value; } +/** + * How many renders to run in parallel: the `--render-concurrency` flag wins over + * `config.renderConcurrency`, default 1 (serial). A non-positive or non-numeric + * value clamps to 1, so the worst case is today's safe serial behavior. + */ +export function resolveRenderConcurrency( + flag: string | undefined, + configValue: number | undefined, +): number { + const raw = flag !== undefined ? parseInt(flag, 10) : configValue; + return Number.isFinite(raw) && (raw as number) >= 1 ? Math.floor(raw as number) : 1; +} + +/** + * Flatten discovered tutorials × languages into a flat render-job list, in + * tutorial-major order. Languages are de-duplicated first: two jobs with the + * same `(id, lang)` would target the same `.forge/` work dir and + * `.mp4` output, which under `--render-concurrency > 1` means two + * renders writing the same paths at once (e.g. `--lang "es,es"`). + */ +export function buildRenderJobs( + discovered: Array<{ tutorial: T }>, + langs: Array, +): Array<{ tutorial: T; lang: string | null }> { + const uniqueLangs = [...new Set(langs)]; + return discovered.flatMap(({ tutorial }) => uniqueLangs.map((lang) => ({ tutorial, lang }))); +} + export async function renderCommand(globs: string[], opts: RenderCmdOptions): Promise { const cwd = process.cwd(); const config: ForgeConfig = await loadConfig(cwd, opts.config); @@ -70,49 +100,55 @@ export async function renderCommand(globs: string[], opts: RenderCmdOptions): Pr const langs: Array = opts.lang?.split(',').map((l) => l.trim()).filter(Boolean) ?? config.languages ?? [null]; - for (const { tutorial } of discovered) { - for (const lang of langs) { - const suffix = lang ? `.${lang}` : ''; - const label = lang ? ` [${lang}]` : ''; - console.log(`\n▶ ${tutorial.id}${label} — ${tutorial.title} (${tutorial.steps.length} steps)`); - const result = await render(tutorial, config.adapter, { - tts: (lang && config.ttsByLang?.[lang]) || config.tts, - output: join(outDir, `${tutorial.id}${suffix}.mp4`), - workDir: join(cwd, '.forge', `${tutorial.id}${suffix}`), - viewport: config.viewport, - headless: opts.headed ? false : config.headless ?? true, - cursor: config.cursor, - callouts: config.callouts, - subtitles: config.subtitles, - captionStyle: config.captionStyle, - leadInMs: config.leadInMs, - keepWorkDir: opts.keepWork ?? config.keepWorkDir, - ttsCacheDir: config.ttsCacheDir, - ttsConcurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : config.ttsConcurrency, - phase: opts.phase, - lang: lang ?? undefined, - defaultLang, - zoom: opts.zoom ?? config.zoom, - idleSpeedup: opts.idleSpeedup ?? config.idleSpeedup, - gif: resolveGifOption(opts, config.gif), - recorder: resolveRecorder(opts.recorder) ?? config.recorder, - debug: opts.debug, - contactSheet: opts.contactSheet ?? config.contactSheet, - // --no-chapters forces off; otherwise fall back to config (post defaults on). - chapters: opts.chapters === false ? false : config.chapters, - // --no-cards forces off; otherwise fall back to config (post defaults on). - cards: opts.cards === false ? false : config.cards, - }); - if (opts.phase === 'all' || opts.phase === 'post') { - console.log(`✓ ${result.output} (${(result.outputDurationMs / 1000).toFixed(1)}s)`); - if (result.srtPath) console.log(` subtitles: ${result.srtPath}`); - if (result.chaptersVttPath) console.log(` chapters: ${result.chaptersVttPath}`); - if (result.gifPath) console.log(` gif: ${result.gifPath}`); - if (result.contactSheetPath) console.log(` contact: ${result.contactSheetPath}`); - } else { - console.log(`✓ phase "${opts.phase}" complete — work dir: ${result.workDir}`); - if (result.contactSheetPath) console.log(` contact: ${result.contactSheetPath}`); - } - } + // Flatten tutorial × language into a job list so it can run with bounded + // concurrency. Default 1 = today's serial, fail-fast behavior unchanged. + const jobs = buildRenderJobs(discovered, langs); + const renderConcurrency = resolveRenderConcurrency(opts.renderConcurrency, config.renderConcurrency); + if (renderConcurrency > 1) { + console.log(`rendering ${jobs.length} job(s) at concurrency ${renderConcurrency}`); } + + await mapLimit(jobs, renderConcurrency, async ({ tutorial, lang }) => { + const suffix = lang ? `.${lang}` : ''; + const label = lang ? ` [${lang}]` : ''; + console.log(`\n▶ ${tutorial.id}${label} — ${tutorial.title} (${tutorial.steps.length} steps)`); + const result = await render(tutorial, config.adapter, { + tts: (lang && config.ttsByLang?.[lang]) || config.tts, + output: join(outDir, `${tutorial.id}${suffix}.mp4`), + workDir: join(cwd, '.forge', `${tutorial.id}${suffix}`), + viewport: config.viewport, + headless: opts.headed ? false : config.headless ?? true, + cursor: config.cursor, + callouts: config.callouts, + subtitles: config.subtitles, + captionStyle: config.captionStyle, + leadInMs: config.leadInMs, + keepWorkDir: opts.keepWork ?? config.keepWorkDir, + ttsCacheDir: config.ttsCacheDir, + ttsConcurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : config.ttsConcurrency, + phase: opts.phase, + lang: lang ?? undefined, + defaultLang, + zoom: opts.zoom ?? config.zoom, + idleSpeedup: opts.idleSpeedup ?? config.idleSpeedup, + gif: resolveGifOption(opts, config.gif), + recorder: resolveRecorder(opts.recorder) ?? config.recorder, + debug: opts.debug, + contactSheet: opts.contactSheet ?? config.contactSheet, + // --no-chapters forces off; otherwise fall back to config (post defaults on). + chapters: opts.chapters === false ? false : config.chapters, + // --no-cards forces off; otherwise fall back to config (post defaults on). + cards: opts.cards === false ? false : config.cards, + }); + if (opts.phase === 'all' || opts.phase === 'post') { + console.log(`✓ ${result.output} (${(result.outputDurationMs / 1000).toFixed(1)}s)`); + if (result.srtPath) console.log(` subtitles: ${result.srtPath}`); + if (result.chaptersVttPath) console.log(` chapters: ${result.chaptersVttPath}`); + if (result.gifPath) console.log(` gif: ${result.gifPath}`); + if (result.contactSheetPath) console.log(` contact: ${result.contactSheetPath}`); + } else { + console.log(`✓ phase "${opts.phase}" complete — work dir: ${result.workDir}`); + if (result.contactSheetPath) console.log(` contact: ${result.contactSheetPath}`); + } + }); } diff --git a/packages/cli/test/render.test.ts b/packages/cli/test/render.test.ts new file mode 100644 index 0000000..da8f43c --- /dev/null +++ b/packages/cli/test/render.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it } from 'vitest'; +import { resolveRenderConcurrency, buildRenderJobs } from '../src/render.js'; + +describe('resolveRenderConcurrency (#62)', () => { + it('defaults to 1 when neither flag nor config is set (serial — unchanged behavior)', () => { + expect(resolveRenderConcurrency(undefined, undefined)).toBe(1); + }); + + it('uses the config value when there is no flag', () => { + expect(resolveRenderConcurrency(undefined, 4)).toBe(4); + }); + + it('lets the flag win over config', () => { + expect(resolveRenderConcurrency('2', 8)).toBe(2); + }); + + it('clamps non-positive / non-numeric flags to 1 (safe serial)', () => { + for (const bad of ['0', '-3', 'abc', '']) { + expect(resolveRenderConcurrency(bad, 8)).toBe(1); + } + }); + + it('floors a fractional flag', () => { + expect(resolveRenderConcurrency('3.9', undefined)).toBe(3); + }); +}); + +describe('buildRenderJobs (#62)', () => { + const discovered = [{ tutorial: { id: 'a' } }, { tutorial: { id: 'b' } }]; + + it('flattens tutorial × language in tutorial-major order', () => { + expect(buildRenderJobs(discovered, [null]).map((j) => [j.tutorial.id, j.lang])).toEqual([ + ['a', null], + ['b', null], + ]); + expect(buildRenderJobs(discovered, ['es', 'fr']).map((j) => [j.tutorial.id, j.lang])).toEqual([ + ['a', 'es'], + ['a', 'fr'], + ['b', 'es'], + ['b', 'fr'], + ]); + }); + + it('de-duplicates languages so two jobs cannot collide on a work dir', () => { + expect(buildRenderJobs([{ tutorial: { id: 'a' } }], ['es', 'es', 'fr', 'es']).map((j) => j.lang)).toEqual([ + 'es', + 'fr', + ]); + }); + + it('produces exactly one job for a single tutorial rendered in the source language', () => { + expect(buildRenderJobs([{ tutorial: { id: 'a' } }], [null])).toHaveLength(1); + }); +}); diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index 14b9d71..c9a654c 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -18,6 +18,14 @@ export interface ForgeConfig { keepWorkDir?: boolean; ttsCacheDir?: string; ttsConcurrency?: number; + /** + * How many tutorial×language renders to run in parallel. Default 1 (serial). + * Only raise this if your adapter is parallel-safe — concurrent renders each + * run their own `setup`/`teardown` against your app, so a shared seed DB must + * isolate per render (e.g. unique seed data, or a per-worker namespace) or + * they'll collide. See docs/adapters.md. + */ + renderConcurrency?: number; /** Languages rendered by default (overridable with --lang). Omit for source-language only. */ languages?: string[]; /** The language tutorial narration is written in. Default 'en'. */ @@ -63,6 +71,7 @@ const configSchema = z.object({ keepWorkDir: z.boolean().optional(), ttsCacheDir: z.string().optional(), ttsConcurrency: z.number().int().positive().optional(), + renderConcurrency: z.number().int().positive().optional(), languages: z.array(z.string().min(2)).optional(), defaultLang: z.string().min(2).optional(), ttsByLang: z diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index ef8e31c..feaab72 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -94,3 +94,4 @@ export { type SpeedSegment, type TimeMap, } from './post/retime.js'; +export { mapLimit } from './util/fs.js'; diff --git a/packages/core/src/tts/cache.ts b/packages/core/src/tts/cache.ts index 48b5d57..32899bd 100644 --- a/packages/core/src/tts/cache.ts +++ b/packages/core/src/tts/cache.ts @@ -1,6 +1,7 @@ import { homedir } from 'node:os'; import { join, dirname } from 'node:path'; import { copyFile, rename, rm } from 'node:fs/promises'; +import { randomUUID } from 'node:crypto'; import type { TTSProvider } from '../types.js'; import { sha256 } from '../util/hash.js'; import { ensureDir, exists } from '../util/fs.js'; @@ -30,8 +31,14 @@ export async function synthesizeCached( const cached = join(cacheDir, `${cacheKeyFor(provider, text)}.wav`); if (!(await exists(cached))) { await ensureDir(cacheDir); - const raw = cached + '.raw.tmp'; - const normalized = cached + '.tmp'; + // Per-call unique temp paths: two renders synthesizing the SAME cache key + // concurrently (same provider + text — the same-language batch-regen case) + // must not share a temp file, or they'd clobber each other mid-write and one + // job's cleanup would delete the other's in-flight file. The final `rename` + // into `cached` is atomic regardless of which job wins. + const token = `${process.pid}.${randomUUID()}`; + const raw = `${cached}.${token}.raw.tmp`; + const normalized = `${cached}.${token}.tmp`; try { await provider.synthesize(text, raw); await normalizeToWav(raw, normalized); diff --git a/packages/core/src/util/fs.ts b/packages/core/src/util/fs.ts index 50a7f12..07f6631 100644 --- a/packages/core/src/util/fs.ts +++ b/packages/core/src/util/fs.ts @@ -25,12 +25,24 @@ export async function mapLimit( ): Promise { const results: R[] = new Array(items.length); let next = 0; + // On the first error: stop *scheduling* new items, let the items already in + // flight settle, then reject with the first error. We don't reject via + // Promise.all (which would return before the in-flight items finish, leaving + // them running orphaned past the call) — workers swallow into `firstError` and + // we rethrow only once every worker has drained. A failure thus never kicks + // off the rest of the queue, and never outlives the call. + let firstError: unknown; const workers = Array.from({ length: Math.max(1, Math.min(limit, items.length)) }, async () => { - while (next < items.length) { + while (next < items.length && firstError === undefined) { const i = next++; - results[i] = await fn(items[i] as T, i); + try { + results[i] = await fn(items[i] as T, i); + } catch (err) { + if (firstError === undefined) firstError = err; + } } }); await Promise.all(workers); + if (firstError !== undefined) throw firstError; return results; } diff --git a/packages/core/test/maplimit.test.ts b/packages/core/test/maplimit.test.ts new file mode 100644 index 0000000..5ff6624 --- /dev/null +++ b/packages/core/test/maplimit.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, it } from 'vitest'; +import { mapLimit } from '../src/util/fs.js'; + +const tick = () => new Promise((r) => setTimeout(r, 1)); + +describe('mapLimit', () => { + it('runs every item and preserves result order', async () => { + const out = await mapLimit([1, 2, 3, 4, 5], 2, async (n) => { + await tick(); + return n * 10; + }); + expect(out).toEqual([10, 20, 30, 40, 50]); + }); + + it('never exceeds the concurrency cap', async () => { + let inFlight = 0; + let peak = 0; + await mapLimit(Array.from({ length: 12 }, (_, i) => i), 3, async () => { + inFlight++; + peak = Math.max(peak, inFlight); + await tick(); + inFlight--; + }); + expect(peak).toBe(3); + }); + + it('limit 1 runs strictly sequentially (today\'s serial behavior)', async () => { + const order: number[] = []; + await mapLimit([0, 1, 2], 1, async (n) => { + order.push(n); // start + await tick(); + order.push(n + 100); // end — must finish before the next starts + }); + expect(order).toEqual([0, 100, 1, 101, 2, 102]); + }); + + it('limit 1 is fail-fast: a throw stops before later items run (matches the old for-loop)', async () => { + const seen: number[] = []; + await expect( + mapLimit([0, 1, 2], 1, async (n) => { + seen.push(n); + if (n === 1) throw new Error('boom'); + }), + ).rejects.toThrow('boom'); + expect(seen).toEqual([0, 1]); // item 2 never started + }); + + it('rejects when a job throws under concurrency > 1', async () => { + await expect( + mapLimit([0, 1, 2, 3], 2, async (n) => { + await tick(); + if (n === 2) throw new Error('kaboom'); + }), + ).rejects.toThrow('kaboom'); + }); + + it('on a throw, stops scheduling new items but lets in-flight ones finish (concurrency > 1)', async () => { + const started: number[] = []; + const finished: number[] = []; + await expect( + mapLimit([0, 1, 2, 3, 4], 2, async (n) => { + started.push(n); + await tick(); + if (n === 0) throw new Error('stop'); + finished.push(n); + }), + ).rejects.toThrow('stop'); + // Two workers start 0 and 1; 0 throws, 1 (already in flight) finishes. + // The abort flag prevents 2, 3, 4 from ever starting. + expect(started.sort()).toEqual([0, 1]); + expect(finished).toEqual([1]); + }); + + it('returns [] for empty input at any limit', async () => { + expect(await mapLimit([], 4, async () => 1)).toEqual([]); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 01309bb..9fe0a76 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -36,6 +36,9 @@ importers: playwright: specifier: ^1.59.0 version: 1.60.0 + vitest: + specifier: ^3.0.0 + version: 3.2.6(@types/node@22.19.20)(jiti@2.7.0)(tsx@4.22.4) packages/core: dependencies: