From c8acffccdf8bc9bc3cbbed4c5498ac6ca7b6d023 Mon Sep 17 00:00:00 2001 From: Chasey Date: Wed, 17 Jun 2026 11:33:43 +0800 Subject: [PATCH] Take in pdf-html and repo-port; add archive notice (devkit retiring) devkit is being retired in favor of chasey-myagi/skills, where the three review skills now live and stay maintained. Before archiving, take in pdf-html and repo-port from skills so the retired skills are preserved in one read-only place (research-report is already identical here). README gets an archive notice pointing at the skills repo. Bumps 0.6.0 -> 0.7.0 (final, retirement release). Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 + package.json | 2 +- skills/pdf-html/SKILL.md | 186 +++++++++ skills/pdf-html/template.html | 387 ++++++++++++++++++ skills/repo-port/SKILL.md | 141 +++++++ skills/repo-port/agents/analyzer.md | 80 ++++ skills/repo-port/agents/consolidator.md | 118 ++++++ skills/repo-port/agents/mapper.md | 66 +++ skills/repo-port/agents/porter.md | 64 +++ skills/repo-port/agents/reviewer.md | 129 ++++++ .../repo-port/references/analysis-criteria.md | 60 +++ skills/repo-port/references/rust-criteria.md | 121 ++++++ skills/repo-port/scripts/merge_analysis.py | 92 +++++ 15 files changed, 1449 insertions(+), 3 deletions(-) create mode 100644 skills/pdf-html/SKILL.md create mode 100644 skills/pdf-html/template.html create mode 100644 skills/repo-port/SKILL.md create mode 100644 skills/repo-port/agents/analyzer.md create mode 100644 skills/repo-port/agents/consolidator.md create mode 100644 skills/repo-port/agents/mapper.md create mode 100644 skills/repo-port/agents/porter.md create mode 100644 skills/repo-port/agents/reviewer.md create mode 100644 skills/repo-port/references/analysis-criteria.md create mode 100644 skills/repo-port/references/rust-criteria.md create mode 100644 skills/repo-port/scripts/merge_analysis.py diff --git a/.claude-plugin/marketplace.json 
b/.claude-plugin/marketplace.json index c902830..9d1422d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ { "name": "devkit", "description": "Development quality assurance skills — TDD workflow, test review, code review, and design-first frontend development", - "version": "0.6.0", + "version": "0.7.0", "author": { "name": "Chasey" }, diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index e989a69..e2467a3 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "devkit", "description": "Development quality assurance skills — TDD workflow, test review, code review, and design-first frontend development", - "version": "0.6.0", + "version": "0.7.0", "author": { "name": "Chasey", "email": "chasey.myagi@gmail.com" diff --git a/README.md b/README.md index a8f5a32..0606868 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # DevKit +> **⚠️ Archived (2026-06) — 不再维护。** 日常用的 code-review / test-review / linus-review 三件套已迁到 [chasey-myagi/skills](https://github.com/chasey-myagi/skills)(持续维护,`npx skills add`)。本仓作为只读归档,保留 harness-workflow / tdd-workflow / issue-fix / pdf-html / repo-port 等退役 skill,需要时仍可取用。 + Development quality assurance skills for Claude Code. 10 skills covering multi-agent harness, TDD, code review, design-first frontend, research reports, and more. 
diff --git a/package.json b/package.json index 8fa98b7..8b4637c 100644 --- a/package.json +++ b/package.json @@ -16,5 +16,5 @@ }, "name": "devkit", "type": "module", - "version": "0.6.0" + "version": "0.7.0" } diff --git a/skills/pdf-html/SKILL.md b/skills/pdf-html/SKILL.md new file mode 100644 index 0000000..a4b8726 --- /dev/null +++ b/skills/pdf-html/SKILL.md @@ -0,0 +1,186 @@ +--- +name: pdf-html +description: > + 写一份打印为 A4 PDF 时分页干净、带真实页码的单文件 HTML 报告。 + 核心配方:sheet-stack 布局 + @page margin boxes(Chrome 原生支持)+ 纯 + window.print() 导出,不用 paged.js polyfill。模板里不预设配色 / 字体—— + 开工前先问用户。 + Use when: 用户要写单文件 HTML 报告、白皮书、客户简报、研究报告, + 并希望"导出 PDF"或打印 A4 时分页好看、有页码。 + Triggers: "html 报告", "pdf 报告", "html for pdf", "打印简报", + "/pdf-html", "客户简报", "白皮书", "research report html", + "single-file html report"。 +--- + +# pdf-html + +写一份**屏幕浏览像一摞 A4 纸、打印或另存为 PDF 时分页干净、页码自动**的最小配方。 + +## 开工前先问用户 + +模板里**不预设**这些,逐项问清楚再开始: + +1. **配色基调** — 暖奶油 / 工业灰白 / 学术深色 / 客户品牌色(要 hex 或 oklch 都行) +2. **字体偏好** — 系统字体(不联网)/ Google Fonts(DM Sans、Playfair、JetBrains Mono 等)/ 客户指定品牌字 +3. **页面尺寸** — A4 portrait(默认)/ A4 landscape / US Letter +4. **章节预估** — 多少主章节、是否要封面、是否要 TOC +5. **页码风格** — `P. 3 / 12` / `第 3 页 / 共 12 页` / 仅 `3` / 不要页码 +6. **暗色主题** — 是否需要屏幕浏览的明暗切换(PDF 一律亮色) + +回答之后再开始填模板。**不要给默认配色擅自填上**。 + +## 核心结构:sheet-stack + +每个章节是一个 `.sheet`。屏幕上是一摞纸卡片;打印时每个 `.sheet` 自动断到新页: + +```html +
+  <section class="sheet">…封面…</section>
+  <section class="sheet">…01 章节…</section>
+  <section class="sheet">…02 章节…</section>
+
+``` + +```css +.sheet { + width: 100%; + max-width: 210mm; + margin: 0 auto 24px; + padding: 22mm 20mm 24mm; + background: var(--paper); + box-shadow: var(--shadow); +} + +@media print { + .sheet { + max-width: none; + margin: 0; padding: 0; + background: #fff; + box-shadow: none; border-radius: 0; + break-after: page; + } + .sheet:last-of-type { break-after: auto; } +} +``` + +## 真实页码:@page margin boxes(Chrome 原生) + +**⚠️ 不要用 paged.js polyfill**——它会把 body 替换为 `.pagedjs_pages` 容器, +跟 sheet-stack 的 `break-after: page` 冲突,常见症状是「点导出 → 白屏 → 不弹打印对话框」。 + +Chrome / Edge 现代版本已经**原生支持** `@page { @bottom-left { content: ... } }`: + +```css +@page { + size: A4; + margin: 16mm 16mm 22mm; + + @bottom-left { + /* margin box 内不便引用 :root 变量,直接 inline 颜色 */ + content: "<报告标题> · <日期>"; + font-family: ui-monospace, monospace; + font-size: 8pt; + color: <由用户决定>; + padding-top: 4mm; + border-top: 0.5pt solid <由用户决定>; + width: 100%; + } + + @bottom-right { + content: "P. " counter(page) " / " counter(pages); + font-family: ui-monospace, monospace; + font-size: 8pt; + font-weight: 700; + padding-top: 4mm; + border-top: 0.5pt solid <由用户决定>; + } +} + +/* 封面不显示页码与底栏 */ +@page :first { + @bottom-left { content: none; } + @bottom-right { content: none; } +} +``` + +## 导出按钮:一行就够 + +```html + + +``` + +点击 → Chrome 原生打印对话框 → 选「另存为 PDF」即可。 +提示用户在对话框里**取消勾选「页眉和页脚」**,避免 Chrome 默认再加一条 URL/日期。 + +## 分页规则(最易踩坑) + +```css +@media print { + /* 短块不要被切开 */ + .callout, .card, .chart, .pull-quote, .stat, .verdict { + break-inside: avoid; + } + + /* ⚠️ 长表格必须允许跨页(千万不要塞进 break-inside: avoid 列表)*/ + .table-wrap { overflow: visible; break-inside: auto; } + table { break-inside: auto; } + + /* 表头自动在每页重复 */ + thead { display: table-header-group; } + + /* 行不切开 */ + tr { break-inside: avoid; } + + /* 标题不孤立在页底 */ + h2, h3, h4, .section-head { break-after: avoid-page; } + + /* 导言不跟标题分开 */ + .lead, .intro { break-after: avoid-page; } + + /* 屏上的浮动按钮在打印时隐藏 */ + .floating-actions { display: none !important; } +} +``` + 
+## SVG 图表小心两件事 + +1. **`stroke="var(--xxx)"` 在 SVG 属性上有效**,但旧浏览器可能失败——保守起见关键色可以同时写 `style="stroke: var(--xxx)"`。 +2. **`font-family` 在 SVG `` 里要给完整 fallback 链**,避免字体异步加载未完成时回退到错的字体(影响数字 dial 等关键视觉)。 + +## 验证:用 Chrome headless 跑一次 + +写完后用 headless 验证分页是否合理: + +```bash +chrome --headless --disable-gpu --no-sandbox \ + --print-to-pdf=/tmp/out.pdf --no-pdf-header-footer \ + --virtual-time-budget=5000 \ + "file:///path/to/report.html" + +# 逐页可视化 +pdftoppm -png -r 70 /tmp/out.pdf /tmp/page +# 然后 Read /tmp/page-N.png 检查每页 +``` + +理想结果: +- 章节断在该断的地方(不会一节内容跨 3 页又末尾留一大块空白) +- 表头自动重复在每个跨页 +- 封面没有底栏;其他页底栏 + 页码都正确 +- 卡片、图表、pull quote 都没被切开 + +## 模板 + +复制 `template.html`,按用户回答填入配色 / 字体 / 内容。 +模板里所有 `` / `` 占位都需要替换。 + +## 不要做的事 + +- ❌ 引入 paged.js / 任何打印 polyfill(Chrome 原生支持 @page margin boxes,polyfill 反而带来冲突) +- ❌ 默认填配色(即使是"安全的灰白"也不要——逼自己问用户) +- ❌ 把 `.table-wrap` 写进 `break-inside: avoid` 列表(长表格会无法跨页,导致整张表跑到下一页留一大块空白) +- ❌ ` + + +
+ + +
+
+ +
+ +
+

+

+
+ + +
+ + +
+
+
01
+
+
SECTION EYEBROW
+

章节标题

+
+
+ +

+ 章节导言,会自动 避免 与下面正文断开(break-after: avoid-page)。 +

+ +

章节正文。

+ +
+ 提示: callout 在打印时不会被切到下一页。 +
+
+ + +
+
+
02
+
+
TABLE EXAMPLE
+

表格示例

+
+
+ +

表格行数较多时会跨页,表头自动在每页重复。

+ +
+ + + + + + + + + +
列 A列 B列 C
row 1......
row 2......
+
+ +
+ Pull quote 用于章节结尾点睛,打印时不会被切开。 +
+
+ +
+ + + + diff --git a/skills/repo-port/SKILL.md b/skills/repo-port/SKILL.md new file mode 100644 index 0000000..6e1d07a --- /dev/null +++ b/skills/repo-port/SKILL.md @@ -0,0 +1,141 @@ +--- +name: repo-port +description: > + Faithfully port an open source repository to a new language or ecosystem, module by module, + using a five-phase multi-agent pipeline with a mandatory consolidation pass before writing any code. + Use when: (1) user says /repo-port, (2) user wants to rewrite or port a GitHub/open source repo + in a different language (especially Rust), (3) user says "migrate from X", "port this codebase", + "rewrite in Rust", "extract the core logic from", (4) user wants a clean, accurate base + implementation before layering their own optimizations on top. + Pipeline: Map (Haiku, parallel per module) → Analyze (Haiku, parallel per file) → + Consolidate (Sonnet, single unified plan) → Port (Sonnet, plan-driven) → Review (Opus). + Trigger on: "port", "migrate", "rewrite", "extract from", "copy logic from", "base from open source", + or any context where an existing codebase is the reference for a new implementation. +--- + +# Repo Port + +Five-phase multi-agent pipeline for faithfully porting an open source repo to a new language. + +**The rule**: see everything before writing anything. All modules are fully analyzed and issues are merged into a single unified plan before a single line of target code is written. + +## Why Consolidate First + +Without a consolidation pass, each file gets ported in isolation. You end up fixing the same class of bug five times, making inconsistent architectural choices, and missing cross-module patterns that could be solved once. The consolidation step reads every issue and optimization across every module, finds what can be batched, and produces a single ordered plan. Phase 3 then executes that plan top-down. 
+ +## Workspace Structure + +``` +{port-workspace}/ +├── port-brief.md # Phase 0: source → target mapping +├── {module-a}/ +│ ├── checklist.md # Phase 1: files enumerated +│ ├── .analysis/ # Phase 2: per-file reports (temp) +│ │ ├── {file-a}.md +│ │ └── {file-b}.md +│ ├── issue-found.md # Merged from .analysis/ +│ └── optimization-found.md # Merged from .analysis/ +├── {module-b}/ +│ └── ... +├── unified-plan.md # Phase 2.5: consolidated fix + opt plan +└── review.md # Phase 4: final verdict +``` + +## Phase 0: Brief + +Before spawning any agent, establish: + +| Input | Required | +|---|---| +| Source repo (URL or local path) | Yes | +| Target language / ecosystem | Yes | +| Module list (names + source paths) | Yes | +| Target project path | Yes | +| Priority order (which modules matter most) | Recommended | + +Write `port-brief.md` with this information. All agents will reference it. + +## Phase 1: Map + +**Model: Haiku — one agent per module, all in parallel.** + +Read `agents/mapper.md` and dispatch one mapper per module simultaneously. + +Each mapper writes `{port-workspace}/{module}/checklist.md`. + +After all mappers finish: +- Review every checklist — remove misclassified files (tests, generated code, config) +- Add any missed files +- Confirm with user before proceeding + +## Phase 2: Analyze + +**Model: Haiku — one agent per file.** + +Read `agents/analyzer.md`. For each `[ ]` entry in each checklist, dispatch a Haiku analyzer. + +Each analyzer writes **only** to `{port-workspace}/{module}/.analysis/{safe-filename}.md` — one file per analyzer, no shared writes, no concurrent conflicts. Analyzers do not touch `checklist.md`. + +Parallelism: up to 8 concurrent agents. Different modules can run fully in parallel. Within a module, files can run in parallel since each writes its own output file. 
+ +After all analyzers finish, session leader does two things: + +**Step 1 — Update checklists (safe, sequential):** Mark all dispatched entries `[x]` in each module's `checklist.md`. Do this after all agents are done, not during. + +**Step 2 — Merge analysis files:** Run the merge script for each module: +```bash +python skills/repo-port/scripts/merge_analysis.py {port-workspace}/{module} +``` +This extracts Issues sections from all `.analysis/*.md` into `issue-found.md` and Optimizations sections into `optimization-found.md`, grouped by source file. + +## Phase 2.5: Consolidate + +**Model: Sonnet — session leader handles this.** + +Read `agents/consolidator.md`. This is the most important planning step. + +Read ALL `issue-found.md` and `optimization-found.md` files across every module. Produce `unified-plan.md` — a single ordered implementation plan that: +- Groups issues that share a root cause (fix once, not five times) +- Identifies cross-module patterns (shared utilities, common error types, trait designs) +- Sequences the work (what must be built first to unblock others) +- Marks what can be skipped or deferred + +**Do not begin Phase 3 until `unified-plan.md` is reviewed and approved by the user.** + +## Phase 3: Port + +**Model: Sonnet — session leader orchestrates; sub-agents implement.** + +Read `agents/porter.md`. + +Execute `unified-plan.md` in two steps: + +**Step 1 — Foundation + Cross-Module Work (session leader, sequential):** Implement the Foundation Work and Cross-Module Batches sections directly. These are small but block everything else — shared error types, utility traits, crate-level structure. Do this before dispatching module agents. + +**Step 2 — Per-module Work (Sonnet agents, parallel):** Once foundation is in place, dispatch one Sonnet agent per module. Each agent receives its module's section of `unified-plan.md`, the relevant source files, analysis files, and target project path. 
Independent modules run in parallel; within a module, files are processed in dependency order. + +Fidelity rule: replicate source behavior exactly. If the source has a bug, replicate it and mark it `(source bug, preserved)`. Clever additions belong in a follow-up pass. + +## Phase 4: Review + +**Model: Opus — one agent.** + +Read `agents/reviewer.md` and dispatch one Opus reviewer. It reads source files, ported files, all issue/optimization files, and the `unified-plan.md`. + +Output: `review.md` with per-module scores and overall verdict. + +## Agent Files + +- `agents/mapper.md` — Phase 1: enumerate source files +- `agents/analyzer.md` — Phase 2: per-file analysis +- `agents/consolidator.md` — Phase 2.5: merge all findings into unified plan +- `agents/porter.md` — Phase 3: write target code from unified plan +- `agents/reviewer.md` — Phase 4: verify correctness and completeness + +## References + +- `references/analysis-criteria.md` — universal criteria for Haiku analyzers (any target language) +- `references/rust-criteria.md` — Rust-specific criteria: ownership, traits, error types, async, dependency mappings +- `scripts/merge_analysis.py` — merges `.analysis/*.md` into `issue-found.md` and `optimization-found.md` + +When dispatching Haiku analyzers, tell them which criteria file(s) to read based on target language. For Rust: read both `analysis-criteria.md` and `rust-criteria.md`. For other targets: read `analysis-criteria.md` only, plus any language-specific reference if one exists. diff --git a/skills/repo-port/agents/analyzer.md b/skills/repo-port/agents/analyzer.md new file mode 100644 index 0000000..337a621 --- /dev/null +++ b/skills/repo-port/agents/analyzer.md @@ -0,0 +1,80 @@ +# Analyzer Agent + +You analyze one source file and produce a structured report: what will be tricky to port correctly, and what the target language can do better. You write no target code — only observations. 
+ +## Input + +- `{source-file-path}` — one file to analyze +- `{target-language}` — the porting target +- `{port-workspace}/{module}/.analysis/{safe-filename}.md` — where to write your report + +Read `references/analysis-criteria.md` for the universal criteria list. If the target language is Rust, also read `references/rust-criteria.md`. Apply all relevant criteria. + +## Two Questions to Answer + +**For issues:** "What would go wrong if someone translated this naively?" +- Implicit language behavior that the target must make explicit +- External library calls with no obvious target equivalent +- Logic that depends on source-language semantics (integer overflow, null handling, exception flow) +- Shared/mutable state that requires synchronization in the target +- Anything subtle enough to produce incorrect output if missed + +**For optimizations:** "What would a skilled target-language developer do differently here?" +- Type safety improvements (replace dynamic checks with static types) +- Error handling improvements (replace exception throws with Result/Option) +- Performance gains the target language enables (zero-copy, stack allocation, iterator fusion) +- Idiomatic patterns that express the same intent more clearly +- API shape improvements (the source interface is awkward; target language allows a cleaner design) + +## Output Format + +Write to `{port-workspace}/{module}/.analysis/{safe-filename}.md`: + +```markdown +# Analysis: {path/relative/to/module-root} + +Summary: {one sentence: what this file does} + +## Issues + +- [ ] ISSUE [BLOCKER]: {description} + Impact: {what breaks if ignored} + Suggestion: {how to handle in target language} + +- [ ] ISSUE [HIGH]: {description} + Impact: {what breaks if ignored} + Suggestion: {how to handle in target language} + +## Optimizations + +- [ ] OPT [PERF]: {description} + Why better: {reason this is an improvement} + Approach: {how to implement it} +``` + +If no issues: write `## Issues\n\n(none)`. 
+If no optimizations: write `## Optimizations\n\n(none)`. + +## Severity Guide + +**Issues:** +| Tag | When to use | +|---|---| +| `BLOCKER` | Logic will be incorrect without resolution; cannot proceed past this file | +| `HIGH` | Correctness risk — likely produces wrong output if not handled | +| `MEDIUM` | Edge case mismatch; acceptable risk in isolation, bad in production | +| `LOW` | Minor semantic difference; easy to handle, low risk if missed | + +**Optimizations:** +| Tag | When to use | +|---|---| +| `PERF` | Measurable performance improvement (allocation, copies, algorithmic) | +| `SAFETY` | Improves type or memory safety | +| `ERGONOMICS` | Better API or developer experience | +| `IDIOM` | More idiomatic target-language code, same performance | + +## After Writing the Report + +Report summary counts only: "3 issues (1 BLOCKER, 2 HIGH), 4 optimizations (2 PERF, 1 SAFETY, 1 IDIOM)". + +Do not modify `checklist.md`. The session leader updates the checklist after all analyzers complete — concurrent writes to a shared file will corrupt it. diff --git a/skills/repo-port/agents/consolidator.md b/skills/repo-port/agents/consolidator.md new file mode 100644 index 0000000..d44d1d1 --- /dev/null +++ b/skills/repo-port/agents/consolidator.md @@ -0,0 +1,118 @@ +# Consolidator Agent + +You are the planning agent. After all per-file analyses are complete, you read every issue and optimization report across every module, find patterns, merge duplicates, and produce a single ordered implementation plan. Phase 3 (porting) executes this plan — not the individual analysis files. + +## Why This Step Exists + +Individual file analyses find issues in isolation. Across 20 files you'll find the same class of bug ten times, propose the same utility type five times, and suggest the same error enum three times. Without consolidation, Phase 3 fixes the same thing repeatedly, with potentially inconsistent results. This step collapses redundancy into a single authoritative plan. 
+ +## Input + +Read all of the following: +- `port-brief.md` — project context, source → target mapping +- `{module}/issue-found.md` for every module +- `{module}/optimization-found.md` for every module +- `{module}/checklist.md` for every module (for dependency context) + +## What to Produce + +Write `{port-workspace}/unified-plan.md` structured as follows: + +--- + +```markdown +# Unified Port Plan + +Source: {source-repo} +Target: {target-language/ecosystem} +Modules: {list} +Generated from: N issues, M optimizations across K files + +--- + +## Foundation Work (do first) + +Items that must exist before module-level porting can begin — shared types, error enums, +utility traits, crate-level structure. + +- [ ] FOUND: {what to create} — needed by: {list of files/modules that require this} + How: {brief implementation note} + +--- + +## Cross-Module Batches (do after foundation, before per-module work) + +Issues or optimizations that appear in multiple modules and share a single root cause. +Fix once here; reference this fix in the affected per-module sections below. + +- [ ] BATCH [{severity}]: {description of shared problem} + Affects: `module-a/file-x.py`, `module-b/file-y.py`, `module-c/file-z.py` + Fix: {single unified approach that applies to all affected files} + +--- + +## Module: {module-a} + +### Shared Within Module (do before individual files) +Items that appear across multiple files in this module — common logic, shared helpers. + +- [ ] BATCH [{original severity}]: {merged description} + Affects: `file-a.py`, `file-b.py`, `file-c.py` + Fix: {single unified approach} + +### File: `path/to/file-a.py` + +- [ ] ISSUE [HIGH]: {description} — original from analysis + Fix: {how to handle in target language} + +- [ ] OPT [PERF]: {description} — original from analysis + Approach: {implementation note} + +### File: `path/to/file-b.py` +... + +--- + +## Module: {module-b} +... 
+ +--- + +## Deferred Items + +Issues and optimizations explicitly deferred to a later pass. Each must have a reason. + +- DEFERRED: `file.py` / OPT [IDIOM]: {description} — reason: {why deferred} +``` + +--- + +## How to Consolidate + +### Step 1: Find duplicates +Scan all issues and optimizations. Group entries that: +- Name the same root cause (e.g., "nullable return not handled" in 8 files → one batch fix) +- Propose the same structural solution (e.g., "define an error enum" mentioned in 6 files → one foundation task) +- Require the same new utility/type to be created first + +### Step 2: Identify foundation work +Anything that multiple files depend on — an error type, a shared trait, a conversion utility — must be created before any file that uses it is ported. List these first in the plan. + +### Step 3: Order within modules +Within each module, order files so that dependencies come before dependents. Types and utility modules first. Core logic second. API or orchestration layer last. + +### Step 4: Decide what to defer +Optimizations tagged `IDIOM` or `ERGONOMICS` with no correctness impact can be deferred if they would significantly slow down Phase 3. BLOCKER and HIGH issues must not be deferred. Anything deferred must have an explicit reason. + +### Step 5: Write the plan +The plan is the authoritative source of truth for Phase 3. Every item a porter will act on must be here. Items not in this plan will not be done. + +## Done + +After writing `unified-plan.md`, report: +- Total work items (batched issues, individual issues, optimizations) +- Count of foundation items +- Count of deferred items and why +- Any BLOCKER items that need a design decision before Phase 3 can start + +Stop. The session leader reviews the plan with the user before Phase 3 begins. 
diff --git a/skills/repo-port/agents/mapper.md b/skills/repo-port/agents/mapper.md new file mode 100644 index 0000000..0089d60 --- /dev/null +++ b/skills/repo-port/agents/mapper.md @@ -0,0 +1,66 @@ +# Mapper Agent + +You are a reconnaissance agent. Your job is to enumerate every source file in one module that needs to be ported — completely and accurately. A missed file is a missed feature. + +## Input + +- `{source-module-path}` — the module directory in the source repo +- `{target-language}` — where this is being ported to +- `{port-workspace}/{module}/` — where to write your checklist + +Read `port-brief.md` in the workspace root for full project context. + +## What to Include + +**Include:** +- All files containing application logic (`.py`, `.ts`, `.go`, `.java`, `.js`, etc.) +- Type definitions and interfaces +- Core utilities and helpers +- Data models/schemas that contain logic (not just config values) +- Constants files if they define domain values (not just build flags) + +**Exclude:** +- Test files (`*_test.go`, `test_*.py`, `*.spec.ts`, `__tests__/`, etc.) +- Build and config files (`Makefile`, `pyproject.toml`, `tsconfig.json`, etc.) +- Generated files (`*.pb.go`, `*_pb2.py`, `*_generated.*`, etc.) +- Fixtures, seed data, test snapshots +- Documentation-only files (`.md`, `.rst` unless they embed runnable examples) + +**Uncertain?** Include it, suffix the entry with `(verify)`. + +## Output Format + +Write to `{port-workspace}/{module}/checklist.md`: + +```markdown +# Checklist: {module} + +Source: {source-module-path} +Total: N files (X low / Y medium / Z high) + +## Files + +- [ ] `path/relative/to/module-root.py` — {one-line: what this file does} — complexity: low +- [ ] `path/to/core/thing.py` — {one-line: what this file does} — complexity: high +``` + +Every entry must have: relative path, one-line description, complexity tier. 
+ +## Complexity Rubric + +Assign based on reading the file, not guessing from its name: + +| Tier | Criteria | +|---|---| +| **low** | Pure data structs, simple mappers, small utilities, <100 lines | +| **medium** | Stateful logic, multiple dependencies, non-trivial control flow, 100–300 lines | +| **high** | Core algorithms, complex state machines, deep coupling to other modules, external I/O, >300 lines | + +## Done + +After writing `checklist.md`, report: +- Total files found +- Breakdown by complexity tier +- Any files marked `(verify)` and why + +Stop here. The session leader reviews all checklists before Phase 2 begins. diff --git a/skills/repo-port/agents/porter.md b/skills/repo-port/agents/porter.md new file mode 100644 index 0000000..b3c3558 --- /dev/null +++ b/skills/repo-port/agents/porter.md @@ -0,0 +1,64 @@ +# Porter Agent + +You write target-language code. You work from the `unified-plan.md` top-down, not from individual analysis files. Your goal is a faithful, accurate implementation — the same behavior as the source, expressed in idiomatic target-language code. + +## Input + +For each work item in `unified-plan.md`: +- The source file(s) being ported +- The `.analysis/{safe-filename}.md` for context +- The unified plan item describing what to do +- The target file path to write to + +Read `port-brief.md` for project-level context. + +## The Prime Directive + +**Accuracy before improvement.** This pass produces a faithful port — code that behaves identically to the source for every input the source handles. Do not add features. Do not restructure beyond what the plan specifies. Do not introduce optimizations not listed in the unified plan. + +The Opus reviewer checks behavioral equivalence across five dimensions: Logic Correctness, Plan Adherence, Optimization Correctness, Idiomatic Quality, and OSS Readiness. Behavioral differences not justified by issues or optimizations in the plan will be flagged as correctness failures. 
+ +## Working From the Plan + +Execute `unified-plan.md` in order: + +1. **Foundation items first** — shared types, error enums, utility traits. These unlock everything else. +2. **Batched fixes** — implement the shared solution once, then apply it across all affected files. +3. **Per-file items** — port each file in the order listed, addressing its plan items. + +For each plan item: +- Read the referenced source file(s) completely before writing +- Read the analysis context for any nuance +- Write the target implementation +- Mark the plan item done: change `- [ ]` to `- [x]` + +## What "Faithful" Means + +- Same function signatures, adapted for the target type system +- Same error conditions surfaced (as `Result::Err`, not silently dropped) +- Same output for same input, including edge cases +- Same handling of boundary values and invalid inputs +- If the source has a bug, replicate it and annotate: `// source bug, preserved intentionally` + +The goal: make the Opus reviewer unable to find a behavioral difference between source and target. + +## What "Idiomatic" Means + +Faithful does not mean line-by-line translation. Express the same behavior the way a skilled target-language developer would write it from scratch. + +For target-language-specific idioms and patterns, read the appropriate criteria file: +- Rust: see the **Porter Guidance** section in `references/rust-criteria.md` + +## Handling Gaps + +If a source behavior has no obvious target-language equivalent: +- Note it as a comment in the target file: `// SOURCE: {source behavior} — no direct equivalent, handled by {approach}` +- Use the suggestion from the analysis file as a starting point +- If truly blocked, add a `TODO` and flag it in your report + +## Done + +After each file or batch item, update `unified-plan.md` (check off the item). 
After all items: +- Report which items were completed +- Report any items that required a judgment call not covered by the plan (note what you decided) +- Report any items blocked by missing dependencies or design gaps diff --git a/skills/repo-port/agents/reviewer.md b/skills/repo-port/agents/reviewer.md new file mode 100644 index 0000000..3a95f7c --- /dev/null +++ b/skills/repo-port/agents/reviewer.md @@ -0,0 +1,129 @@ +# Reviewer Agent + +You are the final verification agent. You compare the ported implementation against the source and produce a module-by-module verdict. You are the last line of defense before the port is considered a valid foundation for further optimization. + +## Input + +- Source module paths (original code) +- Ported module paths (target language) +- `{port-workspace}/{module}/issue-found.md` — issues identified in Phase 2 +- `{port-workspace}/{module}/optimization-found.md` — optimizations identified in Phase 2 +- `{port-workspace}/unified-plan.md` — the consolidated implementation plan +- `port-brief.md` — project context + +## Review Dimensions + +Evaluate each module across five dimensions. Use the scoring guide below. + +### 1. Logic Correctness (weight: highest) + +Does the ported code faithfully replicate the source logic? + +Check: +- Compare each function/method in the source against its target equivalent +- Trace through non-trivial code paths: are branches equivalent? +- Edge cases: empty inputs, boundary values, error conditions, nil/null inputs +- Return values: same type (adapted), same meaning, same error conditions +- Side effects: if the source mutates state or writes to I/O, does the target? + +Red flags: missing `else` branch, dropped error case, incorrect operator after translation, off-by-one in a loop. + +### 2. Plan Adherence + +Was the `unified-plan.md` executed completely? 
+ +Check: +- Every `- [x]` item in the plan — verify the fix is actually present in the code +- Every `- [ ]` unchecked item — is it present in the Deferred section with a reason? +- BLOCKER and HIGH issues: must be resolved or explicitly justified if deferred +- Batched fixes: was the single shared solution actually applied to all affected files? + +### 3. Optimization Correctness + +Were applied optimizations actually beneficial and non-breaking? + +Check: +- Applied optimizations must not change behavior (unless they fix a source bug) +- PERF optimizations should be verifiable (e.g., fewer allocations, no unnecessary clones) +- SAFETY optimizations should eliminate the unsafe pattern they targeted +- If an optimization changed the API, is it still a faithful semantic equivalent? + +### 4. Idiomatic Quality + +Does the code look like it was *written* in the target language, not mechanically translated? + +Check: +- Naming: follows target language conventions throughout +- Patterns: uses target-language idioms (iterator chains, pattern matching, trait impls) +- No source-language artifacts: no "Python-isms" or "JS-isms" in the target code +- Error handling: follows the target ecosystem's conventions consistently +- No commented-out source code left in + +This dimension is craft. A correct but clunky port is not done. + +### 5. Open Source Readiness + +Is this clearly an independent implementation? 
+ +Check: +- No verbatim copied comments from the source (code logic replication is fine; comment copying is not) +- All documentation is rewritten in original language +- License headers are present and correct for the target project +- No leftover `// TODO: check against original` or similar translator's notes + +## Output Format + +Write to `{port-workspace}/review.md`: + +```markdown +# Port Review + +Source: {source-repo} +Target: {target-project} ({target-language}) + +## Overall Verdict: {✅ SHIP | ⚠️ REVISE | ❌ BLOCKED} + +{Two to three sentences summarizing the overall port quality.} + +--- + +## Module: {module-a} + +| Dimension | Score | Key Finding | +|---|---|---| +| Logic Correctness | {1–5} | {what you found} | +| Plan Adherence | {1–5} | {what you found} | +| Optimization Correctness | {1–5} | {what you found} | +| Idiomatic Quality | {1–5} | {what you found} | +| OSS Readiness | {1–5} | {what you found} | + +**Module Verdict: {✅ | ⚠️ | ❌}** + +{If ⚠️ or ❌, list specific required changes:} +- [ ] {exact file and what must change} + +## Module: {module-b} +... 
+ +--- + +## Follow-up Items + +Not blockers, but should be addressed before optimizing on top of this port: +- [ ] {item} +``` + +## Scoring Guide + +| Score | Meaning | +|---|---| +| **5** | Excellent — no issues found in this dimension | +| **4** | Minor issues — no action required | +| **3** | Notable gap — flag for session leader, fix before calling done | +| **2** | Significant problem — must be revised before this module is trusted | +| **1** | Blocking issue — port cannot be used as a foundation in current state | + +**Overall Verdict:** +- `✅ SHIP` — every dimension ≥ 3 in every module, and neither condition below applies +- `⚠️ REVISE` — any dimension = 2, or Logic Correctness = 3 with multiple notable gaps +- `❌ BLOCKED` — any dimension = 1, or unresolved BLOCKER issue anywhere diff --git a/skills/repo-port/references/analysis-criteria.md b/skills/repo-port/references/analysis-criteria.md new file mode 100644 index 0000000..e8b97b5 --- /dev/null +++ b/skills/repo-port/references/analysis-criteria.md @@ -0,0 +1,60 @@ +# Analysis Criteria (Universal) + +Reference for Haiku analyzer agents in Phase 2. Apply all relevant criteria when analyzing a source file. This is a checklist to think through, not a template to fill out — only report what actually applies to the file being analyzed. + +For target-language-specific criteria, read the appropriate file: +- Rust: `references/rust-criteria.md` + +--- + +## Implicit Language Behavior + +Things the source language does silently that the target must make explicit: + +- **Integer arithmetic**: Python `int` is unbounded; most typed languages use fixed-width types and overflow. Check any math that could exceed typical 32/64-bit bounds. +- **Null/None/undefined**: Dynamic languages allow implicit null anywhere. Statically typed targets need explicit nullable types or null checks at every call site. +- **Implicit conversions**: JS numeric coercions (`"5" + 3`), Python duck typing, implicit bool conversions. 
Every one of these is a potential `ISSUE [HIGH]` — the target type system will not silently coerce. +- **Default mutable arguments**: Python's `def f(x=[])` is a classic bug — the list is shared across calls. Flag as `ISSUE [HIGH]` (it's a bug in the source, not just a porting concern). +- **Exception flow**: Which exceptions can escape this function? The target must handle all of them, explicitly. Unhandled exceptions that silently become no-ops in dynamic languages are `ISSUE [HIGH]`. + +## External Dependencies + +For every imported library: +1. Name it and what it does in the source +2. Identify the equivalent in the target language/ecosystem +3. If no equivalent exists → `ISSUE [BLOCKER]` +4. If the equivalent has a meaningfully different API shape → `ISSUE [MEDIUM]` + +Note the library name and your recommended equivalent in the analysis file. The Consolidator will aggregate these and plan the dependency setup in Foundation Work. + +## Shared and Mutable State + +- Global variables or module-level state → how does the target language thread-safely access this? +- Class-level state shared across instances → may require synchronization primitives +- Mutable arguments (function modifies its input) → the target may require explicit mutation annotations (e.g., `&mut` in Rust, pass-by-reference in other languages) — callers must know + +## Concurrency + +- Does this file use threads, async/await, or parallel primitives? +- How does the source's concurrency model map to the target's runtime? +- Does shared state across concurrent tasks require synchronization? +- Are there race conditions in the source? 
(These should be replicated in the port unless flagged as bugs) + +## Issue vs Optimization Decision Table + +Use this table when you're unsure how to classify something: + +| Situation | Classification | +|---|---| +| Logic produces wrong output without fix | `ISSUE [HIGH]` | +| No equivalent library exists in target | `ISSUE [BLOCKER]` | +| Implicit null can propagate to target | `ISSUE [HIGH]` | +| Integer overflow risk | `ISSUE [HIGH]` | +| Mutable default argument (source bug) | `ISSUE [HIGH]` | +| Missing exception handler → silent failure | `ISSUE [HIGH]` | +| Library with different API shape | `ISSUE [MEDIUM]` | +| Minor semantic difference, negligible risk | `ISSUE [LOW]` | +| Better type safety available in target | `OPT [SAFETY]` | +| Measurable allocation/copy reduction possible | `OPT [PERF]` | +| Cleaner API shape available in target | `OPT [ERGONOMICS]` | +| More idiomatic pattern exists in target | `OPT [IDIOM]` | diff --git a/skills/repo-port/references/rust-criteria.md b/skills/repo-port/references/rust-criteria.md new file mode 100644 index 0000000..37ed1eb --- /dev/null +++ b/skills/repo-port/references/rust-criteria.md @@ -0,0 +1,121 @@ +# Analysis Criteria (Rust-Specific) + +Rust-specific criteria for Haiku analyzer agents. Read this alongside `references/analysis-criteria.md` when the porting target is Rust. 
+ +--- + +## Common Source → Rust Dependency Mappings + +### Python → Rust + +| Python | Rust crate | +|---|---| +| `requests` | `reqwest` (sync: `reqwest::blocking`) | +| `httpx` | `reqwest` (async) | +| `json` / `orjson` | `serde_json` | +| `asyncio` | `tokio` | +| `threading` | `std::thread` / `tokio::spawn` | +| `dataclasses` | `struct` + `#[derive(...)]` | +| `typing.Protocol` | `trait` | +| `collections.defaultdict` | `HashMap` + `.entry().or_insert_with(...)` | +| `pathlib.Path` | `std::path::PathBuf` | +| `datetime` | `chrono` | +| `logging` | `tracing` | +| `argparse` | `clap` | +| `pydantic` | `serde` + custom validation | +| `pytest` | `#[test]` + `rstest` | +| `contextlib.contextmanager` | `Drop` trait or RAII wrapper | +| `functools.lru_cache` | `once_cell` / `memoize` crate | + +### TypeScript → Rust + +| TypeScript | Rust | +|---|---| +| `Promise<T>` | `impl Future<Output = T>` via `tokio` | +| `interface Foo` | `trait Foo` | +| `type X = A \| B` | `enum X { A(..), B(..) }` | +| `null \| T` | `Option<T>` | +| `Map<K, V>` | `HashMap<K, V>` | +| `Array<T>` | `Vec<T>` | +| `readonly T` | `&T` or newtype wrapper | +| `axios` / `fetch` | `reqwest` | +| `zod` schema | `serde` + custom `TryFrom` | +| `EventEmitter` | `tokio::sync::broadcast` or custom trait | + +--- + +## Ownership & Borrowing + +- Does this function consume its argument or borrow it? Recommend which, and why. +- Are there self-referential structures (linked list nodes, trees with parent pointers)? These are genuinely hard in Rust — flag as `ISSUE [HIGH]` with a suggested approach (arena allocation, index-based references, `Rc<RefCell<T>>`). +- Does the source store a pointer/reference to something it doesn't own? → lifetimes will be needed → flag the complexity. +- Large data passed by value everywhere in source → should be borrowed in Rust to avoid unnecessary copies. + +## Error Type Design + +- Does this module define its own exception hierarchy? 
+ → Design a Rust error enum with `thiserror`; flag as `OPT [SAFETY]` with suggested variants +- Does this function raise multiple unrelated exception types? + → Consider `anyhow::Error` for application code, typed errors for library/API boundaries +- Does the source silently swallow errors (`except: pass`, `.catch(() => null)`)? + → Flag as `ISSUE [HIGH]` — Rust requires explicit handling; dropped errors become compiler warnings + +## Standard Trait Opportunities + +For each type defined in the file, note which standard traits to derive or implement: + +| Trait | Derive when | +|---|---| +| `Debug` | Almost always (unless fields contain non-Debug types) | +| `Clone` | Data-like types without exclusive ownership | +| `Copy` | Small, POD types (no heap allocation, no `Drop`) | +| `Display` | Type has a meaningful user-facing string representation | +| `From` / `Into` | Common conversion from another type | +| `TryFrom` | Conversion that can fail | +| `Default` | Type has a sensible zero/empty value | +| `PartialEq` / `Eq` | Needs equality comparison | +| `Hash` | Used as a `HashMap` or `HashSet` key | +| `Iterator` | Type is a sequence or produces values lazily | +| `Serialize` / `Deserialize` | Type crosses an I/O boundary (API, file, database) | + +Flag missing obvious derives as `OPT [IDIOM]`. + +## Performance Opportunities + +Only flag if the improvement is real and concrete, not theoretical: + +- **Unnecessary allocation**: function returns `String` but callers only read it → could return `&str` with a lifetime annotation +- **Clone-heavy loops**: cloning large data on every iteration → can ownership be transferred instead? 
+- **Manual index loops**: `for i in 0..vec.len()` where an iterator chain does the same work at zero cost +- **Format strings in hot paths**: repeated `format!()` inside a loop → `String::with_capacity` + `push_str` +- **Double hash lookups**: `if map.contains_key(k) { map.get(k) }` → `.entry()` API avoids the second lookup + +## Async Translation + +If the source file is async: + +- Python `asyncio` → Tokio (`tokio::spawn`, `tokio::sync::*`, `tokio::time::*`) +- JS/TS Promises → `async`/`await` with Tokio runtime +- Go goroutines → `tokio::spawn` for I/O-bound, `rayon` for CPU-bound +- Note: Rust Futures are lazy — they do nothing until `await`ed. Source code that creates a coroutine and then schedules it must be restructured. +- Does the async function capture references? → requires lifetime bounds + `Send` — flag as `ISSUE [MEDIUM]` +- Are there `spawn` calls? → captured values must be `'static + Send` — flag any captures of borrowed data as `ISSUE [HIGH]` +- Timeouts or cancellation in source? → `tokio::time::timeout` / `tokio_util::sync::CancellationToken` + +--- + +## Porter Guidance (Rust) + +Read this section when you are the porter agent writing Rust implementations. 
+ +Express source behavior the way a skilled Rust developer would write it from scratch: + +- `Result<T, E>` with `?` for error propagation — never use `unwrap()` in library code +- `Option<T>` for nullable values — no raw null, no sentinel values +- Derive standard traits where sensible: `Debug`, `Clone`, `PartialEq`, `Hash` +- Use iterator chains over explicit index loops where they express the same intent +- Use `thiserror` for defined error types; `anyhow` for application-level code +- Small, immutable, POD types should be `Copy` — don't force callers to clone +- Avoid unnecessary `Arc<Mutex<T>>` — transfer ownership when the design allows it +- Prefer `&str` over `String` for function parameters that only read string data +- Use `impl Trait` in argument position to avoid explicit generic-parameter boilerplate (it is sugar for a generic parameter and is still monomorphized) diff --git a/skills/repo-port/scripts/merge_analysis.py b/skills/repo-port/scripts/merge_analysis.py new file mode 100644 index 0000000..39ae1d0 --- /dev/null +++ b/skills/repo-port/scripts/merge_analysis.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +""" +Merge .analysis/*.md files into issue-found.md and optimization-found.md. + +Each analysis file has two sections: ## Issues and ## Optimizations. +This script extracts them and groups by type across all files in the module. + +Requires: Python 3.9+ + +Usage: + python merge_analysis.py <module-dir> + +Example: + python merge_analysis.py /path/to/port-workspace/sage + +Assumption: analysis files do not use ## headings inside issue/optimization +descriptions (only at section boundaries). If they do, extraction will truncate. 
# """
# NOTE: the line above is the closing delimiter of the module docstring that
# opens just before this block. It sits behind "#" so that the block also
# parses when it is compiled on its own.

import re
import sys
from pathlib import Path

# First-line marker that analyzer agents put in front of the source path.
_HEADER_PREFIX = "# Analysis:"

# Any level-2 markdown heading; marks where the current section ends.
_NEXT_H2 = re.compile(r"^## ", re.MULTILINE)


def extract_section(content: str, section_name: str) -> str:
    """Return the stripped body of the ``## <section_name>`` section.

    The section starts at a line consisting of ``## `` followed by
    *section_name* and runs up to the next line that begins with ``## `` (or
    the end of *content*). Deeper headings (``###`` ...) stay inside the body.

    Args:
        content: Full text of one analysis file.
        section_name: Heading text to look for, matched literally.

    Returns:
        The section body with surrounding whitespace removed, or ``""`` when
        the heading is absent or the section is empty.
    """
    # re.escape: the name is data, not a pattern. Anchoring with ^...$ keeps
    # "### Issues" or a mid-line "## Issues" from being taken for the heading.
    heading = re.search(
        rf"^##[ \t]+{re.escape(section_name)}[ \t]*\r?$",
        content,
        re.MULTILINE,
    )
    if heading is None:
        return ""

    rest = content[heading.end():]
    # Searching from the end of the heading line (not after its newline) means
    # an empty section directly followed by the next heading yields "" instead
    # of swallowing that next section.
    boundary = _NEXT_H2.search(rest)
    body = rest if boundary is None else rest[:boundary.start()]
    return body.strip()


def _source_label(content: str, fallback: str) -> str:
    """Return the path from the ``# Analysis: <path>`` header, else *fallback*."""
    header = content.partition("\n")[0]
    if header.startswith(_HEADER_PREFIX):
        label = header.removeprefix(_HEADER_PREFIX).strip()
        if label:
            return label
    return fallback


def _write_report(path: Path, title: str, sections: list[str]) -> None:
    """Write *sections* under an H1 *title*; write ``(none)`` if there are none."""
    body = "\n\n---\n\n".join(sections) if sections else "(none)"
    path.write_text(f"# {title}\n\n{body}\n", encoding="utf-8")


def merge_analysis(module_dir: str) -> None:
    """Merge ``<module_dir>/.analysis/*.md`` into the two module reports.

    Every analysis file contributes its ``## Issues`` section to
    ``issue-found.md`` and its ``## Optimizations`` section to
    ``optimization-found.md``, each under a ``## <source path>`` heading.
    Sections that are empty or exactly ``(none)`` are skipped. Files are
    processed in sorted name order so the output is deterministic.

    Args:
        module_dir: Module workspace directory that contains ``.analysis/``.

    Raises:
        SystemExit: With status 1 when ``.analysis/`` is missing or holds no
            ``*.md`` files (a message is printed to stderr first).
    """
    module_path = Path(module_dir)
    analysis_dir = module_path / ".analysis"

    # is_dir() rather than exists(): a regular file named ".analysis" is not
    # a usable analysis directory.
    if not analysis_dir.is_dir():
        print(f"No .analysis/ directory in {module_dir}", file=sys.stderr)
        sys.exit(1)

    analysis_files = sorted(analysis_dir.glob("*.md"))
    if not analysis_files:
        print("No analysis files found", file=sys.stderr)
        sys.exit(1)

    issues_sections: list[str] = []
    opts_sections: list[str] = []
    buckets = (("Issues", issues_sections), ("Optimizations", opts_sections))

    for analysis_file in analysis_files:
        content = analysis_file.read_text(encoding="utf-8")
        # Fall back to the analysis file name so a missing or empty header
        # never produces a blank or bogus "## " heading in the merged report.
        source_path = _source_label(content, analysis_file.name)

        for section_name, bucket in buckets:
            section = extract_section(content, section_name)
            if section and section != "(none)":
                bucket.append(f"## {source_path}\n\n{section}")

    _write_report(module_path / "issue-found.md", "Issues Found", issues_sections)
    _write_report(
        module_path / "optimization-found.md",
        "Optimizations Found",
        opts_sections,
    )

    print(f"Merged {len(analysis_files)} analysis files")
    print(f" issue-found.md — {len(issues_sections)} files with issues")
    print(f" optimization-found.md — {len(opts_sections)} files with optimizations")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Usage errors belong on stderr; name the required argument.
        print("Usage: python merge_analysis.py <module-dir>", file=sys.stderr)
        sys.exit(1)
    merge_analysis(sys.argv[1])