diff --git a/review/SKILL.md b/review/SKILL.md
index 52560d774..305e852d9 100644
--- a/review/SKILL.md
+++ b/review/SKILL.md
@@ -591,6 +591,44 @@ Run `git diff origin/` to get the full diff. This includes both committed
---
+## Step 3.5: Classify PR type
+
+Before applying the checklist, classify the PR based on the files changed in the diff stat:
+
+1. Count files by category from the `git diff --stat` output:
+ - **Application code:** `.ts`, `.js`, `.py`, `.rb`, `.go`, `.rs`, `.java`, `.tsx`, `.jsx`, `.vue`, `.svelte` (source files in `src/`, `lib/`, `app/`, or similar)
+ - **CI/Infra:** `.yml`, `.yaml` in `.github/`, `.circleci/`, `.gitlab-ci/`; `Dockerfile*`, `docker-compose*`, `Makefile`, `Justfile`
+ - **Scripts:** `.sh`, `.bash`, `.zsh`, `.bat`, `.ps1`, `.cmd` files; files in `bin/`, `scripts/` directories
+ - **Config:** `.json`, `.yaml`, `.yml`, `.toml`, `.ini`, `.env*` config files (not in CI dirs)
+ - **Docs:** `.md`, `.txt`, `.rst` files
+ - **Tests:** files in `test/`, `tests/`, `spec/`, `__tests__/` directories; files matching `*.test.*`, `*.spec.*`
+
+2. Determine the **primary PR type** based on which category has the most changed files:
+ - `APPLICATION` — majority application code
+ - `CI_INFRA` — majority CI/infra files
+ - `SCRIPTS` — majority scripts
+ - `CONFIG` — majority config files
+ - `DOCS` — majority documentation
+ - `TESTS` — majority test files
+ - `MIXED` — no single category >50% of changed files
+
+3. Output (before the two-pass review):
+ ```
+ PR Type: [TYPE] ([N] app, [N] ci, [N] scripts, [N] config, [N] docs, [N] tests)
+ ```
+
+4. **Category relevance guide:**
+ - `APPLICATION` or `MIXED`: Run ALL checklist categories (current behavior)
+ - `CI_INFRA`: Prioritize Distribution & CI/CD Pipeline, Configuration & Infrastructure Safety, Script & Shell Quality. Skip SQL, LLM, View/Frontend, Type Coercion categories.
+ - `SCRIPTS`: Prioritize Script & Shell Quality, Platform & Convention Consistency, Shell Injection. Skip SQL, LLM, View/Frontend categories.
+ - `CONFIG`: Prioritize Configuration & Infrastructure Safety, Platform & Convention Consistency. Skip most application categories.
+ - `DOCS`: Check Dead Code & Consistency (stale docs), Platform & Convention Consistency (naming). Skip all code categories.
+ - `TESTS`: Run Test Gaps, Completeness Gaps, and application categories relevant to what's being tested.
+
+ **Important:** "Skip" means "don't spend time on categories that will obviously return N/A." If a skipped category is unexpectedly relevant (e.g., a CI file that also contains SQL), still flag it. Use judgment — the classification is a guide, not a hard rule.
+
+---
+
## Prior Learnings
Search for relevant learnings from previous sessions:
diff --git a/review/SKILL.md.tmpl b/review/SKILL.md.tmpl
index fa14f26a7..8618f2e03 100644
--- a/review/SKILL.md.tmpl
+++ b/review/SKILL.md.tmpl
@@ -104,6 +104,44 @@ Run `git diff origin/` to get the full diff. This includes both committed
---
+## Step 3.5: Classify PR type
+
+Before applying the checklist, classify the PR based on the files changed in the diff stat:
+
+1. Count files by category from the `git diff --stat` output:
+ - **Application code:** `.ts`, `.js`, `.py`, `.rb`, `.go`, `.rs`, `.java`, `.tsx`, `.jsx`, `.vue`, `.svelte` (source files in `src/`, `lib/`, `app/`, or similar)
+ - **CI/Infra:** `.yml`, `.yaml` in `.github/`, `.circleci/`, `.gitlab-ci/`; `Dockerfile*`, `docker-compose*`, `Makefile`, `Justfile`
+ - **Scripts:** `.sh`, `.bash`, `.zsh`, `.bat`, `.ps1`, `.cmd` files; files in `bin/`, `scripts/` directories
+ - **Config:** `.json`, `.yaml`, `.yml`, `.toml`, `.ini`, `.env*` config files (not in CI dirs)
+ - **Docs:** `.md`, `.txt`, `.rst` files
+ - **Tests:** files in `test/`, `tests/`, `spec/`, `__tests__/` directories; files matching `*.test.*`, `*.spec.*`
+
+2. Determine the **primary PR type** based on which category has the most changed files:
+ - `APPLICATION` — majority application code
+ - `CI_INFRA` — majority CI/infra files
+ - `SCRIPTS` — majority scripts
+ - `CONFIG` — majority config files
+ - `DOCS` — majority documentation
+ - `TESTS` — majority test files
+ - `MIXED` — no single category >50% of changed files
+
+3. Output (before the two-pass review):
+ ```
+ PR Type: [TYPE] ([N] app, [N] ci, [N] scripts, [N] config, [N] docs, [N] tests)
+ ```
+
+4. **Category relevance guide:**
+ - `APPLICATION` or `MIXED`: Run ALL checklist categories (current behavior)
+ - `CI_INFRA`: Prioritize Distribution & CI/CD Pipeline, Configuration & Infrastructure Safety, Script & Shell Quality. Skip SQL, LLM, View/Frontend, Type Coercion categories.
+ - `SCRIPTS`: Prioritize Script & Shell Quality, Platform & Convention Consistency, Shell Injection. Skip SQL, LLM, View/Frontend categories.
+ - `CONFIG`: Prioritize Configuration & Infrastructure Safety, Platform & Convention Consistency. Skip most application categories.
+ - `DOCS`: Check Dead Code & Consistency (stale docs), Platform & Convention Consistency (naming). Skip all code categories.
+ - `TESTS`: Run Test Gaps, Completeness Gaps, and application categories relevant to what's being tested.
+
+ **Important:** "Skip" means "don't spend time on categories that will obviously return N/A." If a skipped category is unexpectedly relevant (e.g., a CI file that also contains SQL), still flag it. Use judgment — the classification is a guide, not a hard rule.
+
+---
+
{{LEARNINGS_SEARCH}}
## Step 4: Two-pass review
diff --git a/review/checklist.md b/review/checklist.md
index cfedcf81f..93da21953 100644
--- a/review/checklist.md
+++ b/review/checklist.md
@@ -154,6 +154,40 @@ To do this: use Grep to find all references to the sibling values (e.g., grep fo
- Internal tools not distributed outside the team
- Test-only CI changes (adding test steps, not publish steps)
+#### Script & Shell Quality
+- Shell scripts without `set -euo pipefail` (or individual `set -e`, `set -u`) — failures silently ignored
+- Missing or incorrect shebang lines (`#!/usr/bin/env bash` preferred over `#!/bin/bash` for portability)
+- Unquoted variables in shell scripts (`$VAR` instead of `"$VAR"`) — word splitting and globbing bugs
+- `eval` usage with variable interpolation — injection risk
+- Platform-specific commands without guards (e.g., `sed -i ''` is macOS-only, Linux needs `sed -i`)
+- Scripts missing `chmod +x` or not matching the executable permissions of sibling scripts in the same directory
+
+**DO NOT flag:**
+- Scripts that are explicitly single-platform (documented in comments or filename like `*.bat`)
+- Helper scripts only run via a parent script that already sets shell options
+
+#### Platform & Convention Consistency
+- New files using a different toolchain than existing files in the same directory (e.g., `.bat` script added to a directory of `.sh` scripts, `Makefile` added alongside existing `package.json` scripts)
+- CI workflow targeting an OS not covered by the project's test matrix or existing CI jobs (e.g., Windows step in a Linux-only CI)
+- New config files duplicating settings already managed by an existing config (e.g., adding `.prettierrc` when `package.json` already has prettier config)
+- Naming convention violations: new files not matching the case/separator pattern of sibling files (e.g., `myScript.sh` among `kebab-case.sh` files)
+
+**DO NOT flag:**
+- Cross-platform additions that are clearly intentional (documented in PR or commit message)
+- Files in new directories that establish their own conventions
+
+#### Configuration & Infrastructure Safety
+- Secrets or tokens hardcoded in CI workflows — must use `${{ secrets.X }}` or environment variable references
+- CI workflows with `pull_request_target` trigger without explicit commit SHA pinning — allows arbitrary code execution from forks
+- Docker images pinned by mutable tag (`:latest`, `:main`) instead of SHA digest in CI — supply chain risk
+- Overly broad file permissions in scripts (`chmod 777` or world-writable)
+- Missing `.gitignore` entries for new generated/build artifact directories
+- Environment-specific paths hardcoded (e.g., `/home/runner/`, `/Users/`) instead of using variables
+
+**DO NOT flag:**
+- Development-only Docker compose files using `:latest`
+- Local development scripts with relaxed permissions
+
---
## Severity Classification
@@ -171,7 +205,10 @@ CRITICAL (highest severity): INFORMATIONAL (lower severity):
├─ Type Coercion at Boundaries
├─ View/Frontend
├─ Performance & Bundle Impact
- └─ Distribution & CI/CD Pipeline
+ ├─ Distribution & CI/CD Pipeline
+ ├─ Script & Shell Quality
+ ├─ Platform & Convention Consistency
+ └─ Configuration & Infrastructure Safety
All findings are actioned via Fix-First Review. Severity determines
presentation order and classification of AUTO-FIX vs ASK — critical
diff --git a/test/fixtures/review-eval-ci-baseline.yml b/test/fixtures/review-eval-ci-baseline.yml
new file mode 100644
index 000000000..f19b70303
--- /dev/null
+++ b/test/fixtures/review-eval-ci-baseline.yml
@@ -0,0 +1,17 @@
+name: CI
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-node@v4
+ with:
+ node-version: '20'
+ - run: npm ci
+ - run: npm test
diff --git a/test/fixtures/review-eval-ci-blindspot.yml b/test/fixtures/review-eval-ci-blindspot.yml
new file mode 100644
index 000000000..0d28dbc6b
--- /dev/null
+++ b/test/fixtures/review-eval-ci-blindspot.yml
@@ -0,0 +1,27 @@
+name: CI
+on:
+ push:
+ branches: [main]
+ pull_request_target:
+ branches: [main]
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-node@v4
+ with:
+ node-version: '20'
+ - run: npm ci
+ - run: npm test
+
+ deploy:
+ runs-on: windows-latest
+ needs: test
+ steps:
+ - uses: actions/checkout@v4
+ - run: |
+ TOKEN="ghp_abc123secrettoken"
+ curl -H "Authorization: token $TOKEN" https://api.github.com/repos
+ - run: chmod 777 deploy.sh && ./deploy.sh
diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts
index b475daad7..b4620356d 100644
--- a/test/helpers/touchfiles.ts
+++ b/test/helpers/touchfiles.ts
@@ -58,6 +58,7 @@ export const E2E_TOUCHFILES: Record = {
'review-enum-completeness': ['review/**', 'test/fixtures/review-eval-enum*.rb'],
'review-base-branch': ['review/**'],
'review-design-lite': ['review/**', 'test/fixtures/review-eval-design-slop.*'],
+ 'review-ci-blindspot': ['review/**', 'test/fixtures/review-eval-ci-*'],
// Office Hours
'office-hours-spec-review': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
@@ -199,6 +200,7 @@ export const E2E_TIERS: Record = {
'review-enum-completeness': 'gate',
'review-base-branch': 'gate',
'review-design-lite': 'periodic', // 4/7 threshold is subjective
+ 'review-ci-blindspot': 'gate', // Security guardrail: CI blindspot detection
'review-coverage-audit': 'gate',
'review-plan-completion': 'gate',
'review-dashboard-via': 'gate',
diff --git a/test/skill-e2e-review.test.ts b/test/skill-e2e-review.test.ts
index dacd4b166..e9714cd51 100644
--- a/test/skill-e2e-review.test.ts
+++ b/test/skill-e2e-review.test.ts
@@ -252,6 +252,91 @@ Important: The design checklist should catch issues like blacklisted fonts, smal
}, 300_000);
});
+// --- Review: CI/infra blindspot E2E ---
+
+describeIfSelected('Review CI blindspot E2E', ['review-ci-blindspot'], () => {
+ let ciDir: string;
+
+ beforeAll(() => {
+ ciDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-ci-blindspot-'));
+
+ const run = (cmd: string, args: string[]) =>
+ spawnSync(cmd, args, { cwd: ciDir, stdio: 'pipe', timeout: 5000 });
+
+ run('git', ['init', '-b', 'main']);
+ run('git', ['config', 'user.email', 'test@test.com']);
+ run('git', ['config', 'user.name', 'Test']);
+
+ // Commit clean CI baseline on main
+ fs.mkdirSync(path.join(ciDir, '.github', 'workflows'), { recursive: true });
+ const baseline = fs.readFileSync(path.join(ROOT, 'test', 'fixtures', 'review-eval-ci-baseline.yml'), 'utf-8');
+ fs.writeFileSync(path.join(ciDir, '.github', 'workflows', 'ci.yml'), baseline);
+
+ // Add a few bash scripts to establish repo conventions
+ fs.mkdirSync(path.join(ciDir, 'bin'), { recursive: true });
+ fs.writeFileSync(path.join(ciDir, 'bin', 'test.sh'), '#!/usr/bin/env bash\nset -euo pipefail\nnpm test\n');
+ fs.writeFileSync(path.join(ciDir, 'bin', 'build.sh'), '#!/usr/bin/env bash\nset -euo pipefail\nnpm run build\n');
+
+ run('git', ['add', '.']);
+ run('git', ['commit', '-m', 'initial CI and scripts']);
+
+ // Feature branch with planted CI issues
+ run('git', ['checkout', '-b', 'feature/update-ci']);
+ const blindspot = fs.readFileSync(path.join(ROOT, 'test', 'fixtures', 'review-eval-ci-blindspot.yml'), 'utf-8');
+ fs.writeFileSync(path.join(ciDir, '.github', 'workflows', 'ci.yml'), blindspot);
+ run('git', ['add', '.']);
+ run('git', ['commit', '-m', 'update CI workflow']);
+
+ // Copy review skill files
+ fs.copyFileSync(path.join(ROOT, 'review', 'SKILL.md'), path.join(ciDir, 'review-SKILL.md'));
+ fs.copyFileSync(path.join(ROOT, 'review', 'checklist.md'), path.join(ciDir, 'review-checklist.md'));
+ fs.copyFileSync(path.join(ROOT, 'review', 'greptile-triage.md'), path.join(ciDir, 'review-greptile-triage.md'));
+ });
+
+ afterAll(() => {
+ try { fs.rmSync(ciDir, { recursive: true, force: true }); } catch {}
+ });
+
+ testConcurrentIfSelected('review-ci-blindspot', async () => {
+ const result = await runSkillTest({
+ prompt: `You are in a git repo on branch feature/update-ci with changes against main.
+Read review-SKILL.md for the review workflow instructions.
+Also read review-checklist.md and apply it.
+Skip the preamble bash block, lake intro, telemetry, and contributor mode sections — go straight to the review.
+Run /review on the current diff (git diff main...HEAD).
+
+The diff modifies a CI workflow file. Pay attention to PR type classification and CI-specific review categories.
+Write your review findings to ${ciDir}/review-output.md`,
+ workingDirectory: ciDir,
+ maxTurns: 20,
+ timeout: 180_000,
+ testName: 'review-ci-blindspot',
+ runId,
+ });
+
+ logCost('/review ci-blindspot', result);
+ recordE2E(evalCollector, '/review CI blindspot', 'Review CI blindspot E2E', result);
+ expect(result.exitReason).toBe('success');
+
+ // Verify the review caught at least 2 of 3 planted issues
+ const reviewPath = path.join(ciDir, 'review-output.md');
+ if (fs.existsSync(reviewPath)) {
+ const review = fs.readFileSync(reviewPath, 'utf-8').toLowerCase();
+ let detected = 0;
+
+ // Issue 1: pull_request_target security risk
+ if (review.includes('pull_request_target') || review.includes('fork') || review.includes('arbitrary code')) detected++;
+ // Issue 2: Hardcoded secret/token
+ if (review.includes('hardcoded') || review.includes('secret') || review.includes('token') || review.includes('ghp_')) detected++;
+ // Issue 3: chmod 777 or Windows runner inconsistency
+ if (review.includes('777') || review.includes('windows') || review.includes('permission') || review.includes('platform')) detected++;
+
+ console.log(`CI blindspot review detected ${detected}/3 planted issues`);
+ expect(detected).toBeGreaterThanOrEqual(2);
+ }
+ }, 210_000);
+});
+
// --- Base branch detection smoke tests ---
describeIfSelected('Base branch detection', ['review-base-branch', 'ship-base-branch', 'retro-base-branch'], () => {