Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ Scan the current repo in lint mode:
slop-scan scan . --lint
```

Scan the current repo with pinned benchmark reference context:

```bash
slop-scan scan . --ref
```

Scan another repo and get JSON:

```bash
Expand Down Expand Up @@ -128,6 +134,7 @@ Current checks focus on patterns that often show up in unreviewed generated code
- findings / function
- top file hotspots
- top directory hotspots
- side-by-side pinned benchmark reference context with `--ref`
- grouped lint-style findings with `--lint`
- full-fidelity findings with evidence in `--json`

Expand All @@ -148,6 +155,10 @@ The repo ships with a **pinned, recreatable benchmark set** comparing known AI-g

**Blended score** = geometric mean of the six normalized-metric ratios versus the mature OSS cohort medians, then rescaled so the mature OSS cohort median is **1.00**. Higher means a repo is consistently noisier across the benchmark dimensions.

Use `--ref` with the default text output to compare a scanned repo's normalized metrics side by side with the pinned cohort medians.

The CLI reference context is packaged as `src/reference-baseline.ts`, generated from the pinned benchmark snapshot with `bun run reference:update`.

### Cohort medians

| Metric | AI median | Mature OSS median | Ratio |
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,11 @@
"test": "bun test",
"prepare": "husky",
"prepack": "bun run build",
"reference:update": "bun run scripts/update-reference-baseline.ts",
"benchmark:fetch": "bun run scripts/benchmark-fetch.ts",
"benchmark:scan": "bun run scripts/benchmark-scan.ts",
"benchmark:report": "bun run scripts/benchmark-report.ts",
"benchmark:update": "bun run benchmark:fetch && bun run benchmark:scan && bun run benchmark:report"
"benchmark:update": "bun run benchmark:fetch && bun run benchmark:scan && bun run benchmark:report && bun run reference:update"
},
"dependencies": {
"globby": "^16.2.0",
Expand Down
6 changes: 4 additions & 2 deletions scripts/self-scan-stable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const STABLE_PACKAGE_PATH = path.resolve("node_modules/slop-scan-stable/package.
const STABLE_BIN_PATH = path.resolve("node_modules/slop-scan-stable/bin/slop-scan.js");
const UPDATE_FLAG = "--update";
const SCORE_EPSILON = 1e-9;
const STABLE_SCAN_MAX_BUFFER = 10 * 1024 * 1024;

function countRuleHits(report: ScanReport): Record<string, number> {
const counts = new Map<string, number>();
Expand All @@ -61,13 +62,14 @@ function runStableScan(): ScanReport {
const result = spawnSync("node", [STABLE_BIN_PATH, "scan", ".", "--json"], {
cwd: process.cwd(),
encoding: "utf8",
maxBuffer: STABLE_SCAN_MAX_BUFFER,
});

if (result.status !== 0) {
if (result.stdout.length > 0) {
if (result.stdout?.length > 0) {
console.log(result.stdout.trimEnd());
}
if (result.stderr.length > 0) {
if (result.stderr?.length > 0) {
console.error(result.stderr.trimEnd());
}

Expand Down
143 changes: 143 additions & 0 deletions scripts/update-reference-baseline.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import { mkdir, readFile, writeFile } from "node:fs/promises";
import path from "node:path";
import {
DEFAULT_BENCHMARK_SET_PATH,
loadBenchmarkSet,
resolveProjectPath,
} from "../src/benchmarks/manifest";
import type { BenchmarkSnapshot } from "../src/benchmarks/types";
import { getOption } from "./lib/get-option";

/**
 * Data embedded into the generated `src/reference-baseline.ts`.
 * Structurally mirrors the `ReferenceBaseline` interface that
 * `renderReferenceBaseline` emits into the generated module.
 */
interface ReferenceBaselinePayload {
  // Identity of the pinned benchmark set the medians were computed from.
  benchmarkSetId: string;
  benchmarkSetName: string;
  // Copied from the snapshot's generation timestamp.
  generatedAt: string;
  // Analyzer version recorded in the snapshot.
  analyzerVersion: string;
  // Only the default scan configuration is baselined (narrowed literal type).
  configMode: "default";
  cohorts: {
    explicitAi: {
      label: string;
      repoCount: number;
      // Median normalized metrics, typed via the snapshot's cohort shape.
      medians: BenchmarkSnapshot["cohorts"]["explicit-ai"]["medians"];
      // null when the snapshot provides no blended-score median.
      blendedScoreMedian: number | null;
    };
    matureOss: {
      label: string;
      repoCount: number;
      medians: BenchmarkSnapshot["cohorts"]["mature-oss"]["medians"];
      blendedScoreMedian: number | null;
    };
  };
}

/**
 * Render the full TypeScript source of the generated `src/reference-baseline.ts`
 * module: the `ReferenceBenchmarkCohort`/`ReferenceBaseline` interfaces plus a
 * `DEFAULT_REFERENCE_BASELINE` constant validated with `satisfies`.
 *
 * The file is built line by line so the emitted code keeps stable formatting
 * across regenerations (string values via JSON.stringify, numbers verbatim).
 */
function renderReferenceBaseline(payload: ReferenceBaselinePayload): string {
  const explicitAi = payload.cohorts.explicitAi;
  const matureOss = payload.cohorts.matureOss;

  // Each array element is one line of the generated module.
  return [
    'import type { NormalizedMetrics } from "./core/types";',
    "",
    "export interface ReferenceBenchmarkCohort {",
    " label: string;",
    " repoCount: number;",
    " medians: NormalizedMetrics;",
    " blendedScoreMedian: number | null;",
    "}",
    "",
    "export interface ReferenceBaseline {",
    " benchmarkSetId: string;",
    " benchmarkSetName: string;",
    " generatedAt: string;",
    " analyzerVersion: string;",
    ' configMode: "default";',
    " cohorts: {",
    " explicitAi: ReferenceBenchmarkCohort;",
    " matureOss: ReferenceBenchmarkCohort;",
    " };",
    "}",
    "",
    "export const DEFAULT_REFERENCE_BASELINE = {",
    // Strings are JSON-encoded so quoting/escaping is always valid TS.
    ` benchmarkSetId: ${JSON.stringify(payload.benchmarkSetId)},`,
    ` benchmarkSetName: ${JSON.stringify(payload.benchmarkSetName)},`,
    ` generatedAt: ${JSON.stringify(payload.generatedAt)},`,
    ` analyzerVersion: ${JSON.stringify(payload.analyzerVersion)},`,
    ` configMode: ${JSON.stringify(payload.configMode)},`,
    " cohorts: {",
    " explicitAi: {",
    ` label: ${JSON.stringify(explicitAi.label)},`,
    ` repoCount: ${explicitAi.repoCount},`,
    " medians: {",
    renderMetrics(explicitAi.medians, " "),
    " },",
    ` blendedScoreMedian: ${formatNullableNumber(explicitAi.blendedScoreMedian)},`,
    " },",
    " matureOss: {",
    ` label: ${JSON.stringify(matureOss.label)},`,
    ` repoCount: ${matureOss.repoCount},`,
    " medians: {",
    renderMetrics(matureOss.medians, " "),
    " },",
    ` blendedScoreMedian: ${formatNullableNumber(matureOss.blendedScoreMedian)},`,
    " },",
    " },",
    // `satisfies` keeps literal inference while checking the full shape.
    "} satisfies ReferenceBaseline;",
    // Trailing "" yields a final newline after join("\n").
    "",
  ].join("\n");
}

/**
 * Serialize a possibly-null number as a TypeScript literal for the
 * generated baseline module: `null` becomes the keyword, numbers are
 * rendered with full JS precision.
 */
function formatNullableNumber(value: number | null): string {
  if (value === null) {
    return "null";
  }
  return `${value}`;
}

/**
 * Emit the six normalized-metric median lines of a `medians` object literal,
 * one `key: value,` line per metric, each prefixed with `indent`.
 */
function renderMetrics(
  metrics: BenchmarkSnapshot["cohorts"]["explicit-ai"]["medians"],
  indent: string,
): string {
  // Fixed key order keeps the generated file byte-stable across runs.
  const metricKeys = [
    "scorePerFile",
    "scorePerKloc",
    "scorePerFunction",
    "findingsPerFile",
    "findingsPerKloc",
    "findingsPerFunction",
  ] as const;

  return metricKeys
    .map((key) => `${indent}${key}: ${formatNullableNumber(metrics[key])},`)
    .join("\n");
}

// ---- Script entry point (top-level await) ----
// Regenerates src/reference-baseline.ts from the pinned benchmark snapshot.

// Manifest path can be overridden with `--manifest`; defaults to the pinned set.
const manifestPath = getOption(process.argv.slice(2), "--manifest", DEFAULT_BENCHMARK_SET_PATH);
const benchmarkSet = await loadBenchmarkSet(manifestPath);
const snapshotPath = resolveProjectPath(benchmarkSet.artifacts.snapshotPath);
// Output target: the generated module packaged with the CLI.
const targetPath = resolveProjectPath("src/reference-baseline.ts");
// NOTE(review): the parsed JSON is cast without runtime validation — assumes the
// snapshot file always matches BenchmarkSnapshot; confirm the writer guarantees this.
const snapshot = JSON.parse(await readFile(snapshotPath, "utf8")) as BenchmarkSnapshot;

// Refuse to regenerate from a snapshot that belongs to a different benchmark set.
if (snapshot.benchmarkSetId !== benchmarkSet.id) {
  throw new Error(
    `Benchmark snapshot id mismatch: expected ${benchmarkSet.id}, got ${snapshot.benchmarkSetId}`,
  );
}

// Reshape the snapshot's kebab-case cohorts ("explicit-ai"/"mature-oss") into
// the camelCase payload embedded in the generated module.
const baseline: ReferenceBaselinePayload = {
  benchmarkSetId: snapshot.benchmarkSetId,
  benchmarkSetName: snapshot.benchmarkSetName,
  generatedAt: snapshot.generatedAt,
  analyzerVersion: snapshot.analyzerVersion,
  configMode: snapshot.configMode,
  cohorts: {
    explicitAi: {
      label: "Explicit AI median",
      repoCount: snapshot.cohorts["explicit-ai"].repoCount,
      medians: snapshot.cohorts["explicit-ai"].medians,
      blendedScoreMedian: snapshot.cohorts["explicit-ai"].blendedScoreMedian,
    },
    matureOss: {
      label: "Mature OSS median",
      repoCount: snapshot.cohorts["mature-oss"].repoCount,
      medians: snapshot.cohorts["mature-oss"].medians,
      blendedScoreMedian: snapshot.cohorts["mature-oss"].blendedScoreMedian,
    },
  },
};

// Ensure the parent directory exists, then (over)write the generated module.
await mkdir(path.dirname(targetPath), { recursive: true });
await writeFile(targetPath, renderReferenceBaseline(baseline));

console.log(`Wrote reference baseline to ${targetPath}`);
11 changes: 10 additions & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export function formatHelp(): string {
" -h, --help Show help",
" --json Output results as JSON",
" --lint Output results in lint format",
" --ref Include pinned benchmark reference context in text output",
" --ignore <pattern> Glob pattern to ignore (repeatable)",
"",
"Examples:",
Expand All @@ -29,6 +30,7 @@ export interface CliArgs {
help: boolean;
json: boolean;
lint: boolean;
ref: boolean;
ignore: string[];
command: string | undefined;
target: string;
Expand All @@ -41,6 +43,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
help: { type: "boolean", short: "h", default: false },
json: { type: "boolean", default: false },
lint: { type: "boolean", default: false },
ref: { type: "boolean", default: false },
ignore: { type: "string", multiple: true, default: [] },
},
allowPositionals: true,
Expand All @@ -53,6 +56,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
help: values.help,
json: values.json,
lint: values.lint,
ref: values.ref,
ignore: values.ignore,
command: command,
target: target,
Expand All @@ -78,6 +82,11 @@ export async function run(argv: string[]): Promise<number> {
return 1;
}

if (args.ref && (args.json || args.lint)) {
console.error("--ref can only be used with default text output.");
return 1;
}

const rootDir = path.resolve(args.target);
const loadedConfig = await loadConfigFile(rootDir);
const config = loadedConfig.config;
Expand All @@ -93,7 +102,7 @@ export async function run(argv: string[]): Promise<number> {

const result = await analyzeRepository(rootDir, config, registry);
const reporter = registry.getReporter(args.json ? "json" : args.lint ? "lint" : "text");
const output = await reporter.render(result);
const output = await reporter.render(result, { reference: args.ref });

if (output.length > 0) {
console.log(output);
Expand Down
6 changes: 5 additions & 1 deletion src/core/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,11 @@ export interface RulePlugin extends ProviderBase {

export interface ReporterPlugin {
id: string;
render(result: AnalysisResult): Promise<string> | string;
render(result: AnalysisResult, options?: ReporterOptions): Promise<string> | string;
}

/**
 * Optional flags passed to `ReporterPlugin.render`.
 */
export interface ReporterOptions {
  /**
   * When true, include pinned benchmark reference context in the output.
   * Set from the CLI `--ref` flag; the CLI rejects `--ref` together with
   * `--json`/`--lint`, so only the text reporter receives `true`.
   */
  reference?: boolean;
}

export interface AnalyzerRuntime {
Expand Down
1 change: 1 addition & 0 deletions src/node-entry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export type {
LanguagePlugin,
ProviderContext,
ReporterPlugin,
ReporterOptions,
RulePlugin,
Scope,
} from "./core/types";
Expand Down
56 changes: 56 additions & 0 deletions src/reference-baseline.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import type { NormalizedMetrics } from "./core/types";

/**
 * Median metrics for one benchmark cohort.
 * This file is generated by `bun run reference:update`.
 */
export interface ReferenceBenchmarkCohort {
  // Human-readable cohort label (e.g. "Explicit AI median").
  label: string;
  // Number of repos the medians were computed over.
  repoCount: number;
  // Per-file / per-kloc / per-function median metrics.
  medians: NormalizedMetrics;
  // null when the snapshot provides no blended-score median.
  blendedScoreMedian: number | null;
}

/**
 * Pinned benchmark reference context packaged with the CLI and compared
 * against a scanned repo when `--ref` is passed.
 * This file is generated by `bun run reference:update`.
 */
export interface ReferenceBaseline {
  // Id of the pinned benchmark set the baseline came from.
  benchmarkSetId: string;
  benchmarkSetName: string;
  // ISO timestamp of the snapshot this baseline was generated from.
  generatedAt: string;
  // Analyzer version that produced the snapshot.
  analyzerVersion: string;
  // Only the default scan configuration is baselined.
  configMode: "default";
  cohorts: {
    explicitAi: ReferenceBenchmarkCohort;
    matureOss: ReferenceBenchmarkCohort;
  };
}

// Generated by `bun run reference:update` from the pinned benchmark snapshot
// (see scripts/update-reference-baseline.ts) — do not edit by hand; changes
// will be overwritten on the next regeneration.
export const DEFAULT_REFERENCE_BASELINE = {
  benchmarkSetId: "known-ai-vs-solid-oss",
  benchmarkSetName: "Known AI repos vs older solid OSS repos",
  generatedAt: "2026-04-09T00:24:29.081Z",
  analyzerVersion: "0.2.0",
  configMode: "default",
  cohorts: {
    explicitAi: {
      label: "Explicit AI median",
      repoCount: 9,
      medians: {
        scorePerFile: 0.9875000000000002,
        scorePerKloc: 9.510586363885691,
        scorePerFunction: 0.2286514601096154,
        findingsPerFile: 0.30851063829787234,
        findingsPerKloc: 2.96198782293895,
        findingsPerFunction: 0.0842173094081491,
      },
      blendedScoreMedian: 3.476442610225084,
    },
    matureOss: {
      label: "Mature OSS median",
      repoCount: 8,
      medians: {
        scorePerFile: 0.19086548662498448,
        scorePerKloc: 4.422142801664107,
        scorePerFunction: 0.09195482875096181,
        findingsPerFile: 0.06947805977819406,
        findingsPerKloc: 1.3961844005945103,
        findingsPerFunction: 0.02817460317460317,
      },
      // Mature OSS cohort is the normalization anchor, so its median is ~1.00.
      blendedScoreMedian: 0.9999999999999999,
    },
  },
} satisfies ReferenceBaseline;
Loading