Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions src/lib/knowledge-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// TTL-cached index of the shared team-knowledge corpus fetched from the GitHub
// contents API via `gh api`. The cache lives in ~/.claude/tmp/knowledge-index.json
// and is warmed by refresh-knowledge-index.ts (spawned detached at SessionStart).
//
// Fail-open contract (same as codex.ts):
// - Never throw into a hot path.
// - A network/gh failure returns the existing cache (or null). Never clobbers
// a good cache entry with empty/failed data.
// - `gh` not on PATH → return existing cache silently.
//
// The corpus repo defaults to darkroomengineering/team-knowledge, overridable
// via $KNOWLEDGE_REPO env var.

import { z } from "zod";
import { readState, writeState } from "./hook-runtime.ts";
import { hasCommand } from "./platform.ts";

// Generated/meta files in the corpus that are not knowledge notes. Defined here
// (the lower-level lib) so team-knowledge.ts can re-export it without a circular
// dependency. Mirrors the SKIP_FILES set in lint-knowledge.ts.
export const NON_NOTE_FILES = new Set(["README.md", "INDEX.md", "CONTRIBUTING.md"]);

// Maximum age of a cached index before isStale() reports it as stale.
const KNOWLEDGE_INDEX_TTL_MS = 6 * 60 * 60 * 1000; // 6 hours
// File name handed to readState/writeState for the cached index.
const CACHE_FILE = "knowledge-index.json";
// Corpus repo used when $KNOWLEDGE_REPO is unset or blank.
const DEFAULT_KNOWLEDGE_REPO = "darkroomengineering/team-knowledge";
// `owner/name` of the corpus repo. `||` (not `??`) is deliberate: an empty or
// whitespace-only $KNOWLEDGE_REPO would otherwise produce the invalid API path
// `repos//contents`, so a blank value is treated the same as an unset one.
const KNOWLEDGE_REPO = process.env.KNOWLEDGE_REPO?.trim() || DEFAULT_KNOWLEDGE_REPO;

// ── Schema ─────────────────────────────────────────────────────────────────────

/** Zod schema for the cached index payload. readKnowledgeIndex() discards any
* stored value that does not match it. */
export const KnowledgeIndexSchema = z.object({
// Note slugs: file names with the `.md` suffix removed, as produced by parseContentsListing().
notes: z.array(z.string()),
// When the listing was fetched; written as `new Date().toISOString()` and read back by isStale().
checkedAt: z.string(),
});

/** Static type inferred from KnowledgeIndexSchema. */
export type KnowledgeIndex = z.infer<typeof KnowledgeIndexSchema>;

// ── Pure helpers ───────────────────────────────────────────────────────────────

/**
 * Turn a GitHub contents-API listing into the sorted list of note slugs.
 *
 * An entry is kept only when it is a file (`type === "file"`), its name ends
 * with `.md`, and the name is not one of NON_NOTE_FILES. Each kept name has
 * its `.md` suffix removed.
 *
 * @param entries Listing entries; only `name` and `type` are read.
 * @returns The slugs in default `Array.prototype.sort` order.
 */
export function parseContentsListing(entries: Array<{ name: string; type: string }>): string[] {
  const suffix = ".md";
  const slugs: string[] = [];
  for (const { name, type } of entries) {
    if (type !== "file") continue;
    if (!name.endsWith(suffix)) continue;
    if (NON_NOTE_FILES.has(name)) continue;
    slugs.push(name.slice(0, name.length - suffix.length));
  }
  slugs.sort();
  return slugs;
}

/**
 * Decide whether a cache timestamp should trigger a refresh.
 *
 * @param checkedAt Timestamp string of the last successful fetch, or
 *   `undefined` when there is no cache.
 * @returns `true` when the timestamp is missing, unparseable, lies in the
 *   future, or is older than KNOWLEDGE_INDEX_TTL_MS; `false` otherwise.
 */
export function isStale(checkedAt: string | undefined): boolean {
  if (checkedAt === undefined) return true;
  const fetchedAtMs = Date.parse(checkedAt);
  if (Number.isNaN(fetchedAtMs)) return true;
  const ageMs = Date.now() - fetchedAtMs;
  // A timestamp ahead of the clock (clock skew, hand-edited or corrupted cache)
  // would otherwise look fresh until that future moment plus the TTL.
  if (ageMs < 0) return true;
  return ageMs > KNOWLEDGE_INDEX_TTL_MS;
}

// ── Cache I/O ──────────────────────────────────────────────────────────────────

/**
 * Load the cached knowledge index.
 *
 * The raw value comes from `readState(CACHE_FILE, null)` (the `null` argument
 * is presumably the fallback for a missing or unreadable file — confirm in
 * hook-runtime.ts) and is then checked against KnowledgeIndexSchema.
 *
 * @returns The validated index, or `null` when the stored value does not match
 *   the schema.
 */
export async function readKnowledgeIndex(): Promise<KnowledgeIndex | null> {
  const stored = await readState<unknown>(CACHE_FILE, null);
  const result = KnowledgeIndexSchema.safeParse(stored);
  if (!result.success) return null;
  return result.data;
}

// ── Refresh (TTL-gated, fail-open) ─────────────────────────────────────────────

/** The two fields read from each GitHub contents-API entry. */
const ContentsListingSchema = z.array(z.object({ name: z.string(), type: z.string() }));

/**
 * Refresh the knowledge index if it is stale, otherwise return the cached value.
 *
 * Fail-open: every failure path (no `gh` on PATH, non-zero exit, invalid JSON,
 * a payload that is not an array of `{ name, type }` entries, a spawn or write
 * error) returns the cache that was read at the start, which may be `null`,
 * and leaves the cache file untouched.
 *
 * @returns The fresh or newly written index, or the previously cached value
 *   (possibly `null`) when the refresh could not be completed.
 */
export async function refreshKnowledgeIndex(): Promise<KnowledgeIndex | null> {
  // 1. Read current cache; return early if still fresh.
  const existing = await readKnowledgeIndex();
  if (existing && !isStale(existing.checkedAt)) return existing;

  // 2. `gh` is required for the fetch — bail without touching the cache.
  if (!hasCommand("gh")) return existing;

  // 3. Fetch the repo's top-level listing via `gh api`.
  try {
    const proc = Bun.spawn(["gh", "api", `repos/${KNOWLEDGE_REPO}/contents`], {
      stdout: "pipe",
      stderr: "ignore",
      timeout: 10_000,
    });

    // Drain stdout while waiting for exit so a large listing cannot stall the pipe.
    const [text, exit] = await Promise.all([new Response(proc.stdout).text(), proc.exited]);
    if (exit !== 0) return existing;

    // Validate the payload instead of asserting its type: the response is
    // external input, and an error object or single-file response must not be
    // treated as a listing. A JSON.parse failure is handled by the catch below.
    const listing = ContentsListingSchema.safeParse(JSON.parse(text));
    if (!listing.success) return existing;

    const index: KnowledgeIndex = {
      notes: parseContentsListing(listing.data),
      checkedAt: new Date().toISOString(),
    };
    await writeState(CACHE_FILE, index);
    return index;
  } catch {
    // Invalid JSON, spawn error, write error — keep whatever cache we had.
    return existing;
  }
}
67 changes: 45 additions & 22 deletions src/lib/team-knowledge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,65 @@
// counterpart to the /share-learning write path). The corpus was write-only:
// /share-learning posts notes, but nothing surfaced them at the moment an agent
// would use them, so they rarely got consulted. This emits a one-line ping when
// a local clone is configured and non-empty.
// a local clone is configured and non-empty — or from a TTL-cached remote index
// when no local clone is available.
//
// Hot-path constraint: this runs inside the SessionStart hook, so it does ZERO
// network I/O (no `gh api`) and is fully fail-open — any error → no output,
// never block session start. Live retrieval stays on-demand elsewhere; this only
// makes the agent aware the corpus exists and worth an `rg`.
// blocking network I/O and is fully fail-open — any error → no output, never
// block session start. Network warming happens in a detached background script
// (refresh-knowledge-index.ts) and the result is read from a TTL cache.
//
// Opt-in: keyed on KNOWLEDGE_REPO_PATH — the same env var lint:knowledge and
// new-note already use to find a local clone. If it's unset, this is a no-op.
// Priority:
// 1. Explicit repoPath (local clone) — used by tests and $KNOWLEDGE_REPO_PATH.
// 2. TTL cache written by refresh-knowledge-index.ts (no local clone needed).
// 3. No output.

import { readdir } from "node:fs/promises";
import { NON_NOTE_FILES, readKnowledgeIndex } from "./knowledge-index.ts";

// Generated/meta files in the corpus that are not knowledge notes. Mirrors the
// SKIP_FILES set in lint-knowledge.ts; kept local so the hot path doesn't import
// the linter module.
const NON_NOTE_FILES = new Set(["README.md", "INDEX.md", "CONTRIBUTING.md"]);
// Re-export so external consumers (lint-knowledge, tests) can import from
// team-knowledge.ts as before without knowing the source moved.
export { NON_NOTE_FILES } from "./knowledge-index.ts";

/** Lines to print at session start advertising the shared corpus, or [] when
* there's nothing to surface (no clone configured, empty, or unreadable).
* `repoPath` defaults to $KNOWLEDGE_REPO_PATH. Never throws.
*
* Two sources appear in this body: a local clone listed with readdir(repoPath),
* and the cached index returned by readKnowledgeIndex().
*
* NOTE(review): this span comes from a rendered diff and appears to interleave
* the removed and the added lines of the change — confirm the final control
* flow against the checked-in file before relying on the notes below. */
export async function teamKnowledgeAwareness(
repoPath: string | undefined = process.env.KNOWLEDGE_REPO_PATH,
): Promise<string[]> {
// NOTE(review): looks like the pre-change guard; if it is still present the
// cache fallback further down can never run — confirm it was removed.
if (!repoPath) return [];
// Branch A: explicit repoPath — use the local clone directly (existing behavior,
// preserves all current tests). Cache is bypassed in this branch.
if (repoPath) {
try {
const entries = await readdir(repoPath);
const notes = entries.filter((n) => n.endsWith(".md") && !NON_NOTE_FILES.has(n));
if (notes.length === 0) return [];
const label = notes.length === 1 ? "note" : "notes";
return [
"",
`team-knowledge: ${notes.length} shared ${label} at ${repoPath}`,
` consult before architecture / convention / gotcha calls — rg "<topic>" "${repoPath}"`,
];
} catch {
// missing dir, permission error, etc. — stay silent, never disrupt startup.
return [];
}
}

// Branch B: no local clone — try the TTL cache written by refresh-knowledge-index.ts.
try {
// NOTE(review): the readdir-based lines below duplicate Branch A and are
// presumably the removed side of the diff — verify.
const entries = await readdir(repoPath);
const notes = entries.filter((n) => n.endsWith(".md") && !NON_NOTE_FILES.has(n));
if (notes.length === 0) return [];
const label = notes.length === 1 ? "note" : "notes";
return [
"",
`team-knowledge: ${notes.length} shared ${label} at ${repoPath}`,
` consult before architecture / convention / gotcha calls — rg "<topic>" "${repoPath}"`,
];
const index = await readKnowledgeIndex();
if (index && index.notes.length > 0) {
const count = index.notes.length;
const label = count === 1 ? "note" : "notes";
return [
"",
`team-knowledge: ${count} shared ${label} — consult before architecture / convention / gotcha calls`,
];
}
} catch {
// missing dir, permission error, etc. — stay silent, never disrupt startup.
return [];
// cache unreadable — fall through silently
}

return [];
}
9 changes: 9 additions & 0 deletions src/scripts/refresh-knowledge-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bun
// Detached entrypoint for warming the knowledge-index TTL cache.
// Spawned fire-and-forget from session-start.ts (Phase 1 background tasks).
// The TTL gate lives in refreshKnowledgeIndex — spawning this every session
// is cheap; it only calls `gh api` when the cache is stale (>6h).

import { refreshKnowledgeIndex } from "../lib/knowledge-index.ts";

try {
  await refreshKnowledgeIndex();
} catch {
  // Fire-and-forget warm-up: a rejected refresh is deliberately ignored.
}
12 changes: 12 additions & 0 deletions src/scripts/session-start.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@ const mcpPrune = Bun.spawn(["bun", join(CLAUDE_DIR, "src", "scripts", "prune-mcp
});
mcpPrune.unref?.();

// Background warm-up of the team-knowledge TTL cache. refreshKnowledgeIndex
// gates on the TTL itself, so `gh api` is only called once the cache is stale
// (>6h) and launching this on every session stays cheap.
const knowledgeRefreshScript = join(CLAUDE_DIR, "src", "scripts", "refresh-knowledge-index.ts");
const knowledgeRefresh = Bun.spawn(["bun", knowledgeRefreshScript], {
  stdout: "ignore",
  stderr: "ignore",
});
// Detach so the child does not keep this process alive.
knowledgeRefresh.unref?.();

const logRotations = [
rotateLog(join(CLAUDE_DIR, "sessions.log")),
rotateLog(join(CLAUDE_DIR, "hooks.log")),
Expand Down
110 changes: 110 additions & 0 deletions tests/knowledge-index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Tests for knowledge-index.ts — covers the two pure exported functions.
// No network, no real cache I/O.

import { describe, expect, test } from "bun:test";
import { isStale, parseContentsListing } from "../src/lib/knowledge-index.ts";

// ── parseContentsListing ───────────────────────────────────────────────────────

describe("parseContentsListing", () => {
  // Entry builders so each case only spells out the names it cares about.
  const file = (name: string) => ({ name, type: "file" });
  const dir = (name: string) => ({ name, type: "dir" });

  test("drops entries with type !== 'file'", () => {
    const slugs = parseContentsListing([dir("notes"), file("foo.md")]);
    expect(slugs).toEqual(["foo"]);
  });

  test("drops entries whose name does not end with .md", () => {
    const slugs = parseContentsListing([file("foo.ts"), file("bar.json"), file("baz.md")]);
    expect(slugs).toEqual(["baz"]);
  });

  test("drops NON_NOTE_FILES (README.md, INDEX.md, CONTRIBUTING.md)", () => {
    const slugs = parseContentsListing([
      file("README.md"),
      file("INDEX.md"),
      file("CONTRIBUTING.md"),
      file("gotcha.md"),
    ]);
    expect(slugs).toEqual(["gotcha"]);
  });

  test("strips .md suffix → returns slug", () => {
    const slugs = parseContentsListing([file("my-note.md")]);
    expect(slugs).toEqual(["my-note"]);
  });

  test("returns slugs sorted alphabetically", () => {
    const slugs = parseContentsListing([file("zebra.md"), file("apple.md"), file("mango.md")]);
    expect(slugs).toEqual(["apple", "mango", "zebra"]);
  });

  test("returns [] for an empty listing", () => {
    const slugs = parseContentsListing([]);
    expect(slugs).toEqual([]);
  });

  test("returns [] when listing has only dirs and non-.md files", () => {
    const slugs = parseContentsListing([file("README.md"), dir("scripts"), file("config.json")]);
    expect(slugs).toEqual([]);
  });

  test("mixed realistic listing", () => {
    const slugs = parseContentsListing([
      file("README.md"),
      file("INDEX.md"),
      file("CONTRIBUTING.md"),
      dir("scripts"),
      file("deployment.md"),
      file("auth-patterns.md"),
      dir(".github"),
    ]);
    expect(slugs).toEqual(["auth-patterns", "deployment"]);
  });
});

// ── isStale ────────────────────────────────────────────────────────────────────

describe("isStale", () => {
  const HOUR_MS = 60 * 60 * 1000;
  // ISO timestamp for a moment `hours` before now.
  const isoHoursAgo = (hours: number) => new Date(Date.now() - hours * HOUR_MS).toISOString();

  test("undefined → stale", () => {
    const stale = isStale(undefined);
    expect(stale).toBe(true);
  });

  test("garbage string → stale", () => {
    const stale = isStale("not-a-date");
    expect(stale).toBe(true);
  });

  test("epoch (very old) → stale", () => {
    const epoch = new Date(0).toISOString();
    expect(isStale(epoch)).toBe(true);
  });

  test("fresh timestamp (just now) → not stale", () => {
    const justNow = isoHoursAgo(0);
    expect(isStale(justNow)).toBe(false);
  });

  test("timestamp 5 hours ago → not stale (TTL is 6h)", () => {
    expect(isStale(isoHoursAgo(5))).toBe(false);
  });

  test("timestamp 7 hours ago → stale (TTL is 6h)", () => {
    expect(isStale(isoHoursAgo(7))).toBe(true);
  });

  test("empty string → stale (Date.parse returns NaN)", () => {
    const stale = isStale("");
    expect(stale).toBe(true);
  });
});
17 changes: 17 additions & 0 deletions tests/team-knowledge.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,23 @@ describe("teamKnowledgeAwareness — no-op cases", () => {
});
});

describe("teamKnowledgeAwareness — explicit repoPath bypasses cache", () => {
  test("explicit repoPath uses local clone, not the TTL cache", async () => {
    // Passing a path explicitly should drive the local-clone branch regardless
    // of $KNOWLEDGE_REPO_PATH or the state of the cache.
    const cloneDir = await sandbox();
    try {
      await writeFile(join(cloneDir, "concept.md"), "# Concept");
      const lines = await teamKnowledgeAwareness(cloneDir);
      const output = lines.join("\n");
      // The clone branch names the path; the cache branch only reports a count.
      expect(output).toContain(cloneDir);
      expect(output).toContain("1 shared note");
    } finally {
      await rm(cloneDir, { recursive: true, force: true });
    }
  });
});

describe("teamKnowledgeAwareness — non-empty corpus", () => {
test("1 note → output contains 'shared note' (singular) and the repo path", async () => {
const dir = await sandbox();
Expand Down
Loading