diff --git a/Cargo.lock b/Cargo.lock index 3d288be7e..7e06e2a1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,7 +53,7 @@ dependencies = [ [[package]] name = "aiwar-ingest" -version = "0.4.1" +version = "0.7.0" dependencies = [ "regex", "serde", @@ -1725,6 +1725,7 @@ dependencies = [ "async-stream", "axum 0.7.9", "causal-edge", + "cpic", "deno_core", "futures-core", "include_dir", @@ -1976,6 +1977,14 @@ dependencies = [ "futures-io", ] +[[package]] +name = "cpic" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -7170,7 +7179,7 @@ dependencies = [ [[package]] name = "osint-bake" -version = "0.4.1" +version = "0.7.0" dependencies = [ "aiwar-ingest", "lance-graph-contract", diff --git a/Cargo.toml b/Cargo.toml index 2a8aee60f..08053aff1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,11 @@ exclude = [ # in the same workspace graph. See # claude-notes/plans/2026-04-20-wasm-shim-merge.md. "crates/tree-sitter-language-wasm-shim", + # cpic — standalone pharmacogenomics crate (its own [workspace], serde + + # serde_json only). cockpit-server path-depends on it for the /cpic panel; + # excluding it here makes cargo treat it as a separate workspace root so the + # cross-boundary path dep resolves (else: "multiple workspace roots found"). + "cpic", ] resolver = "2" diff --git a/cockpit/src/CpicCockpit.tsx b/cockpit/src/CpicCockpit.tsx new file mode 100644 index 000000000..2182c925f --- /dev/null +++ b/cockpit/src/CpicCockpit.tsx @@ -0,0 +1,270 @@ +// /cpic — CPIC pharmacogenomics cockpit (gene-first), additive alongside /fma-body. +// +// A scenario {gene, diplotype|phenotype, drug} is posted to POST /api/cpic/reason, which runs +// the standalone `cpic` crate's reason() over the REAL published CPIC tables (allele, +// gene_result, drug, pair, guideline, recommendation). 
It resolves a phenotype and chains +// diplotype → phenotype → recommendation by 2-hop NARS deduction with CPIC-authoritative +// confidence (classification → f, pair cpiclevel → c). Each chain node carries its routable +// (part_of:is_a) GUID prefix — the same canonical 16-byte NodeGuid the rest of the stack uses. +// +// POC over published CPIC rules — NOT clinical decision support. +import { useEffect, useMemo, useState } from 'react'; + +interface ChainNode { + role: string; // "diplotype" | "phenotype" | "recommendation" + label: string; + guid: string; +} + +interface Outcome { + gene: string; + input: string; + drug: string; + resolved: boolean; + phenotype: string | null; + how: string | null; + chain: ChainNode[]; + classification: string | null; + cpic_level: string | null; + truth_f: number; + truth_c: number; + truth_exp: number; + recommendation: string | null; + flags: string[]; + disclaimer: string; +} + +interface Catalog { + genes: string[]; + drugs: string[]; +} + +interface Scenario { + gene: string; + input: string; + drug: string; +} + +// the four `reason` CLI demos: clean 2-hop, direct 1-hop, multi-gene flag, complex-guideline flag. 
+const EXAMPLES: { label: string; sc: Scenario }[] = [ + { label: 'CYP2C19 *2/*2 · clopidogrel', sc: { gene: 'CYP2C19', input: '*2/*2', drug: 'clopidogrel' } }, + { label: 'TPMT *3A/*3A · azathioprine', sc: { gene: 'TPMT', input: '*3A/*3A', drug: 'azathioprine' } }, + { label: 'HLA-B *57:01 positive · abacavir', sc: { gene: 'HLA-B', input: '*57:01 positive', drug: 'abacavir' } }, + { label: 'CYP2C9 *1/*1 · warfarin (complex)', sc: { gene: 'CYP2C9', input: '*1/*1', drug: 'warfarin' } }, +]; + +const ROLE_COLOR: Record = { + diplotype: '#6db3ff', + phenotype: '#7fd9a8', + recommendation: '#ffb86b', +}; + +function levelColor(level: string | null): string { + switch (level) { + case 'A': return '#35d07f'; + case 'B': return '#9ad07f'; + case 'C': return '#ffb547'; + case 'D': return '#ff8c63'; + default: return '#93a9bf'; + } +} + +export function CpicCockpit() { + const [catalog, setCatalog] = useState({ genes: [], drugs: [] }); + const [gene, setGene] = useState('CYP2C19'); + const [input, setInput] = useState('*2/*2'); + const [drug, setDrug] = useState('clopidogrel'); + const [outcome, setOutcome] = useState(null); + const [error, setError] = useState(null); + const [busy, setBusy] = useState(false); + + // pull the gene + drug pick-lists once (used as autocomplete; free text still allowed). + useEffect(() => { + let cancelled = false; + fetch('/api/cpic/catalog') + .then((r) => (r.ok ? 
r.json() : Promise.reject(new Error(`HTTP ${r.status}`)))) + .then((c: Catalog) => { if (!cancelled) setCatalog(c); }) + .catch(() => { /* catalog is optional — the inputs accept free text regardless */ }); + return () => { cancelled = true; }; + }, []); + + async function runReason(sc: Scenario) { + setBusy(true); + setError(null); + setOutcome(null); + try { + const r = await fetch('/api/cpic/reason', { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify(sc), + }); + if (!r.ok) throw new Error(`HTTP ${r.status}`); + setOutcome((await r.json()) as Outcome); + } catch (e) { + setError(`reasoning endpoint unavailable (${e}) — needs the cockpit-server backend deployed`); + } finally { + setBusy(false); + } + } + + const canReason = useMemo( + () => gene.trim() !== '' && input.trim() !== '' && drug.trim() !== '' && !busy, + [gene, input, drug, busy], + ); + + function pickExample(sc: Scenario) { + setGene(sc.gene); + setInput(sc.input); + setDrug(sc.drug); + void runReason(sc); + } + + const fieldStyle: React.CSSProperties = { + boxSizing: 'border-box', padding: '8px 10px', borderRadius: 6, + border: '1px solid #2a3242', background: '#0e1219', color: '#cdd9e5', + font: '13px ui-monospace, monospace', + }; + const chip: React.CSSProperties = { + padding: '5px 11px', borderRadius: 6, border: '1px solid #2a3242', + background: '#0e1219', color: '#9fb1c4', font: '12px ui-monospace, monospace', cursor: 'pointer', + }; + const badge = (bg: string, fg: string): React.CSSProperties => ({ + display: 'inline-block', padding: '2px 9px', borderRadius: 999, + background: bg, color: fg, font: '11px ui-monospace, monospace', marginRight: 8, + }); + + return ( +
+
+
+ CPIC pharmacogenomics +
+
+ gene → phenotype → recommendation, chained by NARS deduction over the real CPIC tables. +
+
+ confidence is CPIC-authoritative (classification → f, pair cpiclevel → c). Each node shows its + routable (part_of:is_a) GUID. POC over published CPIC rules — not clinical decision support. +
+ + {/* input row */} +
+ + + + + {catalog.genes.map((g) => + {catalog.drugs.map((d) => +
+ + {/* example chips */} +
+ examples: + {EXAMPLES.map((ex) => ( + + ))} +
+ + {error && ( +
+ {error} +
+ )} + + {outcome && ( +
+
+ {outcome.gene} {outcome.input} + {outcome.drug} +
+ {outcome.how &&
resolved via {outcome.how}
} + + {/* the reasoned chain: diplotype → phenotype → recommendation, each with its GUID */} + {outcome.chain.length > 0 && ( +
+ {outcome.chain.map((n, i) => ( +
+
+ {n.role} + {n.label} + {n.guid} +
+ {i < outcome.chain.length - 1 && ( +
+ )} +
+ ))} +
+ )} + + {outcome.resolved ? ( + <> +
+ {outcome.classification && ( + class: {outcome.classification} + )} + {outcome.cpic_level && ( + CPIC level {outcome.cpic_level} + )} + + f={outcome.truth_f.toFixed(3)} c={outcome.truth_c.toFixed(3)} · exp {outcome.truth_exp.toFixed(3)} + +
+ {outcome.recommendation && ( +
+ CPIC recommendation +
{outcome.recommendation}
+
+ )} + + ) : ( +
+ no simple phenotype → recommendation — surfaced, not fabricated. +
+ )} + + {/* flags: complexity / multi-gene / unknown-drug warnings CPIC itself raises */} + {outcome.flags.length > 0 && ( +
+ {outcome.flags.map((f, i) => ( +
⚠ {f}
+ ))} +
+ )} + +
+ {outcome.disclaimer} +
+
+ )} + + +
+
+ ); +} diff --git a/cockpit/src/main.tsx b/cockpit/src/main.tsx index 6646256ba..7871ea8db 100644 --- a/cockpit/src/main.tsx +++ b/cockpit/src/main.tsx @@ -13,6 +13,7 @@ import { TorsoSplat } from './TorsoSplat'; import { TorsoRender } from './TorsoRender'; import { TorsoMap } from './TorsoMap'; import { FmaBody } from './FmaBody'; +import { CpicCockpit } from './CpicCockpit'; import { ReasoningPage } from './ReasoningPage'; import { ErrorBoundary } from './components/ErrorBoundary'; import './styles/cockpit.css'; @@ -97,6 +98,11 @@ createRoot(document.getElementById('root')!).render( LAYER (skin/muscle/organ/skeleton/vessel/nerve buttons) + solid↔transparent. Additive; reads cockpit/public/fma_body.mesh; never touches /torso* (#57/#58). */} } /> + {/* /cpic — CPIC pharmacogenomics cockpit (gene-first): {gene, diplotype, drug} + → phenotype → recommendation, 2-hop NARS deduction over the real CPIC tables + via POST /api/cpic/reason (the standalone cpic crate). Additive, gene-first + alternative to the organ-first /fma-body. */} + } /> {/* The Palantir JSON-graph cockpit (221 aiwar nodes) stays reachable at /palantir and as the catch-all for its own sub-routes. 
*/} } /> diff --git a/cpic/Cargo.lock b/cpic/Cargo.lock index 2cb129d17..282c6dd73 100644 --- a/cpic/Cargo.lock +++ b/cpic/Cargo.lock @@ -6,6 +6,7 @@ version = 4 name = "cpic" version = "0.1.0" dependencies = [ + "serde", "serde_json", ] @@ -46,6 +47,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", + "serde_derive", ] [[package]] diff --git a/cpic/Cargo.toml b/cpic/Cargo.toml index d45e80006..97911f52f 100644 --- a/cpic/Cargo.toml +++ b/cpic/Cargo.toml @@ -16,6 +16,7 @@ license = "Apache-2.0" [workspace] [dependencies] +serde = { version = "1", features = ["derive"] } serde_json = "1" [profile.release] diff --git a/cpic/src/bin/reason.rs b/cpic/src/bin/reason.rs index a561087b6..4611c3d1e 100644 --- a/cpic/src/bin/reason.rs +++ b/cpic/src/bin/reason.rs @@ -1,298 +1,59 @@ -//! cpic reason — NARS reasoning over the REAL CPIC graph. +//! cpic reason — NARS reasoning over the REAL CPIC graph (thin CLI over the `cpic` lib). //! //! A scenario `{gene, diplotype|phenotype, drug}` is resolved to a phenotype, matched against //! the CPIC `recommendation` (via `lookupkey`), and chained by 2-hop NARS deduction //! `diplotype → phenotype → recommendation` with CPIC-**authoritative** confidence -//! (`classification` + pair `cpiclevel` → c) — the same algebra as the cockpit `clinical.rs` -//! demo, but every edge is a published CPIC fact. The routable `(part_of:is_a)` GUID prefix of -//! each chain node is printed. +//! (`classification` + pair `cpiclevel` → c). The reasoning itself lives in `cpic::reason` +//! so this CLI and the cockpit `/api/cpic/reason` endpoint share ONE implementation; this +//! binary only loads the tables and renders the structured `Outcome`. //! -//! Complex guidelines that CPIC flags as NOT simple diplotype→phenotype→rec (warfarin; any -//! multi-gene lookupkey) are surfaced, not silently auto-deduced. +//! 
Complex guidelines that CPIC flags as NOT a simple diplotype→phenotype→rec (warfarin; any +//! multi-gene lookupkey) come back `resolved == false` with `flags` explaining why — surfaced, +//! not silently auto-deduced. //! //! POC over published CPIC rules — NOT clinical decision support. //! //! Usage: reason (no args → built-in demos) -use cpic::{ - basin, cascade3, gene_part_of, norm, NodeGuid, CID_DIPLOTYPE, CID_PHENOTYPE, CID_REC, -}; -use serde_json::Value; -use std::collections::{HashMap, HashSet}; -use std::fs; +use cpic::{reason, Kb, Outcome}; -#[derive(Clone, Copy)] -struct Truth { - f: f32, - c: f32, -} -impl Truth { - /// Canonical NARS deduction `A→B, B→C ⊢ A→C`: f = f1·f2, c = f1·f2·c1·c2. - fn deduction(self, o: Truth) -> Truth { - Truth { - f: self.f * o.f, - c: self.f * o.f * self.c * o.c, - } - } - fn exp(self) -> f32 { - self.c * (self.f - 0.5) + 0.5 - } -} - -fn load(path: &str) -> Vec { - let s = fs::read_to_string(path).unwrap_or_else(|e| panic!("read {path}: {e}")); - serde_json::from_str(&s).unwrap_or_else(|e| panic!("parse {path}: {e}")) -} - -/// Routable `(part_of:is_a)` GUID prefix (classid-HEEL-HIP-TWIG-family); identity is the -/// basin-local mint allocated at ingest, shown as `··`. -fn addr(classid: u32, part: &[String], isa: &[String], basin_key: &str) -> String { - let g = NodeGuid::mint(classid, cascade3(part), cascade3(isa), basin(basin_key), 0); - format!( - "{:08x}-{:04x}-{:04x}-{:04x}-{:06x}··", - g.classid, g.heel, g.hip, g.twig, g.family - ) -} - -/// Functional-status → activity rank (the transparent simple combination rule). 
-fn rank(status: &str) -> Option { - let s = status.to_lowercase(); - if s.contains("no function") { - Some(0.0) - } else if s.contains("decreased") { - Some(0.5) - } else if s.contains("increased") { - Some(2.0) - } else if s.contains("normal") { - Some(1.0) - } else { - None // uncertain / unknown — not resolvable by the simple rule - } -} - -/// Activity-score sum → metabolizer class (approximates CPIC's CYP2D6 activity-score bands; -/// allele-count genes like CYP2C19/TPMT fall out of the same thresholds). -fn class_from_score(score: f32) -> &'static str { - if score <= 0.0 { - "Poor" - } else if score <= 1.0 { - "Intermediate" - } else if score <= 2.25 { - "Normal" - } else { - "Ultrarapid" - } -} - -struct Kb { - allele_status: HashMap<(String, String), String>, // (gene, allele) -> functional status - gene_results: HashSet<(String, String)>, // (gene, phenotype-string) that exist - recs: Vec, - drug_by_name: HashMap, // lc name -> (drugid, display name) - pair_level: HashMap<(String, String), String>, // (gene, drugid) -> cpiclevel - pair_gid: HashMap<(String, String), i64>, // (gene, drugid) -> guidelineid - guideline: HashMap, usize)>, // gid -> (notesonusage, gene-count) -} - -fn load_kb(dir: &str) -> Kb { - let p = |f: &str| format!("{dir}/{f}"); - let mut allele_status = HashMap::new(); - for v in load(&p("allele.json")) { - if let (Some(g), Some(n)) = (v["genesymbol"].as_str(), v["name"].as_str()) { - if let Some(s) = v["clinicalfunctionalstatus"].as_str() { - allele_status.insert((g.to_string(), n.to_string()), s.to_string()); - } - } - } - let mut gene_results = HashSet::new(); - for v in load(&p("gene_result.json")) { - if let (Some(g), Some(r)) = (v["genesymbol"].as_str(), v["result"].as_str()) { - gene_results.insert((g.to_string(), r.to_string())); - } - } - let mut drug_by_name = HashMap::new(); - for v in load(&p("drug.json")) { - if let (Some(id), Some(n)) = (v["drugid"].as_str(), v["name"].as_str()) { - drug_by_name.insert(n.to_lowercase(), 
(id.to_string(), n.to_string())); - } - } - let mut pair_level = HashMap::new(); - let mut pair_gid = HashMap::new(); - for v in load(&p("pair.json")) { - if let (Some(g), Some(d)) = (v["genesymbol"].as_str(), v["drugid"].as_str()) { - if let Some(l) = v["cpiclevel"].as_str() { - pair_level.insert((g.to_string(), d.to_string()), l.to_string()); - } - if let Some(gid) = v["guidelineid"].as_i64() { - pair_gid.insert((g.to_string(), d.to_string()), gid); - } - } - } - let mut guideline = HashMap::new(); - for v in load(&p("guideline.json")) { - if let Some(id) = v["id"].as_i64() { - let notes = v["notesonusage"].as_str().map(|s| s.to_string()); - let ngenes = v["genes"].as_array().map(|a| a.len()).unwrap_or(0); - guideline.insert(id, (notes, ngenes)); +/// Render a structured `Outcome` to the console — the same chain the cockpit draws visually. +fn print_outcome(o: &Outcome) { + println!( + "\n══ {} {} + {} ═════════════════════════════════════", + o.gene, o.input, o.drug + ); + if !o.resolved { + // unresolved: the flags say WHY (unknown drug, unresolvable phenotype, complex / + // multi-gene guideline). The POC surfaces the reason; it never fabricates a rec. + for f in &o.flags { + println!(" ⚠ {f}"); } - } - Kb { - allele_status, - gene_results, - recs: load(&p("recommendation.json")), - drug_by_name, - pair_level, - pair_gid, - guideline, - } -} - -/// Resolve the scenario's 2nd arg to a phenotype string + the t1 (input→phenotype) truth. -/// Direct phenotype/allele-status input → near-certain; diplotype → the simple rule (lower c). 
-fn resolve_phenotype(kb: &Kb, gene: &str, input: &str) -> Option<(String, Truth, String)> { - // direct: the input already names a phenotype/allele-status CPIC knows for this gene - if kb.gene_results.contains(&(gene.to_string(), input.to_string())) { - return Some((input.to_string(), Truth { f: 1.0, c: 0.99 }, "direct phenotype".into())); - } - // diplotype: split into two alleles, combine functional ranks - let alleles: Vec<&str> = input.split('/').collect(); - if alleles.len() != 2 { - return None; - } - let mut score = 0.0; - for al in &alleles { - let st = kb.allele_status.get(&(gene.to_string(), al.trim().to_string()))?; - score += rank(st)?; - } - let class = class_from_score(score); - // match the resolved class to CPIC's vocabulary for this gene (Metabolizer vs Function) - let cands = [ - format!("{class} Metabolizer"), - format!("{class} Function"), - if score == 0.5 { "Decreased Function".to_string() } else { String::new() }, - ]; - let pheno = cands - .iter() - .find(|c| !c.is_empty() && kb.gene_results.contains(&(gene.to_string(), (*c).clone())))?; - // confidence: homozygous clean call > heterozygous/mixed - let c = if alleles[0].trim() == alleles[1].trim() { 0.85 } else { 0.7 }; - Some((pheno.clone(), Truth { f: 1.0, c }, format!("simple rule (score {score})"))) -} - -fn reason(kb: &Kb, gene: &str, input: &str, drug: &str) { - println!("\n══ {gene} {input} + {drug} ═════════════════════════════════════"); - let Some((drugid, drugname)) = kb.drug_by_name.get(&drug.to_lowercase()).cloned() else { - println!(" drug '{drug}' not in CPIC drug table."); return; - }; - let Some((pheno, t1, how)) = resolve_phenotype(kb, gene, input) else { - println!(" could not resolve a phenotype for {gene} {input}"); - println!(" (gene may use a non-standard scheme — provide the phenotype string directly)."); - return; - }; - - // match the CPIC recommendation: same drug, lookupkey[gene] == phenotype - let mut hit: Option<&Value> = None; - for r in &kb.recs { - if 
r["drugid"].as_str() != Some(drugid.as_str()) { - continue; - } - if let Some(lk) = r["lookupkey"].as_object() { - if lk.get(gene).and_then(|v| v.as_str()) == Some(pheno.as_str()) { - hit = Some(r); - break; - } - } } - let Some(rec) = hit else { - println!(" phenotype {gene} {pheno}: no simple phenotype→rec for {drugname}."); - // surface WHY: warfarin et al. use a dosing algorithm, flagged in the guideline notes - if let Some(gid) = kb.pair_gid.get(&(gene.to_string(), drugid.clone())) { - if let Some((Some(notes), _)) = kb.guideline.get(gid) { - println!(" ⚠ COMPLEX guideline g{gid} (CPIC note): {notes}"); - } - } - return; - }; - - let gid = rec["guidelineid"].as_i64().unwrap_or(0); - let class = rec["classification"].as_str().unwrap_or("n/a"); - let text = rec["drugrecommendation"].as_str().unwrap_or(""); - let level = kb - .pair_level - .get(&(gene.to_string(), drugid.clone())) - .cloned() - .unwrap_or_default(); - - // t2 (phenotype → recommendation): f from classification, c from pair cpic level - let class_f = match class { - "Strong" => 0.95, - "Moderate" => 0.8, - "Optional" => 0.6, - _ => 0.65, - }; - let level_c = match level.as_str() { - "A" => 0.95, - "B" => 0.85, - "C" => 0.65, - "D" => 0.45, - _ => 0.7, - }; - let t2 = Truth { f: class_f, c: level_c }; - let t = t1.deduction(t2); - - // the (part_of:is_a) addresses of the chain - let dip_is_diplo = input.contains('/'); - let p_dip = { - let mut p = gene_part_of(gene); - p.push("diplotypes".into()); - p.push(norm(input)); - p - }; - let p_pheno = { - let mut p = gene_part_of(gene); - p.push("phenotypes".into()); - p.push(norm(&pheno)); - p - }; - let p_rec = vec!["recommendations".into(), format!("g{gid}"), norm(&drugid)]; - - if dip_is_diplo { - println!( - " diplotype {gene} {input:<10} {}", - addr(CID_DIPLOTYPE, &p_dip, &["diplotype".into(), "x".into()], gene) - ); - println!(" │ maps_to (t={:.2}/{:.2}, {how})", t1.f, t1.c); - } else { - println!(" input {gene} {input} ({how})"); + // the reasoned 
chain — each node carries its routable (part_of:is_a) GUID prefix + for n in &o.chain { + println!(" {:<14} {:<26} {}", n.role, n.label, n.guid); } + if let (Some(class), Some(level)) = (&o.classification, &o.cpic_level) { + println!(" │ recommends (class={class}, cpic level {level})"); + } + println!("\n ⊢ NARS deduction {} {} → recommendation", o.gene, o.input); println!( - " phenotype {gene} {pheno:<22} {}", - addr(CID_PHENOTYPE, &p_pheno, &["phenotype".into(), norm(&pheno)], gene) - ); - println!(" │ recommends (class={class}, cpic level {level} → t={class_f:.2}/{level_c:.2})"); - println!( - " rec g{gid} → {class:<8} {}", - addr(CID_REC, &p_rec, &["recommendation".into(), norm(class)], &format!("rec:g{gid}")) + " truth f={:.3} c={:.3} (expectation {:.3})", + o.truth_f, o.truth_c, o.truth_exp ); - println!("\n ⊢ NARS deduction {gene} {input} → recommendation"); - println!(" truth f={:.3} c={:.3} (expectation {:.3})", t.f, t.c, t.exp()); - println!(" CPIC says: {text}"); - - // surface the complexity flags CPIC itself raises - if let Some((notes, ngenes)) = kb.guideline.get(&gid) { - if let Some(n) = notes { - println!(" ⚠ COMPLEX guideline (CPIC note): {n}"); - } - if *ngenes > 1 { - println!(" ⚠ MULTI-GENE guideline ({ngenes} genes) — single-gene deduction is partial; consult the full guideline."); - } + if let Some(text) = &o.recommendation { + println!(" CPIC says: {text}"); + } + for f in &o.flags { + println!(" ⚠ {f}"); } } fn main() { let a: Vec = std::env::args().collect(); - // allow an optional leading data dir via env; default cpic/data, or `data` if run from cpic/ + // optional data dir via env; default `data` when run from cpic/, else `cpic/data`. 
let dir = std::env::var("CPIC_DATA").unwrap_or_else(|_| { if std::path::Path::new("data/gene.json").exists() { "data".into() @@ -300,18 +61,22 @@ fn main() { "cpic/data".into() } }); - let kb = load_kb(&dir); + let kb = Kb::load(&dir); println!("cpic reason — NARS over real CPIC edges (classification + cpiclevel → confidence)."); println!("POC over published CPIC rules — NOT clinical decision support."); if a.len() >= 4 { - reason(&kb, &a[1], &a[2], &a[3]); + print_outcome(&reason(&kb, &a[1], &a[2], &a[3])); return; } // built-in demos: clean 2-hop, direct 1-hop, multi-gene flag, complex-guideline flag - reason(&kb, "CYP2C19", "*2/*2", "clopidogrel"); - reason(&kb, "HLA-B", "*57:01 positive", "abacavir"); - reason(&kb, "TPMT", "*3A/*3A", "azathioprine"); - reason(&kb, "CYP2C9", "*1/*1", "warfarin"); + for (g, i, d) in [ + ("CYP2C19", "*2/*2", "clopidogrel"), + ("HLA-B", "*57:01 positive", "abacavir"), + ("TPMT", "*3A/*3A", "azathioprine"), + ("CYP2C9", "*1/*1", "warfarin"), + ] { + print_outcome(&reason(&kb, g, i, d)); + } } diff --git a/cpic/src/lib.rs b/cpic/src/lib.rs index 87be52b81..ee9bad060 100644 --- a/cpic/src/lib.rs +++ b/cpic/src/lib.rs @@ -164,3 +164,406 @@ pub fn func_class(status: Option<&str>) -> String { } .to_string() } + +// ════════════════════════════════════════════════════════════════════════════════════ +// Reasoning over the real CPIC graph — lives HERE (not just in `bin/reason.rs`) so the +// CLI and a server endpoint (cockpit `/api/cpic/reason`) share ONE implementation. +// `{gene, diplotype|phenotype, drug}` → phenotype → recommendation, 2-hop NARS deduction +// with CPIC-authoritative confidence (`classification`→f, pair `cpiclevel`→c). +// ════════════════════════════════════════════════════════════════════════════════════ + +use serde::Serialize; +use serde_json::Value; +use std::collections::{HashMap, HashSet}; + +/// NARS truth `(f, c)`. `deduction` is the canonical `A→B, B→C ⊢ A→C` (`f=f1·f2, c=f1·f2·c1·c2`). 
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Truth {
+    /// Frequency component of the truth value.
+    pub f: f32,
+    /// Confidence component of the truth value.
+    pub c: f32,
+}
+impl Truth {
+    /// Chain `self` (A→B) with `o` (B→C) into A→C: `f = f1·f2`, `c = f1·f2·c1·c2`.
+    pub fn deduction(self, o: Truth) -> Truth {
+        Truth { f: self.f * o.f, c: self.f * o.f * self.c * o.c }
+    }
+    /// Expectation of the truth value: `c·(f − 0.5) + 0.5`.
+    pub fn exp(self) -> f32 {
+        self.c * (self.f - 0.5) + 0.5
+    }
+}
+
+/// Functional-status → activity rank (the transparent simple combination rule).
+///
+/// Matching is case-insensitive substring matching, tested in the order below; the first
+/// keyword found wins. Returns `None` when the status contains none of the keywords.
+fn rank(status: &str) -> Option<f32> {
+    let s = status.to_lowercase();
+    if s.contains("no function") {
+        Some(0.0)
+    } else if s.contains("decreased") {
+        Some(0.5)
+    } else if s.contains("increased") {
+        Some(2.0)
+    } else if s.contains("normal") {
+        Some(1.0)
+    } else {
+        None
+    }
+}
+
+/// Activity-score sum → metabolizer class (CYP2D6 activity-score bands; allele-count genes
+/// like CYP2C19/TPMT fall out of the same thresholds).
+///
+/// Bands are inclusive upper bounds: `<= 0` Poor, `<= 1` Intermediate, `<= 2.25` Normal,
+/// anything above is Ultrarapid.
+fn class_from_score(score: f32) -> &'static str {
+    if score <= 0.0 {
+        "Poor"
+    } else if score <= 1.0 {
+        "Intermediate"
+    } else if score <= 2.25 {
+        "Normal"
+    } else {
+        "Ultrarapid"
+    }
+}
+
+/// Routable `(part_of:is_a)` GUID prefix (classid-HEEL-HIP-TWIG-family); identity (the
+/// basin-local mint allocated at ingest) is shown as `··`.
+fn addr(classid: u32, part: &[String], isa: &[String], basin_key: &str) -> String {
+    // The last mint argument is always 0 here; only the prefix fields are formatted.
+    let g = NodeGuid::mint(classid, cascade3(part), cascade3(isa), basin(basin_key), 0);
+    format!(
+        "{:08x}-{:04x}-{:04x}-{:04x}-{:06x}··",
+        g.classid, g.heel, g.hip, g.twig, g.family
+    )
+}
+
+/// The CPIC knowledge base parsed from the six published tables `reason` consults.
+pub struct Kb {
+    allele_status: HashMap<(String, String), String>, // (gene, allele) -> functional status
+    gene_results: HashSet<(String, String)>,          // (gene, phenotype-string) that exist
+    recs: Vec<Value>,                                 // raw recommendation rows, scanned by `reason`
+    drug_by_name: HashMap<String, (String, String)>,  // lc name -> (drugid, display name)
+    pair_level: HashMap<(String, String), String>,    // (gene, drugid) -> cpiclevel
+    pair_gid: HashMap<(String, String), i64>,         // (gene, drugid) -> guidelineid
+    guideline: HashMap<i64, (Option<String>, usize)>, // gid -> (notesonusage, gene-count)
+}
+
+impl Kb {
+    /// Parse the KB from the six table JSON strings (each a JSON array of objects).
+    ///
+    /// Parsing is best-effort: a table that fails to deserialize is treated as empty, and a
+    /// row missing the string/integer fields an index needs is skipped for that index.
+    pub fn from_jsons(
+        allele: &str,
+        gene_result: &str,
+        drug: &str,
+        pair: &str,
+        guideline: &str,
+        recommendation: &str,
+    ) -> Self {
+        // NOTE(review): a malformed table silently becomes an empty Vec here, so a broken data
+        // file shows up later as "not in the CPIC drug table"-style flags, not as a load error.
+        let arr = |s: &str| -> Vec<Value> { serde_json::from_str(s).unwrap_or_default() };
+        let mut allele_status = HashMap::new();
+        for v in arr(allele) {
+            if let (Some(g), Some(n)) = (v["genesymbol"].as_str(), v["name"].as_str()) {
+                if let Some(st) = v["clinicalfunctionalstatus"].as_str() {
+                    allele_status.insert((g.to_string(), n.to_string()), st.to_string());
+                }
+            }
+        }
+        let mut gene_results = HashSet::new();
+        for v in arr(gene_result) {
+            if let (Some(g), Some(r)) = (v["genesymbol"].as_str(), v["result"].as_str()) {
+                gene_results.insert((g.to_string(), r.to_string()));
+            }
+        }
+        // Keyed by lowercased name so drug lookup is case-insensitive; display name is kept.
+        let mut drug_by_name = HashMap::new();
+        for v in arr(drug) {
+            if let (Some(id), Some(n)) = (v["drugid"].as_str(), v["name"].as_str()) {
+                drug_by_name.insert(n.to_lowercase(), (id.to_string(), n.to_string()));
+            }
+        }
+        // A pair row may carry a level, a guideline id, both, or neither; each is indexed
+        // independently.
+        let mut pair_level = HashMap::new();
+        let mut pair_gid = HashMap::new();
+        for v in arr(pair) {
+            if let (Some(g), Some(d)) = (v["genesymbol"].as_str(), v["drugid"].as_str()) {
+                if let Some(l) = v["cpiclevel"].as_str() {
+                    pair_level.insert((g.to_string(), d.to_string()), l.to_string());
+                }
+                if let Some(gid) = v["guidelineid"].as_i64() {
+                    pair_gid.insert((g.to_string(), d.to_string()), gid);
+                }
+            }
+        }
+        let mut gl = HashMap::new();
+        for v in arr(guideline) {
+            if let Some(id) = v["id"].as_i64() {
+                let notes = v["notesonusage"].as_str().map(|s| s.to_string());
+                // Gene count is the length of the `genes` array, or 0 when it is absent.
+                let ngenes = v["genes"].as_array().map(|a| a.len()).unwrap_or(0);
+                gl.insert(id, (notes, ngenes));
+            }
+        }
+        Kb {
+            allele_status,
+            gene_results,
+            recs: arr(recommendation),
+            drug_by_name,
+            pair_level,
+            pair_gid,
+            guideline: gl,
+        }
+    }
+
+    /// Load the six tables from a data directory (the CLI path).
+    ///
+    /// # Panics
+    ///
+    /// Panics with `read {dir}/{file}: {error}` if any of the six files cannot be read.
+    pub fn load(dir: &str) -> Self {
+        let r = |f: &str| {
+            std::fs::read_to_string(format!("{dir}/{f}"))
+                .unwrap_or_else(|e| panic!("read {dir}/{f}: {e}"))
+        };
+        Self::from_jsons(
+            &r("allele.json"),
+            &r("gene_result.json"),
+            &r("drug.json"),
+            &r("pair.json"),
+            &r("guideline.json"),
+            &r("recommendation.json"),
+        )
+    }
+
+    /// The six tables baked into the binary — the server / default path, no runtime files.
+    pub fn embedded() -> Self {
+        Self::from_jsons(
+            include_str!("../data/allele.json"),
+            include_str!("../data/gene_result.json"),
+            include_str!("../data/drug.json"),
+            include_str!("../data/pair.json"),
+            include_str!("../data/guideline.json"),
+            include_str!("../data/recommendation.json"),
+        )
+    }
+
+    /// Sorted gene symbols + drug names (for the cockpit's pickers).
+    ///
+    /// Genes come from the `gene_result` index and drugs from the display names in the drug
+    /// index; both lists are sorted and de-duplicated.
+    pub fn catalog(&self) -> Catalog {
+        let mut genes: Vec<String> = self.gene_results.iter().map(|(g, _)| g.clone()).collect();
+        genes.sort();
+        genes.dedup();
+        let mut drugs: Vec<String> = self.drug_by_name.values().map(|(_, n)| n.clone()).collect();
+        drugs.sort();
+        drugs.dedup();
+        Catalog { genes, drugs }
+    }
+}
+
+/// Pickable genes + drugs for the frontend dropdowns.
+#[derive(Clone, Serialize)]
+pub struct Catalog {
+    pub genes: Vec<String>,
+    pub drugs: Vec<String>,
+}
+
+/// One node of the reasoned chain, carrying its routable `(part_of:is_a)` GUID prefix.
+#[derive(Clone, Debug, Serialize)]
+pub struct ChainNode {
+    pub role: String, // "diplotype" | "phenotype" | "recommendation"
+    pub label: String,
+    pub guid: String,
+}
+
+/// The structured reasoning result — what the CLI prints and the cockpit renders.
+///
+/// The `Option` fields stay `None` (and the `truth_*` fields stay `0.0`) until the step that
+/// produces them has succeeded; `resolved` is set only once a recommendation row matched.
+#[derive(Clone, Debug, Serialize)]
+pub struct Outcome {
+    pub gene: String,
+    pub input: String,
+    pub drug: String,
+    pub resolved: bool,
+    pub phenotype: Option<String>,
+    pub how: Option<String>,
+    pub chain: Vec<ChainNode>,
+    pub classification: Option<String>,
+    pub cpic_level: Option<String>,
+    pub truth_f: f32,
+    pub truth_c: f32,
+    pub truth_exp: f32,
+    pub recommendation: Option<String>,
+    pub flags: Vec<String>,
+    pub disclaimer: String,
+}
+
+/// An unresolved `Outcome` echoing the scenario: empty chain and flags, zeroed truth values,
+/// and the fixed POC disclaimer.
+fn new_outcome(gene: &str, input: &str, drug: &str) -> Outcome {
+    Outcome {
+        gene: gene.into(),
+        input: input.into(),
+        drug: drug.into(),
+        resolved: false,
+        phenotype: None,
+        how: None,
+        chain: vec![],
+        classification: None,
+        cpic_level: None,
+        truth_f: 0.0,
+        truth_c: 0.0,
+        truth_exp: 0.0,
+        recommendation: None,
+        flags: vec![],
+        disclaimer: "POC over published CPIC rules — NOT clinical decision support.".into(),
+    }
+}
+
+/// Resolve the 2nd arg to a phenotype + the t1 (input→phenotype) truth. A direct phenotype is
+/// near-certain; a diplotype is combined by the transparent simple allele-function rule (lower c).
+fn resolve_phenotype(kb: &Kb, gene: &str, input: &str) -> Option<(String, Truth, String)> {
+    // Fast path: the input is already a result string CPIC lists for this gene.
+    let direct_key = (gene.to_string(), input.to_string());
+    if kb.gene_results.contains(&direct_key) {
+        let near_certain = Truth { f: 1.0, c: 0.99 };
+        return Some((direct_key.1, near_certain, "direct phenotype".into()));
+    }
+
+    // Otherwise the input must be exactly two '/'-separated alleles.
+    let parts: Vec<&str> = input.split('/').collect();
+    let &[first, second] = parts.as_slice() else {
+        return None;
+    };
+    let (first, second) = (first.trim(), second.trim());
+
+    // Each allele needs a known functional status that maps to an activity rank.
+    let activity = |allele: &str| -> Option<f32> {
+        let status = kb.allele_status.get(&(gene.to_string(), allele.to_string()))?;
+        rank(status)
+    };
+    let score = activity(first)? + activity(second)?;
+
+    // Try the phenotype spellings in priority order; the first one CPIC lists wins.
+    let class = class_from_score(score);
+    let mut candidates = vec![format!("{class} Metabolizer"), format!("{class} Function")];
+    if score == 0.5 {
+        candidates.push("Decreased Function".to_string());
+    }
+    let pheno = candidates
+        .into_iter()
+        .find(|cand| kb.gene_results.contains(&(gene.to_string(), cand.clone())))?;
+
+    // Two identical alleles get a higher confidence than a mixed pair.
+    let confidence = if first == second { 0.85 } else { 0.7 };
+    let truth = Truth { f: 1.0, c: confidence };
+    Some((pheno, truth, format!("simple rule (score {score})")))
+}
+
+/// Reason a `{gene, diplotype|phenotype, drug}` scenario over the real CPIC graph → `Outcome`.
+/// `resolved == false` means CPIC has no simple phenotype→recommendation (the `flags` say why —
+/// e.g. a complex / multi-gene guideline), which the POC surfaces instead of fabricating.
+pub fn reason(kb: &Kb, gene: &str, input: &str, drug: &str) -> Outcome {
+    // Start from an unresolved outcome; every early return below hands it back with a flag
+    // explaining which step failed.
+    let mut o = new_outcome(gene, input, drug);
+
+    // Drug names are matched case-insensitively (the index is keyed by lowercased name).
+    let Some((drugid, _drugname)) = kb.drug_by_name.get(&drug.to_lowercase()).cloned() else {
+        o.flags.push(format!("drug '{drug}' is not in the CPIC drug table"));
+        return o;
+    };
+    let Some((pheno, t1, how)) = resolve_phenotype(kb, gene, input) else {
+        o.flags
+            .push(format!("could not resolve a phenotype for {gene} {input} — provide the phenotype string directly"));
+        return o;
+    };
+    o.phenotype = Some(pheno.clone());
+    o.how = Some(how.clone());
+
+    // diplotype + phenotype chain nodes (emitted once a phenotype resolves)
+    // A '/' in the input is taken to mean a diplotype was given, so a diplotype node is
+    // emitted ahead of the phenotype node.
+    if input.contains('/') {
+        let mut p = gene_part_of(gene);
+        p.push("diplotypes".into());
+        p.push(norm(input));
+        o.chain.push(ChainNode {
+            role: "diplotype".into(),
+            label: format!("{gene} {input}"),
+            guid: addr(CID_DIPLOTYPE, &p, &["diplotype".into(), "x".into()], gene),
+        });
+    }
+    {
+        let mut p = gene_part_of(gene);
+        p.push("phenotypes".into());
+        p.push(norm(&pheno));
+        o.chain.push(ChainNode {
+            role: "phenotype".into(),
+            label: format!("{gene} {pheno}"),
+            guid: addr(CID_PHENOTYPE, &p, &["phenotype".into(), norm(&pheno)], gene),
+        });
+    }
+
+    // match the CPIC recommendation: same drug, lookupkey[gene] == phenotype
+    // Only this gene's entry in `lookupkey` is compared; the first matching row is used.
+    let rec = kb.recs.iter().find(|r| {
+        r["drugid"].as_str() == Some(drugid.as_str())
+            && r["lookupkey"]
+                .as_object()
+                .and_then(|lk| lk.get(gene))
+                .and_then(|v| v.as_str())
+                == Some(pheno.as_str())
+    });
+
+    // No matching row: flag it, and add the guideline's usage note when the gene/drug pair
+    // points at a guideline that has one. The phenotype (and chain so far) stay on the outcome.
+    let Some(rec) = rec else {
+        o.flags
+            .push(format!("no simple phenotype→recommendation for {gene} {pheno} + {drug}"));
+        if let Some(gid) = kb.pair_gid.get(&(gene.to_string(), drugid.clone())) {
+            if let Some((Some(notes), _)) = kb.guideline.get(gid) {
+                o.flags.push(format!("COMPLEX guideline g{gid} (CPIC): {notes}"));
+            }
+        }
+        return o;
+    };
+
+    // Missing fields fall back to guideline id 0, classification "n/a", empty recommendation
+    // text, and an empty level string.
+    let gid = rec["guidelineid"].as_i64().unwrap_or(0);
+    let class = rec["classification"].as_str().unwrap_or("n/a").to_string();
+    let text = rec["drugrecommendation"].as_str().unwrap_or("").to_string();
+    let level = kb
+        .pair_level
+        .get(&(gene.to_string(), drugid.clone()))
+        .cloned()
+        .unwrap_or_default();
+
+    // classification → f and pair cpiclevel → c; unrecognised values take the catch-all arm.
+    let class_f = match class.as_str() {
+        "Strong" => 0.95,
+        "Moderate" => 0.8,
+        "Optional" => 0.6,
+        _ => 0.65,
+    };
+    let level_c = match level.as_str() {
+        "A" => 0.95,
+        "B" => 0.85,
+        "C" => 0.65,
+        "D" => 0.45,
+        _ => 0.7,
+    };
+    // Chain t1 (input → phenotype) with the phenotype → recommendation truth.
+    let t = t1.deduction(Truth { f: class_f, c: level_c });
+
+    {
+        let p = vec!["recommendations".into(), format!("g{gid}"), norm(&drugid)];
+        o.chain.push(ChainNode {
+            role: "recommendation".into(),
+            label: format!("g{gid} → {class}"),
+            guid: addr(CID_REC, &p, &["recommendation".into(), norm(&class)], &format!("rec:g{gid}")),
+        });
+    }
+
+    o.resolved = true;
+    o.classification = Some(class);
+    o.cpic_level = Some(level);
+    o.truth_f = t.f;
+    o.truth_c = t.c;
+    o.truth_exp = t.exp();
+    o.recommendation = Some(text);
+
+    // A resolved outcome can still carry flags: the guideline's usage note, and a warning
+    // when the guideline lists more than one gene.
+    if let Some((notes, ngenes)) = kb.guideline.get(&gid) {
+        if let Some(n) = notes {
+            o.flags.push(format!("COMPLEX guideline (CPIC note): {n}"));
+        }
+        if *ngenes > 1 {
+            o.flags
+                .push(format!("MULTI-GENE guideline ({ngenes} genes) — single-gene deduction is partial"));
+        }
+    }
+    o
+}
+
+#[cfg(test)]
+mod reason_tests {
+    use super::*;
+
+    // Each test parses the embedded tables afresh.
+    fn kb() -> Kb {
+        Kb::embedded()
+    }
+
+    // Full 2-hop path: diplotype → phenotype → recommendation against the embedded tables.
+    #[test]
+    fn cyp2c19_poor_metabolizer_clopidogrel_strong() {
+        let o = reason(&kb(), "CYP2C19", "*2/*2", "clopidogrel");
+        assert!(o.resolved, "flags: {:?}", o.flags);
+        assert_eq!(o.phenotype.as_deref(), Some("Poor Metabolizer"));
+        assert_eq!(o.classification.as_deref(), Some("Strong"));
+        assert!(o.truth_c > 0.0 && o.truth_f > 0.0);
+        assert_eq!(o.chain.len(), 3); // diplotype, phenotype, recommendation
+        assert!(o.recommendation.unwrap().to_lowercase().contains("clopidogrel"));
+    }
+
+    // An unknown drug must stop at the first step with a flag and stay unresolved.
+    #[test]
+    fn unknown_drug_is_flagged_not_fabricated() {
+        let o = reason(&kb(), "CYP2C19", "*2/*2", "definitely_not_a_drug");
+        assert!(!o.resolved);
+        assert!(o.flags.iter().any(|f| f.contains("not in the CPIC drug table")));
+    }
+}
diff --git a/crates/cockpit-server/Cargo.toml b/crates/cockpit-server/Cargo.toml index 9f9cc12dc..23e4eb1d3 100644 --- a/crates/cockpit-server/Cargo.toml +++ b/crates/cockpit-server/Cargo.toml @@ -49,6 +49,10 @@ notebook-query.workspace = true # AIwar renderer — loads graph data into the engine aiwar-ingest = { path = "../aiwar-ingest" } +# CPIC pharmacogenomics — standalone crate (serde + serde_json only, NO lance +# closure). Provides cpic::reason over the baked CPIC tables for the /cpic panel. +cpic = { path = "../../cpic" } + # ── Quarto rendering pipeline ─────────────────────────────────────── # pampa parses .qmd, quarto-core renders, deno_core executes JS/TS cells quarto-core.workspace = true diff --git a/crates/cockpit-server/src/main.rs b/crates/cockpit-server/src/main.rs index f461c2082..ebc493d4b 100644 --- a/crates/cockpit-server/src/main.rs +++ b/crates/cockpit-server/src/main.rs @@ -30,6 +30,7 @@ use tower_http::cors::CorsLayer; mod openai; mod graph_engine; mod clinical; +mod pgx; mod osint_gotham; mod scene_player; mod shader_stream; @@ -171,6 +172,9 @@ async fn main() { .route("/api/graph/health", get(graph_engine::graph_health_handler)) // Clinical NARS reasoning for the /fma-body organ panel (real TruthValue::deduction) .route("/api/clinical/reason", post(clinical::clinical_reason_handler)) + // CPIC pharmacogenomics for the /cpic cockpit (cpic::reason over the baked CPIC tables) + .route("/api/cpic/reason", post(pgx::cpic_reason_handler)) + .route("/api/cpic/catalog", get(pgx::cpic_catalog_handler)) // OSINT domain (classid 0x0700): the harvest as a CANON family-basin graph // (round→anchor basins, GUID-v2 tail), displayed via the OGAR ClassView.
.route("/api/graph/osint", get(osint_gotham::osint_graph_handler)) diff --git a/crates/cockpit-server/src/pgx.rs b/crates/cockpit-server/src/pgx.rs new file mode 100644 index 000000000..b91d50331 --- /dev/null +++ b/crates/cockpit-server/src/pgx.rs @@ -0,0 +1,63 @@
+//! CPIC pharmacogenomics — the `/cpic` cockpit's gene-first panel, backed by the standalone
+//! `cpic` crate's `reason()` over the REAL published CPIC tables (allele, gene_result, drug,
+//! pair, guideline, recommendation), baked into the binary via `cpic::Kb::embedded()` so the
+//! endpoint needs no runtime data files (Railway-safe).
+//!
+//! A scenario `{gene, input, drug}` resolves to a phenotype and chains
+//! `diplotype → phenotype → recommendation` by 2-hop NARS deduction with CPIC-**authoritative**
+//! confidence (`classification`→f, pair `cpiclevel`→c). Each chain node carries its routable
+//! `(part_of:is_a)` GUID prefix. This is the SAME `cpic::reason` the `reason` CLI calls —
+//! one engine, two surfaces (the CLAUDE module-named `cpic` crate is reached unambiguously
+//! here; this module is `pgx` precisely so it does not shadow that dependency).
+//!
+//! POC over published CPIC rules — NOT clinical decision support. The frontend shows that
+//! disclaimer in-view.
+
+use std::sync::LazyLock;
+
+use cpic::{reason, Catalog, Kb, Outcome};
+
+/// The CPIC knowledge base, parsed once from the tables baked into the binary
+/// (`include_str!` of `cpic/data/*.json`). No runtime files; safe on a fresh container.
+static KB: LazyLock<Kb> = LazyLock::new(Kb::embedded);
+
+/// `POST /api/cpic/reason` request — `{gene, input, drug}`. `input` is a diplotype like
+/// `*2/*2`, or a phenotype / allele-status string CPIC already knows for the gene.
+#[derive(serde::Deserialize)]
+pub struct CpicScenario {
+    #[serde(default)]
+    pub gene: String,
+    #[serde(default)]
+    pub input: String,
+    #[serde(default)]
+    pub drug: String,
+}
+
+/// `POST /api/cpic/reason` → the structured `Outcome` (`resolved`, `phenotype`, `chain[]` with
+/// routable GUIDs, `classification`, `cpic_level`, `truth_{f,c,exp}`, `recommendation`, `flags`,
+/// `disclaimer`). When CPIC has no simple phenotype→rec, `resolved == false` and `flags` say why
+/// (unknown drug, unresolvable phenotype, complex / multi-gene guideline) — never fabricated.
+pub async fn cpic_reason_handler(axum::Json(sc): axum::Json<CpicScenario>) -> axum::Json<Outcome> {
+    axum::Json(reason(&KB, &sc.gene, &sc.input, &sc.drug))
+}
+
+/// `GET /api/cpic/catalog` → sorted gene + drug pick-lists for the cockpit's dropdowns.
+pub async fn cpic_catalog_handler() -> axum::Json<Catalog> {
+    axum::Json(KB.catalog())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Guards the integration wiring: the baked KB (`include_str!` paths resolving against the
+    // cpic crate) loads in the cockpit-server build, and a canonical scenario reasons through.
+    #[test]
+    fn embedded_kb_reasons_and_has_a_catalog() {
+        let o = reason(&KB, "CYP2C19", "*2/*2", "clopidogrel");
+        assert!(o.resolved, "flags: {:?}", o.flags);
+        assert_eq!(o.phenotype.as_deref(), Some("Poor Metabolizer"));
+        let cat = KB.catalog();
+        assert!(!cat.genes.is_empty() && !cat.drugs.is_empty());
+    }
+}