From 68465f8aff4390cf22b9bded39ef221ffe9ba2f5 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 20 Jun 2026 12:29:59 -0500 Subject: [PATCH] fix(node): preserve promisor mirror mode on unknown withheld-paths lookup (#48) Peer sync decided mirror mode from the origin's withheld-paths answer via classify_mirror, whose _ => Plain arm collapsed both Some(empty) (genuinely public) and None (lookup 404'd / 5xx / network / parse error) into Plain. For an existing promisor mirror that meant fetch_repo(.., Plain) unset remote.origin.promisor + partialclonefilter and ran git fetch --refetch, so a transient withheld-paths outage downgraded a still-withheld mirror to a full clone and broke syncs until the endpoint recovered. Replace classify_mirror with resolve_mirror_mode(withheld, exists, promisor): only Some(empty) downgrades an existing mirror; None preserves the on-disk promisor mode. A genuine public transition always returns Some(empty), never None, so preserving on None cannot mask one; it only suppresses the transient-error false positive. Add a three-valued PromisorProbe read from git config --get exit codes so a transient probe failure (Unknown) is not mistaken for a definitive NotPromisor and cannot itself trigger the downgrade. One warn marks the preserve path, derived from the resolved mode so it cannot drift. Tests cover every resolve_mirror_mode arm (incl. the regression and the indeterminate-probe case) and the probe's Promisor / NotPromisor / Unknown outcomes against real git repos. --- crates/gitlawb-node/src/sync.rs | 199 ++++++++++++++++++++++++++++---- 1 file changed, 179 insertions(+), 20 deletions(-) diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index 615ce22..137c6e8 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -32,19 +32,50 @@ enum MirrorMode { Promisor, } -/// Decide the mirror mode from the origin's `withheld-paths` response. +/// The on-disk promisor state of an existing mirror, read from +/// `remote.origin.promisor`. Three-valued so a git error is not mistaken for a +/// definitive "not a promisor": `git config --get` collapses "key absent" and +/// "git failed" into the same non-zero exit otherwise, and treating an errored +/// probe as `NotPromisor` would let a transient failure downgrade a still-withheld +/// mirror (issue #48). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PromisorProbe { + /// `remote.origin.promisor` is `"true"`. + Promisor, + /// The key is absent (git exit 1) or set to a non-`true` value. + NotPromisor, + /// The probe itself failed (git spawn error or other non-zero exit). + Unknown, +} + +/// Decide the mirror mode from the origin's `withheld-paths` response plus, when +/// the response is unknown, the existing mirror's on-disk promisor state. /// /// `Some(non-empty)` → the repo has a private subtree → `Promisor`. /// `Some(empty)` → fully public → `Plain`. -/// `None` → the lookup 404'd or failed. Attempt a `Plain` mirror; a -/// mode-A repo also 404s the git read endpoint, so the clone -/// fails and nothing is mirrored (fail-closed at the git -/// layer), while a public repo on a peer that predates the -/// `withheld-paths` route still gets mirrored. -fn classify_mirror(withheld: Option>) -> MirrorMode { +/// `None` → the lookup 404'd or failed; the answer is *unknown*, which +/// is not the same as "public". For a fresh clone this stays +/// `Plain` (a mode-A repo also 404s the git read endpoint, so +/// the clone fails and nothing is mirrored — fail-closed at the +/// git layer — while a public repo on a peer that predates the +/// `withheld-paths` route still gets mirrored). For an existing +/// mirror it *biases toward preserving* the promisor state: a +/// genuine public transition returns `Some(empty)`, so on +/// `None` we cannot distinguish "still withheld, unreachable" +/// from "newly public, unreachable" and prefer the recoverable +/// choice over destroying the partial-clone config. An +/// indeterminate probe (`Unknown`) preserves for the same +/// reason (defense-in-depth, #48). +fn resolve_mirror_mode( + withheld: Option>, + exists: bool, + promisor: PromisorProbe, +) -> MirrorMode { match withheld { Some(globs) if !globs.is_empty() => MirrorMode::Promisor, - _ => MirrorMode::Plain, + Some(_) => MirrorMode::Plain, + None if exists && promisor != PromisorProbe::NotPromisor => MirrorMode::Promisor, + None => MirrorMode::Plain, } } @@ -186,9 +217,30 @@ async fn process_batch( let remote_url = format!("{}/{}", origin_url, item.repo); let withheld = fetch_withheld(client, &origin_url, owner_short, repo_name).await; - let mode = classify_mirror(withheld); + let exists = local_path.exists(); + let lookup_unknown = withheld.is_none(); + // Only probe the on-disk promisor state when the lookup is unknown and the + // repo already exists — the sole case where it changes the resolved mode. + let promisor = if lookup_unknown && exists { + let local_str = local_path.to_str().unwrap_or("."); + existing_promisor_state(local_str).await + } else { + PromisorProbe::NotPromisor + }; + let mode = resolve_mirror_mode(withheld, exists, promisor); + // Surface the case where an unknown withheld-paths lookup kept (or, on an + // indeterminate probe, defensively applied) promisor mode instead of + // downgrading to a full clone. Derived from the resolved mode so it cannot + // drift from resolve_mirror_mode's preserve branch. + if lookup_unknown && mode == MirrorMode::Promisor { + warn!( + repo = %item.repo, + origin = %origin_url, + "withheld-paths lookup unavailable; using promisor mirror mode to avoid an unsafe full-clone downgrade" + ); + } - let result = if local_path.exists() { + let result = if exists { fetch_repo(&local_path, &remote_url, mode).await } else { clone_repo(&remote_url, &local_path, mode).await @@ -247,7 +299,7 @@ async fn process_batch( /// Query the origin's anonymous `withheld-paths` endpoint. Returns the withheld /// glob list on a 2xx, or `None` on any non-success / network / parse error -/// (treated as "unknown" by `classify_mirror`). +/// (treated as "unknown" by `resolve_mirror_mode`). async fn fetch_withheld( client: &reqwest::Client, origin_url: &str, @@ -448,6 +500,38 @@ async fn git_config_get(repo: &str, key: &str) -> Option { (!value.is_empty()).then_some(value) } +/// Probe an existing mirror's `remote.origin.promisor` config as a three-valued +/// state. Unlike [`git_config_get`], this distinguishes a definitively-absent key +/// from a probe failure so a transient git error cannot be read as "not a +/// promisor" and trigger a downgrade (issue #48). +/// +/// `git config --get` exits 0 when the key is set, 1 when it is absent (or the +/// directory is not a git repo) — both definitive `NotPromisor` — and other +/// non-zero codes (e.g. 128 for a bad path or unreadable config) on error. A spawn +/// failure or any non-{0,1} exit is `Unknown`. +async fn existing_promisor_state(repo: &str) -> PromisorProbe { + let out = match tokio::process::Command::new("git") + .args(["-C", repo, "config", "--get", "remote.origin.promisor"]) + .output() + .await + { + Ok(out) => out, + Err(_) => return PromisorProbe::Unknown, + }; + match out.status.code() { + Some(0) => { + let value = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if value == "true" { + PromisorProbe::Promisor + } else { + PromisorProbe::NotPromisor + } + } + Some(1) => PromisorProbe::NotPromisor, + _ => PromisorProbe::Unknown, + } +} + /// Mirror-clone a repo from a remote URL into a local bare repo. /// `Promisor` mode adds `--filter=blob:limit=10g`, which marks the repo a git /// promisor (so a pack with origin-omitted withheld blobs is accepted) while @@ -538,25 +622,68 @@ mod tests { use tempfile::TempDir; #[test] - fn classify_promisor_when_withheld_nonempty() { - let mode = classify_mirror(Some(vec!["/secret/**".to_string()])); + fn resolve_promisor_when_withheld_nonempty() { + let mode = resolve_mirror_mode( + Some(vec!["/secret/**".to_string()]), + true, + PromisorProbe::NotPromisor, + ); assert!(matches!(mode, MirrorMode::Promisor)); } #[test] - fn classify_plain_when_withheld_empty() { - let mode = classify_mirror(Some(vec![])); - assert!(matches!(mode, MirrorMode::Plain)); + fn resolve_plain_when_withheld_empty() { + // A genuine public transition returns Some(empty) and still downgrades, + // regardless of whether the mirror exists or was a promisor. + for exists in [true, false] { + for probe in [ + PromisorProbe::Promisor, + PromisorProbe::NotPromisor, + PromisorProbe::Unknown, + ] { + let mode = resolve_mirror_mode(Some(vec![]), exists, probe); + assert!(matches!(mode, MirrorMode::Plain)); + } + } + } + + #[test] + fn resolve_preserves_promisor_on_unknown_lookup_for_existing_mirror() { + // Regression for #48: a transient withheld-paths outage (None) must NOT + // downgrade a still-withheld promisor mirror to a full clone. + let mode = resolve_mirror_mode(None, true, PromisorProbe::Promisor); + assert!(matches!(mode, MirrorMode::Promisor)); } #[test] - fn classify_plain_when_lookup_failed() { - // None == 404 / network error / parse failure: attempt a plain mirror - // and let the git read endpoint fail-close a mode-A repo. - let mode = classify_mirror(None); + fn resolve_preserves_promisor_on_indeterminate_probe() { + // Defense-in-depth (#48): if the config probe itself fails (Unknown) in the + // same cycle as a withheld-paths outage, bias toward preserving rather than + // firing the destructive downgrade. + let mode = resolve_mirror_mode(None, true, PromisorProbe::Unknown); + assert!(matches!(mode, MirrorMode::Promisor)); + } + + #[test] + fn resolve_plain_when_unknown_lookup_for_non_promisor_mirror() { + // An existing non-promisor mirror is unaffected by the preserve branch. + let mode = resolve_mirror_mode(None, true, PromisorProbe::NotPromisor); assert!(matches!(mode, MirrorMode::Plain)); } + #[test] + fn resolve_plain_when_unknown_lookup_for_fresh_clone() { + // No local mirror yet: None stays Plain (fail-closed at the git layer). + for probe in [ + PromisorProbe::Promisor, + PromisorProbe::NotPromisor, + PromisorProbe::Unknown, + ] { + let mode = resolve_mirror_mode(None, false, probe); + assert!(matches!(mode, MirrorMode::Plain)); + } + } + fn rb(oid: &str, cid: &str) -> ReplicaBlob { ReplicaBlob { oid: oid.to_string(), @@ -696,6 +823,38 @@ mod tests { assert_eq!(git_config(&dest, "remote.origin.mirror"), "true"); } + #[tokio::test] + async fn probe_reports_promisor_for_promisor_mirror() { + let (td, url) = bare_remote(&[("public/a.txt", b"pub\n")]); + let dest = td.path().join("mirror.git"); + clone_repo(&url, &dest, MirrorMode::Promisor).await.unwrap(); + + let probe = existing_promisor_state(dest.to_str().unwrap()).await; + assert_eq!(probe, PromisorProbe::Promisor); + } + + #[tokio::test] + async fn probe_reports_not_promisor_when_key_absent() { + // A plain mirror never sets remote.origin.promisor, so `git config --get` + // exits 1 (key absent) — the probe must read that as NotPromisor, never + // Unknown (which would wrongly preserve and upgrade a plain mirror). + let (td, url) = bare_remote(&[("public/a.txt", b"pub\n")]); + let dest = td.path().join("mirror.git"); + clone_repo(&url, &dest, MirrorMode::Plain).await.unwrap(); + + let probe = existing_promisor_state(dest.to_str().unwrap()).await; + assert_eq!(probe, PromisorProbe::NotPromisor); + } + + #[tokio::test] + async fn probe_reports_unknown_on_git_error() { + // A path git cannot resolve as a repo at all (exit 128) is an indeterminate + // probe, not a definitive "not a promisor" — it must map to Unknown so the + // caller preserves rather than downgrades (#48 defense-in-depth). + let probe = existing_promisor_state("/nonexistent/gitlawb-probe-xyz").await; + assert_eq!(probe, PromisorProbe::Unknown); + } + #[tokio::test] async fn promisor_fetch_updates_existing_mirror() { let (td, url) = bare_remote(&[("public/a.txt", b"pub\n")]);