Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 179 additions & 20 deletions crates/gitlawb-node/src/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,50 @@ enum MirrorMode {
Promisor,
}

/// Decide the mirror mode from the origin's `withheld-paths` response.
/// The on-disk promisor state of an existing mirror, read from
/// `remote.origin.promisor`. Three-valued so a git error is not mistaken for a
/// definitive "not a promisor": `git config --get` collapses "key absent" and
/// "git failed" into the same non-zero exit otherwise, and treating an errored
/// probe as `NotPromisor` would let a transient failure downgrade a still-withheld
/// mirror (issue #48).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PromisorProbe {
/// `remote.origin.promisor` is `"true"`.
Promisor,
/// The key is absent (git exit 1) or set to a non-`true` value.
NotPromisor,
/// The probe itself failed (git spawn error or other non-zero exit).
Unknown,
}

/// Decide the mirror mode from the origin's `withheld-paths` response plus, when
/// the response is unknown, the existing mirror's on-disk promisor state.
///
/// `Some(non-empty)` → the repo has a private subtree → `Promisor`.
/// `Some(empty)` → fully public → `Plain`.
/// `None` → the lookup 404'd or failed. Attempt a `Plain` mirror; a
/// mode-A repo also 404s the git read endpoint, so the clone
/// fails and nothing is mirrored (fail-closed at the git
/// layer), while a public repo on a peer that predates the
/// `withheld-paths` route still gets mirrored.
fn classify_mirror(withheld: Option<Vec<String>>) -> MirrorMode {
/// `None` → the lookup 404'd or failed; the answer is *unknown*, which
/// is not the same as "public". For a fresh clone this stays
/// `Plain` (a mode-A repo also 404s the git read endpoint, so
/// the clone fails and nothing is mirrored — fail-closed at the
/// git layer — while a public repo on a peer that predates the
/// `withheld-paths` route still gets mirrored). For an existing
/// mirror it *biases toward preserving* the promisor state: a
/// genuine public transition returns `Some(empty)`, so on
/// `None` we cannot distinguish "still withheld, unreachable"
/// from "newly public, unreachable" and prefer the recoverable
/// choice over destroying the partial-clone config. An
/// indeterminate probe (`Unknown`) preserves for the same
/// reason (defense-in-depth, #48).
fn resolve_mirror_mode(
withheld: Option<Vec<String>>,
exists: bool,
promisor: PromisorProbe,
) -> MirrorMode {
match withheld {
Some(globs) if !globs.is_empty() => MirrorMode::Promisor,
_ => MirrorMode::Plain,
Some(_) => MirrorMode::Plain,
None if exists && promisor != PromisorProbe::NotPromisor => MirrorMode::Promisor,
None => MirrorMode::Plain,
}
}

Expand Down Expand Up @@ -186,9 +217,30 @@ async fn process_batch(
let remote_url = format!("{}/{}", origin_url, item.repo);

let withheld = fetch_withheld(client, &origin_url, owner_short, repo_name).await;
let mode = classify_mirror(withheld);
let exists = local_path.exists();
let lookup_unknown = withheld.is_none();
// Only probe the on-disk promisor state when the lookup is unknown and the
// repo already exists — the sole case where it changes the resolved mode.
let promisor = if lookup_unknown && exists {
let local_str = local_path.to_str().unwrap_or(".");
existing_promisor_state(local_str).await
} else {
PromisorProbe::NotPromisor
};
let mode = resolve_mirror_mode(withheld, exists, promisor);
// Surface the case where an unknown withheld-paths lookup kept (or, on an
// indeterminate probe, defensively applied) promisor mode instead of
// downgrading to a full clone. Derived from the resolved mode so it cannot
// drift from resolve_mirror_mode's preserve branch.
if lookup_unknown && mode == MirrorMode::Promisor {
warn!(
repo = %item.repo,
origin = %origin_url,
"withheld-paths lookup unavailable; using promisor mirror mode to avoid an unsafe full-clone downgrade"
);
}

let result = if local_path.exists() {
let result = if exists {
fetch_repo(&local_path, &remote_url, mode).await
} else {
clone_repo(&remote_url, &local_path, mode).await
Expand Down Expand Up @@ -247,7 +299,7 @@ async fn process_batch(

/// Query the origin's anonymous `withheld-paths` endpoint. Returns the withheld
/// glob list on a 2xx, or `None` on any non-success / network / parse error
/// (treated as "unknown" by `classify_mirror`).
/// (treated as "unknown" by `resolve_mirror_mode`).
async fn fetch_withheld(
client: &reqwest::Client,
origin_url: &str,
Expand Down Expand Up @@ -448,6 +500,38 @@ async fn git_config_get(repo: &str, key: &str) -> Option<String> {
(!value.is_empty()).then_some(value)
}

/// Probe an existing mirror's `remote.origin.promisor` config as a three-valued
/// state. Unlike [`git_config_get`], this distinguishes a definitively-absent key
/// from a probe failure so a transient git error cannot be read as "not a
/// promisor" and trigger a downgrade (issue #48).
///
/// `git config --get` exits 0 when the key is set, 1 when it is absent (or the
/// directory is not a git repo) — both definitive `NotPromisor` — and other
/// non-zero codes (e.g. 128 for a bad path or unreadable config) on error. A spawn
/// failure or any non-{0,1} exit is `Unknown`.
async fn existing_promisor_state(repo: &str) -> PromisorProbe {
let out = match tokio::process::Command::new("git")
.args(["-C", repo, "config", "--get", "remote.origin.promisor"])
.output()
.await
{
Ok(out) => out,
Err(_) => return PromisorProbe::Unknown,
};
match out.status.code() {
Some(0) => {
let value = String::from_utf8_lossy(&out.stdout).trim().to_string();
if value == "true" {
PromisorProbe::Promisor
} else {
PromisorProbe::NotPromisor
}
}
Some(1) => PromisorProbe::NotPromisor,
_ => PromisorProbe::Unknown,
}
}

/// Mirror-clone a repo from a remote URL into a local bare repo.
/// `Promisor` mode adds `--filter=blob:limit=10g`, which marks the repo a git
/// promisor (so a pack with origin-omitted withheld blobs is accepted) while
Expand Down Expand Up @@ -538,25 +622,68 @@ mod tests {
use tempfile::TempDir;

#[test]
fn classify_promisor_when_withheld_nonempty() {
let mode = classify_mirror(Some(vec!["/secret/**".to_string()]));
fn resolve_promisor_when_withheld_nonempty() {
let mode = resolve_mirror_mode(
Some(vec!["/secret/**".to_string()]),
true,
PromisorProbe::NotPromisor,
);
assert!(matches!(mode, MirrorMode::Promisor));
}

#[test]
fn classify_plain_when_withheld_empty() {
let mode = classify_mirror(Some(vec![]));
assert!(matches!(mode, MirrorMode::Plain));
fn resolve_plain_when_withheld_empty() {
// A genuine public transition returns Some(empty) and still downgrades,
// regardless of whether the mirror exists or was a promisor.
for exists in [true, false] {
for probe in [
PromisorProbe::Promisor,
PromisorProbe::NotPromisor,
PromisorProbe::Unknown,
] {
let mode = resolve_mirror_mode(Some(vec![]), exists, probe);
assert!(matches!(mode, MirrorMode::Plain));
}
}
}

#[test]
fn resolve_preserves_promisor_on_unknown_lookup_for_existing_mirror() {
// Regression for #48: a transient withheld-paths outage (None) must NOT
// downgrade a still-withheld promisor mirror to a full clone.
let mode = resolve_mirror_mode(None, true, PromisorProbe::Promisor);
assert!(matches!(mode, MirrorMode::Promisor));
}

#[test]
fn classify_plain_when_lookup_failed() {
// None == 404 / network error / parse failure: attempt a plain mirror
// and let the git read endpoint fail-close a mode-A repo.
let mode = classify_mirror(None);
fn resolve_preserves_promisor_on_indeterminate_probe() {
// Defense-in-depth (#48): if the config probe itself fails (Unknown) in the
// same cycle as a withheld-paths outage, bias toward preserving rather than
// firing the destructive downgrade.
let mode = resolve_mirror_mode(None, true, PromisorProbe::Unknown);
assert!(matches!(mode, MirrorMode::Promisor));
}

#[test]
fn resolve_plain_when_unknown_lookup_for_non_promisor_mirror() {
// An existing non-promisor mirror is unaffected by the preserve branch.
let mode = resolve_mirror_mode(None, true, PromisorProbe::NotPromisor);
assert!(matches!(mode, MirrorMode::Plain));
}

#[test]
fn resolve_plain_when_unknown_lookup_for_fresh_clone() {
// No local mirror yet: None stays Plain (fail-closed at the git layer).
for probe in [
PromisorProbe::Promisor,
PromisorProbe::NotPromisor,
PromisorProbe::Unknown,
] {
let mode = resolve_mirror_mode(None, false, probe);
assert!(matches!(mode, MirrorMode::Plain));
}
}

fn rb(oid: &str, cid: &str) -> ReplicaBlob {
ReplicaBlob {
oid: oid.to_string(),
Expand Down Expand Up @@ -696,6 +823,38 @@ mod tests {
assert_eq!(git_config(&dest, "remote.origin.mirror"), "true");
}

#[tokio::test]
async fn probe_reports_promisor_for_promisor_mirror() {
let (td, url) = bare_remote(&[("public/a.txt", b"pub\n")]);
let dest = td.path().join("mirror.git");
clone_repo(&url, &dest, MirrorMode::Promisor).await.unwrap();

let probe = existing_promisor_state(dest.to_str().unwrap()).await;
assert_eq!(probe, PromisorProbe::Promisor);
}

#[tokio::test]
async fn probe_reports_not_promisor_when_key_absent() {
// A plain mirror never sets remote.origin.promisor, so `git config --get`
// exits 1 (key absent) — the probe must read that as NotPromisor, never
// Unknown (which would wrongly preserve and upgrade a plain mirror).
let (td, url) = bare_remote(&[("public/a.txt", b"pub\n")]);
let dest = td.path().join("mirror.git");
clone_repo(&url, &dest, MirrorMode::Plain).await.unwrap();

let probe = existing_promisor_state(dest.to_str().unwrap()).await;
assert_eq!(probe, PromisorProbe::NotPromisor);
}

#[tokio::test]
async fn probe_reports_unknown_on_git_error() {
// A path git cannot resolve as a repo at all (exit 128) is an indeterminate
// probe, not a definitive "not a promisor" — it must map to Unknown so the
// caller preserves rather than downgrades (#48 defense-in-depth).
let probe = existing_promisor_state("/nonexistent/gitlawb-probe-xyz").await;
assert_eq!(probe, PromisorProbe::Unknown);
}

#[tokio::test]
async fn promisor_fetch_updates_existing_mirror() {
let (td, url) = bare_remote(&[("public/a.txt", b"pub\n")]);
Expand Down
Loading