From a2fa9038793b1cb460c411eeb2db8f3b84a261dd Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 19 Jun 2026 09:12:18 -0500 Subject: [PATCH 1/2] feat(node): add has_path_scoped_rule predicate for withheld-walk short-circuit Shared predicate (rules.iter().any(path_glob != "/")) both serve and replication paths will use to skip the per-blob withheld walk when no rule can withhold. Doc records the "/"-gate precondition and the validate_path_glob dependency. Adds unit tests plus a gate-aware safety invariant test. --- .../gitlawb-node/src/git/visibility_pack.rs | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index c9c6d6b..7c267d2 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -77,6 +77,25 @@ pub fn withheld_blob_oids( Ok(denied.difference(&allowed).cloned().collect()) } +/// True if any rule scopes a sub-path of the repo (i.e. is not the whole-repo +/// "/" rule). When this returns `false`, no rule can withhold an individual +/// blob: the only rules present are whole-repo "/" rules, which are already +/// resolved by the "/" gate the caller runs *before* reaching the serve / +/// replication walk (a denying "/" rule 404s the caller; see +/// `withheld_blob_oids` above). For any caller that has passed that gate, +/// `withheld_blob_oids` therefore returns an empty set, so such callers may +/// skip the (potentially expensive) per-blob walk. Do not skip the walk on this +/// predicate without the "/" gate having run first. +/// +/// Validator dependency: this predicate treats `path_glob == "/"` as the only +/// whole-repo scope. That holds because `validate_path_glob` +/// (crates/gitlawb-node/src/api/visibility.rs) rejects `/**`, the only other +/// glob whose prefix collapses to `/` and would therefore match every path. If +/// glob syntax is ever extended, revisit this predicate. +pub fn has_path_scoped_rule(rules: &[VisibilityRule]) -> bool { + rules.iter().any(|r| r.path_glob != "/") +} + /// Objects that may replicate to the public: everything not in `withheld`. /// Order-preserving. The single seam every replication site (IPFS, Pinata) /// passes its object list through; option B would later reroute the withheld @@ -215,6 +234,66 @@ mod tests { ); } + #[test] + fn has_path_scoped_rule_empty_is_false() { + assert!(!has_path_scoped_rule(&[])); + } + + #[test] + fn has_path_scoped_rule_single_root_is_false() { + assert!(!has_path_scoped_rule(&[rule("/", &[])])); + } + + #[test] + fn has_path_scoped_rule_single_scoped_is_true() { + assert!(has_path_scoped_rule(&[rule("/secret/**", &[])])); + } + + #[test] + fn has_path_scoped_rule_mixed_is_true() { + assert!(has_path_scoped_rule(&[ + rule("/", &[]), + rule("/secret/**", &[]), + ])); + } + + #[test] + fn has_path_scoped_rule_multiple_root_is_false() { + assert!(!has_path_scoped_rule(&[rule("/", &[]), rule("/", &[])])); + } + + #[test] + fn has_path_scoped_rule_safety_invariant_matches_withheld_walk() { + // Pin the claim the predicate's docs make, with its real precondition: + // when no rule is path-scoped, then *for any caller that has passed the + // whole-repo "/" gate*, withheld_blob_oids returns an empty set, so the + // walk is safe to skip. The "/" gate (resolved before the serve / + // replication call sites) is what excludes the denying-root caller; this + // function does not re-check it, so the test models only gate-passing + // callers — matching how U2/U3 consult the predicate. + let (_td, bare, _secret, _public) = fixture(); + // (rules, caller) pairs where the caller is Allowed at "/": + // - public repo, no rules, anonymous: "/" allows (is_public). + // - root-only allow-rule, the listed reader: "/" allows them. + // - root-only deny-all rule, the owner: owner bypasses every rule. + let cases: [(Vec, Option<&str>); 3] = [ + (Vec::new(), None), + ( + vec![rule("/", &["did:key:zFriend"])], + Some("did:key:zFriend"), + ), + (vec![rule("/", &[])], Some(OWNER)), + ]; + for (rules, caller) in cases { + assert!(!has_path_scoped_rule(&rules)); + let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, caller).unwrap(); + assert!( + withheld.is_empty(), + "no path-scoped rule must withhold nothing for a gate-passing caller (caller={caller:?})" + ); + } + } + #[test] fn replicable_objects_drops_withheld_keeps_rest() { let all = vec!["aaa".to_string(), "bbb".to_string(), "ccc".to_string()]; From 75acc57df78b9eea09b4b67d5d130e1a41f188c4 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 19 Jun 2026 09:18:13 -0500 Subject: [PATCH 2/2] fix(node): skip withheld-walk when no path-scoped rule can withhold (#50) The serve path (git_upload_pack) ran the full per-ref withheld_blob_oids walk on every clone/fetch before falling back to a plain upload_pack, even for repos with no path-scoped rules where the walk can never withhold anything. Cost grew with #42 (per-ref git ls-tree -r). Guard both call sites on has_path_scoped_rule: the serve path skips the spawn_blocking walk entirely and serves upload_pack directly; the replication path generalizes its is_empty() short-circuit to also cover root-only rule sets. Safe because the whole-repo "/" gate runs before both sites, so a denying root rule 404s/withholds upstream and never reaches the predicate. No change to served-pack contents on the success path. --- crates/gitlawb-node/src/api/repos.rs | 61 ++++++++++++++++------------ 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 2886926..7e8ced9 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -418,33 +418,40 @@ pub async fn git_upload_pack( .map_err(|e| AppError::Git(e.to_string()))?; let body_len = body.len(); - // withheld_blob_oids walks every ref with blocking `git ls-tree`; keep that - // off the async worker thread. - let withheld = { - let path = disk_path.clone(); - let rules = rules.clone(); - let owner_did = record.owner_did.clone(); - let caller_owned = caller.map(str::to_string); - let is_public = record.is_public; - tokio::task::spawn_blocking(move || { - visibility_pack::withheld_blob_oids( - &path, - &rules, - is_public, - &owner_did, - caller_owned.as_deref(), - ) - }) - .await - .map_err(|e| AppError::Git(e.to_string()))? - .map_err(|e| AppError::Git(e.to_string()))? - }; - - let resp = if withheld.is_empty() { + // No path-scoped rule can withhold an individual blob, and the whole-repo + // "/" gate above already enforced repo-level access. Skip the per-blob + // withheld walk and serve the pack directly. + let resp = if !visibility_pack::has_path_scoped_rule(&rules) { smart_http::upload_pack(&disk_path, body).await } else { - tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack"); - smart_http::upload_pack_excluding(&disk_path, body, &withheld).await + // withheld_blob_oids walks every ref with blocking `git ls-tree`; keep + // that off the async worker thread. + let withheld = { + let path = disk_path.clone(); + let rules = rules.clone(); + let owner_did = record.owner_did.clone(); + let caller_owned = caller.map(str::to_string); + let is_public = record.is_public; + tokio::task::spawn_blocking(move || { + visibility_pack::withheld_blob_oids( + &path, + &rules, + is_public, + &owner_did, + caller_owned.as_deref(), + ) + }) + .await + .map_err(|e| AppError::Git(e.to_string()))? + .map_err(|e| AppError::Git(e.to_string()))? + }; + + if withheld.is_empty() { + smart_http::upload_pack(&disk_path, body).await + } else { + tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack"); + smart_http::upload_pack_excluding(&disk_path, body, &withheld).await + } } .map_err(|e| { let msg = e.to_string(); @@ -651,7 +658,9 @@ pub async fn git_receive_pack( None } else { match &rules_opt { - Some(rules) if rules.is_empty() => Some(std::collections::HashSet::new()), + Some(rules) if !visibility_pack::has_path_scoped_rule(rules) => { + Some(std::collections::HashSet::new()) + } // withheld_blob_oids walks every ref with blocking `git ls-tree`; // keep that off the async worker thread. Some(rules) => {