diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 9a06f7f..1fb392e 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -6,7 +6,8 @@ use bytes::Bytes; use std::sync::Arc; use crate::auth::AuthenticatedDid; -use chrono::Utc; +use crate::db::RepoRecord; +use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -158,8 +159,8 @@ pub async fn list_repos( } let repos = state.db.list_all_repos_with_stars().await?; - let filtered: Vec<_> = repos - .iter() + let filtered: Vec<(RepoRecord, i64)> = repos + .into_iter() .filter(|(r, _)| { if let Some(owner) = &query.owner { let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); @@ -169,10 +170,11 @@ pub async fn list_repos( } }) .collect(); - let total = filtered.len() as i64; - let resp: Vec<_> = filtered + let deduped = dedupe_canonical_repos(filtered); + let total = deduped.len() as i64; + let resp: Vec<_> = deduped .into_iter() - .map(|(r, stars)| to_response(r, &state, *stars)) + .map(|(r, stars)| to_response(&r, &state, stars)) .collect(); let mut response = Json(resp).into_response(); response.headers_mut().insert( @@ -1000,7 +1002,7 @@ pub async fn list_refs( pub async fn list_federated_repos( State(state): State, ) -> Result> { - let local_repos = state.db.list_all_repos_with_stars().await?; + let local_repos = dedupe_canonical_repos(state.db.list_all_repos_with_stars().await?); let local_node_url = state .config .public_url @@ -1281,3 +1283,220 @@ fn to_response(record: &crate::db::RepoRecord, state: &AppState, star_count: i64 forked_from: record.forked_from.clone(), } } + +/// Description marker that `upsert_mirror_repo` stamps on short-owner peer mirror rows. +const MIRROR_DESCRIPTION: &str = "mirrored from peer"; + +/// Collapse short-owner mirror rows and canonical `did:key:` rows that point at the +/// same logical repo into a single entry, so profile/list surfaces don't render the +/// same repo twice (issue #6). +/// +/// Rows are grouped by `(normalized owner, name)`, where the normalized owner is the +/// key segment after the last `:` (so `did:key:z6Mk…` and the bare `z6Mk…` mirror row +/// collapse together). Within a group the canonical row wins: a non-mirror row is +/// preferred over a `"mirrored from peer"` row, ties broken by earliest `created_at`. +/// The survivor inherits the group's most recent `updated_at` so a gossip push that +/// only touches the mirror row still floats the repo to the top. +/// +/// This mirrors the SQL dedup already applied on the paged path in +/// `Db::list_all_repos_paged`; the two must stay in sync. +fn dedupe_canonical_repos(rows: Vec<(RepoRecord, i64)>) -> Vec<(RepoRecord, i64)> { + use std::collections::HashMap; + + fn is_mirror(r: &RepoRecord) -> bool { + r.description.as_deref() == Some(MIRROR_DESCRIPTION) + } + + // Strictly more canonical: non-mirror beats mirror; on a tie the earlier row wins. + fn outranks(candidate: &RepoRecord, current: &RepoRecord) -> bool { + match (is_mirror(candidate), is_mirror(current)) { + (false, true) => true, + (true, false) => false, + _ => candidate.created_at < current.created_at, + } + } + + // Preserve first-seen group order so output ordering stays deterministic. + let mut order: Vec<(String, String)> = Vec::new(); + let mut winners: HashMap<(String, String), (RepoRecord, i64)> = HashMap::new(); + let mut latest: HashMap<(String, String), DateTime> = HashMap::new(); + + for (rec, stars) in rows { + let short = rec + .owner_did + .rsplit(':') + .next() + .unwrap_or(&rec.owner_did) + .to_string(); + let key = (short, rec.name.clone()); + + latest + .entry(key.clone()) + .and_modify(|u| { + if rec.updated_at > *u { + *u = rec.updated_at; + } + }) + .or_insert(rec.updated_at); + + match winners.get(&key) { + None => { + order.push(key.clone()); + winners.insert(key, (rec, stars)); + } + Some((current, _)) if outranks(&rec, current) => { + winners.insert(key, (rec, stars)); + } + Some(_) => {} + } + } + + order + .into_iter() + .filter_map(|key| { + let max_updated = latest.get(&key).copied(); + winners.remove(&key).map(|(mut rec, stars)| { + if let Some(u) = max_updated { + rec.updated_at = u; + } + (rec, stars) + }) + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn ts(s: &str) -> DateTime { + DateTime::parse_from_rfc3339(s).unwrap().with_timezone(&Utc) + } + + fn record(id: &str, owner_did: &str, name: &str, desc: &str, updated: &str) -> RepoRecord { + RepoRecord { + id: id.to_string(), + name: name.to_string(), + owner_did: owner_did.to_string(), + description: Some(desc.to_string()), + is_public: true, + default_branch: "main".to_string(), + created_at: ts("2026-01-01T00:00:00Z"), + updated_at: ts(updated), + disk_path: format!("/srv/{id}"), + forked_from: None, + machine_id: None, + } + } + + #[test] + fn canonical_row_wins_over_short_owner_mirror() { + // Order deliberately puts the mirror row first to prove ranking, not input order, decides. + let mirror = record( + "z6Mkwbud/nipmod", + "z6Mkwbud", + "nipmod", + "mirrored from peer", + "2026-02-01T00:00:00Z", + ); + let canonical = record( + "9d92186a", + "did:key:z6Mkwbud", + "nipmod", + "Decentralized npm for agents on Gitlawb", + "2026-01-15T00:00:00Z", + ); + + let out = dedupe_canonical_repos(vec![(mirror, 3), (canonical, 7)]); + + assert_eq!(out.len(), 1, "the two rows collapse into one logical repo"); + let (rec, stars) = &out[0]; + assert_eq!( + rec.owner_did, "did:key:z6Mkwbud", + "canonical did:key row wins" + ); + assert_eq!( + rec.description.as_deref(), + Some("Decentralized npm for agents on Gitlawb"), + "canonical description and metadata survive, not the mirror placeholder", + ); + assert_eq!(*stars, 7, "star count follows the canonical row"); + // Survivor inherits the group's most recent updated_at (here the mirror's). + assert_eq!(rec.updated_at, ts("2026-02-01T00:00:00Z")); + } + + #[test] + fn distinct_repos_are_preserved_in_order() { + let a = record( + "id-a", + "did:key:z6Aaa", + "alpha", + "first", + "2026-03-01T00:00:00Z", + ); + let b = record( + "id-b", + "did:key:z6Bbb", + "beta", + "second", + "2026-03-02T00:00:00Z", + ); + + let out = dedupe_canonical_repos(vec![(a, 1), (b, 2)]); + + assert_eq!(out.len(), 2); + assert_eq!(out[0].0.name, "alpha"); + assert_eq!(out[1].0.name, "beta"); + } + + #[test] + fn same_short_owner_different_repo_does_not_collapse() { + let one = record( + "id-1", + "z6Mkwbud", + "nipmod", + "mirrored from peer", + "2026-01-01T00:00:00Z", + ); + let two = record( + "id-2", + "did:key:z6Mkwbud", + "other", + "real", + "2026-01-01T00:00:00Z", + ); + + let out = dedupe_canonical_repos(vec![(one, 0), (two, 0)]); + + assert_eq!( + out.len(), + 2, + "different repo names stay separate under one owner" + ); + } + + #[test] + fn two_mirror_rows_break_tie_by_earliest_created_at() { + let mut older = record( + "id-old", + "z6X", + "r", + "mirrored from peer", + "2026-02-01T00:00:00Z", + ); + older.created_at = ts("2026-01-01T00:00:00Z"); + let mut newer = record( + "id-new", + "z6X", + "r", + "mirrored from peer", + "2026-03-01T00:00:00Z", + ); + newer.created_at = ts("2026-01-10T00:00:00Z"); + + let out = dedupe_canonical_repos(vec![(newer, 0), (older, 0)]); + + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.id, "id-old", "earliest created_at wins the tie"); + } +}