Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
233 changes: 226 additions & 7 deletions crates/gitlawb-node/src/api/repos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use bytes::Bytes;
use std::sync::Arc;

use crate::auth::AuthenticatedDid;
use chrono::Utc;
use crate::db::RepoRecord;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

Expand Down Expand Up @@ -158,8 +159,8 @@ pub async fn list_repos(
}

let repos = state.db.list_all_repos_with_stars().await?;
let filtered: Vec<_> = repos
.iter()
let filtered: Vec<(RepoRecord, i64)> = repos
.into_iter()
.filter(|(r, _)| {
if let Some(owner) = &query.owner {
let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did);
Expand All @@ -169,10 +170,11 @@ pub async fn list_repos(
}
})
.collect();
let total = filtered.len() as i64;
let resp: Vec<_> = filtered
let deduped = dedupe_canonical_repos(filtered);
let total = deduped.len() as i64;
let resp: Vec<_> = deduped
.into_iter()
.map(|(r, stars)| to_response(r, &state, *stars))
.map(|(r, stars)| to_response(&r, &state, stars))
.collect();
let mut response = Json(resp).into_response();
response.headers_mut().insert(
Expand Down Expand Up @@ -1000,7 +1002,7 @@ pub async fn list_refs(
pub async fn list_federated_repos(
State(state): State<AppState>,
) -> Result<Json<serde_json::Value>> {
let local_repos = state.db.list_all_repos_with_stars().await?;
let local_repos = dedupe_canonical_repos(state.db.list_all_repos_with_stars().await?);
let local_node_url = state
.config
.public_url
Expand Down Expand Up @@ -1281,3 +1283,220 @@ fn to_response(record: &crate::db::RepoRecord, state: &AppState, star_count: i64
forked_from: record.forked_from.clone(),
}
}

/// Description marker that `upsert_mirror_repo` stamps on short-owner peer mirror rows.
const MIRROR_DESCRIPTION: &str = "mirrored from peer";

/// Collapse short-owner mirror rows and canonical `did:key:` rows that point at the
/// same logical repo into a single entry, so profile/list surfaces don't render the
/// same repo twice (issue #6).
///
/// Rows are grouped by `(normalized owner, name)`, where the normalized owner is the
/// key segment after the last `:` (so `did:key:z6Mk…` and the bare `z6Mk…` mirror row
/// collapse together). Within a group the canonical row wins: a non-mirror row is
/// preferred over a `"mirrored from peer"` row, ties broken by earliest `created_at`.
/// The survivor inherits the group's most recent `updated_at` so a gossip push that
/// only touches the mirror row still floats the repo to the top.
///
/// This mirrors the SQL dedup already applied on the paged path in
/// `Db::list_all_repos_paged`; the two must stay in sync.
fn dedupe_canonical_repos(rows: Vec<(RepoRecord, i64)>) -> Vec<(RepoRecord, i64)> {
use std::collections::HashMap;

fn is_mirror(r: &RepoRecord) -> bool {
r.description.as_deref() == Some(MIRROR_DESCRIPTION)
}

// Strictly more canonical: non-mirror beats mirror; on a tie the earlier row wins.
fn outranks(candidate: &RepoRecord, current: &RepoRecord) -> bool {
match (is_mirror(candidate), is_mirror(current)) {
(false, true) => true,
(true, false) => false,
_ => candidate.created_at < current.created_at,
}
}

// Preserve first-seen group order so output ordering stays deterministic.
let mut order: Vec<(String, String)> = Vec::new();
let mut winners: HashMap<(String, String), (RepoRecord, i64)> = HashMap::new();
let mut latest: HashMap<(String, String), DateTime<Utc>> = HashMap::new();

for (rec, stars) in rows {
let short = rec
.owner_did
.rsplit(':')
.next()
.unwrap_or(&rec.owner_did)
.to_string();
let key = (short, rec.name.clone());

latest
.entry(key.clone())
.and_modify(|u| {
if rec.updated_at > *u {
*u = rec.updated_at;
}
})
.or_insert(rec.updated_at);

match winners.get(&key) {
None => {
order.push(key.clone());
winners.insert(key, (rec, stars));
}
Some((current, _)) if outranks(&rec, current) => {
winners.insert(key, (rec, stars));
}
Some(_) => {}
}
}

order
.into_iter()
.filter_map(|key| {
let max_updated = latest.get(&key).copied();
winners.remove(&key).map(|(mut rec, stars)| {
if let Some(u) = max_updated {
rec.updated_at = u;
}
(rec, stars)
})
})
.collect()
}

#[cfg(test)]
mod tests {
use super::*;

fn ts(s: &str) -> DateTime<Utc> {
DateTime::parse_from_rfc3339(s).unwrap().with_timezone(&Utc)
}

fn record(id: &str, owner_did: &str, name: &str, desc: &str, updated: &str) -> RepoRecord {
RepoRecord {
id: id.to_string(),
name: name.to_string(),
owner_did: owner_did.to_string(),
description: Some(desc.to_string()),
is_public: true,
default_branch: "main".to_string(),
created_at: ts("2026-01-01T00:00:00Z"),
updated_at: ts(updated),
disk_path: format!("/srv/{id}"),
forked_from: None,
machine_id: None,
}
}

#[test]
fn canonical_row_wins_over_short_owner_mirror() {
// Order deliberately puts the mirror row first to prove ranking, not input order, decides.
let mirror = record(
"z6Mkwbud/nipmod",
"z6Mkwbud",
"nipmod",
"mirrored from peer",
"2026-02-01T00:00:00Z",
);
let canonical = record(
"9d92186a",
"did:key:z6Mkwbud",
"nipmod",
"Decentralized npm for agents on Gitlawb",
"2026-01-15T00:00:00Z",
);

let out = dedupe_canonical_repos(vec![(mirror, 3), (canonical, 7)]);

assert_eq!(out.len(), 1, "the two rows collapse into one logical repo");
let (rec, stars) = &out[0];
assert_eq!(
rec.owner_did, "did:key:z6Mkwbud",
"canonical did:key row wins"
);
assert_eq!(
rec.description.as_deref(),
Some("Decentralized npm for agents on Gitlawb"),
"canonical description and metadata survive, not the mirror placeholder",
);
assert_eq!(*stars, 7, "star count follows the canonical row");
// Survivor inherits the group's most recent updated_at (here the mirror's).
assert_eq!(rec.updated_at, ts("2026-02-01T00:00:00Z"));
}

#[test]
fn distinct_repos_are_preserved_in_order() {
let a = record(
"id-a",
"did:key:z6Aaa",
"alpha",
"first",
"2026-03-01T00:00:00Z",
);
let b = record(
"id-b",
"did:key:z6Bbb",
"beta",
"second",
"2026-03-02T00:00:00Z",
);

let out = dedupe_canonical_repos(vec![(a, 1), (b, 2)]);

assert_eq!(out.len(), 2);
assert_eq!(out[0].0.name, "alpha");
assert_eq!(out[1].0.name, "beta");
}

#[test]
fn same_short_owner_different_repo_does_not_collapse() {
let one = record(
"id-1",
"z6Mkwbud",
"nipmod",
"mirrored from peer",
"2026-01-01T00:00:00Z",
);
let two = record(
"id-2",
"did:key:z6Mkwbud",
"other",
"real",
"2026-01-01T00:00:00Z",
);

let out = dedupe_canonical_repos(vec![(one, 0), (two, 0)]);

assert_eq!(
out.len(),
2,
"different repo names stay separate under one owner"
);
}

#[test]
fn two_mirror_rows_break_tie_by_earliest_created_at() {
let mut older = record(
"id-old",
"z6X",
"r",
"mirrored from peer",
"2026-02-01T00:00:00Z",
);
older.created_at = ts("2026-01-01T00:00:00Z");
let mut newer = record(
"id-new",
"z6X",
"r",
"mirrored from peer",
"2026-03-01T00:00:00Z",
);
newer.created_at = ts("2026-01-10T00:00:00Z");

let out = dedupe_canonical_repos(vec![(newer, 0), (older, 0)]);

assert_eq!(out.len(), 1);
assert_eq!(out[0].0.id, "id-old", "earliest created_at wins the tie");
}
}
Loading