From 74d1ec0080f2432d6eb7702a73d6bd750edd29f3 Mon Sep 17 00:00:00 2001 From: Tommaso Fontana Date: Mon, 20 Apr 2026 13:40:13 +0200 Subject: [PATCH] feat(traits): add Gamma, Tuple, and PrefixFree bit serde structs --- webgraph/src/traits/bit_serde.rs | 387 +++++++++++++++++++++++++++++++ 1 file changed, 387 insertions(+) diff --git a/webgraph/src/traits/bit_serde.rs b/webgraph/src/traits/bit_serde.rs index ce16da65..57f2a2fb 100644 --- a/webgraph/src/traits/bit_serde.rs +++ b/webgraph/src/traits/bit_serde.rs @@ -229,3 +229,390 @@ impl, T: PrimitiveInteger> BitDeserializer self.codec_name() } } + +/// Serializes and deserializes a `u64` using [Elias γ codes](dsi_bitstream::codes::gamma). +/// +/// This is functionally equivalent to [`PrefixFree`] (whose default code is +/// gamma), but provided as a convenience so you can write `Gamma` directly +/// in value position without a turbofish. +/// # Examples +/// +/// ```rust +/// # use webgraph::traits::Gamma; +/// let sd = Gamma; +/// ``` +#[derive(Clone, Copy, Debug)] +pub struct Gamma; + +impl> BitSerializer for Gamma { + type SerType = u64; + #[inline(always)] + fn serialize(&self, value: &u64, bitstream: &mut BW) -> Result { + bitstream.write_gamma(*value) + } + fn name(&self) -> String { + "Gamma".to_string() + } +} + +impl> BitDeserializer for Gamma { + type DeserType = u64; + #[inline(always)] + fn deserialize(&self, bitstream: &mut BR) -> Result { + bitstream.read_gamma() + } + fn name(&self) -> String { + "Gamma".to_string() + } +} + +/// Serializes and deserializes a tuple `(A, B)` by delegating each element +/// to a separate [`BitSerializer`]/[`BitDeserializer`]. +/// +/// The first field handles the first element and the second field handles +/// the second element. Elements are written and read in order. +/// +/// # Examples +/// +/// ```rust +/// # use webgraph::traits::{Tuple, FixedWidth, Gamma}; +/// // (u32 fixed-width, u64 gamma) +/// let sd = Tuple(FixedWidth::::new(), Gamma); +/// ``` +#[derive(Clone, Copy, Debug, Default)] +pub struct Tuple(pub F, pub S); + +impl, F: BitSerializer, S: BitSerializer> + BitSerializer for Tuple +{ + type SerType = (F::SerType, S::SerType); + #[inline(always)] + fn serialize(&self, value: &Self::SerType, bitstream: &mut BW) -> Result { + let a = self.0.serialize(&value.0, bitstream)?; + let b = self.1.serialize(&value.1, bitstream)?; + Ok(a + b) + } + fn name(&self) -> String { + format!("Tuple<{}, {}>", self.0.name(), self.1.name()) + } +} + +impl, F: BitDeserializer, S: BitDeserializer> + BitDeserializer for Tuple +{ + type DeserType = (F::DeserType, S::DeserType); + #[inline(always)] + fn deserialize(&self, bitstream: &mut BR) -> Result { + let a = self.0.deserialize(bitstream)?; + let b = self.1.deserialize(bitstream)?; + Ok((a, b)) + } + fn name(&self) -> String { + format!("Tuple<{}, {}>", self.0.name(), self.1.name()) + } +} + +/// Serializes and deserializes a `u64` using a compile-time–selected +/// prefix-free code from [`dsi_bitstream::dispatch::ConstCode`]. +/// +/// The `CODE` const parameter is one of the constants from +/// [`dsi_bitstream::dispatch::code_consts`] (e.g., `GAMMA`, `DELTA`, +/// `ZETA3`). Because the code is chosen at compile time, the dispatch is +/// fully inlined with no runtime overhead. +/// +/// # Examples +/// +/// ```rust +/// # use webgraph::traits::PrefixFree; +/// # use dsi_bitstream::prelude::code_consts; +/// let sd = PrefixFree::<{ code_consts::DELTA }>; +/// ``` +#[derive(Clone, Copy, Debug)] +pub struct PrefixFree; + +impl, const CODE: usize> BitSerializer + for PrefixFree +{ + type SerType = u64; + #[inline(always)] + fn serialize(&self, value: &u64, bitstream: &mut BW) -> Result { + ConstCode::.write(bitstream, *value) + } + fn name(&self) -> String { + match Codes::from_code_const(CODE) { + Ok(code) => format!("PrefixFree<{}>", code), + Err(_) => format!("PrefixFree<{}>", CODE), + } + } +} + +impl, const CODE: usize> BitDeserializer + for PrefixFree +{ + type DeserType = u64; + #[inline(always)] + fn deserialize(&self, bitstream: &mut BR) -> Result { + ConstCode::.read(bitstream) + } + fn name(&self) -> String { + match Codes::from_code_const(CODE) { + Ok(code) => format!("PrefixFree<{}>", code), + Err(_) => format!("PrefixFree<{}>", CODE), + } + } +} + +/// Maps integers to natural numbers via the [`ToNat`]/[`ToInt`] bijection +/// and delegates to an inner [`BitSerializer`]/[`BitDeserializer`] that +/// operates on `u64`. +/// +/// The mapping sends `0 → 0, -1 → 1, 1 → 2, -2 → 3, 2 → 4, …`, +/// so small absolute values remain small — ideal for pairing with +/// variable-length codes like [`Gamma`]. +/// +/// # Examples +/// +/// ```rust +/// # use webgraph::traits::{Int2Nat, Gamma}; +/// // Encode signed values with gamma codes +/// let sd = Int2Nat(Gamma); +/// ``` +#[derive(Clone, Copy, Debug)] +pub struct Int2Nat(pub C); + +impl, C: BitSerializer> BitSerializer + for Int2Nat +{ + type SerType = i64; + #[inline(always)] + fn serialize(&self, value: &i64, bitstream: &mut BW) -> Result { + let encoded = value.to_nat(); + self.0.serialize(&encoded, bitstream) + } + fn name(&self) -> String { + format!("Int2Nat<{}>", self.0.name()) + } +} + +impl, C: BitDeserializer> + BitDeserializer for Int2Nat +{ + type DeserType = i64; + #[inline(always)] + fn deserialize(&self, bitstream: &mut BR) -> Result { + let n = self.0.deserialize(bitstream)?; + Ok(n.to_int()) + } + fn name(&self) -> String { + format!("Int2Nat<{}>", self.0.name()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + type Writer = BufBitWriter>>; + type Reader = BufBitReader>>; + + /// Round-trip a value through serialize then deserialize. + fn round_trip(serde: &S, value: S::SerType) -> S::DeserType + where + S: BitSerializer + BitDeserializer, + S::SerType: std::fmt::Debug, + { + let mut writer = BufBitWriter::::new(MemWordWriterVec::new(Vec::new())); + serde.serialize(&value, &mut writer).unwrap(); + let buf = writer.into_inner().unwrap().into_inner(); + let mut reader = BufBitReader::::new(MemWordReader::new(buf)); + serde.deserialize(&mut reader).unwrap() + } + + // ─── FixedWidth unsigned ─────────────────────────────────────────── + + #[test] + fn fixed_width_u8_full() { + let sd = FixedWidth::::new(); + for v in [0u8, 1, 127, 128, 255] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_u16_full() { + let sd = FixedWidth::::new(); + for v in [0u16, 1, 255, 256, 32767, 65535] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_u32_full() { + let sd = FixedWidth::::new(); + for v in [0u32, 1, u32::MAX / 2, u32::MAX] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_u64_full() { + let sd = FixedWidth::::new(); + for v in [0u64, 1, u64::MAX / 2, u64::MAX] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_u32_narrow() { + let sd = FixedWidth::::with_bits(10); + for v in [0u32, 1, 512, 1023] { + assert_eq!(round_trip(&sd, v), v); + } + } + + // ─── FixedWidth signed ─────────────────────────────────────────── + + #[test] + fn fixed_width_i8_full() { + let sd = FixedWidth::::new(); + for v in [0i8, 1, -1, 127, -128] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_i16_full() { + let sd = FixedWidth::::new(); + for v in [0i16, 1, -1, i16::MAX, i16::MIN] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_i32_full() { + let sd = FixedWidth::::new(); + for v in [0i32, 1, -1, i32::MAX, i32::MIN] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_i64_full() { + let sd = FixedWidth::::new(); + for v in [0i64, 1, -1, i64::MAX, i64::MIN] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn fixed_width_i8_narrow() { + let sd = FixedWidth::::with_bits(5); + // 5 bits signed: [-16, 15] + for v in [0i8, 1, -1, 15, -16, -5, 7] { + assert_eq!(round_trip(&sd, v), v, "failed for {v}"); + } + } + + #[test] + fn fixed_width_i16_narrow() { + let sd = FixedWidth::::with_bits(9); + // 9 bits signed: [-256, 255] + for v in [0i16, 1, -1, 255, -256, -100, 100] { + assert_eq!(round_trip(&sd, v), v, "failed for {v}"); + } + } + + #[test] + fn fixed_width_i8_one_bit() { + let sd = FixedWidth::::with_bits(1); + // 1 bit signed: [-1, 0] + assert_eq!(round_trip(&sd, 0i8), 0); + assert_eq!(round_trip(&sd, -1i8), -1); + } + + // ─── Gamma ─────────────────────────────────────────────────────── + + #[test] + fn gamma_round_trip() { + for v in [0u64, 1, 2, 7, 100, 1000, u64::MAX / 2] { + assert_eq!(round_trip(&Gamma, v), v); + } + } + + // ─── PrefixFree ────────────────────────────────────────────────── + + #[test] + fn prefix_free_gamma() { + let sd = PrefixFree::<{ code_consts::GAMMA }>; + for v in [0u64, 1, 2, 100, 1000] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn prefix_free_delta() { + let sd = PrefixFree::<{ code_consts::DELTA }>; + for v in [0u64, 1, 2, 100, 1000, 1_000_000] { + assert_eq!(round_trip(&sd, v), v); + } + } + + #[test] + fn prefix_free_zeta3() { + let sd = PrefixFree::<{ code_consts::ZETA3 }>; + for v in [0u64, 1, 2, 100, 1000, 1_000_000] { + assert_eq!(round_trip(&sd, v), v); + } + } + + // ─── Tuple ─────────────────────────────────────────────────────── + + #[test] + fn tuple_fixed_gamma() { + let sd = Tuple(FixedWidth::::new(), Gamma); + for (a, b) in [(0u32, 0u64), (42, 100), (u32::MAX, 0), (0, 999)] { + assert_eq!(round_trip(&sd, (a, b)), (a, b)); + } + } + + #[test] + fn tuple_signed_fixed() { + let sd = Tuple(FixedWidth::::with_bits(5), FixedWidth::::new()); + for (a, b) in [(0i8, 0i16), (-1, -1), (15, i16::MAX), (-16, i16::MIN)] { + assert_eq!(round_trip(&sd, (a, b)), (a, b), "failed for ({a}, {b})"); + } + } + + #[test] + fn tuple_nested() { + let sd = Tuple(Gamma, Tuple(FixedWidth::::new(), Gamma)); + let value = (42u64, (255u8, 7u64)); + assert_eq!(round_trip(&sd, value), value); + } + + // ─── Int2Nat ───────────────────────────────────────────────────── + + #[test] + fn int2nat_gamma_round_trip() { + let sd = Int2Nat(Gamma); + for v in [0i64, 1, -1, 2, -2, 100, -100, 1_000_000, -1_000_000] { + assert_eq!(round_trip(&sd, v), v, "failed for {v}"); + } + } + + #[test] + fn int2nat_delta_round_trip() { + let sd = Int2Nat(PrefixFree::<{ code_consts::DELTA }>); + for v in [0i64, 1, -1, i64::MAX, i64::MIN + 1] { + assert_eq!(round_trip(&sd, v), v, "failed for {v}"); + } + } + + #[test] + fn tuple_int2nat_unsigned() { + let sd = Tuple(Int2Nat(Gamma), FixedWidth::::new()); + for (a, b) in [(0i64, 0u32), (-42, 100), (1_000_000, u32::MAX)] { + assert_eq!(round_trip(&sd, (a, b)), (a, b)); + } + } +}