From 4de799249ea40f7567107d57397b38cd1d1cabf7 Mon Sep 17 00:00:00 2001 From: Endel Dreyer Date: Thu, 19 Mar 2026 00:57:33 -0300 Subject: [PATCH 1/5] Replace RangeSet with bitmap for received packet number tracking Inspired by lxin/quic kernel pnspace.c: O(1) duplicate detection via bit test instead of O(n) range scan. Fixed 4096-bit sliding window (512 bytes, zero heap allocation) replaces heap-allocated ArrayList. - PacketNumberBitmap: mark/contains O(1), getRanges scans for ACK gen - ReceivedPacketTracker no longer needs allocator for packet tracking - removeBelow shifts bitmap forward for ACK-of-ACK pruning - Window auto-shifts on large PN jumps (3/4 history preserved) --- src/quic/ack_handler.zig | 45 +++-- src/quic/ranges.zig | 371 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 400 insertions(+), 16 deletions(-) diff --git a/src/quic/ack_handler.zig b/src/quic/ack_handler.zig index dccce6a..0024acd 100644 --- a/src/quic/ack_handler.zig +++ b/src/quic/ack_handler.zig @@ -4,6 +4,8 @@ const testing = std.testing; const ranges = @import("ranges.zig"); const RangeSet = ranges.RangeSet; +const Range = ranges.Range; +const PacketNumberBitmap = ranges.PacketNumberBitmap; const rtt_mod = @import("rtt.zig"); const RttStats = rtt_mod.RttStats; const frame_mod = @import("frame.zig"); @@ -292,9 +294,11 @@ pub const SentPacketTracker = struct { }; /// Tracks received packets for generating ACK frames. +/// Uses a fixed-size sliding-window bitmap for O(1) duplicate detection +/// (inspired by lxin/quic kernel pnspace.c). pub const ReceivedPacketTracker = struct { allocator: Allocator, - received: RangeSet, + received: PacketNumberBitmap = .{}, largest_received: ?u64 = null, largest_received_time: i64 = 0, ack_eliciting_since_last_ack: u32 = 0, @@ -313,17 +317,15 @@ pub const ReceivedPacketTracker = struct { pub fn init(allocator: Allocator) ReceivedPacketTracker { return .{ .allocator = allocator, - .received = RangeSet.init(allocator), }; } pub fn deinit(self: *ReceivedPacketTracker) void { - self.received.deinit(); + _ = self; } pub fn onPacketReceived(self: *ReceivedPacketTracker, pn: u64, ack_eliciting: bool, now: i64, ecn: u2) !void { - if (self.received.contains(pn)) return; - try self.received.add(pn); + if (!self.received.mark(pn)) return; // duplicate if (self.largest_received == null or pn > self.largest_received.?) { self.largest_received = pn; @@ -391,6 +393,12 @@ pub const ReceivedPacketTracker = struct { self.received.removeBelow(below); } + /// Backwards-compatible accessor: returns number of distinct ranges in the bitmap. + pub fn rangeCount(self: *const ReceivedPacketTracker) usize { + var buf: [MAX_ACK_RANGES + 1]Range = undefined; + return self.received.getRanges(&buf); + } + /// Check if there are any unacknowledged ack-eliciting packets. /// Used by the packet packer to piggyback ACKs on outgoing data packets. pub fn hasUnackedAckEliciting(self: *const ReceivedPacketTracker) bool { @@ -417,10 +425,14 @@ pub const ReceivedPacketTracker = struct { } const largest = self.largest_received orelse return null; - const recv_ranges = self.received.getRanges(); - if (recv_ranges.len == 0) return null; - const first_range = recv_ranges[0]; + // Scan bitmap to produce ranges in descending order. + // First slot is used for the first_ack_range; rest become additional ACK ranges. 
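+        // E.g. received {0-2, 5-7, 10-12} with largest=12: all_ranges[0]=[10,12]
+        // gives first_ack_range = 12 - 10 = 2, and [5,7], [0,2] follow as the
+        // additional ACK ranges, in the descending order RFC 9000 §19.3 expects.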
+ var all_ranges: [MAX_ACK_RANGES + 1]Range = undefined; + const total = self.received.getRanges(&all_ranges); + if (total == 0) return null; + + const first_range = all_ranges[0]; const first_ack_range = largest - first_range.start; const ack_delay_ns = now - self.largest_received_time; @@ -431,11 +443,11 @@ pub const ReceivedPacketTracker = struct { self.ack_alarm = null; self.ack_eliciting_since_last_ack = 0; - // Populate additional ACK ranges from received range set + // Populate additional ACK ranges from bitmap scan var ack_ranges: [MAX_ACK_RANGES]AckRange = undefined; var ack_range_count: u8 = 0; - if (recv_ranges.len > 1) { - for (recv_ranges[1..]) |r| { + if (total > 1) { + for (all_ranges[1..total]) |r| { if (ack_range_count >= MAX_ACK_RANGES) break; ack_ranges[ack_range_count] = .{ .start = r.start, .end = r.end }; ack_range_count += 1; @@ -889,14 +901,15 @@ test "ReceivedPacketTracker: pruneAckedRanges removes old ranges" { } // Ranges should cover 0..9 - try testing.expectEqual(@as(usize, 1), tracker.received.getRanges().len); - try testing.expectEqual(@as(u64, 0), tracker.received.getRanges()[0].start); + try testing.expectEqual(@as(usize, 1), tracker.rangeCount()); // Prune below 5 — ranges should now start at 5 tracker.pruneAckedRanges(5); - try testing.expectEqual(@as(usize, 1), tracker.received.getRanges().len); - try testing.expectEqual(@as(u64, 5), tracker.received.getRanges()[0].start); - try testing.expectEqual(@as(u64, 9), tracker.received.getRanges()[0].end); + try testing.expectEqual(@as(usize, 1), tracker.rangeCount()); + // Verify 0-4 are gone, 5-9 remain + try testing.expect(!tracker.received.contains(4)); + try testing.expect(tracker.received.contains(5)); + try testing.expect(tracker.received.contains(9)); } // App-limited cwnd suppression (RFC 9002 §7.8) diff --git a/src/quic/ranges.zig b/src/quic/ranges.zig index 1ef651b..023290e 100644 --- a/src/quic/ranges.zig +++ b/src/quic/ranges.zig @@ -166,6 +166,209 @@ pub const RangeSet = struct { } }; +/// Sliding-window bitmap for O(1) packet number tracking. +/// Replaces RangeSet for received packet number duplicate detection. +/// +/// Design (inspired by lxin/quic kernel pnspace.c): +/// - Fixed 4096-bit bitmap (512 bytes, no heap allocation) +/// - O(1) contains() via bit test +/// - O(1) mark() via bit set +/// - getRanges() scans bitmap to produce descending Range array for ACK frames +/// - removeBelow() shifts the window forward +pub const PacketNumberBitmap = struct { + const BITMAP_BITS: usize = 4096; + const MaskInt = std.DynamicBitSet.MaskInt; + const MASK_BITS = @bitSizeOf(MaskInt); + const NUM_MASKS: usize = BITMAP_BITS / MASK_BITS; + + /// The bitmap, stored inline (512 bytes for 4096 bits). + masks: [NUM_MASKS]MaskInt = [_]MaskInt{0} ** NUM_MASKS, + + /// Base packet number: bitmap[0] corresponds to this PN. + base: u64 = 0, + + /// Highest marked packet number (for fast largest() query). + highest: ?u64 = null, + + /// Number of set bits. + count: u32 = 0, + + /// Mark a packet number as received. Returns true if newly added (not duplicate). 
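+ /// Packet numbers below the current window base are treated as duplicates:
+ /// the window has already slid past them and they can no longer be tracked.
+ /// Typical call site (mirrors onPacketReceived above):
+ ///     if (!self.received.mark(pn)) return; // duplicate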
+ pub fn mark(self: *PacketNumberBitmap, pn: u64) bool { + if (self.highest != null and pn < self.base) return false; // too old + + // If pn is beyond current window, shift the window forward + if (pn >= self.base + BITMAP_BITS) { + self.shiftTo(pn); + } + + const offset = pn - self.base; + const idx = @as(usize, @intCast(offset / MASK_BITS)); + const bit: std.math.Log2Int(MaskInt) = @intCast(offset % MASK_BITS); + + if (self.masks[idx] & (@as(MaskInt, 1) << bit) != 0) { + return false; // duplicate + } + + self.masks[idx] |= @as(MaskInt, 1) << bit; + self.count += 1; + + if (self.highest == null or pn > self.highest.?) { + self.highest = pn; + } + return true; + } + + /// O(1) duplicate detection. + pub fn contains(self: *const PacketNumberBitmap, pn: u64) bool { + if (self.highest == null) return false; + if (pn < self.base or pn >= self.base + BITMAP_BITS) return false; + + const offset = pn - self.base; + const idx = @as(usize, @intCast(offset / MASK_BITS)); + const bit: std.math.Log2Int(MaskInt) = @intCast(offset % MASK_BITS); + + return self.masks[idx] & (@as(MaskInt, 1) << bit) != 0; + } + + pub fn largest(self: *const PacketNumberBitmap) ?u64 { + return self.highest; + } + + /// Remove all entries below `val` by shifting the base forward. + pub fn removeBelow(self: *PacketNumberBitmap, val: u64) void { + if (val <= self.base) return; + + const shift_by = val - self.base; + if (shift_by >= BITMAP_BITS) { + // Clear everything + @memset(&self.masks, 0); + self.base = val; + self.count = 0; + // highest stays — it may be above val + return; + } + + const word_shift = @as(usize, @intCast(shift_by / MASK_BITS)); + const bit_shift: std.math.Log2Int(MaskInt) = @intCast(shift_by % MASK_BITS); + + // Count bits being removed (for accurate count tracking) + var removed: u32 = 0; + if (word_shift > 0) { + for (self.masks[0..word_shift]) |w| { + removed += @popCount(w); + } + } + // Count bits in the partial word that are being shifted out + if (bit_shift > 0 and word_shift < NUM_MASKS) { + const partial_mask = (@as(MaskInt, 1) << bit_shift) - 1; + removed += @popCount(self.masks[word_shift] & partial_mask); + } + self.count -|= removed; + + // Shift words + if (word_shift > 0) { + var i: usize = 0; + while (i + word_shift < NUM_MASKS) : (i += 1) { + self.masks[i] = self.masks[i + word_shift]; + } + // Zero the trailing words + @memset(self.masks[NUM_MASKS - word_shift ..], 0); + } + + // Shift bits within words + if (bit_shift > 0) { + const anti_shift: std.math.Log2Int(MaskInt) = @intCast(MASK_BITS - @as(usize, bit_shift)); + var i: usize = 0; + while (i < NUM_MASKS - 1) : (i += 1) { + self.masks[i] = (self.masks[i] >> bit_shift) | (self.masks[i + 1] << anti_shift); + } + self.masks[NUM_MASKS - 1] >>= bit_shift; + } + + self.base = val; + } + + /// Produce up to `max_ranges` ranges in descending order for ACK frame generation. + /// Scans the bitmap from highest to base, finding contiguous runs of 1s. 
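+ /// At most `out.len` ranges are written and the count is returned. Callers
+ /// size `out` as [MAX_ACK_RANGES + 1]Range: slot 0 becomes the first ACK
+ /// range, the rest become additional ranges (see ack_handler.zig).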
+ pub fn getRanges(self: *const PacketNumberBitmap, out: []Range) usize { + const hi = self.highest orelse return 0; + if (hi < self.base) return 0; + + var range_count: usize = 0; + const max_offset = hi - self.base; + + var pos: u64 = max_offset; + while (range_count < out.len) { + // Find next set bit at or below pos (scanning downward) + const end_off = self.findPrevSet(pos) orelse break; + // Find the start of this contiguous run + const start_off = self.findRunStart(end_off); + + out[range_count] = .{ + .start = self.base + start_off, + .end = self.base + end_off, + }; + range_count += 1; + + if (start_off == 0) break; + pos = start_off - 1; + } + + return range_count; + } + + /// Find the highest set bit at or below `offset` (scanning downward). + fn findPrevSet(self: *const PacketNumberBitmap, offset: u64) ?u64 { + var pos: i64 = @intCast(offset); + while (pos >= 0) { + const p: usize = @intCast(pos); + const idx = p / MASK_BITS; + const bit_in_word = p % MASK_BITS; + + // Mask off bits above our position + const mask = self.masks[idx] & ((@as(MaskInt, 2) << @as(std.math.Log2Int(MaskInt), @intCast(bit_in_word))) -% 1); + if (mask != 0) { + const highest_bit = MASK_BITS - 1 - @as(usize, @clz(mask)); + return @intCast(idx * MASK_BITS + highest_bit); + } + // Move to end of previous word + if (idx == 0) break; + pos = @as(i64, @intCast(idx * MASK_BITS)) - 1; + } + return null; + } + + /// Find the lowest offset in a contiguous run of set bits ending at `end_offset`. + fn findRunStart(self: *const PacketNumberBitmap, end_offset: u64) u64 { + if (end_offset == 0) { + return if (self.masks[0] & 1 != 0) 0 else end_offset; + } + + var pos: u64 = end_offset; + while (pos > 0) { + pos -= 1; + const idx = @as(usize, @intCast(pos / MASK_BITS)); + const bit: std.math.Log2Int(MaskInt) = @intCast(pos % MASK_BITS); + if (self.masks[idx] & (@as(MaskInt, 1) << bit) == 0) { + return pos + 1; + } + } + // Bit 0 is also set — run starts at 0 + return 0; + } + + /// Shift the window so that `pn` fits. Preserves as much history as possible. + fn shiftTo(self: *PacketNumberBitmap, pn: u64) void { + // Shift base so pn fits within the window, leaving some room for reordering. + // Place pn at 3/4 of the window to leave room for future packets. 
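+ // With BITMAP_BITS = 4096 this keeps up to 3072 PNs of history behind
+ // pn and ~1K slots of headroom ahead of it (the "3/4 history preserved"
+ // behavior noted in the commit message).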
+ const new_base = if (pn >= BITMAP_BITS * 3 / 4) pn - BITMAP_BITS * 3 / 4 else 0; + if (new_base > self.base) { + self.removeBelow(new_base); + } + } +}; + // Tests test "RangeSet: add single values" { @@ -267,3 +470,171 @@ test "RangeSet: out-of-order with gaps" { try rs.add(3); try testing.expectEqual(@as(usize, 1), rs.len()); } + +// PacketNumberBitmap tests + +test "PacketNumberBitmap: mark and contains" { + var bm = PacketNumberBitmap{}; + + try testing.expect(bm.mark(0)); + try testing.expect(bm.contains(0)); + try testing.expect(!bm.contains(1)); + + // Duplicate returns false + try testing.expect(!bm.mark(0)); + + try testing.expect(bm.mark(5)); + try testing.expect(bm.contains(5)); + try testing.expect(!bm.contains(3)); + try testing.expectEqual(@as(u64, 5), bm.largest().?); +} + +test "PacketNumberBitmap: sequential packets" { + var bm = PacketNumberBitmap{}; + + var i: u64 = 0; + while (i < 100) : (i += 1) { + try testing.expect(bm.mark(i)); + } + try testing.expectEqual(@as(u32, 100), bm.count); + try testing.expectEqual(@as(u64, 99), bm.largest().?); + + // All should be present + i = 0; + while (i < 100) : (i += 1) { + try testing.expect(bm.contains(i)); + } + try testing.expect(!bm.contains(100)); +} + +test "PacketNumberBitmap: getRanges descending" { + var bm = PacketNumberBitmap{}; + + // Create ranges: [0,2], gap, [5,7], gap, [10,12] + _ = bm.mark(0); + _ = bm.mark(1); + _ = bm.mark(2); + _ = bm.mark(5); + _ = bm.mark(6); + _ = bm.mark(7); + _ = bm.mark(10); + _ = bm.mark(11); + _ = bm.mark(12); + + var out: [10]Range = undefined; + const n = bm.getRanges(&out); + try testing.expectEqual(@as(usize, 3), n); + + // Descending order: highest first + try testing.expectEqual(@as(u64, 10), out[0].start); + try testing.expectEqual(@as(u64, 12), out[0].end); + try testing.expectEqual(@as(u64, 5), out[1].start); + try testing.expectEqual(@as(u64, 7), out[1].end); + try testing.expectEqual(@as(u64, 0), out[2].start); + try testing.expectEqual(@as(u64, 2), out[2].end); +} + +test "PacketNumberBitmap: removeBelow" { + var bm = PacketNumberBitmap{}; + + var i: u64 = 0; + while (i < 20) : (i += 1) { + _ = bm.mark(i); + } + + bm.removeBelow(10); + try testing.expectEqual(@as(u64, 10), bm.base); + + // 0-9 should be gone + i = 0; + while (i < 10) : (i += 1) { + try testing.expect(!bm.contains(i)); + } + // 10-19 should still be present + while (i < 20) : (i += 1) { + try testing.expect(bm.contains(i)); + } + try testing.expectEqual(@as(u32, 10), bm.count); +} + +test "PacketNumberBitmap: out-of-order with gaps" { + var bm = PacketNumberBitmap{}; + + _ = bm.mark(0); + _ = bm.mark(2); + _ = bm.mark(4); + + try testing.expect(bm.contains(0)); + try testing.expect(!bm.contains(1)); + try testing.expect(bm.contains(2)); + try testing.expect(!bm.contains(3)); + try testing.expect(bm.contains(4)); + + var out: [10]Range = undefined; + const n = bm.getRanges(&out); + try testing.expectEqual(@as(usize, 3), n); + try testing.expectEqual(@as(u64, 4), out[0].start); + try testing.expectEqual(@as(u64, 4), out[0].end); + try testing.expectEqual(@as(u64, 2), out[1].start); + try testing.expectEqual(@as(u64, 2), out[1].end); + try testing.expectEqual(@as(u64, 0), out[2].start); + try testing.expectEqual(@as(u64, 0), out[2].end); + + // Fill gaps + _ = bm.mark(1); + _ = bm.mark(3); + const n2 = bm.getRanges(&out); + try testing.expectEqual(@as(usize, 1), n2); + try testing.expectEqual(@as(u64, 0), out[0].start); + try testing.expectEqual(@as(u64, 4), out[0].end); +} + +test "PacketNumberBitmap: large 
packet numbers" { + var bm = PacketNumberBitmap{}; + + // Start at high PN + _ = bm.mark(1_000_000); + _ = bm.mark(1_000_001); + _ = bm.mark(1_000_002); + + try testing.expect(bm.contains(1_000_000)); + try testing.expect(bm.contains(1_000_001)); + try testing.expect(bm.contains(1_000_002)); + try testing.expect(!bm.contains(999_999)); + try testing.expectEqual(@as(u64, 1_000_002), bm.largest().?); +} + +test "PacketNumberBitmap: window shift on large jump" { + var bm = PacketNumberBitmap{}; + + // Mark some early packets + _ = bm.mark(0); + _ = bm.mark(1); + _ = bm.mark(2); + + // Jump far ahead — forces window shift + _ = bm.mark(10000); + try testing.expect(bm.contains(10000)); + // Early packets should be gone (shifted out) + try testing.expect(!bm.contains(0)); + try testing.expect(!bm.contains(1)); + try testing.expect(!bm.contains(2)); +} + +test "PacketNumberBitmap: getRanges limited output" { + var bm = PacketNumberBitmap{}; + + // Create many ranges + _ = bm.mark(0); + _ = bm.mark(2); + _ = bm.mark(4); + _ = bm.mark(6); + _ = bm.mark(8); + + // Only ask for 2 ranges (should get the highest 2) + var out: [2]Range = undefined; + const n = bm.getRanges(&out); + try testing.expectEqual(@as(usize, 2), n); + try testing.expectEqual(@as(u64, 8), out[0].start); + try testing.expectEqual(@as(u64, 6), out[1].start); +} From 9aafb73480e6562fb88fe9d6719d0f95e2226c9d Mon Sep 17 00:00:00 2001 From: Endel Dreyer Date: Thu, 19 Mar 2026 01:03:58 -0300 Subject: [PATCH 2/5] Add HyStart++ (RFC 9406) to CUBIC for better slow start exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detects RTT increases during slow start before actual packet loss, reducing bufferbloat on the first congestion event. - Standard → CSS transition on RTT increase above eta threshold - CSS grows by MSS/4 per ACK (vs MSS in standard slow start) - CSS exits to congestion avoidance after 5 consecutive rounds - CSS returns to standard if RTT stabilizes - Round tracking via largest_sent_pn boundary - Connection passes latest_rtt and largest_sent_pn to CC on ACK --- src/quic/congestion.zig | 278 +++++++++++++++++++++++++++++++++++++++- src/quic/connection.zig | 14 +- 2 files changed, 287 insertions(+), 5 deletions(-) diff --git a/src/quic/congestion.zig b/src/quic/congestion.zig index 6f836ab..71e5a53 100644 --- a/src/quic/congestion.zig +++ b/src/quic/congestion.zig @@ -162,7 +162,7 @@ pub const NewReno = struct { } }; -/// CUBIC congestion control (RFC 8312). +/// CUBIC congestion control (RFC 8312) with HyStart++ (RFC 9406). /// /// CUBIC uses a cubic function for window growth, providing better /// bandwidth utilization on high-BDP networks compared to NewReno. @@ -171,6 +171,9 @@ pub const NewReno = struct { /// - Provides a TCP-friendly region for fairness with Reno flows /// - Beta = 0.7 (multiplicative decrease factor) /// - C = 0.4 (cubic scaling constant) +/// +/// HyStart++ (RFC 9406) improves slow start exit by detecting RTT +/// increases before actual packet loss, reducing bufferbloat. pub const Cubic = struct { /// Current congestion window in bytes. congestion_window: u64, @@ -207,6 +210,37 @@ pub const Cubic = struct { /// W_est: estimated TCP-friendly window (for TCP-friendly region). w_est: u64 = 0, + // ── HyStart++ state (RFC 9406) ── + + /// Current slow start phase + ss_phase: SlowStartPhase = .standard, + + /// Packet number at which the current round ends. + /// A round ends when an ACK acknowledges a packet >= round_end_pn. 
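+ /// advanceRound() rolls this forward and moves current_round_min_rtt
+ /// into last_round_min_rtt at each boundary.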
+ round_end_pn: ?u64 = null, + + /// Minimum RTT observed in the current round (nanoseconds). + current_round_min_rtt: i64 = std.math.maxInt(i64), + + /// Baseline min RTT from the previous round (nanoseconds). + /// Used to detect RTT increases. + last_round_min_rtt: i64 = std.math.maxInt(i64), + + /// Number of RTT samples collected in the current round. + rtt_sample_count: u32 = 0, + + /// Number of CSS rounds completed. + css_rounds: u32 = 0, + + /// cwnd at the beginning of CSS (for ssthresh calculation). + css_baseline_cwnd: u64 = 0, + + const SlowStartPhase = enum { + standard, // Normal exponential slow start + css, // Conservative Slow Start + done, // Exited slow start (won't re-enter) + }; + // ── Constants ── // Beta = 0.7 (RFC 8312 §4.5 recommends 0.7 for QUIC) const BETA_NUM: u64 = 7; @@ -217,6 +251,13 @@ pub const Cubic = struct { const C_SCALED: u64 = 400; const C_DENOM: u64 = 1000; + // HyStart++ constants (RFC 9406 §4.2) + const MIN_RTT_THRESH: i64 = 4_000_000; // 4ms in ns + const MAX_RTT_THRESH: i64 = 16_000_000; // 16ms in ns + const N_RTT_SAMPLE: u32 = 8; // Min samples per round + const CSS_GROWTH_DIVISOR: u64 = 4; // cwnd += MSS/4 per ACK in CSS + const CSS_ROUNDS: u32 = 5; // CSS rounds before exiting to CA + pub fn inSlowStart(self: *const Cubic) bool { return self.congestion_window < self.ssthresh; } @@ -242,7 +283,14 @@ pub const Cubic = struct { } /// Called when a packet is acknowledged. + /// `latest_rtt_ns` is the RTT sample from this ACK (0 if not a new sample). + /// `largest_pn` is the largest packet number being sent (for round tracking). pub fn onPacketAcked(self: *Cubic, acked_bytes: u64, sent_time: i64) void { + self.onPacketAckedWithRtt(acked_bytes, sent_time, 0, null); + } + + /// Extended version with RTT sample for HyStart++. + pub fn onPacketAckedWithRtt(self: *Cubic, acked_bytes: u64, sent_time: i64, latest_rtt_ns: i64, largest_sent_pn: ?u64) void { // Don't grow window during recovery if (self.inCongestionRecovery(sent_time)) return; @@ -250,8 +298,7 @@ pub const Cubic = struct { if (self.app_limited) return; if (self.inSlowStart()) { - // Slow start: increase by acked_bytes (same as NewReno) - self.congestion_window += acked_bytes; + self.slowStartOnAck(acked_bytes, latest_rtt_ns, largest_sent_pn); return; } @@ -259,6 +306,94 @@ pub const Cubic = struct { self.cubicUpdate(acked_bytes); } + /// HyStart++ slow start logic (RFC 9406). + fn slowStartOnAck(self: *Cubic, acked_bytes: u64, latest_rtt_ns: i64, largest_sent_pn: ?u64) void { + // Initialize first round if needed + if (self.round_end_pn == null) { + if (largest_sent_pn) |pn| { + self.round_end_pn = pn + 1; + } + } + + // Sample RTT + if (latest_rtt_ns > 0) { + self.rtt_sample_count += 1; + if (latest_rtt_ns < self.current_round_min_rtt) { + self.current_round_min_rtt = latest_rtt_ns; + } + } + + switch (self.ss_phase) { + .standard => { + // Normal exponential growth + self.congestion_window += acked_bytes; + + // Check for round end + if (self.isRoundEnd(largest_sent_pn)) { + // Enough samples? Check RTT increase. 
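+ // RFC 9406 §4.2: enter CSS when currentRoundMinRTT >= lastRoundMinRTT + eta,
+ // with eta = clamp(lastRoundMinRTT / 8, MIN_RTT_THRESH, MAX_RTT_THRESH).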
+ if (self.rtt_sample_count >= N_RTT_SAMPLE and + self.last_round_min_rtt < std.math.maxInt(i64)) + { + const eta = clampRttThresh(self.last_round_min_rtt); + if (self.current_round_min_rtt >= self.last_round_min_rtt + eta) { + // RTT increased → enter CSS + self.ss_phase = .css; + self.css_rounds = 0; + self.css_baseline_cwnd = self.congestion_window; + } + } + self.advanceRound(largest_sent_pn); + } + }, + .css => { + // Conservative Slow Start: grow by MSS / CSS_GROWTH_DIVISOR per ACK + self.congestion_window += self.max_datagram_size / CSS_GROWTH_DIVISOR; + + // Check for round end + if (self.isRoundEnd(largest_sent_pn)) { + self.css_rounds += 1; + + if (self.rtt_sample_count >= N_RTT_SAMPLE and + self.last_round_min_rtt < std.math.maxInt(i64)) + { + const eta = clampRttThresh(self.last_round_min_rtt); + if (self.current_round_min_rtt < self.last_round_min_rtt + eta) { + // RTT stabilized → back to standard slow start + self.ss_phase = .standard; + } + } + + if (self.css_rounds >= CSS_ROUNDS) { + // Exit slow start → congestion avoidance + self.ssthresh = self.congestion_window; + self.ss_phase = .done; + } + + self.advanceRound(largest_sent_pn); + } + }, + .done => { + // Shouldn't reach here (inSlowStart would be false) + self.cubicUpdate(acked_bytes); + }, + } + } + + /// Check if the current round has ended. + fn isRoundEnd(self: *const Cubic, largest_sent_pn: ?u64) bool { + const end_pn = self.round_end_pn orelse return false; + const sent_pn = largest_sent_pn orelse return false; + return sent_pn >= end_pn; + } + + /// Advance to the next round: save RTT stats and set new round boundary. + fn advanceRound(self: *Cubic, largest_sent_pn: ?u64) void { + self.last_round_min_rtt = self.current_round_min_rtt; + self.current_round_min_rtt = std.math.maxInt(i64); + self.rtt_sample_count = 0; + self.round_end_pn = if (largest_sent_pn) |pn| pn + 1 else null; + } + /// CUBIC window update during congestion avoidance. fn cubicUpdate(self: *Cubic, acked_bytes: u64) void { // Initialize epoch on first ACK after congestion event @@ -351,6 +486,9 @@ pub const Cubic = struct { MIN_WINDOW_PACKETS * self.max_datagram_size, ); self.bytes_acked_in_round = 0; + + // HyStart++: loss during slow start means we're done + self.ss_phase = .done; } /// Persistent congestion: reset to minimum window (RFC 9002 §7.6.2). @@ -361,6 +499,14 @@ pub const Cubic = struct { self.epoch_start = null; self.w_max = 0; self.bytes_acked_in_round = 0; + + // HyStart++: reset to standard for the new slow start phase + self.ss_phase = .standard; + self.round_end_pn = null; + self.current_round_min_rtt = std.math.maxInt(i64); + self.last_round_min_rtt = std.math.maxInt(i64); + self.rtt_sample_count = 0; + self.css_rounds = 0; } /// PTO does not reduce window (RFC 9002 §7.5). @@ -386,6 +532,13 @@ pub const Cubic = struct { } }; +/// HyStart++ RTT threshold clamp (RFC 9406 §4.2): +/// eta = clamp(lastRoundMinRTT / 8, MIN_RTT_THRESH, MAX_RTT_THRESH) +fn clampRttThresh(last_round_min_rtt: i64) i64 { + const eighth = @divTrunc(last_round_min_rtt, 8); + return @max(Cubic.MIN_RTT_THRESH, @min(eighth, Cubic.MAX_RTT_THRESH)); +} + /// Integer cube root approximation (Newton's method). 
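+/// CUBIC needs K = cbrt(w_max * (1 - beta) / C) (RFC 8312 §4.1); this
+/// supplies the cube root without floating point.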
fn icbrt(x: u64) u64 { if (x == 0) return 0; @@ -692,6 +845,125 @@ test "Cubic: PTO does not reduce window" { try testing.expectEqual(window_before, cc.congestion_window); } +// ── HyStart++ tests ── + +test "HyStart++: enters CSS on RTT increase" { + var cc = Cubic.init(); + try testing.expect(cc.inSlowStart()); + try testing.expectEqual(Cubic.SlowStartPhase.standard, cc.ss_phase); + + const base_rtt: i64 = 20_000_000; // 20ms + + // Simulate: sent packets 0..9, then ACK them all. + // largest_sent_pn=9 stays fixed during the entire ACK batch. + // First ACK initializes round_end_pn to 10. + // No round ends during this batch (all ACKed PNs < 10). + var i: u32 = 0; + while (i < Cubic.N_RTT_SAMPLE) : (i += 1) { + cc.onPacketAckedWithRtt(1200, 100, base_rtt, 9); + } + // round_end_pn = 10, no round ended yet (9 < 10) + try testing.expect(cc.round_end_pn != null); + try testing.expectEqual(@as(u64, 10), cc.round_end_pn.?); + try testing.expectEqual(@as(u32, 8), cc.rtt_sample_count); + + // Now simulate: sent packets 10..19, ACK them. + // largest_sent_pn=19. First ACK with pn >= 10 triggers round end. + cc.onPacketAckedWithRtt(1200, 200, base_rtt, 19); + // Round ended: last_round_min_rtt = base_rtt, round_end_pn = 20 + try testing.expectEqual(base_rtt, cc.last_round_min_rtt); + try testing.expectEqual(@as(u64, 20), cc.round_end_pn.?); + + // Continue ACKing round 2 with increased RTT (+5ms > eta=4ms) + const increased_rtt: i64 = base_rtt + 5_000_000; + i = 0; + while (i < Cubic.N_RTT_SAMPLE - 1) : (i += 1) { + cc.onPacketAckedWithRtt(1200, 200, increased_rtt, 19); + } + // 8 total samples in round 2 (1 from above + 7 here), round hasn't ended yet + + // Send packets 20..29, ACK triggers next round end + cc.onPacketAckedWithRtt(1200, 300, increased_rtt, 29); + // Round ended: RTT increased above threshold → should enter CSS + try testing.expectEqual(Cubic.SlowStartPhase.css, cc.ss_phase); +} + +test "HyStart++: CSS exits to CA after CSS_ROUNDS" { + var cc = Cubic.init(); + + // Establish baseline RTT + const base_rtt: i64 = 20_000_000; + var pn: u64 = 0; + + // First round: baseline + var i: u32 = 0; + while (i < Cubic.N_RTT_SAMPLE) : (i += 1) { + cc.onPacketAckedWithRtt(1200, 100, base_rtt, pn); + pn += 1; + } + + // Force into CSS + cc.ss_phase = .css; + cc.css_rounds = 0; + cc.css_baseline_cwnd = cc.congestion_window; + + // Run CSS_ROUNDS rounds with increasing RTT + var round: u32 = 0; + while (round < Cubic.CSS_ROUNDS) : (round += 1) { + const rtt = base_rtt + @as(i64, @intCast(round + 1)) * 2_000_000; + i = 0; + while (i < Cubic.N_RTT_SAMPLE) : (i += 1) { + cc.onPacketAckedWithRtt(1200, 200, rtt, pn); + pn += 1; + } + } + + // Should have exited slow start + try testing.expectEqual(Cubic.SlowStartPhase.done, cc.ss_phase); + try testing.expect(!cc.inSlowStart()); // ssthresh was set +} + +test "HyStart++: loss during slow start sets phase to done" { + var cc = Cubic.init(); + try testing.expectEqual(Cubic.SlowStartPhase.standard, cc.ss_phase); + + cc.onCongestionEvent(100, 200); + try testing.expectEqual(Cubic.SlowStartPhase.done, cc.ss_phase); +} + +test "HyStart++: persistent congestion resets to standard" { + var cc = Cubic.init(); + cc.ss_phase = .css; + cc.css_rounds = 3; + + cc.onPersistentCongestion(1_000_000_000); + try testing.expectEqual(Cubic.SlowStartPhase.standard, cc.ss_phase); + try testing.expectEqual(@as(u32, 0), cc.css_rounds); + try testing.expect(cc.inSlowStart()); +} + +test "HyStart++: CSS grows slower than standard" { + var cc_std = Cubic.init(); + var cc_css = 
Cubic.init(); + cc_css.ss_phase = .css; + cc_css.css_baseline_cwnd = cc_css.congestion_window; + + const initial_std = cc_std.congestion_window; + const initial_css = cc_css.congestion_window; + + // Same ACK to both + cc_std.onPacketAckedWithRtt(1200, 100, 20_000_000, 10); + cc_css.onPacketAckedWithRtt(1200, 100, 20_000_000, 10); + + const growth_std = cc_std.congestion_window - initial_std; + const growth_css = cc_css.congestion_window - initial_css; + + // CSS should grow slower + try testing.expect(growth_css < growth_std); + // CSS growth = MSS / CSS_GROWTH_DIVISOR = 1200 / 4 = 300 + try testing.expectEqual(@as(u64, DEFAULT_MAX_DATAGRAM_SIZE / Cubic.CSS_GROWTH_DIVISOR), growth_css); +} + // ── icbrt tests ── test "icbrt: basic cube roots" { diff --git a/src/quic/connection.zig b/src/quic/connection.zig index 18d3a29..e3a0c66 100644 --- a/src/quic/connection.zig +++ b/src/quic/connection.zig @@ -1345,7 +1345,12 @@ pub const Connection = struct { std.log.info("PMTUD: probe ACK'd, MTU raised to {d}", .{new_mtu}); } - self.cc.onPacketAcked(pkt.size, pkt.time_sent); + self.cc.onPacketAckedWithRtt( + pkt.size, + pkt.time_sent, + self.pkt_handler.rtt_stats.latest_rtt, + if (self.pkt_handler.sent[@intFromEnum(enc_level)].largest_sent) |ls| ls else null, + ); // Track whether a packet sent with current keys has been ACKed if (enc_level == .application) { @@ -1452,7 +1457,12 @@ pub const Connection = struct { self.pacer.max_datagram_size = new_mtu; std.log.info("PMTUD: probe ACK'd, MTU raised to {d}", .{new_mtu}); } - self.cc.onPacketAcked(pkt.size, pkt.time_sent); + self.cc.onPacketAckedWithRtt( + pkt.size, + pkt.time_sent, + self.pkt_handler.rtt_stats.latest_rtt, + if (self.pkt_handler.sent[@intFromEnum(enc_level)].largest_sent) |ls| ls else null, + ); // Stop including HANDSHAKE_DONE once a packet containing it is ACKed if (pkt.has_handshake_done) { From 3f6299226fc66b17bb50cb5536d49be553b34218 Mon Sep 17 00:00:00 2001 From: Endel Dreyer Date: Thu, 19 Mar 2026 01:09:48 -0300 Subject: [PATCH 3/5] Add sub-millisecond pacing via inline micro-sleep in send loop When the pacer requests a delay < 1ms, sleep inline in the send loop instead of returning to the event loop (which has only ms-resolution timers). This gives ~microsecond pacing precision for high-throughput transfers, similar to the Linux kernel QUIC's hrtimer approach. Applied to both Server and Client event loops. --- src/event_loop.zig | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/event_loop.zig b/src/event_loop.zig index a56c12b..855971e 100644 --- a/src/event_loop.zig +++ b/src/event_loop.zig @@ -901,7 +901,21 @@ pub fn Server(comptime Handler: type) type { var send_count: usize = 0; while (send_count < max_burst_packets) : (send_count += 1) { const bytes_written = conn.send(&self.out_buf) catch break; - if (bytes_written == 0) break; + if (bytes_written == 0) { + // Sub-ms pacing: if the pacer wants a short delay (< 1ms), + // spin-wait here instead of returning to the event loop + // which has only ms-resolution timers. This gives us ~µs + // pacing precision similar to the kernel's hrtimer approach. 
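+                    // Delays of 1ms or more (and non-pacing stalls) fall
+                    // through to break; the event loop's ms timer takes over.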
+ if (conn.pacer.bandwidth_shifted > 0) { + const now_ns: i64 = @intCast(std.time.nanoTimestamp()); + const pacer_delay = conn.pacer.timeUntilSend(now_ns); + if (pacer_delay > 0 and pacer_delay < 1_000_000) { // < 1ms + std.Thread.sleep(@intCast(pacer_delay)); + continue; // Retry send after micro-sleep + } + } + break; + } const send_addr = conn.peerAddress(); batch.add( @@ -1710,7 +1724,18 @@ pub fn Client(comptime Handler: type) type { var send_count: usize = 0; while (send_count < max_burst_packets) : (send_count += 1) { const bytes_written = conn.send(&self.out_buf) catch break; - if (bytes_written == 0) break; + if (bytes_written == 0) { + // Sub-ms pacing spin-wait (see server tickAndSend for details) + if (conn.pacer.bandwidth_shifted > 0) { + const now_ns: i64 = @intCast(std.time.nanoTimestamp()); + const pacer_delay = conn.pacer.timeUntilSend(now_ns); + if (pacer_delay > 0 and pacer_delay < 1_000_000) { + std.Thread.sleep(@intCast(pacer_delay)); + continue; + } + } + break; + } self.batch.add( self.out_buf[0..bytes_written], @ptrCast(send_addr), From 0036a5c3ab099de22d12acfafececb26f89d2dc1 Mon Sep 17 00:00:00 2001 From: Endel Dreyer Date: Thu, 19 Mar 2026 01:12:41 -0300 Subject: [PATCH 4/5] Add inline fast path to FrameSorter for in-order stream delivery For the common case where stream data arrives in order (offset == read_pos and no buffered chunks), stores data in a fixed 1500-byte inline buffer instead of going through the AutoArrayHashMap. This eliminates hash map insert/lookup/orderedRemove operations on the hot path. Inspired by lxin/quic kernel's approach of minimizing per-frame allocation. The pop() path still returns heap-allocated data for caller ownership compatibility, but the push() path saves hash map overhead. --- src/quic/stream.zig | 53 +++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/src/quic/stream.zig b/src/quic/stream.zig index 4c0b472..d9f0d31 100644 --- a/src/quic/stream.zig +++ b/src/quic/stream.zig @@ -38,10 +38,16 @@ pub fn isLocal(stream_id: u64, is_server: bool) bool { /// Gap-based frame sorter for out-of-order reassembly of stream data. /// Tracks received byte ranges and returns contiguous data starting from read_pos. +/// +/// Optimization (inspired by lxin/quic kernel): inline fast path for the common +/// in-order case. When data arrives exactly at read_pos with no out-of-order chunks +/// buffered, it's stored in a fixed inline buffer (zero heap allocation). pub const FrameSorter = struct { + const INLINE_BUF_SIZE = 1500; + allocator: Allocator, - /// Buffered data chunks, keyed by offset. + /// Buffered data chunks, keyed by offset. Used for out-of-order delivery. chunks: std.AutoArrayHashMap(u64, []const u8), /// Next offset to be read by the application. @@ -50,6 +56,11 @@ pub const FrameSorter = struct { /// The final offset (set when FIN is received). fin_offset: ?u64 = null, + /// Inline buffer for zero-alloc in-order fast path. + /// When data arrives at read_pos and chunks map is empty, copy here instead. + inline_buf: [INLINE_BUF_SIZE]u8 = undefined, + inline_len: u16 = 0, + pub fn init(allocator: Allocator) FrameSorter { return .{ .allocator = allocator, @@ -58,7 +69,6 @@ pub const FrameSorter = struct { } pub fn deinit(self: *FrameSorter) void { - // Free any owned data for (self.chunks.values()) |data| { self.allocator.free(data); } @@ -68,6 +78,9 @@ pub const FrameSorter = struct { /// Return the highest byte offset buffered (or read_pos if no chunks). 
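+    /// The inline fast-path buffer counts as buffered bytes starting at
+    /// read_pos (see the inline_len check below).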
pub fn highestReceived(self: *const FrameSorter) u64 { var highest = self.read_pos; + if (self.inline_len > 0) { + highest = self.read_pos + self.inline_len; + } for (self.chunks.keys(), self.chunks.values()) |off, val| { const end = off + val.len; if (end > highest) highest = end; @@ -79,24 +92,20 @@ pub const FrameSorter = struct { pub fn push(self: *FrameSorter, offset: u64, data: []const u8, fin: bool) !void { if (fin) { const new_fin = offset + data.len; - // RFC 9000 §4.5: final size cannot change once known if (self.fin_offset) |existing| { if (existing != new_fin) return error.FinalSizeError; } self.fin_offset = new_fin; } - // RFC 9000 §4.5: data cannot exceed known final size if (self.fin_offset) |fs| { if (offset + data.len > fs) return error.FinalSizeError; } if (data.len == 0) return; - // Skip data that's already been read if (offset + data.len <= self.read_pos) return; - // Trim data that partially overlaps with already-read region var effective_offset = offset; var effective_data = data; if (offset < self.read_pos) { @@ -105,20 +114,26 @@ pub const FrameSorter = struct { effective_offset = self.read_pos; } - // Check if there's already a chunk at this offset. - // Don't overwrite a longer chunk with a shorter one (retransmission - // with different fragmentation boundaries). Also free old data to - // prevent memory leaks. + // Fast path: in-order delivery, no buffered chunks, fits inline. + // Avoids hash map insertion + heap allocation entirely. + if (effective_offset == self.read_pos and + self.inline_len == 0 and + self.chunks.count() == 0 and + effective_data.len <= INLINE_BUF_SIZE) + { + @memcpy(self.inline_buf[0..effective_data.len], effective_data); + self.inline_len = @intCast(effective_data.len); + return; + } + + // Slow path: out-of-order or inline buffer occupied if (self.chunks.get(effective_offset)) |existing| { if (existing.len >= effective_data.len) { - // Existing chunk covers at least as much data — skip. return; } - // New chunk is longer — free old, overwrite below. self.allocator.free(existing); } - // Copy data to owned buffer const owned = try self.allocator.dupe(u8, effective_data); errdefer self.allocator.free(owned); @@ -127,7 +142,17 @@ pub const FrameSorter = struct { /// Pop the next contiguous chunk of data from the read position. /// Returns null if there's no data available at the current read position. + /// Returned data is always heap-allocated — caller must free. pub fn pop(self: *FrameSorter) ?[]const u8 { + // Fast path: inline buffer → dupe to heap for caller ownership. + // The win is push() avoided hash map operations entirely. + if (self.inline_len > 0) { + const len = self.inline_len; + self.inline_len = 0; + self.read_pos += len; + return self.allocator.dupe(u8, self.inline_buf[0..len]) catch null; + } + if (self.chunks.get(self.read_pos)) |data| { _ = self.chunks.orderedRemove(self.read_pos); self.read_pos += data.len; @@ -139,7 +164,7 @@ pub const FrameSorter = struct { /// Check if all data has been received (FIN reached and all data consumed). pub fn isComplete(self: *const FrameSorter) bool { if (self.fin_offset) |fin| { - return self.read_pos >= fin; + return self.read_pos >= fin and self.inline_len == 0; } return false; } From 9ff3e255d68ef9b6833dbe2f08c7118e9b20d5e2 Mon Sep 17 00:00:00 2001 From: Endel Dreyer Date: Thu, 19 Mar 2026 01:16:44 -0300 Subject: [PATCH 5/5] FrameSorter: inline fast path + O(1) swapRemove for pop Two optimizations for the stream frame reassembly hot path: 1. 
Inline buffer: when data arrives in-order (offset == read_pos) and no out-of-order chunks are buffered, copy to a fixed 1500-byte inline buffer instead of going through AutoArrayHashMap operations. Avoids hash map insert on push for the common case. 2. swapRemove instead of orderedRemove in pop(): O(1) instead of O(n) for removing consumed chunks from the hash map. Order doesn't matter since lookup is by key (offset), not position. --- src/quic/stream.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quic/stream.zig b/src/quic/stream.zig index d9f0d31..1469272 100644 --- a/src/quic/stream.zig +++ b/src/quic/stream.zig @@ -154,7 +154,7 @@ pub const FrameSorter = struct { } if (self.chunks.get(self.read_pos)) |data| { - _ = self.chunks.orderedRemove(self.read_pos); + _ = self.chunks.swapRemove(self.read_pos); self.read_pos += data.len; return data; }