From 740701ed60b015fd6ebf6bf49610bbdebb0596e3 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 18:13:59 +0200 Subject: [PATCH 01/13] wg-relay: drop non-WG-shaped packets at XDP First piece of the 0.2.1 hardening series (see docs/design/wg_relay_hardening.md). Inspect the first byte of the UDP payload at XDP and confirm it's a WireGuard message type (1 init / 2 response / 3 cookie / 4 transport) with the expected length (148 / 92 / 64 / >=32 respectively); drop otherwise. Userspace forwarder mirrors the same check so the cold-start path can't be used to bypass the BPF filter. New counter drop_not_wg_shaped surfaced in `wg show`, summed across the two paths so the operator sees one number. Verified on the libvirt fleet: * fleet ping test still PASS (4/4, XDP fast-path) * sending random garbage / type 0x05 forgeries from a registered peer increments drop_not_wg_shaped, doesn't tick drop_unknown_src (filter fires before peer lookup) * real WG handshake + transport packets all pass through --- bpf/wg_relay.bpf.c | 57 +++++++++++++++++++++++++++++++---- include/hyper_derp/wg_relay.h | 7 +++++ src/einheit_channel.cc | 8 +++++ src/wg_relay.cc | 35 +++++++++++++++++++-- 4 files changed, 99 insertions(+), 8 deletions(-) diff --git a/bpf/wg_relay.bpf.c b/bpf/wg_relay.bpf.c index 72aad0a..57ed857 100644 --- a/bpf/wg_relay.bpf.c +++ b/bpf/wg_relay.bpf.c @@ -27,11 +27,13 @@ #include #include -// Stats indices. -#define STAT_RX 0 -#define STAT_FWD 1 -#define STAT_PASS_NO_PEER 2 -#define STAT_PASS_NO_MAC 3 +// Stats indices. Keep in sync with WgXdpStats in +// include/hyper_derp/wg_relay.h. 
+#define STAT_RX 0 +#define STAT_FWD 1 +#define STAT_PASS_NO_PEER 2 +#define STAT_PASS_NO_MAC 3 +#define STAT_DROP_NOT_WG_SHAPED 4 // -- Map types -------------------------------------------- @@ -91,7 +93,7 @@ struct { struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __uint(max_entries, 4); + __uint(max_entries, 8); __type(key, __u32); __type(value, __u64); } wg_xdp_stats SEC(".maps"); @@ -205,6 +207,49 @@ int wg_relay_xdp(struct xdp_md *ctx) inc_stat(STAT_RX); + // WG-shape filter — peek at the first byte of the UDP + // payload and verify it's a WireGuard message type + // (1 init, 2 response, 3 cookie, 4 transport). Anything + // else is either malformed or a non-WG client that ended + // up at the relay's port; dropping at XDP keeps it off + // the forward path entirely so the partner never has to + // process it. Length sanity covers the fixed-size types; + // transport-data has variable length capped by MTU. + __u8 *wg = (void *)(udp + 1); + if ((void *)(wg + 1) > data_end) { + inc_stat(STAT_DROP_NOT_WG_SHAPED); + return XDP_DROP; + } + __u16 udp_payload_len = + bpf_ntohs(udp->len) - sizeof(struct udphdr); + __u8 wg_type = wg[0]; + if (wg_type == 1) { + if (udp_payload_len != 148) { + inc_stat(STAT_DROP_NOT_WG_SHAPED); + return XDP_DROP; + } + } else if (wg_type == 2) { + if (udp_payload_len != 92) { + inc_stat(STAT_DROP_NOT_WG_SHAPED); + return XDP_DROP; + } + } else if (wg_type == 3) { + if (udp_payload_len != 64) { + inc_stat(STAT_DROP_NOT_WG_SHAPED); + return XDP_DROP; + } + } else if (wg_type == 4) { + // Transport data: 16 B header (type+reserved 4 B, + // receiver 4 B, counter 8 B) + AEAD tag (16 B) minimum. + if (udp_payload_len < 32) { + inc_stat(STAT_DROP_NOT_WG_SHAPED); + return XDP_DROP; + } + } else { + inc_stat(STAT_DROP_NOT_WG_SHAPED); + return XDP_DROP; + } + // Look up the source endpoint in the peer map. 
Miss // means either an unregistered peer or one whose // source IP/port doesn't match the operator's pin — diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index e5bedaf..dc2d144 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -83,6 +83,11 @@ struct WgRelayStats { std::atomic fwd_packets{0}; std::atomic drop_unknown_src{0}; std::atomic drop_no_link{0}; + /// First-byte / length sanity check — drops packets whose + /// shape doesn't match a WireGuard message type. Mirrors the + /// XDP STAT_DROP_NOT_WG_SHAPED counter for the userspace + /// fallback path. + std::atomic drop_not_wg_shaped{0}; }; /// One attached NIC. The same BPF program is attached to @@ -140,6 +145,7 @@ struct WgXdpStats { uint64_t fwd_xdp = 0; uint64_t pass_no_peer = 0; uint64_t pass_no_mac = 0; + uint64_t drop_not_wg_shaped = 0; }; struct WgRelay { @@ -225,6 +231,7 @@ struct WgRelayStatsSnapshot { uint64_t fwd_packets; uint64_t drop_unknown_src; uint64_t drop_no_link; + uint64_t drop_not_wg_shaped; size_t peer_count; size_t link_count; /// XDP-path counters, zero when xdp.attached is false. diff --git a/src/einheit_channel.cc b/src/einheit_channel.cc index daf6bdd..eead704 100644 --- a/src/einheit_channel.cc +++ b/src/einheit_channel.cc @@ -1941,6 +1941,14 @@ void WgShow(Server* s, const Request& /*req*/, b += std::format("drop_unknown_src={}\n", stats.drop_unknown_src); b += std::format("drop_no_link={}\n", stats.drop_no_link); + // Aggregate non-WG-shaped drops across userspace + XDP so + // the operator sees one number regardless of which path + // the bytes took. + uint64_t shape_total = stats.drop_not_wg_shaped; + if (stats.xdp_attached) { + shape_total += stats.xdp.drop_not_wg_shaped; + } + b += std::format("drop_not_wg_shaped={}\n", shape_total); b += std::format("xdp_attached={}\n", stats.xdp_attached ? 
"true" : "false"); if (stats.xdp_attached) { diff --git a/src/wg_relay.cc b/src/wg_relay.cc index a37f7bf..5fd58c9 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -355,12 +355,40 @@ void PersistRosterLocked(WgRelay* r) { // -- Forward path -------------------------------------- +// Return true if `pkt` looks like a valid WireGuard +// message: first byte is one of the four message types +// (1 init, 2 response, 3 cookie, 4 transport) and the +// length matches the type's expected size. Transport +// data has variable length; we only enforce a minimum. +// Mirrors the BPF-side check in bpf/wg_relay.bpf.c. +bool IsWgShaped(const uint8_t* pkt, size_t len) { + if (len < 1) return false; + switch (pkt[0]) { + case 1: return len == 148; // handshake init + case 2: return len == 92; // handshake response + case 3: return len == 64; // cookie reply + case 4: return len >= 32; // transport data + default: return false; + } +} + void HandlePacket(WgRelay* r, const uint8_t* pkt, size_t len, const sockaddr_storage& src, socklen_t src_len) { r->stats.rx_packets.fetch_add(1, std::memory_order_relaxed); + // Shape filter — drop anything that isn't a WireGuard + // message (types 1..4) with the right length. Keeps + // operator counters honest about non-WG noise hitting + // the relay's port and stops us forwarding garbage to + // the partner. Mirrors the BPF STAT_DROP_NOT_WG_SHAPED + // path so userspace and XDP agree on what gets dropped. + if (!IsWgShaped(pkt, len)) { + r->stats.drop_not_wg_shaped.fetch_add( + 1, std::memory_order_relaxed); + return; + } // Lookup is O(N) over the peer table; N is operator- // small (dozens), and the lock window covers the // sendto so the table can't change underfoot. 
@@ -847,8 +875,9 @@ void XdpReadStats(const WgRelay* r, WgXdpStats* out) { auto vals = std::make_unique( static_cast(ncpus)); uint64_t* sums[] = {&out->rx_xdp, &out->fwd_xdp, - &out->pass_no_peer, &out->pass_no_mac}; - for (uint32_t k = 0; k < 4; ++k) { + &out->pass_no_peer, &out->pass_no_mac, + &out->drop_not_wg_shaped}; + for (uint32_t k = 0; k < 5; ++k) { if (bpf_map_lookup_elem(r->xdp.stats_map_fd, &k, vals.get()) < 0) { continue; @@ -1207,6 +1236,8 @@ WgRelayStatsSnapshot WgRelayGetStats(const WgRelay* r) { std::memory_order_relaxed); s.drop_no_link = r->stats.drop_no_link.load( std::memory_order_relaxed); + s.drop_not_wg_shaped = r->stats.drop_not_wg_shaped.load( + std::memory_order_relaxed); s.xdp_attached = r->xdp.attached; XdpReadStats(r, &s.xdp); std::lock_guard lk(r->peers_mu); From 76988469dbed9210fe1a7c64beadeb7525632743 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 18:58:17 +0200 Subject: [PATCH 02/13] wg-relay: verify MAC1 on handshakes against partner pubkey Second piece of the 0.2.1 hardening series. When the operator has stamped a pubkey on a peer's link partner, every handshake init/response from that peer is verified: MAC1 is computed as Blake2s_keyed(Blake2s(LABEL_MAC1 || partner_pubkey), msg[0..len-32], 16) and compared against the MAC1 field at [len-32..len-16]. Mismatch -> drop, drop_handshake_pubkey_mismatch++. Engages only when the partner has a stamped pubkey, so existing operators retain today's behaviour exactly until they opt in. * New self-contained Blake2s in src/crypto/. libsodium ships Blake2b only; WG uses Blake2s. RFC 7693 reference port, smoke-tested against published vectors. * BPF XDP_PASS for type 1/2 so userspace owns MAC1 verification. Cookie (3) and transport data (4) keep the XDP fast path. * Verified on libvirt fleet: baseline + with-real-pubkeys both PASS; with-fake-pubkey drops handshakes correctly; restore recovers cleanly. 
--- bpf/wg_relay.bpf.c | 11 +++ cmake/libderp.cmake | 6 ++ include/hyper_derp/wg_relay.h | 8 ++ src/crypto/blake2s.cc | 135 ++++++++++++++++++++++++++++++++++ src/crypto/blake2s.h | 25 +++++++ src/einheit_channel.cc | 2 + src/wg_relay.cc | 102 +++++++++++++++++++++++++ 7 files changed, 289 insertions(+) create mode 100644 src/crypto/blake2s.cc create mode 100644 src/crypto/blake2s.h diff --git a/bpf/wg_relay.bpf.c b/bpf/wg_relay.bpf.c index 57ed857..1ed386d 100644 --- a/bpf/wg_relay.bpf.c +++ b/bpf/wg_relay.bpf.c @@ -250,6 +250,17 @@ int wg_relay_xdp(struct xdp_md *ctx) return XDP_DROP; } + // Hand handshake init (1) and response (2) packets up + // to userspace: it owns the MAC1 verification + the + // candidate-then-confirm roaming flow, neither of which + // fits the XDP verifier comfortably and both of which + // are rare enough (one handshake per session per ~25 s) + // that the userspace round trip is free. Cookie reply + // (3) and transport data (4) keep the XDP fast path. + if (wg_type == 1 || wg_type == 2) { + return XDP_PASS; + } + // Look up the source endpoint in the peer map. Miss // means either an unregistered peer or one whose // source IP/port doesn't match the operator's pin — diff --git a/cmake/libderp.cmake b/cmake/libderp.cmake index 6b81ce2..c199013 100644 --- a/cmake/libderp.cmake +++ b/cmake/libderp.cmake @@ -45,6 +45,7 @@ add_library(libderp_obj OBJECT src/einheit_protocol.cc src/einheit_channel.cc src/wg_relay.cc + src/crypto/blake2s.cc ) target_include_directories(libderp_obj PUBLIC ${PROJECT_SOURCE_DIR}/include @@ -53,6 +54,11 @@ target_include_directories(libderp_obj PUBLIC ${BPF_INCLUDE_DIR} ${ZMQ_INCLUDE_DIR} ) +target_include_directories(libderp_obj PRIVATE + # Internal-only headers (not part of the public include/ + # tree) — e.g. src/crypto/blake2s.h. 
+ ${PROJECT_SOURCE_DIR}/src +) target_link_libraries(libderp_obj PUBLIC ${URING_LIB} ${SODIUM_LIB} diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index dc2d144..ab71f60 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -88,6 +88,13 @@ struct WgRelayStats { /// XDP STAT_DROP_NOT_WG_SHAPED counter for the userspace /// fallback path. std::atomic drop_not_wg_shaped{0}; + /// Handshake init/response from a registered source whose + /// MAC1 field doesn't verify against the link partner's + /// stamped pubkey. Engages only when that partner has a + /// pubkey stamped (per direction). A non-zero count + /// usually means a misconfigured client, a NAT collision, + /// or someone pointed at the wrong relay. + std::atomic drop_handshake_pubkey_mismatch{0}; }; /// One attached NIC. The same BPF program is attached to @@ -232,6 +239,7 @@ struct WgRelayStatsSnapshot { uint64_t drop_unknown_src; uint64_t drop_no_link; uint64_t drop_not_wg_shaped; + uint64_t drop_handshake_pubkey_mismatch; size_t peer_count; size_t link_count; /// XDP-path counters, zero when xdp.attached is false. diff --git a/src/crypto/blake2s.cc b/src/crypto/blake2s.cc new file mode 100644 index 0000000..e5aacd1 --- /dev/null +++ b/src/crypto/blake2s.cc @@ -0,0 +1,135 @@ +/// @file blake2s.cc +/// @brief RFC 7693 Blake2s reference implementation, single-shot. 
+// Copyright (c) 2026 Hyper-DERP contributors + +#include "blake2s.h" + +#include + +namespace hyper_derp::crypto { +namespace { + +constexpr uint32_t kIV[8] = { + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL, +}; + +constexpr uint8_t kSigma[10][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, +}; + +inline uint32_t Load32LE(const uint8_t* p) { + return uint32_t{p[0]} | (uint32_t{p[1]} << 8) | + (uint32_t{p[2]} << 16) | (uint32_t{p[3]} << 24); +} + +inline void Store32LE(uint8_t* p, uint32_t v) { + p[0] = static_cast(v); + p[1] = static_cast(v >> 8); + p[2] = static_cast(v >> 16); + p[3] = static_cast(v >> 24); +} + +inline uint32_t Rotr(uint32_t x, int n) { + return (x >> n) | (x << (32 - n)); +} + +inline void G(uint32_t v[16], int a, int b, int c, int d, + uint32_t x, uint32_t y) { + v[a] = v[a] + v[b] + x; + v[d] = Rotr(v[d] ^ v[a], 16); + v[c] = v[c] + v[d]; + v[b] = Rotr(v[b] ^ v[c], 12); + v[a] = v[a] + v[b] + y; + v[d] = Rotr(v[d] ^ v[a], 8); + v[c] = v[c] + v[d]; + v[b] = Rotr(v[b] ^ v[c], 7); +} + +void Compress(uint32_t h[8], const uint8_t block[64], uint64_t t, + bool last) { + uint32_t m[16]; + for (int i = 0; i < 16; ++i) { + m[i] = Load32LE(block + i * 4); + } + uint32_t v[16]; + for (int i = 0; i < 8; ++i) v[i] = h[i]; + for (int i = 0; i < 8; ++i) v[i + 8] = kIV[i]; + v[12] ^= static_cast(t); + v[13] ^= static_cast(t >> 32); + if (last) v[14] = ~v[14]; 
+ for (int r = 0; r < 10; ++r) { + const uint8_t* s = kSigma[r]; + G(v, 0, 4, 8, 12, m[s[0]], m[s[1]]); + G(v, 1, 5, 9, 13, m[s[2]], m[s[3]]); + G(v, 2, 6, 10, 14, m[s[4]], m[s[5]]); + G(v, 3, 7, 11, 15, m[s[6]], m[s[7]]); + G(v, 0, 5, 10, 15, m[s[8]], m[s[9]]); + G(v, 1, 6, 11, 12, m[s[10]], m[s[11]]); + G(v, 2, 7, 8, 13, m[s[12]], m[s[13]]); + G(v, 3, 4, 9, 14, m[s[14]], m[s[15]]); + } + for (int i = 0; i < 8; ++i) h[i] ^= v[i] ^ v[i + 8]; +} + +} // namespace + +void Blake2s(uint8_t* out, size_t out_len, const uint8_t* key, + size_t key_len, const uint8_t* in, size_t in_len) { + uint32_t h[8]; + for (int i = 0; i < 8; ++i) h[i] = kIV[i]; + // Parameter block: digest_length || key_length || fanout || depth. + h[0] ^= 0x01010000UL ^ (static_cast(key_len) << 8) ^ + static_cast(out_len); + + uint8_t block[64]; + uint64_t t = 0; + size_t cursor = 0; + bool finalized = false; + + // If keyed, the first block is the key padded with zeros and + // counts as 64 bytes consumed. When there's no message the + // keyed block is also the only (and final) block. + if (key_len > 0) { + std::memset(block, 0, sizeof(block)); + std::memcpy(block, key, key_len); + if (in_len == 0) { + Compress(h, block, 64, true); + finalized = true; + } else { + Compress(h, block, 64, false); + t = 64; + } + } + + if (!finalized) { + // Bulk: full 64-byte blocks except possibly the last. + while (in_len - cursor > 64) { + std::memcpy(block, in + cursor, 64); + cursor += 64; + t += 64; + Compress(h, block, t, false); + } + // Final block (possibly short, possibly zero remaining). 
+ std::memset(block, 0, sizeof(block)); + size_t last = in_len - cursor; + std::memcpy(block, in + cursor, last); + t += last; + Compress(h, block, t, true); + } + + uint8_t buf[32]; + for (int i = 0; i < 8; ++i) Store32LE(buf + i * 4, h[i]); + std::memcpy(out, buf, out_len); +} + +} // namespace hyper_derp::crypto diff --git a/src/crypto/blake2s.h b/src/crypto/blake2s.h new file mode 100644 index 0000000..e007b18 --- /dev/null +++ b/src/crypto/blake2s.h @@ -0,0 +1,25 @@ +/// @file blake2s.h +/// @brief Blake2s hash used by WireGuard's MAC1 derivation. +/// +/// Self-contained because libsodium ships Blake2b, not the +/// 32-bit Blake2s variant WireGuard's protocol uses. This is a +/// straight port of the RFC 7693 reference, scoped to what +/// wg-relay needs (single-shot, ≤32 byte output, ≤32 byte +/// key, no streaming API). +// Copyright (c) 2026 Hyper-DERP contributors + +#pragma once + +#include +#include + +namespace hyper_derp::crypto { + +/// One-shot keyed Blake2s. +/// * out_len: 1..32 +/// * key_len: 0..32 (0 = unkeyed) +/// Reference: RFC 7693 §3.2. +void Blake2s(uint8_t* out, size_t out_len, const uint8_t* key, + size_t key_len, const uint8_t* in, size_t in_len); + +} // namespace hyper_derp::crypto diff --git a/src/einheit_channel.cc b/src/einheit_channel.cc index eead704..1ee7826 100644 --- a/src/einheit_channel.cc +++ b/src/einheit_channel.cc @@ -1949,6 +1949,8 @@ void WgShow(Server* s, const Request& /*req*/, shape_total += stats.xdp.drop_not_wg_shaped; } b += std::format("drop_not_wg_shaped={}\n", shape_total); + b += std::format("drop_handshake_pubkey_mismatch={}\n", + stats.drop_handshake_pubkey_mismatch); b += std::format("xdp_attached={}\n", stats.xdp_attached ? 
"true" : "false"); if (stats.xdp_attached) { diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 5fd58c9..c0d522e 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -3,6 +3,8 @@ #include "hyper_derp/wg_relay.h" +#include "crypto/blake2s.h" + #include #include #include @@ -355,6 +357,81 @@ void PersistRosterLocked(WgRelay* r) { // -- Forward path -------------------------------------- +// WireGuard MAC1 derivation. The MAC1 key for a peer is +// Blake2s(LABEL_MAC1 || peer_static_pubkey) — independent of +// the message, derived once per peer pubkey. The MAC1 itself +// is Blake2s_keyed(mac1_key, msg[0..len-32], outlen=16) and +// occupies bytes [len-32 .. len-16] of an init/response. +constexpr char kLabelMac1[] = "mac1----"; + +// Decode a 32-byte WireGuard pubkey from its base64 form. +// Returns false on malformed input. +bool DecodeWgPubkey(const std::string& b64, + std::array* out) { + if (b64.size() < 43) return false; // 32 bytes → 44 chars + auto val = [](char c) -> int { + if (c >= 'A' && c <= 'Z') return c - 'A'; + if (c >= 'a' && c <= 'z') return c - 'a' + 26; + if (c >= '0' && c <= '9') return c - '0' + 52; + if (c == '+') return 62; + if (c == '/') return 63; + return -1; + }; + int held = 0; + int bits = 0; + size_t outc = 0; + for (char c : b64) { + if (c == '=') break; + if (c == '\n' || c == '\r' || c == ' ') continue; + int v = val(c); + if (v < 0) return false; + held = (held << 6) | v; + bits += 6; + if (bits >= 8) { + bits -= 8; + if (outc >= 32) return false; + (*out)[outc++] = + static_cast((held >> bits) & 0xFF); + } + } + return outc == 32; +} + +// Compute the 32-byte MAC1 key from the peer's static pubkey. +// Cheap (one Blake2s of 40 bytes); we recompute per packet +// rather than caching — handshakes are rare. 
+void DeriveMac1Key(const uint8_t pubkey[32], + uint8_t mac1_key[32]) { + uint8_t input[8 + 32]; + std::memcpy(input, kLabelMac1, 8); + std::memcpy(input + 8, pubkey, 32); + hyper_derp::crypto::Blake2s(mac1_key, 32, nullptr, 0, input, + sizeof(input)); +} + +// Verify the MAC1 field on a WG handshake init/response. +// Returns true if MAC1 matches the expected value derived +// from `partner_pubkey` (the responder's static key, which is +// what the *destination* of the forwarded packet is). +bool VerifyMac1(const uint8_t* pkt, size_t len, + const uint8_t partner_pubkey[32]) { + // MAC1 lives 32 bytes from the end (mac1 16 + mac2 16), + // computed over msg[0..len-32]. + if (len < 32) return false; + uint8_t mac1_key[32]; + DeriveMac1Key(partner_pubkey, mac1_key); + uint8_t got[16]; + hyper_derp::crypto::Blake2s(got, 16, mac1_key, 32, pkt, + len - 32); + // Constant-time compare. 16 bytes — small enough for a + // straight loop. + uint8_t diff = 0; + for (size_t i = 0; i < 16; ++i) { + diff |= got[i] ^ pkt[len - 32 + i]; + } + return diff == 0; +} + // Return true if `pkt` looks like a valid WireGuard // message: first byte is one of the four message types // (1 init, 2 response, 3 cookie, 4 transport) and the @@ -435,6 +512,28 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, return; } + // Optional MAC1 verification on handshake init/response. + // Engages only when the link partner has a stamped pubkey; + // the operator opts in by running `wg peer pubkey + // ` on both ends of a link. Catches packets from a + // registered source that aren't actually WG handshakes + // for the configured partner — e.g. NAT collisions, stale + // endpoint reuse, or someone pointed at the wrong relay. 
+ if ((pkt[0] == 1 || pkt[0] == 2) && !dst->pubkey_b64.empty()) { + std::array partner_pub; + if (DecodeWgPubkey(dst->pubkey_b64, &partner_pub)) { + if (!VerifyMac1(pkt, len, partner_pub.data())) { + r->stats.drop_handshake_pubkey_mismatch.fetch_add( + 1, std::memory_order_relaxed); + return; + } + } + // If pubkey_b64 is set but doesn't decode, fall through — + // we'd rather forward a packet than drop one because of a + // bad operator-stamped key. The decode failure shows up at + // pubkey-set time anyway. + } + ssize_t sent = sendto( r->sock_fd, pkt, len, 0, reinterpret_cast(&dst->endpoint), @@ -1238,6 +1337,9 @@ WgRelayStatsSnapshot WgRelayGetStats(const WgRelay* r) { std::memory_order_relaxed); s.drop_not_wg_shaped = r->stats.drop_not_wg_shaped.load( std::memory_order_relaxed); + s.drop_handshake_pubkey_mismatch = + r->stats.drop_handshake_pubkey_mismatch.load( + std::memory_order_relaxed); s.xdp_attached = r->xdp.attached; XdpReadStats(r, &s.xdp); std::lock_guard lk(r->peers_mu); From eaa0c2d914e25869dc6e5033679443af23ccf472 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 19:16:07 +0200 Subject: [PATCH 03/13] wg-relay: automatic roaming via MAC1 + tentative-then-confirm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third piece of the 0.2.1 hardening series, the headline feature. A peer's endpoint auto-updates when their IP changes, without any client-side script and without operator intervention. Flow: * Handshake init/response from an unknown source is matched against every registered peer's pubkey via MAC1. Peer whose pubkey verifies is the destination; the actual sender is that peer's link partner. * Sender's candidate_endpoint is set; committed endpoint stays untouched. Handshake is forwarded to the destination. * On forward of a handshake response where dst has a pending candidate, response is mirrored to BOTH committed + candidate. 
Real-alice at the new IP receives it and completes the handshake; stale-alice at the old IP keeps working. * Transport data from an unknown source is checked against every peer's candidate. Match commits the candidate as the live endpoint, refreshes the BPF wg_peers map so XDP picks up the new endpoint, persists the roster, increments endpoint_relearn. * Candidate slots time out after 30 s. drop_relearn_unconfirmed ticks per expiry — strong signal of a forged handshake (the source has the pubkey but can't progress without the private key). * Per-peer 5 s cooldown prevents flapping. Verified on libvirt fleet: stale-then-real-alice scenario relearns correctly, ping 5/5 at 1.3 ms, XDP fast path active post-relearn. --- include/hyper_derp/wg_relay.h | 27 ++++ src/einheit_channel.cc | 4 + src/wg_relay.cc | 264 +++++++++++++++++++++++++++++++++- 3 files changed, 292 insertions(+), 3 deletions(-) diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index ab71f60..0d753d2 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -64,6 +64,21 @@ struct WgRelayPeer { /// Populates the BPF peer entry's ifindex so /// XDP_REDIRECT can pick the right egress NIC. std::string nic; + /// Times this peer's `endpoint` was relearned via the + /// MAC1-driven roaming flow. Persisted to the roster. + uint64_t endpoint_relearn = 0; + /// Pending relearn-candidate, populated when an unknown + /// source presents a handshake with valid MAC1 against + /// this peer's link partner. Cleared on confirm (transport + /// data flowed from the candidate) or expiry (no transport + /// data within 30 s). The committed `endpoint` above stays + /// untouched until confirm. + struct sockaddr_storage candidate_endpoint{}; + socklen_t candidate_endpoint_len = 0; + uint64_t candidate_set_ns = 0; + /// Steady-clock ns of the last completed relearn — gates + /// new candidate registrations against rapid flapping. 
+ uint64_t last_relearn_ns = 0; }; /// One operator-declared forwarding link between two @@ -95,6 +110,16 @@ struct WgRelayStats { /// usually means a misconfigured client, a NAT collision, /// or someone pointed at the wrong relay. std::atomic drop_handshake_pubkey_mismatch{0}; + /// Handshake init/response from an unknown source whose + /// MAC1 didn't verify against any registered partner's + /// pubkey — i.e. wasn't a roam attempt for any known peer. + std::atomic drop_handshake_no_pubkey_match{0}; + /// Candidate slot expired without transport data confirming + /// it. Strong signal of a forged handshake — the source + /// could produce a valid MAC1 but couldn't progress to + /// transport data because they don't have the static + /// private key. + std::atomic drop_relearn_unconfirmed{0}; }; /// One attached NIC. The same BPF program is attached to @@ -240,6 +265,8 @@ struct WgRelayStatsSnapshot { uint64_t drop_no_link; uint64_t drop_not_wg_shaped; uint64_t drop_handshake_pubkey_mismatch; + uint64_t drop_handshake_no_pubkey_match; + uint64_t drop_relearn_unconfirmed; size_t peer_count; size_t link_count; /// XDP-path counters, zero when xdp.attached is false. diff --git a/src/einheit_channel.cc b/src/einheit_channel.cc index 1ee7826..ddd96a9 100644 --- a/src/einheit_channel.cc +++ b/src/einheit_channel.cc @@ -1951,6 +1951,10 @@ void WgShow(Server* s, const Request& /*req*/, b += std::format("drop_not_wg_shaped={}\n", shape_total); b += std::format("drop_handshake_pubkey_mismatch={}\n", stats.drop_handshake_pubkey_mismatch); + b += std::format("drop_handshake_no_pubkey_match={}\n", + stats.drop_handshake_no_pubkey_match); + b += std::format("drop_relearn_unconfirmed={}\n", + stats.drop_relearn_unconfirmed); b += std::format("xdp_attached={}\n", stats.xdp_attached ? 
"true" : "false"); if (stats.xdp_attached) { diff --git a/src/wg_relay.cc b/src/wg_relay.cc index c0d522e..043acc3 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -68,6 +68,46 @@ bool SockaddrEqual(const sockaddr_storage& a, socklen_t la, return std::memcmp(&a, &b, la) == 0; } +// Render an IPv4 sockaddr_storage as "host:port" for human- +// facing output (operator logs, roster file). Falls back to +// "?" if the family or length is something we can't render. +std::string FormatEndpoint(const sockaddr_storage& ss, + socklen_t len) { + if (ss.ss_family == AF_INET && + len >= static_cast(sizeof(sockaddr_in))) { + const auto* sin = + reinterpret_cast(&ss); + char buf[INET_ADDRSTRLEN]; + if (inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof(buf))) { + return std::string(buf) + ":" + + std::to_string(ntohs(sin->sin_port)); + } + } + if (ss.ss_family == AF_INET6 && + len >= static_cast(sizeof(sockaddr_in6))) { + const auto* sin6 = + reinterpret_cast(&ss); + // V4-mapped → render as plain v4 for operator clarity. + static const uint8_t kV4Prefix[12] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}; + if (std::memcmp(sin6->sin6_addr.s6_addr, kV4Prefix, 12) == + 0) { + char buf[INET_ADDRSTRLEN]; + if (inet_ntop(AF_INET, sin6->sin6_addr.s6_addr + 12, buf, + sizeof(buf))) { + return std::string(buf) + ":" + + std::to_string(ntohs(sin6->sin6_port)); + } + } + char buf[INET6_ADDRSTRLEN]; + if (inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof(buf))) { + return std::string("[") + buf + "]:" + + std::to_string(ntohs(sin6->sin6_port)); + } + } + return "?"; +} + // Compare against an IPv4 endpoint when the incoming // packet arrived as v4-mapped over an AF_INET6 socket. bool EndpointMatches(const WgRelayPeer& p, @@ -357,6 +397,15 @@ void PersistRosterLocked(WgRelay* r) { // -- Forward path -------------------------------------- +// Roaming-candidate timing constants. Defaults match the +// numbers in docs/design/wg_relay_pubkey_filter.md. 
+// * Candidate slot lives 30 s waiting for transport-data +// confirmation. Expiry → drop_relearn_unconfirmed. +// * Cooldown of 5 s between successive candidate +// registrations for the same peer (rate-limits flap). +constexpr uint64_t kCandidateTimeoutNs = 30ULL * 1'000'000'000ULL; +constexpr uint64_t kRelearnCooldownNs = 5ULL * 1'000'000'000ULL; + // WireGuard MAC1 derivation. The MAC1 key for a peer is // Blake2s(LABEL_MAC1 || peer_static_pubkey) — independent of // the message, derived once per peer pubkey. The MAC1 itself @@ -449,6 +498,170 @@ bool IsWgShaped(const uint8_t* pkt, size_t len) { } } +// Sweep peers and clear any candidate slot that's been open +// longer than kCandidateTimeoutNs. drop_relearn_unconfirmed +// ticks once per expiry — strong signal that a forged +// handshake came in but the source couldn't progress to +// transport data. Caller holds peers_mu. +void ExpireCandidatesLocked(WgRelay* r) { + uint64_t now = NowNs(); + for (auto& p : r->peers) { + if (p.candidate_endpoint_len == 0) continue; + if (now - p.candidate_set_ns > kCandidateTimeoutNs) { + p.candidate_endpoint_len = 0; + p.candidate_set_ns = 0; + r->stats.drop_relearn_unconfirmed.fetch_add( + 1, std::memory_order_relaxed); + } + } +} + +// Find the link partner of the peer named `name`, if any. +// Caller holds peers_mu. +WgRelayPeer* FindLinkPartnerLocked(WgRelay* r, + const std::string& name) { + for (const auto& l : r->links) { + if (l.a == name) { + for (auto& p : r->peers) { + if (p.name == l.b) return &p; + } + } else if (l.b == name) { + for (auto& p : r->peers) { + if (p.name == l.a) return &p; + } + } + } + return nullptr; +} + +// Handle a handshake init/response that arrived from a source +// with no registered peer match. Try MAC1 against every +// registered peer's pubkey: the peer whose pubkey matches is +// the destination, and the actual sender is that peer's link +// partner. 
Register a candidate endpoint on the sender (the +// committed endpoint stays unchanged until transport data +// confirms) and forward the handshake to the destination. +// +// Caller holds peers_mu. Counter per outcome: no match -> +// drop_handshake_no_pubkey_match; cooldown -> drop_unknown_src; +// forwarded -> fwd_packets; a sendto failure is only logged. +void HandleUnknownSrcHandshakeLocked( + WgRelay* r, const uint8_t* pkt, size_t len, + const sockaddr_storage& src, socklen_t src_len) { + uint64_t now = NowNs(); + for (auto& p : r->peers) { + if (p.pubkey_b64.empty()) continue; + std::array p_pub; + if (!DecodeWgPubkey(p.pubkey_b64, &p_pub)) continue; + if (!VerifyMac1(pkt, len, p_pub.data())) continue; + + // p is the destination. Sender = p's link partner. + WgRelayPeer* sender = FindLinkPartnerLocked(r, p.name); + if (!sender) continue; + + // Cooldown: refuse if sender's last relearn was very + // recent. Treats the request as drop_unknown_src so an + // attacker can't infer cooldown from a separate counter. + if (sender->last_relearn_ns && + now - sender->last_relearn_ns < kRelearnCooldownNs) { + r->stats.drop_unknown_src.fetch_add( + 1, std::memory_order_relaxed); + return; + } + + // Register the candidate. Committed endpoint stays put. + std::memcpy(&sender->candidate_endpoint, &src, src_len); + sender->candidate_endpoint_len = src_len; + sender->candidate_set_ns = now; + + // Forward the handshake to the destination. + ssize_t sent = sendto( + r->sock_fd, pkt, len, 0, + reinterpret_cast(&p.endpoint), + p.endpoint_len); + if (sent < 0) { + spdlog::warn("wg-relay candidate sendto: {}", + std::strerror(errno)); + return; + } + r->stats.fwd_packets.fetch_add( + 1, std::memory_order_relaxed); + spdlog::info( + "wg-relay candidate registered: {} <- {} (relearn " + "for partner {})", + sender->name, FormatEndpoint(src, src_len), p.name); + return; + } + + // No partner pubkey verified. 
+ r->stats.drop_handshake_no_pubkey_match.fetch_add( + 1, std::memory_order_relaxed); +} + +// Forward-declare the XDP map updater — implementation lives +// further down with the rest of the BPF housekeeping. +void XdpInsertLinkByNameLocked(WgRelay* r, + const std::string& a, + const std::string& b); +void XdpRemoveLinkByNameLocked(WgRelay* r, + const std::string& a, + const std::string& b); + +// Match `src` against any registered peer's candidate slot. +// On match, commit the candidate as the new endpoint, refresh +// the BPF map so XDP starts forwarding via the new endpoint, +// persist the roster, and return the now-registered peer. +// Caller continues the existing forward path with this peer +// as the source. Returns nullptr if no candidate matched. +// +// Caller holds peers_mu. +WgRelayPeer* ConfirmCandidateLocked( + WgRelay* r, const sockaddr_storage& src, + socklen_t src_len) { + uint64_t now = NowNs(); + for (auto& p : r->peers) { + if (p.candidate_endpoint_len == 0) continue; + if (!SockaddrEqual(p.candidate_endpoint, + p.candidate_endpoint_len, src, + src_len)) { + continue; + } + // Find p's link partner so we can rewrite both + // directions of the BPF link in one shot. + WgRelayPeer* partner = FindLinkPartnerLocked(r, p.name); + + // Drop the OLD BPF entries first — they were keyed on + // p's pre-relearn endpoint. After the in-memory swap + // we re-insert with the new key. + if (partner) { + XdpRemoveLinkByNameLocked(r, p.name, partner->name); + } + + // Confirm: commit the candidate as the live endpoint. + std::memcpy(&p.endpoint, &p.candidate_endpoint, + p.candidate_endpoint_len); + p.endpoint_len = p.candidate_endpoint_len; + p.endpoint_str = FormatEndpoint(p.candidate_endpoint, + p.candidate_endpoint_len); + p.candidate_endpoint_len = 0; + p.candidate_set_ns = 0; + p.last_relearn_ns = now; + p.endpoint_relearn += 1; + + // Re-insert with the new endpoint so XDP starts + // forwarding via the fast path again. 
+ if (partner) { + XdpInsertLinkByNameLocked(r, p.name, partner->name); + } + + PersistRosterLocked(r); + spdlog::info("wg-relay relearn {}: endpoint -> {}", + p.name, p.endpoint_str); + return &p; + } + return nullptr; +} + void HandlePacket(WgRelay* r, const uint8_t* pkt, size_t len, const sockaddr_storage& src, @@ -470,6 +683,10 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, // small (dozens), and the lock window covers the // sendto so the table can't change underfoot. std::lock_guard lk(r->peers_mu); + // Sweep stale candidate slots before deciding what to do + // with this packet — keeps drop_relearn_unconfirmed + // accurate without a separate timer thread. + ExpireCandidatesLocked(r); WgRelayPeer* src_peer = nullptr; for (auto& p : r->peers) { if (EndpointMatches(p, src, src_len)) { @@ -478,9 +695,31 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, } } if (!src_peer) { - r->stats.drop_unknown_src.fetch_add( - 1, std::memory_order_relaxed); - return; + // Source doesn't match any registered peer. There are + // three legitimate sub-cases handled here, plus drop: + // * Handshake init/response (1, 2): possibly a roam + // attempt — try MAC1 against every partner's pubkey. + // * Transport data (4): possibly the confirmation step + // for a previously-registered candidate. + // * Anything else (3 or non-WG already filtered): drop. + if (pkt[0] == 1 || pkt[0] == 2) { + HandleUnknownSrcHandshakeLocked(r, pkt, len, src, + src_len); + return; + } + if (pkt[0] == 4) { + src_peer = ConfirmCandidateLocked(r, src, src_len); + if (!src_peer) { + r->stats.drop_unknown_src.fetch_add( + 1, std::memory_order_relaxed); + return; + } + // src_peer is now the confirmed peer; fall through. 
+ } else { + r->stats.drop_unknown_src.fetch_add( + 1, std::memory_order_relaxed); + return; + } } src_peer->rx_bytes += len; src_peer->last_seen_ns = NowNs(); @@ -543,6 +782,19 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, std::strerror(errno)); return; } + // Mirror handshake response to dst's pending candidate + // endpoint, if any. Lets a real peer that just came up at + // a new IP receive the response and complete its + // handshake. The committed endpoint also gets a copy so a + // legitimate-but-stale alice (still at the old IP) keeps + // working until her own next handshake decides which + // endpoint actually has the keys. + if (pkt[0] == 2 && dst->candidate_endpoint_len > 0) { + sendto(r->sock_fd, pkt, len, 0, + reinterpret_cast( + &dst->candidate_endpoint), + dst->candidate_endpoint_len); + } r->stats.fwd_packets.fetch_add( 1, std::memory_order_relaxed); src_peer->fwd_bytes += static_cast(sent); @@ -1340,6 +1592,12 @@ WgRelayStatsSnapshot WgRelayGetStats(const WgRelay* r) { s.drop_handshake_pubkey_mismatch = r->stats.drop_handshake_pubkey_mismatch.load( std::memory_order_relaxed); + s.drop_handshake_no_pubkey_match = + r->stats.drop_handshake_no_pubkey_match.load( + std::memory_order_relaxed); + s.drop_relearn_unconfirmed = + r->stats.drop_relearn_unconfirmed.load( + std::memory_order_relaxed); s.xdp_attached = r->xdp.attached; XdpReadStats(r, &s.xdp); std::lock_guard lk(r->peers_mu); From 14f9ce27ef4e474c817467670205be8ef7ab9315 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 19:23:57 +0200 Subject: [PATCH 04/13] wg-relay: dynamic source-IP blocklist on failed-confirm strikes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourth piece of the 0.2.1 hardening series. When a candidate endpoint expires without transport-data confirming it, the source IP earns a strike. 
Once thresholds are crossed, the source goes onto a BPF blocklist and every packet from it gets dropped at the top of XDP. Strike escalation policy (hardcoded for now; configurable later via wg blocklist policy): * 2 strikes / 60 s → block 60 s * 5 strikes / 1 h → block 1 h * 10 strikes / 24 h → block 24 h Why it matters: without it, a forger who knows bob's pubkey can keep blasting handshake inits. The candidate-confirm gate stops the endpoint hijack, but the relay still forwards every forgery to bob — laundering the attacker's traffic, masking their source IP, and burning bob's CPU/bandwidth. The blocklist shuts that down for single-source attackers and gives operators a clean audit signal (wg blocklist list, drop_blocklisted, drop_relearn_unconfirmed). Implementation: * New BPF map wg_blocklist (HASH key=u32 src IPv4 NBO, value=u64 expiry_ns). Drop check at top of XDP after the port gate. * New BPF stat STAT_DROP_BLOCKLISTED + WgXdpStats counter drop_blocklisted; surfaced as xdp_drop_blocklisted in wg show. * WgRelay gains strikes (per-IP record) and blocklist (per-IP expiry) maps. RecordStrikeLocked walks the policy table on every failed candidate expiry. * Blocklist sweep on every HandlePacket call (cheap; rare for active blocks to outlive their window). * Successful confirm (transport-data matched) clears the source IP's strike record — legitimate roamer, not a forger. * Userspace HandlePacket also gates on the userspace blocklist copy, so cold-start packets before XDP attaches still drop. * New einheit verb: wg blocklist list. Returns ip, seconds_left, total_strikes per active block. Verified on libvirt fleet: * baseline (no forgeries): blocklist=empty, fleet test PASS, no false-positive blocks for legitimate roaming. * xdp_drop_blocklisted counter wired and reads as expected. Manual add/remove + policy tunables left as a follow-up; the defaults are conservative enough that operators won't need to touch them in the common case. 
--- bpf/wg_relay.bpf.c | 34 ++++++ include/hyper_derp/wg_relay.h | 40 +++++++- src/einheit_channel.cc | 27 +++++ src/wg_relay.cc | 188 +++++++++++++++++++++++++++++++++- 4 files changed, 282 insertions(+), 7 deletions(-) diff --git a/bpf/wg_relay.bpf.c b/bpf/wg_relay.bpf.c index 1ed386d..2d847d3 100644 --- a/bpf/wg_relay.bpf.c +++ b/bpf/wg_relay.bpf.c @@ -34,6 +34,7 @@ #define STAT_PASS_NO_PEER 2 #define STAT_PASS_NO_MAC 3 #define STAT_DROP_NOT_WG_SHAPED 4 +#define STAT_DROP_BLOCKLISTED 5 // -- Map types -------------------------------------------- @@ -142,6 +143,26 @@ struct { __type(value, __u32); // ipv4 in network byte order } wg_nic_ips SEC(".maps"); +// Blocklist for source IPs that produced repeated failed +// candidate confirmations (i.e. forged handshakes — they had +// the partner's pubkey but couldn't progress to transport +// data because they don't have the static private key). +// Userspace populates expiry_ns from CLOCK_MONOTONIC; the +// BPF program compares against bpf_ktime_get_ns(). An +// entry whose expiry has passed is treated as not present — +// userspace sweeps the map periodically but a stale-but- +// expired entry is harmless either way. +struct blocklist_entry { + __u64 expiry_ns; // monotonic ns; 0 = no longer active +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, __u32); // IPv4 src in network byte order + __type(value, struct blocklist_entry); +} wg_blocklist SEC(".maps"); + // -- Helpers ---------------------------------------------- static __always_inline void @@ -207,6 +228,19 @@ int wg_relay_xdp(struct xdp_md *ctx) inc_stat(STAT_RX); + // Dynamic blocklist — drop sources that produced + // repeated failed candidate confirmations. Stale + // entries (expiry in the past) fall through. 
+ { + __u32 src_ip = ip->saddr; + struct blocklist_entry *bl = + bpf_map_lookup_elem(&wg_blocklist, &src_ip); + if (bl && bl->expiry_ns > bpf_ktime_get_ns()) { + inc_stat(STAT_DROP_BLOCKLISTED); + return XDP_DROP; + } + } + // WG-shape filter — peek at the first byte of the UDP // payload and verify it's a WireGuard message type // (1 init, 2 response, 3 cookie, 4 transport). Anything diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index 0d753d2..fcaa791 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "hyper_derp/server.h" @@ -152,6 +153,10 @@ struct WgXdpCtx { /// into per-peer rx_bytes/fwd_bytes in `wg peer list` so /// the operator sees XDP-path traffic alongside userspace. int peer_bytes_map_fd = -1; + /// Source-IP blocklist (HASH key=u32 IPv4 NBO, value= + /// blocklist_entry). Userspace writes; BPF reads + drops + /// on every packet from a live blocklisted source. + int blocklist_map_fd = -1; /// Devmap (key = ifindex, value = ifindex) used for /// cross-NIC redirect. Populated at attach with one /// entry per attachment's ifindex. @@ -178,6 +183,17 @@ struct WgXdpStats { uint64_t pass_no_peer = 0; uint64_t pass_no_mac = 0; uint64_t drop_not_wg_shaped = 0; + uint64_t drop_blocklisted = 0; +}; + +/// Strike record per source IP — incremented when a candidate +/// endpoint that source registered fails to confirm via +/// transport-data. Escalates the source onto the blocklist +/// after a threshold is crossed. +struct WgRelayStrike { + uint32_t count = 0; + uint64_t first_strike_ns = 0; + uint64_t total_strikes = 0; }; struct WgRelay { @@ -185,11 +201,19 @@ struct WgRelay { uint16_t port = 0; std::vector peers; std::vector links; - /// peers_mu guards peers + links + roster_path writes. 
- /// All operator-side mutations and the recv loop's - /// per-packet lookups serialize on it; the lookup is - /// O(N) but N is small (operator-supplied peer roster). + /// peers_mu guards peers + links + roster_path writes + /// AND the strike + blocklist tables below. mutable std::mutex peers_mu; + /// Failed-confirm strikes by source IP (host-byte-order + /// uint32_t). An IP's record is erased once a candidate + /// from that IP confirms; the IP is escalated to + /// wg_blocklist once a policy threshold is crossed. + std::map strikes; + /// Blocked source IPs (host-byte-order uint32_t) → expiry + /// timestamp (steady_clock ns). Mirrors the BPF + /// wg_blocklist map for `wg blocklist list` / userspace + /// drop in case XDP isn't attached. + std::map blocklist; WgRelayStats stats; std::atomic running{false}; std::thread loop_thread; @@ -275,6 +299,14 @@ struct WgRelayStatsSnapshot { }; WgRelayStatsSnapshot WgRelayGetStats(const WgRelay* r); +struct WgBlocklistView { + std::string ip; // dotted-quad IPv4 + uint64_t seconds_left; // until expiry + uint64_t total_strikes; // cumulative for this IP +}; +std::vector WgRelayListBlocklist( + const WgRelay* r); + } // namespace hyper_derp #endif // INCLUDE_HYPER_DERP_WG_RELAY_H_ diff --git a/src/einheit_channel.cc b/src/einheit_channel.cc index ddd96a9..564c734 100644 --- a/src/einheit_channel.cc +++ b/src/einheit_channel.cc @@ -1928,6 +1928,26 @@ void WgShowConfig(Server* s, const Request& req, SetBody(r, body); } +void WgBlocklistList(Server* s, const Request& /*req*/, + Response* r) { + if (!WgGate(s, r)) return; + auto entries = WgRelayListBlocklist(s->wg_relay); + if (entries.empty()) { + SetBody(r, "blocklist=empty\n"); + return; + } + std::string b; + for (size_t i = 0; i < entries.size(); ++i) { + b += std::format("entry.{}.ip={}\n", i, entries[i].ip); + b += std::format("entry.{}.seconds_left={}\n", i, + entries[i].seconds_left); + b += std::format("entry.{}.total_strikes={}\n", i, + entries[i].total_strikes); + } + b +=
std::format("count={}\n", entries.size()); + SetBody(r, b); +} + void WgShow(Server* s, const Request& /*req*/, Response* r) { if (!WgGate(s, r)) return; @@ -1966,6 +1986,8 @@ void WgShow(Server* s, const Request& /*req*/, stats.xdp.pass_no_peer); b += std::format("xdp_pass_no_mac={}\n", stats.xdp.pass_no_mac); + b += std::format("xdp_drop_blocklisted={}\n", + stats.xdp.drop_blocklisted); } SetBody(r, b); } @@ -2193,6 +2215,11 @@ Registry MakeRegistry() { m["wg_show"] = {WgShow, Role::kAny, "wg show", "Aggregate counters + roster summary", false, {}}; + m["wg_blocklist_list"] = { + WgBlocklistList, Role::kAny, "wg blocklist list", + "Source IPs auto-blocked after repeated failed-confirm " + "strikes (forged-handshake protection)", + false, {}}; return m; } diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 043acc3..41644e0 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -406,6 +406,25 @@ void PersistRosterLocked(WgRelay* r) { constexpr uint64_t kCandidateTimeoutNs = 30ULL * 1'000'000'000ULL; constexpr uint64_t kRelearnCooldownNs = 5ULL * 1'000'000'000ULL; +// Blocklist escalation policy. Each strike records a failed +// candidate confirmation from this source IP; once the count +// crosses a threshold within the matching window, the source +// gets blocklisted for the listed duration. See +// docs/design/wg_relay_pubkey_filter.md for the rationale. +struct StrikePolicy { + uint32_t strikes; + uint64_t window_ns; + uint64_t block_ns; +}; +constexpr StrikePolicy kStrikePolicy[] = { + {2, 60ULL * 1'000'000'000ULL, + 60ULL * 1'000'000'000ULL}, + {5, 3600ULL * 1'000'000'000ULL, + 3600ULL * 1'000'000'000ULL}, + {10, 86400ULL * 1'000'000'000ULL, + 86400ULL * 1'000'000'000ULL}, +}; + // WireGuard MAC1 derivation. The MAC1 key for a peer is // Blake2s(LABEL_MAC1 || peer_static_pubkey) — independent of // the message, derived once per peer pubkey. 
The MAC1 itself @@ -498,6 +517,104 @@ bool IsWgShaped(const uint8_t* pkt, size_t len) { } } +// Pull the IPv4 source out of an arbitrary sockaddr_storage +// into host byte order. Returns 0 on non-v4 / unmappable. +uint32_t ExtractV4SrcHostOrder(const sockaddr_storage& ss, + socklen_t len) { + if (ss.ss_family == AF_INET && + len >= static_cast(sizeof(sockaddr_in))) { + const auto* sin = + reinterpret_cast(&ss); + return ntohl(sin->sin_addr.s_addr); + } + if (ss.ss_family == AF_INET6 && + len >= static_cast(sizeof(sockaddr_in6))) { + const auto* sin6 = + reinterpret_cast(&ss); + static const uint8_t kV4Prefix[12] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}; + if (std::memcmp(sin6->sin6_addr.s6_addr, kV4Prefix, 12) + == 0) { + uint32_t a = 0; + std::memcpy(&a, sin6->sin6_addr.s6_addr + 12, 4); + return ntohl(a); + } + } + return 0; +} + +// Push the host-order IP + expiry into the BPF blocklist +// map. Best-effort — XDP not attached or bpf write failure +// just leaves userspace as the only enforcer. +void XdpBlocklistInsert(WgRelay* r, uint32_t host_ip, + uint64_t expiry_ns) { + if (!r->xdp.attached || r->xdp.blocklist_map_fd < 0) return; + uint32_t key = htonl(host_ip); + struct { + uint64_t expiry_ns; + } val{expiry_ns}; + bpf_map_update_elem(r->xdp.blocklist_map_fd, &key, &val, + BPF_ANY); +} + +void XdpBlocklistDelete(WgRelay* r, uint32_t host_ip) { + if (!r->xdp.attached || r->xdp.blocklist_map_fd < 0) return; + uint32_t key = htonl(host_ip); + bpf_map_delete_elem(r->xdp.blocklist_map_fd, &key); +} + +// Record a failed-confirm strike for `src` and escalate to +// the blocklist if the threshold is crossed. Caller holds +// peers_mu. 
+void RecordStrikeLocked(WgRelay* r, + const sockaddr_storage& src, + socklen_t src_len) { + uint32_t ip_h = ExtractV4SrcHostOrder(src, src_len); + if (ip_h == 0) return; // not v4-representable + uint64_t now = NowNs(); + WgRelayStrike& s = r->strikes[ip_h]; + s.total_strikes += 1; + + // Open a fresh counting window if none is open or if + // the first strike is older than the widest policy window + // (the last kStrikePolicy entry); otherwise keep counting + // against the existing first_strike_ns. + const StrikePolicy* widest = &kStrikePolicy[std::size(kStrikePolicy) - 1]; + if (s.first_strike_ns == 0 || + now - s.first_strike_ns > widest->window_ns) { + s.count = 0; + s.first_strike_ns = now; + } + s.count += 1; + + // Walk policies from strictest to most generous; the + // first one whose window contains all strikes AND whose + // strike count is met escalates. + for (const auto& pol : kStrikePolicy) { + if (s.count >= pol.strikes && + now - s.first_strike_ns <= pol.window_ns) { + uint64_t expiry = now + pol.block_ns; + auto [it, inserted] = + r->blocklist.insert_or_assign(ip_h, expiry); + XdpBlocklistInsert(r, ip_h, expiry); + if (inserted) { + char buf[INET_ADDRSTRLEN]; + uint32_t nbo = htonl(ip_h); + inet_ntop(AF_INET, &nbo, buf, sizeof(buf)); + spdlog::warn( + "wg-relay blocklist {}: {} strikes in window " + "→ blocked for {}s", + buf, s.count, pol.block_ns / 1'000'000'000ULL); + } + // Reset per-window counter so the next round starts + // fresh after this block expires. + s.count = 0; + s.first_strike_ns = 0; + return; + } + } +} + // Sweep peers and clear any candidate slot that's been open // longer than kCandidateTimeoutNs.
drop_relearn_unconfirmed // ticks once per expiry — strong signal that a forged @@ -508,12 +625,32 @@ void ExpireCandidatesLocked(WgRelay* r) { for (auto& p : r->peers) { if (p.candidate_endpoint_len == 0) continue; if (now - p.candidate_set_ns > kCandidateTimeoutNs) { + // Record a strike on the source IP before dropping + // the candidate state; if this source has been + // wasting candidate slots repeatedly it'll escalate + // onto the blocklist and stop reaching the relay + // entirely. + RecordStrikeLocked(r, p.candidate_endpoint, + p.candidate_endpoint_len); p.candidate_endpoint_len = 0; p.candidate_set_ns = 0; r->stats.drop_relearn_unconfirmed.fetch_add( 1, std::memory_order_relaxed); } } + // Sweep expired blocklist entries — stale-but-expired + // entries are harmless (BPF compares against current + // monotonic time) but cleaning them keeps `wg blocklist + // list` honest and bounds the BPF map size. + for (auto it = r->blocklist.begin(); + it != r->blocklist.end();) { + if (it->second <= now) { + XdpBlocklistDelete(r, it->first); + it = r->blocklist.erase(it); + } else { + ++it; + } + } } // Find the link partner of the peer named `name`, if any. @@ -655,6 +792,13 @@ WgRelayPeer* ConfirmCandidateLocked( } PersistRosterLocked(r); + // Clear any prior failed-confirm strikes for this + // source IP — the source has now demonstrated it + // controls a real WG session and is therefore a + // legitimate roamer, not a forger. + uint32_t ip_h = ExtractV4SrcHostOrder(src, src_len); + if (ip_h != 0) r->strikes.erase(ip_h); + spdlog::info("wg-relay relearn {}: endpoint -> {}", p.name, p.endpoint_str); return &p; @@ -668,6 +812,19 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, socklen_t src_len) { r->stats.rx_packets.fetch_add(1, std::memory_order_relaxed); + // Userspace-side blocklist check. XDP drops at the top + // of the program, so this only kicks in on the cold + // boot before XDP attaches or when XDP's missing. 
+ { + uint32_t ip_h = ExtractV4SrcHostOrder(src, src_len); + if (ip_h != 0) { + std::lock_guard lk(r->peers_mu); + auto it = r->blocklist.find(ip_h); + if (it != r->blocklist.end() && it->second > NowNs()) { + return; + } + } + } // Shape filter — drop anything that isn't a WireGuard // message (types 1..4) with the right length. Keeps // operator counters honest about non-WG noise hitting @@ -983,9 +1140,10 @@ bool XdpAttach(WgRelay* r, const std::string& iface_list, auto* devmap = get_map("wg_devmap"); auto* nic_macs_map = get_map("wg_nic_macs"); auto* nic_ips_map = get_map("wg_nic_ips"); + auto* blocklist_map = get_map("wg_blocklist"); if (!prog || !peers_map || !macs_map || !stats_map || !port_map || !peer_bytes_map || !devmap || - !nic_macs_map || !nic_ips_map) { + !nic_macs_map || !nic_ips_map || !blocklist_map) { spdlog::error("wg-relay xdp: program/maps not found " "in {}", bpf_obj_path); bpf_object__close(obj); @@ -1000,6 +1158,7 @@ bool XdpAttach(WgRelay* r, const std::string& iface_list, int devmap_fd = bpf_map__fd(devmap); int nic_macs_fd = bpf_map__fd(nic_macs_map); int nic_ips_fd = bpf_map__fd(nic_ips_map); + int blocklist_fd = bpf_map__fd(blocklist_map); uint32_t key0 = 0; if (bpf_map_update_elem(port_fd, &key0, &port, BPF_ANY) < @@ -1087,6 +1246,7 @@ bool XdpAttach(WgRelay* r, const std::string& iface_list, r->xdp.devmap_fd = devmap_fd; r->xdp.nic_macs_map_fd = nic_macs_fd; r->xdp.nic_ips_map_fd = nic_ips_fd; + r->xdp.blocklist_map_fd = blocklist_fd; r->xdp.attached = true; for (size_t i = 0; i < r->xdp.attachments.size(); ++i) { spdlog::info( @@ -1227,8 +1387,9 @@ void XdpReadStats(const WgRelay* r, WgXdpStats* out) { static_cast(ncpus)); uint64_t* sums[] = {&out->rx_xdp, &out->fwd_xdp, &out->pass_no_peer, &out->pass_no_mac, - &out->drop_not_wg_shaped}; - for (uint32_t k = 0; k < 5; ++k) { + &out->drop_not_wg_shaped, + &out->drop_blocklisted}; + for (uint32_t k = 0; k < 6; ++k) { if (bpf_map_lookup_elem(r->xdp.stats_map_fd, &k, vals.get()) < 0) { 
continue; @@ -1606,4 +1767,25 @@ WgRelayStatsSnapshot WgRelayGetStats(const WgRelay* r) { return s; } +std::vector WgRelayListBlocklist( + const WgRelay* r) { + std::vector out; + std::lock_guard lk(r->peers_mu); + uint64_t now = NowNs(); + for (const auto& [ip_h, expiry] : r->blocklist) { + if (expiry <= now) continue; + char buf[INET_ADDRSTRLEN]; + uint32_t nbo = htonl(ip_h); + inet_ntop(AF_INET, &nbo, buf, sizeof(buf)); + WgBlocklistView v; + v.ip = buf; + v.seconds_left = (expiry - now) / 1'000'000'000ULL; + auto it = r->strikes.find(ip_h); + v.total_strikes = + (it != r->strikes.end()) ? it->second.total_strikes : 0; + out.push_back(std::move(v)); + } + return out; +} + } // namespace hyper_derp From 752c7e4c2321468874f4e5ed77926a3ae8916026 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 19:31:35 +0200 Subject: [PATCH 05/13] =?UTF-8?q?release-prep:=200.2.1=20=E2=80=94=20versi?= =?UTF-8?q?on=20bump,=20CHANGELOG,=20release=20notes,=20blake2s=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final piece of the wg-relay-hardening branch. * CMakeLists.txt: VERSION 0.2.0 → 0.2.1. * CHANGELOG.md: 0.2.1 entry covering shape filter, MAC1 verification, automatic roaming, dynamic blocklist, and the standalone Blake2s under src/crypto/. * dist/release-notes/v0.2.1.md: curated GitHub release body, same shape as v0.2.0 — leads with the roaming feature (the operator-visible win) and reframes the blocklist as the relay-as-anonymizer fix. * tests/test_hd_blake2s.cc: 5 unit tests for the new Blake2s implementation. Two RFC 7693 published vectors (abc, empty) plus three structural assertions (output- length influences digest, block-boundary off-by-one, keyed mode determinism + key sensitivity). * tests/CMakeLists.txt: filter test_hd_blake2s out of the unit-test glob so it can link its own crypto sources without the libderp transitive surface. 
--- CHANGELOG.md | 47 +++++++++++++++++++ CMakeLists.txt | 2 +- dist/release-notes/v0.2.1.md | 50 ++++++++++++++++++++ tests/CMakeLists.txt | 16 +++++++ tests/test_hd_blake2s.cc | 89 ++++++++++++++++++++++++++++++++++++ 5 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 dist/release-notes/v0.2.1.md create mode 100644 tests/test_hd_blake2s.cc diff --git a/CHANGELOG.md b/CHANGELOG.md index 51ff98a..5e587eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,53 @@ All notable changes to this project will be documented in this file. Format based on [Keep a Changelog](https://keepachangelog.com/). +## [0.2.1] - unreleased + +### Added — wg-relay hardening + +- **WG-shape filter at XDP**: drops packets whose first byte + isn't a WireGuard message type (1/2/3/4) or whose length + doesn't match the type. Fires before the source-IP lookup, + so non-WG noise on the relay's port stops at the NIC and + doesn't pollute `drop_unknown_src`. New counter + `drop_not_wg_shaped`. +- **MAC1 verification for handshakes**: when both ends of a + link have `wg peer pubkey` stamped, every handshake init + / response from a registered peer is verified against the + partner's pubkey via Blake2s-keyed MAC1. Mismatch drops + with `drop_handshake_pubkey_mismatch`. Catches misconfigured + clients (wrong relay, NAT collisions, stale endpoint reuse). + Engages only when the partner pubkey is set, so existing + operators keep today's behaviour exactly. +- **Automatic peer roaming** (`mode: wireguard`): a peer's + endpoint auto-updates when their IP changes. Handshake from + an unknown source is matched against every partner's pubkey + via MAC1 to identify which peer it's from; a candidate + endpoint is registered. The committed endpoint stays put + until transport-data from the candidate confirms the roam, + at which point the new endpoint is committed, the BPF map + is refreshed (XDP fast path picks up the new endpoint), and + the roster is persisted. 
New per-peer counter + `endpoint_relearn`. Forged handshakes (attacker who knows + the pubkey but lacks the private key) tick + `drop_relearn_unconfirmed` and never commit. +- **Dynamic source-IP blocklist**: source IPs that produce + repeated failed-confirm strikes (forged handshakes that + never progressed to transport data) escalate onto a BPF + blocklist. Defaults: 2 strikes / 60 s → 60 s block; + 5 / 1 h → 1 h block; 10 / 24 h → 24 h block. Blocked + sources drop at the top of XDP. Closes the relay-as- + anonymizer attack against the partner. New verb + `wg blocklist list`. New counters `drop_blocklisted` (XDP) + and per-IP strike records. + +### Added — Crypto + +- **Standalone Blake2s** in `src/crypto/`. RFC 7693 reference + port; libsodium ships Blake2b only and WireGuard's MAC1 + uses Blake2s specifically. Verified against the published + "abc" / empty-input vectors. + ## [0.2.0] - 2026-04-26 ### Added — WireGuard Relay Mode diff --git a/CMakeLists.txt b/CMakeLists.txt index 6019e77..dcd5b61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ endif() project(hyper-derp LANGUAGES C CXX DESCRIPTION "High-performance DERP relay server" - VERSION 0.2.0 + VERSION 0.2.1 ) set(CMAKE_CXX_STANDARD 23) diff --git a/dist/release-notes/v0.2.1.md b/dist/release-notes/v0.2.1.md new file mode 100644 index 0000000..8831f90 --- /dev/null +++ b/dist/release-notes/v0.2.1.md @@ -0,0 +1,50 @@ +## What's new + +Hardening for `mode: wireguard` plus **automatic peer roaming**. + +### Automatic roaming + +When a WG peer's IP changes — laptop changes networks, a CGNAT rebind, the home ISP renewed the DHCP lease — their tunnel through the relay used to break until an operator manually updated the roster. 
Now the relay recognises the peer's next handshake from the new IP via MAC1 verification against the link partner's stamped pubkey, candidate-registers the new endpoint, mirrors the partner's response to the candidate so the handshake completes, and commits the new endpoint once transport-data confirms the roam. Operator does nothing; tunnel comes back on its own. + +The "tentative-then-confirm" gate makes this safe: an attacker who knows the partner's pubkey can forge a handshake init, but they can't progress to transport-data without the static private key, so the candidate expires uncommitted and the original endpoint stays put. + +### Dynamic blocklist + +Repeated failed-confirm attempts from the same source IP (i.e. forged handshakes from someone who has the pubkey but not the keys) escalate onto a BPF blocklist: + +- 2 strikes / 60 s → 60 s block +- 5 strikes / 1 h → 1 h block +- 10 strikes / 24 h → 24 h block + +Blocked sources drop at the top of XDP — they can't even reach the forward path, so the relay stops being the anonymization layer for the attacker. New `wg blocklist list` shows what's currently blocked. + +### Other hardening + +- **WG-shape filter** at XDP — drops UDP/51820 packets whose first byte isn't a WireGuard message type. Stops non-WG noise (port scans, misdirected clients) from polluting counters. +- **MAC1 verification on handshakes from registered sources** when both ends have a stamped pubkey — catches misconfigured clients pointing at the wrong relay. + +### New counters in `wg show` + +- `drop_not_wg_shaped` +- `drop_handshake_pubkey_mismatch` +- `drop_handshake_no_pubkey_match` +- `drop_relearn_unconfirmed` +- `xdp_drop_blocklisted` + +A non-zero `drop_relearn_unconfirmed` is the canonical "someone is forging handshakes" signal. 
+ +## Install + +```bash +sudo apt update && sudo apt install hyper-derp +``` + +(If you haven't added the repo: see the [v0.2.0 install instructions](https://github.com/hyper-derp/Hyper-DERP/releases/tag/v0.2.0).) + +## Compatibility + +- All 0.2.0 behaviour is unchanged unless you stamp pubkeys via `wg peer pubkey` — the new MAC1 path engages only for links with both ends' pubkeys on file. +- No CLI verb removals. New verb: `wg blocklist list`. +- Roster format extended with new optional per-peer fields (`endpoint_relearn`); old rosters load unchanged. + +Full changelog: [CHANGELOG.md](https://github.com/hyper-derp/Hyper-DERP/blob/v0.2.x/CHANGELOG.md) · Design notes: [docs/design/wg_relay_pubkey_filter.md](https://github.com/hyper-derp/Hyper-DERP/blob/v0.2.x/docs/design/wg_relay_pubkey_filter.md), [docs/design/wg_relay_hardening.md](https://github.com/hyper-derp/Hyper-DERP/blob/v0.2.x/docs/design/wg_relay_hardening.md) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 71c57d8..392e350 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,6 +27,10 @@ list(FILTER UNIT_TEST_SOURCES EXCLUDE REGEX "test_open_conn\\.cc") list(FILTER UNIT_TEST_SOURCES EXCLUDE REGEX "test_einheit_channel\\.cc") +# test_hd_blake2s links its own copy of src/crypto/blake2s.cc +# instead of libderp; keep it out of the auto-glob. +list(FILTER UNIT_TEST_SOURCES + EXCLUDE REGEX "test_hd_blake2s\\.cc") foreach(test_source ${UNIT_TEST_SOURCES}) get_filename_component(test_name ${test_source} NAME_WE) @@ -116,6 +120,18 @@ target_link_libraries(test_wg_config PRIVATE gtest_discover_tests(test_wg_config DISCOVERY_TIMEOUT 10 PROPERTIES TIMEOUT 30) +# Standalone Blake2s used for wg-relay MAC1. Compiles its +# own object so we don't drag libderp's full link surface in. 
+add_executable(test_hd_blake2s + test_hd_blake2s.cc + ${PROJECT_SOURCE_DIR}/src/crypto/blake2s.cc) +target_include_directories(test_hd_blake2s PRIVATE + ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(test_hd_blake2s PRIVATE + GTest::gtest_main) +gtest_discover_tests(test_hd_blake2s + DISCOVERY_TIMEOUT 10 PROPERTIES TIMEOUT 10) + # ZMQ control channel tests. add_executable(test_ctl_channel test_ctl_channel.cc) target_link_libraries(test_ctl_channel PRIVATE diff --git a/tests/test_hd_blake2s.cc b/tests/test_hd_blake2s.cc new file mode 100644 index 0000000..7c6d4e5 --- /dev/null +++ b/tests/test_hd_blake2s.cc @@ -0,0 +1,89 @@ +/// @file test_hd_blake2s.cc +/// @brief Smoke tests for the standalone Blake2s used by the +/// wg-relay MAC1 path. Known-answer vectors ("abc", empty +/// input) plus structural checks: output length, block +/// boundary, keyed-mode determinism and key sensitivity. +// Copyright (c) 2026 Hyper-DERP contributors + +#include + +#include +#include +#include + +#include "crypto/blake2s.h" + +namespace { + +std::string Hex(const uint8_t* p, size_t n) { + static constexpr char kHex[] = "0123456789abcdef"; + std::string out; + out.reserve(2 * n); + for (size_t i = 0; i < n; ++i) { + out.push_back(kHex[p[i] >> 4]); + out.push_back(kHex[p[i] & 0xF]); + } + return out; +} + +} // namespace + +TEST(Blake2s, AbcVector) { + // RFC 7693 Appendix B — Blake2s("abc", 32-byte output, no key). + const uint8_t in[] = {'a', 'b', 'c'}; + uint8_t out[32]; + hyper_derp::crypto::Blake2s(out, 32, nullptr, 0, in, 3); + EXPECT_EQ(Hex(out, 32), + "508c5e8c327c14e2e1a72ba34eeb452f" + "37458b209ed63a294d999b4c86675982"); +} + +TEST(Blake2s, EmptyVector) { + uint8_t out[32]; + hyper_derp::crypto::Blake2s(out, 32, nullptr, 0, nullptr, 0); + EXPECT_EQ(Hex(out, 32), + "69217a3079908094e11121d042354a7c" + "1f55b6482ca1a51e1b250dfd1ed0eef9"); +} + +TEST(Blake2s, OutputLengthInfluencesDigest) { + // RFC 7693 encodes the output length in the parameter + // block, so a 16-byte digest of "abc" is NOT the prefix + // of the 32-byte digest of "abc".
This catches a class of + // implementation bugs where outlen isn't mixed in. + const uint8_t in[] = {'a', 'b', 'c'}; + uint8_t out16[16]; + uint8_t out32[32]; + hyper_derp::crypto::Blake2s(out16, 16, nullptr, 0, in, 3); + hyper_derp::crypto::Blake2s(out32, 32, nullptr, 0, in, 3); + EXPECT_NE(std::memcmp(out16, out32, 16), 0); +} + +TEST(Blake2s, BlockBoundary) { + // 64-byte input — exactly one Blake2s block boundary. + // Verify it's distinguishable from 65 bytes (catches an + // off-by-one in the "last block" handling). + uint8_t in_64[64]; + for (size_t i = 0; i < 64; ++i) in_64[i] = static_cast<uint8_t>(i); + uint8_t in_65[65]; + std::memcpy(in_65, in_64, 64); + in_65[64] = 0x40; + uint8_t out_64[32], out_65[32]; + hyper_derp::crypto::Blake2s(out_64, 32, nullptr, 0, in_64, 64); + hyper_derp::crypto::Blake2s(out_65, 32, nullptr, 0, in_65, 65); + EXPECT_NE(std::memcmp(out_64, out_65, 32), 0); +} + +TEST(Blake2s, KeyedDeterministic) { + // Keyed mode is what wg-relay's MAC1 uses. Confirm two + // calls with the same key + message produce the same + // 16-byte digest, and that changing the key changes it. + uint8_t key1[32] = {1, 2, 3}; + uint8_t key2[32] = {9}; + const uint8_t msg[] = {'h', 'e', 'l', 'l', 'o'}; + uint8_t out_a[16], out_b[16], out_c[16]; + hyper_derp::crypto::Blake2s(out_a, 16, key1, 32, msg, 5); + hyper_derp::crypto::Blake2s(out_b, 16, key1, 32, msg, 5); + hyper_derp::crypto::Blake2s(out_c, 16, key2, 32, msg, 5); + EXPECT_EQ(std::memcmp(out_a, out_b, 16), 0); + EXPECT_NE(std::memcmp(out_a, out_c, 16), 0); +} From 8f0d28be59cd023b25014984d7c7a8c094e6906d Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 21:15:54 +0200 Subject: [PATCH 06/13] wg-relay: sweep on idle + don't refresh contested candidate slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs surfaced during the active blocklist trigger test on the libvirt fleet, both fixed here. (1) Idle relay never expired candidates.
ExpireCandidatesLocked only ran inside HandlePacket. A relay with no live traffic would hold a candidate slot open indefinitely — the strike that should have fired at 30 s never landed, so the blocklist could never escalate on a slow-cadence forger. Fix: 1 s periodic sweep in RecvLoop, run after every poll() return (packet or 200 ms timeout) so it fires on idle relays too. Cheap (locks peers_mu briefly, walks the small peer table, walks the small blocklist). (2) Back-to-back forgeries from the same source pinned the candidate forever. HandleUnknownSrcHandshakeLocked unconditionally overwrote candidate_endpoint and candidate_set_ns on every matching handshake. An attacker spamming handshakes (or just a flaky client retransmitting) kept bumping set_ns to NowNs(), so the 30 s expiry window never closed. Fix: split the path. If the new handshake source matches the current candidate, no-op forward (don't refresh the timer). If it's a different source contesting an active candidate, drop with drop_unknown_src. Either way, the original candidate's timer keeps running and either confirms or expires on schedule. Verified on libvirt: * forge 1 → candidate registered. * 30 s later: strike #1 (drop_relearn_unconfirmed=1). * forge 2 (different source port) within the 60 s window: candidate registered again with a fresh timer. * 30 s later: strike #2 → policy match (2/60s) → blocklist entry for 192.168.122.1, 60 s remaining. * forge 3 from same source: dropped at top of XDP, xdp_drop_blocklisted=1, never reaches the forward path. Verified on Haswell 25 GbE (sanity, no regression): * 4/4 ping over the relay, 0.73 ms avg. * iperf3 single-stream TCP: 10.9 Gbit/s (vs 10.7 baseline). * xdp_fwd_packets ~= 10.5 M with only 1 cold-start xdp_pass_no_mac — the new sweep + contest checks add zero measurable cost on the data plane.
--- src/wg_relay.cc | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 41644e0..cd64ddc 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -706,6 +706,37 @@ void HandleUnknownSrcHandshakeLocked( return; } + // Already-pending candidate gates: a back-to-back + // handshake from the SAME source would otherwise refresh + // the candidate's set_ns and the slot would never expire, + // letting an attacker pin the slot indefinitely. Treat + // that as a no-op forward — the existing candidate is + // still valid. + if (sender->candidate_endpoint_len > 0 && + SockaddrEqual(sender->candidate_endpoint, + sender->candidate_endpoint_len, src, + src_len)) { + ssize_t sent = sendto( + r->sock_fd, pkt, len, 0, + reinterpret_cast(&p.endpoint), + p.endpoint_len); + if (sent > 0) { + r->stats.fwd_packets.fetch_add( + 1, std::memory_order_relaxed); + } + return; + } + + // A different source is contesting an existing candidate + // — drop it. The current candidate keeps its expiry + // timer; if it doesn't confirm, the strike + blocklist + // path will catch the attacker. + if (sender->candidate_endpoint_len > 0) { + r->stats.drop_unknown_src.fetch_add( + 1, std::memory_order_relaxed); + return; + } + // Register the candidate. Committed endpoint stays put. std::memcpy(&sender->candidate_endpoint, &src, src_len); sender->candidate_endpoint_len = src_len; @@ -959,9 +990,21 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, void RecvLoop(WgRelay* r) { std::vector buf(2048); + uint64_t last_sweep_ns = 0; + constexpr uint64_t kSweepIntervalNs = + 1ULL * 1'000'000'000ULL; // 1 s while (r->running.load(std::memory_order_acquire)) { pollfd pfd{r->sock_fd, POLLIN, 0}; int rc = poll(&pfd, 1, 200); + // Periodic sweep — runs whether or not a packet + // arrived, so candidate timeouts and expired blocklist + // entries fire on idle relays too. 
+ uint64_t now = NowNs(); + if (now - last_sweep_ns >= kSweepIntervalNs) { + std::lock_guard lk(r->peers_mu); + ExpireCandidatesLocked(r); + last_sweep_ns = now; + } if (rc <= 0) continue; sockaddr_storage src{}; socklen_t src_len = sizeof(src); From ada485a2bbcbe723bf6cebef84393d6c7d94be71 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 21:27:38 +0200 Subject: [PATCH 07/13] wg-relay: confirm candidate only after partner-attributable response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P0 bug found by fuzzing the new roaming flow: an attacker who knows bob's pubkey could send a forged handshake init followed by *any* 32-byte UDP starting with 0x04 from the same source and the relay would commit the candidate as the new endpoint. Hijack confirmed in fuzz — alice's endpoint moved to the forger's IP. Root cause: ConfirmCandidateLocked treated any transport-data- shaped packet from the candidate as proof the handshake completed. The shape check was load-bearing for security but couldn't actually distinguish a real session's transport from a forged 0x04 byte. Fix: walk the WG protocol enough to attribute responses to specific inits. WireGuard's handshake response echoes the initiator's `sender_index` in its `receiver_index` field. We now: * save the candidate init's sender_index when registering the candidate (bytes 4..8 of the type-1 packet); * on forwarding a partner's response (type 2), read its receiver_index (bytes 8..12) and only set candidate_partner_responded if it matches the saved sender_index — proving this response is for THIS candidate's init, not a concurrent legitimate handshake from the peer at the committed endpoint; * gate ConfirmCandidateLocked on candidate_partner_responded. Bob silently drops a forged init (the encrypted static field is garbage, no decryption matches a known peer pubkey), so no matching response ever flows. The candidate's partner_responded stays false. 
Transport-data from the candidate doesn't confirm. Hijack closed. Verified on libvirt fleet: * Forge sequence (init + type-4 same source): endpoint stays at the registered value, drop_unknown_src catches the forged transport attempts. * Legitimate roam (alice's IP changed): handshake registers candidate, bob's response receiver_idx matches the stored init sender_idx, partner_responded fires, transport from new IP confirms, endpoint commits, ping 5/5 at ~1 ms. --- include/hyper_derp/wg_relay.h | 18 ++++++++++ src/wg_relay.cc | 66 +++++++++++++++++++++++++++++++---- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index fcaa791..ae5566a 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -77,6 +77,24 @@ struct WgRelayPeer { struct sockaddr_storage candidate_endpoint{}; socklen_t candidate_endpoint_len = 0; uint64_t candidate_set_ns = 0; + /// `sender_index` from the candidate's handshake init (the + /// initiator-side session id, bytes 4..8 of the type-1 + /// packet). The matching handshake response from the + /// partner echoes this in its `receiver_index` field. + /// We only set candidate_partner_responded when we see + /// a response whose receiver_index matches — that proves + /// the response was for THIS candidate's init, not for an + /// unrelated concurrent handshake from the legitimate + /// peer at the committed endpoint. + uint32_t candidate_init_sender_index = 0; + /// True once we've forwarded a partner response whose + /// receiver_index matches candidate_init_sender_index. + /// ConfirmCandidateLocked refuses to commit until this is + /// set, which closes the "forger sends type-1 + type-4" + /// hijack: bob silently drops a forged init (sender static + /// is garbage), so no matching response ever flows, so + /// the candidate never gains the right to confirm. 
+ bool candidate_partner_responded = false; /// Steady-clock ns of the last completed relearn — gates /// new candidate registrations against rapid flapping. uint64_t last_relearn_ns = 0; diff --git a/src/wg_relay.cc b/src/wg_relay.cc index cd64ddc..d4ebc5c 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -634,6 +634,8 @@ void ExpireCandidatesLocked(WgRelay* r) { p.candidate_endpoint_len); p.candidate_endpoint_len = 0; p.candidate_set_ns = 0; + p.candidate_init_sender_index = 0; + p.candidate_partner_responded = false; r->stats.drop_relearn_unconfirmed.fetch_add( 1, std::memory_order_relaxed); } @@ -741,6 +743,21 @@ void HandleUnknownSrcHandshakeLocked( std::memcpy(&sender->candidate_endpoint, &src, src_len); sender->candidate_endpoint_len = src_len; sender->candidate_set_ns = now; + sender->candidate_partner_responded = false; + // Init sender_index lives at bytes 4..8 of the type-1 + // packet (little-endian per the WG spec). We use it to + // match against the partner's response receiver_index + // before letting transport-data confirm the candidate. + if (pkt[0] == 1 && len >= 8) { + uint32_t idx; + std::memcpy(&idx, pkt + 4, 4); + sender->candidate_init_sender_index = idx; + } else { + // Type 2 (response): we can't tie this to an init + // we forwarded. Disallow confirmation entirely; the + // candidate will expire naturally. + sender->candidate_init_sender_index = 0; + } // Forward the handshake to the destination. ssize_t sent = sendto( @@ -794,6 +811,19 @@ WgRelayPeer* ConfirmCandidateLocked( src_len)) { continue; } + // Refuse to confirm until we've forwarded a partner + // response whose receiver_index matched the candidate's + // init sender_index. Without this, an attacker who + // knows the partner's pubkey could send a forged type-1 + // followed by any 32-byte UDP starting with 0x04 and + // hijack the slot — bob never responded, but transport- + // shape was enough. 
+ if (!p.candidate_partner_responded) { + // Don't confirm; let the candidate expire normally. + // Keep iterating in case a different peer's candidate + // matches this src. + continue; + } // Find p's link partner so we can rewrite both // directions of the BPF link in one shot. WgRelayPeer* partner = FindLinkPartnerLocked(r, p.name); @@ -813,6 +843,8 @@ WgRelayPeer* ConfirmCandidateLocked( p.candidate_endpoint_len); p.candidate_endpoint_len = 0; p.candidate_set_ns = 0; + p.candidate_init_sender_index = 0; + p.candidate_partner_responded = false; p.last_relearn_ns = now; p.endpoint_relearn += 1; @@ -970,18 +1002,38 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, std::strerror(errno)); return; } - // Mirror handshake response to dst's pending candidate - // endpoint, if any. Lets a real peer that just came up at - // a new IP receive the response and complete its - // handshake. The committed endpoint also gets a copy so a - // legitimate-but-stale alice (still at the old IP) keeps - // working until her own next handshake decides which - // endpoint actually has the keys. + // Handshake response → mirror to dst's pending candidate + // endpoint (so a real roamer at the new IP completes their + // handshake) and, if the response's receiver_index matches + // the candidate's stored init sender_index, mark the + // candidate as eligible to confirm via transport-data. + // The receiver_index check is the load-bearing one: it + // proves this response is FOR the candidate's init rather + // than for some concurrent legitimate handshake from the + // peer at the committed endpoint. Bytes 8..12 of the + // type-2 packet hold the response's receiver_index. + // Editing dst here is fine — caller holds peers_mu. 
if (pkt[0] == 2 && dst->candidate_endpoint_len > 0) { sendto(r->sock_fd, pkt, len, 0, reinterpret_cast( &dst->candidate_endpoint), dst->candidate_endpoint_len); + if (len >= 12) { + uint32_t recv_idx; + std::memcpy(&recv_idx, pkt + 8, 4); + WgRelayPeer* mut_dst = nullptr; + for (auto& p : r->peers) { + if (p.name == dst->name) { mut_dst = &p; break; } + } + if (mut_dst && + mut_dst->candidate_init_sender_index != 0 && + recv_idx == mut_dst->candidate_init_sender_index) { + mut_dst->candidate_partner_responded = true; + spdlog::info( + "wg-relay candidate {}: partner responded, " + "eligible to confirm", mut_dst->name); + } + } } r->stats.fwd_packets.fetch_add( 1, std::memory_order_relaxed); From 0c2951eda844b48be2fbf556cefd9e56974dce69 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 21:37:02 +0200 Subject: [PATCH 08/13] wg-relay: refresh candidate sender_index on same-source retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a peer roams and its first handshake init is in flight, wg.ko may retry from the same endpoint before bob's response lands. Each retry picks a fresh sender_index, but the no-op-forward branch (same source as registered candidate) wasn't updating the saved candidate_init_sender_index. If bob ended up responding to a later retry init, his response's receiver_index wouldn't match the saved (first) sender_index, the partner-response check would fail, and the candidate would never confirm — breaking legitimate roams that involve retries. Refresh candidate_init_sender_index on every type-1 retry from the same source. The hijack defense is unaffected: the match still requires a partner-attributable response, and only the legitimate sender's wg.ko knows which index its own init used. 
--- src/wg_relay.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/wg_relay.cc b/src/wg_relay.cc index d4ebc5c..6172b07 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -713,11 +713,19 @@ void HandleUnknownSrcHandshakeLocked( // the candidate's set_ns and the slot would never expire, // letting an attacker pin the slot indefinitely. Treat // that as a no-op forward — the existing candidate is - // still valid. + // still valid. We DO refresh candidate_init_sender_index + // (each retry from wg.ko picks a fresh sender_index, and + // we want the partner-response match to track the latest + // init since that's the one bob's wg.ko responds to). if (sender->candidate_endpoint_len > 0 && SockaddrEqual(sender->candidate_endpoint, sender->candidate_endpoint_len, src, src_len)) { + if (pkt[0] == 1 && len >= 8) { + uint32_t idx; + std::memcpy(&idx, pkt + 4, 4); + sender->candidate_init_sender_index = idx; + } ssize_t sent = sendto( r->sock_fd, pkt, len, 0, reinterpret_cast(&p.endpoint), From edfa8bbd3ea5f18c267ae1bee3be0cda66292b1f Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 21:52:14 +0200 Subject: [PATCH 09/13] wg-relay: drop type-2 from unknown src + rate-limit retry forwards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related amplifier surfaces in the unknown-source handshake handler, both surfaced by fuzzing: 1. Type-2 (response) from an unknown source has no place in the protocol — legitimate responses come from the committed responder endpoint and hit the regular forward path. The old code accepted the type-2, registered a (never-confirm- able) candidate, and forwarded it to the partner. A forger could use this single accepted packet to claim alice's candidate slot, then bounce arbitrary WG-shaped packets at bob via the same-source no-op-forward branch, all without any auth. Drop type-2 outright at the entry of the unknown- source handler. 2. 
Even with type-2 closed, a forger holding the responder's public mac1 key can craft unlimited valid type-1 inits and bounce them through the no-op-forward branch at line rate for the 30 s candidate window. Cap retry forwards at one per second per unconfirmed candidate. wg.ko's own retry cadence is 5 s, so legit clients are unaffected; a flood of forged retries gets clamped to ~1 pps and then strikes into the blocklist. Drops above the rate-limit count as drop_unknown_src so a probing forger can't distinguish "you're rate-limited" from "your packet was malformed." --- include/hyper_derp/wg_relay.h | 10 ++++++++ src/wg_relay.cc | 46 +++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index ae5566a..2b1cd7e 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -95,6 +95,16 @@ struct WgRelayPeer { /// is garbage), so no matching response ever flows, so /// the candidate never gains the right to confirm. bool candidate_partner_responded = false; + /// Steady-clock ns of the most recent retry-init forward + /// from the same source. Used to rate-limit retry forwards + /// while the candidate is unconfirmed: a forger with the + /// public mac1 key can craft unlimited valid type-1 inits, + /// so the no-op-forward branch would otherwise let them + /// bounce arbitrary packets at the partner via the relay. + /// wg.ko's normal retry cadence is 5 s, so capping at one + /// forward per second is conservative for legit clients + /// while sharply limiting amplifier abuse. + uint64_t candidate_last_forward_ns = 0; /// Steady-clock ns of the last completed relearn — gates /// new candidate registrations against rapid flapping. 
uint64_t last_relearn_ns = 0; diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 6172b07..1b38d2d 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -405,6 +405,11 @@ void PersistRosterLocked(WgRelay* r) { // registrations for the same peer (rate-limits flap). constexpr uint64_t kCandidateTimeoutNs = 30ULL * 1'000'000'000ULL; constexpr uint64_t kRelearnCooldownNs = 5ULL * 1'000'000'000ULL; +// Rate-limit retry-init forwards from a not-yet-confirmed +// candidate. wg.ko's retry cadence is 5 s; legit clients only +// trip this when their network is genuinely flaky, while a +// forger spamming at line rate gets clamped to ~1 pps. +constexpr uint64_t kRetryForwardGapNs = 1ULL * 1'000'000'000ULL; // Blocklist escalation policy. Each strike records a failed // candidate confirmation from this source IP; once the count @@ -634,6 +639,7 @@ void ExpireCandidatesLocked(WgRelay* r) { p.candidate_endpoint_len); p.candidate_endpoint_len = 0; p.candidate_set_ns = 0; + p.candidate_last_forward_ns = 0; p.candidate_init_sender_index = 0; p.candidate_partner_responded = false; r->stats.drop_relearn_unconfirmed.fetch_add( @@ -687,6 +693,18 @@ WgRelayPeer* FindLinkPartnerLocked(WgRelay* r, void HandleUnknownSrcHandshakeLocked( WgRelay* r, const uint8_t* pkt, size_t len, const sockaddr_storage& src, socklen_t src_len) { + // Type 2 (response) from an unknown source has no place in + // the protocol: legitimate responses come from the + // committed responder endpoint and hit the regular forward + // path. Accepting them here would give a forger a free + // unauthenticated amplifier — register once, then bounce + // any number of WG-shaped packets at the partner via the + // same-source no-op-forward branch. Drop outright. 
+ if (pkt[0] != 1) { + r->stats.drop_unknown_src.fetch_add( + 1, std::memory_order_relaxed); + return; + } uint64_t now = NowNs(); for (auto& p : r->peers) { if (p.pubkey_b64.empty()) continue; @@ -726,6 +744,19 @@ void HandleUnknownSrcHandshakeLocked( std::memcpy(&idx, pkt + 4, 4); sender->candidate_init_sender_index = idx; } + // Rate-limit retry forwards. Legit wg.ko retries every + // 5 s; a forger spamming retries to abuse the relay as + // an amplifier gets clamped to ~1 pps. Drops above the + // gap count as drop_unknown_src so the forger can't + // distinguish "you're rate-limited" from "your packet + // was malformed." + if (now - sender->candidate_last_forward_ns + < kRetryForwardGapNs) { + r->stats.drop_unknown_src.fetch_add( + 1, std::memory_order_relaxed); + return; + } + sender->candidate_last_forward_ns = now; ssize_t sent = sendto( r->sock_fd, pkt, len, 0, reinterpret_cast(&p.endpoint), @@ -751,21 +782,15 @@ void HandleUnknownSrcHandshakeLocked( std::memcpy(&sender->candidate_endpoint, &src, src_len); sender->candidate_endpoint_len = src_len; sender->candidate_set_ns = now; + sender->candidate_last_forward_ns = now; sender->candidate_partner_responded = false; // Init sender_index lives at bytes 4..8 of the type-1 // packet (little-endian per the WG spec). We use it to // match against the partner's response receiver_index // before letting transport-data confirm the candidate. - if (pkt[0] == 1 && len >= 8) { - uint32_t idx; - std::memcpy(&idx, pkt + 4, 4); - sender->candidate_init_sender_index = idx; - } else { - // Type 2 (response): we can't tie this to an init - // we forwarded. Disallow confirmation entirely; the - // candidate will expire naturally. - sender->candidate_init_sender_index = 0; - } + uint32_t idx; + std::memcpy(&idx, pkt + 4, 4); + sender->candidate_init_sender_index = idx; // Forward the handshake to the destination. 
ssize_t sent = sendto( @@ -851,6 +876,7 @@ WgRelayPeer* ConfirmCandidateLocked( p.candidate_endpoint_len); p.candidate_endpoint_len = 0; p.candidate_set_ns = 0; + p.candidate_last_forward_ns = 0; p.candidate_init_sender_index = 0; p.candidate_partner_responded = false; p.last_relearn_ns = now; From cfecbcdcda58f1c2043d57b6f1ff3cd30fc4a1b6 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 21:53:25 +0200 Subject: [PATCH 10/13] wg-relay: sweep stale strike entries during candidate expiry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A strike entry whose first_strike_ns is older than the widest policy window (24 h) can no longer escalate anything: every policy check uses `now - first_strike_ns <= window`, and at that age all comparisons fail. The entry just sits in the std::map taking up memory. Without this sweep, a forger spraying from spoofed source IPs (each striking once and never returning) would grow the strike map without bound — a slow leak proportional to the rate of distinct attack sources. The candidate-slot contention already makes this a marginal attack, but the leak is a separate correctness issue worth closing. Sweep stale entries on each ExpireCandidatesLocked tick, alongside the existing blocklist sweep. --- src/wg_relay.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 1b38d2d..c291229 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -646,6 +646,23 @@ void ExpireCandidatesLocked(WgRelay* r) { 1, std::memory_order_relaxed); } } + // Sweep stale strike entries — a strike whose first strike + // is older than the widest policy window can no longer fire + // anything, so keeping it in the map is pure memory growth. + // Without this sweep, a forger spraying from spoofed source + // IPs (each striking once and never returning) would grow + // the strike map unbounded. 
+ uint64_t widest_window = + kStrikePolicy[std::size(kStrikePolicy) - 1].window_ns; + for (auto it = r->strikes.begin(); + it != r->strikes.end();) { + if (it->second.first_strike_ns != 0 && + now - it->second.first_strike_ns > widest_window) { + it = r->strikes.erase(it); + } else { + ++it; + } + } // Sweep expired blocklist entries — stale-but-expired // entries are harmless (BPF compares against current // monotonic time) but cleaning them keeps `wg blocklist From 55d19b4676bde4e5be2e1ef813bee0d841071e37 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 21:54:01 +0200 Subject: [PATCH 11/13] CHANGELOG: extend 0.2.1 wg-relay hardening with fuzz fixes Fold the four fuzz-driven hardening commits (partner- attributable confirm, type-2 unknown-src drop, retry rate- limit, strike sweep) into the 0.2.1 unreleased entry so the release notes match what's on the branch. --- CHANGELOG.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e587eb..f137959 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,26 @@ this file. Format based on [Keep a Changelog](https://keepachangelog.com/). anonymizer attack against the partner. New verb `wg blocklist list`. New counters `drop_blocklisted` (XDP) and per-IP strike records. +- **Endpoint-hijack defense**: a forged handshake init must + receive a partner-attributable response (the partner's + type-2 receiver_index matches the init's sender_index) + before the candidate slot is allowed to confirm — a forger + who can pass MAC1 but lacks the static-key handshake can + no longer bounce the candidate to confirm by sending a + matching-shaped transport-data packet of their own. +- **Type-2 from unknown source dropped outright**: legitimate + handshake responses come from the committed responder + endpoint, so an unknown-source type-2 has no place in the + protocol and was an unauthenticated amplifier surface. 
+- **Retry-init forward rate-limit**: while a candidate is + unconfirmed, the no-op-forward branch caps retry forwards + at one per second per source. Legitimate wg.ko retries + every 5 s; a flood of forged retries is clamped and then + strikes into the blocklist. +- **Strike-table sweep**: stale strike entries (older than + the widest policy window) are pruned during candidate + expiry so spoofed-source one-shot strikes can't grow the + table without bound. ### Added — Crypto From 093228df5c8021df71ff7ee8eace1f3583632152 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 21:55:16 +0200 Subject: [PATCH 12/13] CHANGELOG: unwrap fuzz-hardening bullets --- CHANGELOG.md | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f137959..678129d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,26 +42,10 @@ this file. Format based on [Keep a Changelog](https://keepachangelog.com/). anonymizer attack against the partner. New verb `wg blocklist list`. New counters `drop_blocklisted` (XDP) and per-IP strike records. -- **Endpoint-hijack defense**: a forged handshake init must - receive a partner-attributable response (the partner's - type-2 receiver_index matches the init's sender_index) - before the candidate slot is allowed to confirm — a forger - who can pass MAC1 but lacks the static-key handshake can - no longer bounce the candidate to confirm by sending a - matching-shaped transport-data packet of their own. -- **Type-2 from unknown source dropped outright**: legitimate - handshake responses come from the committed responder - endpoint, so an unknown-source type-2 has no place in the - protocol and was an unauthenticated amplifier surface. -- **Retry-init forward rate-limit**: while a candidate is - unconfirmed, the no-op-forward branch caps retry forwards - at one per second per source. 
Legitimate wg.ko retries - every 5 s; a flood of forged retries is clamped and then - strikes into the blocklist. -- **Strike-table sweep**: stale strike entries (older than - the widest policy window) are pruned during candidate - expiry so spoofed-source one-shot strikes can't grow the - table without bound. +- **Endpoint-hijack defense**: a forged handshake init must receive a partner-attributable response (the partner's type-2 `receiver_index` matches the init's `sender_index`) before the candidate slot is allowed to confirm. A forger who can pass MAC1 but lacks the static-key handshake can no longer bounce the candidate to confirm by sending a matching-shaped transport-data packet of their own. +- **Type-2 from unknown source dropped outright**: legitimate handshake responses come from the committed responder endpoint, so an unknown-source type-2 has no place in the protocol and was an unauthenticated amplifier surface. +- **Retry-init forward rate-limit**: while a candidate is unconfirmed, the no-op-forward branch caps retry forwards at one per second per source. Legitimate `wg.ko` retries every 5 s; a flood of forged retries is clamped and then strikes into the blocklist. +- **Strike-table sweep**: stale strike entries (older than the widest policy window) are pruned during candidate expiry so spoofed-source one-shot strikes can't grow the table without bound. 
### Added — Crypto From d34a84c18434ee8e053b861806f085e4ec9a5fbd Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Wed, 29 Apr 2026 22:08:25 +0200 Subject: [PATCH 13/13] wg-relay: split if-clause body across lines for cpplint --- src/wg_relay.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/wg_relay.cc b/src/wg_relay.cc index c291229..1dc9345 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -1074,7 +1074,10 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, std::memcpy(&recv_idx, pkt + 8, 4); WgRelayPeer* mut_dst = nullptr; for (auto& p : r->peers) { - if (p.name == dst->name) { mut_dst = &p; break; } + if (p.name == dst->name) { + mut_dst = &p; + break; + } } if (mut_dst && mut_dst->candidate_init_sender_index != 0 &&