From 8a13935050d1f629d15a7df672c7d123dc34a4e5 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Mon, 4 May 2026 19:30:46 +0200 Subject: [PATCH 1/5] wg-relay: --trace-forward-hashes flag + per-peer drop counters (P0.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daemon-side diagnostic surface for the t1-integrity failure on the 0.2.1 cloud-gcp-c4 benchmark. The brief is to ship diagnostics first, fix second — until we can see what diverges, any "fix" is a guess. What the daemon does now: * New `--trace-forward-hashes` flag (off by default). When enabled, every wg-relay forward logs a SHA-256(payload) hash + frame length + source/destination peer pubkey prefix at two points: ingress (after the source peer matches and after optional MAC1 verification) and egress (immediately after the sendto to the destination succeeds). The same SHA on both lines proves the relay didn't mutate the frame; divergence pinpoints a corrupting code path. Per-frame log on the hot path — startup warns the operator and the docs note it must not be left on in production. * `wg peer list` now surfaces per-peer drop counters `peer.<idx>.drop_no_link` and `peer.<idx>.drop_pubkey_mismatch`. These are the pair-attributable subset of the existing aggregate counters, populated at the same drop sites where `src_peer` is already known. Aggregate counters are unchanged; the other drop classes (`drop_unknown_src`, `drop_not_wg_shaped`, `drop_handshake_no_pubkey_match`) are by definition unattributable to a known peer and stay aggregate-only. What the operator should observe on next run: with the runner patched to pass `--trace-forward-hashes` for the integrity test specifically, the relay logs ingress + egress SHA pairs the runner can compare end-to-end. `wg peer list` shows where each known peer's drops are landing without the runner having to diff aggregate counters between bursts. 
Smoke test: tests/test_wg_relay_trace.cc drives the wg-relay forwarder in-process with two peers + one link, exercises both trace=on / trace=off paths, and verifies the per-peer drop_no_link counter increments on an unlinked peer's traffic. Steady-state forwarding path is unchanged when the trace flag is off — the per-frame log is gated behind a single bool check, and the per-peer counter bumps are next to existing aggregate increments. The 24-h soak signal from 0.2.1 stands. --- CHANGELOG.md | 7 ++ include/hyper_derp/server.h | 12 +++ include/hyper_derp/wg_relay.h | 22 ++++ src/einheit_channel.cc | 4 + src/main.cc | 21 ++++ src/wg_relay.cc | 62 +++++++++++ tests/CMakeLists.txt | 10 ++ tests/test_wg_relay_trace.cc | 195 ++++++++++++++++++++++++++++++++++ 8 files changed, 333 insertions(+) create mode 100644 tests/test_wg_relay_trace.cc diff --git a/CHANGELOG.md b/CHANGELOG.md index 678129d8..686f55fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ All notable changes to this project will be documented in this file. Format based on [Keep a Changelog](https://keepachangelog.com/). +## [0.2.2] - unreleased + +### Added — wg-relay diagnostic surface + +- **`--trace-forward-hashes` daemon flag**. When set, every wg-relay forward logs a SHA-256(payload) hash + length + source/destination peer pubkey prefix at two points: ingress (after the source peer matches and MAC1 verification, if applicable) and egress (just before `sendto` to the destination). Same hash on both lines proves the relay didn't mutate the frame; divergence flags a corrupting code path. Off by default — this is per-frame logging on the hot path and will tank throughput if left on. Aimed at diagnosing integrity-test failures where a single log line and aggregate counters aren't enough to triage. +- **Per-peer drop counters**. `wg peer list` now surfaces `peer..drop_no_link` and `peer..drop_pubkey_mismatch` alongside the existing per-peer byte counters. 
The aggregate counters in `wg show` are unchanged; these are the pair-attributable subset for diagnosing which peer's traffic is hitting which drop reason. Other drop classes (`drop_unknown_src`, `drop_not_wg_shaped`, `drop_handshake_no_pubkey_match`) are by definition unattributable to a known peer and remain aggregate-only. + ## [0.2.1] - unreleased ### Added — wg-relay hardening diff --git a/include/hyper_derp/server.h b/include/hyper_derp/server.h index 194c19e0..9e0fd463 100644 --- a/include/hyper_derp/server.h +++ b/include/hyper_derp/server.h @@ -85,6 +85,18 @@ struct WgRelayConfig { /// Path to the compiled BPF object. Defaults to a /// CMake-installed location; rarely set explicitly. std::string xdp_bpf_obj_path; + /// XDP attach mode override. "" / "drv" → native only + /// (XDP_FLAGS_DRV_MODE); "skb" → generic only; "auto" → + /// try drv, fall back to skb on EOPNOTSUPP / EINVAL; + /// "off" → skip XDP entirely. Surfaces an explicit + /// failure mode rather than silently falling through to + /// userspace when the operator expected drv mode. + std::string xdp_mode; + /// Per-frame trace logging for diagnosing integrity + /// failures. See WgRelay::trace_forward_hashes — off by + /// default; enabled via the `--trace-forward-hashes` + /// daemon flag. + bool trace_forward_hashes = false; }; /// Connection level for a peer pair. Level 0 (DERP) is diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index 2b1cd7eb..70bd9bd9 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -68,6 +68,14 @@ struct WgRelayPeer { /// Times this peer's `endpoint` was relearned via the /// MAC1-driven roaming flow. Persisted to the roster. uint64_t endpoint_relearn = 0; + /// Per-peer drop counters — incremented alongside the + /// aggregate `WgRelayStats` counters at sites where the + /// drop is attributable to a known source peer. 
The + /// other drop classes (drop_unknown_src, + /// drop_not_wg_shaped, drop_handshake_no_pubkey_match) + /// are by definition unattributable and stay aggregate. + uint64_t drop_no_link_peer = 0; + uint64_t drop_pubkey_mismatch_peer = 0; /// Pending relearn-candidate, populated when an unknown /// source presents a handshake with valid MAC1 against /// this peer's link partner. Cleared on confirm (transport @@ -246,6 +254,14 @@ struct WgRelay { std::atomic running{false}; std::thread loop_thread; std::string roster_path; + /// When true, every forwarded frame is logged with + /// SHA-256(payload), length, and the source/destination + /// peer pubkey prefixes at both ingress and egress. + /// Off by default — this is per-frame logging on the hot + /// path and will tank throughput. Drive it from the + /// `--trace-forward-hashes` daemon flag for debugging + /// integrity-mismatch failures. + bool trace_forward_hashes = false; /// XDP fast path. attached == true iff the BPF program /// is live on a NIC. Map updates from `wg link add` /// land here; the userspace recv loop still runs as the @@ -291,6 +307,12 @@ struct WgRelayPeerInfo { uint64_t last_seen_ns; uint64_t rx_bytes; uint64_t fwd_bytes; + /// Per-peer drop counters. Aggregate counterparts in + /// WgRelayStatsSnapshot stay populated; these are the + /// pair-attributable subset for diagnosing which peer's + /// traffic is hitting which drop reason. 
+ uint64_t drop_no_link; + uint64_t drop_pubkey_mismatch; std::string linked_to; // name of peer this is linked to, or empty }; std::vector WgRelayListPeers( diff --git a/src/einheit_channel.cc b/src/einheit_channel.cc index 564c734f..b103663b 100644 --- a/src/einheit_channel.cc +++ b/src/einheit_channel.cc @@ -1843,6 +1843,10 @@ void WgPeerList(Server* s, const Request& /*req*/, p.rx_bytes); b += std::format("peer.{}.fwd_bytes={}\n", idx, p.fwd_bytes); + b += std::format("peer.{}.drop_no_link={}\n", idx, + p.drop_no_link); + b += std::format("peer.{}.drop_pubkey_mismatch={}\n", idx, + p.drop_pubkey_mismatch); idx++; } b += std::format("peer.count={}\n", idx); diff --git a/src/main.cc b/src/main.cc index 841dba57..854810c4 100644 --- a/src/main.cc +++ b/src/main.cc @@ -63,6 +63,15 @@ static void PrintUsage(const char* prog) { "(default: 3478)\n" " --xdp-interface Network interface for " "XDP attachment\n" + " --xdp-mode XDP attach mode: drv " + "(default), skb, auto, off\n" + " --trace-forward-hashes " + "Log SHA-256 of every forwarded\n" + " wg-relay frame at ingress + " + "egress. 
Per-frame\n" + " log; for diagnostics only — " + "do not enable in\n" + " production.\n" " --help Show this help\n" " --version Show version", prog); @@ -123,6 +132,8 @@ int main(int argc, char* argv[]) { const char* hd_relay_key = nullptr; const char* hd_enroll_mode = nullptr; const char* xdp_interface = nullptr; + const char* xdp_mode = nullptr; + bool trace_forward_hashes = false; int stun_port = -1; int hd_relay_id = -1; std::vector seed_relays; @@ -221,6 +232,10 @@ int main(int argc, char* argv[]) { } else if (arg == "--xdp-interface"sv && i + 1 < argc) { xdp_interface = argv[++i]; + } else if (arg == "--xdp-mode"sv && i + 1 < argc) { + xdp_mode = argv[++i]; + } else if (arg == "--trace-forward-hashes"sv) { + trace_forward_hashes = true; } else { std::println(stderr, "error: unknown option '{}'", arg); @@ -338,6 +353,12 @@ int main(int argc, char* argv[]) { static_cast(stun_port); if (xdp_interface) config.level2.xdp_interface = xdp_interface; + // wg-relay-mode-only flags: pass through whether or not + // wg-mode is selected (the relay only reads them when it's + // active; the daemon ignores them otherwise). + if (xdp_mode) config.wg.xdp_mode = xdp_mode; + if (trace_forward_hashes) + config.wg.trace_forward_hashes = true; if (pin_spec) { int n = ParsePinCores(pin_spec, diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 1dc9345b..90d8720d 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -19,6 +19,8 @@ #include #include +#include + #include #include #include @@ -40,6 +42,30 @@ uint64_t NowNs() { .count(); } +// Helpers for the trace-forward-hashes diagnostic path. +// Both functions are only called when r->trace_forward_hashes +// is set; the hot path stays untouched when tracing is off. 
+std::string Sha256HexPrefix(const uint8_t* data, size_t len) { + unsigned char digest[crypto_hash_sha256_BYTES]; + crypto_hash_sha256(digest, data, len); + static constexpr char kHex[] = "0123456789abcdef"; + // 16 bytes = 32 hex chars; enough to distinguish frames + // without spamming the log. + std::string out(32, '\0'); + for (int i = 0; i < 16; ++i) { + out[2 * i] = kHex[digest[i] >> 4]; + out[2 * i + 1] = kHex[digest[i] & 0xF]; + } + return out; +} + +// First 12 chars of a peer's base64 pubkey, "[--]" if unset. +std::string PubkeyPrefix(const std::string& pubkey_b64) { + if (pubkey_b64.empty()) return "[--]"; + return pubkey_b64.substr( + 0, std::min(12, pubkey_b64.size())); +} + bool ParseHostPort(const std::string& s, sockaddr_storage* out, socklen_t* len) { auto colon = s.rfind(':'); @@ -1019,6 +1045,7 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, if (!dst) { r->stats.drop_no_link.fetch_add( 1, std::memory_order_relaxed); + src_peer->drop_no_link_peer += 1; return; } @@ -1035,6 +1062,7 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, if (!VerifyMac1(pkt, len, partner_pub.data())) { r->stats.drop_handshake_pubkey_mismatch.fetch_add( 1, std::memory_order_relaxed); + src_peer->drop_pubkey_mismatch_peer += 1; return; } } @@ -1044,6 +1072,21 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, // pubkey-set time anyway. } + // Trace point: ingress (after src match + MAC1 verify, before + // forward). The matching egress trace below pairs with this + // by SHA-256 — same hash on both lines means the relay didn't + // mutate the frame (which is the integrity invariant we + // expect for stock WG relay mode). 
+ if (r->trace_forward_hashes) { + spdlog::info( + "wg-relay trace ingress sha256={} len={} type={} " + "src_peer={} src_pubkey={} dst_peer={} dst_pubkey={}", + Sha256HexPrefix(pkt, len), len, + static_cast(pkt[0]), src_peer->name, + PubkeyPrefix(src_peer->pubkey_b64), dst->name, + PubkeyPrefix(dst->pubkey_b64)); + } + ssize_t sent = sendto( r->sock_fd, pkt, len, 0, reinterpret_cast(&dst->endpoint), @@ -1053,6 +1096,16 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, std::strerror(errno)); return; } + // Trace point: egress (sendto succeeded). Same SHA-256 as + // the matching ingress line proves we forwarded the frame + // byte-for-byte — divergence flags a corrupting code path. + if (r->trace_forward_hashes) { + spdlog::info( + "wg-relay trace egress sha256={} len={} type={} " + "src_peer={} dst_peer={}", + Sha256HexPrefix(pkt, len), sent, + static_cast(pkt[0]), src_peer->name, dst->name); + } // Handshake response → mirror to dst's pending candidate // endpoint (so a real roamer at the new IP completes their // handshake) and, if the response's receiver_index matches @@ -1619,6 +1672,13 @@ WgRelay* WgRelayStart(const WgRelayConfig& cfg) { r->sock_fd = sock; r->port = cfg.port; r->roster_path = cfg.roster_path; + r->trace_forward_hashes = cfg.trace_forward_hashes; + if (r->trace_forward_hashes) { + spdlog::warn( + "wg-relay trace_forward_hashes=on — per-frame " + "logging will dominate throughput; disable for " + "production traffic."); + } if (!r->roster_path.empty()) { std::lock_guard lk(r->peers_mu); LoadRosterFile(r, r->roster_path); @@ -1801,6 +1861,8 @@ std::vector WgRelayListPeers( i.last_seen_ns = p.last_seen_ns; i.rx_bytes = p.rx_bytes; i.fwd_bytes = p.fwd_bytes; + i.drop_no_link = p.drop_no_link_peer; + i.drop_pubkey_mismatch = p.drop_pubkey_mismatch_peer; // Fold in the XDP per-CPU byte counters when the fast // path is attached. 
Without this the bytes freeze at // whatever the cold-start packet was, since every diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 392e350b..bb531d0a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -132,6 +132,16 @@ target_link_libraries(test_hd_blake2s PRIVATE gtest_discover_tests(test_hd_blake2s DISCOVERY_TIMEOUT 10 PROPERTIES TIMEOUT 10) +# wg-relay diagnostic-surface smoke test (P0.1 / P0.2 in the +# 0.2.2 daemon brief). Drives the wg-relay forwarder +# in-process to confirm the trace path + per-peer counters +# are reachable. +add_executable(test_wg_relay_trace test_wg_relay_trace.cc) +target_link_libraries(test_wg_relay_trace PRIVATE + libderp GTest::gtest_main) +gtest_discover_tests(test_wg_relay_trace + DISCOVERY_TIMEOUT 10 PROPERTIES TIMEOUT 30) + # ZMQ control channel tests. add_executable(test_ctl_channel test_ctl_channel.cc) target_link_libraries(test_ctl_channel PRIVATE diff --git a/tests/test_wg_relay_trace.cc b/tests/test_wg_relay_trace.cc new file mode 100644 index 00000000..45ca7b93 --- /dev/null +++ b/tests/test_wg_relay_trace.cc @@ -0,0 +1,195 @@ +/// @file test_wg_relay_trace.cc +/// @brief Smoke tests for the wg-relay diagnostic surface +/// added in the 0.2.2 daemon brief: trace-forward- +/// hashes flag (P0.1) and per-peer drop counters +/// (P0.1). +/// +/// These exercise the wg-relay forwarder in-process so the +/// new logging path and per-peer counters are reachable. +/// They do not assert exact log strings — the brief only +/// requires that the path is reachable. + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "hyper_derp/server.h" +#include "hyper_derp/wg_relay.h" + +namespace hyper_derp { +namespace { + +// Bind a UDP socket on loopback and return (fd, port). Caller +// owns the fd; close it when done. 
+std::pair BindUdpEphemeral() { + int fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) return {-1, 0}; + sockaddr_in addr{}; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = 0; + if (bind(fd, reinterpret_cast(&addr), + sizeof(addr)) < 0) { + close(fd); + return {-1, 0}; + } + socklen_t len = sizeof(addr); + getsockname(fd, reinterpret_cast(&addr), &len); + return {fd, ntohs(addr.sin_port)}; +} + +// Build a 148-byte WireGuard-shaped handshake init. The +// payload is junk crypto — the relay forwarder doesn't decrypt +// it, only checks shape (length + first byte == 1). +std::array MakeHandshakeInit() { + std::array p{}; + p[0] = 1; // type = init + for (size_t i = 4; i < p.size(); ++i) { + p[i] = static_cast(i & 0xff); + } + return p; +} + +class WgRelayTraceTest : public ::testing::Test { + protected: + void SetUp() override { + auto [a_fd, a_port] = BindUdpEphemeral(); + auto [b_fd, b_port] = BindUdpEphemeral(); + auto [r_fd, r_port] = BindUdpEphemeral(); + ASSERT_GE(a_fd, 0); + ASSERT_GE(b_fd, 0); + ASSERT_GE(r_fd, 0); + alice_fd_ = a_fd; + alice_port_ = a_port; + bob_fd_ = b_fd; + bob_port_ = b_port; + // Close r_fd; the relay will rebind r_port itself. There's + // a TOCTOU window here but loopback ephemeral collisions + // in CI are rare enough that the simpler code wins. 
+ close(r_fd); + relay_port_ = r_port; + } + + void TearDown() override { + if (relay_) WgRelayStop(relay_); + if (alice_fd_ >= 0) close(alice_fd_); + if (bob_fd_ >= 0) close(bob_fd_); + } + + WgRelayConfig MakeCfg() { + WgRelayConfig cfg; + cfg.port = relay_port_; + WgRelayConfig::PeerEntry alice; + alice.name = "alice"; + alice.endpoint = + std::format("127.0.0.1:{}", alice_port_); + cfg.peers.push_back(std::move(alice)); + WgRelayConfig::PeerEntry bob; + bob.name = "bob"; + bob.endpoint = + std::format("127.0.0.1:{}", bob_port_); + cfg.peers.push_back(std::move(bob)); + cfg.links.push_back({"alice", "bob"}); + return cfg; + } + + ssize_t SendFromAlice( + const std::array& pkt) { + sockaddr_in to{}; + to.sin_family = AF_INET; + to.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + to.sin_port = htons(relay_port_); + return sendto(alice_fd_, pkt.data(), pkt.size(), 0, + reinterpret_cast(&to), + sizeof(to)); + } + + // Wait up to 500 ms for a packet on bob's socket. Returns + // the number of bytes received, or -1 on timeout. + ssize_t RecvOnBob(uint8_t* buf, size_t bufsz) { + timeval tv{0, 500'000}; + setsockopt(bob_fd_, SOL_SOCKET, SO_RCVTIMEO, &tv, + sizeof(tv)); + return recv(bob_fd_, buf, bufsz, 0); + } + + int alice_fd_ = -1; + int bob_fd_ = -1; + uint16_t alice_port_ = 0; + uint16_t bob_port_ = 0; + uint16_t relay_port_ = 0; + WgRelay* relay_ = nullptr; +}; + +// Trace flag off (default) — relay still forwards. +TEST_F(WgRelayTraceTest, ForwardsWithTraceOff) { + WgRelayConfig cfg = MakeCfg(); + cfg.trace_forward_hashes = false; + relay_ = WgRelayStart(cfg); + ASSERT_NE(relay_, nullptr); + + auto pkt = MakeHandshakeInit(); + ASSERT_EQ(SendFromAlice(pkt), 148); + + std::array buf{}; + ssize_t n = RecvOnBob(buf.data(), buf.size()); + ASSERT_EQ(n, 148); + EXPECT_EQ(buf[0], 1); + auto stats = WgRelayGetStats(relay_); + EXPECT_EQ(stats.fwd_packets, 1u); +} + +// Trace flag on — relay forwards and the trace path runs +// without crashing. 
We don't assert exact log strings (per the +// brief); reaching this assertion proves the new code path is +// live. +TEST_F(WgRelayTraceTest, ForwardsWithTraceOn) { + WgRelayConfig cfg = MakeCfg(); + cfg.trace_forward_hashes = true; + relay_ = WgRelayStart(cfg); + ASSERT_NE(relay_, nullptr); + + auto pkt = MakeHandshakeInit(); + ASSERT_EQ(SendFromAlice(pkt), 148); + + std::array buf{}; + ssize_t n = RecvOnBob(buf.data(), buf.size()); + ASSERT_EQ(n, 148); + auto stats = WgRelayGetStats(relay_); + EXPECT_EQ(stats.fwd_packets, 1u); +} + +// Per-peer drop_no_link counter increments when a registered +// peer sends with no link configured for it. Counter is +// surfaced via WgRelayListPeers. +TEST_F(WgRelayTraceTest, PerPeerDropNoLinkCounter) { + WgRelayConfig cfg = MakeCfg(); + // Drop the link so alice is registered but unlinked. + cfg.links.clear(); + relay_ = WgRelayStart(cfg); + ASSERT_NE(relay_, nullptr); + + auto pkt = MakeHandshakeInit(); + ASSERT_EQ(SendFromAlice(pkt), 148); + // Give the recv loop a moment to process. + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + auto peers = WgRelayListPeers(relay_); + uint64_t alice_drop_no_link = 0; + for (const auto& p : peers) { + if (p.name == "alice") alice_drop_no_link = p.drop_no_link; + } + EXPECT_EQ(alice_drop_no_link, 1u); +} + +} // namespace +} // namespace hyper_derp From acb29ecf880e5b950cb458e32c30f0b650be3d1d Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Mon, 4 May 2026 19:34:48 +0200 Subject: [PATCH 2/5] wg-relay: structured xdp_attached log + --xdp-mode flag (P0.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daemon-side answer to the t1-xdp-attach failure on the 0.2.1 cloud-gcp-c4 benchmark, where gVNIC didn't support XDP_DRV_MODE and the relay silently fell back to SKB mode — the runner labelled subsequent rows "xdp" that were really running on the userspace recv loop. 
What the daemon does now: * `xdp_attached iface=<iface> ifindex=N mode={drv,skb} driver=<driver> kernel=<release>` is emitted per attached NIC on success. The achieved mode + driver/kernel pair are legible from a single grep instead of having to infer the state from older free-form text. * `--xdp-mode={drv,skb,auto,off}` (default `drv`). Replaces the previous hardcoded "try DRV, fall back to SKB" loop that was the source of the silent fallback. `auto` preserves the old behaviour; `skb` forces generic; `off` skips XDP attach entirely so the operator can keep `--xdp-interface` in the unit file and toggle at runtime. * Attach failure is FATAL. On failure `XdpAttach` logs `xdp_attach_failed iface=<iface> ifindex=N mode=<requested-mode> driver=<driver> kernel=<release> reason=<strerror>` and returns false; WgRelayStart tears the relay down and main.cc exits non-zero. The runner sees a clear failure signal rather than a silently-degraded relay. * Driver / kernel detection helpers: `ReadNicDriver` reads `/sys/class/net/<iface>/device/driver` and returns its basename; `ReadKernelRelease` calls `uname()`. Both are safe when the lookup fails (e.g. the sysfs driver symlink is absent on loopback or bridges) — they return "?" and the log line still parses. What the operator should observe on next run: a fresh daemon launch on the cloud fleet either attaches XDP and logs the achieved mode, or fails fast with the structured `xdp_attach_failed` line. Userspace path is no longer reached implicitly. gVNIC / vmxnet3 / virtio-net advertise XDP only in generic mode on stock kernels through 6.12 — operators on those NICs need `--xdp-mode=auto` (or `=skb`) to opt into the generic fallback explicitly. Documented in --help and in the 0.2.2 CHANGELOG note. Smoke tests in tests/test_wg_relay_trace.cc: * RejectsUnknownXdpMode — invalid mode → WgRelayStart fails. * XdpModeOffSkipsAttach — `off` skips attach even when an interface is configured. Steady-state forwarding path is untouched. The 24-h soak signal from 0.2.1 stands. 
--- CHANGELOG.md | 8 ++ src/main.cc | 8 +- src/wg_relay.cc | 166 +++++++++++++++++++++++++++-------- tests/test_wg_relay_trace.cc | 27 ++++++ 4 files changed, 173 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 686f55fb..a47d0d2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,14 @@ this file. Format based on [Keep a Changelog](https://keepachangelog.com/). - **`--trace-forward-hashes` daemon flag**. When set, every wg-relay forward logs a SHA-256(payload) hash + length + source/destination peer pubkey prefix at two points: ingress (after the source peer matches and MAC1 verification, if applicable) and egress (just before `sendto` to the destination). Same hash on both lines proves the relay didn't mutate the frame; divergence flags a corrupting code path. Off by default — this is per-frame logging on the hot path and will tank throughput if left on. Aimed at diagnosing integrity-test failures where a single log line and aggregate counters aren't enough to triage. - **Per-peer drop counters**. `wg peer list` now surfaces `peer..drop_no_link` and `peer..drop_pubkey_mismatch` alongside the existing per-peer byte counters. The aggregate counters in `wg show` are unchanged; these are the pair-attributable subset for diagnosing which peer's traffic is hitting which drop reason. Other drop classes (`drop_unknown_src`, `drop_not_wg_shaped`, `drop_handshake_no_pubkey_match`) are by definition unattributable to a known peer and remain aggregate-only. +### Changed — wg-relay XDP attach is no longer silently fallback + +- **Structured `xdp_attached` log line**. On successful attach the daemon now logs `xdp_attached iface= ifindex=N mode={drv,skb} driver= kernel=` per attached NIC. The achieved mode is now legible from a single grep, instead of having to infer it from the older free-form "(ifindex=N, mode=native)" text. +- **`--xdp-mode={drv,skb,auto,off}` daemon flag**, default `drv`. 
Was previously hardcoded to "try DRV, fall back to SKB on failure" — a silent fallback that on the 0.2.1 cloud-gcp-c4 benchmark labelled rows "xdp" that were really running the userspace recv loop. Operators that want the historical behaviour pass `--xdp-mode=auto` explicitly. `off` skips XDP attach even when `--xdp-interface` is set, so the operator can leave the interface flag in their unit file but disable XDP at runtime without editing it. +- **Attach failure exits non-zero**. `XdpAttach` failure now logs `xdp_attach_failed iface= ifindex=N mode= driver= kernel= reason=` and tears the relay down (`WgRelayStart` returns `nullptr` → `main.cc` exits non-zero). The runner gets a clear signal rather than a silent userspace fallback. + +Note for cloud operators: gVNIC (Google) and vmxnet3 (VMware) do not advertise `XDP_DRV` mode on stock kernels through 6.12 — the daemon will fail attach with `reason=Operation not supported` under the default `--xdp-mode=drv`. Pass `--xdp-mode=auto` (or `=skb`) to opt into the generic-mode fallback explicitly. 
+ ## [0.2.1] - unreleased ### Added — wg-relay hardening diff --git a/src/main.cc b/src/main.cc index 854810c4..947866dc 100644 --- a/src/main.cc +++ b/src/main.cc @@ -64,7 +64,13 @@ static void PrintUsage(const char* prog) { " --xdp-interface Network interface for " "XDP attachment\n" " --xdp-mode XDP attach mode: drv " - "(default), skb, auto, off\n" + "(default), skb, auto, off.\n" + " Some virtual NICs (gVNIC, " + "vmxnet3) only ship XDP\n" + " in generic mode; use " + "--xdp-mode=skb or --xdp-mode=auto\n" + " to opt into the slower " + "fallback explicitly.\n" " --trace-forward-hashes " "Log SHA-256 of every forwarded\n" " wg-relay frame at ingress + " diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 90d8720d..d9d6ea5f 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -1243,6 +1244,33 @@ bool ReadNicIpv4(const std::string& iface, return found; } +// Read the kernel module backing a NIC. Returns the basename +// of /sys/class/net//device/driver, or "?" if the +// symlink doesn't resolve (loopback, bridges, virtual NICs). +std::string ReadNicDriver(const std::string& iface) { + std::string path = + "/sys/class/net/" + iface + "/device/driver"; + char buf[256]; + ssize_t n = + ::readlink(path.c_str(), buf, sizeof(buf) - 1); + if (n <= 0) return "?"; + buf[n] = '\0'; + std::string s(buf); + auto slash = s.rfind('/'); + return slash == std::string::npos ? s + : s.substr(slash + 1); +} + +// Running kernel release ("6.12.73+deb13-amd64" etc). +// Operator-facing field on the xdp_attached log line so +// driver/kernel pairs that don't support XDP_DRV are +// distinguishable in the bench log. +std::string ReadKernelRelease() { + utsname u{}; + if (::uname(&u) != 0) return "?"; + return u.release; +} + // Read a NIC's hardware MAC from sysfs. Cleaner than // SIOCGIFHWADDR — no socket needed, no ioctl quirks. 
bool ReadNicMac(const std::string& iface, uint8_t mac[6]) { @@ -1278,9 +1306,46 @@ std::vector SplitIfaceList(const std::string& s) { return out; } +// Resolve the operator's --xdp-mode string into a flags + +// fallback policy. Empty / "drv" → DRV-only (fail loudly if +// the driver doesn't support it); "skb" → SKB-only; +// "auto" → DRV with SKB fallback (the historical behaviour); +// "off" → caller should skip XdpAttach entirely. Anything +// else is an operator typo. +struct XdpModePolicy { + bool valid = false; + bool try_drv = false; + bool try_skb_fallback = false; + std::string label; // "drv" | "skb" | "auto" | "off" +}; +XdpModePolicy ParseXdpMode(const std::string& mode) { + XdpModePolicy p; + if (mode.empty() || mode == "drv") { + p.valid = true; + p.try_drv = true; + p.try_skb_fallback = false; + p.label = "drv"; + } else if (mode == "skb") { + p.valid = true; + p.try_drv = false; + p.try_skb_fallback = true; + p.label = "skb"; + } else if (mode == "auto") { + p.valid = true; + p.try_drv = true; + p.try_skb_fallback = true; + p.label = "auto"; + } else if (mode == "off") { + p.valid = true; + p.label = "off"; + } + return p; +} + bool XdpAttach(WgRelay* r, const std::string& iface_list, const std::string& bpf_obj_path, - uint16_t port) { + uint16_t port, + const XdpModePolicy& mode) { auto ifaces = SplitIfaceList(iface_list); if (ifaces.empty()) return false; @@ -1409,30 +1474,40 @@ bool XdpAttach(WgRelay* r, const std::string& iface_list, } } - // Attach to each NIC. Native first; generic fallback per - // NIC, since some drivers in a mixed setup support - // native and others don't. + // Attach per NIC, honouring the operator's mode policy. + // The previous code unconditionally tried DRV then fell + // back to SKB; that silent fallback is exactly what the + // 0.2.1 cloud-gcp-c4 benchmark caught labelling "xdp" + // rows that were really running in userspace mode. 
std::vector natives; natives.reserve(attachments.size()); + std::string kernel = ReadKernelRelease(); for (const auto& a : attachments) { - int rc = bpf_xdp_attach(a.ifindex, prog_fd, - XDP_FLAGS_DRV_MODE, nullptr); - bool native = rc == 0; - if (rc < 0) { + std::string drv = ReadNicDriver(a.iface); + int rc = -EOPNOTSUPP; + bool native = false; + if (mode.try_drv) { + rc = bpf_xdp_attach(a.ifindex, prog_fd, + XDP_FLAGS_DRV_MODE, nullptr); + if (rc == 0) native = true; + } + if (rc != 0 && mode.try_skb_fallback) { rc = bpf_xdp_attach(a.ifindex, prog_fd, XDP_FLAGS_SKB_MODE, nullptr); - if (rc < 0) { - spdlog::error( - "wg-relay xdp: attach {} failed: {}", a.iface, - std::strerror(-rc)); - // Roll back any NICs we already attached. - for (const auto& done : attachments) { - if (done.ifindex == a.ifindex) break; - bpf_xdp_detach(done.ifindex, 0, nullptr); - } - bpf_object__close(obj); - return false; + } + if (rc != 0) { + spdlog::error( + "xdp_attach_failed iface={} ifindex={} mode={} " + "driver={} kernel={} reason={}", + a.iface, a.ifindex, mode.label, drv, kernel, + std::strerror(-rc)); + // Roll back any NICs we already attached. + for (const auto& done : attachments) { + if (done.ifindex == a.ifindex) break; + bpf_xdp_detach(done.ifindex, 0, nullptr); } + bpf_object__close(obj); + return false; } natives.push_back(native); } @@ -1451,12 +1526,13 @@ bool XdpAttach(WgRelay* r, const std::string& iface_list, r->xdp.blocklist_map_fd = blocklist_fd; r->xdp.attached = true; for (size_t i = 0; i < r->xdp.attachments.size(); ++i) { + const auto& a = r->xdp.attachments[i]; spdlog::info( - "wg-relay xdp: attached on {} " - "(ifindex={}, mode={})", - r->xdp.attachments[i].iface, - r->xdp.attachments[i].ifindex, - natives[i] ? "native" : "generic"); + "xdp_attached iface={} ifindex={} mode={} " + "driver={} kernel={}", + a.iface, a.ifindex, + natives[i] ? 
"drv" : "skb", + ReadNicDriver(a.iface), kernel); } return true; } @@ -1709,20 +1785,40 @@ WgRelay* WgRelayStart(const WgRelayConfig& cfg) { r->loop_thread = std::thread(RecvLoop, r); // Bring up the XDP fast path if the operator asked for - // it. Failure here is non-fatal — we log and stay on - // the userspace path; correctness is unchanged. - if (!cfg.xdp_interface.empty()) { + // it. The mode policy decides whether to try drv, skb, both, + // or skip entirely. Attach failure (after the policy says + // "really try this") is FATAL: silently falling back to + // userspace would label benchmark rows "xdp" that are + // actually running on the userspace recv loop, which is + // exactly what we're trying to avoid. + XdpModePolicy xdp_mode = ParseXdpMode(cfg.xdp_mode); + if (!xdp_mode.valid) { + spdlog::error( + "wg-relay invalid --xdp-mode '{}' (drv | skb | " + "auto | off)", cfg.xdp_mode); + WgRelayStop(r); + return nullptr; + } + bool xdp_requested = !cfg.xdp_interface.empty() && + xdp_mode.label != "off"; + if (xdp_requested) { std::string obj_path = cfg.xdp_bpf_obj_path.empty() ? std::string("/usr/lib/hyper-derp/wg_relay.bpf.o") : cfg.xdp_bpf_obj_path; - if (XdpAttach(r, cfg.xdp_interface, obj_path, - cfg.port)) { - // Replay current links into the BPF map so the fast - // path is live for already-loaded roster entries. - std::lock_guard lk(r->peers_mu); - for (const auto& l : r->links) { - XdpInsertLinkByNameLocked(r, l.a, l.b); - } + if (!XdpAttach(r, cfg.xdp_interface, obj_path, + cfg.port, xdp_mode)) { + // XdpAttach already logged xdp_attach_failed with the + // structured detail. Tear the relay down so main.cc + // exits non-zero — the runner needs a clear signal, + // not a silent userspace fallback. + WgRelayStop(r); + return nullptr; + } + // Replay current links into the BPF map so the fast path + // is live for already-loaded roster entries. 
+ std::lock_guard lk(r->peers_mu); + for (const auto& l : r->links) { + XdpInsertLinkByNameLocked(r, l.a, l.b); } } diff --git a/tests/test_wg_relay_trace.cc b/tests/test_wg_relay_trace.cc index 45ca7b93..e687fe2d 100644 --- a/tests/test_wg_relay_trace.cc +++ b/tests/test_wg_relay_trace.cc @@ -168,6 +168,33 @@ TEST_F(WgRelayTraceTest, ForwardsWithTraceOn) { EXPECT_EQ(stats.fwd_packets, 1u); } +// P0.2 smoke: invalid --xdp-mode causes WgRelayStart to fail. +// Exercises the ParseXdpMode validation path; the daemon +// surfacing this means main.cc returns EXIT_FAILURE rather +// than silently running with a bogus mode. +TEST_F(WgRelayTraceTest, RejectsUnknownXdpMode) { + WgRelayConfig cfg = MakeCfg(); + cfg.xdp_interface = "lo"; + cfg.xdp_mode = "extreme"; + WgRelay* r = WgRelayStart(cfg); + EXPECT_EQ(r, nullptr); +} + +// P0.2 smoke: --xdp-mode=off skips XDP attach entirely even +// when --xdp-interface is set. The relay still comes up; the +// userspace recv loop handles every packet. Operator opts in +// to "no XDP" explicitly, rather than getting it as a silent +// fallback. +TEST_F(WgRelayTraceTest, XdpModeOffSkipsAttach) { + WgRelayConfig cfg = MakeCfg(); + cfg.xdp_interface = "lo"; + cfg.xdp_mode = "off"; + relay_ = WgRelayStart(cfg); + ASSERT_NE(relay_, nullptr); + auto stats = WgRelayGetStats(relay_); + EXPECT_FALSE(stats.xdp_attached); +} + // Per-peer drop_no_link counter increments when a registered // peer sends with no link configured for it. Counter is // surfaced via WgRelayListPeers. From 8137a2c81362aa725c8bfc2027e6eb9a24abbc3f Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Mon, 4 May 2026 19:37:43 +0200 Subject: [PATCH 3/5] wg-relay: distinct link-add error codes (P0.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decision: option (b) from the brief. 
The "each peer is in at most one link" invariant is load-bearing — it's the iter-1 design contract that lets the relay forward purely on the source 4-tuple. The relay's design memory captures the rationale, and the comment at LinkAddLocked already calls it out. Multi-link mesh routing is future work (per-link UDP ports or in-packet introspection); not on the path for 0.2.2. Daemon-side fix: differentiate the einheit-channel error codes so the benchmark runner can detect "you hit the one-link-per-peer limit" specifically rather than the generic `wg_link_failed`. What changed: * New `WgRelayLinkAddResult` enum (kWgLinkOk / UnknownPeer / SelfLink / LimitExceeded / Duplicate) plus a `WgRelayLinkAddDetail` variant of `WgRelayLinkAdd` that returns it. Bool `WgRelayLinkAdd` is preserved as a thin wrapper for the many call sites that only need success/failure. * `WgLinkAdd` einheit handler maps each non-OK result onto a distinct error code: `wg_peer_unknown`, `wg_link_self`, `link_limit_exceeded`, `wg_link_duplicate`. The runner now has a discriminator to switch on; star-topology configs that land on the limit can fall back to disjoint pairs without parsing free-form error text. * `wg link add` verb description in the registry calls out the iter-1 limit so `wg link add --help` documents it. What the operator should observe on next run: a `wg link add` that exceeds the one-link-per-peer limit returns `error.code = link_limit_exceeded` with a one-line message explaining the rule. Other failure modes (typo'd peer, duplicate pair, self-link) return distinct codes too. No silent drops — every rejection is named and surfaced. Regression: tests/test_wg_relay_links.cc covers all five outcome codes (Ok / UnknownPeer / SelfLink / LimitExceeded / Duplicate), the star topology from the brief, and the runner's expected fallback (disjoint pairs). The bool wrapper is exercised separately to confirm any non-OK detail still maps to false. Steady-state forwarding path is untouched. 
--- CHANGELOG.md | 6 ++ include/hyper_derp/wg_relay.h | 25 +++++++ src/einheit_channel.cc | 42 +++++++++-- src/wg_relay.cc | 15 +++- tests/CMakeLists.txt | 9 +++ tests/test_wg_relay_links.cc | 137 ++++++++++++++++++++++++++++++++++ 6 files changed, 223 insertions(+), 11 deletions(-) create mode 100644 tests/test_wg_relay_links.cc diff --git a/CHANGELOG.md b/CHANGELOG.md index a47d0d2c..adeb9849 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ this file. Format based on [Keep a Changelog](https://keepachangelog.com/). - **`--trace-forward-hashes` daemon flag**. When set, every wg-relay forward logs a SHA-256(payload) hash + length + source/destination peer pubkey prefix at two points: ingress (after the source peer matches and MAC1 verification, if applicable) and egress (just before `sendto` to the destination). Same hash on both lines proves the relay didn't mutate the frame; divergence flags a corrupting code path. Off by default — this is per-frame logging on the hot path and will tank throughput if left on. Aimed at diagnosing integrity-test failures where a single log line and aggregate counters aren't enough to triage. - **Per-peer drop counters**. `wg peer list` now surfaces `peer..drop_no_link` and `peer..drop_pubkey_mismatch` alongside the existing per-peer byte counters. The aggregate counters in `wg show` are unchanged; these are the pair-attributable subset for diagnosing which peer's traffic is hitting which drop reason. Other drop classes (`drop_unknown_src`, `drop_not_wg_shaped`, `drop_handshake_no_pubkey_match`) are by definition unattributable to a known peer and remain aggregate-only. +### Changed — wg-relay link-add error codes are differentiated + +- **`link_limit_exceeded` is now its own error code**. 
Previously every `wg link add` failure mode (unknown peer, self-link, duplicate, iter-1 limit) collapsed into a single `wg_link_failed` response, so the benchmark runner couldn't tell "your star topology hit the one-link-per-peer limit" apart from "you typo'd a peer name." Each rejection reason now has its own code (`wg_peer_unknown`, `wg_link_self`, `link_limit_exceeded`, `wg_link_duplicate`); the iter-1 limit is documented in `wg link add --help`. Behaviour is unchanged — the rejection still happens at the same point in `LinkAddLocked` for the same reasons; only the surfaced error string changes. + +The iter-1 invariant ("each peer is in at most one link") stands per the relay's design memory. Multi-link mesh routing is future work — it requires either per-link UDP ports or some form of in-packet introspection to disambiguate the destination from the source 4-tuple alone. + ### Changed — wg-relay XDP attach is no longer silently fallback - **Structured `xdp_attached` log line**. On successful attach the daemon now logs `xdp_attached iface= ifindex=N mode={drv,skb} driver= kernel=` per attached NIC. The achieved mode is now legible from a single grep, instead of having to infer it from the older free-form "(ifindex=N, mode=native)" text. diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index 70bd9bd9..b6080c02 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -292,6 +292,31 @@ bool WgRelayPeerNic(WgRelay* r, const std::string& name, const std::string& nic); bool WgRelayPeerRemove(WgRelay* r, const std::string& name); +/// Outcome codes for `WgRelayLinkAdd` / `WgRelayLinkAddDetail`. +/// 0 means success; the non-zero values are surfaced by the +/// einheit channel as distinct error codes so the runner can +/// distinguish "you already used your one link slot" from +/// "you typo'd a peer name." 
+enum WgRelayLinkAddResult : int { + kWgLinkOk = 0, + kWgLinkUnknownPeer = 1, + kWgLinkSelfLink = 2, + /// Iteration-1 invariant: each peer is in at most one link + /// so the destination is unambiguous from the source 4-tuple + /// alone. A second link on either side is rejected here + /// rather than producing ambiguous forwarding. + kWgLinkLimitExceeded = 3, + kWgLinkDuplicate = 4, +}; + +/// Like WgRelayLinkAdd but returns the reason code. Use this +/// when the caller needs to differentiate failure modes (e.g. +/// the einheit channel mapping each onto a distinct error +/// string). The bool-returning wrapper below is kept for +/// callers that only need success/failure. +WgRelayLinkAddResult WgRelayLinkAddDetail( + WgRelay* r, const std::string& a, const std::string& b); + bool WgRelayLinkAdd(WgRelay* r, const std::string& a, const std::string& b); bool WgRelayLinkRemove(WgRelay* r, const std::string& a, diff --git a/src/einheit_channel.cc b/src/einheit_channel.cc index b103663b..c71b8471 100644 --- a/src/einheit_channel.cc +++ b/src/einheit_channel.cc @@ -1858,13 +1858,39 @@ void WgLinkAdd(Server* s, const Request& req, if (!WgGate(s, r)) return; if (!RequireArg(req, 0, "a", r)) return; if (!RequireArg(req, 1, "b", r)) return; - if (!WgRelayLinkAdd(s->wg_relay, req.args[0], - req.args[1])) { + auto rc = WgRelayLinkAddDetail(s->wg_relay, req.args[0], + req.args[1]); + if (rc != kWgLinkOk) { r->status = ResponseStatus::kError; - r->error = ErrorOf( - "wg_link_failed", - "unknown peer, self-link, duplicate link, or one " - "side already has a link (iteration-1 limit)"); + switch (rc) { + case kWgLinkUnknownPeer: + r->error = ErrorOf("wg_peer_unknown", + "one or both peers are not " + "registered (use `wg peer add`)"); + break; + case kWgLinkSelfLink: + r->error = ErrorOf("wg_link_self", + "a peer cannot link to itself"); + break; + case kWgLinkLimitExceeded: + // The iter-1 invariant: each peer is in at most one + // link. 
Distinct from the other failure modes so the + // runner can detect a star-topology config that hits + // the limit and switch to disjoint pairs. + r->error = ErrorOf( + "link_limit_exceeded", + "iteration-1 limit: each peer may be in at most " + "one link (one side of this pair already is)"); + break; + case kWgLinkDuplicate: + r->error = ErrorOf("wg_link_duplicate", + "this exact link already exists"); + break; + default: + r->error = ErrorOf("wg_link_failed", + "link add failed"); + break; + } return; } SetBody(r, std::format("a={}\nb={}\n", req.args[0], @@ -2201,7 +2227,9 @@ Registry MakeRegistry() { m["wg_link_add"] = {WgLinkAdd, Role::kOperator, "wg link add", "Allow A↔B forwarding between two " - "peers", + "peers. Iteration-1 limit: each peer " + "may appear in at most one link " + "(rejected as link_limit_exceeded)", false, wg_link_args}; m["wg_link_remove"] = {WgLinkRemove, Role::kOperator, "wg link remove", diff --git a/src/wg_relay.cc b/src/wg_relay.cc index d9d6ea5f..66e19588 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -1911,13 +1911,20 @@ bool WgRelayPeerRemove(WgRelay* r, return true; } -bool WgRelayLinkAdd(WgRelay* r, const std::string& a, - const std::string& b) { +WgRelayLinkAddResult WgRelayLinkAddDetail( + WgRelay* r, const std::string& a, + const std::string& b) { std::lock_guard lk(r->peers_mu); - if (LinkAddLocked(r, a, b) != 0) return false; + int rc = LinkAddLocked(r, a, b); + if (rc != 0) return static_cast(rc); XdpInsertLinkByNameLocked(r, a, b); PersistRosterLocked(r); - return true; + return kWgLinkOk; +} + +bool WgRelayLinkAdd(WgRelay* r, const std::string& a, + const std::string& b) { + return WgRelayLinkAddDetail(r, a, b) == kWgLinkOk; } bool WgRelayLinkRemove(WgRelay* r, const std::string& a, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bb531d0a..941d7698 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -142,6 +142,15 @@ target_link_libraries(test_wg_relay_trace PRIVATE 
gtest_discover_tests(test_wg_relay_trace DISCOVERY_TIMEOUT 10 PROPERTIES TIMEOUT 30) +# wg-relay link-table regression tests (P0.3). Exercises the +# iter-1 invariant + the distinct WgRelayLinkAddResult codes +# the einheit channel maps onto error strings. +add_executable(test_wg_relay_links test_wg_relay_links.cc) +target_link_libraries(test_wg_relay_links PRIVATE + libderp GTest::gtest_main) +gtest_discover_tests(test_wg_relay_links + DISCOVERY_TIMEOUT 10 PROPERTIES TIMEOUT 30) + # ZMQ control channel tests. add_executable(test_ctl_channel test_ctl_channel.cc) target_link_libraries(test_ctl_channel PRIVATE diff --git a/tests/test_wg_relay_links.cc b/tests/test_wg_relay_links.cc new file mode 100644 index 00000000..7bc99da4 --- /dev/null +++ b/tests/test_wg_relay_links.cc @@ -0,0 +1,137 @@ +/// @file test_wg_relay_links.cc +/// @brief Integration tests for the wg-relay link table — in +/// particular the iteration-1 invariant "each peer is +/// in at most one link" and the distinct error codes +/// the daemon surfaces when it's hit. +/// +/// Drives `WgRelayLinkAddDetail` directly (no einheit channel +/// in the loop) so a star-topology config landing on the +/// limit produces a `kWgLinkLimitExceeded` outcome rather +/// than a silent drop. 
+ +#include + +#include +#include +#include +#include + +#include "hyper_derp/server.h" +#include "hyper_derp/wg_relay.h" + +namespace hyper_derp { +namespace { + +uint16_t PickFreeUdpPort() { + int fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) return 0; + sockaddr_in addr{}; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = 0; + if (bind(fd, reinterpret_cast(&addr), + sizeof(addr)) < 0) { + close(fd); + return 0; + } + socklen_t len = sizeof(addr); + getsockname(fd, reinterpret_cast(&addr), &len); + uint16_t p = ntohs(addr.sin_port); + close(fd); + return p; +} + +class WgRelayLinkTest : public ::testing::Test { + protected: + void SetUp() override { + cfg_.port = PickFreeUdpPort(); + ASSERT_NE(cfg_.port, 0); + // Four peers — operator-distinct names with placeholder + // endpoints. The link table operates on names; the + // forwarder doesn't care about endpoint reachability for + // these tests. + for (auto name : {"alice", "bob", "carol", "dave"}) { + WgRelayConfig::PeerEntry pe; + pe.name = name; + pe.endpoint = "127.0.0.1:1"; + cfg_.peers.push_back(std::move(pe)); + } + relay_ = WgRelayStart(cfg_); + ASSERT_NE(relay_, nullptr); + } + + void TearDown() override { + if (relay_) WgRelayStop(relay_); + } + + WgRelayConfig cfg_; + WgRelay* relay_ = nullptr; +}; + +// First link in any pairing succeeds. +TEST_F(WgRelayLinkTest, FirstLinkOk) { + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "bob"), + kWgLinkOk); +} + +// Star topology (alice ↔ {bob, carol, dave}) is exactly the +// runner config from the brief. The first link must succeed +// and the second + third must be rejected with the distinct +// link_limit_exceeded outcome — not a generic "link failed". 
+TEST_F(WgRelayLinkTest, StarTopologyRejectsExtraLinks) { + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "bob"), + kWgLinkOk); + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "carol"), + kWgLinkLimitExceeded); + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "dave"), + kWgLinkLimitExceeded); +} + +// Disjoint pairs (alice↔bob, carol↔dave) — the runner's +// fallback if it hits the limit — must succeed entirely. +TEST_F(WgRelayLinkTest, DisjointPairsBothOk) { + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "bob"), + kWgLinkOk); + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "carol", "dave"), + kWgLinkOk); +} + +// Adding the literal same pair twice is the duplicate +// outcome — distinct from limit_exceeded so the runner can +// surface "this link already exists" cleanly. +TEST_F(WgRelayLinkTest, DuplicateRejected) { + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "bob"), + kWgLinkOk); + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "bob"), + kWgLinkDuplicate); + // Reverse direction is also caught as duplicate, since + // links are undirected. + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "bob", "alice"), + kWgLinkDuplicate); +} + +// Self-link rejected with its own outcome code. +TEST_F(WgRelayLinkTest, SelfLinkRejected) { + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "alice"), + kWgLinkSelfLink); +} + +// Unknown peer rejected with its own outcome code. +TEST_F(WgRelayLinkTest, UnknownPeerRejected) { + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "alice", "ghost"), + kWgLinkUnknownPeer); + EXPECT_EQ(WgRelayLinkAddDetail(relay_, "ghost", "alice"), + kWgLinkUnknownPeer); +} + +// Bool-returning wrapper preserves the success/failure +// invariant — any non-OK detail maps to false. 
+TEST_F(WgRelayLinkTest, BoolWrapperConsistent) { + EXPECT_TRUE(WgRelayLinkAdd(relay_, "alice", "bob")); + EXPECT_FALSE(WgRelayLinkAdd(relay_, "alice", "carol")); + EXPECT_FALSE(WgRelayLinkAdd(relay_, "ghost", "alice")); + EXPECT_FALSE(WgRelayLinkAdd(relay_, "alice", "alice")); +} + +} // namespace +} // namespace hyper_derp From b4b9b7672fe8b1653cd45d19dd9d188df4b39319 Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Mon, 4 May 2026 19:41:34 +0200 Subject: [PATCH 4/5] test_wg_relay_trace: poll fwd_packets to drain race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two flake sources in the test, both observed across 15 back-to-back runs of the full binary: * SetUp picked the relay's UDP port via bind-then-close on loopback. A different process grabbing the port between the test's close() and the relay's bind() left WgRelayStart returning nullptr. Replaced with StartRelay, which retries up to 5 times with fresh ephemeral ports. * The test read fwd_packets immediately after recv() on bob's socket returned, but the relay loop bumps the counter AFTER sendto, so RecvOnBob can return while the relay thread is still between sendto and fetch_add. The trace egress log fires in the same window, which is why the failing run shows the egress log but stats=0. Added WaitFwdPackets that polls up to ~500 ms. Trace path is unchanged. The egress log line still fires in the order ingress → sendto → egress → counter — same as production. 
--- tests/test_wg_relay_trace.cc | 56 +++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/tests/test_wg_relay_trace.cc b/tests/test_wg_relay_trace.cc index e687fe2d..1c7288d5 100644 --- a/tests/test_wg_relay_trace.cc +++ b/tests/test_wg_relay_trace.cc @@ -64,19 +64,31 @@ class WgRelayTraceTest : public ::testing::Test { void SetUp() override { auto [a_fd, a_port] = BindUdpEphemeral(); auto [b_fd, b_port] = BindUdpEphemeral(); - auto [r_fd, r_port] = BindUdpEphemeral(); ASSERT_GE(a_fd, 0); ASSERT_GE(b_fd, 0); - ASSERT_GE(r_fd, 0); alice_fd_ = a_fd; alice_port_ = a_port; bob_fd_ = b_fd; bob_port_ = b_port; - // Close r_fd; the relay will rebind r_port itself. There's - // a TOCTOU window here but loopback ephemeral collisions - // in CI are rare enough that the simpler code wins. - close(r_fd); - relay_port_ = r_port; + } + + // Pick a UDP port for the relay by binding-then-closing. + // Tests that need to run WgRelayStart call this each time + // so the kernel hands back a fresh ephemeral port — the + // historical "pick once at SetUp" pattern has a TOCTOU + // window that flakes when another process grabs the port + // between the test's close and the relay's bind. + void StartRelay(WgRelayConfig cfg) { + for (int attempt = 0; attempt < 5; ++attempt) { + auto [fd, port] = BindUdpEphemeral(); + ASSERT_GE(fd, 0); + relay_port_ = port; + close(fd); + cfg.port = port; + relay_ = WgRelayStart(cfg); + if (relay_) return; + } + FAIL() << "WgRelayStart failed 5 times in a row"; } void TearDown() override { @@ -87,7 +99,7 @@ class WgRelayTraceTest : public ::testing::Test { WgRelayConfig MakeCfg() { WgRelayConfig cfg; - cfg.port = relay_port_; + // cfg.port is filled in by StartRelay(); leave 0 here. WgRelayConfig::PeerEntry alice; alice.name = "alice"; alice.endpoint = @@ -130,11 +142,24 @@ class WgRelayTraceTest : public ::testing::Test { WgRelay* relay_ = nullptr; }; +// Wait briefly for fwd_packets to reach `target`. 
The recv +// loop bumps the counter just AFTER sendto returns, so a +// sub-microsecond test that reads stats immediately after +// recv() returns can race the increment. Poll up to ~500 ms. +uint64_t WaitFwdPackets(WgRelay* r, uint64_t target) { + for (int i = 0; i < 50; ++i) { + auto s = WgRelayGetStats(r); + if (s.fwd_packets >= target) return s.fwd_packets; + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + return WgRelayGetStats(r).fwd_packets; +} + // Trace flag off (default) — relay still forwards. TEST_F(WgRelayTraceTest, ForwardsWithTraceOff) { WgRelayConfig cfg = MakeCfg(); cfg.trace_forward_hashes = false; - relay_ = WgRelayStart(cfg); + StartRelay(std::move(cfg)); ASSERT_NE(relay_, nullptr); auto pkt = MakeHandshakeInit(); @@ -144,8 +169,7 @@ TEST_F(WgRelayTraceTest, ForwardsWithTraceOff) { ssize_t n = RecvOnBob(buf.data(), buf.size()); ASSERT_EQ(n, 148); EXPECT_EQ(buf[0], 1); - auto stats = WgRelayGetStats(relay_); - EXPECT_EQ(stats.fwd_packets, 1u); + EXPECT_EQ(WaitFwdPackets(relay_, 1), 1u); } // Trace flag on — relay forwards and the trace path runs @@ -155,7 +179,7 @@ TEST_F(WgRelayTraceTest, ForwardsWithTraceOff) { TEST_F(WgRelayTraceTest, ForwardsWithTraceOn) { WgRelayConfig cfg = MakeCfg(); cfg.trace_forward_hashes = true; - relay_ = WgRelayStart(cfg); + StartRelay(std::move(cfg)); ASSERT_NE(relay_, nullptr); auto pkt = MakeHandshakeInit(); @@ -164,8 +188,7 @@ TEST_F(WgRelayTraceTest, ForwardsWithTraceOn) { std::array buf{}; ssize_t n = RecvOnBob(buf.data(), buf.size()); ASSERT_EQ(n, 148); - auto stats = WgRelayGetStats(relay_); - EXPECT_EQ(stats.fwd_packets, 1u); + EXPECT_EQ(WaitFwdPackets(relay_, 1), 1u); } // P0.2 smoke: invalid --xdp-mode causes WgRelayStart to fail. @@ -174,6 +197,7 @@ TEST_F(WgRelayTraceTest, ForwardsWithTraceOn) { // than silently running with a bogus mode. TEST_F(WgRelayTraceTest, RejectsUnknownXdpMode) { WgRelayConfig cfg = MakeCfg(); + cfg.port = 0; // not exercised; bind never reached. 
cfg.xdp_interface = "lo"; cfg.xdp_mode = "extreme"; WgRelay* r = WgRelayStart(cfg); @@ -189,7 +213,7 @@ TEST_F(WgRelayTraceTest, XdpModeOffSkipsAttach) { WgRelayConfig cfg = MakeCfg(); cfg.xdp_interface = "lo"; cfg.xdp_mode = "off"; - relay_ = WgRelayStart(cfg); + StartRelay(std::move(cfg)); ASSERT_NE(relay_, nullptr); auto stats = WgRelayGetStats(relay_); EXPECT_FALSE(stats.xdp_attached); @@ -202,7 +226,7 @@ TEST_F(WgRelayTraceTest, PerPeerDropNoLinkCounter) { WgRelayConfig cfg = MakeCfg(); // Drop the link so alice is registered but unlinked. cfg.links.clear(); - relay_ = WgRelayStart(cfg); + StartRelay(std::move(cfg)); ASSERT_NE(relay_, nullptr); auto pkt = MakeHandshakeInit(); From 7b3deb04a359f04ef2691ba2229d9c6dd8719adf Mon Sep 17 00:00:00 2001 From: Karl Ruskowski Date: Mon, 4 May 2026 20:12:13 +0200 Subject: [PATCH 5/5] wg-relay: per-source-IP drop histogram (P0.1 review followup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Followup on the daemon-0.2.2 review: the three drop classes the brief asked for that I'd skipped as "unattributable to a known peer" still have an apparent source IP, and that's exactly what the runner needs to triage the c4_gcp internal- vs-external NAT bug — `drop_handshake_no_pubkey_match` aggregate is the failure mode but says nothing about which IP is producing it. Added a small bounded `drop_by_src` map (host-byte-order uint32 → struct of three counters + last_seen_ns), capped at 256 source IPs with FIFO eviction on overflow. Bumped at every userspace site that increments `drop_unknown_src`, the userspace `drop_not_wg_shaped` shape-filter site, and the `drop_handshake_no_pubkey_match` site at the bottom of `HandleUnknownSrcHandshakeLocked`. Surfaced via a new `wg show drop_sources` verb that emits one row per source IP with the three counters plus last-seen timestamp. v6 sources are skipped — the brief's diagnostic targets are v4 NAT mismatches. 
The XDP-path `drop_not_wg_shaped` (in the BPF program) stays aggregate-only — populating a per-source map from XDP would need new BPF map plumbing and isn't on the runner's diagnostic path. The userspace shape-filter path (cold; runs when XDP isn't attached or when XDP_PASSes for shape) is covered. Smoke test in test_wg_relay_trace.cc: send a 1-byte non-WG-shaped UDP datagram, verify the histogram populates with `drop_not_wg_shaped >= 1` for the source IP. The aggregate counters in `wg show` are unchanged; this is strictly an additive observability surface. --- CHANGELOG.md | 3 +- include/hyper_derp/wg_relay.h | 39 +++++++++++++ src/einheit_channel.cc | 39 +++++++++++++ src/wg_relay.cc | 105 ++++++++++++++++++++++++++++++++++ tests/test_wg_relay_trace.cc | 33 +++++++++++ 5 files changed, 218 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adeb9849..9e354b4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ this file. Format based on [Keep a Changelog](https://keepachangelog.com/). ### Added — wg-relay diagnostic surface - **`--trace-forward-hashes` daemon flag**. When set, every wg-relay forward logs a SHA-256(payload) hash + length + source/destination peer pubkey prefix at two points: ingress (after the source peer matches and MAC1 verification, if applicable) and egress (just before `sendto` to the destination). Same hash on both lines proves the relay didn't mutate the frame; divergence flags a corrupting code path. Off by default — this is per-frame logging on the hot path and will tank throughput if left on. Aimed at diagnosing integrity-test failures where a single log line and aggregate counters aren't enough to triage. -- **Per-peer drop counters**. `wg peer list` now surfaces `peer..drop_no_link` and `peer..drop_pubkey_mismatch` alongside the existing per-peer byte counters. 
The aggregate counters in `wg show` are unchanged; these are the pair-attributable subset for diagnosing which peer's traffic is hitting which drop reason. Other drop classes (`drop_unknown_src`, `drop_not_wg_shaped`, `drop_handshake_no_pubkey_match`) are by definition unattributable to a known peer and remain aggregate-only. +- **Per-peer drop counters**. `wg peer list` now surfaces `peer..drop_no_link` and `peer..drop_pubkey_mismatch` alongside the existing per-peer byte counters. The aggregate counters in `wg show` are unchanged; these are the pair-attributable subset for diagnosing which peer's traffic is hitting which drop reason. +- **Per-source-IP drop histogram** for the three classes that aren't attributable to a known peer (`drop_unknown_src`, `drop_not_wg_shaped`, `drop_handshake_no_pubkey_match`). Surfaced via the new `wg show drop_sources` verb. Capped at 256 source IPs with FIFO eviction so spoofed-source storms don't grow the table without bound. Specifically targets the cloud-gcp-c4 internal-vs-external NAT-IP bug, where the runner traffic arrives at the relay from an IP not stamped on either peer and lights up `drop_handshake_no_pubkey_match` aggregately — now operators can see which IP it's coming from. ### Changed — wg-relay link-add error codes are differentiated diff --git a/include/hyper_derp/wg_relay.h b/include/hyper_derp/wg_relay.h index b6080c02..2a0668a5 100644 --- a/include/hyper_derp/wg_relay.h +++ b/include/hyper_derp/wg_relay.h @@ -222,6 +222,25 @@ struct WgXdpStats { uint64_t drop_blocklisted = 0; }; +/// Per-source-IP histogram of drops that can't be attributed +/// to a registered peer (because, by definition, the source +/// didn't match any peer's endpoint). 
The brief asked for +/// per-pair breakdown of these but per-pair is meaningless +/// when the source is unknown — per-source-IP is the next +/// best granularity and exactly what's needed to diagnose +/// "the runner's traffic is arriving from an external NAT IP +/// that isn't stamped on the peer." Bounded to keep the map +/// from growing under spoofed source attacks (LRU eviction +/// at the cap). +struct WgRelayDropBySrc { + uint64_t drop_unknown_src = 0; + uint64_t drop_not_wg_shaped = 0; + uint64_t drop_handshake_no_pubkey_match = 0; + /// Steady-clock ns of the most recent increment — used as + /// the LRU eviction key when the map is at capacity. + uint64_t last_seen_ns = 0; +}; + /// Strike record per source IP — incremented when a candidate /// endpoint that source registered fails to confirm via /// transport-data. Escalates the source onto the blocklist @@ -245,6 +264,12 @@ struct WgRelay { /// succeeds; escalated to wg_blocklist once the threshold /// is crossed. std::map strikes; + /// Per-source-IP drop histogram. Keyed by host-byte-order + /// uint32 (v4 only — v6 sources are skipped, since the + /// brief's diagnostic targets are v4 NAT bugs). Capped at + /// kDropBySrcMaxEntries; oldest `last_seen_ns` is evicted + /// when full so spoofed-source storms can't blow up RSS. + std::map drop_by_src; /// Blocked source IPs (host-byte-order uint32_t) → expiry /// timestamp (steady_clock ns). Mirrors the BPF /// wg_blocklist map for `wg blocklist list` / userspace @@ -382,6 +407,20 @@ struct WgBlocklistView { std::vector WgRelayListBlocklist( const WgRelay* r); +/// One row of `wg show drop_sources`. Provides the same three +/// drop counters the brief flagged as needing more granularity +/// than aggregate, attributed to the source IP (since they're +/// definitionally not attributable to a registered peer). 
+struct WgDropBySrcView { + std::string ip; + uint64_t drop_unknown_src; + uint64_t drop_not_wg_shaped; + uint64_t drop_handshake_no_pubkey_match; + uint64_t last_seen_ns; +}; +std::vector WgRelayListDropSources( + const WgRelay* r); + } // namespace hyper_derp #endif // INCLUDE_HYPER_DERP_WG_RELAY_H_ diff --git a/src/einheit_channel.cc b/src/einheit_channel.cc index c71b8471..4ef6c223 100644 --- a/src/einheit_channel.cc +++ b/src/einheit_channel.cc @@ -1978,6 +1978,37 @@ void WgBlocklistList(Server* s, const Request& /*req*/, SetBody(r, b); } +// Per-source-IP histogram of unattributable drops. The brief +// asked for per-pair breakdown of drop_unknown_src, +// drop_not_wg_shaped, and drop_handshake_no_pubkey_match — +// per-pair is meaningless when the source isn't a registered +// peer, but per-source-IP is exactly what the runner needs to +// diagnose internal/external NAT-IP mismatches. +void WgDropSources(Server* s, const Request& /*req*/, + Response* r) { + if (!WgGate(s, r)) return; + auto entries = WgRelayListDropSources(s->wg_relay); + if (entries.empty()) { + SetBody(r, "drop_sources=empty\n"); + return; + } + std::string b; + for (size_t i = 0; i < entries.size(); ++i) { + b += std::format("src.{}.ip={}\n", i, entries[i].ip); + b += std::format("src.{}.drop_unknown_src={}\n", i, + entries[i].drop_unknown_src); + b += std::format("src.{}.drop_not_wg_shaped={}\n", i, + entries[i].drop_not_wg_shaped); + b += std::format( + "src.{}.drop_handshake_no_pubkey_match={}\n", i, + entries[i].drop_handshake_no_pubkey_match); + b += std::format("src.{}.last_seen_ns={}\n", i, + entries[i].last_seen_ns); + } + b += std::format("count={}\n", entries.size()); + SetBody(r, b); +} + void WgShow(Server* s, const Request& /*req*/, Response* r) { if (!WgGate(s, r)) return; @@ -2252,6 +2283,14 @@ Registry MakeRegistry() { "Source IPs auto-blocked after repeated failed-confirm " "strikes (forged-handshake protection)", false, {}}; + m["wg_show_drop_sources"] = { + 
WgDropSources, Role::kAny, "wg show drop_sources", + "Per-source-IP histogram for the three drop classes " + "that aren't attributable to a registered peer " + "(drop_unknown_src, drop_not_wg_shaped, " + "drop_handshake_no_pubkey_match). Capped at 256 IPs; " + "FIFO eviction on overflow", + false, {}}; return m; } diff --git a/src/wg_relay.cc b/src/wg_relay.cc index 66e19588..7dda29d7 100644 --- a/src/wg_relay.cc +++ b/src/wg_relay.cc @@ -432,6 +432,13 @@ void PersistRosterLocked(WgRelay* r) { // registrations for the same peer (rate-limits flap). constexpr uint64_t kCandidateTimeoutNs = 30ULL * 1'000'000'000ULL; constexpr uint64_t kRelearnCooldownNs = 5ULL * 1'000'000'000ULL; + +// Cap on the per-source-IP drop histogram. Spoofed-source +// floods could otherwise grow the std::map unboundedly. 256 +// rows is enough for the diagnostic intent (the brief's +// integrity test has a single peer pair) while keeping the +// memory cost trivial. +constexpr size_t kDropBySrcMaxEntries = 256; // Rate-limit retry-init forwards from a not-yet-confirmed // candidate. wg.ko's retry cadence is 5 s; legit clients only // trip this when their network is genuinely flaky, while a @@ -595,6 +602,58 @@ void XdpBlocklistDelete(WgRelay* r, uint32_t host_ip) { bpf_map_delete_elem(r->xdp.blocklist_map_fd, &key); } +// Bump a counter on the per-source-IP drop histogram. The +// histogram exists for the three drop classes that aren't +// attributable to a registered peer (drop_unknown_src, +// drop_not_wg_shaped, drop_handshake_no_pubkey_match) — the +// brief's integrity-test diagnostic asks "what source IPs +// are hitting which drop reason?" rather than per-pair, since +// these drops fire precisely when no pair owns the source. +// +// `which` selects the field to bump (1=unknown_src, +// 2=not_wg_shaped, 3=no_pubkey_match). v6 sources are skipped +// — the brief's diagnostic targets are v4 NAT mismatches. +// Caller holds peers_mu. 
+enum class WgDropClass : int { + kUnknownSrc = 1, + kNotWgShaped = 2, + kHandshakeNoPubkeyMatch = 3, +}; +void RecordDropBySrcLocked( + WgRelay* r, const sockaddr_storage& src, + socklen_t src_len, WgDropClass which) { + uint32_t ip_h = ExtractV4SrcHostOrder(src, src_len); + if (ip_h == 0) return; + uint64_t now = NowNs(); + // LRU eviction on overflow: pick the entry with the oldest + // last_seen_ns. Linear scan; map size is bounded. + if (r->drop_by_src.find(ip_h) == r->drop_by_src.end() && + r->drop_by_src.size() >= kDropBySrcMaxEntries) { + auto oldest = r->drop_by_src.begin(); + for (auto it = r->drop_by_src.begin(); + it != r->drop_by_src.end(); ++it) { + if (it->second.last_seen_ns < + oldest->second.last_seen_ns) { + oldest = it; + } + } + r->drop_by_src.erase(oldest); + } + WgRelayDropBySrc& s = r->drop_by_src[ip_h]; + s.last_seen_ns = now; + switch (which) { + case WgDropClass::kUnknownSrc: + s.drop_unknown_src += 1; + break; + case WgDropClass::kNotWgShaped: + s.drop_not_wg_shaped += 1; + break; + case WgDropClass::kHandshakeNoPubkeyMatch: + s.drop_handshake_no_pubkey_match += 1; + break; + } +} + // Record a failed-confirm strike for `src` and escalate to // the blocklist if the threshold is crossed. Caller holds // peers_mu. 
@@ -747,6 +806,8 @@ void HandleUnknownSrcHandshakeLocked( if (pkt[0] != 1) { r->stats.drop_unknown_src.fetch_add( 1, std::memory_order_relaxed); + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kUnknownSrc); return; } uint64_t now = NowNs(); @@ -767,6 +828,8 @@ void HandleUnknownSrcHandshakeLocked( now - sender->last_relearn_ns < kRelearnCooldownNs) { r->stats.drop_unknown_src.fetch_add( 1, std::memory_order_relaxed); + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kUnknownSrc); return; } @@ -798,6 +861,8 @@ void HandleUnknownSrcHandshakeLocked( < kRetryForwardGapNs) { r->stats.drop_unknown_src.fetch_add( 1, std::memory_order_relaxed); + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kUnknownSrc); return; } sender->candidate_last_forward_ns = now; @@ -819,6 +884,8 @@ void HandleUnknownSrcHandshakeLocked( if (sender->candidate_endpoint_len > 0) { r->stats.drop_unknown_src.fetch_add( 1, std::memory_order_relaxed); + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kUnknownSrc); return; } @@ -858,6 +925,13 @@ void HandleUnknownSrcHandshakeLocked( // No partner pubkey verified. r->stats.drop_handshake_no_pubkey_match.fetch_add( 1, std::memory_order_relaxed); + // Per-source-IP attribution for diagnostic — this drop + // class is exactly the failure mode the cloud-gcp-c4 + // internal/external NAT mismatch produces, where the + // runner traffic arrives from an IP not stamped on either + // peer. + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kHandshakeNoPubkeyMatch); } // Forward-declare the XDP map updater — implementation lives @@ -975,6 +1049,12 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, if (!IsWgShaped(pkt, len)) { r->stats.drop_not_wg_shaped.fetch_add( 1, std::memory_order_relaxed); + // Bump the per-source-IP histogram so the operator can + // see which IPs are flooding the relay with non-WG noise. + // Cold path — taking the lock briefly is fine. 
+ std::lock_guard lk(r->peers_mu); + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kNotWgShaped); return; } // Lookup is O(N) over the peer table; N is operator- @@ -1010,12 +1090,16 @@ void HandlePacket(WgRelay* r, const uint8_t* pkt, if (!src_peer) { r->stats.drop_unknown_src.fetch_add( 1, std::memory_order_relaxed); + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kUnknownSrc); return; } // src_peer is now the confirmed peer; fall through. } else { r->stats.drop_unknown_src.fetch_add( 1, std::memory_order_relaxed); + RecordDropBySrcLocked(r, src, src_len, + WgDropClass::kUnknownSrc); return; } } @@ -2102,4 +2186,25 @@ std::vector WgRelayListBlocklist( return out; } +std::vector WgRelayListDropSources( + const WgRelay* r) { + std::vector out; + std::lock_guard lk(r->peers_mu); + out.reserve(r->drop_by_src.size()); + for (const auto& [ip_h, s] : r->drop_by_src) { + char buf[INET_ADDRSTRLEN]; + uint32_t nbo = htonl(ip_h); + inet_ntop(AF_INET, &nbo, buf, sizeof(buf)); + WgDropBySrcView v; + v.ip = buf; + v.drop_unknown_src = s.drop_unknown_src; + v.drop_not_wg_shaped = s.drop_not_wg_shaped; + v.drop_handshake_no_pubkey_match = + s.drop_handshake_no_pubkey_match; + v.last_seen_ns = s.last_seen_ns; + out.push_back(std::move(v)); + } + return out; +} + } // namespace hyper_derp diff --git a/tests/test_wg_relay_trace.cc b/tests/test_wg_relay_trace.cc index 1c7288d5..581afaef 100644 --- a/tests/test_wg_relay_trace.cc +++ b/tests/test_wg_relay_trace.cc @@ -219,6 +219,39 @@ TEST_F(WgRelayTraceTest, XdpModeOffSkipsAttach) { EXPECT_FALSE(stats.xdp_attached); } +// Per-source-IP histogram bumps drop_not_wg_shaped when an +// unrecognised packet hits the relay's port. The brief +// flagged this counter as needing more granularity than +// aggregate; surface it via WgRelayListDropSources. 
+TEST_F(WgRelayTraceTest, PerSrcIpDropHistogram) { + WgRelayConfig cfg = MakeCfg(); + StartRelay(std::move(cfg)); + ASSERT_NE(relay_, nullptr); + + // Send a single byte 0x05 (not a valid WG type) — fails the + // shape filter, increments drop_not_wg_shaped per-source. + uint8_t junk = 0x05; + sockaddr_in to{}; + to.sin_family = AF_INET; + to.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + to.sin_port = htons(relay_port_); + ASSERT_EQ(sendto(alice_fd_, &junk, 1, 0, + reinterpret_cast(&to), + sizeof(to)), + 1); + // Recv loop runs; let it process. + for (int i = 0; i < 50; ++i) { + auto rows = WgRelayListDropSources(relay_); + if (!rows.empty()) { + EXPECT_GE(rows[0].drop_not_wg_shaped, 1u); + EXPECT_EQ(rows[0].ip, "127.0.0.1"); + return; + } + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + FAIL() << "drop_by_src histogram never populated"; +} + // Per-peer drop_no_link counter increments when a registered // peer sends with no link configured for it. Counter is // surfaced via WgRelayListPeers.