diff --git a/sigma-agent-ebpf-common/Cargo.lock b/sigma-agent-ebpf-common/Cargo.lock
new file mode 100644
index 0000000..269ef19
--- /dev/null
+++ b/sigma-agent-ebpf-common/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "sigma-agent-ebpf-common"
+version = "0.1.0"
diff --git a/sigma-agent-ebpf-common/src/lib.rs b/sigma-agent-ebpf-common/src/lib.rs
index 3057626..906b9dd 100644
--- a/sigma-agent-ebpf-common/src/lib.rs
+++ b/sigma-agent-ebpf-common/src/lib.rs
@@ -86,3 +86,13 @@ pub struct ExecValue {
 pub struct OomKillValue {
     pub count: u64,
 }
+
+/// Value stored in the SYN-flood candidate map: one counter per source IPv4 address.
+/// The map key (not part of this struct) is the source IPv4 in network byte order
+/// (big-endian); the userspace side converts it back to `a.b.c.d` for reporting.
+/// `count` is the number of inbound SYN-only TCP segments (SYN=1, ACK=0) — handshake initiations, not SYN-ACK replies.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct SynCountValue {
+    pub count: u64,
+}
diff --git a/sigma-agent-ebpf/Cargo.lock b/sigma-agent-ebpf/Cargo.lock
new file mode 100644
index 0000000..2ee011d
--- /dev/null
+++ b/sigma-agent-ebpf/Cargo.lock
@@ -0,0 +1,321 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4 + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "aya-build" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59bc42f3c5ddacc34eca28a420b47e3cbb3f0f484137cb2bf1ad2153d0eae52a" +dependencies = [ + "anyhow", + "cargo_metadata", +] + +[[package]] +name = "aya-ebpf" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8dbaf5409a1a0982e5c9bdc0f499a55fe5ead39fe9c846012053faf0d404f73" +dependencies = [ + "aya-ebpf-bindings", + "aya-ebpf-cty", + "aya-ebpf-macros", + "rustversion", +] + +[[package]] +name = "aya-ebpf-bindings" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71ee8e6a617f040d8da7565ec4010aea75e33cda4662f64c019c66ee97d17889" +dependencies = [ + "aya-build", + "aya-ebpf-cty", +] + +[[package]] +name = "aya-ebpf-cty" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6f33396742e7fd0f519c1e0de5141d84e1a8df69146a557c08cc222b0ceace4" +dependencies = [ + "aya-build", +] + +[[package]] +name = "aya-ebpf-macros" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96fd02363736177e7e91d6c95d7effbca07be87502c7b5b32fc194aed8b177a0" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + +[[package]] +name = "aya-log-common" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "befef9fe882e63164a2ba0161874e954648a72b0e1c4b361f532d590638c4eec" +dependencies = [ + "num_enum", +] + +[[package]] +name = "aya-log-ebpf" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a10bbadd0829895a91eb1cd2bb02d7af145704087f03812bed60cb9fe65dbb3" 
+dependencies = [ + "aya-ebpf", + "aya-log-common", + "aya-log-ebpf-macros", +] + +[[package]] +name = "aya-log-ebpf-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6d8251a75f56077db51892041aa6b77c70ef2723845d7a210979700b2f01bc4" +dependencies = [ + "aya-log-common", + "aya-log-parser", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "aya-log-parser" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b102eb5c88c9aa0b49102d3fbcee08ecb0dfa81014f39b373311de7a7032cb" +dependencies = [ + "aya-log-common", +] + +[[package]] +name = "camino" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" +dependencies = [ + "serde_core", +] + +[[package]] +name = "cargo-platform" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0061da739915fae12ea00e16397555ed4371a6bb285431aab930f61b0aa4ba" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "cargo_metadata" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "num_enum" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sigma-agent-ebpf" +version = "0.1.0" +dependencies = [ + "aya-ebpf", + "aya-log-ebpf", + "sigma-agent-ebpf-common", +] + +[[package]] +name = "sigma-agent-ebpf-common" +version = "0.1.0" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/sigma-agent-ebpf/src/main.rs b/sigma-agent-ebpf/src/main.rs index 4d50e54..7e3b599 100644 --- a/sigma-agent-ebpf/src/main.rs +++ b/sigma-agent-ebpf/src/main.rs @@ -8,7 +8,7 @@ use aya_ebpf::{ programs::{ProbeContext, RetProbeContext, TracePointContext}, EbpfContext, }; -use sigma_agent_ebpf_common::{ConnLatencyValue, ConnValue, DnsQueryValue, DropKey, DropValue, ExecValue, OomKillValue, RetransmitValue, RttValue, TrafficKey, TrafficValue}; +use sigma_agent_ebpf_common::{ConnLatencyValue, ConnValue, DnsQueryValue, DropKey, DropValue, ExecValue, OomKillValue, RetransmitValue, RttValue, SynCountValue, TrafficKey, TrafficValue}; /// Offset of `skc_dport` within `struct sock` (Linux 5.x/6.x x86_64). /// Located at sock.__sk_common.skc_dport. Network byte order (big-endian). @@ -63,6 +63,32 @@ static EXEC_MAP: HashMap = HashMap::with_max_entries(8192 #[map] static OOM_KILL_MAP: HashMap = HashMap::with_max_entries(8192, 0); +/// Per-source-IPv4 SYN-flood candidate counters. +/// Key: source IPv4 in network byte order (big-endian u32). Value: SYN count. +/// Bounded at 8192 entries; when full, new keys are dropped silently (the `insert` +/// call simply fails — existing entries continue to count). +#[map] +static SYN_MAP: HashMap = HashMap::with_max_entries(8192, 0); + +/// Offsets within `struct sk_buff` (Linux 6.x x86_64). May need adjustment on +/// different kernel builds — adopt CO-RE/BTF relocations if portability becomes +/// an issue. If the reads fail (e.g. structure layout changed), the probe +/// silently returns without recording — no crash, no map pollution. +const SKB_NETWORK_HEADER_OFFSET: usize = 182; +const SKB_HEAD_OFFSET: usize = 200; + +/// Offsets within the IPv4 header. 
+const IPHDR_SADDR_OFFSET: usize = 12;
+/// `ihl` is the low nibble of the first byte (header length in 32-bit words).
+const IPHDR_FIRST_BYTE_OFFSET: usize = 0;
+
+/// Offset of the flags byte within `struct tcphdr`.
+/// Layout: source(2) + dest(2) + seq(4) + ack_seq(4) + doff/flags(2) = byte 13 is flags.
+/// Within that byte, counting from the least-significant bit: bit 1 = SYN, bit 4 = ACK.
+const TCPHDR_FLAGS_OFFSET: usize = 13;
+const TCP_FLAG_SYN: u8 = 0x02;
+const TCP_FLAG_ACK: u8 = 0x10;
+
 /// Offset of the `reason` field within the skb:kfree_skb tracepoint args.
 /// Layout: trace_entry common header (8) + skbaddr(8) + location(8) + rx_sk(8) + protocol(2) + padding(2) = 36
 const KFREE_SKB_REASON_OFFSET: usize = 36;
@@ -547,6 +573,82 @@ fn try_oom_mark_victim(_ctx: &TracePointContext) -> Result<(), i64> {
     Ok(())
 }
+/// kprobe on tcp_v4_rcv — called for every inbound IPv4 TCP segment.
+/// We filter for SYN-only segments (SYN=1, ACK=0) and increment a per-source-IP
+/// counter for SYN-flood candidate detection. Cost per non-matching packet is
+/// a few `bpf_probe_read_kernel` calls + early return.
+///
+/// Signature: `tcp_v4_rcv(struct sk_buff *skb)` — arg 0 is the skb pointer.
+#[kprobe]
+pub fn tcp_v4_rcv_syn(ctx: ProbeContext) -> u32 {
+    // Best-effort: a failed kernel read only means this segment is not counted,
+    // so the error is dropped on purpose and the probe always returns 0.
+    let _ = try_tcp_v4_rcv_syn(&ctx);
+    0
+}
+
+fn try_tcp_v4_rcv_syn(ctx: &ProbeContext) -> Result<(), i64> {
+    let skb: *const u8 = ctx.arg(0).ok_or(1i64)?;
+    if skb.is_null() {
+        return Ok(());
+    }
+
+    // Resolve skb->head and skb->network_header
+    let head: *const u8 = unsafe {
+        bpf_probe_read_kernel(skb.add(SKB_HEAD_OFFSET) as *const *const u8).map_err(|_| 1i64)?
+    };
+    if head.is_null() {
+        return Ok(());
+    }
+    let net_off: u16 = unsafe {
+        bpf_probe_read_kernel(skb.add(SKB_NETWORK_HEADER_OFFSET) as *const u16).map_err(|_| 1i64)?
+    };
+
+    // IPv4 header pointer
+    let iph: *const u8 = unsafe { head.add(net_off as usize) };
+
+    // First header byte: version in the high nibble, IHL (32-bit words) in the low nibble.
+    let ver_ihl: u8 = unsafe {
+        bpf_probe_read_kernel(iph.add(IPHDR_FIRST_BYTE_OFFSET)).map_err(|_| 1i64)?
+    };
+    let ihl_words = (ver_ihl & 0x0f) as usize;
+    // The skb offsets are hard-coded, so check that we really landed on an IPv4
+    // header (version 4, IHL >= 5 words; the mask already caps IHL at 15) before trusting it.
+    if (ver_ihl >> 4) != 4 || ihl_words < 5 {
+        return Ok(());
+    }
+    let ip_hdr_len = ihl_words * 4;
+
+    // TCP header starts at iph + ip_hdr_len; flags byte is at TCPHDR_FLAGS_OFFSET.
+    let tcph: *const u8 = unsafe { iph.add(ip_hdr_len) };
+    let flags: u8 = unsafe {
+        bpf_probe_read_kernel(tcph.add(TCPHDR_FLAGS_OFFSET)).map_err(|_| 1i64)?
+    };
+
+    // SYN-only: SYN bit set, ACK bit clear. Rejects SYN-ACK replies.
+    if (flags & TCP_FLAG_SYN) == 0 || (flags & TCP_FLAG_ACK) != 0 {
+        return Ok(());
+    }
+
+    // Read source IPv4 (network byte order — keep as-is for the map key).
+    let saddr: u32 = unsafe {
+        bpf_probe_read_kernel(iph.add(IPHDR_SADDR_OFFSET) as *const u32).map_err(|_| 1i64)?
+    };
+
+    if let Some(val) = SYN_MAP.get_ptr_mut(&saddr) {
+        // NOTE(review): plain read-modify-write on a shared (non per-CPU) map value;
+        // concurrent CPUs can lose increments — tolerable for a heuristic counter.
+        unsafe { (*val).count += 1 };
+    } else {
+        // Bounded insert — when the map is full (8192 entries), insert silently fails
+        // and we drop this new source IP. Existing entries continue to count.
+        let val = SynCountValue { count: 1 };
+        let _ = SYN_MAP.insert(&saddr, &val, 0);
+    }
+
+    Ok(())
+}
+
 #[panic_handler]
 fn panic(_info: &core::panic::PanicInfo) -> ! {
     unsafe { core::hint::unreachable_unchecked() }
 }
diff --git a/sigma-agent/README.md b/sigma-agent/README.md
index 83ad409..27244fe 100644
--- a/sigma-agent/README.md
+++ b/sigma-agent/README.md
@@ -237,6 +237,31 @@ When MCP is also enabled, the tool `agent_check_update` returns the cached snaps
 
 Network errors, non-2xx responses, and malformed JSON all set `last_error`, force `update_available = false` in the gauge, and log a `warn!`. The watchdog never panics and never blocks the heartbeat loop. HTTP timeout is 5 seconds per check.
 
+## eBPF SYN-Flood Candidate Detection
+
+In addition to the existing eBPF observability subsystem (per-process TCP/UDP traffic, RTT, drops, DNS, exec, OOM), the agent ships a `tcp_v4_rcv` kprobe that inspects each inbound IPv4 TCP segment and increments a per-source-IPv4 SYN counter when the segment is SYN-only (`SYN=1`, `ACK=0`). The map is bounded at 8192 entries — once full, new source IPs are dropped silently while existing entries continue to count. The counter is read-and-cleared every `--ebpf-traffic-interval` seconds, so the exposed value is the SYN volume seen in the most recent window.
+
+Like the rest of the eBPF subsystem, this is feature-gated behind the `ebpf-traffic` cargo feature. Building without it produces a binary with zero eBPF dependencies.
+
+Prometheus metric (only emitted for non-zero entries):
+
+```
+# HELP sigma_tcp_syn_count Inbound TCP SYN count per source IPv4 (eBPF kprobe tcp_v4_rcv, SYN-only, non-cumulative — resets each harvest window)
+# TYPE sigma_tcp_syn_count gauge
+sigma_tcp_syn_count{hostname="relay-01",source_ip="203.0.113.10"} 1842
+```
+
+Queryable via the MCP tool `query_syn_flood_candidates` (default threshold `min_syn=100`):
+
+```bash
+curl -s -X POST http://127.0.0.1:9103/mcp \
+  -H 'Content-Type: application/json' \
+  -d '{"jsonrpc":"2.0","id":1,"method":"tools/call",
+       "params":{"name":"query_syn_flood_candidates","arguments":{"min_syn":500}}}'
+```
+
+The kprobe locates the IPv4 header through `skb->head` + `skb->network_header` and reads `iph->saddr` and the TCP flags byte using fixed `struct sk_buff` offsets chosen for Linux 6.x x86_64. It does not use CO-RE/BTF relocations, so the offsets must be re-checked on other kernel builds. If a kernel read fails, or the bytes found there are not a plausible IPv4 header (version 4, IHL ≥ 5), the probe returns without recording — no crash.
+
 ## MCP Tool Surface (LLM-callable control plane)
 
 When `--mcp-enabled` is set, the agent runs a [Model Context Protocol](https://modelcontextprotocol.io) server at `POST /mcp` (JSON-RPC 2.0). This exposes agent capabilities as **tools that an external LLM can call** — e.g., an SRE assistant in `sigma-api` invoking `query_ebpf_traffic` during incident triage, or an automation calling `allocate_ports` when provisioning new Envoy routes.
@@ -257,6 +283,7 @@ When `--mcp-enabled` is set, the agent runs a [Model Context Protocol](https://m
 | `query_gpu_metrics` | none | Per-GPU utilization/memory/temp/power (nvidia-smi) |
 | `query_backend_health` | `unreachable_only?` (default false) | Latest TCP-probe snapshot of Envoy upstreams |
 | `agent_check_update` | `force?` (bool) | Detection-only update check — cached snapshot, or forced poll |
+| `query_syn_flood_candidates` | `min_syn?` (default 100) | Source IPs exceeding the SYN-volume threshold (eBPF) |
 
 Tools that depend on capabilities (port scan, eBPF, registration) return a structured `isError` or `enabled=false` payload when their dependency is not configured — they do not break the MCP session.
 
diff --git a/sigma-agent/src/ebpf_traffic.rs b/sigma-agent/src/ebpf_traffic.rs
index c79aed3..a0ebc9e 100644
--- a/sigma-agent/src/ebpf_traffic.rs
+++ b/sigma-agent/src/ebpf_traffic.rs
@@ -86,6 +86,12 @@ struct OomKillValue {
     count: u64,
 }
 
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct SynCountValue {
+    count: u64,
+}
+
 unsafe impl aya::Pod for TrafficKey {}
 unsafe impl aya::Pod for TrafficValue {}
 unsafe impl aya::Pod for RetransmitValue {}
@@ -97,6 +103,7 @@ unsafe impl aya::Pod for DropValue {}
 unsafe impl aya::Pod for DnsQueryValue {}
 unsafe impl aya::Pod for ExecValue {}
 unsafe impl aya::Pod for OomKillValue {}
+unsafe impl aya::Pod for SynCountValue {}
 
 /// Per-process traffic stats resolved from eBPF data.
 #[derive(Clone, Debug)]
@@ -125,6 +132,17 @@ pub struct ProcessTraffic {
 
 pub type SharedTrafficStats = Arc<RwLock<Vec<ProcessTraffic>>>;
 
+/// Per-source-IP SYN count harvested from the SYN_MAP since the last interval.
+/// `window_secs` matches the harvester interval so consumers can compute rate.
+#[derive(Clone, Debug)]
+pub struct SynStats {
+    pub source_ip: String,
+    pub syn_count: u64,
+    pub window_secs: u64,
+}
+
+pub type SharedSynStats = Arc<RwLock<Vec<SynStats>>>;
+
 /// Load the pre-compiled eBPF programs and attach kprobes.
 pub fn load_ebpf() -> anyhow::Result<Ebpf> {
     // The eBPF bytecode is embedded at compile time from the build stage
@@ -328,13 +346,33 @@ pub fn load_ebpf() -> anyhow::Result<Ebpf> {
         None => warn!("tracepoint program 'oom_mark_victim' not found in eBPF object"),
     }
 
+    // Attach kprobe to tcp_v4_rcv (non-fatal — per-source-IP SYN counting for
+    // SYN-flood candidate detection). On older kernels, the skb struct offsets
+    // used by the probe may not match; if the kprobe attach itself fails, we
+    // continue without SYN-flood metrics rather than failing the whole agent.
+    match ebpf.program_mut("tcp_v4_rcv_syn") {
+        Some(prog) => match <&mut KProbe>::try_from(prog) {
+            Ok(kp) => {
+                if let Err(e) = kp.load().and_then(|()| kp.attach("tcp_v4_rcv", 0)) {
+                    warn!("Failed to attach kprobe to tcp_v4_rcv (SYN-flood): {:#}", e);
+                } else {
+                    info!("Attached kprobe to tcp_v4_rcv (SYN-flood)");
+                }
+            }
+            Err(e) => warn!("tcp_v4_rcv_syn program type mismatch: {:#}", e),
+        },
+        None => warn!("kprobe program 'tcp_v4_rcv_syn' not found in eBPF object"),
+    }
+
     Ok(ebpf)
 }
 
-/// Main loop: periodically reads the BPF map, resolves PIDs, and updates shared stats.
+/// Main loop: periodically reads the BPF maps, resolves PIDs, and updates shared stats.
+/// Also harvests the per-source-IP SYN map into `shared_syn`.
 pub async fn traffic_loop(
     mut ebpf: Ebpf,
     shared_stats: SharedTrafficStats,
+    shared_syn: SharedSynStats,
     interval_secs: u64,
     host_proc: String,
 ) {
@@ -353,7 +391,69 @@ pub async fn traffic_loop(
                 warn!("Failed to harvest eBPF traffic stats: {:#}", e);
             }
         }
+
+        match harvest_syn(&mut ebpf, interval_secs) {
+            Ok(stats) => {
+                debug!(entries = stats.len(), "Harvested eBPF SYN stats");
+                let mut guard = shared_syn.write().await;
+                *guard = stats;
+            }
+            Err(e) => {
+                warn!("Failed to harvest eBPF SYN stats: {:#}", e);
+            }
+        }
     }
 }
 
+/// Harvest the SYN_MAP: read-and-clear per-source-IP SYN counts, then convert
+/// the network-byte-order u32 keys back to dotted-quad strings.
+///
+/// Returns an empty list when the eBPF object has no `SYN_MAP`; otherwise the entries
+/// sorted by descending count. SYNs counted between the read and the delete of an
+/// entry are lost with it (accepted: the counter is a heuristic).
+fn harvest_syn(ebpf: &mut Ebpf, window_secs: u64) -> anyhow::Result<Vec<SynStats>> {
+    let Some(map) = ebpf.map_mut("SYN_MAP") else {
+        return Ok(Vec::new());
+    };
+    let mut syn_map = BpfHashMap::<&mut aya::maps::MapData, u32, SynCountValue>::try_from(map)?;
+
+    // Snapshot first and delete afterwards so the map is never mutated mid-iteration.
+    // One vector is enough: each entry already carries the key needed for removal.
+    let entries: Vec<(u32, SynCountValue)> = syn_map
+        .iter()
+        .filter_map(|item| match item {
+            Ok(entry) => Some(entry),
+            Err(e) => {
+                debug!("Error reading SYN_MAP entry: {}", e);
+                None
+            }
+        })
+        .collect();
+    for (key, _) in &entries {
+        let _ = syn_map.remove(key);
+    }
+
+    let mut stats: Vec<SynStats> = entries
+        .into_iter()
+        .map(|(saddr_be, value)| SynStats {
+            source_ip: format_ipv4_be(saddr_be),
+            syn_count: value.count,
+            window_secs,
+        })
+        .collect();
+    // Heaviest sources first, independent of the map's iteration order.
+    stats.sort_unstable_by_key(|s| std::cmp::Reverse(s.syn_count));
+
+    Ok(stats)
+}
+
+/// Format a network-byte-order (big-endian) IPv4 address into `a.b.c.d`.
+///
+/// `saddr_be` carries the four on-wire octets exactly as they sat in kernel memory,
+/// so its native-endian byte view is already in printing order on any host;
+/// no byte swap is needed.
+fn format_ipv4_be(saddr_be: u32) -> String {
+    std::net::Ipv4Addr::from(saddr_be.to_ne_bytes()).to_string()
+}
+
 /// Read all entries from the BPF map, resolve PIDs, aggregate by process name.
@@ -890,4 +990,29 @@ mod tests {
         let result = resolve_process_name(99999999, "/proc");
         assert_eq!(result, "pid-99999999");
     }
+
+    #[test]
+    fn test_format_ipv4_be_native_byteorder() {
+        // Build the network-order bytes for "1.2.3.4" and convert into a u32 the
+        // way `bpf_probe_read_kernel` would observe them on the host:
+        // four bytes laid out in memory order [1, 2, 3, 4] reinterpreted as the
+        // host's native u32.
+ let bytes: [u8; 4] = [1, 2, 3, 4]; + let saddr_be = u32::from_ne_bytes(bytes); + assert_eq!(format_ipv4_be(saddr_be), "1.2.3.4"); + } + + #[test] + fn test_format_ipv4_be_localhost() { + let bytes: [u8; 4] = [127, 0, 0, 1]; + let saddr_be = u32::from_ne_bytes(bytes); + assert_eq!(format_ipv4_be(saddr_be), "127.0.0.1"); + } + + #[test] + fn test_format_ipv4_be_max() { + let bytes: [u8; 4] = [255, 255, 255, 255]; + let saddr_be = u32::from_ne_bytes(bytes); + assert_eq!(format_ipv4_be(saddr_be), "255.255.255.255"); + } } diff --git a/sigma-agent/src/main.rs b/sigma-agent/src/main.rs index 4d6f6c9..5f2cdfa 100644 --- a/sigma-agent/src/main.rs +++ b/sigma-agent/src/main.rs @@ -78,30 +78,37 @@ async fn main() -> Result<()> { // Conditionally start eBPF traffic monitoring #[cfg(feature = "ebpf-traffic")] - let traffic_stats: Option = if config.ebpf_traffic { + let (traffic_stats, syn_stats): ( + Option, + Option, + ) = if config.ebpf_traffic { match ebpf_traffic::load_ebpf() { Ok(ebpf) => { let stats = Arc::new(RwLock::new(Vec::new())); + let syn = Arc::new(RwLock::new(Vec::new())); let stats_clone = stats.clone(); + let syn_clone = syn.clone(); let interval = config.ebpf_traffic_interval; let host_proc = config.host_proc.clone(); tokio::spawn(async move { - ebpf_traffic::traffic_loop(ebpf, stats_clone, interval, host_proc).await; + ebpf_traffic::traffic_loop(ebpf, stats_clone, syn_clone, interval, host_proc).await; }); info!("eBPF traffic monitoring started (interval={}s)", interval); - Some(stats) + (Some(stats), Some(syn)) } Err(e) => { warn!("Failed to load eBPF programs, traffic monitoring disabled: {:#}", e); - None + (None, None) } } } else { - None + (None, None) }; #[cfg(not(feature = "ebpf-traffic"))] let traffic_stats: Option<()> = None; + #[cfg(not(feature = "ebpf-traffic"))] + let syn_stats: Option<()> = None; // Conditionally start GPU metrics collection (nvidia-smi shell-out) let gpu_metrics: Option = if config.gpu_metrics { @@ -227,11 +234,12 @@ async fn 
main() -> Result<()> { let port = config.metrics_port; let hn = hostname.clone(); let ts = traffic_stats.clone(); + let ss = syn_stats.clone(); let gm = gpu_metrics.clone(); let pr = probe_results.clone(); let ui = update_info.clone(); tokio::spawn(async move { - metrics::serve_metrics(port, shared, hn, port_range, ts, gm, pr, ui).await; + metrics::serve_metrics(port, shared, hn, port_range, ts, ss, gm, pr, ui).await; }); } @@ -284,6 +292,8 @@ async fn main() -> Result<()> { agent_version: env!("CARGO_PKG_VERSION"), #[cfg(feature = "ebpf-traffic")] traffic_stats: traffic_stats.clone(), + #[cfg(feature = "ebpf-traffic")] + syn_stats: syn_stats.clone(), gpu_metrics: gpu_metrics.clone(), probe_results: probe_results.clone(), update_info: update_info.clone(), diff --git a/sigma-agent/src/mcp.rs b/sigma-agent/src/mcp.rs index 5a58a67..34828bc 100644 --- a/sigma-agent/src/mcp.rs +++ b/sigma-agent/src/mcp.rs @@ -50,7 +50,7 @@ use crate::system; use crate::watchdog::{self, SharedUpdateInfo}; #[cfg(feature = "ebpf-traffic")] -use crate::ebpf_traffic::SharedTrafficStats; +use crate::ebpf_traffic::{SharedSynStats, SharedTrafficStats}; const MCP_PROTOCOL_VERSION: &str = "2025-06-18"; const JSONRPC_VERSION: &str = "2.0"; @@ -124,6 +124,8 @@ pub struct McpState { pub agent_version: &'static str, #[cfg(feature = "ebpf-traffic")] pub traffic_stats: Option, + #[cfg(feature = "ebpf-traffic")] + pub syn_stats: Option, pub gpu_metrics: Option, pub probe_results: Option, pub update_info: Option, @@ -242,6 +244,21 @@ fn tools_list_response() -> Value { }, "additionalProperties": false } + }, + { + "name": "query_syn_flood_candidates", + "description": "List inbound source IPv4 addresses sending an unusually high volume of SYN packets in the last eBPF harvest window. Useful for spotting SYN-flood candidates and abusive scanners. 
Returns enabled=false if eBPF traffic monitoring is not configured on this agent.", + "inputSchema": { + "type": "object", + "properties": { + "min_syn": { + "type": "integer", + "minimum": 1, + "description": "Minimum SYN count threshold per source IP (default 100)." + } + }, + "additionalProperties": false + } } ] }) @@ -282,6 +299,7 @@ async fn handle_tool_call(state: Arc, params: Value) -> Value { "query_gpu_metrics" => tool_query_gpu_metrics(&state).await, "query_backend_health" => tool_query_backend_health(&state, args).await, "agent_check_update" => tool_agent_check_update(&state, args).await, + "query_syn_flood_candidates" => tool_query_syn_flood_candidates(&state, args).await, other => return tool_err(format!("unknown tool: {}", other)), }; @@ -619,6 +637,55 @@ async fn tool_agent_check_update(state: &McpState, args: Value) -> Result Result { + let Some(ref syn_arc) = state.syn_stats else { + return Ok(json!({"enabled": false, "candidates": []}).to_string()); + }; + + let min = args + .get("min_syn") + .and_then(|v| v.as_u64()) + .unwrap_or(100); + + let stats = syn_arc.read().await; + let candidates: Vec = stats + .iter() + .filter(|s| s.syn_count >= min) + .map(|s| { + json!({ + "source_ip": s.source_ip, + "syn_count": s.syn_count, + "window_secs": s.window_secs, + }) + }) + .collect(); + + serde_json::to_string_pretty(&json!({ + "enabled": true, + "min_syn": min, + "count": candidates.len(), + "candidates": candidates, + })) + .map_err(|e| e.to_string()) +} + +#[cfg(not(feature = "ebpf-traffic"))] +async fn tool_query_syn_flood_candidates( + _state: &McpState, + _args: Value, +) -> Result { + Ok(json!({ + "enabled": false, + "candidates": [], + "note": "agent built without ebpf-traffic feature" + }) + .to_string()) +} + // ---------- HTTP handler ---------- async fn mcp_handler( diff --git a/sigma-agent/src/metrics.rs b/sigma-agent/src/metrics.rs index 4f92617..0f12ced 100644 --- a/sigma-agent/src/metrics.rs +++ b/sigma-agent/src/metrics.rs @@ -17,7 
+17,7 @@ use crate::port_scan::{self, PortScanResult, SharedScanResult};
 use crate::watchdog::{SharedUpdateInfo, UpdateInfo};
 
 #[cfg(feature = "ebpf-traffic")]
-use crate::ebpf_traffic::SharedTrafficStats;
+use crate::ebpf_traffic::{SharedSynStats, SharedTrafficStats};
 
 /// Known sources that are always emitted (even when count=0) for stable time series
 const KNOWN_SOURCES: &[&str] = &[
@@ -36,6 +36,8 @@ struct MetricsState {
     port_range: Option<(u16, u16)>,
     #[cfg(feature = "ebpf-traffic")]
     traffic_stats: Option,
+    #[cfg(feature = "ebpf-traffic")]
+    syn_stats: Option,
     gpu_metrics: Option,
     probe_results: Option,
     update_info: Option,
@@ -564,6 +566,35 @@ pub async fn render_probe_metrics(results: &[BackendProbeResult], hostname: &str
     out
 }
 
+/// Prometheus text rendering of the per-source-IP SYN gauges.
+///
+/// Zero-count entries are skipped (same convention as the packet-drop / DNS / exec /
+/// OOM panels); when nothing is left the result is empty, with no HELP/TYPE header.
+#[cfg(feature = "ebpf-traffic")]
+pub fn render_syn_metrics(stats: &[crate::ebpf_traffic::SynStats], hostname: &str) -> String {
+    let mut rendered = String::with_capacity(256);
+    let mut nonzero = stats.iter().filter(|s| s.syn_count != 0).peekable();
+    if nonzero.peek().is_none() {
+        return rendered;
+    }
+
+    rendered.push_str(
+        "# HELP sigma_tcp_syn_count Inbound TCP SYN count per source IPv4 (eBPF kprobe tcp_v4_rcv, SYN-only, non-cumulative — resets each harvest window)\n",
+    );
+    rendered.push_str("# TYPE sigma_tcp_syn_count gauge\n");
+    for s in nonzero {
+        // Writing into a `String` cannot fail, hence the `unwrap`.
+        writeln!(
+            rendered,
+            "sigma_tcp_syn_count{{hostname=\"{}\",source_ip=\"{}\"}} {}",
+            hostname, s.source_ip, s.syn_count
+        )
+        .unwrap();
+    }
+
+    rendered
+}
+
 /// Render self-update watchdog metrics in Prometheus text format.
 ///
 /// Emits two gauges:
@@ -651,6 +682,16 @@ async fn metrics_handler(State(state): State>) -> impl IntoRes
         body.push_str(&render_update_metrics(&snapshot, &state.hostname).await);
     }
 
+    #[cfg(feature = "ebpf-traffic")]
+    if let Some(ref syn_stats) = state.syn_stats {
+        let stats = syn_stats.read().await;
+        let rendered = render_syn_metrics(&stats, &state.hostname);
+        if !rendered.is_empty() {
+            body.push('\n');
+            body.push_str(&rendered);
+        }
+    }
+
     (
         [(
             header::CONTENT_TYPE,
@@ -710,6 +751,8 @@ pub async fn serve_metrics(
     port_range: Option<(u16, u16)>,
     #[cfg(feature = "ebpf-traffic")] traffic_stats: Option,
     #[cfg(not(feature = "ebpf-traffic"))] _traffic_stats: Option<()>,
+    #[cfg(feature = "ebpf-traffic")] syn_stats: Option,
+    #[cfg(not(feature = "ebpf-traffic"))] _syn_stats: Option<()>,
     gpu_metrics: Option,
     probe_results: Option,
     update_info: Option,
@@ -720,6 +763,8 @@ pub async fn serve_metrics(
         port_range,
         #[cfg(feature = "ebpf-traffic")]
         traffic_stats,
+        #[cfg(feature = "ebpf-traffic")]
+        syn_stats,
         gpu_metrics,
         probe_results,
         update_info,
@@ -858,6 +903,39 @@ mod tests {
         assert!(out.contains("sigma_gpu_utilization_percent"));
     }
 
+    #[cfg(feature = "ebpf-traffic")]
+    #[test]
+    fn test_render_syn_metrics_empty() {
+        let stats: Vec = Vec::new();
+        let output = render_syn_metrics(&stats, "relay-01");
+        assert!(output.is_empty(), "empty stats should produce no output");
+    }
+
+    #[cfg(feature = "ebpf-traffic")]
+    #[test]
+    fn test_render_syn_metrics_with_data() {
+        let stats = vec![
+            crate::ebpf_traffic::SynStats {
+                source_ip: "1.2.3.4".to_string(),
+                syn_count: 250,
+                window_secs: 30,
+            },
+            crate::ebpf_traffic::SynStats {
+                source_ip: "5.6.7.8".to_string(),
+                syn_count: 0,
+                window_secs: 30,
+            },
+        ];
+        let output = render_syn_metrics(&stats, "relay-01");
+        assert!(output.contains("# HELP sigma_tcp_syn_count"));
+        assert!(output.contains("# TYPE sigma_tcp_syn_count gauge"));
+        assert!(output.contains(
+            "sigma_tcp_syn_count{hostname=\"relay-01\",source_ip=\"1.2.3.4\"} 250"
+        ));
+        // Zero-count entries should not be emitted
+        assert!(!output.contains("5.6.7.8"));
+    }
+
     #[test]
     fn test_render_metrics_extra_source() {
         let mut used = HashMap::new();