From 3f506b7eba50e11ad90a2468a17750b337fa390d Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 14:09:19 +0000 Subject: [PATCH 1/9] Moves glob/wildcard matching into Fact. Host scanning now uses globs to only get inodes for the specific files matching the globs. Prefix map is populated with the longest prefix for each glob e.g. /etc/**/*.conf -> /etc/ /home/user/.ssh/id_{rsa,dsa} -> /home/user/.ssh/id_ Kernel captures events based on inode first and then prefix match (this behavior is unchanged) and then userspace does a glob match on the path and host_path. --- Cargo.lock | 65 +++++++++++++++++++--------------------- Cargo.toml | 2 ++ fact-ebpf/src/lib.rs | 21 +++++++++++-- fact/Cargo.toml | 2 ++ fact/src/bpf/mod.rs | 24 ++++++++++++++- fact/src/event/mod.rs | 22 ++++++++++++++ fact/src/host_scanner.rs | 25 ++++++++-------- 7 files changed, 110 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index df6a01be..6262f5a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,6 +249,16 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -443,6 +453,8 @@ dependencies = [ "env_logger", "fact-api", "fact-ebpf", + "glob", + "globset", "http-body-util", "hyper", "hyper-tls", @@ -574,18 +586,6 @@ dependencies = [ "pin-utils", ] -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - [[package]] name = "getrandom" version = "0.4.1" 
@@ -607,9 +607,22 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] [[package]] name = "h2" @@ -939,7 +952,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -1490,7 +1503,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1744,7 +1757,7 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.4.1", + "getrandom", "js-sys", "wasm-bindgen", ] @@ -1776,15 +1789,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - [[package]] name = "wasip2" version = 
"1.0.2+wasi-0.2.9" @@ -2003,15 +2007,6 @@ dependencies = [ "wit-parser", ] -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - [[package]] name = "wit-bindgen-rust" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 742c25f4..01330200 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ aya = { version = "0.13.1", default-features = false } anyhow = { version = "1", default-features = false, features = ["std", "backtrace"] } clap = { version = "4.5.41", features = ["derive", "env"] } env_logger = { version = "0.11.5", default-features = false, features = ["humantime"] } +glob = "0.3.3" +globset = "0.4.18" http-body-util = "0.1.3" hyper = { version = "1.6.0", default-features = false } hyper-tls = "0.6.0" diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index 0bf7a1ad..ac7802bc 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -30,17 +30,32 @@ impl TryFrom<&PathBuf> for path_prefix_t { prefix: value.display().to_string(), }); }; - let len = if filename.len() > LPM_SIZE_MAX as usize { + + // Take the start of the path until the first occurrence of a wildcard + // character. This is used as a filter in the kernel in cases where + // the inode has failed to match. The full wildcard string is used + // for further processing in userspace. 
+ let filename_prefix = if let Some(wildcard_idx) = filename.chars().position(|c| { + "*?[]{}".contains(c) + }) { + &filename[..wildcard_idx] + } else { + // if there are no wildcards then the whole path can be + // the prefix + filename + }; + + let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { LPM_SIZE_MAX as usize } else { - filename.len() + filename_prefix.len() }; unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); memcpy( cfg.path.as_mut_ptr() as *mut _, - filename.as_ptr() as *const _, + filename_prefix.as_ptr() as *const _, len, ); cfg.bit_len = (len * 8) as u32; diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 3b84db24..64218b33 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -10,6 +10,8 @@ anyhow = { workspace = true } aya = { workspace = true } clap = { workspace = true } env_logger = { workspace = true } +glob = { workspace = true } +globset = { workspace = true } http-body-util = { workspace = true } hyper = { workspace = true } hyper-tls = { workspace = true } diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 71077cb8..2b8dc6e4 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -14,6 +14,7 @@ use tokio::{ sync::{mpsc, watch}, task::JoinHandle, }; +use globset::{GlobSet, GlobSetBuilder, Glob}; use crate::{event::Event, host_info, metrics::EventCounter}; @@ -30,6 +31,8 @@ pub struct Bpf { paths: Vec, paths_config: watch::Receiver>, + + paths_globset: GlobSet, } impl Bpf { @@ -61,6 +64,7 @@ impl Bpf { tx, paths, paths_config, + paths_globset: GlobSet::empty(), }; bpf.load_paths()?; @@ -127,11 +131,14 @@ impl Bpf { // Add the new prefixes let mut new_paths = Vec::with_capacity(paths_config.len()); + let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + builder.add(Glob::new(&p.to_string_lossy())?); let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); } + self.paths_globset = builder.build()?; // Remove old prefixes for p in 
self.paths.iter().filter(|p| !new_paths.contains(p)) { @@ -193,7 +200,22 @@ impl Bpf { while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { - Ok(event) => event, + Ok(event) => { + // With wildcards, the kernel can only match on the inode and + // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, + // the kernel matches up to /etc/) + // + // We do a proper glob match here to do a final check + // using short circuiting to avoid calling is_match in all + // scenarios + if self.paths_globset.is_match(event.get_filename()) || + self.paths_globset.is_match(event.get_host_path()) { + event + } else { + event_counter.dropped(); + continue; + } + }, Err(e) => { error!("Failed to parse event: '{e}'"); event_counter.dropped(); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index c85189d5..080274e5 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -150,6 +150,28 @@ impl Event { } } + pub fn get_filename(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.filename, + FileData::Creation(data) => &data.filename, + FileData::Unlink(data) => &data.filename, + FileData::Chmod(data) => &data.inner.filename, + FileData::Chown(data) => &data.inner.filename, + FileData::Rename(data) => &data.new.filename, + } + } + + pub fn get_host_path(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.host_file, + FileData::Creation(data) => &data.host_file, + FileData::Unlink(data) => &data.host_file, + FileData::Chmod(data) => &data.inner.host_file, + FileData::Chown(data) => &data.inner.host_file, + FileData::Rename(data) => &data.new.host_file, + } + } + /// Set the `host_file` field of the event to the one provided. 
/// /// In the case of operations that involve two paths, like rename, diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 2adbb58d..d1a806b5 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -75,8 +75,8 @@ impl HostScanner { fn scan(&self) -> anyhow::Result<()> { debug!("Host scan started"); - for path in self.config.borrow().iter() { - let path = host_info::prepend_host_mount(path); + for pattern in self.config.borrow().iter() { + let path = host_info::prepend_host_mount(pattern); self.scan_inner(&path)?; } debug!("Host scan done"); @@ -85,17 +85,18 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - if path.is_dir() { - for entry in path.read_dir()?.flatten() { - let entry = entry.path(); - self.scan_inner(&entry) - .with_context(|| format!("Failed to scan {}", entry.display()))?; + glob::glob(&path.to_string_lossy())?.try_for_each(|entry| { + match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()) + .with_context(|| format!("Failed to update entry for {}", path.display()))?; + } + Ok(()) + }, + Err(e) => Err(e.into()) } - } else if path.is_file() { - self.update_entry(path) - .with_context(|| format!("Failed to update entry for {}", path.display()))?; - } - Ok(()) + }) } fn update_entry(&self, path: &Path) -> anyhow::Result<()> { From 51882109d931deb1c7f768151357fc0485e6dbeb Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 14:20:30 +0000 Subject: [PATCH 2/9] Fmt --- fact-ebpf/src/lib.rs | 17 ++++++++--------- fact/src/bpf/mod.rs | 2 +- fact/src/host_scanner.rs | 19 +++++++++---------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index ac7802bc..ac1a0b33 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -35,15 +35,14 @@ impl TryFrom<&PathBuf> for path_prefix_t { // character. This is used as a filter in the kernel in cases where // the inode has failed to match. 
The full wildcard string is used // for further processing in userspace. - let filename_prefix = if let Some(wildcard_idx) = filename.chars().position(|c| { - "*?[]{}".contains(c) - }) { - &filename[..wildcard_idx] - } else { - // if there are no wildcards then the whole path can be - // the prefix - filename - }; + let filename_prefix = + if let Some(wildcard_idx) = filename.chars().position(|c| "*?[]{}".contains(c)) { + &filename[..wildcard_idx] + } else { + // if there are no wildcards then the whole path can be + // the prefix + filename + }; let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { LPM_SIZE_MAX as usize diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 2b8dc6e4..56ed2b6c 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -7,6 +7,7 @@ use aya::{ Btf, Ebpf, }; use checks::Checks; +use globset::{Glob, GlobSet, GlobSetBuilder}; use libc::c_char; use log::{error, info}; use tokio::{ @@ -14,7 +15,6 @@ use tokio::{ sync::{mpsc, watch}, task::JoinHandle, }; -use globset::{GlobSet, GlobSetBuilder, Glob}; use crate::{event::Event, host_info, metrics::EventCounter}; diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index d1a806b5..4ef089df 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -85,17 +85,16 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - glob::glob(&path.to_string_lossy())?.try_for_each(|entry| { - match entry { - Ok(path) => { - if path.is_file() { - self.update_entry(path.as_path()) - .with_context(|| format!("Failed to update entry for {}", path.display()))?; - } - Ok(()) - }, - Err(e) => Err(e.into()) + glob::glob(&path.to_string_lossy())?.try_for_each(|entry| match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } + Ok(()) } + Err(e) => Err(e.into()), }) } From 032cad8324571577dcec359199836ef0d29eca9e Mon Sep 17 00:00:00 
2001 From: Giles Hutton Date: Mon, 23 Feb 2026 15:26:45 +0000 Subject: [PATCH 3/9] PR review fixes --- fact-ebpf/src/lib.rs | 25 +++++++++++-------------- fact/src/bpf/mod.rs | 10 +++++++--- fact/src/host_scanner.rs | 20 +++++++++++--------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index ac1a0b33..bd84ee08 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -35,20 +35,11 @@ impl TryFrom<&PathBuf> for path_prefix_t { // character. This is used as a filter in the kernel in cases where // the inode has failed to match. The full wildcard string is used // for further processing in userspace. - let filename_prefix = - if let Some(wildcard_idx) = filename.chars().position(|c| "*?[]{}".contains(c)) { - &filename[..wildcard_idx] - } else { - // if there are no wildcards then the whole path can be - // the prefix - filename - }; - - let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { - LPM_SIZE_MAX as usize - } else { - filename_prefix.len() - }; + // + // unwrap is safe here - if there are no matches, the full string is the + // only item in the iterator + let filename_prefix = filename.split(['*', '?', '[', '{']).next().unwrap(); + let len = filename_prefix.len().min(LPM_SIZE_MAX as usize); unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); @@ -77,6 +68,12 @@ impl PartialEq for path_prefix_t { unsafe impl Pod for path_prefix_t {} +impl inode_key_t { + pub fn empty(&self) -> bool { + self.inode == 0 && self.dev == 0 + } +} + impl PartialEq for inode_key_t { fn eq(&self, other: &Self) -> bool { self.inode == other.inode && self.dev == other.dev diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 56ed2b6c..50ec3e27 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -133,7 +133,11 @@ impl Bpf { let mut new_paths = Vec::with_capacity(paths_config.len()); let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { - 
builder.add(Glob::new(&p.to_string_lossy())?); + builder.add( + Glob::new(&p.to_string_lossy()) + .with_context(|| format!("invalid glob {}", p.display())) + .unwrap(), + ); let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); @@ -208,8 +212,8 @@ impl Bpf { // We do a proper glob match here to do a final check // using short circuiting to avoid calling is_match in all // scenarios - if self.paths_globset.is_match(event.get_filename()) || - self.paths_globset.is_match(event.get_host_path()) { + if !event.get_inode().empty() || + self.paths_globset.is_match(event.get_filename()) { event } else { event_counter.dropped(); diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 4ef089df..add5ddd3 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -85,17 +85,19 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - glob::glob(&path.to_string_lossy())?.try_for_each(|entry| match entry { - Ok(path) => { - if path.is_file() { - self.update_entry(path.as_path()).with_context(|| { - format!("Failed to update entry for {}", path.display()) - })?; + for entry in glob::glob(&path.to_string_lossy())? 
{ + match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } } - Ok(()) + Err(e) => return Err(e.into()), } - Err(e) => Err(e.into()), - }) + } + Ok(()) } fn update_entry(&self, path: &Path) -> anyhow::Result<()> { From 7bbd4ae8b4150fec0f8afe101078b38628ea0871 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 12:39:09 +0000 Subject: [PATCH 4/9] Fix matching/tests and add wildcard tests --- fact-ebpf/src/bpf/file.h | 19 +++++- fact-ebpf/src/bpf/main.c | 84 +++++++++---------------- fact/src/bpf/mod.rs | 5 +- tests/conftest.py | 2 +- tests/test_config_hotreload.py | 4 +- tests/test_wildcard.py | 109 +++++++++++++++++++++++++++++++++ 6 files changed, 161 insertions(+), 62 deletions(-) create mode 100644 tests/test_wildcard.py diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index dd0639de..6c420095 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -6,12 +6,13 @@ #include "builtins.h" #include "types.h" #include "maps.h" +#include "inode.h" #include #include // clang-format on -__always_inline static bool is_monitored(struct bound_path_t* path) { +__always_inline static bool path_is_monitored(struct bound_path_t* path) { if (!filter_by_prefix()) { // no path configured, allow all return true; @@ -30,3 +31,19 @@ __always_inline static bool is_monitored(struct bound_path_t* path) { path->len = len; return res; } + +__always_inline static bool is_monitored(inode_key_t inode, struct bound_path_t* path, inode_key_t** submit) { + const inode_value_t* volatile inode_value = inode_get(&inode); + + switch (inode_is_monitored(inode_value)) { + case NOT_MONITORED: + if (path_is_monitored(path)) { + return true; + } + *submit = NULL; + return false; + case MONITORED: + break; + } + return true; +} diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 3c71fd71..e0a23522 100644 --- 
a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -45,18 +45,13 @@ int BPF_PROG(trace_file_open, struct file* file) { } inode_key_t inode_key = inode_to_key(file->f_inode); - const inode_value_t* inode = inode_get(&inode_key); - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(path)) { - goto ignored; - } - break; - case MONITORED: - break; + inode_key_t* inode_to_submit = &inode_key; + + if (!is_monitored(inode_key, path, &inode_to_submit)) { + goto ignored; } - submit_open_event(&m->file_open, event_type, path->path, &inode_key); + submit_open_event(&m->file_open, event_type, path->path, inode_to_submit); return 0; @@ -82,24 +77,16 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { } inode_key_t inode_key = inode_to_key(dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(path)) { - m->path_unlink.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - inode_remove(&inode_key); - break; + if (!is_monitored(inode_key, path, &inode_to_submit)) { + m->path_unlink.ignored++; + return 0; } submit_unlink_event(&m->path_unlink, path->path, - &inode_key); + inode_to_submit); return 0; } @@ -120,24 +107,17 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { } inode_key_t inode_key = inode_to_key(path->dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(bound_path)) { - m->path_chmod.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - break; + if (!is_monitored(inode_key, bound_path, &inode_to_submit)) { + m->path_chmod.ignored++; + return 0; } umode_t old_mode = BPF_CORE_READ(path, dentry, d_inode, i_mode); submit_mode_event(&m->path_chmod, bound_path->path, - &inode_key, + inode_to_submit, 
mode, old_mode); @@ -164,18 +144,11 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign } inode_key_t inode_key = inode_to_key(path->dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(bound_path)) { - m->path_chown.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - break; + if (!is_monitored(inode_key, bound_path, &inode_to_submit)) { + m->path_chown.ignored++; + return 0; } struct dentry* d = BPF_CORE_READ(path, dentry); @@ -184,7 +157,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign submit_ownership_event(&m->path_chown, bound_path->path, - &inode_key, + inode_to_submit, uid, gid, old_uid, @@ -217,14 +190,15 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, } inode_key_t old_inode = inode_to_key(old_dentry->d_inode); - const inode_value_t* volatile old_inode_value = inode_get(&old_inode); inode_key_t new_inode = inode_to_key(new_dentry->d_inode); - const inode_value_t* volatile new_inode_value = inode_get(&new_inode); - if (inode_is_monitored(old_inode_value) == NOT_MONITORED && - inode_is_monitored(new_inode_value) == NOT_MONITORED && - !is_monitored(old_path) && - !is_monitored(new_path)) { + inode_key_t* old_inode_submit = &old_inode; + inode_key_t* new_inode_submit = &new_inode; + + bool old_monitored = is_monitored(old_inode, old_path, &old_inode_submit); + bool new_monitored = is_monitored(new_inode, new_path, &new_inode_submit); + + if (!old_monitored && !new_monitored) { m->path_rename.ignored++; return 0; } @@ -232,8 +206,8 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, submit_rename_event(&m->path_rename, new_path->path, old_path->path, - &old_inode, - &new_inode); + old_inode_submit, + new_inode_submit); return 0; error: diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 50ec3e27..727aca4b 100644 --- 
a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -209,9 +209,8 @@ impl Bpf { // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, // the kernel matches up to /etc/) // - // We do a proper glob match here to do a final check - // using short circuiting to avoid calling is_match in all - // scenarios + // The kernel sets inode to 0 when it matched via path prefix only. + // so we only need to perform a glob match against the filename if !event.get_inode().empty() || self.paths_globset.is_match(event.get_filename()) { event diff --git a/tests/conftest.py b/tests/conftest.py index 143167bd..e00e4c5f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -143,7 +143,7 @@ def dump_logs(container, file): def fact_config(request, monitored_dir, logs_dir): cwd = os.getcwd() config = { - 'paths': [monitored_dir, '/mounted', '/container-dir'], + 'paths': [f'{monitored_dir}/**/*', '/mounted/**/*', '/container-dir/**/*'], 'grpc': { 'url': 'http://127.0.0.1:9999', }, diff --git a/tests/test_config_hotreload.py b/tests/test_config_hotreload.py index 9a875228..4afeac8f 100644 --- a/tests/test_config_hotreload.py +++ b/tests/test_config_hotreload.py @@ -136,7 +136,7 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [ignored_dir] + config['paths'] = [f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up @@ -173,7 +173,7 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [monitored_dir, ignored_dir] + config['paths'] = [f'{monitored_dir}/**/*', f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py new file mode 100644 index 
00000000..fb815ee4 --- /dev/null +++ b/tests/test_wildcard.py @@ -0,0 +1,109 @@ +import os + +import pytest +import yaml + +from event import Event, EventType, Process + + +@pytest.fixture +def wildcard_config(fact_config, monitored_dir): + config, config_file = fact_config + config['paths'] = [ + f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/**/test-*.log', + ] + with open(config_file, 'w') as f: + yaml.dump(config, f) + return config, config_file + + +def test_extension_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') + + # Should not match any pattern + log_file = os.path.join(monitored_dir, 'app.log') + with open(log_file, 'w') as f: + f.write('This should be ignored') + + e = Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path='') + + server.wait_events([e]) + + +def test_prefix_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This should be captured') + + # Wrong prefix - should not match + app_log = os.path.join(monitored_dir, 'app-test.log') + with open(app_log, 'w') as f: + f.write('This should be ignored') + + e = Event(process=process, event_type=EventType.CREATION, + file=test_log, host_path='') + + server.wait_events([e]) + + +def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + nested_dir = os.path.join(monitored_dir, 'level1', 'level2') + os.makedirs(nested_dir, exist_ok=True) + + root_txt = os.path.join(monitored_dir, 'root.txt') + with open(root_txt, 'w') as f: + f.write('Root level txt') + + nested_txt = os.path.join(nested_dir, 'nested.txt') + with open(nested_txt, 'w') as f: + f.write('Nested txt') + + # Different extension - should not match + 
nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=root_txt, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=nested_txt, host_path=''), + ] + + server.wait_events(events) + + +def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + txt_file = os.path.join(monitored_dir, 'notes.txt') + with open(txt_file, 'w') as f: + f.write('Text file') + + log_file = os.path.join(monitored_dir, 'test-output.log') + with open(log_file, 'w') as f: + f.write('Log file') + + # Matches neither pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=log_file, host_path=''), + ] + + server.wait_events(events) From 0bc7ee0675fb470b2725999b26160b4096f0a425 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 14:20:08 +0000 Subject: [PATCH 5/9] Fix basic unit test --- fact/src/bpf/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 727aca4b..71019ea1 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -278,7 +278,7 @@ mod bpf_tests { let monitored_path = env!("CARGO_MANIFEST_DIR"); let monitored_path = PathBuf::from(monitored_path); - let paths = vec![monitored_path.clone()]; + let paths = vec![PathBuf::from(format!("{}/**/*", monitored_path.display()))]; let mut config = FactConfig::default(); config.set_paths(paths); let reloader = Reloader::from(config); From 2723a737c20052e8e37d539ab771b61132824fc4 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 14:20:38 +0000 Subject: [PATCH 6/9] Fix missing single quote in README.md --- 
README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f4a6c81d..20f712a2 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ In order to run these tests as part of the unit test suite y use the following command: ```shell -cargo test --config 'target."cfg(all())".runner="sudo -E" --features=bpf-test +cargo test --config 'target."cfg(all())".runner="sudo -E"' --features=bpf-test ``` ## Create compile_commands.json From 23cdf3e2ea907bef2eee24c085cb65ef37824f68 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Wed, 25 Feb 2026 16:22:35 +0000 Subject: [PATCH 7/9] Fix tests based on PR comments --- tests/test_wildcard.py | 65 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py index fb815ee4..fd1728f3 100644 --- a/tests/test_wildcard.py +++ b/tests/test_wildcard.py @@ -1,3 +1,4 @@ +from time import sleep import os import pytest @@ -7,59 +8,69 @@ @pytest.fixture -def wildcard_config(fact_config, monitored_dir): +def wildcard_config(fact, fact_config, monitored_dir): config, config_file = fact_config config['paths'] = [ f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/*.conf', f'{monitored_dir}/**/test-*.log', ] with open(config_file, 'w') as f: yaml.dump(config, f) + + # reload the config + fact.kill('SIGHUP') + sleep(0.1) return config, config_file -def test_extension_wildcard(fact, wildcard_config, monitored_dir, server): +def test_extension_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() - txt_file = os.path.join(monitored_dir, 'document.txt') - with open(txt_file, 'w') as f: - f.write('This should be captured') - # Should not match any pattern log_file = os.path.join(monitored_dir, 'app.log') with open(log_file, 'w') as f: f.write('This should be ignored') + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') 
+ e = Event(process=process, event_type=EventType.CREATION, file=txt_file, host_path='') server.wait_events([e]) -def test_prefix_wildcard(fact, wildcard_config, monitored_dir, server): +def test_prefix_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() - test_log = os.path.join(monitored_dir, 'test-app.log') - with open(test_log, 'w') as f: - f.write('This should be captured') - # Wrong prefix - should not match app_log = os.path.join(monitored_dir, 'app-test.log') with open(app_log, 'w') as f: f.write('This should be ignored') + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This should be captured') + e = Event(process=process, event_type=EventType.CREATION, file=test_log, host_path='') server.wait_events([e]) -def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): +def test_recursive_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() nested_dir = os.path.join(monitored_dir, 'level1', 'level2') os.makedirs(nested_dir, exist_ok=True) + # Different extension - should not match + nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + root_txt = os.path.join(monitored_dir, 'root.txt') with open(root_txt, 'w') as f: f.write('Root level txt') @@ -68,11 +79,6 @@ def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): with open(nested_txt, 'w') as f: f.write('Nested txt') - # Different extension - should not match - nested_md = os.path.join(nested_dir, 'readme.md') - with open(nested_md, 'w') as f: - f.write('Should be ignored') - events = [ Event(process=process, event_type=EventType.CREATION, file=root_txt, host_path=''), @@ -83,9 +89,27 @@ def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): server.wait_events(events) -def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): +def 
test_nonrecursive_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + fut = os.path.join(monitored_dir, 'app.conf') + with open(fut, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') + + server.wait_events([e]) + + +def test_multiple_patterns(wildcard_config, monitored_dir, server): process = Process.from_proc() + # Matches no pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + txt_file = os.path.join(monitored_dir, 'notes.txt') with open(txt_file, 'w') as f: f.write('Text file') @@ -94,11 +118,6 @@ def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): with open(log_file, 'w') as f: f.write('Log file') - # Matches neither pattern - conf_file = os.path.join(monitored_dir, 'config.yml') - with open(conf_file, 'w') as f: - f.write('Config file') - events = [ Event(process=process, event_type=EventType.CREATION, file=txt_file, host_path=''), From 622641551537252e2265a95bcea39fd554783c02 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Wed, 25 Feb 2026 16:40:21 +0000 Subject: [PATCH 8/9] Use to_str instead of lossy --- fact/src/bpf/mod.rs | 9 +++++++-- fact/src/host_scanner.rs | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 71019ea1..f9b6ca3d 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -133,11 +133,16 @@ impl Bpf { let mut new_paths = Vec::with_capacity(paths_config.len()); let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + let Some(glob_str) = p.to_str() else { + bail!("failed to convert path {} to string", p.display()); + }; + builder.add( - Glob::new(&p.to_string_lossy()) - .with_context(|| format!("invalid glob {}", p.display())) + Glob::new(glob_str) + .with_context(|| format!("invalid glob {}", glob_str)) .unwrap(), ); + let prefix = 
path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index add5ddd3..150c2bfa 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -25,7 +25,7 @@ use std::{ sync::Arc, }; -use anyhow::Context; +use anyhow::{Context, bail}; use aya::maps::MapData; use fact_ebpf::{inode_key_t, inode_value_t}; use log::{debug, info, warn}; @@ -85,7 +85,11 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - for entry in glob::glob(&path.to_string_lossy())? { + let Some(glob_str) = path.to_str() else { + bail!("invalid path {}", path.display()); + }; + + for entry in glob::glob(glob_str)? { match entry { Ok(path) => { if path.is_file() { From 45c1ae93c6f9bea8e5c670b797bcb43fbbd79f89 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Mon, 2 Mar 2026 18:03:06 +0100 Subject: [PATCH 9/9] Fix rename events --- fact-ebpf/src/bpf/file.h | 2 +- fact/src/bpf/mod.rs | 12 ++---------- fact/src/event/mod.rs | 29 ++++++++++++++++++++++++++++- fact/src/host_scanner.rs | 2 +- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index 6c420095..4e3d6369 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -37,10 +37,10 @@ __always_inline static bool is_monitored(inode_key_t inode, struct bound_path_t* switch (inode_is_monitored(inode_value)) { case NOT_MONITORED: + *submit = NULL; if (path_is_monitored(path)) { return true; } - *submit = NULL; return false; case MONITORED: break; diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index f9b6ca3d..5604b821 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -210,19 +210,11 @@ impl Bpf { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { Ok(event) => { - // With wildcards, the kernel can only match on the inode and - // then the longest 
non-wildcard prefix (e.g. for /etc/**/*.conf, - // the kernel matches up to /etc/) - // - // The kernel sets inode to 0 when it matched via path prefix only. - // so we only need to perform a glob match against the filename - if !event.get_inode().empty() || - self.paths_globset.is_match(event.get_filename()) { - event - } else { + if event.is_ignored(&self.paths_globset) { event_counter.dropped(); continue; } + event }, Err(e) => { error!("Failed to parse event: '{e}'"); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 080274e5..98a70162 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -6,6 +6,7 @@ use std::{ path::{Path, PathBuf}, }; +use globset::GlobSet; use serde::Serialize; use fact_ebpf::{event_t, file_activity_type_t, inode_key_t, PATH_MAX}; @@ -150,7 +151,7 @@ impl Event { } } - pub fn get_filename(&self) -> &PathBuf { + fn get_filename(&self) -> &PathBuf { match &self.file { FileData::Open(data) => &data.filename, FileData::Creation(data) => &data.filename, @@ -161,6 +162,13 @@ impl Event { } } + fn get_old_filename(&self) -> Option<&PathBuf> { + match &self.file { + FileData::Rename(data) => Some(&data.old.filename), + _ => None, + } + } + pub fn get_host_path(&self) -> &PathBuf { match &self.file { FileData::Open(data) => &data.host_file, @@ -194,6 +202,25 @@ impl Event { data.old.host_file = host_path } } + + /// Determine if the event should be ignored. + /// + /// With wildcards, the kernel can only match on the inode and + /// then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, + /// the kernel matches up to /etc/). + /// + /// The kernel sets inode to 0 when it matched via path prefix only. + /// so we only need to perform a glob match against the filename. + /// + /// We also need to check the old values for rename events. 
+ pub fn is_ignored(&self, globset: &GlobSet) -> bool { + self.get_inode().empty() + && self.get_old_inode().is_none_or(|inode| inode.empty()) + && !globset.is_match(self.get_filename()) + && self + .get_old_filename() + .is_none_or(|path| !globset.is_match(path)) + } } impl TryFrom<&event_t> for Event { diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 150c2bfa..135b12c5 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -25,7 +25,7 @@ use std::{ sync::Arc, }; -use anyhow::{Context, bail}; +use anyhow::{bail, Context}; use aya::maps::MapData; use fact_ebpf::{inode_key_t, inode_value_t}; use log::{debug, info, warn};