From bb652852f29ad8d15ddc69c1ca6652ffaba4213d Mon Sep 17 00:00:00 2001 From: Matjaz Domen Pecan Date: Fri, 13 Mar 2026 16:46:38 +0100 Subject: [PATCH] feat: replace custom filter engine with tokf-filter crate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delegate RTK's 8-stage filter pipeline to tokf-filter::apply() while keeping the registry, command matching, build.rs concatenation, rtk verify, and omission markers unchanged. Unlocks tokf's full feature set (sections, chunks, JSON extraction, templates) for .rtk/filters.toml authors. - All 890 unit tests pass - All 111/111 inline verify tests pass - 7 pre-existing verify test failures fixed (on_empty + empty input) - One cosmetic change: truncate_lines_at uses unicode ellipsis (…) - +2.1ms startup overhead, +0.2MB binary size Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 6 + Cargo.lock | 153 ++++++++++++++++- Cargo.toml | 2 + docs/filter-workflow.md | 17 +- src/filters/README.md | 29 +++- src/filters/basedpyright.toml | 4 +- src/filters/biome.toml | 4 +- src/filters/gcc.toml | 4 +- src/filters/oxlint.toml | 4 +- src/filters/skopeo.toml | 4 +- src/filters/ty.toml | 4 +- src/filters/xcodebuild.toml | 4 +- src/toml_filter.rs | 303 +++++++++++++++------------------- 13 files changed, 337 insertions(+), 201 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7af32d3a..2731c1d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Features +* **toml-dsl:** replace RTK's custom 8-stage filter engine with `tokf-filter` crate + * Full backward compatibility — all 47 built-in filters and user-authored `.rtk/filters.toml` files work unchanged + * RTK keeps its TOML parsing/registry/matching layer; delegates filtering to `tokf-filter::apply()` + * RTK handles `head_lines`/`tail_lines`/`max_lines` with omission markers (tokf silently truncates) + * `truncate_lines_at` now uses `…` (unicode ellipsis) instead of `...` (3 ASCII dots) + * Unlocks tokf's advanced features for user-authored filters: sections, chunks, aggregates, templates, JSON extraction, `on_success`/`on_failure` branches, `dedup` * **toml-dsl:** declarative TOML filter engine — add command filters without writing Rust ([#299](https://github.com/rtk-ai/rtk/issues/299)) * 8 primitives: `strip_ansi`, `replace`, `match_output`, `strip/keep_lines_matching`, `truncate_lines_at`, `head/tail_lines`, `max_lines`, `on_empty` * lookup chain: `.rtk/filters.toml` (project-local) → `~/.config/rtk/filters.toml` (user-global) → built-in filters diff --git a/Cargo.lock b/Cargo.lock index a3349b3a..1d459d34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -621,6 +621,15 @@ dependencies = [ "hashbrown 0.16.1", ] +[[package]] +name = "inventory" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" +dependencies = [ + "rustversion", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -700,6 +709,12 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -710,6 +725,16 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -799,7 +824,7 @@ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom 0.2.17", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -865,7 +890,9 @@ dependencies = [ "serde_json", "sha2", "tempfile", - "thiserror", + "thiserror 1.0.69", + "tokf-common", + "tokf-filter", "toml", "ureq", "walkdir", @@ -992,6 +1019,56 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_json_path" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b992cea3194eea663ba99a042d61cea4bd1872da37021af56f6a37e0359b9d33" +dependencies = [ + "inventory", + "nom", + "regex", + "serde", + "serde_json", + "serde_json_path_core", + "serde_json_path_macros", + "thiserror 2.0.18", +] + +[[package]] +name = "serde_json_path_core" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde67d8dfe7d4967b5a95e247d4148368ddd1e753e500adb34b3ffe40c6bc1bc" +dependencies = [ + "inventory", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "serde_json_path_macros" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "517acfa7f77ddaf5c43d5f119c44a683774e130b4247b7d3210f8924506cfac8" +dependencies = [ + "inventory", + "serde_json_path_core", + "serde_json_path_macros_internal", +] + +[[package]] +name = "serde_json_path_macros_internal" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafbefbe175fa9bf03ca83ef89beecff7d2a95aaacd5732325b90ac8c3bd7b90" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -1089,7 +1166,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", ] [[package]] @@ -1103,6 +1189,17 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -1113,6 +1210,47 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokf-common" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "789fd1b675e510b9c55723ba53cb468f162ae6be474e115bb7ce9d14e5dd3bc6" +dependencies = [ + "serde", + "serde_json", + "sha2", + "unicode-normalization", +] + +[[package]] +name = "tokf-filter" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d4a7c249647fcecdfb92341bbc022b80cc3432b7ce6e18a6bc7dab6d7110a8b" +dependencies = [ + "anyhow", + "regex", + "serde", + "serde_json", + "serde_json_path", + "tokf-common", +] + [[package]] name = "toml" version = "0.8.23" @@ -1166,6 +1304,15 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 934daee0..157ee8d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,8 @@ ureq = "2" hostname = "0.4" flate2 = "1.0" quick-xml = "0.37" +tokf-filter = { version = "0.2.33", default-features = false } +tokf-common = "0.2.33" [build-dependencies] toml = "0.8" diff --git a/docs/filter-workflow.md b/docs/filter-workflow.md index 0b0d32c1..fa41de0a 100644 --- a/docs/filter-workflow.md +++ b/docs/filter-workflow.md @@ -40,14 +40,15 @@ flowchart TD S -->|"✅ match"| U["exec command\ncapture stdout"] U --> V - subgraph PIPELINE ["8-stage filter pipeline"] - V["strip_ansi"] --> W["replace"] - W --> X{"match_output\nshort-circuit?"} - X -->|"✅ pattern matched"| Y[["emit message\nstop pipeline"]] - X -->|"no match"| Z["strip/keep_lines"] - Z --> AA["truncate_lines_at"] - AA --> AB["tail_lines"] - AB --> AC["max_lines"] + subgraph PIPELINE ["tokf-filter pipeline + RTK post-processing"] + V{"match_output\nshort-circuit?\n(strip_ansi applied\nif enabled)"} -->|"✅ pattern matched"| Y[["emit message\nstop pipeline"]] + V -->|"no match"| W["replace\n(replace_all mode)"] + W --> WA["strip_ansi\ntrim_lines"] + WA --> Z["skip/keep lines"] + Z --> ZA["dedup"] + ZA --> AA["truncate_lines_at"] + AA --> AB["head/tail_lines\n(RTK adds omission\nmarkers)"] + AB --> AC["max_lines\n(RTK adds truncation\nmarker)"] AC --> AD{"output\nempty?"} AD -->|"yes"| AE[["emit on_empty"]] AD -->|"no"| AF[["print filtered\noutput + exit code"]] diff --git a/src/filters/README.md b/src/filters/README.md index b7d7487c..e58c953c 100644 --- a/src/filters/README.md +++ b/src/filters/README.md @@ -3,6 +3,9 @@ Each `.toml` file in this directory defines one filter and its inline tests. Files are concatenated alphabetically by `build.rs` into a single TOML blob embedded in the binary. +Filtering is powered by the [tokf-filter](https://crates.io/crates/tokf-filter) engine. +RTK handles the TOML registry, command matching, and `head/tail/max_lines` omission markers. + ## Adding a filter 1. Copy any existing `.toml` file and rename it (e.g. `my-tool.toml`) @@ -37,15 +40,27 @@ expected = "expected filtered output" | `description` | string | Human-readable description | | `match_command` | regex | Matches the command string (e.g. `"^docker\\s+inspect"`) | | `strip_ansi` | bool | Strip ANSI escape codes before processing | -| `strip_lines_matching` | regex[] | Drop lines matching any regex | -| `keep_lines_matching` | regex[] | Keep only lines matching at least one regex | -| `replace` | array | Regex substitutions (`{ pattern, replacement }`) | -| `match_output` | array | Short-circuit rules (`{ pattern, message }`) | -| `truncate_lines_at` | int | Truncate lines longer than N characters | -| `max_lines` | int | Keep only the first N lines | -| `tail_lines` | int | Keep only the last N lines (applied after other filters) | +| `strip_lines_matching` | regex[] | Drop lines matching any regex (alias: `skip`) | +| `keep_lines_matching` | regex[] | Keep only lines matching at least one regex (alias: `keep`) | +| `replace` | array | Regex substitutions (`{ pattern, replacement }`) — all occurrences per line | +| `match_output` | array | Short-circuit rules (`{ pattern, message, unless }`) | +| `truncate_lines_at` | int | Truncate lines longer than N characters (uses `…` ellipsis) | +| `head_lines` | int | Keep only the first N lines (with `"... (N lines omitted)"` marker) | +| `tail_lines` | int | Keep only the last N lines (with omission marker) | +| `max_lines` | int | Absolute line cap after head/tail (with `"... (N lines truncated)"` marker) | | `on_empty` | string | Fallback message when filtered output is empty | +## Pipeline order + +1. `match_output` — short-circuit if pattern matches (ANSI-stripped when `strip_ansi = true`) +2. `replace` — regex substitutions, all occurrences per line, rules chained sequentially +3. `strip_ansi` — remove ANSI escape codes +4. `skip`/`keep` — filter lines by regex +5. `truncate_lines_at` — truncate long lines +6. `head_lines`/`tail_lines` — keep first/last N lines (with omission markers) +7. `max_lines` — absolute line cap +8. `on_empty` — message if result is empty + ## Naming convention Use the command name as the filename: `terraform-plan.toml`, `docker-inspect.toml`, `mix-compile.toml`. diff --git a/src/filters/basedpyright.toml b/src/filters/basedpyright.toml index 9ba88d07..2a6a779a 100644 --- a/src/filters/basedpyright.toml +++ b/src/filters/basedpyright.toml @@ -42,6 +42,6 @@ Found 10 source files expected = "0 errors, 0 warnings, 0 informations" [[tests.basedpyright]] -name = "empty input passes through" +name = "empty input returns on_empty" input = "" -expected = "" +expected = "basedpyright: ok" diff --git a/src/filters/biome.toml b/src/filters/biome.toml index c9414c34..c21b7d37 100644 --- a/src/filters/biome.toml +++ b/src/filters/biome.toml @@ -40,6 +40,6 @@ Checked 42 files in 0.3s expected = "biome: ok" [[tests.biome]] -name = "empty input passes through" +name = "empty input returns on_empty" input = "" -expected = "" +expected = "biome: ok" diff --git a/src/filters/gcc.toml b/src/filters/gcc.toml index 8fc556c1..41a4b234 100644 --- a/src/filters/gcc.toml +++ b/src/filters/gcc.toml @@ -44,6 +44,6 @@ collect2: error: ld returned 1 exit status expected = "/usr/bin/ld: /tmp/main.o: undefined reference to 'missing_func'\ncollect2: error: ld returned 1 exit status" [[tests.gcc]] -name = "empty input passes through" +name = "empty input returns on_empty" input = "" -expected = "" +expected = "gcc: ok" diff --git a/src/filters/oxlint.toml b/src/filters/oxlint.toml index d5935041..731e7d43 100644 --- a/src/filters/oxlint.toml +++ b/src/filters/oxlint.toml @@ -38,6 +38,6 @@ Finished in 5ms on 100 files. expected = "oxlint: ok" [[tests.oxlint]] -name = "empty input passes through" +name = "empty input returns on_empty" input = "" -expected = "" +expected = "oxlint: ok" diff --git a/src/filters/skopeo.toml b/src/filters/skopeo.toml index 9827eb0a..790e7ffd 100644 --- a/src/filters/skopeo.toml +++ b/src/filters/skopeo.toml @@ -40,6 +40,6 @@ input = """ expected = "{\n \"Name\": \"docker.io/library/nginx\",\n \"Tag\": \"latest\",\n \"Digest\": \"sha256:abc123\",\n \"RepoTags\": [\"latest\", \"1.25\"],\n \"Created\": \"2026-01-01T00:00:00Z\"\n}" [[tests.skopeo]] -name = "empty input passes through" +name = "empty input returns on_empty" input = "" -expected = "" +expected = "skopeo: ok" diff --git a/src/filters/ty.toml b/src/filters/ty.toml index 981aa955..777cc7fb 100644 --- a/src/filters/ty.toml +++ b/src/filters/ty.toml @@ -45,6 +45,6 @@ All checks passed! expected = "All checks passed!" [[tests.ty]] -name = "empty input passes through" +name = "empty input returns on_empty" input = "" -expected = "" +expected = "ty: ok" diff --git a/src/filters/xcodebuild.toml b/src/filters/xcodebuild.toml index f83cb18c..5c6ace8a 100644 --- a/src/filters/xcodebuild.toml +++ b/src/filters/xcodebuild.toml @@ -94,6 +94,6 @@ Executed 2 tests, with 1 failure in 0.003 seconds expected = "Test Suite 'All tests' started at 2026-03-10 12:00:00\nTest Suite 'AppTests' started at 2026-03-10 12:00:00\nTest Case '-[AppTests testExample]' passed (0.001 seconds).\nTest Case '-[AppTests testFailing]' failed (0.002 seconds).\nTest Suite 'AppTests' passed at 2026-03-10 12:00:01\nExecuted 2 tests, with 1 failure in 0.003 seconds" [[tests.xcodebuild]] -name = "empty input passes through" +name = "empty input returns on_empty" input = "" -expected = "" +expected = "xcodebuild: ok" diff --git a/src/toml_filter.rs b/src/toml_filter.rs index 36eb52ab..4c004524 100644 --- a/src/toml_filter.rs +++ b/src/toml_filter.rs @@ -1,7 +1,7 @@ /// TOML-based filter DSL for RTK. /// -/// Provides a declarative pipeline of 8 stages that can be configured -/// via TOML files. Lookup priority (first match wins): +/// Provides a declarative pipeline that can be configured via TOML files. +/// Lookup priority (first match wins): /// 1. `.rtk/filters.toml` — project-local, committable with the repo /// 2. `~/.config/rtk/filters.toml` — user-global, applies to all projects /// 3. Built-in TOML — `src/filters/*.toml`, concatenated by build.rs and embedded at compile time @@ -13,19 +13,20 @@ /// - `RTK_NO_TOML=1` — bypass TOML engine entirely /// - `RTK_TOML_DEBUG=1` — print which filter matched and line counts to stderr /// -/// Pipeline stages (applied in order): -/// 1. strip_ansi — remove ANSI escape codes -/// 2. replace — regex substitutions, line-by-line, chainable -/// 3. match_output — short-circuit: if blob matches a pattern, return message immediately -/// 4. strip/keep_lines — filter lines by regex -/// 5. truncate_lines_at — truncate each line to N chars -/// 6. head/tail_lines — keep first/last N lines -/// 7. max_lines — absolute line cap -/// 8. on_empty — message if result is empty +/// Filtering is delegated to the `tokf-filter` crate, which provides a rich +/// pipeline (sections, chunks, aggregates, templates, JSON extraction, etc.). +/// RTK keeps its own TOML parsing/registry/matching layer and adds omission +/// markers for head/tail/max_lines that tokf silently truncates. use lazy_static::lazy_static; -use regex::{Regex, RegexSet}; +use regex::Regex; use serde::Deserialize; use std::collections::BTreeMap; +use tokf_common::config::types::{ + CommandPattern, FilterConfig, MatchOutputRule as TokfMatchOutputRule, + ReplaceRule as TokfReplaceRule, +}; +use tokf_filter::filter::{FilterOptions, FilterResult}; +use tokf_filter::CommandResult; // Built-in filters: concatenated from src/filters/*.toml by build.rs at compile time. const BUILTIN_TOML: &str = include_str!(concat!(env!("OUT_DIR"), "/builtin_filters.toml")); @@ -107,43 +108,22 @@ struct TomlFilterDef { // Compiled types (post-validation, ready to use) // --------------------------------------------------------------------------- -#[derive(Debug)] -struct CompiledMatchOutputRule { - pattern: Regex, - message: String, - /// If set and matches the blob, this rule is skipped (prevents swallowing errors). - unless: Option, -} - -#[derive(Debug)] -struct CompiledReplaceRule { - pattern: Regex, - replacement: String, -} - -#[derive(Debug)] -enum LineFilter { - None, - Strip(RegexSet), - Keep(RegexSet), -} - /// A filter that has been parsed and compiled — all regexes are ready. +/// Delegates actual filtering to `tokf_filter::filter::apply()`. #[derive(Debug)] pub struct CompiledFilter { pub name: String, #[allow(dead_code)] pub description: Option, match_regex: Regex, - strip_ansi: bool, - replace: Vec, - match_output: Vec, - line_filter: LineFilter, - truncate_lines_at: Option, + /// tokf-filter config (handles strip_ansi, replace, match_output, skip/keep, + /// truncate_lines_at, on_empty). Does NOT include head/tail/max_lines — + /// those are handled by RTK for omission markers. + config: FilterConfig, + /// RTK-specific: head/tail/max_lines with omission markers head_lines: Option, tail_lines: Option, pub max_lines: Option, - on_empty: Option, } // --------------------------------------------------------------------------- @@ -287,6 +267,95 @@ const RUST_HANDLED_COMMANDS: &[&str] = &[ "learn", ]; +/// Convert an RTK `TomlFilterDef` into a tokf `FilterConfig`. +/// +/// Maps RTK field names to tokf equivalents. `head_lines`, `tail_lines`, +/// and `max_lines` are NOT included — RTK handles those separately to add +/// omission markers (tokf silently truncates). +fn convert_def_to_filter_config(def: &TomlFilterDef) -> FilterConfig { + FilterConfig { + command: CommandPattern::default(), + run: None, + skip: def.strip_lines_matching.clone(), + keep: def.keep_lines_matching.clone(), + step: vec![], + extract: None, + match_output: def + .match_output + .iter() + .map(|r| TokfMatchOutputRule { + contains: None, + pattern: Some(r.pattern.clone()), + output: r.message.clone(), + unless: r.unless.clone(), + }) + .collect(), + section: vec![], + on_success: None, + on_failure: None, + parse: None, + output: None, + fallback: None, + replace: def + .replace + .iter() + .map(|r| TokfReplaceRule { + pattern: r.pattern.clone(), + output: r.replacement.clone(), + // RTK's original replace used replace_all semantics + replace_all: true, + }) + .collect(), + dedup: false, + dedup_window: None, + strip_ansi: def.strip_ansi, + trim_lines: false, + strip_empty_lines: false, + collapse_empty_lines: false, + lua_script: None, + chunk: vec![], + json: None, + variant: vec![], + show_history_hint: false, + inject_path: false, + passthrough_args: vec![], + description: def.description.clone(), + truncate_lines_at: def.truncate_lines_at, + on_empty: def.on_empty.clone(), + // head/tail/max_lines deliberately NOT passed to tokf — + // RTK handles them with omission markers. + head: None, + tail: None, + max_lines: None, + } +} + +/// Validate regexes in a filter definition before conversion. +/// Returns an error string if any regex is invalid. +fn validate_def_regexes(def: &TomlFilterDef) -> Result<(), String> { + for r in &def.replace { + Regex::new(&r.pattern) + .map_err(|e| format!("invalid replace pattern '{}': {}", r.pattern, e))?; + } + for r in &def.match_output { + Regex::new(&r.pattern) + .map_err(|e| format!("invalid match_output pattern '{}': {}", r.pattern, e))?; + if let Some(ref u) = r.unless { + Regex::new(u) + .map_err(|e| format!("invalid match_output unless pattern '{}': {}", u, e))?; + } + } + for pat in &def.strip_lines_matching { + Regex::new(pat) + .map_err(|e| format!("invalid strip_lines_matching regex '{}': {}", pat, e))?; + } + for pat in &def.keep_lines_matching { + Regex::new(pat) + .map_err(|e| format!("invalid keep_lines_matching regex '{}': {}", pat, e))?; + } + Ok(()) +} + fn compile_filter(name: String, def: TomlFilterDef) -> Result { // Mutual exclusion: strip and keep cannot both be set if !def.strip_lines_matching.is_empty() && !def.keep_lines_matching.is_empty() { @@ -309,68 +378,19 @@ fn compile_filter(name: String, def: TomlFilterDef) -> Result, _>>()?; + // Validate all regexes before converting + validate_def_regexes(&def)?; - let match_output = def - .match_output - .into_iter() - .map(|r| -> Result { - let pat = r.pattern.clone(); - let pattern = Regex::new(&r.pattern) - .map_err(|e| format!("invalid match_output pattern '{}': {}", pat, e))?; - let unless = r - .unless - .as_deref() - .map(|u| { - Regex::new(u) - .map_err(|e| format!("invalid match_output unless pattern '{}': {}", u, e)) - }) - .transpose()?; - Ok(CompiledMatchOutputRule { - pattern, - message: r.message, - unless, - }) - }) - .collect::, _>>()?; - - let line_filter = if !def.strip_lines_matching.is_empty() { - let set = RegexSet::new(&def.strip_lines_matching) - .map_err(|e| format!("invalid strip_lines_matching regex: {}", e))?; - LineFilter::Strip(set) - } else if !def.keep_lines_matching.is_empty() { - let set = RegexSet::new(&def.keep_lines_matching) - .map_err(|e| format!("invalid keep_lines_matching regex: {}", e))?; - LineFilter::Keep(set) - } else { - LineFilter::None - }; + let config = convert_def_to_filter_config(&def); Ok(CompiledFilter { name, description: def.description, match_regex, - strip_ansi: def.strip_ansi, - replace, - match_output, - line_filter, - truncate_lines_at: def.truncate_lines_at, + config, head_lines: def.head_lines, tail_lines: def.tail_lines, max_lines: def.max_lines, - on_empty: def.on_empty, }) } @@ -397,74 +417,27 @@ pub fn find_filter_in<'a>( /// Apply a compiled filter pipeline to raw stdout. Pure String -> String. /// -/// Pipeline stages (in order): -/// 1. strip_ansi — remove ANSI escape codes -/// 2. replace — regex substitutions, line-by-line, chainable -/// 3. match_output — short-circuit if blob matches a pattern -/// 4. strip/keep_lines — filter lines by regex -/// 5. truncate_lines_at — truncate each line to N chars -/// 6. head/tail_lines — keep first/last N lines -/// 7. max_lines — absolute line cap -/// 8. on_empty — message if result is empty +/// Delegates to `tokf_filter::filter::apply()` for the core pipeline +/// (strip_ansi, replace, match_output, skip/keep, truncate_lines_at, on_empty), +/// then applies RTK's head/tail/max_lines with omission markers. pub fn apply_filter(filter: &CompiledFilter, stdout: &str) -> String { - let mut lines: Vec = stdout.lines().map(String::from).collect(); - - // 1. strip_ansi - if filter.strip_ansi { - lines = lines - .into_iter() - .map(|l| crate::utils::strip_ansi(&l)) - .collect(); - } - - // 2. replace — line-by-line, rules chained sequentially - if !filter.replace.is_empty() { - lines = lines - .into_iter() - .map(|mut line| { - for rule in &filter.replace { - line = rule - .pattern - .replace_all(&line, rule.replacement.as_str()) - .into_owned(); - } - line - }) - .collect(); - } + // Build a CommandResult for tokf — we only have stdout, no stderr capture + let cmd_result = CommandResult { + stdout: stdout.to_string(), + stderr: String::new(), + exit_code: 0, + combined: stdout.to_string(), + }; - // 3. match_output — short-circuit on full blob match (first rule wins) - // If `unless` is set and also matches the blob, the rule is skipped. - if !filter.match_output.is_empty() { - let blob = lines.join("\n"); - for rule in &filter.match_output { - if rule.pattern.is_match(&blob) { - if let Some(ref unless_re) = rule.unless { - if unless_re.is_match(&blob) { - continue; // errors/warnings present — skip this rule - } - } - return rule.message.clone(); - } - } - } + let FilterResult { + output: tokf_output, + } = tokf_filter::filter::apply(&filter.config, &cmd_result, &[], &FilterOptions::default()); - // 4. strip OR keep (mutually exclusive) - match &filter.line_filter { - LineFilter::Strip(set) => lines.retain(|l| !set.is_match(l)), - LineFilter::Keep(set) => lines.retain(|l| set.is_match(l)), - LineFilter::None => {} - } + // Post-process: apply RTK's head/tail/max_lines with omission markers + // (tokf silently truncates; RTK adds "... (N lines omitted)" markers) + let mut lines: Vec = tokf_output.lines().map(String::from).collect(); - // 5. truncate_lines_at — uses utils::truncate (unicode-safe) - if let Some(max_chars) = filter.truncate_lines_at { - lines = lines - .into_iter() - .map(|l| crate::utils::truncate(&l, max_chars)) - .collect(); - } - - // 6. head + tail + // head + tail let total = lines.len(); if let (Some(head), Some(tail)) = (filter.head_lines, filter.tail_lines) { if total > head + tail { @@ -486,7 +459,7 @@ pub fn apply_filter(filter: &CompiledFilter, stdout: &str) -> String { } } - // 7. max_lines — absolute cap applied after head/tail (includes omit messages) + // max_lines — absolute cap applied after head/tail (includes omit messages) if let Some(max) = filter.max_lines { if lines.len() > max { let truncated = lines.len() - max; @@ -495,15 +468,7 @@ pub fn apply_filter(filter: &CompiledFilter, stdout: &str) -> String { } } - // 8. on_empty - let result = lines.join("\n"); - if result.trim().is_empty() { - if let Some(ref msg) = filter.on_empty { - return msg.clone(); - } - } - - result + lines.join("\n") } // --------------------------------------------------------------------------- @@ -719,11 +684,11 @@ match_command = "^cmd" truncate_lines_at = 5 "#, ); - // utils::truncate(s, 5) takes 2 chars + "..." when len > 5 + // tokf truncates to N-1 chars + "…" (unicode ellipsis) when len > N // "hello" = 5 chars exactly, stays unchanged - // "日本語xyz" = 6 chars, truncated to "日本..." (take 2 + "...") + // "日本語xyz" = 6 chars, truncated to "日本語x" + "…" = 5 chars let out = apply_filter(&f, "hello\n日本語xyz"); - assert_eq!(out, "hello\n日本..."); + assert_eq!(out, "hello\n日本語x\u{2026}"); } #[test]