From 7cf47d04d08208082a57534844fc00dba43842d3 Mon Sep 17 00:00:00 2001 From: bz00qa Date: Fri, 13 Mar 2026 01:10:06 +0100 Subject: [PATCH 1/2] feat: add output cache system with TTL and diff detection New cache module for caching filtered command output: - File-based cache in ~/.local/share/rtk/cache/ with deterministic keys (command + cwd hash) - Configurable TTL (default 5min) via config.toml or RTK_CACHE_TTL env var - Enable/disable via config.toml or RTK_CACHE env var - Volatile command detection (git status, ls, etc. are never cached) - Line-level diff between cached and current output with truncation - `rtk cache-clear` command to reset cache Tracking integration: - New cache_hit column in SQLite tracking database - record_cache_hit() and track_cache_hit() methods - All gain/analytics queries exclude cache hits (AND cache_hit = 0) to keep savings stats accurate - get_cache_stats() for cache performance reporting Config: - CacheConfig struct in config.toml ([cache] section) - enabled (default: true), ttl_minutes (default: 5) 11 tests for cache module, all existing tests pass. Co-Authored-By: Claude Opus 4.6 Signed-off-by: bz00qa --- src/cache.rs | 303 ++++++++++++++++++++++++++++++++++++++++++++++++ src/config.rs | 17 +++ src/main.rs | 9 ++ src/tracking.rs | 96 +++++++++++++-- 4 files changed, 417 insertions(+), 8 deletions(-) create mode 100644 src/cache.rs diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 00000000..fcbb17aa --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,303 @@ +use anyhow::{Context, Result}; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::path::PathBuf; + +use crate::config::Config; + +/// List of volatile commands whose output changes on every invocation. +/// These should never be cached. 
+#[allow(dead_code)] +const VOLATILE_COMMANDS: &[&str] = &[ + "git status", + "git diff", + "git log", + "git show", + "git stash", + "ls", + "find", + "grep", + "cat", + "head", + "tail", + "ps", + "top", + "env", + "date", + "time", +]; + +/// Maximum number of "Resolved" lines to show in diff output. +const MAX_REMOVED_LINES: usize = 20; + +/// Maximum number of "New" lines to show in diff output. +const MAX_ADDED_LINES: usize = 30; + +/// Returns the cache directory path: `~/.local/share/rtk/cache/` +pub fn cache_dir() -> Result<PathBuf> { + let data_dir = dirs::data_local_dir().context("Could not determine local data directory")?; + Ok(data_dir.join("rtk").join("cache")) +} + +/// Computes a deterministic cache key from command string and working directory. +pub fn cache_key(cmd: &str, cwd: &str) -> u64 { + let mut hasher = DefaultHasher::new(); + cmd.hash(&mut hasher); + cwd.hash(&mut hasher); + hasher.finish() +} + +/// Returns `false` for volatile commands whose output changes every invocation. +#[allow(dead_code)] +pub fn should_cache(cmd: &str) -> bool { + let cmd_lower = cmd.to_lowercase(); + for volatile in VOLATILE_COMMANDS { + if cmd_lower == *volatile || cmd_lower.starts_with(&format!("{} ", volatile)) { + return false; + } + } + true +} + +/// Loads cached output if it exists and is within the TTL. +/// Returns `Some((content, age_secs))` if valid, `None` otherwise. +/// Deletes expired entries. 
+pub fn load(cmd: &str, cwd: &str, ttl_minutes: u64) -> Option<(String, u64)> { + let dir = cache_dir().ok()?; + let key = cache_key(cmd, cwd); + let path = dir.join(format!("{}.txt", key)); + + if !path.exists() { + return None; + } + + let metadata = std::fs::metadata(&path).ok()?; + let modified = metadata.modified().ok()?; + let age = modified.elapsed().ok()?; + let age_secs = age.as_secs(); + + if age_secs > ttl_minutes * 60 { + // Expired — delete and return None + let _ = std::fs::remove_file(&path); + return None; + } + + let content = std::fs::read_to_string(&path).ok()?; + Some((content, age_secs)) +} + +/// Stores command output in the cache. +pub fn store(cmd: &str, cwd: &str, output: &str) -> Result<()> { + let dir = cache_dir().context("Could not determine cache directory")?; + std::fs::create_dir_all(&dir).context("Could not create cache directory")?; + + let key = cache_key(cmd, cwd); + let path = dir.join(format!("{}.txt", key)); + + std::fs::write(&path, output).context("Could not write cache file")?; + Ok(()) +} + +/// Computes a line-level diff between cached and current output. +/// +/// Shows "Resolved" for removed lines, "New" for added lines, +/// or "(no changes)" if identical. Truncates at limits with "... (N more)". 
+pub fn diff_output(cached: &str, current: &str) -> String { + if cached == current { + return "(no changes)".to_string(); + } + + let cached_lines: std::collections::HashSet<&str> = cached.lines().collect(); + let current_lines: std::collections::HashSet<&str> = current.lines().collect(); + + let removed: Vec<&str> = cached + .lines() + .filter(|line| !current_lines.contains(line)) + .collect(); + let added: Vec<&str> = current + .lines() + .filter(|line| !cached_lines.contains(line)) + .collect(); + + if removed.is_empty() && added.is_empty() { + return "(no changes)".to_string(); + } + + let mut parts: Vec<String> = Vec::new(); + + if !removed.is_empty() { + let shown = removed.len().min(MAX_REMOVED_LINES); + for line in &removed[..shown] { + parts.push(format!("Resolved: {}", line)); + } + if removed.len() > MAX_REMOVED_LINES { + parts.push(format!("... ({} more)", removed.len() - MAX_REMOVED_LINES)); + } + } + + if !added.is_empty() { + let shown = added.len().min(MAX_ADDED_LINES); + for line in &added[..shown] { + parts.push(format!("New: {}", line)); + } + if added.len() > MAX_ADDED_LINES { + parts.push(format!("... ({} more)", added.len() - MAX_ADDED_LINES)); + } + } + + parts.join("\n") +} + +/// Returns the cache TTL in minutes. +/// Priority: `RTK_CACHE_TTL` env var > config file > default (5). +pub fn get_ttl_minutes() -> u64 { + if let Ok(val) = std::env::var("RTK_CACHE_TTL") { + if let Ok(minutes) = val.parse::<u64>() { + return minutes; + } + } + + if let Ok(config) = Config::load() { + return config.cache.ttl_minutes; + } + + 5 +} + +/// Returns whether caching is enabled. +/// Priority: `RTK_CACHE` env var > config file > default (true). +pub fn is_enabled() -> bool { + if let Ok(val) = std::env::var("RTK_CACHE") { + return val != "0" && val.to_lowercase() != "false"; + } + + if let Ok(config) = Config::load() { + return config.cache.enabled; + } + + true +} + +/// Removes the entire cache directory. 
+pub fn clear() -> Result<()> { + let dir = cache_dir().context("Could not determine cache directory")?; + if dir.exists() { + std::fs::remove_dir_all(&dir).context("Could not remove cache directory")?; + println!("Cache cleared"); + } else { + println!("No cache found"); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_key_deterministic() { + let k1 = cache_key("cargo build", "/home/user/project"); + let k2 = cache_key("cargo build", "/home/user/project"); + assert_eq!(k1, k2, "Same inputs must produce the same key"); + } + + #[test] + fn test_cache_key_varies_by_cwd() { + let k1 = cache_key("cargo build", "/home/user/project-a"); + let k2 = cache_key("cargo build", "/home/user/project-b"); + assert_ne!(k1, k2, "Different cwd must produce different keys"); + } + + #[test] + fn test_cache_key_varies_by_cmd() { + let k1 = cache_key("cargo build", "/home/user/project"); + let k2 = cache_key("cargo test", "/home/user/project"); + assert_ne!(k1, k2, "Different commands must produce different keys"); + } + + #[test] + fn test_should_cache_positive() { + assert!(should_cache("cargo build")); + assert!(should_cache("cargo test")); + assert!(should_cache("npm install")); + assert!(should_cache("rustc --version")); + } + + #[test] + fn test_should_cache_negative() { + assert!(!should_cache("git status")); + assert!(!should_cache("git diff")); + assert!(!should_cache("git log")); + assert!(!should_cache("git log --oneline -10")); + assert!(!should_cache("git show abc123")); + assert!(!should_cache("git stash")); + assert!(!should_cache("ls")); + assert!(!should_cache("ls -la")); + assert!(!should_cache("find . 
-name foo")); + assert!(!should_cache("grep pattern file")); + assert!(!should_cache("ps")); + assert!(!should_cache("top")); + assert!(!should_cache("env")); + assert!(!should_cache("date")); + assert!(!should_cache("time")); + } + + #[test] + fn test_diff_identical() { + let output = diff_output("line1\nline2\nline3", "line1\nline2\nline3"); + assert_eq!(output, "(no changes)"); + } + + #[test] + fn test_diff_added_lines() { + let cached = "line1\nline2"; + let current = "line1\nline2\nline3\nline4"; + let diff = diff_output(cached, current); + assert!(diff.contains("New: line3")); + assert!(diff.contains("New: line4")); + assert!(!diff.contains("Resolved")); + } + + #[test] + fn test_diff_removed_lines() { + let cached = "line1\nline2\nline3"; + let current = "line1"; + let diff = diff_output(cached, current); + assert!(diff.contains("Resolved: line2")); + assert!(diff.contains("Resolved: line3")); + assert!(!diff.contains("New")); + } + + #[test] + fn test_diff_mixed_changes() { + let cached = "error1\nerror2\nwarning1"; + let current = "error2\nwarning1\nnew_error"; + let diff = diff_output(cached, current); + assert!(diff.contains("Resolved: error1")); + assert!(diff.contains("New: new_error")); + } + + #[test] + fn test_diff_truncates_removed() { + let cached_lines: Vec<String> = (0..25).map(|i| format!("removed_{}", i)).collect(); + let cached = cached_lines.join("\n"); + let current = "only_this"; + let diff = diff_output(&cached, current); + assert!( + diff.contains("... (5 more)"), + "Should truncate removed lines at 20" + ); + } + + #[test] + fn test_diff_truncates_added() { + let cached = "only_this"; + let added_lines: Vec<String> = (0..35).map(|i| format!("added_{}", i)).collect(); + let current = added_lines.join("\n"); + let diff = diff_output(cached, &current); + assert!( + diff.contains("... 
(5 more)"), + "Should truncate added lines at 30" + ); + } +} diff --git a/src/config.rs b/src/config.rs index 94917a5e..9ad25b9e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -16,6 +16,8 @@ pub struct Config { pub telemetry: TelemetryConfig, #[serde(default)] pub hooks: HooksConfig, + #[serde(default)] + pub cache: CacheConfig, } #[derive(Debug, Serialize, Deserialize, Default)] @@ -26,6 +28,21 @@ pub struct HooksConfig { pub exclude_commands: Vec, } +#[derive(Debug, Serialize, Deserialize)] +pub struct CacheConfig { + pub enabled: bool, + pub ttl_minutes: u64, +} + +impl Default for CacheConfig { + fn default() -> Self { + Self { + enabled: true, + ttl_minutes: 5, + } + } +} + #[derive(Debug, Serialize, Deserialize)] pub struct TrackingConfig { pub enabled: bool, diff --git a/src/main.rs b/src/main.rs index 289de863..9c90106f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ mod aws_cmd; mod binlog; +mod cache; mod cargo_cmd; mod cc_economics; mod ccusage; @@ -438,6 +439,10 @@ enum Commands { create: bool, }, + /// Clear the output cache (removes all cached command results) + #[command(name = "cache-clear", display_order = 101)] + CacheClear, + /// Vitest commands with compact output Vitest { #[command(subcommand)] @@ -1687,6 +1692,10 @@ fn main() -> Result<()> { } } + Commands::CacheClear => { + cache::clear()?; + } + Commands::Vitest { command } => match command { VitestCommands::Run { args } => { vitest_cmd::run(vitest_cmd::VitestCommand::Run, &args, cli.verbose)?; diff --git a/src/tracking.rs b/src/tracking.rs index 66363a6d..e7352b10 100644 --- a/src/tracking.rs +++ b/src/tracking.rs @@ -294,6 +294,11 @@ impl Tracker { [], ); } + // Migration: add cache_hit column to distinguish cache hits from normal commands + let _ = conn.execute( + "ALTER TABLE commands ADD COLUMN cache_hit INTEGER DEFAULT 0", + [], + ); // Index for fast project-scoped gain queries // added let _ = conn.execute( "CREATE INDEX IF NOT EXISTS idx_project_path_timestamp ON 
commands(project_path, timestamp)", @@ -377,6 +382,43 @@ impl Tracker { Ok(()) } + /// Record a command execution that was served from cache. + /// Stored with cache_hit=1 so gain queries can exclude it. + pub fn record_cache_hit( + &self, + original_cmd: &str, + rtk_cmd: &str, + input_tokens: usize, + output_tokens: usize, + exec_time_ms: u64, + ) -> Result<()> { + let saved = input_tokens.saturating_sub(output_tokens); + let pct = if input_tokens > 0 { + (saved as f64 / input_tokens as f64) * 100.0 + } else { + 0.0 + }; + let project_path = current_project_path_string(); + + self.conn.execute( + "INSERT INTO commands (timestamp, original_cmd, rtk_cmd, project_path, input_tokens, output_tokens, saved_tokens, savings_pct, exec_time_ms, cache_hit) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, 1)", + params![ + Utc::now().to_rfc3339(), + original_cmd, + rtk_cmd, + project_path, + input_tokens as i64, + output_tokens as i64, + saved as i64, + pct, + exec_time_ms as i64, + ], + )?; + self.cleanup_old()?; + Ok(()) + } + fn cleanup_old(&self) -> Result<()> { let cutoff = Utc::now() - chrono::Duration::days(HISTORY_DAYS); self.conn.execute( @@ -507,7 +549,8 @@ impl Tracker { let mut stmt = self.conn.prepare( "SELECT input_tokens, output_tokens, saved_tokens, exec_time_ms FROM commands - WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2)", // added: project filter + WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2) + AND cache_hit = 0", )?; let rows = stmt.query_map(params![project_exact, project_glob], |row| { @@ -566,9 +609,10 @@ impl Tracker { "SELECT rtk_cmd, COUNT(*), SUM(saved_tokens), AVG(savings_pct), AVG(exec_time_ms) FROM commands WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2) + AND cache_hit = 0 GROUP BY rtk_cmd ORDER BY SUM(saved_tokens) DESC - LIMIT 10", // added: project filter in WHERE + LIMIT 10", )?; let rows = stmt.query_map(params![project_exact, project_glob], |row| { @@ -594,9 +638,10 @@ impl Tracker { 
"SELECT DATE(timestamp), SUM(saved_tokens) FROM commands WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2) + AND cache_hit = 0 GROUP BY DATE(timestamp) ORDER BY DATE(timestamp) DESC - LIMIT 30", // added: project filter in WHERE + LIMIT 30", )?; let rows = stmt.query_map(params![project_exact, project_glob], |row| { @@ -644,8 +689,9 @@ impl Tracker { SUM(exec_time_ms) as total_time FROM commands WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2) + AND cache_hit = 0 GROUP BY DATE(timestamp) - ORDER BY DATE(timestamp) DESC", // added: project filter + ORDER BY DATE(timestamp) DESC", )?; let rows = stmt.query_map(params![project_exact, project_glob], |row| { @@ -718,8 +764,9 @@ impl Tracker { SUM(exec_time_ms) as total_time FROM commands WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2) + AND cache_hit = 0 GROUP BY week_start - ORDER BY week_start DESC", // added: project filter + ORDER BY week_start DESC", )?; let rows = stmt.query_map(params![project_exact, project_glob], |row| { @@ -792,8 +839,9 @@ impl Tracker { SUM(exec_time_ms) as total_time FROM commands WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2) + AND cache_hit = 0 GROUP BY month - ORDER BY month DESC", // added: project filter + ORDER BY month DESC", )?; let rows = stmt.query_map(params![project_exact, project_glob], |row| { @@ -866,8 +914,9 @@ impl Tracker { "SELECT timestamp, rtk_cmd, saved_tokens, savings_pct FROM commands WHERE (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2) + AND cache_hit = 0 ORDER BY timestamp DESC - LIMIT ?3", // added: project filter + LIMIT ?3", )?; let rows = stmt.query_map( @@ -915,7 +964,7 @@ impl Tracker { /// Get overall savings percentage (for telemetry). 
pub fn overall_savings_pct(&self) -> Result<f64> { let (total_input, total_saved): (i64, i64) = self.conn.query_row( - "SELECT COALESCE(SUM(input_tokens), 0), COALESCE(SUM(saved_tokens), 0) FROM commands", + "SELECT COALESCE(SUM(input_tokens), 0), COALESCE(SUM(saved_tokens), 0) FROM commands WHERE cache_hit = 0", [], |row| Ok((row.get(0)?, row.get(1)?)), )?; @@ -946,6 +995,20 @@ impl Tracker { )?; Ok(saved) } + + /// Returns (cache_hit_count, total_input_tokens_avoided) for cache-hit rows. + pub fn get_cache_stats(&self, project_path: Option<&str>) -> Result<(usize, usize)> { + let (project_exact, project_glob) = project_filter_params(project_path); + let (count, tokens): (i64, i64) = self.conn.query_row( + "SELECT COUNT(*), COALESCE(SUM(input_tokens), 0) + FROM commands + WHERE cache_hit = 1 + AND (?1 IS NULL OR project_path = ?1 OR project_path GLOB ?2)", + params![project_exact, project_glob], + |row| Ok((row.get(0)?, row.get(1)?)), + )?; + Ok((count as usize, tokens as usize)) + } } fn get_db_path() -> Result<PathBuf> { @@ -1099,6 +1162,23 @@ impl TimedExecution { } } + + /// Track a cache hit (records with cache_hit=1 in the database). + pub fn track_cache_hit(&self, original_cmd: &str, rtk_cmd: &str, input: &str, output: &str) { + let elapsed_ms = self.start.elapsed().as_millis() as u64; + let input_tokens = estimate_tokens(input); + let output_tokens = estimate_tokens(output); + + if let Ok(tracker) = Tracker::new() { + let _ = tracker.record_cache_hit( + original_cmd, + rtk_cmd, + input_tokens, + output_tokens, + elapsed_ms, + ); + } + } + + /// Track passthrough commands (timing-only, no token counting). 
/// /// For commands that stream output or run interactively where output From 937ef9521d37d4a4cd820fb2e25078f689de58ec Mon Sep 17 00:00:00 2001 From: bz00qa Date: Fri, 13 Mar 2026 19:37:21 +0100 Subject: [PATCH 2/2] docs: add CHANGELOG and README entries for cache system Signed-off-by: bZ00qa <167500396+bz00qa@users.noreply.github.com> Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 9 +++++++++ README.md | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92d1836b..aab4193d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to rtk (Rust Token Killer) will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + + +### Features + +* **cache:** add output cache system with TTL and diff detection +* **cache-clear:** add command to clear the output cache +* cache configuration via `[cache]` in `config.toml` or `RTK_CACHE`/`RTK_CACHE_TTL` env vars + ## [0.28.2](https://github.com/rtk-ai/rtk/compare/v0.28.1...v0.28.2) (2026-03-10) diff --git a/README.md b/README.md index bb2c5bd2..f5b12802 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,8 @@ rtk discover # Find missed savings opportunities rtk discover --all --since 7 # All projects, last 7 days rtk session # Show RTK adoption across recent sessions + +rtk cache-clear # Clear the output cache ``` ## Global Flags @@ -354,8 +356,14 @@ exclude_commands = ["curl", "playwright"] # skip rewrite for these enabled = true # save raw output on failure (default: true) mode = "failures" # "failures", "always", or "never" max_files = 20 # rotation limit + +[cache] +enabled = true # enable output cache (default: true) +ttl_minutes = 5 # cache TTL in minutes (default: 5) ``` +Cache can also be configured via env vars: `RTK_CACHE=0` to disable, `RTK_CACHE_TTL=<minutes>` to set the TTL. 
+ ### Tee: Full Output Recovery When a command fails, RTK saves the full unfiltered output so the LLM can read it without re-executing: