diff --git a/.gitignore b/.gitignore index 11a7378..291801e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ config.toml *.swp *.swo .env -rustbot.db* +rustfox.db* .worktrees/ # Playwright config and cache diff --git a/Cargo.lock b/Cargo.lock index 0163a57..b280be3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,32 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "adobe-cmap-parser" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8abfa9a4688de8fc9f42b3f013b6fffec18ed8a554f5f113577e0b9b3212a3" +dependencies = [ + "pom", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -22,9 +48,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "aquamarine" @@ -68,15 +94,15 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "axum" -version = "0.8.8" +version = "0.8.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ "axum-core", "bytes", @@ -132,9 +158,15 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bitflags" -version = "2.11.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" [[package]] name = "block-buffer" @@ -145,11 +177,35 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bs58" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" +dependencies = [ + "tinyvec", +] + [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + +[[package]] +name = "bytecount" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "bytemuck" @@ -157,22 +213,60 @@ version = "1.25.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" -version = "1.2.56" +version = "1.2.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" dependencies = [ "find-msvc-tools", "shlex", ] +[[package]] +name = "cfb" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + +[[package]] +name = "cff-parser" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f5b6e9141c036f3ff4ce7b2f7e432b0f00dee416ddcd4f17741d189ddc2e9d" + [[package]] name = "cfg-if" version = "1.0.4" @@ -187,9 +281,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.44" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -199,6 +293,22 @@ dependencies = [ "windows-link", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "colored" version = "3.1.1" @@ -243,6 +353,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "croner" version = "2.2.0" @@ -252,6 +371,37 @@ dependencies = [ "chrono", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -283,38 +433,14 @@ dependencies = [ "memchr", ] -[[package]] -name = "darling" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" -dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", -] - [[package]] name = "darling" version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core 0.23.0", - "darling_macro 0.23.0", -] - -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn", + "darling_core", + "darling_macro", ] [[package]] @@ -330,33 +456,22 @@ dependencies = [ "syn", ] -[[package]] -name = "darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", - "quote", - "syn", -] - [[package]] name = "darling_macro" version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core 0.23.0", + "darling_core", "quote", "syn", ] [[package]] name = "deranged" -version = "0.5.6" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" +checksum = 
"7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", "serde_core", @@ -416,15 +531,30 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "docx-rs" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed73cbf5e1c37baa23f4132569ac1187829f03922c206bd68fe109e3001a343d" +dependencies = [ + "base64", + "image", + "quick-xml", + "serde", + "serde_json", + "thiserror 2.0.18", + "zip", +] + [[package]] name = "dptree" version = "0.5.1" @@ -441,11 +571,20 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "ecb" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7" +dependencies = [ + "cipher", +] + [[package]] name = "either" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" [[package]] name = "encoding_rs" @@ -482,6 +621,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "euclid" +version = "0.20.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad" +dependencies = [ + "num-traits", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -496,9 +644,24 @@ checksum = 
"7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "fax" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "caf1079563223d5d59d83c85886a56e586cfd5c1a26292e971a0fa266531ac5a" + +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] [[package]] name = "find-msvc-tools" @@ -506,6 +669,26 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -662,22 +845,44 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.4.1" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = 
"getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", "wasip2", "wasip3", ] +[[package]] +name = "gif" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee8cfcc411d9adbbaba82fb72661cc1bcca13e8bba98b364e62b2dba8f960159" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "h2" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" dependencies = [ "atomic-waker", "bytes", @@ -685,13 +890,24 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.13.0", + "indexmap 2.14.0", "slab", "tokio", "tokio-util", "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -709,9 +925,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.1" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" [[package]] name = "hashlink" @@ -728,6 +944,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -736,9 +958,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" dependencies = [ "bytes", "itoa", @@ -781,9 +1003,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "1.8.1" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", "bytes", @@ -796,7 +1018,6 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -804,15 +1025,14 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.7" +version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ "http", "hyper", "hyper-util", "rustls", - "rustls-pki-types", "tokio", "tokio-rustls", "tower-service", @@ -885,12 +1105,13 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -898,9 +1119,9 @@ 
dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -911,9 +1132,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -925,15 +1146,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -945,15 +1166,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -989,14 +1210,43 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", ] +[[package]] +name = "image" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" +dependencies = [ + "bytemuck", + "byteorder-lite", + "color_quant", + "gif", + "image-webp", + "moxcms", + "num-traits", + "png", + "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3" +dependencies = [ + "byteorder-lite", + "quick-error", +] + [[package]] name = "include_dir" version = "0.7.4" @@ -1029,32 +1279,41 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "serde", "serde_core", ] [[package]] -name = "ipnet" -version = "2.11.0" +name = "infer" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = 
"a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" +dependencies = [ + "cfb", +] [[package]] -name = "iri-string" -version = "0.7.10" +name = "inout" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ - "memchr", - "serde", + "block-padding", + "generic-array", ] +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + [[package]] name = "itertools" version = "0.10.5" @@ -1066,15 +1325,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" -version = "0.2.20" +version = "0.2.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543" +checksum = "4603d3033e49e2b0e31229fcab20a5d40089c607d975cd9c80551dc69eed9102" dependencies = [ "jiff-static", "log", @@ -1085,9 +1344,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.20" +version = "0.2.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" +checksum = "782d32378dddf207193ac91cefb848ad41abb58195c95168e1291227a0832b47" dependencies = [ "proc-macro2", "quote", @@ -1096,10 +1355,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.85" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -1118,9 +1379,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libredox" @@ -1144,15 +1405,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" @@ -1165,9 +1426,37 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f" + +[[package]] +name = "lopdf" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7184fdea2bc3cd272a1acec4030c321a8f9875e877b3f92a53f2f6033fdc289" +dependencies = [ + "aes", + "bitflags 2.12.1", + "cbc", + "ecb", + "encoding_rs", + "flate2", + "getrandom 0.3.4", + "indexmap 2.14.0", + "itoa", + "log", + "md-5", + 
"nom", + "nom_locate", + "rand 0.9.4", + "rangemap", + "sha2", + "stringprep", + "thiserror 2.0.18", + "ttf-parser", + "weezl", +] [[package]] name = "matchers" @@ -1184,11 +1473,21 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "mime" @@ -1206,22 +1505,42 @@ dependencies = [ "unicase", ] +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" -version = "1.1.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", "windows-sys 0.61.2", ] +[[package]] +name = "moxcms" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b" +dependencies = [ + "num-traits", + "pxfm", +] + [[package]] name = "native-tls" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9d5d26952a508f321b4d3d2e80e78fc2603eaefcdf0c30783867f19586518bdc" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" dependencies = [ "libc", "log", @@ -1236,16 +1555,36 @@ dependencies = [ [[package]] name = "nix" -version = "0.31.1" +version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66" +checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" dependencies = [ - "bitflags", + "bitflags 2.12.1", "cfg-if", "cfg_aliases", "libc", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "nom_locate" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d" +dependencies = [ + "bytecount", + "memchr", + "nom", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -1257,9 +1596,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" [[package]] name = "num-derive" @@ -1281,6 +1620,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "object" version = "0.37.3" @@ -1290,23 +1639,37 @@ dependencies = [ "memchr", ] +[[package]] +name = "ocrs" +version = "0.12.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5379fdd3f11522b5a2ff53017a189463dabf5d0a9c915cb3eb97fabec4ea11c" +dependencies = [ + "anyhow", + "rayon", + "rten", + "rten-imageproc", + "rten-tensor", + "thiserror 2.0.18", + "wasm-bindgen", +] + [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "openssl" -version = "0.10.75" +version = "0.10.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +checksum = "a45fa2aa886c42762255da344f0a0d313e254066c46aad76f300c3d3da62d967" dependencies = [ - "bitflags", + "bitflags 2.12.1", "cfg-if", "foreign-types", "libc", - "once_cell", "openssl-macros", "openssl-sys", ] @@ -1330,9 +1693,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "openssl-sys" -version = "0.9.111" +version = "0.9.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +checksum = "f28a22dc7140cda5f096e5e7724a6962ca81a7f8bfd2979f9b18c11af56318c4" dependencies = [ "cc", "libc", @@ -1371,9 +1734,26 @@ dependencies = [ [[package]] name = "pastey" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b867cad97c0791bbd3aaa6472142568c6c9e8f71937e98379f584cfb0cf35bec" +checksum = "2ee67f1008b1ba2321834326597b8e186293b049a023cdef258527550b9935b4" + +[[package]] +name = "pdf-extract" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28ba1758a3d3f361459645780e09570b573fc3c82637449e9963174c813a98" +dependencies = [ + "adobe-cmap-parser", + 
"cff-parser", + "encoding_rs", + "euclid", + "log", + "lopdf", + "postscript", + "type1-encoding-parser", + "unicode-normalization", +] [[package]] name = "percent-encoding" @@ -1383,18 +1763,18 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b" dependencies = [ "proc-macro2", "quote", @@ -1403,21 +1783,34 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] -name = "pin-utils" -version = "0.1.0" +name = "pkg-config" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] -name = "pkg-config" -version = "0.3.32" +name = "png" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" +dependencies = [ + "bitflags 2.12.1", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "pom" +version = "1.1.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6" [[package]] name = "portable-atomic" @@ -1427,18 +1820,24 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" dependencies = [ "portable-atomic", ] +[[package]] +name = "postscript" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78451badbdaebaf17f053fd9152b3ffb33b516104eacb45e7864aaa9c712f306" + [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -1500,12 +1899,12 @@ dependencies = [ [[package]] name = "process-wrap" -version = "9.0.3" +version = "9.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccd9713fe2c91c3c85ac388b31b89de339365d2c995146e630b5e0da9d06526a" +checksum = "2e842efad9119158434d193c6682e2ebee4b44d6ad801d7b349623b3f57cdf55" dependencies = [ "futures", - "indexmap 2.13.0", + "indexmap 2.14.0", "nix", "tokio", "tracing", @@ -1514,9 +1913,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +checksum = "645dbe486e346d9b5de3ef16ede18c26e6c70ad97418f4874b8b1889d6e761ea" 
dependencies = [ "ar_archive_writer", "cc", @@ -1528,7 +1927,7 @@ version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f86ba2052aebccc42cbbb3ed234b8b13ce76f75c3551a303cb2bcffcff12bb14" dependencies = [ - "bitflags", + "bitflags 2.12.1", "getopts", "memchr", "pulldown-cmark-escape", @@ -1536,16 +1935,38 @@ dependencies = [ ] [[package]] -name = "pulldown-cmark-escape" -version = "0.11.0" +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + +[[package]] +name = "pxfm" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quick-xml" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +dependencies = [ + "encoding_rs", + "memchr", +] [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -1556,15 +1977,31 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", ] [[package]] @@ -1574,7 +2011,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -1586,6 +2033,41 @@ dependencies = [ "getrandom 0.2.17", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rangemap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] 
+name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rc-box" version = "1.3.0" @@ -1601,7 +2083,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags", + "bitflags 2.12.1", ] [[package]] @@ -1660,9 +2142,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" @@ -1710,9 +2192,9 @@ dependencies = [ [[package]] name = "rgb" -version = "0.8.52" +version = "0.8.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6a884d2998352bb4daf0183589aec883f16a6da1f4dde84d8e2e9a5409a1ce" +checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4" dependencies = [ "bytemuck", ] @@ -1764,20 +2246,127 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e88ad84b8b6237a934534a62b379a5be6388915663c0cc598ceb9b3292bbbfe" dependencies = [ - "darling 0.23.0", + "darling", "proc-macro2", "quote", "serde_json", "syn", ] +[[package]] +name = "rten" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c230fa4ade87c913f61dbd911b7eb0d49460ceff3f1e4fabc837fac191137c" +dependencies = [ + "flatbuffers", + "num_cpus", + "rayon", + "rten-base", + "rten-gemm", + "rten-model-file", + "rten-onnx", + "rten-shape-inference", + "rten-simd", + "rten-tensor", + "rten-vecmath", + "rustc-hash", + "smallvec", + "typeid", + 
"wasm-bindgen", +] + +[[package]] +name = "rten-base" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2738cf8bb4c27f828ac788d01ccf4e367e8e773cfec6851f81851b5211de6a79" +dependencies = [ + "rayon", +] + +[[package]] +name = "rten-gemm" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "330a81a0ca209fb5ce21bd17efa0bd287d5881c6cebfbff0b21c4294a1a14a9e" +dependencies = [ + "rayon", + "rten-base", + "rten-simd", + "rten-tensor", +] + +[[package]] +name = "rten-imageproc" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f148e7e941fb5727b9046a5fa1b45525543d5105f14b384fd9261df0ee49bc" +dependencies = [ + "rten-tensor", +] + +[[package]] +name = "rten-model-file" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2f8d270f07ab1bbfff47250c6039f6caa5da59d6da7d74f66aa48559aa6fea" +dependencies = [ + "flatbuffers", + "rten-base", +] + +[[package]] +name = "rten-onnx" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23086eef75bfb55278cb0b45cf9f5a877d466d914914aafebee4ffca9b24d20c" + +[[package]] +name = "rten-shape-inference" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e8a913c7ca40e2bfbb2a0cd447cce56b33ab19435f56693271a2ef37cf58984" +dependencies = [ + "rten-tensor", + "smallvec", +] + +[[package]] +name = "rten-simd" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b19a0032dfcb70dd20960c1c51a37674b237586cbc1ce586f45b46605d108e82" + +[[package]] +name = "rten-tensor" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05dc744a270aa32d154f1a3df8e48740ccc1be9dfbcf23295ada66d83aa98de6" +dependencies = [ + "rayon", + "rten-base", + "smallvec", + "typeid", +] + +[[package]] +name = 
"rten-vecmath" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9574ddebf5671bc08ceb76e2e1638fadc57fdeff318634eab2c29e9a803cff64" +dependencies = [ + "rten-base", + "rten-simd", +] + [[package]] name = "rusqlite" version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37e34486da88d8e051c7c0e23c3f15fd806ea8546260aa2fec247e97242ec143" dependencies = [ - "bitflags", + "bitflags 2.12.1", "chrono", "csv", "fallible-iterator", @@ -1792,6 +2381,21 @@ dependencies = [ "uuid", ] +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustfox" version = "0.1.0" @@ -1802,13 +2406,19 @@ dependencies = [ "base64", "chrono", "dirs", + "docx-rs", "futures", "futures-util", + "image", + "infer", + "ocrs", + "pdf-extract", "pulldown-cmark", - "rand", + "rand 0.8.6", "regex", "reqwest", "rmcp", + "rten", "rusqlite", "serde", "serde_json", @@ -1826,11 +2436,11 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags", + "bitflags 2.12.1", "errno", "libc", "linux-raw-sys", @@ -1839,9 +2449,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.36" +version = "0.23.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" 
+checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ "once_cell", "rustls-pki-types", @@ -1852,18 +2462,18 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.9" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "ring", "rustls-pki-types", @@ -1884,9 +2494,9 @@ checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -1937,11 +2547,11 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "security-framework" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags", + "bitflags 2.12.1", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -1950,9 +2560,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -1960,9 +2570,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "serde" @@ -2007,9 +2617,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -2052,15 +2662,16 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.16.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" +checksum = "e72c1c2cb7b223fafb600a619537a871c2818583d619401b785e7c0b746ccde2" dependencies = [ "base64", + "bs58", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.13.0", + "indexmap 2.14.0", "schemars 0.9.0", "schemars 1.2.1", "serde_core", @@ -2071,11 +2682,11 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.16.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" +checksum = "b90c488738ecb4fb0262f41f43bc40efc5868d9fb744319ddf5f5317f417bfac" dependencies = [ - "darling 0.21.3", + "darling", "proc-macro2", "quote", "syn", @@ -2103,9 +2714,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "signal-hook-registry" @@ -2117,6 +2728,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "slab" version = "0.4.12" @@ -2131,28 +2748,28 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.2" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "sqlite-vec" -version = "0.1.6" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec77b84fb8dd5f0f8def127226db83b5d1152c5bf367f09af03998b76ba554a" +checksum = "d0ba424237a9a5db2f6071f193319e2b6a32f7f3961debb2fbbfe67067abce3f" dependencies = [ "cc", ] [[package]] name = "sse-stream" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb4dc4d33c68ec1f27d386b5610a351922656e1fdf5c05bbaad930cd1519479a" +checksum = "f3962b63f038885f15bce2c6e02c0e7925c072f1ac86bb60fd44c5c6b762fb72" dependencies = [ "bytes", "futures-util", @@ -2169,15 +2786,26 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" +checksum = 
"640c8cdd92b6b12f5bcb1803ca3bbf5ab96e5e6b6b96b9ab77dabe9e880b3190" dependencies = [ "cc", "cfg-if", "libc", "psm", - "windows-sys 0.59.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", ] [[package]] @@ -2194,9 +2822,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.116" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -2229,7 +2857,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags", + "bitflags 2.12.1", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -2288,7 +2916,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f7a34ca8e971fa892e633858c07547fe138ef4a02e4a4eaa1d35e517d6e0bc4" dependencies = [ - "bitflags", + "bitflags 2.12.1", "bytes", "chrono", "derive_more", @@ -2328,12 +2956,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.25.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.1", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -2388,6 +3016,20 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tiff" +version = 
"0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63feaf3343d35b6ca4d50483f94843803b0f51634937cc2ec519fc32232bc52" +dependencies = [ + "fax", + "flate2", + "half", + "quick-error", + "weezl", + "zune-jpeg", +] + [[package]] name = "time" version = "0.3.47" @@ -2421,19 +3063,34 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" -version = "1.49.0" +version = "1.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" dependencies = [ "bytes", "libc", @@ -2463,9 +3120,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", @@ -2543,7 +3200,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" 
dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", "serde_spanned", "toml_datetime", @@ -2575,20 +3232,20 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ - "bitflags", + "bitflags 2.12.1", "bytes", "futures-util", "http", "http-body", - "iri-string", "pin-project-lite", "tower", "tower-layer", "tower-service", + "url", ] [[package]] @@ -2649,9 +3306,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -2671,11 +3328,32 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "ttf-parser" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2df906b07856748fa3f6e0ad0cbaa047052d4a7dd609e231c4f72cee8c36f31" + +[[package]] +name = "type1-encoding-parser" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa10c302f5a53b7ad27fd42a3996e23d096ba39b5b8dd6d9e683a05b01bee749" +dependencies = [ + "pom", +] + +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "typenum" -version = "1.19.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" [[package]] name = "unicase" @@ -2683,11 +3361,32 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" -version = "1.0.23" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-width" @@ -2728,11 +3427,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.21.0" +version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "wasm-bindgen", ] @@ -2772,11 +3471,11 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.2+wasi-0.2.9" +version = 
"1.0.3+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.57.1", ] [[package]] @@ -2785,14 +3484,14 @@ version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.51.0", ] [[package]] name = "wasm-bindgen" -version = "0.2.108" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" dependencies = [ "cfg-if", "once_cell", @@ -2803,23 +3502,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.58" +version = "0.4.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" +checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" dependencies = [ - "cfg-if", - "futures-util", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.108" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2827,9 +3522,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.108" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" dependencies = [ "bumpalo", "proc-macro2", @@ -2840,9 +3535,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.108" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" dependencies = [ "unicode-ident", ] @@ -2864,7 +3559,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.13.0", + "indexmap 2.14.0", "wasm-encoder", "wasmparser", ] @@ -2888,22 +3583,28 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags", + "bitflags 2.12.1", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.14.0", "semver", ] [[package]] name = "web-sys" -version = "0.3.85" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" dependencies = [ "js-sys", "wasm-bindgen", ] +[[package]] +name = "weezl" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" + [[package]] name = "windows" version = "0.62.2" @@ -3034,24 +3735,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 
-dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -3085,30 +3768,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows-threading" version = "0.2.1" @@ -3130,12 +3796,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3148,12 +3808,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3166,24 +3820,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3196,12 +3838,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3214,12 +3850,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3232,12 +3862,6 @@ version 
= "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -3250,17 +3874,11 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "winnow" -version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" dependencies = [ "memchr", ] @@ -3274,6 +3892,12 @@ dependencies = [ "wit-bindgen-rust-macro", ] +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + [[package]] name = "wit-bindgen-core" version = "0.51.0" @@ -3293,7 +3917,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.13.0", + "indexmap 2.14.0", "prettyplease", "syn", "wasm-metadata", @@ -3323,8 +3947,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags", - "indexmap 2.13.0", + "bitflags 2.12.1", + "indexmap 2.14.0", "log", "serde", "serde_derive", @@ -3343,7 
+3967,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", "semver", "serde", @@ -3355,15 +3979,15 @@ dependencies = [ [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -3372,9 +3996,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -3384,18 +4008,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" dependencies = [ "proc-macro2", "quote", @@ -3404,18 +4028,18 @@ dependencies = [ 
[[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -3431,9 +4055,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ -3442,9 +4066,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -3453,17 +4077,44 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "zip" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-utils", + "flate2", +] + [[package]] name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zune-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" + +[[package]] +name = "zune-jpeg" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml index 792fa67..20d0356 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,16 +58,31 @@ sqlite-vec = "0.1" # Setup wizard web server (used only by src/bin/setup.rs) axum = "0.8" +# OCR (pure Rust, neural-network based) +ocrs = "0.12" +rten = { version = "0.24", features = ["rten_format"] } + +# Image loading/processing +image = { version = "0.25", default-features = false, features = ["jpeg", "png", "gif", "webp"] } + +# Document processing +pdf-extract = "0.10" +docx-rs = "0.4" + +# MIME type detection +infer = "0.19" + +# Base64 for vision API content parts and OAuth PKCE helpers +base64 = "0.22" + # OAuth 2.0 / PKCE helpers (used only by src/bin/setup.rs) rand = "0.8" sha2 = "0.10" -base64 = "0.22" -# Secret-redaction filter (M7.4) +# Secret-redaction filter regex = "1" # OS home-directory resolution for the persistent home dir (~/.rustfox) dirs = "5" - [dev-dependencies] tempfile = "3" diff --git a/config.example.toml b/config.example.toml index 46a05f2..5061781 100644 --- a/config.example.toml +++ b/config.example.toml @@ -18,6 +18,7 @@ base_url = "https://openrouter.ai/api/v1" # base_url = "http://localhost:11434/v1" # Maximum tokens in response max_tokens = 4096 +# 
supports_vision = false # Set to true if your model supports image inputs # System prompt for the AI assistant system_prompt = """You are a helpful AI assistant with access to tools. \ Use the available tools to help the user with their tasks. \ @@ -87,6 +88,12 @@ Be concise and helpful.""" # model = "qwen/qwen3-embedding-8b" # dimensions = 1536 +# OCR configuration (optional) +# Used to extract text from images when supports_vision = false. +# Models are downloaded automatically on first use. +# [ocr] +# model_dir = "~/.cache/ocrs" # Where OCR model files are cached (downloaded on first use) + # MCP Server Configurations # Each [[mcp_servers]] block defines an MCP server to connect to # The bot will discover and register tools from each server diff --git a/docs/plans/2026-03-25-telegram-file-image-support.md b/docs/plans/2026-03-25-telegram-file-image-support.md new file mode 100644 index 0000000..122b76e --- /dev/null +++ b/docs/plans/2026-03-25-telegram-file-image-support.md @@ -0,0 +1,654 @@ +# Telegram File & Image Support Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Handle Telegram photos and file attachments (PDF, DOCX, images), routing them through a vision/OCR/document extraction pipeline before injecting context into the LLM. + +**Architecture:** Five-layer change — (1) `Cargo.toml` deps, (2) platform data model (Attachment), (3) multi-modal LLM messages, (4) new `file_processor` module (image/PDF/DOCX → text/content), (5) telegram handler + agent integration. OCR uses `ocrs` (pure Rust, neural-network-based). Long documents (>6000 chars) are chunked and stored via the existing `knowledge` store + `sqlite-vec` vector DB, then RAG-retrieved per user query. 
+ +**Tech Stack:** Rust 2021, Tokio, teloxide 0.17, ocrs 0.12 (OCR), rten 0.24 (model runtime), image 0.25 (image loading), pdf-extract 0.10 (PDF), docx-rs 0.4 (DOCX), infer 0.19 (MIME detection), base64 0.22 (vision encoding) + +--- + +## Reading List + +Read before touching any code: + +- `src/platform/mod.rs` — IncomingMessage struct (will add `attachments`) +- `src/llm.rs` lines 1–18 — ChatMessage struct (will change `content` type) +- `src/config.rs` lines 44–55 — OpenRouterConfig (will add `supports_vision`) +- `src/platform/telegram.rs` lines 81–100 — handle_message fn (will add photo/doc handling) +- `src/agent.rs` lines 125–215 — process_message (will add attachment processing) +- `src/memory/knowledge.rs` lines 19–78 — `remember` and `search_knowledge` (reused for long-doc RAG) + +--- + +## Task 1: Add Dependencies + +**Files:** +- Modify: `Cargo.toml` + +Add under `[dependencies]`: +```toml +# OCR (pure Rust, neural-network based) +ocrs = "0.12" +rten = { version = "0.24", features = ["rten_format"] } + +# Image loading/processing +image = { version = "0.25", default-features = false, features = ["jpeg", "png", "gif", "webp"] } + +# Document processing +pdf-extract = "0.10" +docx-rs = "0.4" + +# MIME type detection +infer = "0.19" + +# Base64 for vision API content parts +base64 = "0.22" +``` + +**Step 1:** Edit `Cargo.toml` + +**Step 2:** Run `cargo check` to verify deps resolve + +**Step 3:** Commit: `feat: add file processing dependencies` + +--- + +## Task 2: Platform Data Model — Attachment + +**Files:** +- Modify: `src/platform/mod.rs` + +Add `AttachmentKind` enum, `Attachment` struct, and `attachments` field to `IncomingMessage`: + +```rust +/// What kind of attachment was received +#[derive(Debug, Clone, PartialEq)] +pub enum AttachmentKind { + Image, + Pdf, + Docx, + Other, +} + +/// A file attachment received from a platform +#[derive(Debug, Clone)] +pub struct Attachment { + pub kind: AttachmentKind, + /// Absolute path to the downloaded temp 
file + pub path: std::path::PathBuf, + pub mime_type: String, + /// Original filename, if known + pub file_name: Option<String>, +} + +/// A message received from any platform +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct IncomingMessage { + pub platform: String, + pub user_id: String, + pub chat_id: String, + pub user_name: String, + pub text: String, + /// Attached files, if any + #[serde(default)] + pub attachments: Vec<Attachment>, +} +``` + +**Step 1:** Edit `src/platform/mod.rs` + +**Step 2:** Fix any existing `IncomingMessage { ... }` construction sites that now need `attachments: vec![]` (check `src/agent.rs` and `src/platform/telegram.rs`). Grep: `IncomingMessage {` + +**Step 3:** Run `cargo check` + +**Step 4:** Commit: `feat: add Attachment type to IncomingMessage` + +--- + +## Task 3: Multi-Modal ChatMessage + +**Files:** +- Modify: `src/llm.rs` + +Change `ChatMessage.content` from `Option<String>` to `Option<MessageContent>`, where `MessageContent` is either a plain string (for tool result messages) or a vec of content parts (for vision messages). Keep backward-compatible serialization. + +```rust +/// A single part in a multi-modal message +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ContentPart { + Text { text: String }, + ImageUrl { image_url: ImageUrlContent }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageUrlContent { + /// "data:image/jpeg;base64,..." or a URL + pub url: String, +} + +/// Either a plain text string or a list of content parts (multi-modal) +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum MessageContent { + Text(String), + Parts(Vec<ContentPart>), +} + +impl MessageContent { + /// Extract all text from the content (for logging, RAG, etc.) 
+ pub fn as_text(&self) -> String { + match self { + Self::Text(s) => s.clone(), + Self::Parts(parts) => parts + .iter() + .filter_map(|p| if let ContentPart::Text { text } = p { Some(text.as_str()) } else { None }) + .collect::<Vec<_>>() + .join(" "), + } + } + pub fn from_text(s: impl Into<String>) -> Self { + Self::Text(s.into()) + } +} + +pub struct ChatMessage { + pub role: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option<MessageContent>, + ... +} +``` + +**Note on backward compat:** All places that currently do `content: Some("...".to_string())` must change to `content: Some(MessageContent::from_text("..."))`. Places that read `.content` as a string need `.content.as_ref().map(|c| c.as_text())`; the old `.content.as_deref()` calls no longer type-check and are replaced by `as_text`. + +**Step 1:** Edit `src/llm.rs` — add types above `ChatMessage`, update `ChatMessage.content` + +**Step 2:** Update all construction/access sites in `llm.rs` and `agent.rs` (search: `.content.as_deref()`, `content: Some(`) + +**Step 3:** Update `src/memory/conversations.rs` if it constructs `ChatMessage` directly (grep: `ChatMessage {`) + +**Step 4:** Update `src/platform/telegram.rs` `IncomingMessage` construction (not ChatMessage, just ensure it compiles) + +**Step 5:** Run `cargo check` — fix all type errors + +**Step 6:** Run `cargo test` + +**Step 7:** Commit: `feat: multi-modal ChatMessage content type` + +--- + +## Task 4: Config — Vision Support + OCR Model Dir + +**Files:** +- Modify: `src/config.rs` +- Modify: `config.example.toml` + +Add to `OpenRouterConfig`: +```rust +#[serde(default)] +pub supports_vision: bool, +``` + +Add `OcrConfig`: +```rust +#[derive(Debug, Deserialize, Clone)] +pub struct OcrConfig { + /// Directory to cache OCR model files (downloaded on first use) + #[serde(default = "default_ocr_model_dir")] + pub model_dir: PathBuf, +} + +fn default_ocr_model_dir() -> PathBuf { + dirs_next::cache_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("ocrs") +} +``` + +Add to `Config`: 
+```rust +#[serde(default = "default_ocr_config")] +pub ocr: OcrConfig, +``` + +Note: `dirs_next` is not a dependency — use `std::env::var("HOME")` fallback instead: +```rust +fn default_ocr_model_dir() -> PathBuf { + std::env::var("HOME") + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(".")) + .join(".cache/ocrs") +} +``` + +**Step 1:** Edit `src/config.rs` +**Step 2:** Edit `config.example.toml` — document new fields +**Step 3:** Run `cargo check` +**Step 4:** Commit: `feat: add vision support and OCR config` + +--- + +## Task 5: File Processor Module + +**Files:** +- Create: `src/file_processor/mod.rs` +- Modify: `src/main.rs` (add `mod file_processor;`) + +This is the core new module. It exposes: +- `process_attachments(attachments, user_query, config, memory) -> ProcessedAttachments` +- `ProcessedAttachments { text_context: String, image_parts: Vec<ContentPart> }` + +### Sub-task 5a: Image processing (vision or OCR) + +```rust +/// Returns a ContentPart::ImageUrl if vision-capable model, or extracted text via OCR. 
+pub async fn process_image_attachment( + path: &Path, + mime_type: &str, + supports_vision: bool, + ocr_model_dir: &Path, +) -> Result<ImageResult> { + if supports_vision { + let bytes = std::fs::read(path)?; + let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes); + let data_url = format!("data:{};base64,{}", mime_type, encoded); + Ok(ImageResult::VisionPart(ContentPart::ImageUrl { + image_url: ImageUrlContent { url: data_url } + })) + } else { + let text = ocr_image(path, ocr_model_dir).await?; + Ok(ImageResult::OcrText(text)) + } +} +``` + +OCR using `ocrs`: +```rust +async fn ocr_image(path: &Path, model_dir: &Path) -> Result<String> { + let det_path = model_dir.join("text-detection.rten"); + let rec_path = model_dir.join("text-recognition.rten"); + + // Download models if not cached + ensure_ocr_models(model_dir).await?; + + let detection_model = rten::Model::load_file(&det_path)?; + let recognition_model = rten::Model::load_file(&rec_path)?; + + let engine = ocrs::OcrEngine::new(ocrs::OcrEngineParams { + detection_model: Some(detection_model), + recognition_model: Some(recognition_model), + ..Default::default() + })?; + + let img = image::open(path)?.into_rgb8(); + let img_source = ocrs::ImageSource::from_bytes(img.as_raw(), img.dimensions())?; + let ocr_input = engine.prepare_input(img_source)?; + let text = engine.get_text(&ocr_input)?; + Ok(text) +} + +async fn ensure_ocr_models(model_dir: &Path) -> Result<()> { + std::fs::create_dir_all(model_dir)?; + let det = model_dir.join("text-detection.rten"); + let rec = model_dir.join("text-recognition.rten"); + + const DET_URL: &str = "https://ocrs-models.s3.us-east-1.amazonaws.com/text-detection.rten"; + const REC_URL: &str = "https://ocrs-models.s3.us-east-1.amazonaws.com/text-recognition.rten"; + + if !det.exists() { + download_file(DET_URL, &det).await?; + } + if !rec.exists() { + download_file(REC_URL, &rec).await?; + } + Ok(()) +} +``` + +### Sub-task 5b: PDF processing + +```rust +pub fn 
extract_pdf_text(path: &Path) -> Result<String> { + let bytes = std::fs::read(path)?; + let text = pdf_extract::extract_text_from_mem(&bytes) + .unwrap_or_default(); + Ok(text) +} +``` + +Note: `pdf-extract` does not expose an easy image-extraction API, so for now we extract only text from PDFs. + +### Sub-task 5c: DOCX processing + +```rust +pub fn extract_docx_text(path: &Path) -> Result<String> { + let bytes = std::fs::read(path)?; + let docx = docx_rs::read_docx(&bytes)?; + let mut text = String::new(); + for child in docx.document.children { + if let docx_rs::DocumentChild::Paragraph(para) = child { + for run in para.children { + if let docx_rs::ParagraphChild::Run(run) = run { + for rc in run.children { + if let docx_rs::RunChild::Text(t) = rc { + text.push_str(&t.text); + } + } + text.push('\n'); + } + } + } + } + Ok(text) +} +``` + +### Sub-task 5d: Long-context chunking + +```rust +const LONG_CONTEXT_THRESHOLD: usize = 6000; +const CHUNK_SIZE: usize = 1000; +const CHUNK_OVERLAP: usize = 100; + +/// If text is long, store as knowledge chunks and RAG-retrieve relevant ones. +/// Returns a context block appropriate for injection. 
+pub async fn handle_long_context( + text: &str, + filename: &str, + query: &str, + memory: &MemoryStore, +) -> Result<String> { + if text.chars().count() <= LONG_CONTEXT_THRESHOLD { + return Ok(format!("[File: {}]\n{}", filename, text)); + } + + // Chunk and store + let chunks = chunk_text(text, CHUNK_SIZE, CHUNK_OVERLAP); + for (i, chunk) in chunks.iter().enumerate() { + let key = format!("{}::chunk_{}", filename, i); + memory.remember("document_chunk", &key, chunk, Some(filename)).await?; + } + + // RAG-retrieve relevant chunks + let results = memory.search_knowledge(query, 5).await?; + let context = results.iter() + .map(|e| e.value.as_str()) + .collect::<Vec<_>>() + .join("\n\n---\n\n"); + + Ok(format!("[File: {} — relevant sections]\n{}", filename, context)) +} +``` + +**Step 1:** Create `src/file_processor/mod.rs` with all the above + +**Step 2:** Add `mod file_processor;` to `src/main.rs` + +**Step 3:** Run `cargo check` — iterate on type errors + +**Step 4:** Run `cargo test` + +**Step 5:** Commit: `feat: file processor module (image OCR/vision, PDF, DOCX)` + +--- + +## Task 6: Telegram Handler — Download Photos & Documents + +**Files:** +- Modify: `src/platform/telegram.rs` +- Modify: `src/platform/mod.rs` (already done in Task 2) + +In `handle_message`, before the text-only early return, add handling for photo and document: + +```rust +async fn handle_message(bot: Bot, msg: Message, agent: Arc<Agent>) -> ResponseResult<()> { + let user = match msg.from.as_ref() { ... 
}; + + // Determine text content (may be empty if message is photo/doc only) + let text = msg.text() + .or_else(|| msg.caption()) // use caption for media messages + .unwrap_or("") + .to_string(); + + // Collect attachments + let mut attachments = Vec::new(); + let temp_dir = std::env::temp_dir().join(format!("rustfox_{}", uuid::Uuid::new_v4())); + std::fs::create_dir_all(&temp_dir).ok(); + + // Handle photo + if let Some(photos) = msg.photo() { + if let Some(largest) = photos.last() { + match download_telegram_file(&bot, &largest.file.id, &temp_dir, None).await { + Ok((path, mime)) => attachments.push(Attachment { + kind: AttachmentKind::Image, + path, + mime_type: mime, + file_name: None, + }), + Err(e) => warn!("Failed to download photo: {}", e), + } + } + } + + // Handle document + if let Some(doc) = msg.document() { + let file_name = doc.file_name.clone(); + let kind = classify_document_kind(&doc.mime_type, &file_name); + match download_telegram_file(&bot, &doc.file.id, &temp_dir, file_name.as_deref()).await { + Ok((path, mime)) => attachments.push(Attachment { + kind, + path, + mime_type: mime, + file_name, + }), + Err(e) => warn!("Failed to download document: {}", e), + } + } + + // Skip if nothing to process + if text.is_empty() && attachments.is_empty() { + return Ok(()); + } + + // ... existing command handling and streaming setup ... + + let incoming = IncomingMessage { + platform: "telegram".to_string(), + user_id: user_id.to_string(), + chat_id: msg.chat.id.0.to_string(), + user_name, + text, + attachments, + }; + + // Cleanup temp dir after processing + let process_result = agent.process_message(&incoming, tool_event_tx, Some(stream_token_tx)).await; + std::fs::remove_dir_all(&temp_dir).ok(); + + ... +} + +/// Download a Telegram file to temp_dir. Returns (path, mime_type). 
+async fn download_telegram_file( + bot: &Bot, + file_id: &str, + temp_dir: &Path, + filename: Option<&str>, +) -> Result<(PathBuf, String)> { + use teloxide::net::Download; + + let file = bot.get_file(file_id).await.context("get_file failed")?; + let ext = Path::new(&file.path).extension() + .and_then(|e| e.to_str()) + .unwrap_or("bin"); + let dest_name = filename.map(String::from) + .unwrap_or_else(|| format!("{}.{}", uuid::Uuid::new_v4(), ext)); + let dest = temp_dir.join(&dest_name); + + let mut bytes: Vec<u8> = Vec::new(); + bot.download_file(&file.path, &mut bytes).await.context("download_file failed")?; + std::fs::write(&dest, &bytes)?; + + // Detect MIME + let mime = infer::get(&bytes) + .map(|t| t.mime_type().to_string()) + .unwrap_or_else(|| mime_from_ext(ext)); + + Ok((dest, mime)) +} + +fn classify_document_kind(mime: &Option<String>, filename: &Option<String>) -> AttachmentKind { + let mime_str = mime.as_deref().unwrap_or(""); + let name_str = filename.as_deref().unwrap_or(""); + if mime_str.starts_with("image/") { return AttachmentKind::Image; } + if mime_str == "application/pdf" || name_str.ends_with(".pdf") { return AttachmentKind::Pdf; } + if mime_str.contains("wordprocessingml") || name_str.ends_with(".docx") { return AttachmentKind::Docx; } + AttachmentKind::Other +} + +fn mime_from_ext(ext: &str) -> String { + match ext { + "jpg" | "jpeg" => "image/jpeg", + "png" => "image/png", + "gif" => "image/gif", + "pdf" => "application/pdf", + "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + _ => "application/octet-stream", + }.to_string() +} +``` + +**Step 1:** Edit `src/platform/telegram.rs` + +**Step 2:** Run `cargo check` + +**Step 3:** Run `cargo test` + +**Step 4:** Commit: `feat: telegram handler downloads photos and documents` + +--- + +## Task 7: Agent — Process Attachments + +**Files:** +- Modify: `src/agent.rs` + +In `process_message()`, after building the `user_msg`, check for attachments and process them: + +```rust +// 
Process attachments into text context and/or vision content parts +let (attachment_text, image_parts) = if !incoming.attachments.is_empty() { + crate::file_processor::process_attachments( + &incoming.attachments, + &incoming.text, + &self.config, + &self.memory, + ).await +} else { + (String::new(), vec![]) +}; + +// Build user message: text + attachment context + optional image parts +let user_msg_content = if image_parts.is_empty() { + // Text-only: combine user text + any extracted document text + let mut combined = incoming.text.clone(); + if !attachment_text.is_empty() { + combined.push_str("\n\n"); + combined.push_str(&attachment_text); + } + MessageContent::from_text(combined) +} else { + // Multi-modal: text part + image parts + let mut parts = Vec::new(); + let mut text_content = incoming.text.clone(); + if !attachment_text.is_empty() { + text_content.push_str("\n\n"); + text_content.push_str(&attachment_text); + } + if !text_content.is_empty() { + parts.push(ContentPart::Text { text: text_content }); + } + parts.extend(image_parts); + MessageContent::Parts(parts) +}; + +let user_msg = ChatMessage { + role: "user".to_string(), + content: Some(user_msg_content), + tool_calls: None, + tool_call_id: None, +}; +``` + +Also update the RAG retrieval to use `incoming.text` as the query (unchanged), and the message saved to DB: save with text-only content (strip image parts for DB storage to avoid bloat): + +```rust +// Save text-only version to DB (don't store base64 image data in message history) +let db_msg = ChatMessage { + role: "user".to_string(), + content: Some(MessageContent::from_text({ + let mut t = incoming.text.clone(); + if !attachment_text.is_empty() { + t.push_str("\n\n[Attachment processed]"); + } + t + })), + tool_calls: None, + tool_call_id: None, +}; +self.memory.save_message(&conversation_id, &db_msg).await?; +messages.push(user_msg); // push the full message (with images) to in-memory context only +``` + +**Step 1:** Edit `src/agent.rs` + 
+**Step 2:** Run `cargo check` + +**Step 3:** Run `cargo test` + +**Step 4:** Commit: `feat: agent processes file attachments` + +--- + +## Task 8: Final Wiring and Tests + +**Step 1:** Run `cargo clippy -- -D warnings` and fix all warnings + +**Step 2:** Run `cargo test` + +**Step 3:** Add unit tests for: +- `classify_document_kind()` in `telegram.rs` +- `chunk_text()` in `file_processor/mod.rs` +- `MessageContent` serialization (text stays as string, parts serialize correctly) + +**Step 4:** Commit: `test: add unit tests for file attachment pipeline` + +--- + +## Notes on OCR Model Download + +`ocrs` requires two `.rten` model files. On first OCR use: +1. If `~/.cache/ocrs/text-detection.rten` doesn't exist → download from S3 +2. Same for `text-recognition.rten` + +This is done by `ensure_ocr_models()` in the file_processor. The download uses `reqwest` (already a dependency). Models are ~100MB total; download is one-time. + +If the bot is deployed without internet access, operators should pre-download models and point `[ocr] model_dir` to their location in config.toml. + +## config.example.toml additions + +```toml +[openrouter] +# ... existing fields ... +# Set to true if your model supports vision (image inputs) +# supports_vision = false + +[ocr] +# Directory where OCR model files are cached (downloaded on first use) +# model_dir = "~/.cache/ocrs" +``` diff --git a/docs/superpowers/plans/2026-06-04-long-term-memory-and-startup-shutdown.md b/docs/superpowers/plans/2026-06-04-long-term-memory-and-startup-shutdown.md new file mode 100644 index 0000000..b22e568 --- /dev/null +++ b/docs/superpowers/plans/2026-06-04-long-term-memory-and-startup-shutdown.md @@ -0,0 +1,797 @@ +# Long-Term Memory & Startup/Shutdown Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Three independent features: (1) `/clear` archives instead of deleting, (2) startup notifications to allowed users, (3) graceful shutdown with notification. + +**Architecture:** Soft archive using `is_archived` column on conversations table. Startup/shutdown via Telegram bot API messages sent to every allowed user before dispatcher starts and on SIGINT/SIGTERM. + +**Tech Stack:** Rust, tokio, teloxide, rusqlite, sqlite-vec + +--- + +### Files to Modify + +| File | Responsibility | +|------|---------------| +| `src/memory/mod.rs` | DB migration: add `is_archived` column | +| `src/memory/conversations.rs` | Core archive logic: modify `get_or_create_conversation`, `clear_conversation`, `load_messages_with_limit` | +| `src/mcp.rs` | Add `server_count()` method for startup message | +| `src/platform/telegram.rs` | Update `/clear` response, add `notify_startup()` and `notify_shutdown()` | +| `src/main.rs` | Wire signal handler for graceful shutdown | + +--- + +### Task 1: DB Migration — Add `is_archived` Column + +**Files:** +- Modify: `src/memory/mod.rs` + +- [ ] **Step 1: Add migration** + +Add to `run_migrations()` in `src/memory/mod.rs`, after the existing `is_summarized` migration: + +```rust +// Migration: add is_archived column (safe no-op if column already exists) +conn.execute_batch( + "ALTER TABLE conversations ADD COLUMN is_archived INTEGER DEFAULT 0;", +) +.ok(); +``` + +- [ ] **Step 2: Run existing tests to confirm nothing broke** + +Run: `cargo test -p rustfox --lib memory::tests` +Expected: All pass + +```bash +cargo test -p rustfox --lib memory::tests +``` + +- [ ] **Step 3: Commit** + +```bash +git add src/memory/mod.rs +git commit -m "feat(memory): add is_archived column to conversations" +``` + +--- + +### Task 2: Modify `get_or_create_conversation` — Skip Archived + +**Files:** +- Modify: `src/memory/conversations.rs:27-35` + +- [ ] **Step 1: Write the failing test** + +Add to `memory/conversations.rs` test module: + +```rust 
+#[tokio::test] +async fn test_get_or_create_skips_archived() { + let store = crate::memory::MemoryStore::open_in_memory().unwrap(); + + // Create a conversation + let conv = store + .get_or_create_conversation("test", "archive_u1") + .await + .unwrap(); + + // Manually archive it (simulating what clear_conversation will do) + let conn = store.conn.lock().await; + conn.execute( + "UPDATE conversations SET is_archived = 1 WHERE id = ?1", + rusqlite::params![&conv], + ) + .unwrap(); + drop(conn); + + // get_or_create_conversation should return a NEW conversation + let conv2 = store + .get_or_create_conversation("test", "archive_u1") + .await + .unwrap(); + + assert_ne!(conv, conv2, "Must create a new conversation when previous is archived"); + + // The new conversation must not be archived + let conn2 = store.conn.lock().await; + let archived: i64 = conn2 + .query_row( + "SELECT is_archived FROM conversations WHERE id = ?1", + rusqlite::params![&conv2], + |row| row.get(0), + ) + .unwrap(); + drop(conn2); + assert_eq!(archived, 0, "New conversation must not be archived"); +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cargo test -p rustfox --lib memory::conversations::tests::test_get_or_create_skips_archived -- --nocapture` +Expected: FAIL — `assert_ne` fails because the old archived conversation is returned + +```bash +cargo test -p rustfox --lib memory::conversations::tests::test_get_or_create_skips_archived -- --nocapture +``` + +- [ ] **Step 3: Modify `get_or_create_conversation` to filter archived** + +Change the SQL query in `get_or_create_conversation()` (`conversations.rs:29-31`) from: + +```rust +"SELECT id FROM conversations + WHERE platform = ?1 AND user_id = ?2 + ORDER BY updated_at DESC LIMIT 1" +``` + +to: + +```rust +"SELECT id FROM conversations + WHERE platform = ?1 AND user_id = ?2 AND (is_archived IS NULL OR is_archived = 0) + ORDER BY updated_at DESC LIMIT 1" +``` + +- [ ] **Step 4: Run test to verify it passes** + +Same command 
as Step 2. Expected: PASS + +```bash +cargo test -p rustfox --lib memory::conversations::tests::test_get_or_create_skips_archived -- --nocapture +``` + +- [ ] **Step 5: Commit** + +```bash +git add src/memory/conversations.rs +git commit -m "feat(memory): filter archived conversations in get_or_create" +``` + +--- + +### Task 3: Modify `clear_conversation` — Soft Archive Instead of Delete + +**Files:** +- Modify: `src/memory/conversations.rs:113-139` + +- [ ] **Step 1: Write the failing test** + +Add to `memory/conversations.rs` test module: + +```rust +#[tokio::test] +async fn test_clear_archives_instead_of_deleting() { + let store = crate::memory::MemoryStore::open_in_memory().unwrap(); + + let conv = store + .get_or_create_conversation("test", "archive_u2") + .await + .unwrap(); + let msg = crate::llm::ChatMessage { + role: "user".to_string(), + content: Some(crate::llm::MessageContent::from_text("hello world")), + tool_calls: None, + tool_call_id: None, + }; + store.save_message(&conv, &msg).await.unwrap(); + + // Clear + store.clear_conversation("test", "archive_u2").await.unwrap(); + + // Messages should still exist in DB + let conn = store.conn.lock().await; + let msg_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1", + rusqlite::params![&conv], + |row| row.get(0), + ) + .unwrap(); + drop(conn); + assert!(msg_count > 0, "Messages must persist after archive"); + + // Conversation should be marked archived + let conn2 = store.conn.lock().await; + let archived: Option<i64> = conn2 + .query_row( + "SELECT is_archived FROM conversations WHERE id = ?1", + rusqlite::params![&conv], + |row| row.get(0), + ) + .ok(); + drop(conn2); + assert_eq!(archived, Some(1), "Conversation must be marked archived"); +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cargo test -p rustfox --lib memory::conversations::tests::test_clear_archives_instead_of_deleting -- --nocapture` +Expected: FAIL — message count is 0 after delete + 
+```bash +cargo test -p rustfox --lib memory::conversations::tests::test_clear_archives_instead_of_deleting -- --nocapture +``` + +- [ ] **Step 3: Replace `clear_conversation` implementation** + +Replace the entire `clear_conversation` method body (`conversations.rs:113-139`): + +```rust +pub async fn clear_conversation(&self, platform: &str, user_id: &str) -> Result<()> { + let conn = self.conn.lock().await; + + // Soft archive: mark conversation as archived (don't delete messages) + conn.execute( + "UPDATE conversations SET is_archived = 1, updated_at = datetime('now') + WHERE platform = ?1 AND user_id = ?2", + rusqlite::params![platform, user_id], + )?; + + Ok(()) +} +``` + +Note: The DELETE statements for `message_embeddings`, `messages`, and `conversations` +are all removed. The conversation and its messages remain searchable. + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cargo test -p rustfox --lib memory::conversations::tests::test_clear_archives_instead_of_deleting -- --nocapture +``` + +Expected: PASS + +- [ ] **Step 5: Run all memory tests to check nothing else broke** + +```bash +cargo test -p rustfox --lib memory +``` + +Expected: All pass. (Existing tests like `test_search_messages_scoped_to_conversation` should still pass.) 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/memory/conversations.rs +git commit -m "feat(memory): soft archive on clear_conversation instead of delete" +``` + +--- + +### Task 4: Modify `load_messages_with_limit` — Skip Archived Conversations + +**Files:** +- Modify: `src/memory/conversations.rs:149-194` + +- [ ] **Step 1: Write the failing test** + +Add to `memory/conversations.rs` test module: + +```rust +#[tokio::test] +async fn test_load_messages_excludes_archived() { + let store = crate::memory::MemoryStore::open_in_memory().unwrap(); + + let conv = store + .get_or_create_conversation("test", "archive_u3") + .await + .unwrap(); + let msg = crate::llm::ChatMessage { + role: "user".to_string(), + content: Some(crate::llm::MessageContent::from_text("test")), + tool_calls: None, + tool_call_id: None, + }; + store.save_message(&conv, &msg).await.unwrap(); + + // Archive + store.clear_conversation("test", "archive_u3").await.unwrap(); + + // load_messages should return empty for an archived conversation + let messages = store.load_messages(&conv).await.unwrap(); + assert!( + messages.is_empty(), + "Archived conversation should return no messages via load_messages" + ); +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cargo test -p rustfox --lib memory::conversations::tests::test_load_messages_excludes_archived -- --nocapture +``` + +Expected: FAIL — messages are still returned after archive + +- [ ] **Step 3: Modify `load_messages_with_limit` to filter archived** + +Both summary and raw queries join to `conversations` and check `is_archived`. 
+ +For the summary query, change the SQL in `conversations.rs:157-164`: + +```rust +let mut summary_stmt = conn.prepare( + "SELECT m.role, m.content, m.tool_calls, m.tool_call_id + FROM messages m + JOIN conversations c ON m.conversation_id = c.id + WHERE m.conversation_id = ?1 + AND m.role = 'system' + AND m.content LIKE '[SUMMARY]%' + AND (c.is_archived IS NULL OR c.is_archived = 0) + ORDER BY m.created_at ASC", +)?; +``` + +For the raw messages query, change the SQL in `conversations.rs:173-183`: + +```rust +let mut raw_stmt = conn.prepare( + "SELECT role, content, tool_calls, tool_call_id FROM ( + SELECT m.role, m.content, m.tool_calls, m.tool_call_id, m.created_at + FROM messages m + JOIN conversations c ON m.conversation_id = c.id + WHERE m.conversation_id = ?1 + AND NOT (m.role = 'system' AND m.content LIKE '[SUMMARY]%') + AND (c.is_archived IS NULL OR c.is_archived = 0) + ORDER BY m.created_at DESC + LIMIT ?2 + ) ORDER BY created_at ASC", +)?; +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cargo test -p rustfox --lib memory::conversations::tests::test_load_messages_excludes_archived -- --nocapture +``` + +Expected: PASS + +- [ ] **Step 5: Run all tests** + +```bash +cargo test -p rustfox --lib +``` + +Expected: All pass + +- [ ] **Step 6: Commit** + +```bash +git add src/memory/conversations.rs +git commit -m "feat(memory): skip archived conversations in load_messages" +``` + +--- + +### Task 5: Add `server_count()` to `McpManager` + +**Files:** +- Modify: `src/mcp.rs` + +- [ ] **Step 1: Add the method** + +Add after `connect_all()` around line 383: + +```rust +/// Number of connected MCP servers +pub fn server_count(&self) -> usize { + self.connections.len() +} +``` + +- [ ] **Step 2: Verify it compiles** + +```bash +cargo check -p rustfox +``` + +Expected: No errors + +- [ ] **Step 3: Commit** + +```bash +git add src/mcp.rs +git commit -m "feat(mcp): add server_count() method" +``` + +--- + +### Task 6: Update `/clear` Response Text 
and Command Description + +**Files:** +- Modify: `src/platform/telegram.rs` + +- [ ] **Step 1: Update `/clear` response** + +Change line 211 in `telegram.rs` from: + +```rust +bot.send_message(msg.chat.id, escape_text("Conversation cleared.")) +``` + +to: + +```rust +bot.send_message(msg.chat.id, escape_text("Conversation archived. Past messages remain searchable.")) +``` + +- [ ] **Step 2: Update command description** + +Change line 78 in `telegram.rs` from: + +```rust +BotCommand::new("clear", "Clear the current conversation history"), +``` + +to: + +```rust +BotCommand::new("clear", "Archive the current conversation, keeping past messages searchable"), +``` + +- [ ] **Step 3: Verify tests pass** + +```bash +cargo test -p rustfox --lib platform::telegram::tests +``` + +Expected: All pass + +- [ ] **Step 4: Commit** + +```bash +git add src/platform/telegram.rs +git commit -m "feat(telegram): update /clear text to reflect archive behavior" +``` + +--- + +### Task 7: Add `search_messages` Test for Archived Content + +**Files:** +- Test: `src/memory/conversations.rs` + +- [ ] **Step 1: Write test verifying archived messages are searchable** + +Add to `memory/conversations.rs` test module: + +```rust +#[tokio::test] +async fn test_search_messages_finds_archived_content() { + let store = crate::memory::MemoryStore::open_in_memory().unwrap(); + + let conv = store + .get_or_create_conversation("test", "archive_search_u1") + .await + .unwrap(); + let msg = crate::llm::ChatMessage { + role: "user".to_string(), + content: Some(crate::llm::MessageContent::from_text( + "I love Rust programming and async runtimes", + )), + tool_calls: None, + tool_call_id: None, + }; + store.save_message(&conv, &msg).await.unwrap(); + + // Archive + store.clear_conversation("test", "archive_search_u1").await.unwrap(); + + // search_messages should still find the content from archived conversations + let results = store.search_messages("Rust", 5).await.unwrap(); + assert!( + 
!results.is_empty(), + "search_messages must find content in archived conversations" + ); + assert!( + results.iter().any(|m| m.content.as_ref().map_or(false, |c| c.as_text().contains("Rust"))), + "Archived message content must be searchable" + ); +} +``` + +- [ ] **Step 2: Run test** + +```bash +cargo test -p rustfox --lib memory::conversations::tests::test_search_messages_finds_archived_content -- --nocapture +``` + +Expected: PASS + +- [ ] **Step 3: Commit** + +```bash +git add src/memory/conversations.rs +git commit -m "test(memory): verify archived messages remain searchable" +``` + +--- + +### Task 8: Add Startup Notification Function + +**Files:** +- Modify: `src/platform/telegram.rs` + +- [ ] **Step 1: Add `notify_startup` function** + +Add to `src/platform/telegram.rs`, before `run()`: + +```rust +/// Send startup notification to all allowed users. +/// Best-effort: logs failures, never blocks startup. +pub async fn notify_startup( + bot: &teloxide::Bot, + allowed_user_ids: &[u64], + model: &str, + mcp_count: usize, + skills_count: usize, + embedding_enabled: bool, +) { + let memory_status = if embedding_enabled { + "embedding enabled" + } else { + "FTS5 only" + }; + + let msg = format!( + "RustFox is online 🦊\n\ + Model: {}\n\ + MCP: {} server(s) connected\n\ + Skills: {} loaded\n\ + Memory: {}", + model, mcp_count, skills_count, memory_status, + ); + + for &user_id in allowed_user_ids { + let chat_id = teloxide::types::ChatId(user_id as i64); + if let Err(e) = bot.send_message(chat_id, &msg).await { + tracing::warn!( + "Failed to send startup notification to user {}: {}", + user_id, + e + ); + } + } +} +``` + +- [ ] **Step 2: Add `notify_shutdown` function** + +Add next to `notify_startup`: + +```rust +/// Send shutdown notification to all allowed users. +/// Best-effort: logs failures, never blocks shutdown. +pub async fn notify_shutdown( + bot: &teloxide::Bot, + allowed_user_ids: &[u64], +) { + let msg = "RustFox is going offline. 
Goodbye!"; + + for &user_id in allowed_user_ids { + let chat_id = teloxide::types::ChatId(user_id as i64); + if let Err(e) = bot.send_message(chat_id, msg).await { + tracing::warn!( + "Failed to send shutdown notification to user {}: {}", + user_id, + e + ); + } + } +} +``` + +- [ ] **Step 3: Wire startup notification into `run()` — BEFORE agent is moved** + +**Important:** `agent` is moved into `dptree::deps![agent]` on line 117, so `notify_startup` must be called BEFORE that point. Insert it right after `info!("Starting Telegram platform...")` on line 94, before the handler closure is defined. + +In `run()` in `telegram.rs`, replace: + +```rust + info!("Starting Telegram platform..."); +``` + +with: + +```rust + info!("Starting Telegram platform..."); + + // Send startup notifications (best-effort) — before agent is moved into dptree + notify_startup( + &bot, + &allowed_user_ids, + &agent.config.openrouter.model, + agent.mcp.server_count(), + agent.skills.read().await.len(), + agent.memory.embeddings.is_available(), + ) + .await; +``` + +This goes between the `info!("Starting...")` and the `let commands = ...` block. The agent is still alive and `bot` has been cloned locally. 
+ +- [ ] **Step 4: Verify compilation** + +```bash +cargo check -p rustfox +``` + +Expected: No errors + +- [ ] **Step 5: Commit** + +```bash +git add src/platform/telegram.rs +git commit -m "feat(telegram): add startup and shutdown notification functions" +``` + +--- + +### Task 9: Wire Shutdown Signal Handler in `main.rs` + +**Files:** +- Modify: `src/main.rs` + +- [ ] **Step 1: Replace the `platform::telegram::run()` call with a select-based shutdown** + +In `src/main.rs`, replace the existing call at line 337-342: + +```rust + // Run the Telegram platform + info!("Bot is starting..."); + platform::telegram::run( + agent, + config.telegram.allowed_user_ids.clone(), + Arc::clone(&bot), + ) + .await?; +``` + +with: + +```rust + // Run the Telegram platform with signal-driven graceful shutdown + info!("Bot is starting..."); + + let dispatch_agent = Arc::clone(&agent); + let dispatch_user_ids = config.telegram.allowed_user_ids.clone(); + let dispatch_bot = Arc::clone(&bot); + + let mut dispatch_handle = tokio::spawn(async move { + platform::telegram::run(dispatch_agent, dispatch_user_ids, dispatch_bot).await + }); + + // Set up signal handlers (SIGINT via ctrl_c for portability, SIGTERM via unix signal) + #[cfg(unix)] + let mut sigterm = tokio::signal::unix::signal( + tokio::signal::unix::SignalKind::terminate(), + ) + .expect("failed to create SIGTERM handler"); + + tokio::select! 
{ + _ = tokio::signal::ctrl_c() => { + tracing::info!("SIGINT received, shutting down..."); + } + #[cfg(unix)] + _ = sigterm.recv() => { + tracing::info!("SIGTERM received, shutting down..."); + } + result = &mut dispatch_handle => { + // Dispatch completed on its own (unlikely but handle gracefully) + result??; + return Ok(()); + } + }; + + // Send shutdown notification + platform::telegram::notify_shutdown(&bot, &config.telegram.allowed_user_ids).await; + + // Brief grace period for message delivery + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + + tracing::info!("Shutdown complete."); +``` + +- [ ] **Step 2: Verify compilation** + +```bash +cargo check -p rustfox +``` + +Expected: No errors + +- [ ] **Step 3: Run full test suite** + +```bash +cargo test +``` + +Expected: All pass + +- [ ] **Step 4: Commit** + +```bash +git add src/main.rs +git commit -m "feat(main): add graceful shutdown with Telegram notification" +``` + +--- + +### Task 10: Full CI Verification + +- [ ] **Step 1: Run cargo check** + +```bash +cargo check +``` + +Expected: No errors + +- [ ] **Step 2: Run cargo clippy** + +```bash +cargo clippy -- -D warnings +``` + +Expected: No warnings + +- [ ] **Step 3: Run cargo fmt** + +```bash +cargo fmt --all -- --check +``` + +Expected: No formatting issues + +- [ ] **Step 4: Run cargo test** + +```bash +cargo test +``` + +Expected: All pass + +- [ ] **Step 5: Run cargo build --release** + +```bash +cargo build --release +``` + +Expected: Build succeeds + +- [ ] **Step 6: Commit any CI fixes** + +If any step failed, fix and commit. + +- [ ] **Step 7: Final commit message** + +```bash +git add -A +git commit -m "feat: long-term memory archive and startup/shutdown notifications" +``` + +--- + +## Self-Review Checklist + +1. 
**Spec coverage:** Each spec requirement has a task: + - Feature 1 (soft archive): Tasks 1-4 and 6-7 cover migration, get_or_create, clear_conversation, load_messages, response text, search test + - Feature 2 (startup): Task 5 adds `server_count()`, Task 8 covers notify_startup and its wiring into `run()` + - Feature 3 (shutdown): Task 8 adds notify_shutdown, Task 9 covers the signal handler wiring + - All covered. + +2. **Placeholder scan:** No TBD, TODO, or placeholder patterns. Every step has complete code. + +3. **Type consistency:** + - `get_or_create_conversation` returns `Result` — unchanged + - `clear_conversation` returns `Result<()>` — unchanged + - `load_messages_with_limit` returns `Result<Vec<ChatMessage>>` — unchanged + - `server_count()` returns `usize` — consistent with `SkillRegistry::len()` + - `notify_startup` / `notify_shutdown` take `&teloxide::Bot` — consistent with bot usage in `run()` diff --git a/docs/superpowers/specs/2026-06-04-long-term-memory-and-startup-shutdown.md b/docs/superpowers/specs/2026-06-04-long-term-memory-and-startup-shutdown.md new file mode 100644 index 0000000..ddbe8e4 --- /dev/null +++ b/docs/superpowers/specs/2026-06-04-long-term-memory-and-startup-shutdown.md @@ -0,0 +1,187 @@ +# Long-Term Memory Survival & Startup/Shutdown Notifications + +## Feature 1: Soft Archive on `/clear` + +### Problem + +`/clear` calls `MemoryStore::clear_conversation()` which deletes messages, +embeddings, AND the conversation record from SQLite. Past conversation history +becomes permanently inaccessible to the agent, even though: + +- `search_messages()` already searches **all** conversations (unscoped) — + verified at `memory/conversations.rs:282`. +- `search_messages_in_conversation()` is scoped but unused after deletion. +- The `knowledge` table survives (separate from conversations). + +### Solution + +Replace hard delete with a soft archive. This is a single-phase change. +(Knowledge snapshot on clear is deferred to a later spec.) + +**How it works:** + +1. 
Add `is_archived INTEGER DEFAULT 0` column to `conversations` table. +2. `clear_conversation()` sets `is_archived = 1` on the current conversation. + Does NOT delete messages, embeddings, or conversation record. +3. `get_or_create_conversation()` must filter `WHERE is_archived = 0` — + **critical**: without this filter, the query returns the archived + conversation and no new conversation is created, breaking the feature. +4. `load_messages_with_limit()` adds `WHERE c.is_archived = 0` so active + context stays clean (only loads from non-archived conversations). +5. Existing `search_messages()` (unscoped) naturally finds archived + conversation content—no change needed. +6. User-facing response changes from `"Conversation cleared."` to + `"Conversation archived. Past messages remain searchable."`. + +### Schema Changes + +```sql +ALTER TABLE conversations ADD COLUMN is_archived INTEGER DEFAULT 0; +``` +(This uses `.ok()` — safe no-op if column already exists, same pattern +as the `is_summarized` migration at `mod.rs:293`.) + +Existing index `idx_conversations_user ON conversations(platform, user_id, updated_at)` +remains sufficient — `is_archived` filter only excludes archived rows +(majority of rows are non-archived), so index selectivity is adequate. + +### Code Changes + +| File | Change | +|------|--------| +| `memory/mod.rs` (run_migrations) | Add `ALTER TABLE ... is_archived` migration with `.ok()` | +| `memory/conversations.rs` | Modify `get_or_create_conversation`: add `WHERE is_archived = 0` to the existing query. Without this, the archived conversation is returned and no new one is created. | +| `memory/conversations.rs` | Modify `clear_conversation`: replace DELETE with `UPDATE conversations SET is_archived = 1`. Remove ALL DELETE statements (message_embeddings, messages, conversations). | +| `memory/conversations.rs` | Modify `load_messages_with_limit`: JOIN to conversations and add `WHERE c.is_archived = 0`. 
| +| `platform/telegram.rs` | Update `/clear` response text to `"Conversation archived. Past messages remain searchable."` | +| `platform/telegram.rs` | Update `/clear` command description in `supported_commands()` from "Clear" to "Archive the current conversation" | +| `agent.rs` | No change needed — `Agent::clear_conversation()` delegates to `memory.clear_conversation()` | + +### Tests + +| Test | What it verifies | +|------|------------------| +| `test_clear_archives_instead_of_deleting` | After clear, messages still exist in DB | +| `test_get_or_create_skips_archived` | get_or_create_conversation returns a NEW conversation for an archived user | +| `test_search_messages_finds_archived_content` | search_messages returns results from archived conversations | +| `test_load_messages_excludes_archived` | load_messages_with_limit returns empty for archived conv | + +### Estimated lines: ~50 + +--- + +## Feature 2: Startup / Shutdown Notifications + +### Problem + +The bot starts and stops silently. Users don't know when RustFox restarts or +goes offline. + +### Solution + +**Startup:** +- `platform::telegram::run()` is called from `main.rs:337` and + `Dispatcher::dispatch().await` blocks forever. +- The notification must be sent **before** `.dispatch().await`, after the + dispatcher is built but before it starts polling. +- Send to every user in `config.telegram.allowed_user_ids` (available in + `main.rs` at that point). +- Medium-level status message: + - "RustFox is online" + - Model name: `config.openrouter.model` + - MCP servers connected: count from `mcp_manager` + - Skills loaded: count from `skills.len()` + - Memory status: "embedding enabled" or "FTS5 only" + +**Shutdown:** +- Register `tokio::signal::ctrl_c()` and `SIGTERM` handler in `main.rs` + **before** calling `platform::telegram::run()` (which blocks). +- The handler captures `bot: Arc<Bot>` and `allowed_user_ids: Vec<u64>` by + cloning before dispatch. +- On signal: send "RustFox going offline" to each allowed user. 
+- Wait 2 seconds for delivery, then `std::process::exit(0)`. +- Both startup and shutdown notifications are best-effort: log failures, + never block startup/shutdown. + +### Data Flow + +``` +main.rs flow: + 1. Build Agent, McpManager, Scheduler + 2. Clone bot + allowed_user_ids for signal handler + 3. Register SIGINT/SIGTERM handler (captures clones) + 4. Call platform::telegram::run() + 5. Inside run(): + a. Build dispatcher + b. Send startup notifications (async, best-effort) + c. dispatcher.dispatch().await ← blocks +``` + +### Code Changes + +| File | Change | +|------|--------| +| `platform/telegram.rs` | Add `notify_startup(bot, allowed_user_ids, model, mcp_count, skills_count, embedding_enabled)` | +| `platform/telegram.rs` | Add `notify_shutdown(bot, allowed_user_ids)` | +| `platform/telegram.rs` | Call `notify_startup` after dispatcher build, before `.dispatch()` | +| `main.rs` | Clone `bot` + `allowed_user_ids` before `platform::telegram::run()` | +| `main.rs` | Register signal handler with `tokio::signal`, captures clones | +| `main.rs` | Signal handler calls `notify_shutdown` then exits after 2s delay | + +### Estimated lines: ~60 + +--- + +## Feature 3: Startup Message Content + +Medium detail level as requested. Example: + +``` +RustFox is online 🦊 +Model: moonshotai/kimi-k2.6 +MCP: 2 servers connected +Skills: 15 loaded +Memory: embedding enabled +``` + +`skills_count` requires `agent.skills.read().await` (async RwLock) — the +notification function must be async. This is fine since it runs before +`.dispatch().await`. + +--- + +## Implementation Order + +1. Schema migration: add `is_archived` column to conversations (`.ok()` pattern) +2. Modify `get_or_create_conversation`: filter `WHERE is_archived = 0` +3. Modify `load_messages_with_limit`: filter `WHERE c.is_archived = 0` +4. Modify `clear_conversation`: set `is_archived = 1` instead of DELETE +5. Update `/clear` response text + command description +6. 
Add `notify_startup` / `notify_shutdown` to `platform/telegram.rs` +7. Wire startup notification before `Dispatcher::dispatch()` +8. Wire shutdown signal handler in `main.rs` with 2s grace period +9. Write tests for archive behavior + +--- + +## Deferred (future spec) + +- Knowledge snapshot on `/clear`: LLM summarization of archived conversation + stored as knowledge entries. Underspecified — needs prompt design, model + selection, sync/async decision, error handling. + +--- + +## References + +- **`search_messages()`** at `memory/conversations.rs:282` — searches ALL + conversations unscoped. Verified: no `WHERE conversation_id = ?` filter. +- **`clear_conversation()`** at `memory/conversations.rs:113` — currently + deletes embeddings, messages, and conversation record. +- **`get_or_create_conversation()`** at `memory/conversations.rs:19` — + `ORDER BY updated_at DESC LIMIT 1` with no `is_archived` filter. +- **Existing migration pattern**: `mod.rs:293` — `.ok()` on ALTER TABLE for + idempotent column addition. +- **Hermes Agent**: Same SQLite + FTS5 approach for cross-session search. +- **arxiv 2603.05344** (§2.5 Memory): Recommends durable facts across sessions + with automatic recall — validates the soft-archive approach. 
diff --git a/src/agent.rs b/src/agent.rs index b99ec51..6ac0555 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -11,7 +11,8 @@ use crate::agent_prompt::{ use crate::config::Config; use crate::langsmith::LangSmithClient; use crate::llm::{ - is_empty_assistant_response, ChatMessage, FunctionDefinition, LlmClient, ToolDefinition, + is_empty_assistant_response, ChatMessage, ContentPart, FunctionDefinition, LlmClient, + MessageContent, ToolDefinition, }; use crate::mcp::McpManager; use crate::memory::MemoryStore; @@ -303,7 +304,7 @@ impl Agent { if messages.is_empty() { let system_msg = ChatMessage { role: "system".to_string(), - content: Some(current_system_prompt), + content: Some(MessageContent::from_text(current_system_prompt)), tool_calls: None, tool_call_id: None, }; @@ -316,7 +317,7 @@ impl Agent { // on the very next message without restarting the bot. // Find the system message by role (defensive: don't assume messages[0] is system). if let Some(system_msg) = messages.iter_mut().find(|m| m.role == "system") { - system_msg.content = Some(current_system_prompt); + system_msg.content = Some(MessageContent::from_text(current_system_prompt)); } } @@ -362,24 +363,76 @@ impl Agent { .await { if let Some(system_msg) = messages.iter_mut().find(|m| m.role == "system") { - if let Some(ref mut content) = system_msg.content { - content.push_str("\n\n"); - content.push_str(&rag_block); + if let Some(MessageContent::Text(ref mut s)) = system_msg.content { + s.push_str("\n\n"); + s.push_str(&rag_block); } } } } - // Add user message + // Process attachments (images → vision parts or OCR text; PDFs/DOCXs → extracted text) + let (attachment_text, image_parts) = if !incoming.attachments.is_empty() { + crate::file_processor::process_attachments( + &incoming.attachments, + &incoming.text, + &self.config, + &self.memory, + ) + .await + } else { + (String::new(), vec![]) + }; + + // Build user message content + let user_msg_content = if image_parts.is_empty() { + // Text-only path: 
combine user text with any extracted document text + let mut combined = incoming.text.clone(); + if !attachment_text.is_empty() { + combined.push_str("\n\n"); + combined.push_str(&attachment_text); + } + MessageContent::from_text(combined) + } else { + // Multi-modal path: text part + image content parts + let mut parts: Vec = Vec::new(); + let mut text_content = incoming.text.clone(); + if !attachment_text.is_empty() { + text_content.push_str("\n\n"); + text_content.push_str(&attachment_text); + } + if !text_content.is_empty() { + parts.push(ContentPart::Text { text: text_content }); + } + parts.extend(image_parts); + MessageContent::Parts(parts) + }; + + // Save a text-only version to DB (avoid storing base64 image data in message history) + let db_content = if incoming.attachments.is_empty() { + user_msg_content.clone() + } else { + let mut db_text = incoming.text.clone(); + if !attachment_text.is_empty() { + db_text.push_str("\n\n[Attachment processed]"); + } + MessageContent::from_text(db_text) + }; + let db_msg = ChatMessage { + role: "user".to_string(), + content: Some(db_content), + tool_calls: None, + tool_call_id: None, + }; + self.memory.save_message(&conversation_id, &db_msg).await?; + + // Push the full message (with image parts if any) to in-memory context let user_msg = ChatMessage { role: "user".to_string(), - content: Some(incoming.text.clone()), + content: Some(user_msg_content), tool_calls: None, tool_call_id: None, }; - self.memory - .save_message(&conversation_id, &user_msg) - .await?; messages.push(user_msg); // Gather all tool definitions @@ -652,7 +705,7 @@ impl Agent { let tool_msg = ChatMessage { role: "tool".to_string(), - content: Some(tool_result), + content: Some(MessageContent::from_text(tool_result)), tool_calls: None, tool_call_id: Some(tool_call.id.clone()), }; @@ -668,7 +721,11 @@ impl Agent { } // Final response — no tool calls - let content = response.content.clone().unwrap_or_default(); + let content = response + .content + 
.as_ref() + .map(|c| c.as_text()) + .unwrap_or_default(); // Stream the final response directly from the already-complete content. // Previously this made a second chat_stream() API call, which could return @@ -686,7 +743,7 @@ impl Agent { // Save the delivered content to persistent memory let save_msg = crate::llm::ChatMessage { role: response.role.clone(), - content: Some(final_content.clone()), + content: Some(crate::llm::MessageContent::Text(final_content.clone())), tool_calls: response.tool_calls.clone(), tool_call_id: response.tool_call_id.clone(), }; @@ -817,6 +874,7 @@ impl Agent { chat_id: cid, user_name: String::new(), text: prompt, + attachments: vec![], }; let req = ScheduledJobRequest { incoming, @@ -1341,13 +1399,13 @@ impl Agent { let mut messages = vec![ ChatMessage { role: "system".to_string(), - content: Some(system_content), + content: Some(MessageContent::from_text(system_content)), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some(prompt.to_string()), + content: Some(MessageContent::from_text(prompt)), tool_calls: None, tool_call_id: None, }, @@ -1475,7 +1533,7 @@ impl Agent { messages.push(ChatMessage { role: "tool".to_string(), - content: Some(result), + content: Some(MessageContent::from_text(result)), tool_calls: None, tool_call_id: Some(tool_call.id.clone()), }); @@ -1486,7 +1544,7 @@ impl Agent { } // Final response — no tool calls - return response.content.unwrap_or_default(); + return response.content.map(|c| c.as_text()).unwrap_or_default(); } format!( @@ -1532,7 +1590,7 @@ impl Agent { if let Ok(msgs) = self.memory.search_messages(query, limit).await { for msg in msgs { if let Some(content) = &msg.content { - results.push(format!("[{}]: {}", msg.role, content)); + results.push(format!("[{}]: {}", msg.role, content.as_text())); } } } @@ -1641,6 +1699,7 @@ impl Agent { chat_id: cid, user_name: String::new(), text: prompt, + attachments: vec![], }; let req = ScheduledJobRequest { incoming, 
diff --git a/src/agent_prompt.rs b/src/agent_prompt.rs index 5d081aa..97325d0 100644 --- a/src/agent_prompt.rs +++ b/src/agent_prompt.rs @@ -4,7 +4,7 @@ //! including automatic compaction of tool-heavy conversations to stay within context //! limits while preserving recent and relevant information. -use crate::llm::ChatMessage; +use crate::llm::{ChatMessage, MessageContent}; const COMPACTION_MESSAGE_COUNT_THRESHOLD: usize = 10; const COMPACTION_PROMPT_CHAR_THRESHOLD: usize = 20_000; @@ -45,7 +45,7 @@ pub fn estimate_prompt_chars(messages: &[ChatMessage]) -> usize { messages .iter() .map(|msg| { - let content_chars = msg.content.as_deref().map(|c| c.len()).unwrap_or(0); + let content_chars = msg.content.as_ref().map(|c| c.as_text().len()).unwrap_or(0); let tool_args_chars = msg .tool_calls .as_ref() @@ -76,7 +76,7 @@ pub fn recovery_nudge_for(messages: &[ChatMessage]) -> ChatMessage { ChatMessage { role: "system".to_string(), - content: Some(content), + content: Some(MessageContent::Text(content)), tool_calls: None, tool_call_id: None, } @@ -237,13 +237,14 @@ fn compact_tool_result(msg: &ChatMessage) -> ChatMessage { let mut compacted = msg.clone(); if let Some(content) = &compacted.content { - if content.len() > TOOL_RESULT_COMPACT_THRESHOLD { - let preview = truncate_chars(content, TOOL_RESULT_PREVIEW_CHARS); - compacted.content = Some(format!( + let text = content.as_text(); + let text_len = text.len(); + if text_len > TOOL_RESULT_COMPACT_THRESHOLD { + let preview = truncate_chars(&text, TOOL_RESULT_PREVIEW_CHARS); + compacted.content = Some(MessageContent::Text(format!( "[rustfox compacted tool result: {} chars]\n{}...", - content.len(), - preview - )); + text_len, preview + ))); } } @@ -253,20 +254,20 @@ fn compact_tool_result(msg: &ChatMessage) -> ChatMessage { #[cfg(test)] mod tests { use super::*; - use crate::llm::{FunctionCall, ToolCall}; + use crate::llm::{FunctionCall, MessageContent, ToolCall}; #[test] fn 
estimate_prompt_chars_counts_content_and_tool_arguments() { let messages = vec![ ChatMessage { role: "user".to_string(), - content: Some("Hello".to_string()), // 5 chars + content: Some(MessageContent::Text("Hello".to_string())), // 5 chars tool_calls: None, tool_call_id: None, }, ChatMessage { role: "assistant".to_string(), - content: Some("Hi".to_string()), // 2 chars + content: Some(MessageContent::Text("Hi".to_string())), // 2 chars tool_calls: Some(vec![ToolCall { id: "call_1".to_string(), call_type: "function".to_string(), @@ -279,7 +280,7 @@ mod tests { }, ChatMessage { role: "tool".to_string(), - content: Some("result".to_string()), // 6 chars + content: Some(MessageContent::Text("result".to_string())), // 6 chars tool_calls: None, tool_call_id: Some("call_1".to_string()), }, @@ -294,13 +295,13 @@ mod tests { let messages = vec![ ChatMessage { role: "user".to_string(), - content: Some("Hello".to_string()), + content: Some(MessageContent::Text("Hello".to_string())), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "tool".to_string(), - content: Some("result".to_string()), + content: Some(MessageContent::Text("result".to_string())), tool_calls: None, tool_call_id: Some("call_1".to_string()), }, @@ -312,6 +313,7 @@ mod tests { .content .as_ref() .unwrap() + .as_text() .contains("tool result above")); } @@ -319,7 +321,7 @@ mod tests { fn recovery_nudge_mentions_user_request_when_previous_message_is_user() { let messages = vec![ChatMessage { role: "user".to_string(), - content: Some("Hello".to_string()), + content: Some(MessageContent::Text("Hello".to_string())), tool_calls: None, tool_call_id: None, }]; @@ -330,6 +332,7 @@ mod tests { .content .as_ref() .unwrap() + .as_text() .contains("user's request above")); } @@ -339,7 +342,7 @@ mod tests { let messages: Vec = (0..5) .map(|i| ChatMessage { role: "user".to_string(), - content: Some(format!("message {}", i)), + content: Some(MessageContent::Text(format!("message {}", i))), tool_calls: None, 
tool_call_id: None, }) @@ -353,7 +356,7 @@ mod tests { let short_messages: Vec = (0..15) .map(|i| ChatMessage { role: "user".to_string(), - content: Some(format!("msg{}", i)), // Very short + content: Some(MessageContent::Text(format!("msg{}", i))), // Very short tool_calls: None, tool_call_id: None, }) @@ -371,7 +374,9 @@ mod tests { // System message messages.push(ChatMessage { role: "system".to_string(), - content: Some("You are a helpful assistant.".to_string()), + content: Some(MessageContent::Text( + "You are a helpful assistant.".to_string(), + )), tool_calls: None, tool_call_id: None, }); @@ -379,7 +384,7 @@ mod tests { // User message messages.push(ChatMessage { role: "user".to_string(), - content: Some("Do task 1".to_string()), + content: Some(MessageContent::Text("Do task 1".to_string())), tool_calls: None, tool_call_id: None, }); @@ -403,7 +408,7 @@ mod tests { let long_result = "y".repeat(2500); messages.push(ChatMessage { role: "tool".to_string(), - content: Some(long_result.clone()), + content: Some(MessageContent::Text(long_result.clone())), tool_calls: None, tool_call_id: Some("old_call_1".to_string()), }); @@ -425,7 +430,7 @@ mod tests { messages.push(ChatMessage { role: "tool".to_string(), - content: Some("y".repeat(2500)), + content: Some(MessageContent::Text("y".repeat(2500))), tool_calls: None, tool_call_id: Some("recent_call_1".to_string()), }); @@ -447,7 +452,7 @@ mod tests { messages.push(ChatMessage { role: "tool".to_string(), - content: Some("w".repeat(2500)), + content: Some(MessageContent::Text("w".repeat(2500))), tool_calls: None, tool_call_id: Some("recent_call_2".to_string()), }); @@ -470,8 +475,10 @@ mod tests { let old_tool = &compacted[3]; assert_eq!(old_tool.role, "tool"); let old_content = old_tool.content.as_ref().unwrap(); - assert!(old_content.contains("rustfox compacted tool result")); - assert!(old_content.len() < long_result.len()); + assert!(old_content + .as_text() + .contains("rustfox compacted tool result")); + 
assert!(old_content.as_text().len() < long_result.len()); // Recent tool groups should be preserved unchanged let recent1_assistant = &compacted[4]; @@ -484,7 +491,7 @@ mod tests { ); let recent1_tool = &compacted[5]; - assert_eq!(recent1_tool.content.as_ref().unwrap().len(), 2500); + assert_eq!(recent1_tool.content.as_ref().unwrap().as_text().len(), 2500); let recent2_assistant = &compacted[6]; assert_eq!( @@ -496,7 +503,7 @@ mod tests { ); let recent2_tool = &compacted[7]; - assert_eq!(recent2_tool.content.as_ref().unwrap().len(), 2500); + assert_eq!(recent2_tool.content.as_ref().unwrap().as_text().len(), 2500); } #[test] @@ -504,13 +511,13 @@ mod tests { let messages = vec![ ChatMessage { role: "system".to_string(), - content: Some("System".to_string()), + content: Some(MessageContent::Text("System".to_string())), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some("User 1".to_string()), + content: Some(MessageContent::Text("User 1".to_string())), tool_calls: None, tool_call_id: None, }, @@ -529,13 +536,13 @@ mod tests { }, ChatMessage { role: "tool".to_string(), - content: Some("y".repeat(2500)), + content: Some(MessageContent::Text("y".repeat(2500))), tool_calls: None, tool_call_id: Some("call_1".to_string()), }, ChatMessage { role: "assistant".to_string(), - content: Some("Final response".to_string()), + content: Some(MessageContent::Text("Final response".to_string())), tool_calls: None, tool_call_id: None, }, @@ -628,7 +635,7 @@ mod tests { let message = ChatMessage { role: "tool".to_string(), - content: Some(long_result_with_unicode.clone()), + content: Some(MessageContent::Text(long_result_with_unicode.clone())), tool_calls: None, tool_call_id: Some("call_1".to_string()), }; @@ -638,12 +645,15 @@ mod tests { let compacted_content = compacted.content.as_ref().unwrap(); // Verify it's been compacted - assert!(compacted_content.contains("rustfox compacted tool result")); - assert!(compacted_content.len() < 
long_result_with_unicode.len()); + assert!(compacted_content + .as_text() + .contains("rustfox compacted tool result")); + assert!(compacted_content.as_text().len() < long_result_with_unicode.len()); // Verify the preview portion is valid UTF-8 and respects character boundaries // The format is: "[rustfox compacted tool result: N chars]\n{preview}..." - let lines: Vec<&str> = compacted_content.lines().collect(); + let text = compacted_content.as_text(); + let lines: Vec<&str> = text.lines().collect(); assert_eq!(lines.len(), 2); let preview_line = lines[1]; assert!(preview_line.ends_with("...")); @@ -660,13 +670,15 @@ mod tests { let mut messages = vec![ ChatMessage { role: "system".to_string(), - content: Some("System prompt 系统提示".to_string()), + content: Some(MessageContent::Text("System prompt 系统提示".to_string())), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some("User request with emoji 🚀".to_string()), + content: Some(MessageContent::Text( + "User request with emoji 🚀".to_string(), + )), tool_calls: None, tool_call_id: None, }, @@ -696,7 +708,7 @@ mod tests { let unicode_result = format!("Result 结果: {}🌈{}", "a".repeat(1500), "b".repeat(1200)); messages.push(ChatMessage { role: "tool".to_string(), - content: Some(unicode_result), + content: Some(MessageContent::Text(unicode_result)), tool_calls: None, tool_call_id: Some("old_call".to_string()), }); @@ -718,7 +730,7 @@ mod tests { messages.push(ChatMessage { role: "tool".to_string(), - content: Some("y".repeat(2500)), + content: Some(MessageContent::Text("y".repeat(2500))), tool_calls: None, tool_call_id: Some("recent_call_1".to_string()), }); @@ -740,7 +752,7 @@ mod tests { messages.push(ChatMessage { role: "tool".to_string(), - content: Some("w".repeat(2500)), + content: Some(MessageContent::Text("w".repeat(2500))), tool_calls: None, tool_call_id: Some("recent_call_2".to_string()), }); @@ -762,7 +774,9 @@ mod tests { 
assert!(compacted_args.contains("_rustfox_compacted_arguments")); let compacted_result = compacted[3].content.as_ref().unwrap(); - assert!(compacted_result.contains("rustfox compacted tool result")); + assert!(compacted_result + .as_text() + .contains("rustfox compacted tool result")); // Verify recent groups are preserved unchanged assert_eq!( @@ -772,7 +786,7 @@ mod tests { .len(), 1500 ); - assert_eq!(compacted[5].content.as_ref().unwrap().len(), 2500); + assert_eq!(compacted[5].content.as_ref().unwrap().as_text().len(), 2500); assert_eq!( compacted[6].tool_calls.as_ref().unwrap()[0] .function @@ -780,6 +794,6 @@ mod tests { .len(), 1500 ); - assert_eq!(compacted[7].content.as_ref().unwrap().len(), 2500); + assert_eq!(compacted[7].content.as_ref().unwrap().as_text().len(), 2500); } } diff --git a/src/config.rs b/src/config.rs index d69d45e..a4ebf0b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -23,6 +23,8 @@ pub struct Config { pub embedding: Option, #[serde(default)] pub langsmith: Option, + #[serde(default = "default_ocr_config")] + pub ocr: OcrConfig, #[serde(default = "default_learning_config")] pub learning: LearningConfig, #[serde(default)] @@ -106,6 +108,19 @@ pub struct OpenRouterConfig { pub max_tokens: u32, #[serde(default = "default_system_prompt")] pub system_prompt: String, + /// Whether the configured model supports vision (image inputs). + /// When true, images are sent as base64-encoded content parts. + /// When false, OCR is used to extract text from images. + #[serde(default)] + pub supports_vision: bool, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct OcrConfig { + /// Directory where OCR model files are cached. + /// Models are downloaded automatically on first OCR use. 
+ #[serde(default = "default_ocr_model_dir")] + pub model_dir: std::path::PathBuf, } #[derive(Debug, Deserialize, Clone, Default)] @@ -363,6 +378,19 @@ fn default_langsmith_base_url() -> String { "https://api.smith.langchain.com".to_string() } +fn default_ocr_model_dir() -> std::path::PathBuf { + std::env::var("HOME") + .map(std::path::PathBuf::from) + .unwrap_or_else(|_| std::path::PathBuf::from(".")) + .join(".cache/ocrs") +} + +fn default_ocr_config() -> OcrConfig { + OcrConfig { + model_dir: default_ocr_model_dir(), + } +} + fn default_bundled_skills_dir() -> PathBuf { PathBuf::from("skills") } @@ -659,6 +687,52 @@ mod tests { assert_eq!(ls.project, "default"); } + #[test] + fn test_supports_vision_defaults_false() { + let toml = r#" + [telegram] + bot_token = "tok" + allowed_user_ids = [1] + [openrouter] + api_key = "key" + [sandbox] + allowed_directory = "/tmp" + "#; + let cfg: Config = toml::from_str(toml).unwrap(); + assert!(!cfg.openrouter.supports_vision); + } + + #[test] + fn test_supports_vision_parses_true() { + let toml = r#" + [telegram] + bot_token = "tok" + allowed_user_ids = [1] + [openrouter] + api_key = "key" + supports_vision = true + [sandbox] + allowed_directory = "/tmp" + "#; + let cfg: Config = toml::from_str(toml).unwrap(); + assert!(cfg.openrouter.supports_vision); + } + + #[test] + fn test_ocr_config_default_model_dir() { + let toml = r#" + [telegram] + bot_token = "tok" + allowed_user_ids = [1] + [openrouter] + api_key = "key" + [sandbox] + allowed_directory = "/tmp" + "#; + let cfg: Config = toml::from_str(toml).unwrap(); + assert!(cfg.ocr.model_dir.to_string_lossy().contains("ocrs")); + } + #[test] fn test_mcp_server_url_config_parses() { let toml = r#" @@ -745,7 +819,7 @@ mod tests { } #[test] - fn supervisor_config_defaults_when_section_missing() { + fn test_query_rewriter_can_be_enabled() { let toml = r#" [telegram] bot_token = "tok" @@ -754,16 +828,18 @@ mod tests { api_key = "key" [sandbox] allowed_directory = "/tmp" + [memory] + 
query_rewriter_enabled = true "#; let cfg: Config = toml::from_str(toml).unwrap(); - assert_eq!(cfg.supervisor.default_autonomy_mode, "standard"); - // artifacts_dir now defaults to the empty "unset" sentinel; it is - // materialized to an absolute path only by Config::resolve(). - assert_eq!(cfg.supervisor.artifacts_dir, std::path::PathBuf::new()); + assert!( + cfg.memory.query_rewriter_enabled, + "query_rewriter_enabled should be true when set" + ); } #[test] - fn test_query_rewriter_can_be_enabled() { + fn supervisor_config_defaults_when_section_missing() { let toml = r#" [telegram] bot_token = "tok" @@ -772,14 +848,10 @@ mod tests { api_key = "key" [sandbox] allowed_directory = "/tmp" - [memory] - query_rewriter_enabled = true "#; let cfg: Config = toml::from_str(toml).unwrap(); - assert!( - cfg.memory.query_rewriter_enabled, - "query_rewriter_enabled should be true when set" - ); + assert_eq!(cfg.supervisor.default_autonomy_mode, "standard"); + assert_eq!(cfg.supervisor.artifacts_dir, std::path::PathBuf::new()); } #[test] diff --git a/src/file_processor/mod.rs b/src/file_processor/mod.rs new file mode 100644 index 0000000..1edcd9d --- /dev/null +++ b/src/file_processor/mod.rs @@ -0,0 +1,313 @@ +use anyhow::{Context, Result}; +use base64::Engine as _; +use std::path::Path; + +use crate::config::Config; +use crate::llm::{ContentPart, ImageUrlContent}; +use crate::memory::MemoryStore; +use crate::platform::{Attachment, AttachmentKind}; + +const LONG_CONTEXT_THRESHOLD: usize = 6000; +const CHUNK_SIZE: usize = 1000; +const CHUNK_OVERLAP: usize = 100; + +/// Returned by `process_image` to indicate whether we got a vision part or OCR text. +pub enum ImageResult { + VisionPart(ContentPart), + OcrText(String), +} + +/// Process all attachments for a message. 
+/// - Images: base64 vision part (if supports_vision) OR OCR text (if not) +/// - PDFs: text extraction +/// - DOCXs: text extraction +/// - Long text (>6000 chars): chunked into knowledge store, RAG-retrieved +pub async fn process_attachments( + attachments: &[Attachment], + user_query: &str, + config: &Config, + memory: &MemoryStore, +) -> (String, Vec) { + let mut text_parts: Vec = Vec::new(); + let mut image_parts: Vec = Vec::new(); + + for attachment in attachments { + match attachment.kind { + AttachmentKind::Image => { + match process_image( + &attachment.path, + &attachment.mime_type, + config.openrouter.supports_vision, + &config.ocr.model_dir, + ) + .await + { + Ok(ImageResult::VisionPart(part)) => image_parts.push(part), + Ok(ImageResult::OcrText(text)) => { + let fname = attachment.file_name.as_deref().unwrap_or("image"); + text_parts.push(format!("[Image: {}]\n{}", fname, text)); + } + Err(e) => { + tracing::warn!("Image processing failed: {}", e); + text_parts.push(format!("[Image processing failed: {}]", e)); + } + } + } + AttachmentKind::Pdf => { + let fname = attachment.file_name.as_deref().unwrap_or("document.pdf"); + match extract_pdf_text(&attachment.path) { + Ok(text) => { + let ctx = handle_context_length(&text, fname, user_query, memory).await; + text_parts.push(ctx); + } + Err(e) => { + tracing::warn!("PDF extraction failed: {}", e); + text_parts.push(format!("[PDF processing failed: {}]", e)); + } + } + } + AttachmentKind::Docx => { + let fname = attachment.file_name.as_deref().unwrap_or("document.docx"); + match extract_docx_text(&attachment.path) { + Ok(text) => { + let ctx = handle_context_length(&text, fname, user_query, memory).await; + text_parts.push(ctx); + } + Err(e) => { + tracing::warn!("DOCX extraction failed: {}", e); + text_parts.push(format!("[DOCX processing failed: {}]", e)); + } + } + } + AttachmentKind::Other => { + tracing::debug!("Skipping unsupported attachment type"); + } + } + } + + (text_parts.join("\n\n"), 
image_parts) +} + +/// Returns either a vision ContentPart (base64) or extracted OCR text. +async fn process_image( + path: &Path, + mime_type: &str, + supports_vision: bool, + ocr_model_dir: &Path, +) -> Result { + if supports_vision { + let bytes = tokio::fs::read(path).await?; + let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes); + let data_url = format!("data:{};base64,{}", mime_type, encoded); + Ok(ImageResult::VisionPart(ContentPart::ImageUrl { + image_url: ImageUrlContent { url: data_url }, + })) + } else { + let text = ocr_image(path, ocr_model_dir).await?; + Ok(ImageResult::OcrText(text)) + } +} + +/// Perform OCR on an image using the ocrs neural-network engine. +/// Downloads model files on first use to `model_dir`. +async fn ocr_image(path: &Path, model_dir: &Path) -> Result { + ensure_ocr_models(model_dir).await?; + + let det_path = model_dir.join("text-detection.rten"); + let rec_path = model_dir.join("text-recognition.rten"); + + let path_owned = path.to_path_buf(); + + tokio::task::spawn_blocking(move || -> Result { + let detection_model = + rten::Model::load_file(&det_path).context("Failed to load OCR detection model")?; + let recognition_model = + rten::Model::load_file(&rec_path).context("Failed to load OCR recognition model")?; + + let engine = ocrs::OcrEngine::new(ocrs::OcrEngineParams { + detection_model: Some(detection_model), + recognition_model: Some(recognition_model), + ..Default::default() + })?; + + let img = image::open(&path_owned) + .context("Failed to open image for OCR")? + .into_rgb8(); + let img_source = ocrs::ImageSource::from_bytes(img.as_raw(), img.dimensions())?; + let ocr_input = engine.prepare_input(img_source)?; + let text = engine.get_text(&ocr_input)?; + Ok(text) + }) + .await + .context("OCR task panicked")? +} + +/// Download OCR model files to model_dir if they don't exist. 
+async fn ensure_ocr_models(model_dir: &Path) -> Result<()> { + tokio::fs::create_dir_all(model_dir).await?; + + let det = model_dir.join("text-detection.rten"); + let rec = model_dir.join("text-recognition.rten"); + + const DET_URL: &str = "https://ocrs-models.s3.us-east-1.amazonaws.com/text-detection.rten"; + const REC_URL: &str = "https://ocrs-models.s3.us-east-1.amazonaws.com/text-recognition.rten"; + + if !det.exists() { + tracing::info!("Downloading OCR detection model to {}", det.display()); + download_model(DET_URL, &det).await?; + } + if !rec.exists() { + tracing::info!("Downloading OCR recognition model to {}", rec.display()); + download_model(REC_URL, &rec).await?; + } + Ok(()) +} + +async fn download_model(url: &str, dest: &Path) -> Result<()> { + let response = reqwest::get(url) + .await + .context("Failed to fetch OCR model")?; + let bytes = response + .bytes() + .await + .context("Failed to read OCR model bytes")?; + tokio::fs::write(dest, &bytes) + .await + .context("Failed to write OCR model")?; + tracing::info!("OCR model saved: {} bytes", bytes.len()); + Ok(()) +} + +/// Extract text content from a PDF file. +fn extract_pdf_text(path: &Path) -> Result { + let bytes = std::fs::read(path).context("Failed to read PDF")?; + // unwrap_or_default: malformed PDFs return empty string rather than propagating + let text = pdf_extract::extract_text_from_mem(&bytes).unwrap_or_default(); + Ok(text) +} + +/// Extract text content from a DOCX file. 
+fn extract_docx_text(path: &Path) -> Result { + let bytes = std::fs::read(path).context("Failed to read DOCX")?; + let docx = + docx_rs::read_docx(&bytes).map_err(|e| anyhow::anyhow!("Failed to parse DOCX: {:?}", e))?; + + let mut text = String::new(); + for child in docx.document.children { + if let docx_rs::DocumentChild::Paragraph(para) = child { + for run_child in para.children { + if let docx_rs::ParagraphChild::Run(run) = run_child { + for rc in run.children { + if let docx_rs::RunChild::Text(t) = rc { + text.push_str(&t.text); + } + } + } + } + text.push('\n'); + } + } + Ok(text) +} + +/// Chunk text with overlap. +fn chunk_text(text: &str, chunk_size: usize, overlap: usize) -> Vec { + let chars: Vec = text.chars().collect(); + let mut chunks = Vec::new(); + let mut start = 0; + while start < chars.len() { + let end = (start + chunk_size).min(chars.len()); + chunks.push(chars[start..end].iter().collect()); + if end == chars.len() { + break; + } + start += chunk_size - overlap; + } + chunks +} + +/// If text is long, store chunks in knowledge store and RAG-retrieve relevant ones. +/// If short, return it directly. 
+async fn handle_context_length( + text: &str, + filename: &str, + query: &str, + memory: &MemoryStore, +) -> String { + let char_count = text.chars().count(); + if char_count <= LONG_CONTEXT_THRESHOLD { + return format!("[File: {}]\n{}", filename, text); + } + + let chunks = chunk_text(text, CHUNK_SIZE, CHUNK_OVERLAP); + tracing::info!( + "Document '{}' is {} chars — storing {} chunks in knowledge base", + filename, + char_count, + chunks.len() + ); + + for (i, chunk) in chunks.iter().enumerate() { + let key = format!("{}::chunk_{}", filename, i); + if let Err(e) = memory + .remember("document_chunk", &key, chunk, Some(filename)) + .await + { + tracing::warn!("Failed to store document chunk {}: {}", i, e); + } + } + + match memory.search_knowledge(query, 5).await { + Ok(results) if !results.is_empty() => { + let context = results + .iter() + .map(|e| e.value.as_str()) + .collect::>() + .join("\n\n---\n\n"); + format!("[File: {} — relevant sections]\n{}", filename, context) + } + _ => format!( + "[File: {} — document indexed, but no relevant sections found for this query]", + filename + ), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_chunk_text_short_returns_one_chunk() { + let text = "hello world"; + let chunks = chunk_text(text, 1000, 100); + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0], text); + } + + #[test] + fn test_chunk_text_long_splits_with_overlap() { + let text = "a".repeat(2500); + let chunks = chunk_text(&text, 1000, 100); + // chunk 0: [0, 1000) + // chunk 1: [900, 1900) + // chunk 2: [1800, 2500) (last chunk, smaller) + assert_eq!(chunks.len(), 3); + assert_eq!(chunks[0].chars().count(), 1000); + assert_eq!(chunks[1].chars().count(), 1000); + } + + #[test] + fn test_chunk_text_exact_boundary() { + let text = "b".repeat(1000); + let chunks = chunk_text(&text, 1000, 100); + assert_eq!(chunks.len(), 1); + } + + #[test] + fn test_chunk_text_just_over_boundary() { + let text = "b".repeat(1001); + let chunks = 
chunk_text(&text, 1000, 100); + assert_eq!(chunks.len(), 2); + } +} diff --git a/src/learning.rs b/src/learning.rs index 7aa323a..ed2900a 100644 --- a/src/learning.rs +++ b/src/learning.rs @@ -2,7 +2,7 @@ use anyhow::{Context, Result}; use std::path::Path; use tracing::{info, warn}; -use crate::llm::{ChatMessage, LlmClient}; +use crate::llm::{ChatMessage, LlmClient, MessageContent}; use crate::skills::loader::load_skills_from_dir; use crate::skills::{SkillRegistry, SkillSource}; @@ -88,13 +88,13 @@ async fn extract_skill_from_conversation( let analysis_messages = vec![ChatMessage { role: "user".to_string(), - content: Some(analysis_prompt), + content: Some(MessageContent::Text(analysis_prompt)), tool_calls: None, tool_call_id: None, }]; let response = llm.chat(&analysis_messages, &[]).await?; - let raw = response.content.unwrap_or_default(); + let raw = response.content.unwrap_or_default().as_text(); // Strip outer code fences if the model wrapped its entire response. let content = strip_code_fences(&raw); @@ -355,7 +355,11 @@ async fn update_user_model_inner( let conversation_snippets: String = recent .iter() - .filter_map(|m| m.content.as_ref().map(|c| format!("[{}]: {}", m.role, c))) + .filter_map(|m| { + m.content + .as_ref() + .map(|c| format!("[{}]: {}", m.role, c.as_text())) + }) .collect::>() .join("\n"); @@ -389,13 +393,13 @@ async fn update_user_model_inner( let messages = vec![ChatMessage { role: "user".to_string(), - content: Some(prompt), + content: Some(MessageContent::Text(prompt)), tool_calls: None, tool_call_id: None, }]; let response = llm.chat(&messages, &[]).await?; - let new_content = response.content.unwrap_or_default(); + let new_content = response.content.unwrap_or_default().as_text(); // Strict validation: must start with `---` and contain a closing `---` // delimiter so we don't write malformed or injection-bearing content into @@ -526,7 +530,7 @@ fn build_transcript(messages: &[ChatMessage]) -> String { .filter_map(|m| { m.content 
.as_ref() - .map(|c| format!("[{}]: {}", m.role, truncate(c, 500))) + .map(|c| format!("[{}]: {}", m.role, truncate(&c.as_text(), 500))) }) .collect::>() .join("\n") @@ -650,19 +654,19 @@ mod tests { let messages = vec![ ChatMessage { role: "system".to_string(), - content: Some("System prompt".to_string()), + content: Some(MessageContent::Text("System prompt".to_string())), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some("Hello".to_string()), + content: Some(MessageContent::Text("Hello".to_string())), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "assistant".to_string(), - content: Some("Hi there".to_string()), + content: Some(MessageContent::Text("Hi there".to_string())), tool_calls: None, tool_call_id: None, }, diff --git a/src/lib.rs b/src/lib.rs index 3139f12..0fba321 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod agent; pub mod agent_prompt; pub mod config; +pub mod file_processor; pub mod home; pub mod langsmith; pub mod learning; diff --git a/src/llm.rs b/src/llm.rs index 20ce1ff..25e3ba7 100644 --- a/src/llm.rs +++ b/src/llm.rs @@ -4,11 +4,71 @@ use tracing::{debug, warn}; use crate::config::OpenRouterConfig; +/// A single part in a multi-modal message +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ContentPart { + Text { text: String }, + ImageUrl { image_url: ImageUrlContent }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageUrlContent { + /// "data:image/jpeg;base64,..." or a URL + pub url: String, +} + +/// Either a plain text string or a list of content parts (multi-modal). +/// Serializes as a plain JSON string for text-only, or as a JSON array for multi-modal. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum MessageContent { + Text(String), + Parts(Vec), +} + +impl MessageContent { + /// Extract all text from the content (for logging, RAG, DB storage, etc.) 
+ pub fn as_text(&self) -> String { + match self { + Self::Text(s) => s.clone(), + Self::Parts(parts) => parts + .iter() + .filter_map(|p| { + if let ContentPart::Text { text } = p { + Some(text.as_str()) + } else { + None + } + }) + .collect::>() + .join(" "), + } + } + + pub fn from_text(s: impl Into) -> Self { + Self::Text(s.into()) + } + + pub fn is_empty(&self) -> bool { + match self { + Self::Text(s) => s.is_empty(), + Self::Parts(parts) => parts.is_empty(), + } + } +} + +impl Default for MessageContent { + fn default() -> Self { + Self::Text(String::new()) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ChatMessage { pub role: String, #[serde(skip_serializing_if = "Option::is_none")] - pub content: Option, + pub content: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tool_calls: Option>, #[serde(skip_serializing_if = "Option::is_none")] @@ -61,10 +121,10 @@ pub fn is_empty_assistant_response(message: &ChatMessage) -> bool { .tool_calls .as_ref() .is_some_and(|calls| !calls.is_empty()); - let has_content = message - .content - .as_deref() - .is_some_and(|content| !content.trim().is_empty()); + let has_content = message.content.as_ref().is_some_and(|content| { + let text = content.as_text(); + !text.trim().is_empty() + }); !has_tool_calls && !has_content } @@ -377,7 +437,11 @@ impl LlmClient { tool_call_count = choice.message.tool_calls.as_ref().map_or(0, |t| t.len()), "Received LLM response" ); - if choice.message.content.as_deref().is_none_or(str::is_empty) + if choice + .message + .content + .as_ref() + .is_none_or(MessageContent::is_empty) && choice.message.tool_calls.as_ref().is_none_or(Vec::is_empty) { warn!( @@ -402,7 +466,7 @@ impl LlmClient { .is_some_and(|t| !t.is_empty()); if !has_tool_calls { if let Some(ref content) = choice.message.content.clone() { - if let Some(parsed) = parse_kimi_tool_calls(content) { + if let Some(parsed) = parse_kimi_tool_calls(&content.as_text()) { warn!( tool_count = parsed.len(), "Kimi 
native tool-call format detected in content — \ @@ -491,6 +555,39 @@ impl LlmClient { mod tests { use super::*; + #[test] + fn test_message_content_text_serializes_as_string() { + let content = MessageContent::from_text("hello world"); + let json = serde_json::to_string(&content).unwrap(); + assert_eq!(json, r#""hello world""#); + } + + #[test] + fn test_message_content_parts_serializes_as_array() { + let content = MessageContent::Parts(vec![ContentPart::Text { + text: "hello".to_string(), + }]); + let json = serde_json::to_value(&content).unwrap(); + assert!(json.is_array()); + assert_eq!(json[0]["type"], "text"); + assert_eq!(json[0]["text"], "hello"); + } + + #[test] + fn test_message_content_as_text_from_parts() { + let content = MessageContent::Parts(vec![ + ContentPart::Text { + text: "hello".to_string(), + }, + ContentPart::ImageUrl { + image_url: ImageUrlContent { + url: "data:image/png;base64,abc".to_string(), + }, + }, + ]); + assert_eq!(content.as_text(), "hello"); + } + #[test] fn test_chat_request_serializes_model_field() { // Verifies the model string will appear in the JSON POST body @@ -799,7 +896,7 @@ mod tests { fn test_empty_assistant_response_detects_whitespace_content_no_tools() { let message = ChatMessage { role: "assistant".to_string(), - content: Some(" \n\t ".to_string()), + content: Some(MessageContent::Text(" \n\t ".to_string())), tool_calls: Some(vec![]), tool_call_id: None, }; @@ -828,7 +925,7 @@ mod tests { fn test_empty_assistant_response_false_when_content_present() { let message = ChatMessage { role: "assistant".to_string(), - content: Some("Done".to_string()), + content: Some(MessageContent::Text("Done".to_string())), tool_calls: None, tool_call_id: None, }; @@ -851,7 +948,10 @@ mod tests { model: "test-model".to_string(), }; assert_eq!(completion.finish_reason.as_deref(), Some("stop")); - assert_eq!(completion.message.content.as_deref(), Some("hello")); + assert_eq!( + completion.message.content.as_ref().map(|c| c.as_text()), + 
Some("hello".to_string()) + ); } #[test] @@ -879,7 +979,7 @@ mod tests { .is_some_and(|t| !t.is_empty()); if !has_tool_calls { if let Some(ref content) = choice.message.content.clone() { - if let Some(parsed) = parse_kimi_tool_calls(content) { + if let Some(parsed) = parse_kimi_tool_calls(&content.as_text()) { choice.message.tool_calls = Some(parsed); choice.message.content = None; choice.finish_reason = Some("tool_calls".to_string()); diff --git a/src/main.rs b/src/main.rs index eeba6bd..05e248a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -332,14 +332,47 @@ async fn main() -> Result<()> { Err(e) => warn!(" Supervisor: failed to enumerate resumable tasks: {e}"), } - // Run the Telegram platform + // Run the Telegram platform with signal-driven graceful shutdown info!("Bot is starting..."); - platform::telegram::run( - agent, - config.telegram.allowed_user_ids.clone(), - Arc::clone(&bot), - ) - .await?; + + let dispatch_agent = Arc::clone(&agent); + let dispatch_user_ids = config.telegram.allowed_user_ids.clone(); + let dispatch_bot = Arc::clone(&bot); + + let mut dispatch_handle = tokio::spawn(async move { + platform::telegram::run(dispatch_agent, dispatch_user_ids, dispatch_bot).await + }); + + // Set up signal handlers (SIGINT via ctrl_c for portability, SIGTERM via unix signal) + #[cfg(unix)] + let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("failed to create SIGTERM handler"); + + #[cfg(unix)] + let terminate = sigterm.recv(); + #[cfg(not(unix))] + let terminate = std::future::pending::<()>(); + + tokio::select! 
{ + _ = tokio::signal::ctrl_c() => { + info!("SIGINT received, shutting down..."); + } + _ = terminate => { + info!("SIGTERM received, shutting down..."); + } + result = &mut dispatch_handle => { + result??; + return Ok(()); + } + }; + + // Send shutdown notification + platform::telegram::notify_shutdown(&bot, &config.telegram.allowed_user_ids).await; + + // Brief grace period for message delivery + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + + info!("Shutdown complete."); Ok(()) } diff --git a/src/mcp.rs b/src/mcp.rs index 7512d58..5c147e6 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -382,6 +382,11 @@ impl McpManager { } } + /// Number of connected MCP servers + pub fn server_count(&self) -> usize { + self.connections.len() + } + /// Get all MCP tools as OpenRouter-compatible tool definitions pub fn tool_definitions(&self) -> Vec { let mut definitions = Vec::new(); diff --git a/src/memory/conversations.rs b/src/memory/conversations.rs index b4ec2ca..d73a8f9 100644 --- a/src/memory/conversations.rs +++ b/src/memory/conversations.rs @@ -2,7 +2,7 @@ use anyhow::{Context, Result}; use uuid::Uuid; use super::MemoryStore; -use crate::llm::ChatMessage; +use crate::llm::{ChatMessage, MessageContent}; /// Cast a &[f32] to &[u8] for SQLite blob storage pub(crate) fn f32_slice_to_bytes(floats: &[f32]) -> &[u8] { @@ -15,7 +15,8 @@ pub(crate) fn f32_vec_to_bytes(floats: &[f32]) -> Vec { } impl MemoryStore { - /// Get or create a conversation for a platform user + /// Get or create an active (non-archived) conversation for a platform user. + /// If all existing conversations for the user are archived, a new one is created. 
pub async fn get_or_create_conversation( &self, platform: &str, @@ -27,7 +28,7 @@ impl MemoryStore { let existing: Option = conn .query_row( "SELECT id FROM conversations - WHERE platform = ?1 AND user_id = ?2 + WHERE platform = ?1 AND user_id = ?2 AND (is_archived IS NULL OR is_archived = 0) ORDER BY updated_at DESC LIMIT 1", rusqlite::params![platform, user_id], |row| row.get(0), @@ -62,7 +63,8 @@ impl MemoryStore { .map(|tc| serde_json::to_string(tc).unwrap_or_default()); // Generate embedding before acquiring the DB lock (async HTTP call) - let embedding = if let Some(content) = &message.content { + let content_text: Option = message.content.as_ref().map(|c| c.as_text()); + let embedding = if let Some(ref content) = content_text { if !content.is_empty() && message.role != "tool" { self.embeddings.try_embed_one(content).await } else { @@ -81,7 +83,7 @@ impl MemoryStore { &id, conversation_id, &message.role, - &message.content, + &content_text, &tool_calls_json, &message.tool_call_id, ], @@ -108,29 +110,13 @@ impl MemoryStore { Ok(id) } - /// Clear a conversation (delete all its messages and embeddings) + /// Clear a conversation (soft archive: mark as archived, don't delete messages) pub async fn clear_conversation(&self, platform: &str, user_id: &str) -> Result<()> { let conn = self.conn.lock().await; - // Delete embeddings for messages in this conversation conn.execute( - "DELETE FROM message_embeddings WHERE rowid IN ( - SELECT m.rowid FROM messages m - JOIN conversations c ON m.conversation_id = c.id - WHERE c.platform = ?1 AND c.user_id = ?2 - )", - rusqlite::params![platform, user_id], - )?; - - conn.execute( - "DELETE FROM messages WHERE conversation_id IN ( - SELECT id FROM conversations WHERE platform = ?1 AND user_id = ?2 - )", - rusqlite::params![platform, user_id], - )?; - - conn.execute( - "DELETE FROM conversations WHERE platform = ?1 AND user_id = ?2", + "UPDATE conversations SET is_archived = 1, updated_at = datetime('now') + WHERE platform = ?1 
AND user_id = ?2", rusqlite::params![platform, user_id], )?; @@ -154,12 +140,14 @@ impl MemoryStore { // Load all [SUMMARY] system messages ordered by created_at ASC let mut summary_stmt = conn.prepare( - "SELECT role, content, tool_calls, tool_call_id - FROM messages - WHERE conversation_id = ?1 - AND role = 'system' - AND content LIKE '[SUMMARY]%' - ORDER BY created_at ASC", + "SELECT m.role, m.content, m.tool_calls, m.tool_call_id + FROM messages m + JOIN conversations c ON m.conversation_id = c.id + WHERE m.conversation_id = ?1 + AND m.role = 'system' + AND m.content LIKE '[SUMMARY]%' + AND (c.is_archived IS NULL OR c.is_archived = 0) + ORDER BY m.created_at ASC", )?; let summaries = summary_stmt .query_map(rusqlite::params![conversation_id], |row| { @@ -171,11 +159,13 @@ impl MemoryStore { // Load the most recent raw_limit non-summary messages, re-ordered ASC let mut raw_stmt = conn.prepare( "SELECT role, content, tool_calls, tool_call_id FROM ( - SELECT role, content, tool_calls, tool_call_id, created_at - FROM messages - WHERE conversation_id = ?1 - AND NOT (role = 'system' AND content LIKE '[SUMMARY]%') - ORDER BY created_at DESC + SELECT m.role, m.content, m.tool_calls, m.tool_call_id, m.created_at + FROM messages m + JOIN conversations c ON m.conversation_id = c.id + WHERE m.conversation_id = ?1 + AND NOT (m.role = 'system' AND m.content LIKE '[SUMMARY]%') + AND (c.is_archived IS NULL OR c.is_archived = 0) + ORDER BY m.created_at DESC LIMIT ?2 ) ORDER BY created_at ASC", )?; @@ -409,9 +399,10 @@ fn parse_message_row(row: &rusqlite::Row) -> rusqlite::Result { let tool_calls_json: Option = row.get(2)?; let tool_calls = tool_calls_json.and_then(|json| serde_json::from_str(&json).ok()); + let content_str: Option = row.get(1)?; Ok(ChatMessage { role: row.get(0)?, - content: row.get(1)?, + content: content_str.map(MessageContent::Text), tool_calls, tool_call_id: row.get(3)?, }) @@ -419,12 +410,13 @@ fn parse_message_row(row: &rusqlite::Row) -> rusqlite::Result 
{ #[cfg(test)] mod tests { - use crate::llm::ChatMessage; + use super::*; + use crate::llm::{ChatMessage, MessageContent}; fn make_msg(role: &str, content: &str) -> ChatMessage { ChatMessage { role: role.to_string(), - content: Some(content.to_string()), + content: Some(MessageContent::from_text(content)), tool_calls: None, tool_call_id: None, } @@ -456,7 +448,12 @@ mod tests { .await .unwrap(); assert_eq!(results.len(), 1); - assert!(results[0].content.as_deref().unwrap().contains("love")); + assert!(results[0] + .content + .as_ref() + .map(|c| c.as_text()) + .unwrap() + .contains("love")); } #[tokio::test] @@ -481,4 +478,163 @@ mod tests { messages.len() ); } + + #[tokio::test] + async fn test_clear_archives_instead_of_deleting() { + let store = crate::memory::MemoryStore::open_in_memory().unwrap(); + + let conv = store + .get_or_create_conversation("test", "archive_u2") + .await + .unwrap(); + let msg = crate::llm::ChatMessage { + role: "user".to_string(), + content: Some(crate::llm::MessageContent::from_text("hello world")), + tool_calls: None, + tool_call_id: None, + }; + store.save_message(&conv, &msg).await.unwrap(); + + // Clear + store + .clear_conversation("test", "archive_u2") + .await + .unwrap(); + + // Messages should still exist in DB + let conn = store.conn.lock().await; + let msg_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1", + rusqlite::params![&conv], + |row| row.get(0), + ) + .unwrap(); + drop(conn); + assert!(msg_count > 0, "Messages must persist after archive"); + + // Conversation should be marked archived + let conn2 = store.conn.lock().await; + let archived: Option = conn2 + .query_row( + "SELECT is_archived FROM conversations WHERE id = ?1", + rusqlite::params![&conv], + |row| row.get(0), + ) + .ok(); + drop(conn2); + assert_eq!(archived, Some(1), "Conversation must be marked archived"); + } + + #[tokio::test] + async fn test_get_or_create_skips_archived() { + let store = 
crate::memory::MemoryStore::open_in_memory().unwrap(); + + // Create a conversation + let conv = store + .get_or_create_conversation("test", "archive_u1") + .await + .unwrap(); + + // Manually archive it (simulating what clear_conversation will do) + let conn = store.conn.lock().await; + conn.execute( + "UPDATE conversations SET is_archived = 1 WHERE id = ?1", + rusqlite::params![&conv], + ) + .unwrap(); + drop(conn); + + // get_or_create_conversation should return a NEW conversation + let conv2 = store + .get_or_create_conversation("test", "archive_u1") + .await + .unwrap(); + + assert_ne!( + conv, conv2, + "Must create a new conversation when previous is archived" + ); + + // The new conversation must not be archived + let conn2 = store.conn.lock().await; + let archived: i64 = conn2 + .query_row( + "SELECT is_archived FROM conversations WHERE id = ?1", + rusqlite::params![&conv2], + |row| row.get(0), + ) + .unwrap(); + drop(conn2); + assert_eq!(archived, 0, "New conversation must not be archived"); + } + + #[tokio::test] + async fn test_search_messages_finds_archived_content() { + let store = crate::memory::MemoryStore::open_in_memory().unwrap(); + + let conv = store + .get_or_create_conversation("test", "archive_search_u1") + .await + .unwrap(); + let msg = crate::llm::ChatMessage { + role: "user".to_string(), + content: Some(crate::llm::MessageContent::from_text( + "I love Rust programming and async runtimes", + )), + tool_calls: None, + tool_call_id: None, + }; + store.save_message(&conv, &msg).await.unwrap(); + + // Archive + store + .clear_conversation("test", "archive_search_u1") + .await + .unwrap(); + + // search_messages should still find the content from archived conversations + let results = store.search_messages("Rust", 5).await.unwrap(); + assert!( + !results.is_empty(), + "search_messages must find content in archived conversations" + ); + assert!( + results.iter().any(|m| m + .content + .as_ref() + .map_or(false, |c| c.as_text().contains("Rust"))), 
+ "Archived message content must be searchable" + ); + } + + #[tokio::test] + async fn test_load_messages_excludes_archived() { + let store = crate::memory::MemoryStore::open_in_memory().unwrap(); + + let conv = store + .get_or_create_conversation("test", "archive_u3") + .await + .unwrap(); + let msg = crate::llm::ChatMessage { + role: "user".to_string(), + content: Some(crate::llm::MessageContent::from_text("test")), + tool_calls: None, + tool_call_id: None, + }; + store.save_message(&conv, &msg).await.unwrap(); + + // Archive + store + .clear_conversation("test", "archive_u3") + .await + .unwrap(); + + // load_messages should return empty for an archived conversation + let messages = store.load_messages(&conv).await.unwrap(); + assert!( + messages.is_empty(), + "Archived conversation should return no messages via load_messages" + ); + } } diff --git a/src/memory/embeddings.rs b/src/memory/embeddings.rs index 61a378b..55b75c7 100644 --- a/src/memory/embeddings.rs +++ b/src/memory/embeddings.rs @@ -22,6 +22,8 @@ pub struct EmbeddingConfig { struct EmbeddingRequest { model: String, input: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + dimensions: Option, } #[derive(Deserialize)] @@ -74,6 +76,7 @@ impl EmbeddingEngine { let request = EmbeddingRequest { model: config.model.clone(), input: vec![text.to_string()], + dimensions: Some(config.dimensions), }; let response = self @@ -97,11 +100,22 @@ impl EmbeddingEngine { .await .context("Failed to parse embedding response")?; - resp.data + let embedding = resp + .data .into_iter() .next() .map(|d| d.embedding) - .context("No embedding returned from API") + .context("No embedding returned from API")?; + + if embedding.len() != config.dimensions { + anyhow::bail!( + "Embedding dimension mismatch: model '{}' returned {} dimensions but config expects {}. 
\ + Update [embedding].dimensions in config.toml to {}.", + config.model, embedding.len(), config.dimensions, embedding.len() + ); + } + + Ok(embedding) } /// Try to generate an embedding, returning None if not available or on error diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 708f4e8..fbc54fa 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -293,6 +293,9 @@ impl MemoryStore { conn.execute_batch("ALTER TABLE messages ADD COLUMN is_summarized BOOLEAN DEFAULT 0;") .ok(); // ok() because ALTER TABLE fails if column already exists — that's intentional + conn.execute_batch("ALTER TABLE conversations ADD COLUMN is_archived INTEGER DEFAULT 0;") + .ok(); // safe no-op: ALTER TABLE fails with "duplicate column" on re-run + // Stored embedding dimension (None if legacy DB without schema_meta row) let raw: Option = conn .query_row( diff --git a/src/memory/query_rewriter.rs b/src/memory/query_rewriter.rs index 8051e29..55ee154 100644 --- a/src/memory/query_rewriter.rs +++ b/src/memory/query_rewriter.rs @@ -1,4 +1,4 @@ -use crate::llm::{ChatMessage, LlmClient}; +use crate::llm::{ChatMessage, LlmClient, MessageContent}; /// Rewrite an ambiguous follow-up question into a self-contained search query. /// Uses the last ≤3 non-system messages as conversation context. @@ -32,16 +32,15 @@ pub async fn rewrite_for_rag( let messages = vec![ ChatMessage { role: "system".to_string(), - content: Some( - "You are a query rewriter. Output only the rewritten query, nothing else." - .to_string(), - ), + content: Some(MessageContent::from_text( + "You are a query rewriter. 
Output only the rewritten query, nothing else.", + )), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some(prompt), + content: Some(MessageContent::from_text(prompt)), tool_calls: None, tool_call_id: None, }, @@ -51,6 +50,7 @@ pub async fn rewrite_for_rag( Ok(response) => { let rewritten = response .content + .map(|c| c.as_text()) .unwrap_or_default() .trim() .lines() @@ -94,7 +94,8 @@ fn format_history(messages: &[ChatMessage]) -> String { .iter() .filter_map(|m| { m.content.as_ref().map(|c| { - let snippet = crate::utils::strings::truncate_chars(c, 200); + let text = c.as_text(); + let snippet = crate::utils::strings::truncate_chars(&text, 200); format!("{}: {}", m.role, snippet) }) }) @@ -105,12 +106,12 @@ fn format_history(messages: &[ChatMessage]) -> String { #[cfg(test)] mod tests { use super::*; - use crate::llm::ChatMessage; + use crate::llm::{ChatMessage, MessageContent}; fn msg(role: &str, text: &str) -> ChatMessage { ChatMessage { role: role.to_string(), - content: Some(text.to_string()), + content: Some(MessageContent::from_text(text)), tool_calls: None, tool_call_id: None, } diff --git a/src/memory/rag.rs b/src/memory/rag.rs index c5eca1d..ef8f48a 100644 --- a/src/memory/rag.rs +++ b/src/memory/rag.rs @@ -42,7 +42,8 @@ pub async fn auto_retrieve_context( for msg in &results { if let Some(content) = &msg.content { let role = &msg.role; - let snippet = crate::utils::strings::truncate_chars(content, 300); + let text = content.as_text(); + let snippet = crate::utils::strings::truncate_chars(&text, 300); block.push_str(&format!("[{}] {}\n", role, snippet)); } } @@ -59,13 +60,13 @@ pub async fn auto_retrieve_context( #[cfg(test)] mod tests { use super::*; - use crate::llm::ChatMessage; + use crate::llm::{ChatMessage, MessageContent}; use crate::memory::MemoryStore; fn user_msg(text: &str) -> ChatMessage { ChatMessage { role: "user".to_string(), - content: Some(text.to_string()), + content: 
Some(MessageContent::from_text(text)), tool_calls: None, tool_call_id: None, } @@ -165,7 +166,9 @@ mod tests { let msg = crate::llm::ChatMessage { role: "user".to_string(), - content: Some("I prefer TypeScript for frontend work".to_string()), + content: Some(crate::llm::MessageContent::from_text( + "I prefer TypeScript for frontend work", + )), tool_calls: None, tool_call_id: None, }; diff --git a/src/memory/summarizer.rs b/src/memory/summarizer.rs index eb2ad54..12ac4f8 100644 --- a/src/memory/summarizer.rs +++ b/src/memory/summarizer.rs @@ -1,7 +1,7 @@ use anyhow::Result; use tracing::{info, warn}; -use crate::llm::{ChatMessage, LlmClient}; +use crate::llm::{ChatMessage, LlmClient, MessageContent}; use super::MemoryStore; @@ -44,23 +44,22 @@ pub async fn summarize_conversation( let messages = vec![ ChatMessage { role: "system".to_string(), - content: Some( - "You produce concise, factual conversation summaries. Output only bullet points." - .to_string(), - ), + content: Some(MessageContent::from_text( + "You produce concise, factual conversation summaries. 
Output only bullet points.", + )), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some(summarization_prompt), + content: Some(MessageContent::from_text(summarization_prompt)), tool_calls: None, tool_call_id: None, }, ]; let response = llm.chat(&messages, &[]).await?; - let summary_text = response.content.unwrap_or_default(); + let summary_text = response.content.map(|c| c.as_text()).unwrap_or_default(); if summary_text.trim().is_empty() { warn!(conversation_id = %conversation_id, "LLM returned empty summary — skipping"); @@ -69,7 +68,10 @@ pub async fn summarize_conversation( let summary_msg = ChatMessage { role: "system".to_string(), - content: Some(format!("[SUMMARY]\n{}", summary_text.trim())), + content: Some(MessageContent::from_text(format!( + "[SUMMARY]\n{}", + summary_text.trim() + ))), tool_calls: None, tool_call_id: None, }; @@ -114,13 +116,14 @@ pub async fn summarize_all_active( #[cfg(test)] mod tests { - use crate::llm::ChatMessage; + use super::*; + use crate::llm::{ChatMessage, MessageContent}; use crate::memory::MemoryStore; fn user_msg(text: &str) -> ChatMessage { ChatMessage { role: "user".to_string(), - content: Some(text.to_string()), + content: Some(MessageContent::from_text(text)), tool_calls: None, tool_call_id: None, } diff --git a/src/platform/mod.rs b/src/platform/mod.rs index a97d93e..c48c9a2 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -1,6 +1,26 @@ pub mod telegram; pub mod tool_notifier; +/// What kind of attachment was received +#[derive(Debug, Clone, PartialEq)] +pub enum AttachmentKind { + Image, + Pdf, + Docx, + Other, +} + +/// A file attachment received from a platform +#[derive(Debug, Clone)] +pub struct Attachment { + pub kind: AttachmentKind, + /// Absolute path to the downloaded temp file + pub path: std::path::PathBuf, + pub mime_type: String, + /// Original filename, if known + pub file_name: Option, +} + /// A message received from any platform #[derive(Debug, 
Clone)] #[allow(dead_code)] @@ -15,4 +35,6 @@ pub struct IncomingMessage { pub user_name: String, /// The message text pub text: String, + /// Attached files, if any + pub attachments: Vec, } diff --git a/src/platform/telegram.rs b/src/platform/telegram.rs index 5c648d0..1d74f33 100644 --- a/src/platform/telegram.rs +++ b/src/platform/telegram.rs @@ -1,12 +1,14 @@ +use std::path::{Path, PathBuf}; use std::sync::Arc; -use anyhow::Result; +use anyhow::{Context, Result}; +use teloxide::net::Download; use teloxide::prelude::*; use teloxide::types::ParseMode; use tracing::{error, info, warn}; use crate::agent::Agent; -use crate::platform::IncomingMessage; +use crate::platform::{Attachment, AttachmentKind, IncomingMessage}; use crate::utils::markdown_entities::{markdown_to_entities, split_entities}; use crate::utils::telegram_markdown::escape_text; @@ -73,7 +75,10 @@ pub(crate) fn supported_commands() -> Vec { use teloxide::types::BotCommand; vec![ BotCommand::new("start", "Show the welcome message and command help"), - BotCommand::new("clear", "Clear the current conversation history"), + BotCommand::new( + "clear", + "Archive the current conversation, keeping past messages searchable", + ), BotCommand::new("tools", "List available built-in and MCP tools"), BotCommand::new("skills", "List loaded skills"), BotCommand::new("verbose", "Toggle tool-call progress display"), @@ -81,6 +86,54 @@ pub(crate) fn supported_commands() -> Vec { ] } +/// Send startup notification to all allowed users. +/// Best-effort: logs failures, never blocks startup. 
+pub async fn notify_startup( + bot: &teloxide::Bot, + allowed_user_ids: &[u64], + model: &str, + mcp_count: usize, + skills_count: usize, + embedding_enabled: bool, +) { + let memory_status = if embedding_enabled { + "embedding enabled" + } else { + "FTS5 only" + }; + + let msg = format!( + "RustFox is online 🦊\nModel: {model}\nMCP: {mcp} server(s) connected\nSkills: {skills} loaded\nMemory: {memory}", + model = model, mcp = mcp_count, skills = skills_count, memory = memory_status, + ); + + for &user_id in allowed_user_ids { + let chat_id = teloxide::types::ChatId(user_id as i64); + if let Err(e) = bot.send_message(chat_id, &msg).await { + warn!( + "Failed to send startup notification to user {}: {}", + user_id, e + ); + } + } +} + +/// Send shutdown notification to all allowed users. +/// Best-effort: logs failures, never blocks shutdown. +pub async fn notify_shutdown(bot: &teloxide::Bot, allowed_user_ids: &[u64]) { + let msg = "RustFox is going offline. Goodbye!"; + + for &user_id in allowed_user_ids { + let chat_id = teloxide::types::ChatId(user_id as i64); + if let Err(e) = bot.send_message(chat_id, msg).await { + warn!( + "Failed to send shutdown notification to user {}: {}", + user_id, e + ); + } + } +} + /// Run the Telegram bot platform pub async fn run( agent: Arc, @@ -91,6 +144,17 @@ pub async fn run( info!("Starting Telegram platform..."); + // Send startup notifications (best-effort) — before agent is moved into dptree + notify_startup( + &bot, + &allowed_user_ids, + &agent.config.openrouter.model, + agent.mcp.server_count(), + agent.skills.read().await.len(), + agent.memory.embeddings.is_available(), + ) + .await; + // Publish the slash-command menu to Telegram so clients show suggestions. // Best-effort: a network failure here must not block the bot from running. 
let commands = supported_commands(); @@ -135,16 +199,67 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe }; let user_id = user.id.0; - let text = match msg.text() { - Some(t) => t.to_string(), - None => return Ok(()), - }; - let user_name = user.first_name.clone(); + // For media messages, use caption as text; for text messages, use msg.text() + let text = msg + .text() + .or_else(|| msg.caption()) + .unwrap_or("") + .to_string(); + + // Temp dir for file downloads — created lazily by download_telegram_file + let temp_dir = std::env::temp_dir().join(format!("rustfox_{}", uuid::Uuid::new_v4())); + + let mut attachments: Vec = Vec::new(); + + // Handle photo attachments — last PhotoSize is the highest resolution + if let Some(photos) = msg.photo() { + if let Some(largest) = photos.last() { + let file_id = largest.file.id.to_string(); + match download_telegram_file(&bot, &file_id, &temp_dir, None).await { + Ok((path, mime)) => { + attachments.push(Attachment { + kind: AttachmentKind::Image, + path, + mime_type: mime, + file_name: None, + }); + } + Err(e) => warn!("Failed to download photo: {:#}", e), + } + } + } + + // Handle document attachments + if let Some(doc) = msg.document() { + let file_id = doc.file.id.to_string(); + let file_name = doc.file_name.clone(); + match download_telegram_file(&bot, &file_id, &temp_dir, file_name.as_deref()).await { + Ok((path, mime)) => { + let kind = classify_attachment_kind(&mime, file_name.as_deref()); + attachments.push(Attachment { + kind, + path, + mime_type: mime, + file_name, + }); + } + Err(e) => warn!("Failed to download document: {:#}", e), + } + } + + // Skip if there is nothing to process + if text.is_empty() && attachments.is_empty() { + return Ok(()); + } + info!( - "Telegram message from {} ({}): {}", - user_name, user_id, text + "Telegram message from {} ({}): {} [attachments: {}]", + user_name, + user_id, + if text.is_empty() { "(no text)" } else { &text }, + attachments.len() ); // 
Handle commands @@ -155,9 +270,12 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe { error!("Failed to clear conversation: {}", e); } - bot.send_message(msg.chat.id, escape_text("Conversation cleared.")) - .parse_mode(ParseMode::MarkdownV2) - .await?; + bot.send_message( + msg.chat.id, + escape_text("Conversation archived. Past messages remain searchable."), + ) + .parse_mode(ParseMode::MarkdownV2) + .await?; return Ok(()); } @@ -501,6 +619,7 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe chat_id: msg.chat.id.0.to_string(), user_name, text, + attachments, }; // Process through agent — moves stream_token_tx and tool_event_tx @@ -536,6 +655,11 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe // Wait for stream receiver to complete its final edit stream_handle.await.ok(); + // Cleanup temp dir used for file downloads (async to avoid blocking the executor) + if temp_dir.exists() { + tokio::fs::remove_dir_all(&temp_dir).await.ok(); + } + // Delete the "Thinking..." placeholder now that the response (or error // reply below) has been delivered. Best-effort: ignore failures so a // stale placeholder never blocks reporting the actual outcome. @@ -556,6 +680,88 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe Ok(()) } +/// Download a Telegram file to the given directory, creating it if needed. +/// Returns (local_path, detected_mime_type). 
+async fn download_telegram_file( + bot: &Bot, + file_id: &str, + dest_dir: &Path, + filename: Option<&str>, +) -> Result<(PathBuf, String)> { + std::fs::create_dir_all(dest_dir).context("Failed to create temp directory")?; + + let file = bot + .get_file(file_id.to_string().into()) + .await + .context("Failed to get file info from Telegram")?; + + let ext = Path::new(&file.path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("bin"); + + let dest_name = match filename { + Some(n) => n.to_string(), + None => format!("{}.{}", uuid::Uuid::new_v4(), ext), + }; + let dest_path = dest_dir.join(&dest_name); + + let mut bytes: Vec = Vec::new(); + bot.download_file(&file.path, &mut bytes) + .await + .context("Failed to download file from Telegram")?; + + std::fs::write(&dest_path, &bytes).context("Failed to write downloaded file")?; + + let mime = infer::get(&bytes) + .map(|t| t.mime_type().to_string()) + .unwrap_or_else(|| mime_from_extension(ext).to_string()); + + Ok((dest_path, mime)) +} + +/// Classify an attachment based on MIME type and filename extension fallback. 
+fn classify_attachment_kind(mime_type: &str, file_name: Option<&str>) -> AttachmentKind { + if mime_type.starts_with("image/") { + return AttachmentKind::Image; + } + if mime_type == "application/pdf" { + return AttachmentKind::Pdf; + } + if mime_type.contains("wordprocessingml") || mime_type == "application/msword" { + return AttachmentKind::Docx; + } + // Fallback: check extension + let name = file_name.unwrap_or(""); + if name.ends_with(".pdf") { + return AttachmentKind::Pdf; + } + if name.ends_with(".docx") || name.ends_with(".doc") { + return AttachmentKind::Docx; + } + if name.ends_with(".jpg") + || name.ends_with(".jpeg") + || name.ends_with(".png") + || name.ends_with(".gif") + || name.ends_with(".webp") + { + return AttachmentKind::Image; + } + AttachmentKind::Other +} + +fn mime_from_extension(ext: &str) -> &'static str { + match ext { + "jpg" | "jpeg" => "image/jpeg", + "png" => "image/png", + "gif" => "image/gif", + "webp" => "image/webp", + "pdf" => "application/pdf", + "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + _ => "application/octet-stream", + } +} + #[cfg(test)] mod tests { use super::*; @@ -675,6 +881,32 @@ mod tests { ); } + #[test] + fn test_classify_attachment_kind_image_jpeg() { + assert_eq!( + classify_attachment_kind("image/jpeg", None), + AttachmentKind::Image + ); + } + + #[test] + fn test_classify_attachment_kind_pdf() { + assert_eq!( + classify_attachment_kind("application/pdf", None), + AttachmentKind::Pdf + ); + } + + #[test] + fn test_classify_attachment_kind_docx() { + assert_eq!( + classify_attachment_kind( + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + None + ), + AttachmentKind::Docx + ); + } #[test] fn test_first_token_does_not_inherit_placeholder_msg_id() { // The streaming task must seed `current_msg_id` to `None` so the first @@ -705,6 +937,22 @@ mod tests { ); } + #[test] + fn test_classify_attachment_kind_fallback_to_extension() { + assert_eq!( + 
classify_attachment_kind("application/octet-stream", Some("report.pdf")), + AttachmentKind::Pdf + ); + assert_eq!( + classify_attachment_kind("application/octet-stream", Some("letter.docx")), + AttachmentKind::Docx + ); + assert_eq!( + classify_attachment_kind("application/octet-stream", Some("photo.jpg")), + AttachmentKind::Image + ); + } + #[test] fn test_placeholder_is_deleted_after_streaming() { // The Thinking placeholder must be cleaned up in `handle_message` after @@ -716,6 +964,14 @@ mod tests { ); } + #[test] + fn test_classify_attachment_kind_unknown() { + assert_eq!( + classify_attachment_kind("application/zip", Some("archive.zip")), + AttachmentKind::Other + ); + } + #[test] fn test_supported_commands_lists_user_visible_commands() { let cmds = supported_commands(); diff --git a/src/supervisor/backend/reasoning.rs b/src/supervisor/backend/reasoning.rs index 80546ed..8e86dab 100644 --- a/src/supervisor/backend/reasoning.rs +++ b/src/supervisor/backend/reasoning.rs @@ -31,6 +31,7 @@ impl ReasoningBackend { chat_id: chat, user_name: "supervisor".into(), text: prompt, + attachments: Vec::new(), }; agent .process_message(&incoming, None, None)