diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d234aa..2335e60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,15 +7,58 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.0] - 2026-03-27 + ### Highlights -- Added a hardened outbound profile for cluster and data-plane deployments -- Ambient proxy environment variables are now ignored by default -- Added hostname, port, and redirect restrictions for tighter egress policy +- Pluggable fetchers for GitHub, Wikipedia, YouTube, ArXiv, StackOverflow, HackerNews, RSS, package registries, docs sites, and Twitter +- Batch fetching for concurrent multi-URL requests +- Content-focused extraction with boilerplate stripping and structured metadata +- Conditional fetching with ETag and If-Modified-Since support +- Improved HTML-to-Markdown conversion quality +- Content quality signals: word count, redirect chain, paywall detection +- Optional Web Bot Authentication support +- Hardened outbound fetch policy with proxy isolation and SSRF mitigations +- Live integration test suite behind feature flag + +### Breaking Changes + +- Ambient proxy environment variables are now ignored by default; deployments that rely on a proxy must now configure it explicitly ### What's Changed -- `fix(security): harden outbound fetch policy and add deployment guidance` +* test(fetchers): add live integration tests behind feature flag ([#84](https://github.com/everruns/fetchkit/pull/84)) +* chore: periodic maintenance — deps update and spec sync ([#83](https://github.com/everruns/fetchkit/pull/83)) +* feat(fetch): add content quality signals (word_count, redirect_chain, is_paywall) ([#82](https://github.com/everruns/fetchkit/pull/82)) +* feat(client): add batch_fetch for concurrent multi-URL fetching ([#81](https://github.com/everruns/fetchkit/pull/81)) +* feat(fetch): add conditional fetching with ETag and If-Modified-Since ([#80](https://github.com/everruns/fetchkit/pull/80)) +* feat(convert): improve HTML-to-Markdown 
conversion quality ([#79](https://github.com/everruns/fetchkit/pull/79)) +* feat(convert): add content-focused extraction with boilerplate stripping ([#78](https://github.com/everruns/fetchkit/pull/78)) +* feat(convert): add structured metadata extraction from HTML pages ([#77](https://github.com/everruns/fetchkit/pull/77)) +* feat(fetchers): add RSSFeedFetcher for structured feed parsing ([#70](https://github.com/everruns/fetchkit/pull/70)) +* feat(fetchers): add HackerNewsFetcher for structured thread extraction ([#69](https://github.com/everruns/fetchkit/pull/69)) +* feat(fetchers): add ArXivFetcher for paper metadata and abstract ([#68](https://github.com/everruns/fetchkit/pull/68)) +* feat(fetchers): add YouTubeFetcher for video metadata extraction ([#67](https://github.com/everruns/fetchkit/pull/67)) +* feat(fetchers): add WikipediaFetcher for article extraction ([#66](https://github.com/everruns/fetchkit/pull/66)) +* feat(fetchers): add PackageRegistryFetcher for PyPI, crates.io, npm ([#65](https://github.com/everruns/fetchkit/pull/65)) +* feat(fetchers): add StackOverflowFetcher for clean Q&A extraction ([#64](https://github.com/everruns/fetchkit/pull/64)) +* feat(fetchers): add DocsSiteFetcher with llms.txt support ([#63](https://github.com/everruns/fetchkit/pull/63)) +* feat(fetchers): add GitHubCodeFetcher for source file fetching ([#62](https://github.com/everruns/fetchkit/pull/62)) +* feat(fetchers): add GitHubIssueFetcher for structured issue/PR fetching ([#61](https://github.com/everruns/fetchkit/pull/61)) +* feat: add process-issues skill for e2e GitHub issue resolution ([#60](https://github.com/everruns/fetchkit/pull/60)) +* feat: add optional Web Bot Authentication support ([#49](https://github.com/everruns/fetchkit/pull/49)) +* feat(fetchers): add TwitterFetcher for tweet URL handling ([#47](https://github.com/everruns/fetchkit/pull/47)) +* feat: skip HTML conversion for non-HTML responses ([#48](https://github.com/everruns/fetchkit/pull/48)) +* 
chore(deps): update workspace dependencies and fix flaky proxy tests ([#46](https://github.com/everruns/fetchkit/pull/46)) +* feat(toolkit): align fetchkit with toolkit library contract ([#45](https://github.com/everruns/fetchkit/pull/45)) +* fix(security): harden outbound fetch policy ([#43](https://github.com/everruns/fetchkit/pull/43)) +* docs: clarify latest-main requirement for worktrees ([#44](https://github.com/everruns/fetchkit/pull/44)) +* fix(security): isolate proxy env in shared runtimes ([#42](https://github.com/everruns/fetchkit/pull/42)) +* fix(security): block IPv4-compatible and 6to4 IPv6 addresses in SSRF protection ([#41](https://github.com/everruns/fetchkit/pull/41)) +* fix(security): sanitize reqwest error messages to prevent hostname leakage ([#40](https://github.com/everruns/fetchkit/pull/40)) +* fix: resolve threat model issues ([#37](https://github.com/everruns/fetchkit/pull/37)) + +**Full Changelog**: https://github.com/everruns/fetchkit/compare/v0.1.3...v0.2.0 ## [0.1.3] - 2026-03-12 @@ -96,7 +139,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 **Full Changelog**: https://github.com/everruns/fetchkit/commits/v0.1.0 -[Unreleased]: https://github.com/everruns/fetchkit/compare/v0.1.3...HEAD +[Unreleased]: https://github.com/everruns/fetchkit/compare/v0.2.0...HEAD +[0.2.0]: https://github.com/everruns/fetchkit/compare/v0.1.3...v0.2.0 [0.1.3]: https://github.com/everruns/fetchkit/compare/v0.1.2...v0.1.3 [0.1.2]: https://github.com/everruns/fetchkit/compare/v0.1.1...v0.1.2 [0.1.1]: https://github.com/everruns/fetchkit/compare/v0.1.0...v0.1.1 diff --git a/Cargo.lock b/Cargo.lock index 0ee76b2..f4cd39b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -528,7 +528,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "fetchkit" -version = "0.1.3" +version = "0.2.0" dependencies = [ "async-trait", "base64", @@ -554,7 +554,7 @@ dependencies = [ [[package]] name = 
"fetchkit-cli" -version = "0.1.3" +version = "0.2.0" dependencies = [ "clap", "fetchkit", @@ -565,7 +565,7 @@ dependencies = [ [[package]] name = "fetchkit-python" -version = "0.1.3" +version = "0.2.0" dependencies = [ "fetchkit", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index 0f537c1..8e160ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ resolver = "2" members = ["crates/*"] [workspace.package] -version = "0.1.3" +version = "0.2.0" edition = "2021" license = "MIT" authors = ["Everruns"] diff --git a/crates/fetchkit-cli/Cargo.toml b/crates/fetchkit-cli/Cargo.toml index d1178bb..951412b 100644 --- a/crates/fetchkit-cli/Cargo.toml +++ b/crates/fetchkit-cli/Cargo.toml @@ -21,7 +21,7 @@ default = [] bot-auth = ["fetchkit/bot-auth"] [dependencies] -fetchkit = { path = "../fetchkit", version = "0.1.3" } +fetchkit = { path = "../fetchkit", version = "0.2.0" } tokio = { workspace = true } clap = { workspace = true } serde = { workspace = true }