From 1e1d218d4bf1f207dcb7ca3aaed384566cbf753a Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 29 Jun 2026 09:19:23 -0500 Subject: [PATCH] build: consume published quarto-yaml 0.1.0; delete in-tree YAML stack (bd-egcyeym9) Final phase of the YAML-stack extraction. quarto-yaml and quarto-yaml-validation now live in the standalone posit-dev/quarto-yaml workspace and are published to crates.io at 0.1.0; q2 cuts over. - [workspace.dependencies.quarto-yaml] path -> version = "0.1.0"; dropped [workspace.dependencies.quarto-yaml-validation] (q2 does not depend on it). - Converted the two remaining quarto-yaml path-deps (pampa, quarto-config) to { workspace = true }; quarto-core / quarto-lsp-core already were. - Deleted crates/quarto-yaml, crates/quarto-yaml-validation, and the demo binary crates/validate-yaml (the only in-tree consumer of quarto-yaml-validation). - CLAUDE.md: moved quarto-yaml into the Externalized foundation crates section; reframed quarto-yaml-validation as published-but-not-a-q2-dep; removed the validate-yaml binary entry. quarto-yaml-validation is published purely for external Posit consumers; q2 has zero dependency on it after this. Error codes ship unchanged as Q-1-x for 0.1.0 (yaml-schema/* origin codes deferred to 0.2.0). Verification: - cargo build --workspace clean; Cargo.lock (root + wasm) resolves quarto-yaml 0.1.0 from the registry (checksum c32ab7b3...); quarto-yaml-validation and validate-yaml absent from the lock. - cargo nextest run --workspace: 9855 passed, 197 skipped. - Full cargo xtask verify GREEN, all 14 steps incl. the hub-client WASM build + hub tests. The WASM crate resolves quarto-yaml transitively (no direct dep needed); its standalone Cargo.lock flips 0.7.0 path -> 0.1.0 registry. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CLAUDE.md | 5 +- Cargo.lock | 34 +- Cargo.toml | 5 +- ...026-06-29-yaml-stack-extraction-handoff.md | 163 +- crates/pampa/Cargo.toml | 2 +- crates/quarto-config/Cargo.toml | 2 +- crates/quarto-yaml-validation/Cargo.toml | 35 - .../JSON-OUTPUT-SCHEMA.md | 472 --- crates/quarto-yaml-validation/README.md | 248 -- .../SCHEMA-FROM-YAML.md | 564 --- .../YAML-1.2-REQUIREMENT.md | 76 - .../quarto-yaml-validation/src/diagnostic.rs | 666 ---- crates/quarto-yaml-validation/src/error.rs | 1195 ------ crates/quarto-yaml-validation/src/lib.rs | 17 - .../src/schema/annotations.rs | 106 - .../src/schema/helpers.rs | 726 ---- .../src/schema/merge.rs | 640 ---- .../quarto-yaml-validation/src/schema/mod.rs | 1533 -------- .../src/schema/parser.rs | 154 - .../src/schema/parsers/arrays.rs | 103 - .../src/schema/parsers/combinators.rs | 477 --- .../src/schema/parsers/enum.rs | 57 - .../src/schema/parsers/mod.rs | 31 - .../src/schema/parsers/objects.rs | 404 -- .../src/schema/parsers/primitive.rs | 68 - .../src/schema/parsers/ref.rs | 38 - .../src/schema/parsers/wrappers.rs | 60 - .../src/schema/types.rs | 164 - ...ic__tests__all_three_formats_snapshot.snap | 53 - crates/quarto-yaml-validation/src/tests.rs | 110 - .../quarto-yaml-validation/src/validator.rs | 2284 ----------- .../test-fixtures/schemas/definitions.yml | 3405 ----------------- .../schemas/document-execute.yml | 150 - .../test-fixtures/schemas/document-text.yml | 109 - .../schemas/document-website.yml | 79 - .../test-fixtures/schemas/schema.yml | 378 -- .../integration/comprehensive_schemas.rs | 222 -- .../tests/integration/main.rs | 10 - .../tests/integration/real_schemas.rs | 216 -- .../tests/integration/schema_compilation.rs | 408 -- .../tests/integration/schema_inheritance.rs | 304 -- .../integration/validation_diagnostic.rs | 487 --- crates/quarto-yaml/Cargo.toml | 27 - crates/quarto-yaml/README.md | 154 - crates/quarto-yaml/YAML-1.2-REQUIREMENT.md | 113 - 
crates/quarto-yaml/benches/memory_overhead.rs | 267 -- .../quarto-yaml/benches/scaling_overhead.rs | 305 -- .../claude-notes/implementation-plan.md | 167 - .../claude-notes/implementation-status.md | 206 - .../claude-notes/memory-overhead-analysis.md | 221 -- .../claude-notes/scaling-analysis.md | 238 -- crates/quarto-yaml/src/error.rs | 148 - crates/quarto-yaml/src/lib.rs | 42 - crates/quarto-yaml/src/parser.rs | 1487 ------- .../quarto-yaml/src/yaml_with_source_info.rs | 312 -- crates/validate-yaml/Cargo.toml | 26 - crates/validate-yaml/README.md | 114 - crates/validate-yaml/src/main.rs | 169 - .../test-data/invalid-document.yaml | 4 - .../test-data/simple-schema.yaml | 19 - .../test-data/type-mismatch-document.yaml | 4 - .../test-data/valid-document.yaml | 4 - crates/wasm-quarto-hub-client/Cargo.lock | 4 +- 63 files changed, 116 insertions(+), 20175 deletions(-) delete mode 100644 crates/quarto-yaml-validation/Cargo.toml delete mode 100644 crates/quarto-yaml-validation/JSON-OUTPUT-SCHEMA.md delete mode 100644 crates/quarto-yaml-validation/README.md delete mode 100644 crates/quarto-yaml-validation/SCHEMA-FROM-YAML.md delete mode 100644 crates/quarto-yaml-validation/YAML-1.2-REQUIREMENT.md delete mode 100644 crates/quarto-yaml-validation/src/diagnostic.rs delete mode 100644 crates/quarto-yaml-validation/src/error.rs delete mode 100644 crates/quarto-yaml-validation/src/lib.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/annotations.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/helpers.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/merge.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/mod.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parser.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parsers/arrays.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parsers/combinators.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parsers/enum.rs delete mode 
100644 crates/quarto-yaml-validation/src/schema/parsers/mod.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parsers/objects.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parsers/primitive.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parsers/ref.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/parsers/wrappers.rs delete mode 100644 crates/quarto-yaml-validation/src/schema/types.rs delete mode 100644 crates/quarto-yaml-validation/src/snapshots/quarto_yaml_validation__diagnostic__tests__all_three_formats_snapshot.snap delete mode 100644 crates/quarto-yaml-validation/src/tests.rs delete mode 100644 crates/quarto-yaml-validation/src/validator.rs delete mode 100644 crates/quarto-yaml-validation/test-fixtures/schemas/definitions.yml delete mode 100644 crates/quarto-yaml-validation/test-fixtures/schemas/document-execute.yml delete mode 100644 crates/quarto-yaml-validation/test-fixtures/schemas/document-text.yml delete mode 100644 crates/quarto-yaml-validation/test-fixtures/schemas/document-website.yml delete mode 100644 crates/quarto-yaml-validation/test-fixtures/schemas/schema.yml delete mode 100644 crates/quarto-yaml-validation/tests/integration/comprehensive_schemas.rs delete mode 100644 crates/quarto-yaml-validation/tests/integration/main.rs delete mode 100644 crates/quarto-yaml-validation/tests/integration/real_schemas.rs delete mode 100644 crates/quarto-yaml-validation/tests/integration/schema_compilation.rs delete mode 100644 crates/quarto-yaml-validation/tests/integration/schema_inheritance.rs delete mode 100644 crates/quarto-yaml-validation/tests/integration/validation_diagnostic.rs delete mode 100644 crates/quarto-yaml/Cargo.toml delete mode 100644 crates/quarto-yaml/README.md delete mode 100644 crates/quarto-yaml/YAML-1.2-REQUIREMENT.md delete mode 100644 crates/quarto-yaml/benches/memory_overhead.rs delete mode 100644 crates/quarto-yaml/benches/scaling_overhead.rs delete mode 100644 
crates/quarto-yaml/claude-notes/implementation-plan.md delete mode 100644 crates/quarto-yaml/claude-notes/implementation-status.md delete mode 100644 crates/quarto-yaml/claude-notes/memory-overhead-analysis.md delete mode 100644 crates/quarto-yaml/claude-notes/scaling-analysis.md delete mode 100644 crates/quarto-yaml/src/error.rs delete mode 100644 crates/quarto-yaml/src/lib.rs delete mode 100644 crates/quarto-yaml/src/parser.rs delete mode 100644 crates/quarto-yaml/src/yaml_with_source_info.rs delete mode 100644 crates/validate-yaml/Cargo.toml delete mode 100644 crates/validate-yaml/README.md delete mode 100644 crates/validate-yaml/src/main.rs delete mode 100644 crates/validate-yaml/test-data/invalid-document.yaml delete mode 100644 crates/validate-yaml/test-data/simple-schema.yaml delete mode 100644 crates/validate-yaml/test-data/type-mismatch-document.yaml delete mode 100644 crates/validate-yaml/test-data/valid-document.yaml diff --git a/CLAUDE.md b/CLAUDE.md index 0b942dce4..7a5a76cb5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -291,7 +291,6 @@ When fixing ANY bug: - `hub`: collaborative editing server for Quarto projects (also available as `quarto hub`) - `pampa`: parse qmd text and produce Pandoc AST and other formats - `qmd-syntax-helper`: help users convert qmd files to the new syntax -- `validate-yaml`: exercise `quarto-yaml-validation` **Core libraries:** - `quarto-core`: core rendering infrastructure for Quarto @@ -301,10 +300,10 @@ When fixing ANY bug: **Externalized foundation crates** (published to crates.io from their own `posit-dev/` repos; consumed here as version deps, no longer in `crates/`): - `quarto-error-reporting`: uniform, helpful, beautiful error messages — now **catalog-agnostic** (the `Q-*` data lives in the in-tree `quarto-error-catalog`). Repo: `posit-dev/quarto-error-reporting`. The `json` wire shape is behind a default-off `json` feature; q2's wire-shape consumers enable it. 
- `quarto-source-map`: maintain source location information for data structures. Repo: `posit-dev/quarto-source-map`. (See `claude-notes/plans/2026-06-26-extract-error-reporting-foundation.md` for the extraction.) +- `quarto-yaml`: YAML parser with accurate fine-grained source locations. Repo: `posit-dev/quarto-yaml` (a two-crate workspace). q2 consumes it as a version dep. (See `claude-notes/plans/2026-06-29-yaml-stack-extraction-handoff.md` for the extraction.) +- `quarto-yaml-validation`: schema validation for YAML objects. Published from the same `posit-dev/quarto-yaml` workspace, but **q2 does not depend on it** — it was extracted purely for external Posit consumers; the demo binary `validate-yaml` was its only in-tree consumer and was deleted at cutover. **Parsing libraries:** -- `quarto-yaml`: YAML parser with accurate fine-grained source locations -- `quarto-yaml-validation`: validate YAML objects using schemas - `quarto-xml`: source-tracked XML parsing - `quarto-parse-errors`: parse error infrastructure diff --git a/Cargo.lock b/Cargo.lock index 998bc4cc2..f0a0b33b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3995,27 +3995,12 @@ dependencies = [ [[package]] name = "quarto-yaml" -version = "0.7.0" -dependencies = [ - "quarto-source-map", - "regex", - "serde", - "thiserror 2.0.18", - "yaml-rust2", -] - -[[package]] -name = "quarto-yaml-validation" -version = "0.7.0" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c32ab7b39ffa5c43c8aa6abf7eff392678acee63bad5d21680fe295e69b7c2e0" dependencies = [ - "anyhow", - "insta", - "quarto-error-reporting", "quarto-source-map", - "quarto-yaml", - "regex", "serde", - "serde_json", "thiserror 2.0.18", "yaml-rust2", ] @@ -6007,19 +5992,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "validate-yaml" -version = "0.7.0" -dependencies = [ - "anyhow", - "clap", - "quarto-error-reporting", - "quarto-source-map", - "quarto-yaml", - "quarto-yaml-validation", - 
"serde_json", -] - [[package]] name = "valuable" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index 7e86786de..fc27a5d78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,10 +107,7 @@ path = "./crates/quarto-core" path = "./crates/quarto-util" [workspace.dependencies.quarto-yaml] -path = "./crates/quarto-yaml" - -[workspace.dependencies.quarto-yaml-validation] -path = "./crates/quarto-yaml-validation" +version = "0.1.0" [workspace.dependencies.quarto-error-reporting] version = "0.1.0" diff --git a/claude-notes/plans/2026-06-29-yaml-stack-extraction-handoff.md b/claude-notes/plans/2026-06-29-yaml-stack-extraction-handoff.md index 7a8aaec3d..8fbb968f7 100644 --- a/claude-notes/plans/2026-06-29-yaml-stack-extraction-handoff.md +++ b/claude-notes/plans/2026-06-29-yaml-stack-extraction-handoff.md @@ -107,7 +107,24 @@ the crate's filesystem home — q2 root — even inside the WASM build). So: resolve `quarto-yaml`, add a direct `quarto-yaml = "0.1.0"` to the wasm crate (as was needed for source-map). -## 6. The one design task: error codes (needs a user decision) +## 6. The one design task: error codes (RESOLVED 2026-06-29 → option B) + +> **DECISION (2026-06-29, user):** Option **(B)** — ship `Q-1-x` as-is in `0.1.0`, +> defer the origin-code migration (`yaml-schema/*`) to `0.2.0`. Rationale: keep +> `0.1.0` **non-breaking** for the invisible internal Posit consumers that +> currently key on `Q-1-x`; cut over to discipline-conformant origin codes in a +> coordinated `0.2.0`. +> +> **Consequence:** `quarto-yaml-validation/src/error.rs` is shipped **unchanged** +> for `0.1.0` — the 14 `ValidationErrorKind::error_code()` mappings (`Q-1-10` … +> `Q-1-99`) and the ~15 tests asserting them stay exactly as-is and stay green. No +> error-code work in this phase. With no catalog installed in the standalone repo, +> diagnostics render **code-only** (`EmptyCatalog`); tests assert on the +> `error_code()` string, not on rendered catalog text, so they are unaffected. 
+> A `0.2.0` TODO carries the `Q-1-x` → `yaml-schema/*` migration (see §6-original +> below for the proposed mapping). +> +> ### Original analysis (kept for the deferred 0.2.0 work) `quarto-yaml-validation/src/error.rs` `ValidationErrorKind::error_code()` currently returns **Quarto presentation codes** `Q-1-10`, `Q-1-11`, … These do **not** belong @@ -138,60 +155,87 @@ the origin codes and may remap to their own presentation codes. ## 7. Execution checklist ### Phase A — `quarto-yaml` (the leaf; publish first) -- [ ] Create `/Users/cscheid/repos/github/posit-dev/quarto-yaml/` as a **workspace**; - copy `crates/quarto-yaml/` → `crates/quarto-yaml/`; add `LICENSE` (from q2 - root), `.gitignore` (`/target`), `.gitattributes` (`* text=auto eol=lf`), - `README.md` (write fresh — the crate is a YAML parser with source tracking). -- [ ] Workspace `Cargo.toml`: `[workspace] members=["crates/*"]`, +> **Status 2026-06-29:** local scaffolding + verification DONE; committed locally +> on `main` of `posit-dev/quarto-yaml` (commit `06c6dd3`). Stopped at the outward +> gate (`gh repo create` + user `cargo publish`). +- [x] Create `/Users/cscheid/repos/github/posit-dev/quarto-yaml/` as a **workspace**; + copied `crates/quarto-yaml/` (src + benches) → `crates/quarto-yaml/`; added + `LICENSE` (from q2 root), `.gitignore` (`/target`), `.gitattributes` + (`* text=auto eol=lf`), repo `README.md` + crate `README.md`. +- [x] Workspace `Cargo.toml`: `[workspace] resolver="3" members=["crates/*"]`, `[workspace.package]` (version `0.1.0`, edition `2024`, license `MIT`, - `repository = https://github.com/posit-dev/quarto-yaml`, authors), and - `[workspace.dependencies]` with `quarto-source-map = "0.1.0"` + the shared - crates.io deps (pin versions to match q2's `[workspace.dependencies]`: - `yaml-rust2`, `serde`, `thiserror`, …). Drop `[lints] workspace = true` or - add a `[workspace.lints]` block (q2 has none). 
-- [ ] Build + `cargo test` + `cargo clippy --all-targets -- -D warnings` + `cargo - fmt --check` + `cargo publish --dry-run -p quarto-yaml`. Fix any stable-clippy - lints (see §4). -- [ ] External-consumer smoke test (separate crate, path dep, parse a YAML string, - assert source info) — proves the public API is usable standalone. -- [ ] `gh repo create posit-dev/quarto-yaml --public --source=. --push` - (public; mirror Phase 1/3). Confirm CI green on all 3 OSes. -- [ ] **USER**: `cargo publish -p quarto-yaml` (from the new repo). + `repository`/`homepage` = posit-dev/quarto-yaml, authors), and + `[workspace.dependencies]` with `quarto-source-map = "0.1.0"` + shared deps + pinned to q2 (`yaml-rust2 0.11`, `serde 1.0.228`, `thiserror 2.0`, + `serde_json 1.0.149`, `anyhow 1.0.101`, `regex 1.12`). Added a minimal + `[workspace.lints.clippy]` (`result_large_err`/`large_enum_variant` = allow, + to preserve the public `Result`/`Error` API — not q2's full allow-list). +- [x] Build + `cargo test` (44: 39 unit + 5 doctest) + `cargo clippy + --all-targets -- -D warnings` (clean after the lints block) + `cargo fmt + --check` + `cargo publish --dry-run -p quarto-yaml` (10 files) — all green on + stable rustc 1.95. +- [x] External-consumer smoke test (separate crate, path dep, parsed + `title: My Document`, asserted the value's `source_info.start_offset() == 7`) + — public API usable standalone. ✅ +- [x] `gh repo create posit-dev/quarto-yaml --public --source=. --push` — done + 2026-06-29 (https://github.com/posit-dev/quarto-yaml). **CI green on all 3 + OSes** (run 28377015391: ubuntu 26s, macos 22s, windows 1m4s, fmt+clippy 22s). +- [x] **USER:** `cargo publish -p quarto-yaml` — **DONE** (live on crates.io as + `quarto-yaml 0.1.0`, 2026-06-29). ### Phase B — `quarto-yaml-validation` (second crate, same repo) -- [ ] Copy `crates/quarto-yaml-validation/` (incl. `test-fixtures/`) into the - workspace. 
Its `quarto-yaml` dep = `{ version = "0.1.0", path = - "../quarto-yaml" }` (path for local dev, version so `cargo publish` resolves - the now-published `quarto-yaml`); `quarto-source-map` / `quarto-error-reporting` - = `"0.1.0"`. -- [ ] **Apply the error-code change** from §6 (after the user's A/B decision) + - update the `error.rs` tests. -- [ ] If yaml-validation tests render diagnostics and asserted `Q-1-x` text, update - them; with no catalog installed in the standalone repo, diagnostics render - **code-only** (`EmptyCatalog`) — assert on the origin codes. -- [ ] Build + test + clippy + `cargo publish --dry-run -p quarto-yaml-validation` + - smoke test (validate a doc against a schema, assert a `yaml-schema/*` code). -- [ ] CI green (3 OSes). **USER**: `cargo publish -p quarto-yaml-validation` - (after `quarto-yaml` is live so the dep resolves). +> **Status 2026-06-29:** local work + CI DONE; committed `ac8d72b`, pushed to +> `main`, **CI green on all 3 OSes** (run 28377797754: ubuntu 30s, macos 37s, +> windows 2m44s, fmt+clippy 28s). Awaiting user `cargo publish`. +- [x] Copied `crates/quarto-yaml-validation/` (src + integration tests + + `test-fixtures/` + the 4 design `.md`s) into the workspace. `quarto-yaml` + dep = `{ workspace = true }`, where the workspace entry carries both + `path = "crates/quarto-yaml"` (local dev) and `version = "0.1.0"` (so + `cargo publish` resolves the registry crate); `quarto-source-map` / + `quarto-error-reporting` = `"0.1.0"` (default features — no json/coalesce + use, so json feature not needed). +- [x] **Error codes:** NO change for `0.1.0` (decision **B** — keep `Q-1-x`). + `error.rs` shipped verbatim; the 14 mappings + ~15 tests stay green. +- [x] No `Q-1-x` test/snapshot edits needed: the render path never consults the + catalog (title "YAML Validation Failed" is hardcoded; no docs URL surfaced), + so the diagnostic snapshot reproduced **byte-for-byte** with no catalog + installed. 
One stable-clippy fix: moved the test module to end-of-file in + `schema/parsers/combinators.rs` (`items_after_test_module`). +- [x] Build + test (330 incl. snapshot; 5 pre-existing ignored doctests) + clippy + `-D warnings` + fmt + `cargo publish --dry-run -p quarto-yaml-validation` + (39 files) + external smoke test (validated a doc → `Q-1-11` standalone, + rendered `[Q-1-11] age: Expected number, got string`). All green. +- [x] CI green (3 OSes). **USER (pending):** `cargo publish -p + quarto-yaml-validation` (`quarto-yaml 0.1.0` is already live, so the dep + resolves). ### Phase C — q2 cutover (one PR, like #348/#350) -- [ ] Branch `braid/bd-egcyeym9-yaml-cutover` off updated main. -- [ ] `[workspace.dependencies.quarto-yaml]` `path` → `version = "0.1.0"`. -- [ ] Convert `quarto-yaml` path-deps (`pampa`, `quarto-config`) → - `{ workspace = true }`; leave the existing `{ workspace = true }` ones. -- [ ] **Delete** `crates/quarto-yaml-validation/` and `crates/validate-yaml/` (and - drop `[workspace.dependencies.quarto-yaml-validation]` from root `Cargo.toml`; - check for any stray refs to `validate-yaml` in `xtask`/docs/CI). -- [ ] Delete in-tree `crates/quarto-yaml/`. -- [ ] `cargo build --workspace` → confirm Cargo.lock resolves `quarto-yaml 0.1.0` - from the registry. `cargo nextest run --workspace`. **Full `cargo xtask - verify`** (the WASM leg is the real gate — see §5 WASM gotcha; do NOT pipe it - through `tail`, which masks the exit code — a Phase 1 lesson). -- [ ] Update `CLAUDE.md`: move `quarto-yaml` into the "Externalized foundation - crates" section; remove `quarto-yaml-validation` and `validate-yaml` from the - binaries/crate lists; the `validate-yaml` line under **Binaries** must go. -- [ ] Commit, push to `feature/…`, open PR against `main`, watch CI (5 checks), - report. Merge is the user's call. +> **Status 2026-06-29:** all local steps DONE on branch +> `braid/bd-egcyeym9-yaml-cutover`; **full `cargo xtask verify` GREEN (all 14 +> steps incl. 
WASM build + hub tests)**. Awaiting commit/push/PR. +- [x] Branch `braid/bd-egcyeym9-yaml-cutover` off updated main (HEAD `df029875`). +- [x] `[workspace.dependencies.quarto-yaml]` `path` → `version = "0.1.0"`. +- [x] Converted `quarto-yaml` path-deps (`pampa`, `quarto-config`) → + `{ workspace = true }`; left the existing `{ workspace = true }` ones + (`quarto-core`, `quarto-lsp-core`). +- [x] **Deleted** `crates/quarto-yaml-validation/` and `crates/validate-yaml/`; + dropped `[workspace.dependencies.quarto-yaml-validation]`. No stray + `validate-yaml` refs in xtask/CI/docs — only historical `claude-notes/` + design docs (left as-is); one prose comment in + `quarto-core/src/attribution/mode.rs` mentions the crate as a hypothetical + consumer (left — design rationale, not a dep). +- [x] Deleted in-tree `crates/quarto-yaml/`. +- [x] `cargo build --workspace` clean; **`Cargo.lock` resolves `quarto-yaml 0.1.0` + from the registry** (checksum `c32ab7b3…`); `quarto-yaml-validation` / + `validate-yaml` absent. `cargo nextest run --workspace` **9855 passed**. + **Full `cargo xtask verify` GREEN (14/14)** — the WASM crate resolved + `quarto-yaml` *transitively* (no direct dep needed, per §5); its own + `Cargo.lock` flipped `0.7.0` path → `0.1.0` registry. +- [x] Updated `CLAUDE.md`: `quarto-yaml` moved to the "Externalized foundation + crates" section; `quarto-yaml-validation` reframed as published-but-not-a-q2- + dep; `validate-yaml` binary line removed. +- [ ] Commit, push to `feature/bd-egcyeym9-yaml-cutover`, open PR against `main`, + watch CI, report. Merge is the user's call. ## 8. Proven gotchas (from Phases 1 & 3 — don't rediscover them) @@ -205,12 +249,15 @@ the origin codes and may remap to their own presentation codes. - **crates.io / GitHub are user/identity-gated and irreversible** → you prep & dry- run; the user publishes and (optionally) `cargo owner --add github:posit-dev:<team>`. -## 9. Open items to raise with the user +## 9. 
Open items — RESOLVED 2026-06-29 -1. **Error-code policy (§6 A vs B)** — the one blocking decision. -2. Confirm the repo is `posit-dev/quarto-yaml` (workspace, two crates) — decided - 2026-06-29, but re-confirm before `gh repo create`. -3. Reuse the Phase-1 visibility/ownership choices (public repo; personal crates.io - account now, `cargo owner --add posit-dev` on a weekday) unless told otherwise. -4. Whether to also relocate the deleted **`CONTRIBUTING-ERRORS.md`** intent / any - q2-internal YAML docs (low priority). +1. **Error-code policy (§6 A vs B)** — **RESOLVED: option B** (keep `Q-1-x` in + `0.1.0`, defer origin codes to `0.2.0`). See §6. +2. **Repo = `posit-dev/quarto-yaml` (workspace, two crates)** — **CONFIRMED**. +3. **Process choices** — **CONFIRMED: mirror Phase 1/3.** Public repo; agent preps + + dry-runs; **user** runs each `cargo publish` (leaf `quarto-yaml` first); + personal crates.io account now, `cargo owner --add posit-dev` deferred to a + weekday. +4. **Relocate `CONTRIBUTING-ERRORS.md` / q2 YAML docs** — low priority; default to + **skip** unless the user asks. (Phase 3 dropped its `CONTRIBUTING-ERRORS.md`; + the Quarto catalog policy lives with `quarto-error-catalog`.) 
diff --git a/crates/pampa/Cargo.toml b/crates/pampa/Cargo.toml index b5d07a12a..399da4578 100644 --- a/crates/pampa/Cargo.toml +++ b/crates/pampa/Cargo.toml @@ -42,7 +42,7 @@ comrak = { version = "0.52.0", default-features = false } comrak-to-pandoc = { path = "../comrak-to-pandoc" } quarto-error-reporting = { workspace = true } quarto-source-map = { workspace = true } -quarto-yaml = { path = "../quarto-yaml" } +quarto-yaml = { workspace = true } quarto-config = { path = "../quarto-config" } quarto-parse-errors = { path = "../quarto-parse-errors" } quarto-treesitter-ast = { workspace = true } diff --git a/crates/quarto-config/Cargo.toml b/crates/quarto-config/Cargo.toml index 536e538a3..ae7de8a7f 100644 --- a/crates/quarto-config/Cargo.toml +++ b/crates/quarto-config/Cargo.toml @@ -13,7 +13,7 @@ description = "Configuration merging with source tracking for Quarto" [dependencies] quarto-source-map = { workspace = true } -quarto-yaml = { path = "../quarto-yaml" } +quarto-yaml = { workspace = true } quarto-pandoc-types = { path = "../quarto-pandoc-types" } quarto-error-reporting = { workspace = true } indexmap = "2.13" diff --git a/crates/quarto-yaml-validation/Cargo.toml b/crates/quarto-yaml-validation/Cargo.toml deleted file mode 100644 index b8f8ebdf8..000000000 --- a/crates/quarto-yaml-validation/Cargo.toml +++ /dev/null @@ -1,35 +0,0 @@ -[package] -name = "quarto-yaml-validation" -version.workspace = true -edition.workspace = true -authors.workspace = true -license.workspace = true -repository.workspace = true - -[dependencies] -# Error handling -anyhow.workspace = true -thiserror.workspace = true - -# Serialization (serde_json for enum values in schemas) -serde.workspace = true -serde_json.workspace = true -# Note: We don't use serde_yaml because it only supports YAML 1.1. -# See YAML-1.2-REQUIREMENT.md for details. 
- -# YAML parsing -yaml-rust2.workspace = true - -# Workspace dependencies -quarto-yaml.workspace = true -quarto-source-map.workspace = true -quarto-error-reporting.workspace = true - -# Additional dependencies for validation -regex = "1.12" - -[dev-dependencies] -insta.workspace = true - -[lints] -workspace = true diff --git a/crates/quarto-yaml-validation/JSON-OUTPUT-SCHEMA.md b/crates/quarto-yaml-validation/JSON-OUTPUT-SCHEMA.md deleted file mode 100644 index ceb6baf92..000000000 --- a/crates/quarto-yaml-validation/JSON-OUTPUT-SCHEMA.md +++ /dev/null @@ -1,472 +0,0 @@ -# ValidationDiagnostic JSON Output Schema - - - -This document describes the JSON output format produced by `ValidationDiagnostic::to_json()` for machine-readable consumption of YAML validation errors. - -## Overview - -The JSON output provides structured validation error information with: -- **Machine-readable error types** with structured data -- **Source locations** with filenames and line/column positions -- **Path information** showing where in the YAML document the error occurred -- **Human-readable messages** for convenience - -## Top-Level Structure - -```json -{ - "error_kind": { ... }, // Structured error type with data - "code": "Q-1-XX", // Error code - "message": "...", // Human-readable message - "hints": [ ... ], // Array of hint strings (optional) - "instance_path": [ ... ], // Path in YAML document - "schema_path": [ ... ], // Path in schema - "source_range": { ... } // Source location (optional) -} -``` - -## Field Descriptions - -### `error_kind` (Object) - -Structured error information with discriminated union format: - -```json -{ - "type": "ErrorTypeName", - "data": { ... 
} -} -``` - -Common error types: - -#### TypeMismatch - -```json -{ - "type": "TypeMismatch", - "data": { - "expected": "number", - "got": "string" - } -} -``` - -#### MissingRequiredProperty - -```json -{ - "type": "MissingRequiredProperty", - "data": { - "property": "author" - } -} -``` - -#### InvalidEnumValue - -```json -{ - "type": "InvalidEnumValue", - "data": { - "value": "foo", - "allowed": ["html", "pdf", "docx"] - } -} -``` - -#### NumberOutOfRange - -```json -{ - "type": "NumberOutOfRange", - "data": { - "value": 150, - "minimum": 0, - "maximum": 100, - "exclusive_minimum": null, - "exclusive_maximum": null - } -} -``` - -#### UnknownProperty - -```json -{ - "type": "UnknownProperty", - "data": { - "property": "unkown_field" - } -} -``` - -#### StringPatternMismatch - -```json -{ - "type": "StringPatternMismatch", - "data": { - "value": "invalid-email", - "pattern": "^[^@]+@[^@]+\\.[^@]+$" - } -} -``` - -#### Other Error Types - -- `SchemaFalse` - Schema explicitly rejects all values -- `StringTooShort` - String length below `minLength` -- `StringTooLong` - String length above `maxLength` -- `NumberNotMultipleOf` - Number not a multiple of specified value -- `ArrayTooShort` - Array length below `minItems` -- `ArrayTooLong` - Array length above `maxItems` -- `ArrayItemsNotUnique` - Array contains duplicate items -- `InvalidPropertyName` - Property name doesn't match pattern -- `AdditionalPropertiesNotAllowed` - Extra properties when `additionalProperties: false` -- `AnyOfNoMatch` - None of the `anyOf` schemas matched -- `AllOfNotAllMatch` - Not all `allOf` schemas matched -- `Other` - Catch-all for other validation failures - -### `code` (String) - -Error code in format `Q-1-XX`: -- `Q-1-10`: Missing required property -- `Q-1-11`: Type mismatch -- `Q-1-12`: Invalid enum value -- `Q-1-13`: String too short -- `Q-1-14`: String too long -- `Q-1-15`: String pattern mismatch -- `Q-1-16`: Number out of range -- `Q-1-17`: Number not multiple of -- `Q-1-18`: 
Unknown property -- `Q-1-19`: Array too short -- `Q-1-29`: String length out of range -- `Q-1-21`: Array items not unique -- `Q-1-22`: Invalid property name -- `Q-1-23`: Additional properties not allowed -- `Q-1-24`: AnyOf no match -- `Q-1-25`: AllOf not all match -- `Q-1-30`: Schema false -- `Q-1-99`: Other validation error - -### `message` (String) - -Human-readable error message. Provided for convenience but consumers should use `error_kind` for programmatic error handling. - -Example: `"Expected number, got string"` - -### `hints` (Array of Strings, Optional) - -Array of actionable hints for fixing the error. May be empty. - -Example: -```json -[ - "Use a numeric value without quotes?", - "Check the allowed value range in the schema?" -] -``` - -### `instance_path` (Array of PathSegment) - -Path through the YAML document to the location of the error. Each segment is either a key or an index: - -```json -[ - {"type": "Key", "value": "format"}, - {"type": "Key", "value": "html"}, - {"type": "Index", "value": 0} -] -``` - -- **Key**: Object property name -- **Index**: Array index (0-based) - -Empty array `[]` means error is at document root. - -### `schema_path` (Array of Strings) - -Path through the schema to the constraint that failed. Useful for debugging schema issues. - -Example: -```json -["object", "properties", "age", "number"] -``` - -### `source_range` (Object, Optional) - -Source location information with filename and positions. May be absent if source tracking is unavailable. 
- -```json -{ - "filename": "config.yaml", - "start_offset": 18, - "end_offset": 33, - "start_line": 3, - "start_column": 7, - "end_line": 3, - "end_column": 21 -} -``` - -**Fields:** -- `filename` (String): Path to the source file -- `start_offset` (Number): Start byte offset in file (0-based) -- `end_offset` (Number): End byte offset in file (0-based) -- `start_line` (Number): Start line number (1-based) -- `start_column` (Number): Start column number (1-based) -- `end_line` (Number): End line number (1-based) -- `end_column` (Number): End column number (1-based) - -**Note**: Line and column numbers are 1-indexed for human readability. Offsets are 0-indexed for programmatic use. - -## Complete Examples - -### Type Mismatch Error - -```json -{ - "error_kind": { - "type": "TypeMismatch", - "data": { - "expected": "number", - "got": "string" - } - }, - "code": "Q-1-11", - "message": "Expected number, got string", - "hints": [ - "Use a numeric value without quotes?" - ], - "instance_path": [ - {"type": "Key", "value": "age"} - ], - "schema_path": [ - "object", - "properties", - "age", - "number" - ], - "source_range": { - "filename": "user.yaml", - "start_offset": 5, - "end_offset": 20, - "start_line": 1, - "start_column": 6, - "end_line": 1, - "end_column": 20 - } -} -``` - -### Missing Required Property Error - -```json -{ - "error_kind": { - "type": "MissingRequiredProperty", - "data": { - "property": "author" - } - }, - "code": "Q-1-10", - "message": "Missing required property 'author'", - "hints": [ - "Add the `author` property to your YAML document?" 
- ], - "instance_path": [], - "schema_path": [ - "object" - ], - "source_range": { - "filename": "document.yaml", - "start_offset": 0, - "end_offset": 109, - "start_line": 1, - "start_column": 1, - "end_line": 5, - "end_column": 1 - } -} -``` - -### Nested Path Error - -```json -{ - "error_kind": { - "type": "StringPatternMismatch", - "data": { - "value": "invalid-email", - "pattern": "^[^@]+@[^@]+\\.[^@]+$" - } - }, - "code": "Q-1-15", - "message": "String does not match pattern", - "hints": [ - "Check that the string matches the expected format?" - ], - "instance_path": [ - {"type": "Key", "value": "user"}, - {"type": "Key", "value": "email"} - ], - "schema_path": [ - "object", - "properties", - "user", - "object", - "properties", - "email", - "string" - ], - "source_range": { - "filename": "config.yaml", - "start_offset": 35, - "end_offset": 49, - "start_line": 4, - "start_column": 10, - "end_line": 4, - "end_column": 24 - } -} -``` - -## Usage in Downstream Tools - -### TypeScript/JavaScript - -```typescript -interface ValidationDiagnostic { - error_kind: { - type: string; - data: Record<string, any>; - }; - code: string; - message: string; - hints?: string[]; - instance_path: PathSegment[]; - schema_path: string[]; - source_range?: SourceRange; -} - -type PathSegment = - | { type: "Key"; value: string } - | { type: "Index"; value: number }; - -interface SourceRange { - filename: string; - start_offset: number; - end_offset: number; - start_line: number; - start_column: number; - end_line: number; - end_column: number; -} - -// Example usage -function handleValidationError(diagnostic: ValidationDiagnostic) { - switch (diagnostic.error_kind.type) { - case "TypeMismatch": - console.error(`Expected ${diagnostic.error_kind.data.expected}, got ${diagnostic.error_kind.data.got}`); - break; - case "MissingRequiredProperty": - console.error(`Missing property: ${diagnostic.error_kind.data.property}`); - break; - // ...
handle other types - } -} -``` - -### Python - -```python -from dataclasses import dataclass -from typing import Optional, Union, List, Dict, Any - -@dataclass -class KeySegment: - type: str # "Key" - value: str - -@dataclass -class IndexSegment: - type: str # "Index" - value: int - -PathSegment = Union[KeySegment, IndexSegment] - -@dataclass -class SourceRange: - filename: str - start_offset: int - end_offset: int - start_line: int - start_column: int - end_line: int - end_column: int - -@dataclass -class ValidationDiagnostic: - error_kind: Dict[str, Any] - code: str - message: str - hints: Optional[List[str]] - instance_path: List[PathSegment] - schema_path: List[str] - source_range: Optional[SourceRange] - -# Example usage -def handle_validation_error(diagnostic: dict): - error_type = diagnostic["error_kind"]["type"] - - if error_type == "TypeMismatch": - data = diagnostic["error_kind"]["data"] - print(f"Expected {data['expected']}, got {data['got']}") - elif error_type == "MissingRequiredProperty": - prop = diagnostic["error_kind"]["data"]["property"] - print(f"Missing property: {prop}") -``` - -### LSP Diagnostics - -Convert to Language Server Protocol diagnostic format: - -```typescript -import { Diagnostic, DiagnosticSeverity, Range, Position } from 'vscode-languageserver'; - -function toDiagnostic(vd: ValidationDiagnostic): Diagnostic { - const range: Range = vd.source_range ? { - start: { - line: vd.source_range.start_line - 1, // LSP uses 0-based - character: vd.source_range.start_column - 1 - }, - end: { - line: vd.source_range.end_line - 1, - character: vd.source_range.end_column - 1 - } - } : defaultRange; - - return { - severity: DiagnosticSeverity.Error, - range, - message: vd.message, - code: vd.code, - source: 'quarto-yaml-validation', - data: vd.error_kind // Preserve structured data - }; -} -``` - -## Versioning - -This schema follows semantic versioning. The current version is **1.0**. 
- -Breaking changes to the JSON structure will increment the major version. Consumers should check for version compatibility. - -## See Also - -- [ValidationError](src/error.rs) - Internal error representation -- [ValidationDiagnostic](src/diagnostic.rs) - Wrapper type for JSON output -- [Error Codes](ERROR-CODES.md) - Complete list of error codes diff --git a/crates/quarto-yaml-validation/README.md b/crates/quarto-yaml-validation/README.md deleted file mode 100644 index 620892c7b..000000000 --- a/crates/quarto-yaml-validation/README.md +++ /dev/null @@ -1,248 +0,0 @@ -# quarto-yaml-validation - -A Rust library for validating YAML data against schemas defined in quarto-cli's YAML schema format. - -## Features - -- **Schema Parsing**: Parse schemas from YAML using quarto-cli's syntax -- **quarto-cli Compatibility**: Full support for all patterns used in quarto-cli schema files -- **Type Safety**: Strongly-typed Rust representation of schemas -- **Source Tracking**: Maintains source location information for error reporting -- **Comprehensive Testing**: 100% success rate parsing real quarto-cli schemas - -## Supported Schema Patterns - -### Primitive Types -- `string`, `number`, `boolean`, `null`, `any`, `path` -- Validation constraints (minLength, maximum, pattern, etc.) - -### Collections -- **Enum**: Fixed set of allowed values -- **Array**: Heterogeneous arrays with item schemas -- **arrayOf**: Homogeneous arrays (quarto extension) -- **maybeArrayOf**: Value OR array of values (quarto extension) - -### Objects -- **object**: Standard key-value mappings with properties -- **record**: Closed objects with all properties required (quarto extension) -- **required: "all"**: Auto-expand to all property keys - -### Combinators -- **anyOf**: Match any subschema -- **allOf**: Match all subschemas - -### Advanced -- **ref**: Schema references -- **schema wrapper**: Add annotations without nesting -- **Annotations**: descriptions, completions, tags, etc. 
- -## Quick Start - -```rust -use quarto_yaml_validation::Schema; - -// Parse a schema from YAML -let yaml_text = r#" -object: - properties: - name: string - age: number - required: [name] -"#; - -let yaml = quarto_yaml::parse(yaml_text)?; -let schema = Schema::from_yaml(&yaml)?; - -// Access schema information -match schema { - Schema::Object(obj) => { - println!("Object with {} properties", obj.properties.len()); - println!("Required: {:?}", obj.required); - } - _ => unreachable!(), -} -``` - -## Validation - -```rust -use quarto_yaml_validation::{Schema, SchemaRegistry, ValidationDiagnostic, validate}; -use quarto_source_map::SourceContext; - -// Parse schema and document -let schema_yaml = quarto_yaml::parse(r#" -object: - properties: - age: - number: - minimum: 0 - maximum: 150 - required: [age] -"#)?; -let schema = Schema::from_yaml(&schema_yaml)?; - -let doc = quarto_yaml::parse_file("age: 200", "user.yaml")?; - -// Create SourceContext for error location tracking -let mut source_ctx = SourceContext::new(); -source_ctx.add_file("user.yaml".to_string(), Some("age: 200".to_string())); - -// Validate -let registry = SchemaRegistry::new(); -match validate(&doc, &schema, &registry, &source_ctx) { - Ok(()) => println!("✓ Valid"), - Err(error) => { - // Convert to diagnostic for rich error reporting - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - - // Get human-readable text with source highlighting - eprintln!("{}", diagnostic.to_text(&source_ctx)); - - // Or get machine-readable JSON - let json = diagnostic.to_json(); - println!("{}", serde_json::to_string_pretty(&json)?); - - // Or get a single compact line, optimized for token-efficient - // consumption (e.g.
feeding errors to an LLM): - eprintln!("{}", diagnostic.to_compact()); - } -} -``` - -### Error Output Formats - -**Text Output (ariadne-style):** -``` -Error: [Q-1-16] YAML Validation Failed - ╭─[user.yaml:1:6] - │ - 1 │ age: 200 - │ ─┬─ Number 200 is out of range (max: 150) - │ ╰─── violates constraint -───╯ -✖ At document path: `age` -ℹ Schema constraint: object > properties > age > number -? Check the allowed value range in the schema? -``` - -**JSON Output:** -```json -{ - "error_kind": { - "type": "NumberOutOfRange", - "data": { - "value": 200, - "minimum": null, - "maximum": 150, - "exclusive_minimum": null, - "exclusive_maximum": null - } - }, - "code": "Q-1-16", - "message": "Number 200 is out of range (max: 150)", - "instance_path": [{"type": "Key", "value": "age"}], - "schema_path": ["object", "properties", "age", "number"], - "source_range": { - "filename": "user.yaml", - "start_offset": 5, - "end_offset": 8, - "start_line": 1, - "start_column": 6, - "end_line": 1, - "end_column": 9 - } -} -``` - -**Compact Output (`diagnostic.to_compact()`):** - -A single line per error — `file:line:col [CODE] path: message (Hint: ...)` — -with no box drawing or redundant structural fields. Designed for -token-efficient consumption by LLMs. - -``` -user.yaml:1:6 [Q-1-16] age: Number 200 is out of range (max: 150) (Hint: Check the allowed value range in the schema?)
-``` - -## Documentation - -- **[SCHEMA-FROM-YAML.md](./SCHEMA-FROM-YAML.md)**: Complete YAML syntax reference with examples - - All supported patterns - - Real-world examples from quarto-cli - - Pattern correspondence table (YAML → Rust) - - Usage guide -- **[JSON-OUTPUT-SCHEMA.md](./JSON-OUTPUT-SCHEMA.md)**: JSON error output format specification - - Complete field reference - - Error type catalog - - TypeScript/Python type definitions - - LSP integration examples - -## Testing - -The library includes comprehensive tests against real quarto-cli schema files: - -```bash -cargo test --package quarto-yaml-validation -``` - -**Test Coverage**: -- 56 total tests (43 unit + 13 integration) -- 100% success parsing quarto-cli schemas: - - document-execute.yml: 12/12 schemas - - document-text.yml: 7/7 schemas - - document-website.yml: 8/8 schemas - -## Architecture - -The codebase is organized into focused modules: - -``` -src/ -├── schema/ -│ ├── mod.rs # Schema enum and public API -│ ├── types.rs # Schema struct definitions -│ ├── parser.rs # Entry point: from_yaml() -│ ├── annotations.rs # Annotation parsing -│ ├── helpers.rs # Helper functions -│ └── parsers/ -│ ├── primitive.rs # boolean, number, string, etc. -│ ├── enum.rs # Enum schemas -│ ├── arrays.rs # Array and arrayOf -│ ├── objects.rs # Object and record -│ ├── combinators.rs # anyOf, allOf, maybeArrayOf -│ ├── ref.rs # References -│ └── wrappers.rs # Schema wrappers -├── validator.rs # Validation logic (future) -└── error.rs # Error types -``` - -## Status - -**Production Ready**: All critical quarto-cli patterns implemented and tested. 
- -### Completed (P0/P1 - High Priority) -- ✅ All primitive types -- ✅ Enum (inline and explicit) -- ✅ Array schemas -- ✅ arrayOf (simple and with length) -- ✅ maybeArrayOf -- ✅ Object schemas -- ✅ record (both forms) -- ✅ required: "all" -- ✅ anyOf / allOf -- ✅ References -- ✅ Schema wrappers -- ✅ Annotations - -### Future Enhancements (P2/P3 - Lower Priority) -- Nested property extraction (double setBaseSchemaProperties) -- Schema inheritance (super/baseSchema) -- resolveRef vs ref distinction -- propertyNames validation -- namingConvention validation -- additionalCompletions -- Pattern as schema type - -## License - -Part of the Kyoto/Quarto project. diff --git a/crates/quarto-yaml-validation/SCHEMA-FROM-YAML.md b/crates/quarto-yaml-validation/SCHEMA-FROM-YAML.md deleted file mode 100644 index 72b0f7420..000000000 --- a/crates/quarto-yaml-validation/SCHEMA-FROM-YAML.md +++ /dev/null @@ -1,564 +0,0 @@ -# Schema YAML Syntax Reference - -This document describes the YAML syntax for defining schemas in `quarto-yaml-validation`. This syntax is compatible with quarto-cli's schema system and supports all patterns used in quarto-cli schema files. - -## Table of Contents - -- [Overview](#overview) -- [Quick Reference](#quick-reference) -- [Primitive Types](#primitive-types) -- [Enum Types](#enum-types) -- [Array Types](#array-types) -- [Object Types](#object-types) -- [Combinators](#combinators) -- [References](#references) -- [Schema Wrappers](#schema-wrappers) -- [Annotations](#annotations) -- [Pattern Correspondence Table](#pattern-correspondence-table) - -## Overview - -The schema system uses YAML to define validation rules for configuration data. Schemas can be defined in three main forms: - -1. **Short form**: Simple string like `"boolean"`, `"string"`, `"number"` -2. **Object form**: Hash with schema type key like `{boolean: {...}}`, `{string: {...}}` -3. 
**Inline arrays**: Arrays for enum values like `[val1, val2, val3]` - -## Quick Reference - -```yaml -# Primitive types -string # Simple string -number # Numeric value -boolean # True/false -null # Null value -any # Any value -path # File path (alias for string) - -# Enum -enum: [value1, value2] # Inline enum -[value1, value2, value3] # Alternative inline form - -# Arrays -array: # Heterogeneous array - items: string -arrayOf: string # Homogeneous array (all items same type) -maybeArrayOf: string # Value OR array of values - -# Objects -object: # Key-value mapping - properties: - name: string - required: [name] -record: # Shorthand for closed object with all properties required - name: string - age: number - -# Combinators -anyOf: [string, number] # Match any subschema -allOf: [schema1, schema2] # Match all subschemas - -# References -ref: schema/base # Reference to another schema - -# Schema wrapper -schema: string # Add annotations without nesting -``` - -## Primitive Types - -### String - -```yaml -# Short form -string - -# Object form with validation -string: - minLength: 1 - maxLength: 100 - pattern: "^[a-z]+$" - description: "A lowercase string" -``` - -**Rust mapping**: `Schema::String(StringSchema { ... })` - -### Number - -```yaml -# Short form -number - -# Object form with validation -number: - minimum: 0 - maximum: 100 - exclusiveMinimum: 0 - exclusiveMaximum: 100 - multipleOf: 5 - description: "A number between 0 and 100" -``` - -**Rust mapping**: `Schema::Number(NumberSchema { ... })` - -### Boolean - -```yaml -# Short form -boolean - -# Object form with annotations -boolean: - description: "Enable feature" - default: false -``` - -**Rust mapping**: `Schema::Boolean(BooleanSchema { ... })` - -### Null, Any, Path - -```yaml -null # Only matches null -any # Matches any value -path # File path (same as string) -``` - -**Rust mapping**: -- `Schema::Null(NullSchema { ... })` -- `Schema::Any(AnySchema { ... })` -- `Schema::String(StringSchema { ... 
})` (for path) - -## Enum Types - -Enums define a fixed set of allowed values. - -### Inline Array Form - -```yaml -# Simplest form - array at top level -[value1, value2, value3] -``` - -### Explicit Form - -```yaml -enum: - values: [red, green, blue] - description: "Color choices" -``` - -**Real-world example** (from quarto-cli): -```yaml -# document-text.yml - wrap option -enum: [auto, none, preserve] -``` - -**Rust mapping**: `Schema::Enum(EnumSchema { values, ... })` - -## Array Types - -### Array (Heterogeneous) - -Standard JSON Schema array with explicit items schema: - -```yaml -array: - items: string - minItems: 1 - maxItems: 10 - uniqueItems: true -``` - -**Rust mapping**: `Schema::Array(ArraySchema { ... })` - -### arrayOf (Homogeneous) - -**Quarto extension** - shorthand for arrays where all items have the same type. - -```yaml -# Simple form -arrayOf: string - -# With length constraint -arrayOf: - schema: string - length: 2 - -# Nested arrays -arrayOf: - arrayOf: - schema: string - length: 2 -``` - -**Real-world examples** (from quarto-cli): -```yaml -# definitions.yml - pandoc-shortcodes -arrayOf: path - -# definitions.yml - pandoc-format-request-headers -arrayOf: - arrayOf: - schema: string - length: 2 - -# document-execute.yml - julia exeflags -arrayOf: string -``` - -**Rust mapping**: `Schema::Array(ArraySchema { items: Some(Box::new(inner)), ... })` -- Simple form: Items set to inner schema -- With length: Both `min_items` and `max_items` set to `length` - -### maybeArrayOf - -**Quarto extension** - value can be either T or an array of T. Expands to `anyOf: [T, arrayOf(T)]`. - -```yaml -# Simple form -maybeArrayOf: string - -# Accepts: "value" OR ["value1", "value2"] -``` - -**Real-world example** (from quarto-cli): -```yaml -# definitions.yml - contents-auto -auto: - anyOf: - - boolean - - maybeArrayOf: string -``` - -**Rust mapping**: `Schema::AnyOf(AnyOfSchema { schemas: [inner, array_of_inner], ... 
})` -- Includes `complete-from` tag for IDE support - -## Object Types - -### Object - -Standard JSON Schema object with properties: - -```yaml -object: - properties: - name: string - age: number - email: - string: - pattern: "^.+@.+$" - patternProperties: - "^x-": string - additionalProperties: boolean - required: [name] - closed: true - minProperties: 1 - maxProperties: 10 -``` - -**Special feature - required: all**: -```yaml -object: - properties: - foo: string - bar: number - baz: boolean - required: all # Expands to [foo, bar, baz] -``` - -**Real-world example** (from quarto-cli): -```yaml -# document-execute.yml - kernelspec -object: - properties: - display_name: - string: - description: The name to display in the UI. - language: - string: - description: The name of the language the kernel implements. - name: - string: - description: The name of the kernel. - required: all -``` - -**Rust mapping**: `Schema::Object(ObjectSchema { ... })` - -### record - -**Quarto extension** - shorthand for a closed object where all properties are required. - -```yaml -# Form 1: Explicit properties -record: - properties: - type: string - value: number - -# Form 2: Shorthand (properties inferred) -record: - type: string - value: number -``` - -Both forms expand to: -```yaml -object: - properties: - type: string - value: number - required: [type, value] - closed: true -``` - -**Real-world example** (from quarto-cli): -```yaml -# definitions.yml - pandoc-format-filters -arrayOf: - anyOf: - - path - - object: - properties: - type: string - path: path - required: [path] - - record: - type: - enum: [citeproc] -``` - -**Rust mapping**: `Schema::Object(ObjectSchema { closed: true, required: all_keys, ... 
})` - -## Combinators - -### anyOf - -Validates if **any** of the subschemas matches: - -```yaml -# Inline array form -anyOf: [string, number, boolean] - -# Explicit form with annotations -anyOf: - schemas: [string, number] - description: "String or number" -``` - -**Real-world example** (from quarto-cli): -```yaml -# definitions.yml - date -anyOf: - - string - - object: - properties: - value: string - format: string - required: [value] -``` - -**Rust mapping**: `Schema::AnyOf(AnyOfSchema { schemas, ... })` - -### allOf - -Validates if **all** of the subschemas match: - -```yaml -# Inline array form -allOf: [schema1, schema2] - -# Explicit form -allOf: - schemas: [schema1, schema2] - description: "Must match both" -``` - -**Rust mapping**: `Schema::AllOf(AllOfSchema { schemas, ... })` - -## References - -Reference another schema by identifier: - -```yaml -ref: schema/base -``` - -Alternative syntax: -```yaml -$ref: schema/base -``` - -**Rust mapping**: `Schema::Ref(RefSchema { reference: "schema/base", ... })` - -## Schema Wrappers - -The `schema` key allows adding annotations to a schema without nesting under a type key. - -### Without Schema Wrapper - -```yaml -anyOf: - - boolean - - string -description: "A boolean or string" -completions: ["true", "false", "auto"] -``` - -This requires parsing the entire hash to extract the schema type. - -### With Schema Wrapper - -```yaml -schema: - anyOf: - - boolean - - string -description: "A boolean or string" -completions: ["true", "false", "auto"] -``` - -Cleaner separation when the schema is complex. - -**Real-world example** (from quarto-cli): -```yaml -# document-text.yml - eol field -schema: - enum: [lf, crlf, native] -description: "Manually specify line endings" - -# document-execute.yml - julia env -schema: - arrayOf: string - description: Environment variables to pass to the Julia worker process. 
-``` - -**Rust mapping**: Transparent - parses inner schema and applies outer annotations - -## Annotations - -All schema types support these annotation fields: - -```yaml -description: - short: "Brief description" - long: | - Longer multiline - description - -completions: [value1, value2] # IDE completion suggestions -hidden: true # Hide from UI -default: defaultValue # Default value - -tags: - category: input - custom-key: custom-value -``` - -**Rust mapping**: All annotations stored in `SchemaAnnotations` struct - -## Pattern Correspondence Table - -| YAML Pattern | Rust Type | Status | Notes | -|--------------|-----------|--------|-------| -| `string` | `Schema::String` | ✅ Complete | Short form | -| `string: {minLength: 1}` | `Schema::String` | ✅ Complete | Object form with validation | -| `number` | `Schema::Number` | ✅ Complete | Short form | -| `number: {minimum: 0}` | `Schema::Number` | ✅ Complete | Object form with validation | -| `boolean` | `Schema::Boolean` | ✅ Complete | Short form | -| `null` | `Schema::Null` | ✅ Complete | Short form | -| `any` | `Schema::Any` | ✅ Complete | Short form | -| `path` | `Schema::String` | ✅ Complete | Alias for string | -| `[val1, val2]` | `Schema::Enum` | ✅ Complete | Inline enum | -| `enum: {values: [...]}` | `Schema::Enum` | ✅ Complete | Explicit enum | -| `array: {items: T}` | `Schema::Array` | ✅ Complete | Standard array | -| `arrayOf: T` | `Schema::Array` | ✅ Complete | Quarto extension (P0) | -| `arrayOf: {schema: T, length: N}` | `Schema::Array` | ✅ Complete | With length constraint | -| `maybeArrayOf: T` | `Schema::AnyOf` | ✅ Complete | Quarto extension (P1) | -| `object: {properties: {...}}` | `Schema::Object` | ✅ Complete | Standard object | -| `object: {required: all}` | `Schema::Object` | ✅ Complete | Auto-expand required (P1) | -| `record: {...}` | `Schema::Object` | ✅ Complete | Quarto extension (P1) | -| `anyOf: [...]` | `Schema::AnyOf` | ✅ Complete | Combinator | -| `allOf: [...]` | `Schema::AllOf` | ✅ 
Complete | Combinator | -| `ref: id` | `Schema::Ref` | ✅ Complete | Reference | -| `schema: T` | (transparent) | ✅ Complete | Schema wrapper (P1) | - -### Not Yet Implemented (P2/P3) - -| YAML Pattern | Priority | Notes | -|--------------|----------|-------| -| Nested property extraction | P2 | Double setBaseSchemaProperties pattern | -| `super: base` inheritance | P2 | Schema inheritance | -| `resolveRef` vs `ref` | P2 | Reference resolution distinction | -| `propertyNames` | P2 | Property name validation | -| `namingConvention` | P2 | Naming convention validation | -| `additionalCompletions` | P2 | Additional completion sources | -| `pattern` as schema type | P3 | Pattern-based validation as type | - -## Usage Examples - -### Basic Validation - -```rust -use quarto_yaml_validation::Schema; - -// Parse a schema from YAML -let yaml_text = r#" -string: - minLength: 1 - maxLength: 100 -"#; -let yaml = quarto_yaml::parse(yaml_text)?; -let schema = Schema::from_yaml(&yaml)?; - -// Use schema for validation (future API) -// let result = schema.validate(&data); -``` - -### Working with Complex Schemas - -```rust -// Parse a complex anyOf schema -let yaml_text = r#" -anyOf: - - string - - object: - properties: - value: string - format: string - required: [value] -"#; -let yaml = quarto_yaml::parse(yaml_text)?; -let schema = Schema::from_yaml(&yaml)?; - -// Access schema information -match schema { - Schema::AnyOf(anyof) => { - println!("AnyOf with {} alternatives", anyof.schemas.len()); - } - _ => unreachable!(), -} -``` - -### Loading quarto-cli Schemas - -```rust -// Load and parse a quarto-cli schema file -let yaml_content = std::fs::read_to_string("document-execute.yml")?; -let yaml = quarto_yaml::parse(&yaml_content)?; - -// The file is an array of field definitions -let items = yaml.as_array().expect("Expected array"); - -for item in items { - let name = item - .get_hash_value("name") - .and_then(|v| v.yaml.as_str()) - .unwrap_or(""); - - if let Some(schema_yaml) = 
item.get_hash_value("schema") { - let schema = Schema::from_yaml(schema_yaml)?; - println!("Parsed schema for field: {}", name); - } -} -``` \ No newline at end of file diff --git a/crates/quarto-yaml-validation/YAML-1.2-REQUIREMENT.md b/crates/quarto-yaml-validation/YAML-1.2-REQUIREMENT.md deleted file mode 100644 index 7ddf4d1ea..000000000 --- a/crates/quarto-yaml-validation/YAML-1.2-REQUIREMENT.md +++ /dev/null @@ -1,76 +0,0 @@ -# YAML 1.2 Requirement - -## Critical Constraint - -**We CANNOT use `serde_yaml` until it supports YAML 1.2.** - -See `/crates/quarto-yaml/YAML-1.2-REQUIREMENT.md` for full background. - -## Impact on This Crate - -The `Schema` enum in `src/schema.rs` currently implements `serde::Deserialize`, which uses `serde_yaml` under the hood. This is **incorrect** because: - -1. User documents are parsed with YAML 1.2 (via `quarto-yaml`) -2. Schema files are parsed with YAML 1.1 (via `serde_yaml`) -3. This inconsistency breaks user expectations - -## Current Status - -**TEMPORARY**: The current serde implementation is acceptable for initial development and testing, but must be replaced before production use. - -The implementation includes this comment (line 264-267): - -```rust -// Note: This uses serde_yaml which supports YAML 1.1 (via yaml-rust). -// For user YAML documents, we use yaml-rust2 (YAML 1.2) via quarto-yaml. -// This is acceptable because schema definitions are simpler and don't -// typically use YAML 1.2-specific features. User documents get full YAML 1.2 support. -``` - -**This comment is now outdated** - we need YAML 1.2 for schemas too. 
- -## Required Changes - -Replace serde deserialization with manual parsing from `YamlWithSourceInfo`: - -**Before (current)**: -```rust -impl<'de> Deserialize<'de> for Schema { - fn deserialize(deserializer: D) -> Result { - deserializer.deserialize_any(SchemaVisitor) - } -} -``` - -**After (required)**: -```rust -impl Schema { - pub fn from_yaml(yaml: &YamlWithSourceInfo) -> Result { - // Manual parsing from YamlWithSourceInfo - // This uses yaml-rust2 (YAML 1.2) via quarto-yaml - } -} -``` - -## Benefits of This Approach - -1. ✅ Consistent YAML 1.2 parsing -2. ✅ Source location tracking for better error messages -3. ✅ No serde_yaml dependency -4. ✅ Extensions can use same infrastructure - -## Quarto Extensions - -One design goal is that **Quarto extensions can declare their own schemas** using the same infrastructure as core Quarto. This means: - -- Extensions define schemas in YAML files -- Extensions use `quarto-yaml-validation` to validate their documents -- Everything uses YAML 1.2 consistently - -If we used `serde_yaml`, extensions would be stuck with YAML 1.1 limitations. - -## Implementation Priority - -This change should happen **before** implementing the `validate-yaml` binary, as it affects the fundamental architecture. - -See `/claude-notes/yaml-schema-from-yaml-design.md` for the revised implementation plan. diff --git a/crates/quarto-yaml-validation/src/diagnostic.rs b/crates/quarto-yaml-validation/src/diagnostic.rs deleted file mode 100644 index b2ba629b7..000000000 --- a/crates/quarto-yaml-validation/src/diagnostic.rs +++ /dev/null @@ -1,666 +0,0 @@ -//! Validation diagnostic with structured error information. -//! -//! This module provides `ValidationDiagnostic`, a wrapper around `DiagnosticMessage` -//! that preserves all validation-specific structure (instance paths, schema paths, -//! source ranges) for machine-readable JSON output while delegating text rendering -//! to `DiagnosticMessage`. 
- -use crate::error::ValidationError; -use quarto_error_reporting::{DiagnosticMessage, DiagnosticMessageBuilder}; -use quarto_source_map::{SourceContext, SourceInfo}; -use serde::{Deserialize, Serialize}; - -/// A validation diagnostic with structured error information. -/// -/// This type preserves all validation-specific structure (instance paths, -/// schema paths, source ranges) while delegating rendering to DiagnosticMessage. -/// -/// # Example -/// -/// ```ignore -/// let vd = ValidationDiagnostic::from_validation_error(&error, &source_ctx); -/// -/// // Machine-readable JSON -/// println!("{}", serde_json::to_string_pretty(&vd.to_json())?); -/// -/// // Human-readable text with ariadne -/// eprintln!("{}", vd.to_text(&source_ctx)); -/// ``` -#[derive(Debug, Clone)] -pub struct ValidationDiagnostic { - /// Structured error kind - machine readable - pub kind: crate::error::ValidationErrorKind, - - /// The validation error code (Q-1-xxx) - pub code: String, - - /// Path through the YAML instance where the error occurred - /// Example: ["format", "html", "toc"] - pub instance_path: Vec<PathSegment>, - - /// Path through the schema that was being validated - /// Example: ["properties", "format", "properties", "html", "properties", "toc"] - pub schema_path: Vec<String>, - - /// Source location with filename and byte offsets/line numbers - pub source_range: Option<SourceRange>, - - /// Author-supplied hint override from the schema's `errorMessage` - /// annotation. When present, it replaces the auto-generated hint.
- pub custom_hint: Option<String>, - - /// Internal: DiagnosticMessage for text rendering - diagnostic: DiagnosticMessage, -} - -/// A segment in an instance path (object key or array index) -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(tag = "type", content = "value")] -pub enum PathSegment { - /// Object property key - Key(String), - /// Array index - Index(usize), -} - -/// Source range with filename and both offset and line/column positions -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct SourceRange { - /// Filename (human-readable, not a file_id) - pub filename: String, - - /// Start byte offset in the file - pub start_offset: usize, - - /// End byte offset in the file - pub end_offset: usize, - - /// Start line number (1-indexed) - pub start_line: usize, - - /// Start column number (1-indexed) - pub start_column: usize, - - /// End line number (1-indexed) - pub end_line: usize, - - /// End column number (1-indexed) - pub end_column: usize, -} - -impl ValidationDiagnostic { - /// Get human-readable message (lazily generated from kind) - pub fn message(&self) -> String { - self.kind.message() - } - - /// Get hints. An author-supplied `errorMessage` override (if any) replaces - /// the auto-generated hint; otherwise the hint is derived from the kind. - pub fn hints(&self) -> Vec<String> { - Self::effective_hints(&self.kind, self.custom_hint.as_deref()) - } - - /// Compute the effective hints: the authored override if present, else the - /// auto-generated hints for this error kind.
- fn effective_hints( - kind: &crate::error::ValidationErrorKind, - custom_hint: Option<&str>, - ) -> Vec<String> { - match custom_hint { - Some(hint) => vec![hint.to_string()], - None => Self::suggest_fixes_from_kind(kind), - } - } - - /// Create a new ValidationDiagnostic from a ValidationError - /// - /// # Arguments - /// - /// * `error` - The validation error to convert - /// * `source_ctx` - Source context for resolving file names and line/column positions - /// - /// # Example - /// - /// ```ignore - /// let error = ValidationError::new("Expected number, got string", path); - /// let vd = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - /// ``` - pub fn from_validation_error(error: &ValidationError, source_ctx: &SourceContext) -> Self { - // Build the diagnostic message for text rendering - let diagnostic = Self::build_diagnostic_message(error, source_ctx); - - // Extract source range with filename - let source_range = error - .yaml_node - .as_ref() - .and_then(|node| Self::extract_source_range(&node.source_info, source_ctx)); - - // Convert instance path segments - let instance_path = error - .instance_path - .segments() - .iter() - .map(|seg| match seg { - crate::error::PathSegment::Key(k) => PathSegment::Key(k.clone()), - crate::error::PathSegment::Index(i) => PathSegment::Index(*i), - }) - .collect(); - - Self { - kind: error.kind.clone(), - code: error.error_code().to_string(), - instance_path, - schema_path: error.schema_path.segments().to_vec(), - source_range, - custom_hint: error.custom_hint.clone(), - diagnostic, - } - } - - /// Render as JSON for machine consumption - /// - /// # Example - /// - /// ```ignore - /// let json = vd.to_json(); - /// println!("{}", serde_json::to_string_pretty(&json)?); - /// ``` - pub fn to_json(&self) -> serde_json::Value { - use serde_json::json; - - let mut obj = json!({ - "error_kind": self.kind, // Structured, machine-readable - "code": self.code, - "instance_path": self.instance_path, - "schema_path":
self.schema_path, - }); - - if let Some(range) = &self.source_range { - obj["source_range"] = json!(range); - } - - // Include human-readable fields for convenience - obj["message"] = json!(self.kind.message()); - - let hints = Self::effective_hints(&self.kind, self.custom_hint.as_deref()); - if !hints.is_empty() { - obj["hints"] = json!(hints); - } - - obj - } - - /// Render as text for human consumption (uses ariadne/tidyverse) - /// - /// # Example - /// - /// ```ignore - /// let text = vd.to_text(&source_ctx); - /// eprintln!("{}", text); - /// ``` - pub fn to_text(&self, source_ctx: &SourceContext) -> String { - self.diagnostic.to_text(Some(source_ctx)) - } - - /// Render as a single compact line, optimized for token-efficient - /// consumption (e.g. feeding validation errors to an LLM). - /// - /// Format: `file:line:col [CODE] path: message (hint: ...)` - /// - /// The location prefix is omitted when no source range is available, and - /// the path renders as `(root)` for top-level errors. Unlike [`to_text`], - /// this drops the ariadne box drawing, source snippet, and the redundant - /// schema-constraint line; hints are kept because they materially help a - /// model propose a correct fix. Multiple hints are joined with `; `. 
- /// - /// [`to_text`]: Self::to_text - /// - /// # Example - /// - /// ```ignore - /// for d in diagnostics { - /// println!("{}", d.to_compact()); - /// } - /// ``` - pub fn to_compact(&self) -> String { - let mut out = String::new(); - - if let Some(range) = &self.source_range { - out.push_str(&format!( - "{}:{}:{} ", - range.filename, range.start_line, range.start_column - )); - } - - out.push_str(&format!("[{}] ", self.code)); - - let path = Self::instance_path_string(&self.instance_path); - if path.is_empty() { - out.push_str("(root): "); - } else { - out.push_str(&path); - out.push_str(": "); - } - - out.push_str(&self.message()); - - let hints = self.hints(); - if !hints.is_empty() { - out.push_str(&format!(" (Hint: {})", hints.join("; "))); - } - - out - } - - /// Render an instance path as a compact dotted/indexed string. - /// - /// e.g. `[Key("authors"), Index(0), Key("name")]` -> `authors[0].name`. - fn instance_path_string(segments: &[PathSegment]) -> String { - let mut out = String::new(); - for seg in segments { - match seg { - PathSegment::Key(k) => { - if !out.is_empty() { - out.push('.'); - } - out.push_str(k); - } - PathSegment::Index(i) => { - out.push_str(&format!("[{}]", i)); - } - } - } - out - } - - /// Helper: Build DiagnosticMessage for text rendering - fn build_diagnostic_message( - error: &ValidationError, - _source_ctx: &SourceContext, - ) -> DiagnosticMessage { - let mut builder = DiagnosticMessageBuilder::error("YAML Validation Failed") - .with_code(error.error_code()) - .problem(error.message()); - - // Attach full SourceInfo for ariadne rendering - if let Some(yaml_node) = &error.yaml_node { - builder = builder.with_location(yaml_node.source_info.clone()); - } - - // Add human-readable details - if !error.instance_path.is_empty() { - builder = builder.add_detail(format!("At document path: `{}`", error.instance_path)); - } else { - builder = builder.add_detail("At document root"); - } - - if !error.schema_path.is_empty() { - 
builder = builder.add_info(format!("Schema constraint: {}", error.schema_path)); - } - - // Add hints (authored `errorMessage` override wins over generated ones) - for hint in Self::effective_hints(&error.kind, error.custom_hint.as_deref()) { - builder = builder.add_hint(hint); - } - - builder.build() - } - - /// Helper: Extract SourceRange from SourceInfo - fn extract_source_range( - source_info: &SourceInfo, - source_ctx: &SourceContext, - ) -> Option { - // Map the start of the range (offset 0 in SourceInfo coordinates) - // This will handle Substring/Concat/Original correctly - let start_mapped = source_info.map_offset(0, source_ctx)?; - - // Map the end of the range (length in SourceInfo coordinates) - // For SourceInfo, the end offset is relative to the same base as start_offset - let length = source_info.end_offset() - source_info.start_offset(); - let end_mapped = source_info.map_offset(length, source_ctx)?; - - // Get filename - let file = source_ctx.get_file(start_mapped.file_id)?; - - Some(SourceRange { - filename: file.path.clone(), - start_offset: source_info.start_offset(), - end_offset: source_info.end_offset(), - start_line: start_mapped.location.row + 1, // 1-indexed - start_column: start_mapped.location.column + 1, // 1-indexed - end_line: end_mapped.location.row + 1, - end_column: end_mapped.location.column + 1, - }) - } - - // No longer needed - error codes come from ValidationErrorKind::error_code() - - /// Suggest fixes based on error kind - fn suggest_fixes_from_kind(kind: &crate::error::ValidationErrorKind) -> Vec { - use crate::error::ValidationErrorKind; - let mut hints = Vec::new(); - - match kind { - ValidationErrorKind::MissingRequiredProperty { property, .. } => { - hints.push(format!( - "Add the `{}` property to your YAML document?", - property - )); - } - ValidationErrorKind::TypeMismatch { expected, .. 
} => match expected.as_str() { - "boolean" => { - hints.push("Use `true` or `false` (YAML 1.2 standard)?".to_string()); - } - "number" => { - hints.push("Use a numeric value without quotes?".to_string()); - } - "string" => { - hints.push( - "Ensure the value is a string (quoted if it contains special characters)?" - .to_string(), - ); - } - "array" => { - hints.push( - "Use YAML array syntax: `[item1, item2]` or list format?".to_string(), - ); - } - "object" => { - hints.push("Use YAML mapping syntax with key-value pairs?".to_string()); - } - _ => {} - }, - ValidationErrorKind::InvalidEnumValue { .. } => { - hints.push("Check the schema for allowed values?".to_string()); - } - ValidationErrorKind::StringPatternMismatch { .. } => { - hints.push("Check that the string matches the expected format?".to_string()); - } - ValidationErrorKind::NumberOutOfRange { .. } - | ValidationErrorKind::NumberNotMultipleOf { .. } => { - hints.push("Check the allowed value range in the schema?".to_string()); - } - ValidationErrorKind::UnknownProperty { .. } => { - hints.push( - "Check for typos in property names or remove unrecognized properties?" 
- .to_string(), - ); - } - ValidationErrorKind::ArrayItemsNotUnique => { - hints.push("Remove duplicate items from the array?".to_string()); - } - _ => { - // No specific hints for other error kinds - } - } - - hints - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::error::InstancePath; - - #[test] - fn test_path_segment_serialization() { - let key = PathSegment::Key("format".to_string()); - let json = serde_json::to_value(&key).unwrap(); - assert_eq!(json["type"], "Key"); - assert_eq!(json["value"], "format"); - - let index = PathSegment::Index(42); - let json = serde_json::to_value(&index).unwrap(); - assert_eq!(json["type"], "Index"); - assert_eq!(json["value"], 42); - } - - #[test] - fn test_source_range_serialization() { - let range = SourceRange { - filename: "test.yaml".to_string(), - start_offset: 10, - end_offset: 20, - start_line: 1, - start_column: 5, - end_line: 1, - end_column: 15, - }; - - let json = serde_json::to_value(&range).unwrap(); - assert_eq!(json["filename"], "test.yaml"); - assert_eq!(json["start_offset"], 10); - assert_eq!(json["end_offset"], 20); - assert_eq!(json["start_line"], 1); - assert_eq!(json["start_column"], 5); - } - - #[test] - fn test_error_code() { - use crate::error::ValidationErrorKind; - - let error = ValidationError::new( - ValidationErrorKind::MissingRequiredProperty { - property: "author".to_string(), - allowed: None, - expected_type: None, - }, - InstancePath::new(), - ); - assert_eq!(error.error_code(), "Q-1-10"); - - let error = ValidationError::new( - ValidationErrorKind::TypeMismatch { - expected: "number".to_string(), - got: "string".to_string(), - }, - InstancePath::new(), - ); - assert_eq!(error.error_code(), "Q-1-11"); - - let error = ValidationError::new( - ValidationErrorKind::InvalidEnumValue { - value: "foo".to_string(), - allowed: vec!["html".to_string(), "pdf".to_string()], - }, - InstancePath::new(), - ); - assert_eq!(error.error_code(), "Q-1-12"); - - let error = ValidationError::new( - 
ValidationErrorKind::UnknownProperty { - property: "foo".to_string(), - }, - InstancePath::new(), - ); - assert_eq!(error.error_code(), "Q-1-18"); - } - - #[test] - fn test_instance_path_string() { - assert_eq!(ValidationDiagnostic::instance_path_string(&[]), ""); - assert_eq!( - ValidationDiagnostic::instance_path_string(&[PathSegment::Key("format".to_string())]), - "format" - ); - assert_eq!( - ValidationDiagnostic::instance_path_string(&[ - PathSegment::Key("authors".to_string()), - PathSegment::Index(0), - PathSegment::Key("name".to_string()), - ]), - "authors[0].name" - ); - } - - /// Strip ANSI SGR color codes and OSC-8 hyperlink sequences so the human - /// (ariadne) rendering can be snapshotted in a clean, machine-independent - /// form. The OSC-8 hyperlink embeds an absolute `file://` path, which is - /// not portable across machines; stripping it leaves the visible - /// `filename:line:col` text untouched. - fn strip_ansi(s: &str) -> String { - let mut out = String::new(); - let mut chars = s.chars().peekable(); - while let Some(c) = chars.next() { - if c != '\u{1b}' { - out.push(c); - continue; - } - match chars.peek() { - // CSI (e.g. color): ESC [ ... - Some('[') => { - chars.next(); - while let Some(&nc) = chars.peek() { - chars.next(); - if ('@'..='~').contains(&nc) { - break; - } - } - } - // OSC (e.g. hyperlink): ESC ] ... 
- Some(']') => { - chars.next(); - while let Some(nc) = chars.next() { - if nc == '\u{07}' { - break; - } - if nc == '\u{1b}' { - if let Some('\\') = chars.peek() { - chars.next(); - } - break; - } - } - } - _ => {} - } - } - out - } - - fn test_source_context(filename: &str, content: &str) -> SourceContext { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - - let mut ctx = SourceContext::new(); - let mut hasher = DefaultHasher::new(); - filename.hash(&mut hasher); - let file_id = quarto_source_map::FileId(hasher.finish() as usize); - ctx.add_file_with_id(file_id, filename.to_string(), Some(content.to_string())); - ctx - } - - /// Snapshot the three rendering variants (compact, JSON, human) for a - /// single validation error, so changes to any format's wording/structure - /// are caught in one place. The error is produced end-to-end through - /// `validate()` so the source range is real. - #[test] - fn test_all_three_formats_snapshot() { - use crate::{Schema, SchemaRegistry, validate}; - - let schema_yaml = quarto_yaml::parse( - r#" -object: - properties: - age: - number: - minimum: 0 - maximum: 100 -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - let doc_content = r#"age: "not a number""#; - let doc = quarto_yaml::parse_file(doc_content, "test.yaml").unwrap(); - let source_ctx = test_source_context("test.yaml", doc_content); - - let registry = SchemaRegistry::new(); - let error = validate(&doc, &schema, ®istry, &source_ctx) - .expect_err("validation should fail for type mismatch"); - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - - let combined = format!( - "=== compact ===\n{}\n\n=== json ===\n{}\n\n=== human (ANSI stripped) ===\n{}", - diagnostic.to_compact(), - serde_json::to_string_pretty(&diagnostic.to_json()).unwrap(), - strip_ansi(&diagnostic.to_text(&source_ctx)), - ); - - insta::assert_snapshot!(combined); - } - - #[test] - fn test_to_compact() { - 
use crate::error::ValidationErrorKind; - - let source_ctx = SourceContext::new(); - - // Error at a nested path: location omitted (no yaml_node), code + path + message + hint. - let mut path = InstancePath::new(); - path.push_key("format"); - path.push_key("html"); - let error = ValidationError::new( - ValidationErrorKind::TypeMismatch { - expected: "boolean".to_string(), - got: "string".to_string(), - }, - path, - ); - let vd = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - let compact = vd.to_compact(); - assert_eq!( - compact, - "[Q-1-11] format.html: Expected boolean, got string (Hint: Use `true` or `false` (YAML 1.2 standard)?)" - ); - - // Root-level error renders `(root)` rather than an empty path. - let error = ValidationError::new( - ValidationErrorKind::MissingRequiredProperty { - property: "version".to_string(), - allowed: None, - expected_type: None, - }, - InstancePath::new(), - ); - let vd = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - let compact = vd.to_compact(); - assert!(compact.starts_with("[Q-1-10] (root): Missing required property 'version'")); - assert!( - !compact.contains('\n'), - "compact output must be a single line" - ); - } - - #[test] - fn test_suggest_fixes() { - use crate::error::ValidationErrorKind; - - let kind = ValidationErrorKind::MissingRequiredProperty { - property: "author".to_string(), - allowed: None, - expected_type: None, - }; - let hints = ValidationDiagnostic::suggest_fixes_from_kind(&kind); - assert_eq!(hints.len(), 1); - assert!(hints[0].contains("author")); - - let kind = ValidationErrorKind::TypeMismatch { - expected: "boolean".to_string(), - got: "string".to_string(), - }; - let hints = ValidationDiagnostic::suggest_fixes_from_kind(&kind); - assert_eq!(hints.len(), 1); - assert!(hints[0].contains("true")); - - let kind = ValidationErrorKind::TypeMismatch { - expected: "number".to_string(), - got: "string".to_string(), - }; - let hints = 
ValidationDiagnostic::suggest_fixes_from_kind(&kind); - assert_eq!(hints.len(), 1); - assert!(hints[0].contains("numeric")); - } -} diff --git a/crates/quarto-yaml-validation/src/error.rs b/crates/quarto-yaml-validation/src/error.rs deleted file mode 100644 index 65c6a2c67..000000000 --- a/crates/quarto-yaml-validation/src/error.rs +++ /dev/null @@ -1,1195 +0,0 @@ -// Error types for YAML validation - -use quarto_yaml::{SourceInfo, YamlWithSourceInfo}; -use std::fmt; -use thiserror::Error; - -/// Errors that can occur during schema parsing from YAML -#[derive(Debug)] -pub enum SchemaError { - /// Invalid schema type name - InvalidType(String), - - /// Invalid schema structure. - /// - /// `location` is `None` for errors that describe a bug in the schema - /// definition itself (no user-YAML to point at); `Some(...)` for - /// errors that arose while validating user-supplied YAML against - /// the schema. - InvalidStructure { - message: String, - location: Option, - }, - - /// Missing required field - MissingField { field: String, location: SourceInfo }, - - /// Unresolved schema reference - UnresolvedRef(String), - - /// YAML parsing error - YamlError(quarto_yaml::Error), -} - -impl fmt::Display for SchemaError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - SchemaError::InvalidType(s) => write!(f, "Invalid schema type: {}", s), - SchemaError::InvalidStructure { message, location } => match location { - Some(loc) => write!( - f, - "Invalid schema structure: {} (at offset {})", - message, - loc.start_offset() - ), - None => write!(f, "Invalid schema structure: {}", message), - }, - SchemaError::MissingField { field, location } => { - write!( - f, - "Missing required field '{}' (at offset {})", - field, - location.start_offset() - ) - } - SchemaError::UnresolvedRef(s) => write!(f, "Unresolved schema reference: {}", s), - SchemaError::YamlError(e) => write!(f, "YAML parsing error: {}", e), - } - } -} - -impl std::error::Error for 
SchemaError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - SchemaError::YamlError(e) => Some(e), - _ => None, - } - } -} - -impl From for SchemaError { - fn from(e: quarto_yaml::Error) -> Self { - SchemaError::YamlError(e) - } -} - -/// Result type for schema parsing operations -pub type SchemaResult = Result; - -/// Result type for validation operations -pub type ValidationResult = Result; - -/// Structured validation error kinds -#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] -#[serde(tag = "type", content = "data")] -pub enum ValidationErrorKind { - /// Type mismatch - TypeMismatch { expected: String, got: String }, - - /// Missing required property - /// - /// When the schema knows what the absent property should have been, the - /// error advertises it, mirroring the message the user would have seen had - /// the property been present with a bad value: - /// - `allowed` carries the permitted values when the subschema is an enum - /// (like [`ValidationErrorKind::InvalidEnumValue`]). - /// - `expected_type` carries the permitted type(s) otherwise (like - /// [`ValidationErrorKind::TypeMismatch`]). 
- MissingRequiredProperty { - property: String, - allowed: Option>, - expected_type: Option, - }, - - /// Unknown property in closed object - UnknownProperty { property: String }, - - /// Value not in enum - InvalidEnumValue { value: String, allowed: Vec }, - - /// Number out of range - NumberOutOfRange { - value: f64, - minimum: Option, - maximum: Option, - exclusive_minimum: Option, - exclusive_maximum: Option, - }, - - /// Number not a multiple of - NumberNotMultipleOf { value: f64, multiple_of: f64 }, - - /// String length invalid - StringLengthInvalid { - length: usize, - min_length: Option, - max_length: Option, - }, - - /// String doesn't match pattern - StringPatternMismatch { value: String, pattern: String }, - - /// Array length invalid - ArrayLengthInvalid { - length: usize, - min_items: Option, - max_items: Option, - }, - - /// Array items not unique - ArrayItemsNotUnique, - - /// Object property count invalid - ObjectPropertyCountInvalid { - count: usize, - min_properties: Option, - max_properties: Option, - }, - - /// Unresolved schema reference - UnresolvedReference { ref_id: String }, - - /// A mapping key appears more than once - DuplicateKey { key: String }, - - /// Other validation error - /// - /// **WARNING**: This is a last-resort variant for errors that don't fit any other category. - /// Before using this, strongly consider whether the error should be represented as a new - /// structured variant in ValidationErrorKind. Structured variants are preferable because: - /// - They're machine-readable and can be matched on - /// - They carry type-safe data - /// - They enable better error reporting and hints - /// - /// Only use `Other` for truly unexpected or edge-case errors. - Other { message: String }, -} - -impl ValidationErrorKind { - /// Get the error code for this error kind - pub fn error_code(&self) -> &'static str { - match self { - ValidationErrorKind::MissingRequiredProperty { .. } => "Q-1-10", - ValidationErrorKind::TypeMismatch { .. 
} => "Q-1-11", - ValidationErrorKind::InvalidEnumValue { .. } => "Q-1-12", - ValidationErrorKind::ArrayLengthInvalid { .. } => "Q-1-13", - ValidationErrorKind::StringPatternMismatch { .. } => "Q-1-14", - ValidationErrorKind::NumberOutOfRange { .. } - | ValidationErrorKind::NumberNotMultipleOf { .. } => "Q-1-15", - ValidationErrorKind::ObjectPropertyCountInvalid { .. } => "Q-1-16", - ValidationErrorKind::UnresolvedReference { .. } => "Q-1-17", - ValidationErrorKind::UnknownProperty { .. } => "Q-1-18", - ValidationErrorKind::DuplicateKey { .. } => "Q-1-20", - ValidationErrorKind::ArrayItemsNotUnique => "Q-1-19", - ValidationErrorKind::StringLengthInvalid { .. } => "Q-1-29", - ValidationErrorKind::Other { .. } => "Q-1-99", - } - } - - /// Format a human-readable message from this error kind - pub fn message(&self) -> String { - match self { - ValidationErrorKind::TypeMismatch { expected, got } => { - format!("Expected {}, got {}", expected, got) - } - ValidationErrorKind::MissingRequiredProperty { - property, - allowed, - expected_type, - } => match (allowed, expected_type) { - (Some(values), _) if !values.is_empty() => { - format!( - "Missing required property '{}' (must be one of: {})", - property, - values.join(", ") - ) - } - (_, Some(ty)) => { - format!("Missing required property '{}' (expected {})", property, ty) - } - _ => format!("Missing required property '{}'", property), - }, - ValidationErrorKind::UnknownProperty { property } => { - format!("Unknown property '{}'", property) - } - ValidationErrorKind::InvalidEnumValue { value, allowed } => { - format!( - "Value must be one of: {}, got '{}'", - allowed.join(", "), - value - ) - } - ValidationErrorKind::NumberOutOfRange { - value, - minimum, - maximum, - exclusive_minimum, - exclusive_maximum, - } => { - if let Some(min) = minimum { - format!("Number {} is less than minimum {}", value, min) - } else if let Some(max) = maximum { - format!("Number {} is greater than maximum {}", value, max) - } else if let 
Some(min) = exclusive_minimum { - format!("Number {} is not greater than {}", value, min) - } else if let Some(max) = exclusive_maximum { - format!("Number {} is not less than {}", value, max) - } else { - format!("Number {} is out of range", value) - } - } - ValidationErrorKind::NumberNotMultipleOf { value, multiple_of } => { - format!("Number {} is not a multiple of {}", value, multiple_of) - } - ValidationErrorKind::StringLengthInvalid { - length, - min_length, - max_length, - } => { - if let Some(min) = min_length { - format!("String length {} is less than minimum {}", length, min) - } else if let Some(max) = max_length { - format!("String length {} is greater than maximum {}", length, max) - } else { - format!("String length {} is invalid", length) - } - } - ValidationErrorKind::StringPatternMismatch { value, pattern } => { - format!("String '{}' does not match pattern '{}'", value, pattern) - } - ValidationErrorKind::ArrayLengthInvalid { - length, - min_items, - max_items, - } => { - if let Some(min) = min_items { - format!("Array length {} is less than minimum {}", length, min) - } else if let Some(max) = max_items { - format!("Array length {} is greater than maximum {}", length, max) - } else { - format!("Array length {} is invalid", length) - } - } - ValidationErrorKind::ArrayItemsNotUnique => "Array items must be unique".to_string(), - ValidationErrorKind::ObjectPropertyCountInvalid { - count, - min_properties, - max_properties, - } => { - if let Some(min) = min_properties { - format!("Object has {} properties, less than minimum {}", count, min) - } else if let Some(max) = max_properties { - format!( - "Object has {} properties, greater than maximum {}", - count, max - ) - } else { - format!("Object has {} properties (invalid)", count) - } - } - ValidationErrorKind::UnresolvedReference { ref_id } => { - format!("Unresolved schema reference: {}", ref_id) - } - ValidationErrorKind::DuplicateKey { key } => { - format!("Duplicate key '{}'", key) - } - 
ValidationErrorKind::Other { message } => message.clone(), - } - } -} - -/// Validation error with source location information -#[derive(Debug, Clone, Error)] -pub struct ValidationError { - /// The structured error kind - pub kind: ValidationErrorKind, - /// Instance path where the error occurred (e.g., ["format", "html", "toc"]) - pub instance_path: InstancePath, - /// Schema path that failed (e.g., ["properties", "format", "properties", "html"]) - pub schema_path: SchemaPath, - /// The YAML node where the error occurred (if available) - pub yaml_node: Option, - /// Source location (file, line, column) for error reporting - pub location: Option, - /// Author-supplied hint override, taken from the `errorMessage` annotation - /// on the schema node where the failure occurred. When present, this - /// replaces the auto-generated hint line; the factual primary message - /// (from [`ValidationErrorKind::message`]) is left intact. - pub custom_hint: Option, -} - -impl fmt::Display for ValidationError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let message = self.kind.message(); - if let Some(loc) = &self.location { - write!( - f, - "Validation error at {}:{}:{}: {}", - loc.file, loc.line, loc.column, message - ) - } else { - write!(f, "Validation error at {}: {}", self.instance_path, message) - } - } -} - -impl ValidationError { - /// Create a new validation error with a structured kind - pub fn new(kind: ValidationErrorKind, instance_path: InstancePath) -> Self { - Self { - kind, - instance_path, - schema_path: SchemaPath::new(), - yaml_node: None, - location: None, - custom_hint: None, - } - } - - /// Get the human-readable message for this error - pub fn message(&self) -> String { - self.kind.message() - } - - /// Get the error code for this error - pub fn error_code(&self) -> &'static str { - self.kind.error_code() - } - - /// Set the schema path for this error - pub fn with_schema_path(mut self, schema_path: SchemaPath) -> Self { - 
self.schema_path = schema_path; - self - } - - /// Set the YAML node for this error - pub fn with_yaml_node( - mut self, - node: YamlWithSourceInfo, - ctx: &quarto_source_map::SourceContext, - ) -> Self { - // Extract location from the node using SourceContext - // Map the offset to get proper file/line/column information - if let Some(mapped) = node.source_info.map_offset(0, ctx) - && let Some(file) = ctx.get_file(mapped.file_id) - { - self.location = Some(SourceLocation { - file: file.path.clone(), - line: mapped.location.row + 1, // 1-indexed for display - column: mapped.location.column + 1, // 1-indexed for display - }); - } - - // Still store the node for potential future use - self.yaml_node = Some(node); - self - } -} - -/// Instance path (e.g., ["format", "html", "toc"]) -#[derive(Debug, Clone, Default, PartialEq, Eq)] -pub struct InstancePath { - segments: Vec, -} - -impl InstancePath { - /// Create a new empty instance path - pub fn new() -> Self { - Self { - segments: Vec::new(), - } - } - - /// Push a key segment onto the path - pub fn push_key(&mut self, key: impl Into) { - self.segments.push(PathSegment::Key(key.into())); - } - - /// Push an index segment onto the path - pub fn push_index(&mut self, index: usize) { - self.segments.push(PathSegment::Index(index)); - } - - /// Pop the last segment from the path - pub fn pop(&mut self) -> Option { - self.segments.pop() - } - - /// Get the segments as a slice - pub fn segments(&self) -> &[PathSegment] { - &self.segments - } - - /// Check if the path is empty - pub fn is_empty(&self) -> bool { - self.segments.is_empty() - } - - /// Get the length of the path - pub fn len(&self) -> usize { - self.segments.len() - } -} - -impl fmt::Display for InstancePath { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.segments.is_empty() { - write!(f, "(root)") - } else { - for (i, segment) in self.segments.iter().enumerate() { - if i > 0 { - write!(f, ".")?; - } - write!(f, "{}", segment)?; - } - 
Ok(()) - } - } -} - -/// Schema path (e.g., ["properties", "format", "properties", "html"]) -#[derive(Debug, Clone, Default, PartialEq, Eq)] -pub struct SchemaPath { - segments: Vec, -} - -impl SchemaPath { - /// Create a new empty schema path - pub fn new() -> Self { - Self { - segments: Vec::new(), - } - } - - /// Push a segment onto the path - pub fn push(&mut self, segment: impl Into) { - self.segments.push(segment.into()); - } - - /// Pop the last segment from the path - pub fn pop(&mut self) -> Option { - self.segments.pop() - } - - /// Get the segments as a slice - pub fn segments(&self) -> &[String] { - &self.segments - } - - /// Check if the path is empty - pub fn is_empty(&self) -> bool { - self.segments.is_empty() - } - - /// Get the length of the path - pub fn len(&self) -> usize { - self.segments.len() - } -} - -impl fmt::Display for SchemaPath { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.segments.is_empty() { - write!(f, "(root)") - } else { - write!(f, "{}", self.segments.join(" > ")) - } - } -} - -/// A segment in an instance path -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum PathSegment { - /// Object key - Key(String), - /// Array index - Index(usize), -} - -impl fmt::Display for PathSegment { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - PathSegment::Key(key) => write!(f, "{}", key), - PathSegment::Index(index) => write!(f, "[{}]", index), - } - } -} - -/// Source location for error reporting -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct SourceLocation { - pub file: String, - pub line: usize, - pub column: usize, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_instance_path_display() { - let mut path = InstancePath::new(); - assert_eq!(path.to_string(), "(root)"); - - path.push_key("format"); - assert_eq!(path.to_string(), "format"); - - path.push_key("html"); - assert_eq!(path.to_string(), "format.html"); - - path.push_index(0); - 
assert_eq!(path.to_string(), "format.html.[0]"); - } - - #[test] - fn test_schema_path_display() { - let mut path = SchemaPath::new(); - assert_eq!(path.to_string(), "(root)"); - - path.push("properties"); - path.push("format"); - assert_eq!(path.to_string(), "properties > format"); - } - - #[test] - fn test_validation_error_creation() { - let mut path = InstancePath::new(); - path.push_key("format"); - - let error = ValidationError::new( - ValidationErrorKind::TypeMismatch { - expected: "number".to_string(), - got: "string".to_string(), - }, - path, - ); - assert_eq!(error.message(), "Expected number, got string"); - assert_eq!(error.instance_path.len(), 1); - } - - #[test] - fn test_validation_error_other_variant() { - let error = ValidationError::new( - ValidationErrorKind::Other { - message: "Something unexpected happened".to_string(), - }, - InstancePath::new(), - ); - assert_eq!(error.message(), "Something unexpected happened"); - assert_eq!(error.error_code(), "Q-1-99"); - } - - // Tests for SchemaError Display implementation - #[test] - fn test_schema_error_invalid_type_display() { - let error = SchemaError::InvalidType("bad_type".to_string()); - assert_eq!(error.to_string(), "Invalid schema type: bad_type"); - } - - #[test] - fn test_schema_error_invalid_structure_display() { - use quarto_source_map::{FileId, SourceInfo}; - let location = Some(SourceInfo::original(FileId(0), 10, 20)); - let error = SchemaError::InvalidStructure { - message: "unexpected array".to_string(), - location, - }; - assert_eq!( - error.to_string(), - "Invalid schema structure: unexpected array (at offset 10)" - ); - } - - #[test] - fn test_schema_error_invalid_structure_display_no_location() { - let error = SchemaError::InvalidStructure { - message: "schema bug: malformed combinator".to_string(), - location: None, - }; - assert_eq!( - error.to_string(), - "Invalid schema structure: schema bug: malformed combinator" - ); - } - - #[test] - fn test_schema_error_missing_field_display() 
{ - use quarto_source_map::{FileId, SourceInfo}; - let location = SourceInfo::original(FileId(0), 5, 15); - let error = SchemaError::MissingField { - field: "type".to_string(), - location, - }; - assert_eq!( - error.to_string(), - "Missing required field 'type' (at offset 5)" - ); - } - - #[test] - fn test_schema_error_unresolved_ref_display() { - let error = SchemaError::UnresolvedRef("missing_schema".to_string()); - assert_eq!( - error.to_string(), - "Unresolved schema reference: missing_schema" - ); - } - - #[test] - fn test_schema_error_yaml_error_display() { - let yaml_err = quarto_yaml::Error::ParseError { - message: "invalid yaml".to_string(), - location: None, - }; - let error = SchemaError::YamlError(yaml_err); - assert!(error.to_string().contains("YAML parsing error")); - } - - #[test] - fn test_schema_error_source() { - use std::error::Error; - // Test that YamlError variant returns the source error - let yaml_err = quarto_yaml::Error::ParseError { - message: "test".to_string(), - location: None, - }; - let error = SchemaError::YamlError(yaml_err); - assert!(error.source().is_some()); - - // Test that other variants return None - let error = SchemaError::InvalidType("test".to_string()); - assert!(error.source().is_none()); - } - - #[test] - fn test_schema_error_from_yaml_error() { - let yaml_err = quarto_yaml::Error::ParseError { - message: "parse failed".to_string(), - location: None, - }; - let schema_err: SchemaError = yaml_err.into(); - match schema_err { - SchemaError::YamlError(_) => {} // expected - _ => panic!("Expected YamlError variant"), - } - } - - // Tests for ValidationErrorKind::error_code - #[test] - fn test_error_code_missing_required_property() { - let kind = ValidationErrorKind::MissingRequiredProperty { - property: "foo".to_string(), - allowed: None, - expected_type: None, - }; - assert_eq!(kind.error_code(), "Q-1-10"); - } - - #[test] - fn test_error_code_type_mismatch() { - let kind = ValidationErrorKind::TypeMismatch { - expected: 
"number".to_string(), - got: "string".to_string(), - }; - assert_eq!(kind.error_code(), "Q-1-11"); - } - - #[test] - fn test_error_code_invalid_enum_value() { - let kind = ValidationErrorKind::InvalidEnumValue { - value: "bad".to_string(), - allowed: vec!["a".to_string(), "b".to_string()], - }; - assert_eq!(kind.error_code(), "Q-1-12"); - } - - #[test] - fn test_error_code_array_length_invalid() { - let kind = ValidationErrorKind::ArrayLengthInvalid { - length: 5, - min_items: Some(10), - max_items: None, - }; - assert_eq!(kind.error_code(), "Q-1-13"); - } - - #[test] - fn test_error_code_string_pattern_mismatch() { - let kind = ValidationErrorKind::StringPatternMismatch { - value: "test".to_string(), - pattern: "^[0-9]+$".to_string(), - }; - assert_eq!(kind.error_code(), "Q-1-14"); - } - - #[test] - fn test_error_code_number_out_of_range() { - let kind = ValidationErrorKind::NumberOutOfRange { - value: 100.0, - minimum: Some(0.0), - maximum: Some(50.0), - exclusive_minimum: None, - exclusive_maximum: None, - }; - assert_eq!(kind.error_code(), "Q-1-15"); - } - - #[test] - fn test_error_code_number_not_multiple_of() { - let kind = ValidationErrorKind::NumberNotMultipleOf { - value: 7.0, - multiple_of: 3.0, - }; - assert_eq!(kind.error_code(), "Q-1-15"); - } - - #[test] - fn test_error_code_object_property_count_invalid() { - let kind = ValidationErrorKind::ObjectPropertyCountInvalid { - count: 5, - min_properties: Some(10), - max_properties: None, - }; - assert_eq!(kind.error_code(), "Q-1-16"); - } - - #[test] - fn test_error_code_unresolved_reference() { - let kind = ValidationErrorKind::UnresolvedReference { - ref_id: "missing".to_string(), - }; - assert_eq!(kind.error_code(), "Q-1-17"); - } - - #[test] - fn test_error_code_unknown_property() { - let kind = ValidationErrorKind::UnknownProperty { - property: "foo".to_string(), - }; - assert_eq!(kind.error_code(), "Q-1-18"); - } - - #[test] - fn test_error_code_duplicate_key() { - let kind = 
ValidationErrorKind::DuplicateKey { - key: "examples".to_string(), - }; - assert_eq!(kind.error_code(), "Q-1-20"); - } - - #[test] - fn test_message_duplicate_key() { - let kind = ValidationErrorKind::DuplicateKey { - key: "examples".to_string(), - }; - assert_eq!(kind.message(), "Duplicate key 'examples'"); - } - - #[test] - fn test_error_code_array_items_not_unique() { - let kind = ValidationErrorKind::ArrayItemsNotUnique; - assert_eq!(kind.error_code(), "Q-1-19"); - } - - #[test] - fn test_error_code_string_length_invalid() { - let kind = ValidationErrorKind::StringLengthInvalid { - length: 5, - min_length: Some(10), - max_length: None, - }; - assert_eq!(kind.error_code(), "Q-1-29"); - } - - // Tests for ValidationErrorKind::message edge cases - #[test] - fn test_message_number_out_of_range_minimum() { - let kind = ValidationErrorKind::NumberOutOfRange { - value: -5.0, - minimum: Some(0.0), - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - }; - assert_eq!(kind.message(), "Number -5 is less than minimum 0"); - } - - #[test] - fn test_message_number_out_of_range_maximum() { - let kind = ValidationErrorKind::NumberOutOfRange { - value: 100.0, - minimum: None, - maximum: Some(50.0), - exclusive_minimum: None, - exclusive_maximum: None, - }; - assert_eq!(kind.message(), "Number 100 is greater than maximum 50"); - } - - #[test] - fn test_message_number_out_of_range_exclusive_minimum() { - let kind = ValidationErrorKind::NumberOutOfRange { - value: 5.0, - minimum: None, - maximum: None, - exclusive_minimum: Some(5.0), - exclusive_maximum: None, - }; - assert_eq!(kind.message(), "Number 5 is not greater than 5"); - } - - #[test] - fn test_message_number_out_of_range_exclusive_maximum() { - let kind = ValidationErrorKind::NumberOutOfRange { - value: 10.0, - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: Some(10.0), - }; - assert_eq!(kind.message(), "Number 10 is not less than 10"); - } - - #[test] - fn 
test_message_number_out_of_range_no_bounds() { - let kind = ValidationErrorKind::NumberOutOfRange { - value: 42.0, - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - }; - assert_eq!(kind.message(), "Number 42 is out of range"); - } - - #[test] - fn test_message_number_not_multiple_of() { - let kind = ValidationErrorKind::NumberNotMultipleOf { - value: 7.0, - multiple_of: 3.0, - }; - assert_eq!(kind.message(), "Number 7 is not a multiple of 3"); - } - - #[test] - fn test_message_string_length_invalid_min() { - let kind = ValidationErrorKind::StringLengthInvalid { - length: 3, - min_length: Some(10), - max_length: None, - }; - assert_eq!(kind.message(), "String length 3 is less than minimum 10"); - } - - #[test] - fn test_message_string_length_invalid_max() { - let kind = ValidationErrorKind::StringLengthInvalid { - length: 100, - min_length: None, - max_length: Some(50), - }; - assert_eq!( - kind.message(), - "String length 100 is greater than maximum 50" - ); - } - - #[test] - fn test_message_string_length_invalid_no_bounds() { - let kind = ValidationErrorKind::StringLengthInvalid { - length: 42, - min_length: None, - max_length: None, - }; - assert_eq!(kind.message(), "String length 42 is invalid"); - } - - #[test] - fn test_message_array_length_invalid_min() { - let kind = ValidationErrorKind::ArrayLengthInvalid { - length: 2, - min_items: Some(5), - max_items: None, - }; - assert_eq!(kind.message(), "Array length 2 is less than minimum 5"); - } - - #[test] - fn test_message_array_length_invalid_max() { - let kind = ValidationErrorKind::ArrayLengthInvalid { - length: 20, - min_items: None, - max_items: Some(10), - }; - assert_eq!(kind.message(), "Array length 20 is greater than maximum 10"); - } - - #[test] - fn test_message_array_length_invalid_no_bounds() { - let kind = ValidationErrorKind::ArrayLengthInvalid { - length: 42, - min_items: None, - max_items: None, - }; - assert_eq!(kind.message(), "Array length 42 is 
invalid"); - } - - #[test] - fn test_message_object_property_count_min() { - let kind = ValidationErrorKind::ObjectPropertyCountInvalid { - count: 1, - min_properties: Some(3), - max_properties: None, - }; - assert_eq!( - kind.message(), - "Object has 1 properties, less than minimum 3" - ); - } - - #[test] - fn test_message_object_property_count_max() { - let kind = ValidationErrorKind::ObjectPropertyCountInvalid { - count: 15, - min_properties: None, - max_properties: Some(10), - }; - assert_eq!( - kind.message(), - "Object has 15 properties, greater than maximum 10" - ); - } - - #[test] - fn test_message_object_property_count_no_bounds() { - let kind = ValidationErrorKind::ObjectPropertyCountInvalid { - count: 5, - min_properties: None, - max_properties: None, - }; - assert_eq!(kind.message(), "Object has 5 properties (invalid)"); - } - - #[test] - fn test_message_array_items_not_unique() { - let kind = ValidationErrorKind::ArrayItemsNotUnique; - assert_eq!(kind.message(), "Array items must be unique"); - } - - #[test] - fn test_message_unknown_property() { - let kind = ValidationErrorKind::UnknownProperty { - property: "extra_field".to_string(), - }; - assert_eq!(kind.message(), "Unknown property 'extra_field'"); - } - - #[test] - fn test_message_invalid_enum_value() { - let kind = ValidationErrorKind::InvalidEnumValue { - value: "invalid".to_string(), - allowed: vec!["a".to_string(), "b".to_string(), "c".to_string()], - }; - assert_eq!( - kind.message(), - "Value must be one of: a, b, c, got 'invalid'" - ); - } - - #[test] - fn test_message_unresolved_reference() { - let kind = ValidationErrorKind::UnresolvedReference { - ref_id: "missing_schema".to_string(), - }; - assert_eq!( - kind.message(), - "Unresolved schema reference: missing_schema" - ); - } - - #[test] - fn test_message_string_pattern_mismatch() { - let kind = ValidationErrorKind::StringPatternMismatch { - value: "abc".to_string(), - pattern: "^[0-9]+$".to_string(), - }; - assert_eq!( - 
kind.message(), - "String 'abc' does not match pattern '^[0-9]+$'" - ); - } - - // Tests for ValidationError Display with location - #[test] - fn test_validation_error_display_with_location() { - let mut path = InstancePath::new(); - path.push_key("format"); - let mut error = ValidationError::new( - ValidationErrorKind::TypeMismatch { - expected: "object".to_string(), - got: "string".to_string(), - }, - path, - ); - error.location = Some(SourceLocation { - file: "test.yml".to_string(), - line: 10, - column: 5, - }); - assert_eq!( - error.to_string(), - "Validation error at test.yml:10:5: Expected object, got string" - ); - } - - #[test] - fn test_validation_error_display_without_location() { - let mut path = InstancePath::new(); - path.push_key("format"); - path.push_key("html"); - let error = ValidationError::new( - ValidationErrorKind::MissingRequiredProperty { - property: "toc".to_string(), - allowed: None, - expected_type: None, - }, - path, - ); - assert_eq!( - error.to_string(), - "Validation error at format.html: Missing required property 'toc'" - ); - } - - #[test] - fn test_missing_required_property_with_allowed_values() { - let kind = ValidationErrorKind::MissingRequiredProperty { - property: "version".to_string(), - allowed: Some(vec!["0.1.0".to_string()]), - expected_type: None, - }; - assert_eq!( - kind.message(), - "Missing required property 'version' (must be one of: 0.1.0)" - ); - } - - #[test] - fn test_missing_required_property_empty_allowed_values() { - // An empty allowed list must not append a "(must be one of: )" clause. 
- let kind = ValidationErrorKind::MissingRequiredProperty { - property: "version".to_string(), - allowed: Some(vec![]), - expected_type: None, - }; - assert_eq!(kind.message(), "Missing required property 'version'"); - } - - // Tests for ValidationError::with_schema_path - #[test] - fn test_validation_error_with_schema_path() { - let error = ValidationError::new( - ValidationErrorKind::TypeMismatch { - expected: "number".to_string(), - got: "string".to_string(), - }, - InstancePath::new(), - ); - let mut schema_path = SchemaPath::new(); - schema_path.push("properties"); - schema_path.push("count"); - - let error = error.with_schema_path(schema_path); - assert_eq!(error.schema_path.len(), 2); - assert_eq!(error.schema_path.segments()[0], "properties"); - } - - // Tests for InstancePath utility methods - #[test] - fn test_instance_path_pop() { - let mut path = InstancePath::new(); - path.push_key("a"); - path.push_key("b"); - assert_eq!(path.len(), 2); - - let popped = path.pop(); - assert!(matches!(popped, Some(PathSegment::Key(k)) if k == "b")); - assert_eq!(path.len(), 1); - - let popped = path.pop(); - assert!(matches!(popped, Some(PathSegment::Key(k)) if k == "a")); - assert_eq!(path.len(), 0); - - let popped = path.pop(); - assert!(popped.is_none()); - } - - #[test] - fn test_instance_path_is_empty() { - let mut path = InstancePath::new(); - assert!(path.is_empty()); - - path.push_key("test"); - assert!(!path.is_empty()); - } - - #[test] - fn test_instance_path_segments() { - let mut path = InstancePath::new(); - path.push_key("a"); - path.push_index(0); - path.push_key("b"); - - let segments = path.segments(); - assert_eq!(segments.len(), 3); - assert!(matches!(&segments[0], PathSegment::Key(k) if k == "a")); - assert!(matches!(&segments[1], PathSegment::Index(0))); - assert!(matches!(&segments[2], PathSegment::Key(k) if k == "b")); - } - - // Tests for SchemaPath utility methods - #[test] - fn test_schema_path_pop() { - let mut path = SchemaPath::new(); - 
path.push("properties"); - path.push("format"); - assert_eq!(path.len(), 2); - - let popped = path.pop(); - assert_eq!(popped, Some("format".to_string())); - assert_eq!(path.len(), 1); - } - - #[test] - fn test_schema_path_is_empty() { - let mut path = SchemaPath::new(); - assert!(path.is_empty()); - - path.push("test"); - assert!(!path.is_empty()); - } - - #[test] - fn test_schema_path_segments() { - let mut path = SchemaPath::new(); - path.push("properties"); - path.push("items"); - - let segments = path.segments(); - assert_eq!(segments, &["properties", "items"]); - } - - // Tests for PathSegment display - #[test] - fn test_path_segment_key_display() { - let segment = PathSegment::Key("property_name".to_string()); - assert_eq!(segment.to_string(), "property_name"); - } - - #[test] - fn test_path_segment_index_display() { - let segment = PathSegment::Index(42); - assert_eq!(segment.to_string(), "[42]"); - } -} diff --git a/crates/quarto-yaml-validation/src/lib.rs b/crates/quarto-yaml-validation/src/lib.rs deleted file mode 100644 index e404a2de2..000000000 --- a/crates/quarto-yaml-validation/src/lib.rs +++ /dev/null @@ -1,17 +0,0 @@ -// YAML validation for Quarto -// -// This crate provides schema-based validation for YAML content, -// with support for Quarto's simplified JSON Schema subset. - -pub mod diagnostic; -pub mod error; -pub mod schema; -pub mod validator; - -pub use diagnostic::{PathSegment, SourceRange, ValidationDiagnostic}; -pub use error::{ValidationError, ValidationResult}; -pub use schema::{Schema, SchemaRegistry, merge_object_schemas}; -pub use validator::{ValidationContext, validate}; - -#[cfg(test)] -mod tests; diff --git a/crates/quarto-yaml-validation/src/schema/annotations.rs b/crates/quarto-yaml-validation/src/schema/annotations.rs deleted file mode 100644 index 8b87560f9..000000000 --- a/crates/quarto-yaml-validation/src/schema/annotations.rs +++ /dev/null @@ -1,106 +0,0 @@ -//! Schema annotation parsing -//! -//! 
This module handles parsing of common schema annotations that can be -//! attached to any schema type (description, documentation, error messages, etc.) - -use crate::error::SchemaResult; -use quarto_yaml::YamlWithSourceInfo; -use std::collections::HashMap; - -use super::helpers::{get_hash_bool, get_hash_string, get_hash_string_array, get_hash_tags}; -use super::types::SchemaAnnotations; - -/// Static empty annotations for False and True schemas -pub(super) static EMPTY_ANNOTATIONS: SchemaAnnotations = SchemaAnnotations { - id: None, - description: None, - documentation: None, - error_message: None, - hidden: None, - completions: None, - additional_completions: None, - tags: None, -}; - -/// Parse common annotations from a schema object -pub(super) fn parse_annotations(yaml: &YamlWithSourceInfo) -> SchemaResult { - Ok(SchemaAnnotations { - id: get_hash_string(yaml, "$id")?, - description: get_hash_string(yaml, "description")?, - documentation: get_hash_string(yaml, "documentation")?, - error_message: get_hash_string(yaml, "errorMessage")?, - hidden: get_hash_bool(yaml, "hidden")?, - completions: get_hash_string_array(yaml, "completions")?, - additional_completions: get_hash_string_array(yaml, "additionalCompletions")?, - tags: get_hash_tags(yaml)?, - }) -} - -/// Merge outer annotations with inner annotations -/// -/// Outer annotations override inner ones, following quarto-cli semantics: -/// - id, description, documentation, error_message, hidden: outer overrides inner -/// - completions: complex merging with additionalCompletions (see below) -/// - tags: outer merges with inner (outer values override inner values for same keys) -/// -/// Completion merging follows quarto-cli's setBaseSchemaProperties: -/// 1. Start with inner.completions -/// 2. Append inner.additional_completions -/// 3. Append outer.additional_completions -/// 4. 
If outer.completions exists, it overwrites everything -pub(super) fn merge_annotations( - inner: SchemaAnnotations, - outer: SchemaAnnotations, -) -> SchemaAnnotations { - // Merge completions according to quarto-cli semantics - let mut merged_completions = inner.completions.unwrap_or_default(); - - // Add inner additional completions - if let Some(add_comp) = inner.additional_completions { - merged_completions.extend(add_comp); - } - - // Add outer additional completions - if let Some(add_comp) = &outer.additional_completions { - merged_completions.extend(add_comp.iter().cloned()); - } - - // Outer completions overwrites everything if present - let final_completions = if outer.completions.is_some() { - outer.completions - } else if !merged_completions.is_empty() { - Some(merged_completions) - } else { - None - }; - - SchemaAnnotations { - id: outer.id.or(inner.id), - description: outer.description.or(inner.description), - documentation: outer.documentation.or(inner.documentation), - error_message: outer.error_message.or(inner.error_message), - hidden: outer.hidden.or(inner.hidden), - completions: final_completions, - additional_completions: None, // Clear after merging - tags: merge_tags(inner.tags, outer.tags), - } -} - -/// Merge tag maps, with outer tags overriding inner tags for the same key -fn merge_tags( - inner: Option>, - outer: Option>, -) -> Option> { - match (inner, outer) { - (None, None) => None, - (Some(i), None) => Some(i), - (None, Some(o)) => Some(o), - (Some(mut i), Some(o)) => { - // Outer tags override inner tags for same keys - for (k, v) in o { - i.insert(k, v); - } - Some(i) - } - } -} diff --git a/crates/quarto-yaml-validation/src/schema/helpers.rs b/crates/quarto-yaml-validation/src/schema/helpers.rs deleted file mode 100644 index 8df2ffccf..000000000 --- a/crates/quarto-yaml-validation/src/schema/helpers.rs +++ /dev/null @@ -1,726 +0,0 @@ -//! Helper functions for parsing YAML schemas -//! -//! 
This module contains utility functions for extracting specific types -//! of values from YamlWithSourceInfo structures, with proper error handling. - -use crate::error::{SchemaError, SchemaResult}; -use quarto_yaml::{SourceInfo, YamlWithSourceInfo}; -use std::collections::HashMap; -use yaml_rust2::Yaml; - -/// Get a string value from a hash by key -pub(super) fn get_hash_string( - yaml: &YamlWithSourceInfo, - key: &str, -) -> SchemaResult> { - if let Some(value) = yaml.get_hash_value(key) { - if let Some(s) = value.yaml.as_str() { - return Ok(Some(s.to_string())); - } - return Err(SchemaError::InvalidStructure { - message: format!("Field '{}' must be a string", key), - location: Some(value.source_info.clone()), - }); - } - Ok(None) -} - -/// Get a number value from a hash by key -pub(super) fn get_hash_number(yaml: &YamlWithSourceInfo, key: &str) -> SchemaResult> { - if let Some(value) = yaml.get_hash_value(key) { - match &value.yaml { - Yaml::Integer(i) => return Ok(Some(*i as f64)), - Yaml::Real(r) => { - if let Ok(f) = r.parse::() { - return Ok(Some(f)); - } - } - _ => {} - } - return Err(SchemaError::InvalidStructure { - message: format!("Field '{}' must be a number", key), - location: Some(value.source_info.clone()), - }); - } - Ok(None) -} - -/// Get a usize value from a hash by key -pub(super) fn get_hash_usize(yaml: &YamlWithSourceInfo, key: &str) -> SchemaResult> { - if let Some(value) = yaml.get_hash_value(key) { - if let Some(i) = value.yaml.as_i64() - && i >= 0 - { - return Ok(Some(i as usize)); - } - return Err(SchemaError::InvalidStructure { - message: format!("Field '{}' must be a non-negative integer", key), - location: Some(value.source_info.clone()), - }); - } - Ok(None) -} - -/// Get a boolean value from a hash by key -pub(super) fn get_hash_bool(yaml: &YamlWithSourceInfo, key: &str) -> SchemaResult> { - if let Some(value) = yaml.get_hash_value(key) { - if let Some(b) = value.yaml.as_bool() { - return Ok(Some(b)); - } - return 
Err(SchemaError::InvalidStructure { - message: format!("Field '{}' must be a boolean", key), - location: Some(value.source_info.clone()), - }); - } - Ok(None) -} - -/// Get an array of strings from a hash by key -pub(super) fn get_hash_string_array( - yaml: &YamlWithSourceInfo, - key: &str, -) -> SchemaResult>> { - if let Some(value) = yaml.get_hash_value(key) { - let items = value - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: format!("Field '{}' must be an array", key), - location: Some(value.source_info.clone()), - })?; - - let result: SchemaResult> = items - .iter() - .map(|item| { - item.yaml.as_str().map(|s| s.to_string()).ok_or_else(|| { - SchemaError::InvalidStructure { - message: format!("Field '{}' items must be strings", key), - location: Some(item.source_info.clone()), - } - }) - }) - .collect(); - return Ok(Some(result?)); - } - Ok(None) -} - -/// Get tags (a hash of key-value pairs) from a schema -pub(super) fn get_hash_tags( - yaml: &YamlWithSourceInfo, -) -> SchemaResult>> { - if let Some(value) = yaml.get_hash_value("tags") { - let entries = value - .as_hash() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "tags must be an object".to_string(), - location: Some(value.source_info.clone()), - })?; - - let mut tags = HashMap::new(); - for entry in entries { - let key = entry - .key - .yaml - .as_str() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "tag key must be a string".to_string(), - location: Some(entry.key.source_info.clone()), - })?; - let value = yaml_to_json_value(&entry.value.yaml, &entry.value.source_info)?; - tags.insert(key.to_string(), value); - } - return Ok(Some(tags)); - } - Ok(None) -} - -/// Convert yaml-rust2 Yaml to serde_json::Value (for enum values and tags) -pub(super) fn yaml_to_json_value( - yaml: &Yaml, - location: &SourceInfo, -) -> SchemaResult { - match yaml { - Yaml::String(s) => Ok(serde_json::Value::String(s.clone())), - Yaml::Integer(i) => 
Ok(serde_json::Value::Number((*i).into())), - Yaml::Real(r) => { - if let Ok(f) = r.parse::() - && let Some(n) = serde_json::Number::from_f64(f) - { - return Ok(serde_json::Value::Number(n)); - } - Err(SchemaError::InvalidStructure { - message: format!("Invalid number: {}", r), - location: Some(location.clone()), - }) - } - Yaml::Boolean(b) => Ok(serde_json::Value::Bool(*b)), - Yaml::Null => Ok(serde_json::Value::Null), - _ => Err(SchemaError::InvalidStructure { - message: "Unsupported YAML type for JSON conversion".to_string(), - location: Some(location.clone()), - }), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use quarto_yaml::YamlHashEntry; - use yaml_rust2::yaml::Hash; - - fn source_info() -> SourceInfo { - SourceInfo::for_test() - } - - /// Create a YamlWithSourceInfo hash with a single key-value pair - fn make_hash(key: &str, value: Yaml) -> YamlWithSourceInfo { - let mut hash = Hash::new(); - hash.insert(Yaml::String(key.to_string()), value.clone()); - - let key_node = YamlWithSourceInfo::new_scalar(Yaml::String(key.to_string()), source_info()); - let value_node = YamlWithSourceInfo::new_scalar(value, source_info()); - - let entry = YamlHashEntry::new( - key_node, - value_node, - source_info(), - source_info(), - source_info(), - ); - - YamlWithSourceInfo::new_hash(Yaml::Hash(hash), source_info(), vec![entry]) - } - - /// Create a YamlWithSourceInfo hash with a key pointing to an array - fn make_hash_with_array(key: &str, items: Vec) -> YamlWithSourceInfo { - let mut hash = Hash::new(); - hash.insert(Yaml::String(key.to_string()), Yaml::Array(items.clone())); - - let key_node = YamlWithSourceInfo::new_scalar(Yaml::String(key.to_string()), source_info()); - - let children: Vec = items - .into_iter() - .map(|y| YamlWithSourceInfo::new_scalar(y, source_info())) - .collect(); - let value_node = - YamlWithSourceInfo::new_array(Yaml::Array(vec![]), source_info(), children); - - let entry = YamlHashEntry::new( - key_node, - value_node, - source_info(), - 
source_info(), - source_info(), - ); - - YamlWithSourceInfo::new_hash(Yaml::Hash(hash), source_info(), vec![entry]) - } - - /// Create a YamlWithSourceInfo hash with a key pointing to a nested hash - fn make_hash_with_nested_hash( - outer_key: &str, - inner_entries: Vec<(&str, Yaml)>, - ) -> YamlWithSourceInfo { - let mut outer_hash = Hash::new(); - let mut inner_hash = Hash::new(); - let mut inner_hash_entries = Vec::new(); - - for (k, v) in inner_entries { - inner_hash.insert(Yaml::String(k.to_string()), v.clone()); - - let inner_key_node = - YamlWithSourceInfo::new_scalar(Yaml::String(k.to_string()), source_info()); - let inner_value_node = YamlWithSourceInfo::new_scalar(v, source_info()); - - inner_hash_entries.push(YamlHashEntry::new( - inner_key_node, - inner_value_node, - source_info(), - source_info(), - source_info(), - )); - } - - outer_hash.insert( - Yaml::String(outer_key.to_string()), - Yaml::Hash(inner_hash.clone()), - ); - - let outer_key_node = - YamlWithSourceInfo::new_scalar(Yaml::String(outer_key.to_string()), source_info()); - let inner_hash_node = - YamlWithSourceInfo::new_hash(Yaml::Hash(inner_hash), source_info(), inner_hash_entries); - - let entry = YamlHashEntry::new( - outer_key_node, - inner_hash_node, - source_info(), - source_info(), - source_info(), - ); - - YamlWithSourceInfo::new_hash(Yaml::Hash(outer_hash), source_info(), vec![entry]) - } - - // ==================== get_hash_string tests ==================== - - #[test] - fn test_get_hash_string_valid() { - let yaml = make_hash("name", Yaml::String("hello".to_string())); - let result = get_hash_string(&yaml, "name").unwrap(); - assert_eq!(result, Some("hello".to_string())); - } - - #[test] - fn test_get_hash_string_missing_key() { - let yaml = make_hash("name", Yaml::String("hello".to_string())); - let result = get_hash_string(&yaml, "other").unwrap(); - assert_eq!(result, None); - } - - #[test] - fn test_get_hash_string_not_a_string() { - let yaml = make_hash("name", 
Yaml::Integer(42)); - let result = get_hash_string(&yaml, "name"); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. } => { - assert!(message.contains("'name' must be a string")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_string_boolean_not_string() { - let yaml = make_hash("flag", Yaml::Boolean(true)); - let result = get_hash_string(&yaml, "flag"); - assert!(result.is_err()); - } - - // ==================== get_hash_number tests ==================== - - #[test] - fn test_get_hash_number_integer() { - let yaml = make_hash("count", Yaml::Integer(42)); - let result = get_hash_number(&yaml, "count").unwrap(); - assert_eq!(result, Some(42.0)); - } - - #[test] - #[allow(clippy::approx_constant)] // 3.14 is parser test data, not an approximation of π - fn test_get_hash_number_real() { - let yaml = make_hash("value", Yaml::Real("3.14".to_string())); - let result = get_hash_number(&yaml, "value").unwrap(); - assert_eq!(result, Some(3.14)); - } - - #[test] - fn test_get_hash_number_missing_key() { - let yaml = make_hash("count", Yaml::Integer(42)); - let result = get_hash_number(&yaml, "other").unwrap(); - assert_eq!(result, None); - } - - #[test] - fn test_get_hash_number_not_a_number() { - let yaml = make_hash("count", Yaml::String("not a number".to_string())); - let result = get_hash_number(&yaml, "count"); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("'count' must be a number")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_number_invalid_real() { - // A Real that cannot be parsed as f64 - let yaml = make_hash("value", Yaml::Real("not_a_float".to_string())); - let result = get_hash_number(&yaml, "value"); - assert!(result.is_err()); - } - - // ==================== get_hash_usize tests ==================== - - #[test] - fn test_get_hash_usize_valid() { - let yaml = make_hash("size", Yaml::Integer(10)); - let result = get_hash_usize(&yaml, "size").unwrap(); - assert_eq!(result, Some(10)); - } - - #[test] - fn test_get_hash_usize_zero() { - let yaml = make_hash("size", Yaml::Integer(0)); - let result = get_hash_usize(&yaml, "size").unwrap(); - assert_eq!(result, Some(0)); - } - - #[test] - fn test_get_hash_usize_missing_key() { - let yaml = make_hash("size", Yaml::Integer(10)); - let result = get_hash_usize(&yaml, "other").unwrap(); - assert_eq!(result, None); - } - - #[test] - fn test_get_hash_usize_negative() { - let yaml = make_hash("size", Yaml::Integer(-5)); - let result = get_hash_usize(&yaml, "size"); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("'size' must be a non-negative integer")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_usize_not_an_integer() { - let yaml = make_hash("size", Yaml::String("large".to_string())); - let result = get_hash_usize(&yaml, "size"); - assert!(result.is_err()); - } - - #[test] - fn test_get_hash_usize_real_number() { - let yaml = make_hash("size", Yaml::Real("3.14".to_string())); - let result = get_hash_usize(&yaml, "size"); - assert!(result.is_err()); - } - - // ==================== get_hash_bool tests ==================== - - #[test] - fn test_get_hash_bool_true() { - let yaml = make_hash("enabled", Yaml::Boolean(true)); - let result = get_hash_bool(&yaml, "enabled").unwrap(); - assert_eq!(result, Some(true)); - } - - #[test] - fn test_get_hash_bool_false() { - let yaml = make_hash("enabled", Yaml::Boolean(false)); - let result = get_hash_bool(&yaml, "enabled").unwrap(); - assert_eq!(result, Some(false)); - } - - #[test] - fn test_get_hash_bool_missing_key() { - let yaml = make_hash("enabled", Yaml::Boolean(true)); - let result = get_hash_bool(&yaml, "other").unwrap(); - assert_eq!(result, None); - } - - #[test] - fn test_get_hash_bool_not_a_boolean() { - let yaml = make_hash("enabled", Yaml::String("yes".to_string())); - let result = get_hash_bool(&yaml, "enabled"); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("'enabled' must be a boolean")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_bool_integer_not_boolean() { - let yaml = make_hash("enabled", Yaml::Integer(1)); - let result = get_hash_bool(&yaml, "enabled"); - assert!(result.is_err()); - } - - // ==================== get_hash_string_array tests ==================== - - #[test] - fn test_get_hash_string_array_valid() { - let yaml = make_hash_with_array( - "items", - vec![Yaml::String("a".to_string()), Yaml::String("b".to_string())], - ); - let result = get_hash_string_array(&yaml, "items").unwrap(); - assert_eq!(result, Some(vec!["a".to_string(), "b".to_string()])); - } - - #[test] - fn test_get_hash_string_array_empty() { - let yaml = make_hash_with_array("items", vec![]); - let result = get_hash_string_array(&yaml, "items").unwrap(); - assert_eq!(result, Some(vec![])); - } - - #[test] - fn test_get_hash_string_array_missing_key() { - let yaml = make_hash_with_array("items", vec![Yaml::String("a".to_string())]); - let result = get_hash_string_array(&yaml, "other").unwrap(); - assert_eq!(result, None); - } - - #[test] - fn test_get_hash_string_array_not_an_array() { - let yaml = make_hash("items", Yaml::String("not an array".to_string())); - let result = get_hash_string_array(&yaml, "items"); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. } => { - assert!(message.contains("'items' must be an array")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_string_array_non_string_items() { - let yaml = make_hash_with_array("items", vec![Yaml::Integer(1), Yaml::Integer(2)]); - let result = get_hash_string_array(&yaml, "items"); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("'items' items must be strings")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_string_array_mixed_items() { - let yaml = make_hash_with_array( - "items", - vec![Yaml::String("valid".to_string()), Yaml::Integer(42)], - ); - let result = get_hash_string_array(&yaml, "items"); - assert!(result.is_err()); - } - - // ==================== get_hash_tags tests ==================== - - #[test] - fn test_get_hash_tags_valid() { - let yaml = make_hash_with_nested_hash( - "tags", - vec![ - ("key1", Yaml::String("value1".to_string())), - ("key2", Yaml::Integer(42)), - ], - ); - let result = get_hash_tags(&yaml).unwrap(); - assert!(result.is_some()); - let tags = result.unwrap(); - assert_eq!(tags.get("key1"), Some(&serde_json::json!("value1"))); - assert_eq!(tags.get("key2"), Some(&serde_json::json!(42))); - } - - #[test] - fn test_get_hash_tags_missing() { - let yaml = make_hash("other", Yaml::String("value".to_string())); - let result = get_hash_tags(&yaml).unwrap(); - assert_eq!(result, None); - } - - #[test] - fn test_get_hash_tags_not_an_object() { - let yaml = make_hash("tags", Yaml::String("not an object".to_string())); - let result = get_hash_tags(&yaml); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("tags must be an object")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_tags_non_string_key() { - // Create a hash with an integer key (which YAML allows but our API doesn't) - let mut outer_hash = Hash::new(); - let mut inner_hash = Hash::new(); - inner_hash.insert(Yaml::Integer(123), Yaml::String("value".to_string())); - - outer_hash.insert( - Yaml::String("tags".to_string()), - Yaml::Hash(inner_hash.clone()), - ); - - // Create the inner key-value entry with integer key - let inner_key_node = YamlWithSourceInfo::new_scalar(Yaml::Integer(123), source_info()); - let inner_value_node = - YamlWithSourceInfo::new_scalar(Yaml::String("value".to_string()), source_info()); - let inner_entry = YamlHashEntry::new( - inner_key_node, - inner_value_node, - source_info(), - source_info(), - source_info(), - ); - - let inner_hash_node = - YamlWithSourceInfo::new_hash(Yaml::Hash(inner_hash), source_info(), vec![inner_entry]); - - let outer_key_node = - YamlWithSourceInfo::new_scalar(Yaml::String("tags".to_string()), source_info()); - let outer_entry = YamlHashEntry::new( - outer_key_node, - inner_hash_node, - source_info(), - source_info(), - source_info(), - ); - - let yaml = - YamlWithSourceInfo::new_hash(Yaml::Hash(outer_hash), source_info(), vec![outer_entry]); - - let result = get_hash_tags(&yaml); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("tag key must be a string")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_get_hash_tags_with_boolean() { - let yaml = make_hash_with_nested_hash("tags", vec![("flag", Yaml::Boolean(true))]); - let result = get_hash_tags(&yaml).unwrap(); - assert!(result.is_some()); - let tags = result.unwrap(); - assert_eq!(tags.get("flag"), Some(&serde_json::json!(true))); - } - - #[test] - fn test_get_hash_tags_with_null() { - let yaml = make_hash_with_nested_hash("tags", vec![("empty", Yaml::Null)]); - let result = get_hash_tags(&yaml).unwrap(); - assert!(result.is_some()); - let tags = result.unwrap(); - assert_eq!(tags.get("empty"), Some(&serde_json::Value::Null)); - } - - // ==================== yaml_to_json_value tests ==================== - - #[test] - fn test_yaml_to_json_value_string() { - let yaml = Yaml::String("hello".to_string()); - let result = yaml_to_json_value(&yaml, &source_info()).unwrap(); - assert_eq!(result, serde_json::json!("hello")); - } - - #[test] - fn test_yaml_to_json_value_integer() { - let yaml = Yaml::Integer(42); - let result = yaml_to_json_value(&yaml, &source_info()).unwrap(); - assert_eq!(result, serde_json::json!(42)); - } - - #[test] - fn test_yaml_to_json_value_negative_integer() { - let yaml = Yaml::Integer(-100); - let result = yaml_to_json_value(&yaml, &source_info()).unwrap(); - assert_eq!(result, serde_json::json!(-100)); - } - - #[test] - #[allow(clippy::approx_constant)] // 3.14159 is parser test data, not an approximation of π - fn test_yaml_to_json_value_real() { - let yaml = Yaml::Real("3.14159".to_string()); - let result = yaml_to_json_value(&yaml, &source_info()).unwrap(); - assert_eq!(result, serde_json::json!(3.14159)); - } - - #[test] - fn test_yaml_to_json_value_boolean_true() { - let yaml = Yaml::Boolean(true); - let result = yaml_to_json_value(&yaml, &source_info()).unwrap(); - assert_eq!(result, serde_json::json!(true)); - } - - #[test] - fn 
test_yaml_to_json_value_boolean_false() { - let yaml = Yaml::Boolean(false); - let result = yaml_to_json_value(&yaml, &source_info()).unwrap(); - assert_eq!(result, serde_json::json!(false)); - } - - #[test] - fn test_yaml_to_json_value_null() { - let yaml = Yaml::Null; - let result = yaml_to_json_value(&yaml, &source_info()).unwrap(); - assert_eq!(result, serde_json::Value::Null); - } - - #[test] - fn test_yaml_to_json_value_invalid_real() { - let yaml = Yaml::Real("not_a_number".to_string()); - let result = yaml_to_json_value(&yaml, &source_info()); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. } => { - assert!(message.contains("Invalid number")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_yaml_to_json_value_array_unsupported() { - let yaml = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]); - let result = yaml_to_json_value(&yaml, &source_info()); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. } => { - assert!(message.contains("Unsupported YAML type")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_yaml_to_json_value_hash_unsupported() { - let yaml = Yaml::Hash(Hash::new()); - let result = yaml_to_json_value(&yaml, &source_info()); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("Unsupported YAML type")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_yaml_to_json_value_infinity() { - // f64::INFINITY cannot be represented in JSON - let yaml = Yaml::Real("inf".to_string()); - let result = yaml_to_json_value(&yaml, &source_info()); - // "inf" parses to f64::INFINITY, but serde_json::Number::from_f64 returns None - assert!(result.is_err()); - } - - #[test] - fn test_yaml_to_json_value_nan() { - // NaN cannot be represented in JSON - let yaml = Yaml::Real("nan".to_string()); - let result = yaml_to_json_value(&yaml, &source_info()); - // "nan" parses to f64::NAN, but serde_json::Number::from_f64 returns None - assert!(result.is_err()); - } -} diff --git a/crates/quarto-yaml-validation/src/schema/merge.rs b/crates/quarto-yaml-validation/src/schema/merge.rs deleted file mode 100644 index 2a20a31f1..000000000 --- a/crates/quarto-yaml-validation/src/schema/merge.rs +++ /dev/null @@ -1,640 +0,0 @@ -//! Schema merging logic for inheritance -//! -//! Implements quarto-cli's schema inheritance semantics when combining -//! base schemas with derived schemas via the `super` field. -//! -//! This module provides the `merge_object_schemas()` function which merges -//! base object schemas with a derived object schema according to quarto-cli's -//! merging rules (from common.ts:221-403). - -use crate::error::{SchemaError, SchemaResult}; -use crate::schema::types::{AllOfSchema, AnyOfSchema, ObjectSchema, SchemaAnnotations}; -use crate::schema::{Schema, SchemaRegistry}; -use std::collections::HashMap; - -/// Resolve a base schema reference if it's an eager ref -/// -/// Returns the resolved schema if it's a Ref with eager=true, -/// otherwise returns the schema as-is. 
-fn resolve_base_schema(schema: &Schema, registry: &SchemaRegistry) -> SchemaResult { - match schema { - Schema::Ref(ref_schema) if ref_schema.eager => { - // Eager resolution - look up in registry - registry - .resolve(&ref_schema.reference) - .cloned() - .ok_or_else(|| SchemaError::InvalidStructure { - message: format!( - "Cannot resolve reference '{}' - not found in registry", - ref_schema.reference - ), - // Schema-structure bug — no user-YAML to point at. - location: None, - }) - } - _ => Ok(schema.clone()), - } -} - -/// Validate that a schema is an ObjectSchema -/// -/// Returns the ObjectSchema if valid, error otherwise -fn expect_object_schema(schema: &Schema) -> SchemaResult<&ObjectSchema> { - match schema { - Schema::Object(obj) => Ok(obj), - _ => Err(SchemaError::InvalidStructure { - message: format!( - "Base schema must be an object schema, got {}", - schema.type_name() - ), - // Schema-structure bug — no user-YAML to point at. - location: None, - }), - } -} - -/// Merge base schemas with derived schema -/// -/// Implements quarto-cli's objectSchema() merging logic from common.ts:221-403 -/// -/// # Arguments -/// * `base_schemas` - List of base schemas (may contain unresolved refs) -/// * `derived` - The derived object schema -/// * `registry` - Schema registry for resolving references -/// -/// # Returns -/// A new ObjectSchema with merged properties -pub fn merge_object_schemas( - base_schemas: &[Schema], - derived: &ObjectSchema, - registry: &SchemaRegistry, -) -> SchemaResult { - // Resolve all base schema references - let resolved_bases: SchemaResult> = base_schemas - .iter() - .map(|s| resolve_base_schema(s, registry)) - .collect(); - let resolved_bases = resolved_bases?; - - // Validate all are object schemas - let base_objects: SchemaResult> = - resolved_bases.iter().map(expect_object_schema).collect(); - let base_objects = base_objects?; - - if base_objects.is_empty() { - return Err(SchemaError::InvalidStructure { - message: "base schema 
cannot be empty list".to_string(), - // Schema-structure bug — no user-YAML to point at. - location: None, - }); - } - - // Start with annotations from first base schema - let mut result = ObjectSchema { - annotations: base_objects[0].annotations.clone(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, // Don't propagate base_schema (already merged) - }; - - // Apply remaining base schema annotations (later bases override earlier) - for base in base_objects.iter().skip(1) { - if base.annotations.id.is_some() { - result.annotations.id.clone_from(&base.annotations.id); - } - if base.annotations.description.is_some() { - result - .annotations - .description - .clone_from(&base.annotations.description); - } - if base.annotations.documentation.is_some() { - result - .annotations - .documentation - .clone_from(&base.annotations.documentation); - } - if base.annotations.error_message.is_some() { - result - .annotations - .error_message - .clone_from(&base.annotations.error_message); - } - if base.annotations.hidden.is_some() { - result.annotations.hidden = base.annotations.hidden; - } - if base.annotations.completions.is_some() { - result - .annotations - .completions - .clone_from(&base.annotations.completions); - } - if base.annotations.additional_completions.is_some() { - result - .annotations - .additional_completions - .clone_from(&base.annotations.additional_completions); - } - if base.annotations.tags.is_some() { - result.annotations.tags.clone_from(&base.annotations.tags); - } - } - - // Remove $id to avoid duplicate IDs (quarto-cli line 243-245) - result.annotations.id = None; - - // Merge properties (base properties first, then derived overrides) - for base in &base_objects { - for (key, schema) in &base.properties { - 
result.properties.insert(key.clone(), schema.clone()); - } - } - for (key, schema) in &derived.properties { - result.properties.insert(key.clone(), schema.clone()); - } - - // Merge patternProperties - for base in &base_objects { - for (key, schema) in &base.pattern_properties { - result - .pattern_properties - .insert(key.clone(), schema.clone()); - } - } - for (key, schema) in &derived.pattern_properties { - result - .pattern_properties - .insert(key.clone(), schema.clone()); - } - - // Merge required (flatten all) - for base in &base_objects { - result.required.extend(base.required.iter().cloned()); - } - result.required.extend(derived.required.iter().cloned()); - - // Merge additionalProperties using allOf - let mut additional_props_schemas = Vec::new(); - for base in &base_objects { - if let Some(ref ap) = base.additional_properties { - additional_props_schemas.push((**ap).clone()); - } - } - if let Some(ref ap) = derived.additional_properties { - additional_props_schemas.push((**ap).clone()); - } - - result.additional_properties = if additional_props_schemas.is_empty() { - None - } else if additional_props_schemas.len() == 1 { - Some(Box::new( - additional_props_schemas.into_iter().next().unwrap(), - )) - } else { - // Combine with allOf - Some(Box::new(Schema::AllOf(AllOfSchema { - annotations: SchemaAnnotations::default(), - schemas: additional_props_schemas, - }))) - }; - - // Merge propertyNames using anyOf (but skip case-detection ones) - let mut property_names_schemas = Vec::new(); - for base in &base_objects { - if let Some(ref pn) = base.property_names { - // Check if this is a case-detection schema (has tags.case-detection) - let is_case_detection = match pn.as_ref() { - Schema::String(s) => s - .annotations - .tags - .as_ref() - .and_then(|tags| tags.get("case-detection")) - .and_then(|v| v.as_bool()) - .unwrap_or(false), - _ => false, - }; - - if !is_case_detection { - property_names_schemas.push((**pn).clone()); - } - } - } - - 
result.property_names = if property_names_schemas.is_empty() { - None - } else if property_names_schemas.len() == 1 { - Some(Box::new(property_names_schemas.into_iter().next().unwrap())) - } else { - // Combine with anyOf - Some(Box::new(Schema::AnyOf(AnyOfSchema { - annotations: SchemaAnnotations::default(), - schemas: property_names_schemas, - }))) - }; - - // Merge closed (true if ANY base or derived is closed) - result.closed = base_objects.iter().any(|b| b.closed) || derived.closed; - - // Apply derived-specific fields (override bases) - if derived.min_properties.is_some() { - result.min_properties = derived.min_properties; - } - if derived.max_properties.is_some() { - result.max_properties = derived.max_properties; - } - if derived.naming_convention.is_some() { - result - .naming_convention - .clone_from(&derived.naming_convention); - } - - // Apply derived description if present (override base) - if derived.annotations.description.is_some() { - result - .annotations - .description - .clone_from(&derived.annotations.description); - } - - Ok(result) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::schema::types::*; - - #[test] - fn test_merge_simple_properties() { - let registry = SchemaRegistry::new(); - - // Create base schema - let mut base_props = HashMap::new(); - base_props.insert( - "id".to_string(), - Schema::String(StringSchema { - annotations: Default::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ); - - let base = ObjectSchema { - annotations: Default::default(), - properties: base_props, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["id".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - // Create derived schema - let mut derived_props = HashMap::new(); - derived_props.insert( - "name".to_string(), - Schema::String(StringSchema { - annotations: 
Default::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ); - - let derived = ObjectSchema { - annotations: Default::default(), - properties: derived_props, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["name".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - // Merge - let merged = merge_object_schemas(&[Schema::Object(base)], &derived, ®istry).unwrap(); - - // Verify merged has both properties - assert!(merged.properties.contains_key("id")); - assert!(merged.properties.contains_key("name")); - assert_eq!(merged.required.len(), 2); - assert!(merged.required.contains(&"id".to_string())); - assert!(merged.required.contains(&"name".to_string())); - } - - #[test] - fn test_merge_with_ref() { - let mut registry = SchemaRegistry::new(); - - // Register base schema - let mut base_props = HashMap::new(); - base_props.insert( - "base_field".to_string(), - Schema::String(StringSchema { - annotations: Default::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ); - - let base = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations { - id: Some("base-schema".to_string()), - ..Default::default() - }, - properties: base_props, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["base_field".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - registry.register("base-schema".to_string(), base); - - // Create ref to base - let base_ref = Schema::Ref(RefSchema { - annotations: Default::default(), - reference: "base-schema".to_string(), - eager: true, - }); - - // Create derived - let mut derived_props = HashMap::new(); - derived_props.insert( - "derived_field".to_string(), - Schema::Boolean(BooleanSchema { - annotations: 
Default::default(), - }), - ); - - let derived = ObjectSchema { - annotations: Default::default(), - properties: derived_props, - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - // Merge - let merged = merge_object_schemas(&[base_ref], &derived, ®istry).unwrap(); - - // Verify - assert!(merged.properties.contains_key("base_field")); - assert!(merged.properties.contains_key("derived_field")); - assert_eq!(merged.required.len(), 1); - assert_eq!(merged.required[0], "base_field"); - } - - #[test] - fn test_property_override() { - let registry = SchemaRegistry::new(); - - // Base has 'name' as string with no constraints - let mut base_props = HashMap::new(); - base_props.insert( - "name".to_string(), - Schema::String(StringSchema { - annotations: SchemaAnnotations { - description: Some("Base description".to_string()), - ..Default::default() - }, - min_length: None, - max_length: None, - pattern: None, - }), - ); - - let base = ObjectSchema { - annotations: Default::default(), - properties: base_props, - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - // Derived overrides 'name' with pattern - let mut derived_props = HashMap::new(); - derived_props.insert( - "name".to_string(), - Schema::String(StringSchema { - annotations: SchemaAnnotations { - description: Some("Derived description".to_string()), - ..Default::default() - }, - min_length: None, - max_length: None, - pattern: Some("^[A-Z]".to_string()), - }), - ); - - let derived = ObjectSchema { - annotations: Default::default(), - properties: derived_props, - pattern_properties: HashMap::new(), - additional_properties: None, - required: 
Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - // Merge - let merged = merge_object_schemas(&[Schema::Object(base)], &derived, ®istry).unwrap(); - - // Derived should win - match merged.properties.get("name") { - Some(Schema::String(s)) => { - assert_eq!(s.pattern, Some("^[A-Z]".to_string())); - assert_eq!( - s.annotations.description, - Some("Derived description".to_string()) - ); - } - _ => panic!("Expected string schema for name"), - } - } - - #[test] - fn test_closed_inheritance() { - let registry = SchemaRegistry::new(); - - // Base is closed - let base = ObjectSchema { - annotations: Default::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: true, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - // Derived is not closed - let derived = ObjectSchema { - annotations: Default::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - // Merge - should be closed - let merged = merge_object_schemas(&[Schema::Object(base)], &derived, ®istry).unwrap(); - - assert!(merged.closed); - } - - #[test] - fn test_missing_reference_error() { - let registry = SchemaRegistry::new(); // Empty - - let base_ref = Schema::Ref(RefSchema { - annotations: Default::default(), - reference: "non-existent".to_string(), - eager: true, - }); - - let derived = ObjectSchema { - annotations: Default::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, 
- property_names: None, - naming_convention: None, - base_schema: None, - }; - - let result = merge_object_schemas(&[base_ref], &derived, ®istry); - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("not found in registry") - ); - } - - #[test] - fn test_non_object_base_error() { - let mut registry = SchemaRegistry::new(); - - // Register a STRING schema (not object) - registry.register( - "not-object".to_string(), - Schema::String(StringSchema { - annotations: Default::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ); - - let base_ref = Schema::Ref(RefSchema { - annotations: Default::default(), - reference: "not-object".to_string(), - eager: true, - }); - - let derived = ObjectSchema { - annotations: Default::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - let result = merge_object_schemas(&[base_ref], &derived, ®istry); - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("must be an object schema") - ); - } - - #[test] - fn test_empty_base_list_error() { - let registry = SchemaRegistry::new(); - - let derived = ObjectSchema { - annotations: Default::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }; - - let result = merge_object_schemas(&[], &derived, ®istry); - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("cannot be empty list") - ); - } -} diff --git a/crates/quarto-yaml-validation/src/schema/mod.rs b/crates/quarto-yaml-validation/src/schema/mod.rs deleted file 
mode 100644 index 443a2fa5d..000000000 --- a/crates/quarto-yaml-validation/src/schema/mod.rs +++ /dev/null @@ -1,1533 +0,0 @@ -//! Schema types for YAML validation -//! -//! This module defines the schema type system used for validation, -//! closely matching Quarto's simplified JSON Schema subset. -//! -//! IMPORTANT: This module does NOT use serde deserialization for loading schemas -//! from YAML because serde_yaml only supports YAML 1.1. We need YAML 1.2 support -//! for consistency with user documents and to support Quarto extensions. -//! See ../YAML-1.2-REQUIREMENT.md for details. -//! -//! Instead, schemas are parsed from YamlWithSourceInfo (quarto-yaml) which uses -//! yaml-rust2 (YAML 1.2). See Schema::from_yaml() method below. - -use crate::error::SchemaResult; -use quarto_yaml::YamlWithSourceInfo; -use std::collections::HashMap; - -// Internal modules -mod annotations; -mod helpers; -mod merge; -mod parser; -mod parsers; -mod types; - -// Public re-exports -pub use merge::merge_object_schemas; -pub use types::{ - AllOfSchema, AnyOfSchema, AnySchema, ArraySchema, BooleanSchema, EnumSchema, NamingConvention, - NullSchema, NumberSchema, ObjectSchema, RefSchema, SchemaAnnotations, StringSchema, -}; - -use annotations::EMPTY_ANNOTATIONS; - -/// The main schema enum representing all possible schema types -#[derive(Debug, Clone, PartialEq)] -pub enum Schema { - /// Always fails validation - // False, - // we're going to try not having a False schema because it's non-monotonic and so causes too much noncompositionality - - /// Always passes validation - True, - /// Boolean type schema - Boolean(BooleanSchema), - /// Number type schema (integer or float) - Number(NumberSchema), - /// String type schema - String(StringSchema), - /// Null type schema - Null(NullSchema), - /// Enum type schema (fixed set of values) - Enum(EnumSchema), - /// Any type schema (no validation) - Any(AnySchema), - /// AnyOf schema (validates if any subschema matches) - 
AnyOf(AnyOfSchema), - /// AllOf schema (validates if all subschemas match) - AllOf(AllOfSchema), - /// Array type schema - Array(ArraySchema), - /// Object type schema - Object(ObjectSchema), - /// Reference to another schema - Ref(RefSchema), -} - -impl Schema { - /// Parse a Schema from YamlWithSourceInfo. - /// - /// This supports all quarto-cli schema syntaxes: - /// - Short forms: "boolean", "string", "number", etc. - /// - Object forms: {boolean: {...}}, {string: {...}}, etc. - /// - Inline arrays: [val1, val2, val3] (for enums) - /// - /// # Example - /// - /// ``` - /// use quarto_yaml_validation::Schema; - /// use quarto_yaml; - /// - /// let yaml = quarto_yaml::parse("boolean").unwrap(); - /// let schema = Schema::from_yaml(&yaml).unwrap(); - /// ``` - pub fn from_yaml(yaml: &YamlWithSourceInfo) -> SchemaResult { - parser::from_yaml(yaml) - } - - /// Get the annotations for this schema - pub fn annotations(&self) -> &SchemaAnnotations { - match self { - Schema::True => &EMPTY_ANNOTATIONS, - Schema::Boolean(s) => &s.annotations, - Schema::Number(s) => &s.annotations, - Schema::String(s) => &s.annotations, - Schema::Null(s) => &s.annotations, - Schema::Enum(s) => &s.annotations, - Schema::Any(s) => &s.annotations, - Schema::AnyOf(s) => &s.annotations, - Schema::AllOf(s) => &s.annotations, - Schema::Array(s) => &s.annotations, - Schema::Object(s) => &s.annotations, - Schema::Ref(s) => &s.annotations, - } - } - - /// Get a mutable reference to the annotations for this schema - pub fn annotations_mut(&mut self) -> Option<&mut SchemaAnnotations> { - match self { - Schema::True => None, - Schema::Boolean(s) => Some(&mut s.annotations), - Schema::Number(s) => Some(&mut s.annotations), - Schema::String(s) => Some(&mut s.annotations), - Schema::Null(s) => Some(&mut s.annotations), - Schema::Enum(s) => Some(&mut s.annotations), - Schema::Any(s) => Some(&mut s.annotations), - Schema::AnyOf(s) => Some(&mut s.annotations), - Schema::AllOf(s) => Some(&mut 
s.annotations), - Schema::Array(s) => Some(&mut s.annotations), - Schema::Object(s) => Some(&mut s.annotations), - Schema::Ref(s) => Some(&mut s.annotations), - } - } - - /// Replace annotations for this schema, returning the modified schema - /// - /// # Panics - /// - /// Panics if called on False or True schemas, as they don't support annotations. - pub(crate) fn with_annotations(mut self, annotations: SchemaAnnotations) -> Self { - match &mut self { - Schema::True => panic!("Cannot set annotations on Schema::True"), - Schema::Boolean(s) => s.annotations = annotations, - Schema::Number(s) => s.annotations = annotations, - Schema::String(s) => s.annotations = annotations, - Schema::Null(s) => s.annotations = annotations, - Schema::Enum(s) => s.annotations = annotations, - Schema::Any(s) => s.annotations = annotations, - Schema::AnyOf(s) => s.annotations = annotations, - Schema::AllOf(s) => s.annotations = annotations, - Schema::Array(s) => s.annotations = annotations, - Schema::Object(s) => s.annotations = annotations, - Schema::Ref(s) => s.annotations = annotations, - } - self - } - - /// Get a human-readable name for this schema type - pub fn type_name(&self) -> &'static str { - match self { - Schema::True => "true", - Schema::Boolean(_) => "boolean", - Schema::Number(_) => "number", - Schema::String(_) => "string", - Schema::Null(_) => "null", - Schema::Enum(_) => "enum", - Schema::Any(_) => "any", - Schema::AnyOf(_) => "anyOf", - Schema::AllOf(_) => "allOf", - Schema::Array(_) => "array", - Schema::Object(_) => "object", - Schema::Ref(_) => "$ref", - } - } - - /// Compile a schema by resolving eager references and merging inheritance. - /// - /// This creates a structurally complete schema suitable for validation. - /// Lazy references (eager=false) are kept as references and resolved - /// during validation to support circular dependencies. - /// - /// # Two-Phase Processing - /// - /// Schemas go through two phases: - /// 1. 
**Parsing** (stateless, no registry): YAML → Schema AST - /// 2. **Compilation** (with registry): Schema AST → Compiled Schema - /// - /// Compilation resolves: - /// - Eager references (`resolveRef`, `eager: true`) - must resolve for schema completeness - /// - Object inheritance (`base_schema`) - merges properties from base schemas - /// - Nested schemas recursively - /// - /// Compilation preserves: - /// - Lazy references (`ref`, `eager: false`) - resolved during validation - /// - /// # Arguments - /// * `registry` - Schema registry for resolving references - /// - /// # Returns - /// A compiled schema with all eager references resolved and inheritance merged - /// - /// # Errors - /// Returns error if: - /// - An eager reference cannot be resolved (not in registry) - /// - Base schema is not an ObjectSchema - /// - Circular eager references detected (future enhancement) - /// - /// # Example - /// - /// ``` - /// use quarto_yaml_validation::{Schema, SchemaRegistry}; - /// use quarto_yaml; - /// - /// let mut registry = SchemaRegistry::new(); - /// - /// // Parse and register base schema - /// let base_yaml = quarto_yaml::parse(r#" - /// object: - /// properties: - /// id: string - /// "#).unwrap(); - /// let base = Schema::from_yaml(&base_yaml).unwrap(); - /// registry.register("base".to_string(), base); - /// - /// // Parse derived schema with inheritance - /// let derived_yaml = quarto_yaml::parse(r#" - /// object: - /// super: - /// resolveRef: base - /// properties: - /// name: string - /// "#).unwrap(); - /// let derived = Schema::from_yaml(&derived_yaml).unwrap(); - /// - /// // Compile - merges base and derived - /// let compiled = derived.compile(®istry).unwrap(); - /// - /// // Compiled schema now has both 'id' and 'name' properties - /// ``` - pub fn compile(&self, registry: &SchemaRegistry) -> SchemaResult { - match self { - // Object with inheritance - must merge base schemas - Schema::Object(obj) if obj.base_schema.is_some() => { - // Compile 
base schemas first (recursive) - let base_schemas = obj.base_schema.as_ref().unwrap(); - let compiled_bases: SchemaResult> = - base_schemas.iter().map(|s| s.compile(registry)).collect(); - let compiled_bases = compiled_bases?; - - // Merge with derived schema - let merged = merge_object_schemas(&compiled_bases, obj, registry)?; - - // Result has no base_schema (it's been merged) - Ok(Schema::Object(merged)) - } - - // Eager reference - must resolve now - Schema::Ref(r) if r.eager => { - let resolved = registry.resolve(&r.reference).ok_or_else(|| { - crate::error::SchemaError::InvalidStructure { - message: format!( - "Cannot resolve eager reference '{}' - not found in registry", - r.reference - ), - // Schema-structure bug — no user-YAML to point at. - location: None, - } - })?; - - // Recursively compile the resolved schema - resolved.compile(registry) - } - - // Lazy reference - keep as is for validation time - Schema::Ref(_) => Ok(self.clone()), - - // Recursively compile nested schemas in containers - Schema::AnyOf(anyof) => { - let compiled_schemas: SchemaResult> = - anyof.schemas.iter().map(|s| s.compile(registry)).collect(); - Ok(Schema::AnyOf(AnyOfSchema { - annotations: anyof.annotations.clone(), - schemas: compiled_schemas?, - })) - } - - Schema::AllOf(allof) => { - let compiled_schemas: SchemaResult> = - allof.schemas.iter().map(|s| s.compile(registry)).collect(); - Ok(Schema::AllOf(AllOfSchema { - annotations: allof.annotations.clone(), - schemas: compiled_schemas?, - })) - } - - Schema::Array(arr) => { - let compiled_items = if let Some(items) = &arr.items { - Some(Box::new(items.compile(registry)?)) - } else { - None - }; - Ok(Schema::Array(ArraySchema { - annotations: arr.annotations.clone(), - items: compiled_items, - min_items: arr.min_items, - max_items: arr.max_items, - unique_items: arr.unique_items, - })) - } - - Schema::Object(obj) => { - // Object without inheritance - compile nested property schemas - let mut compiled_properties = 
HashMap::new(); - for (key, prop_schema) in &obj.properties { - compiled_properties.insert(key.clone(), prop_schema.compile(registry)?); - } - - let mut compiled_pattern_properties = HashMap::new(); - for (pattern, prop_schema) in &obj.pattern_properties { - compiled_pattern_properties - .insert(pattern.clone(), prop_schema.compile(registry)?); - } - - let compiled_additional = if let Some(ap) = &obj.additional_properties { - Some(Box::new(ap.compile(registry)?)) - } else { - None - }; - - let compiled_property_names = if let Some(pn) = &obj.property_names { - Some(Box::new(pn.compile(registry)?)) - } else { - None - }; - - Ok(Schema::Object(ObjectSchema { - annotations: obj.annotations.clone(), - properties: compiled_properties, - pattern_properties: compiled_pattern_properties, - additional_properties: compiled_additional, - required: obj.required.clone(), - min_properties: obj.min_properties, - max_properties: obj.max_properties, - closed: obj.closed, - property_names: compiled_property_names, - naming_convention: obj.naming_convention.clone(), - base_schema: None, // No inheritance at this level - })) - } - - // Primitives don't need compilation - Schema::True - | Schema::Boolean(_) - | Schema::Number(_) - | Schema::String(_) - | Schema::Null(_) - | Schema::Enum(_) - | Schema::Any(_) => Ok(self.clone()), - } - } -} - -/// Schema registry for managing schemas with $ref resolution -#[derive(Debug, Default)] -pub struct SchemaRegistry { - schemas: HashMap, -} - -impl SchemaRegistry { - /// Create a new empty schema registry - pub fn new() -> Self { - Self { - schemas: HashMap::new(), - } - } - - /// Register a schema with an ID - pub fn register(&mut self, id: String, schema: Schema) { - self.schemas.insert(id, schema); - } - - /// Resolve a schema reference - pub fn resolve(&self, reference: &str) -> Option<&Schema> { - self.schemas.get(reference) - } - - /// Get all registered schema IDs - pub fn ids(&self) -> impl Iterator { - self.schemas.keys() - } -} - 
-#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_schema_type_name() { - assert_eq!(Schema::True.type_name(), "true"); - assert_eq!( - Schema::Boolean(BooleanSchema { - annotations: Default::default() - }) - .type_name(), - "boolean" - ); - } - - #[test] - fn test_schema_registry() { - let mut registry = SchemaRegistry::new(); - let schema = Schema::Boolean(BooleanSchema { - annotations: SchemaAnnotations { - id: Some("test-bool".to_string()), - ..Default::default() - }, - }); - - registry.register("test-bool".to_string(), schema.clone()); - - let resolved = registry.resolve("test-bool"); - assert!(resolved.is_some()); - assert_eq!(resolved.unwrap(), &schema); - } - - // Tests for Schema::from_yaml() - - #[test] - fn test_from_yaml_boolean_short() { - let yaml = quarto_yaml::parse("boolean").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - assert!(matches!(schema, Schema::Boolean(_))); - assert_eq!(schema.type_name(), "boolean"); - } - - #[test] - fn test_from_yaml_boolean_long() { - let yaml = quarto_yaml::parse( - r#" -boolean: - description: "A boolean value" -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Boolean(s) = schema { - assert_eq!( - s.annotations.description, - Some("A boolean value".to_string()) - ); - } else { - panic!("Expected Boolean schema"); - } - } - - #[test] - fn test_from_yaml_number_short() { - let yaml = quarto_yaml::parse("number").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - assert_eq!(schema.type_name(), "number"); - } - - #[test] - fn test_from_yaml_number_long() { - let yaml = quarto_yaml::parse( - r#" -number: - minimum: 0 - maximum: 100 - description: "A percentage" -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Number(s) = schema { - assert_eq!(s.minimum, Some(0.0)); - assert_eq!(s.maximum, Some(100.0)); - assert_eq!(s.annotations.description, Some("A percentage".to_string())); - } else { - 
panic!("Expected Number schema"); - } - } - - #[test] - fn test_from_yaml_string_short() { - let yaml = quarto_yaml::parse("string").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - assert_eq!(schema.type_name(), "string"); - } - - #[test] - fn test_from_yaml_path() { - let yaml = quarto_yaml::parse("path").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - assert_eq!(schema.type_name(), "string"); - } - - #[test] - fn test_from_yaml_string_long() { - let yaml = quarto_yaml::parse( - r#" -string: - pattern: "^[a-z]+$" - minLength: 1 - maxLength: 50 -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::String(s) = schema { - assert_eq!(s.pattern, Some("^[a-z]+$".to_string())); - assert_eq!(s.min_length, Some(1)); - assert_eq!(s.max_length, Some(50)); - } else { - panic!("Expected String schema"); - } - } - - #[test] - fn test_from_yaml_null() { - let yaml = quarto_yaml::parse("null").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - assert_eq!(schema.type_name(), "null"); - } - - #[test] - fn test_from_yaml_any() { - let yaml = quarto_yaml::parse("any").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - assert_eq!(schema.type_name(), "any"); - } - - #[test] - fn test_from_yaml_enum_inline() { - let yaml = quarto_yaml::parse( - r#" -enum: [foo, bar, baz] -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Enum(s) = schema { - assert_eq!(s.values.len(), 3); - } else { - panic!("Expected Enum schema"); - } - } - - #[test] - fn test_from_yaml_enum_inline_array() { - let yaml = quarto_yaml::parse("[foo, bar, baz]").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Enum(s) = schema { - assert_eq!(s.values.len(), 3); - } else { - panic!("Expected Enum schema"); - } - } - - #[test] - fn test_from_yaml_enum_explicit() { - let yaml = quarto_yaml::parse( - r#" -enum: - values: [red, green, blue] - description: "Primary colors" -"#, - 
) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Enum(s) = schema { - assert_eq!(s.values.len(), 3); - assert_eq!( - s.annotations.description, - Some("Primary colors".to_string()) - ); - } else { - panic!("Expected Enum schema"); - } - } - - #[test] - fn test_from_yaml_anyof_array() { - let yaml = quarto_yaml::parse( - r#" -anyOf: - - boolean - - string - - number -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::AnyOf(s) = schema { - assert_eq!(s.schemas.len(), 3); - } else { - panic!("Expected AnyOf schema"); - } - } - - #[test] - fn test_from_yaml_anyof_object() { - let yaml = quarto_yaml::parse( - r#" -anyOf: - schemas: - - boolean - - string - description: "Either boolean or string" -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::AnyOf(s) = schema { - assert_eq!(s.schemas.len(), 2); - assert_eq!( - s.annotations.description, - Some("Either boolean or string".to_string()) - ); - } else { - panic!("Expected AnyOf schema"); - } - } - - #[test] - fn test_from_yaml_allof() { - let yaml = quarto_yaml::parse( - r#" -allOf: - - string - - enum: [foo, bar] -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::AllOf(s) = schema { - assert_eq!(s.schemas.len(), 2); - } else { - panic!("Expected AllOf schema"); - } - } - - #[test] - fn test_from_yaml_array() { - let yaml = quarto_yaml::parse( - r#" -array: - items: string - minItems: 1 - maxItems: 10 -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Array(s) = schema { - assert!(s.items.is_some()); - assert_eq!(s.min_items, Some(1)); - assert_eq!(s.max_items, Some(10)); - } else { - panic!("Expected Array schema"); - } - } - - #[test] - fn test_from_yaml_object_simple() { - let yaml = quarto_yaml::parse( - r#" -object: - properties: - name: string - age: number - required: [name] -"#, - ) - .unwrap(); - let schema = 
Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert_eq!(s.properties.len(), 2); - assert!(s.properties.contains_key("name")); - assert!(s.properties.contains_key("age")); - assert_eq!(s.required.len(), 1); - assert_eq!(s.required[0], "name"); - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_object_complex() { - let yaml = quarto_yaml::parse( - r#" -object: - properties: - foo: string - bar: number - patternProperties: - "^x-": string - additionalProperties: boolean - required: [foo] - closed: true - minProperties: 1 - maxProperties: 10 - description: "A complex object" -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert_eq!(s.properties.len(), 2); - assert_eq!(s.pattern_properties.len(), 1); - assert!(s.additional_properties.is_some()); - assert_eq!(s.required.len(), 1); - assert!(s.closed); - assert_eq!(s.min_properties, Some(1)); - assert_eq!(s.max_properties, Some(10)); - assert_eq!( - s.annotations.description, - Some("A complex object".to_string()) - ); - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_object_required_all() { - let yaml = quarto_yaml::parse( - r#" -object: - properties: - foo: string - bar: number - baz: boolean - required: all -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert_eq!(s.properties.len(), 3); - assert_eq!(s.required.len(), 3); - // All properties should be in required list - assert!(s.required.contains(&"foo".to_string())); - assert!(s.required.contains(&"bar".to_string())); - assert!(s.required.contains(&"baz".to_string())); - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_object_property_names_pattern() { - let yaml = quarto_yaml::parse( - r#" -object: - propertyNames: - string: - pattern: "^[a-z_]+$" - additionalProperties: string -"#, - ) - .unwrap(); - 
let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert!(s.property_names.is_some()); - if let Some(property_names) = s.property_names { - if let Schema::String(str_schema) = *property_names { - assert_eq!(str_schema.pattern, Some("^[a-z_]+$".to_string())); - } else { - panic!("Expected String schema for propertyNames"); - } - } - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_object_property_names_enum() { - let yaml = quarto_yaml::parse( - r#" -object: - propertyNames: - enum: - - name - - schema - - description -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert!(s.property_names.is_some()); - if let Some(property_names) = s.property_names { - assert!(matches!(*property_names, Schema::Enum(_))); - } - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_record_with_key_schema() { - let yaml = quarto_yaml::parse( - r#" -record: - keySchema: - string: - pattern: "^[a-z]+$" - valueSchema: number -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - // keySchema becomes property_names - assert!(s.property_names.is_some()); - if let Some(property_names) = s.property_names { - if let Schema::String(str_schema) = *property_names { - assert_eq!(str_schema.pattern, Some("^[a-z]+$".to_string())); - } else { - panic!("Expected String schema for property_names"); - } - } - // valueSchema becomes additional_properties - assert!(s.additional_properties.is_some()); - if let Some(additional_properties) = s.additional_properties { - assert!(matches!(*additional_properties, Schema::Number(_))); - } - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_naming_convention_single() { - let yaml = quarto_yaml::parse( - r#" -object: - namingConvention: camelCase -"#, - ) - .unwrap(); - let schema = 
Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert_eq!( - s.naming_convention, - Some(NamingConvention::Single("capitalizationCase".to_string())) - ); - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_naming_convention_multiple() { - let yaml = quarto_yaml::parse( - r#" -object: - namingConvention: - - snake_case - - kebab-case -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - if let Some(NamingConvention::Multiple(conventions)) = s.naming_convention { - assert_eq!(conventions.len(), 2); - assert!(conventions.contains(&"underscore_case".to_string())); - assert!(conventions.contains(&"dash-case".to_string())); - } else { - panic!("Expected Multiple naming convention"); - } - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_naming_convention_ignore() { - let yaml = quarto_yaml::parse( - r#" -object: - namingConvention: ignore -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert_eq!( - s.naming_convention, - Some(NamingConvention::Single("ignore".to_string())) - ); - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_naming_convention_normalization() { - // Test various input formats normalize correctly - let test_cases = vec![ - ("camelCase", "capitalizationCase"), - ("snake_case", "underscore_case"), - ("kebab-case", "dash-case"), - ("camel-case", "capitalizationCase"), - ("underscore-case", "underscore_case"), - ("dashCase", "dash-case"), - ]; - - for (input, expected) in test_cases { - let yaml_str = format!("object:\n namingConvention: {}", input); - let yaml = quarto_yaml::parse(&yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert_eq!( - s.naming_convention, - Some(NamingConvention::Single(expected.to_string())), - "Failed for input: 
{}", - input - ); - } else { - panic!("Expected Object schema for input: {}", input); - } - } - } - - #[test] - fn test_from_yaml_ref() { - let yaml = quarto_yaml::parse( - r#" -ref: schema/base -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Ref(s) = schema { - assert_eq!(s.reference, "schema/base"); - assert!(!s.eager); // ref is lazy - } else { - panic!("Expected Ref schema"); - } - } - - #[test] - fn test_from_yaml_dollar_ref() { - let yaml = quarto_yaml::parse( - r#" -$ref: schema/base -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Ref(s) = schema { - assert_eq!(s.reference, "schema/base"); - assert!(!s.eager); // $ref is also lazy - } else { - panic!("Expected Ref schema"); - } - } - - #[test] - fn test_from_yaml_resolve_ref() { - let yaml = quarto_yaml::parse( - r#" -resolveRef: schema/base -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Ref(s) = schema { - assert_eq!(s.reference, "schema/base"); - assert!(s.eager); // resolveRef is eager - } else { - panic!("Expected Ref schema"); - } - } - - #[test] - fn test_from_yaml_nested() { - let yaml = quarto_yaml::parse( - r#" -object: - properties: - status: - anyOf: - - boolean - - enum: [active, inactive, pending] - config: - object: - properties: - enabled: boolean -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::Object(s) = schema { - assert_eq!(s.properties.len(), 2); - // Check nested anyOf - if let Some(Schema::AnyOf(anyof)) = s.properties.get("status") { - assert_eq!(anyof.schemas.len(), 2); - } else { - panic!("Expected AnyOf schema for status"); - } - // Check nested object - if let Some(Schema::Object(obj)) = s.properties.get("config") { - assert_eq!(obj.properties.len(), 1); - } else { - panic!("Expected Object schema for config"); - } - } else { - panic!("Expected Object schema"); - } - } - - #[test] - fn test_from_yaml_error_invalid_type() 
{ - let yaml = quarto_yaml::parse("invalid_type").unwrap(); - let result = Schema::from_yaml(&yaml); - assert!(result.is_err()); - if let Err(crate::error::SchemaError::InvalidType(t)) = result { - assert_eq!(t, "invalid_type"); - } else { - panic!("Expected InvalidType error"); - } - } - - #[test] - fn test_from_yaml_error_empty_object() { - let yaml = quarto_yaml::parse("{}").unwrap(); - let result = Schema::from_yaml(&yaml); - assert!(result.is_err()); - } - - #[test] - fn test_from_yaml_with_annotations() { - let yaml = quarto_yaml::parse( - r#" -string: - description: "A string field" - hidden: true - completions: [foo, bar] - tags: - category: input -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::String(s) = schema { - assert_eq!( - s.annotations.description, - Some("A string field".to_string()) - ); - assert_eq!(s.annotations.hidden, Some(true)); - assert_eq!( - s.annotations.completions, - Some(vec!["foo".to_string(), "bar".to_string()]) - ); - assert!(s.annotations.tags.is_some()); - } else { - panic!("Expected String schema"); - } - } - - #[test] - fn test_arrayof_simple() { - let yaml = quarto_yaml::parse("arrayOf: string").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::Array(arr) => { - assert!(arr.items.is_some()); - match arr.items.as_ref().unwrap().as_ref() { - Schema::String(_) => {} - _ => panic!("Expected String schema in items"), - } - } - _ => panic!("Expected Array schema"), - } - } - - #[test] - fn test_arrayof_nested() { - // Test nested arrayOf like quarto-cli uses: arrayOf: { arrayOf: { schema: string, length: 2 } } - let yaml_str = r#" -arrayOf: - arrayOf: - schema: string - length: 2 -"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::Array(outer) => { - assert!(outer.items.is_some()); - match outer.items.as_ref().unwrap().as_ref() { - Schema::Array(inner) => { - 
assert!(inner.items.is_some()); - assert_eq!(inner.min_items, Some(2)); - assert_eq!(inner.max_items, Some(2)); - match inner.items.as_ref().unwrap().as_ref() { - Schema::String(_) => {} - _ => panic!("Expected String schema in nested items"), - } - } - _ => panic!("Expected Array schema in items"), - } - } - _ => panic!("Expected Array schema"), - } - } - - #[test] - fn test_arrayof_with_length() { - let yaml_str = r#" -arrayOf: - schema: string - length: 5 -"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::Array(arr) => { - assert!(arr.items.is_some()); - assert_eq!(arr.min_items, Some(5)); - assert_eq!(arr.max_items, Some(5)); - match arr.items.as_ref().unwrap().as_ref() { - Schema::String(_) => {} - _ => panic!("Expected String schema"), - } - } - _ => panic!("Expected Array schema"), - } - } - - #[test] - fn test_maybe_arrayof() { - let yaml = quarto_yaml::parse("maybeArrayOf: string").unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::AnyOf(anyof) => { - // Should have 2 schemas: the scalar and the array - assert_eq!(anyof.schemas.len(), 2); - - // First should be string - match &anyof.schemas[0] { - Schema::String(_) => {} - _ => panic!("Expected String schema as first option"), - } - - // Second should be array of string - match &anyof.schemas[1] { - Schema::Array(arr) => { - assert!(arr.items.is_some()); - match arr.items.as_ref().unwrap().as_ref() { - Schema::String(_) => {} - _ => panic!("Expected String schema in array"), - } - } - _ => panic!("Expected Array schema as second option"), - } - - // Should have complete-from tag - assert!(anyof.annotations.tags.is_some()); - let tags = anyof.annotations.tags.as_ref().unwrap(); - assert!(tags.contains_key("complete-from")); - } - _ => panic!("Expected AnyOf schema"), - } - } - - #[test] - fn test_record_form1() { - let yaml_str = r#" -record: - properties: - type: - enum: [citeproc] 
-"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::Object(obj) => { - assert!(obj.closed); - assert_eq!(obj.properties.len(), 1); - assert!(obj.properties.contains_key("type")); - assert_eq!(obj.required.len(), 1); - assert!(obj.required.contains(&"type".to_string())); - } - _ => panic!("Expected Object schema"), - } - } - - #[test] - fn test_record_form2() { - let yaml_str = r#" -record: - name: string - age: number -"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::Object(obj) => { - assert!(obj.closed); - assert_eq!(obj.properties.len(), 2); - assert!(obj.properties.contains_key("name")); - assert!(obj.properties.contains_key("age")); - assert_eq!(obj.required.len(), 2); - assert!(obj.required.contains(&"name".to_string())); - assert!(obj.required.contains(&"age".to_string())); - } - _ => panic!("Expected Object schema"), - } - } - - #[test] - fn test_schema_wrapper() { - let yaml_str = r#" -schema: - anyOf: - - boolean - - string -"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::AnyOf(anyof) => { - assert_eq!(anyof.schemas.len(), 2); - match &anyof.schemas[0] { - Schema::Boolean(_) => {} - _ => panic!("Expected Boolean schema"), - } - match &anyof.schemas[1] { - Schema::String(_) => {} - _ => panic!("Expected String schema"), - } - } - _ => panic!("Expected AnyOf schema"), - } - } - - #[test] - fn test_schema_wrapper_with_outer_annotations() { - let yaml_str = r#" -schema: - anyOf: - - boolean - - string -description: "Outer description" -completions: ["value1", "value2", "value3"] -hidden: true -"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::AnyOf(s) => { - assert_eq!( - s.annotations.description, - Some("Outer 
description".to_string()) - ); - assert_eq!( - s.annotations.completions, - Some(vec![ - "value1".to_string(), - "value2".to_string(), - "value3".to_string() - ]) - ); - assert_eq!(s.annotations.hidden, Some(true)); - } - _ => panic!("Expected AnyOf schema"), - } - } - - #[test] - fn test_schema_wrapper_annotation_override() { - let yaml_str = r#" -schema: - string: - description: "Inner description" - completions: ["inner1", "inner2"] -description: "Outer description" -completions: ["outer1", "outer2"] -"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::String(s) => { - // Outer should override inner - assert_eq!( - s.annotations.description, - Some("Outer description".to_string()) - ); - assert_eq!( - s.annotations.completions, - Some(vec!["outer1".to_string(), "outer2".to_string()]) - ); - } - _ => panic!("Expected String schema"), - } - } - - #[test] - fn test_schema_wrapper_tag_merging() { - let yaml_str = r#" -schema: - string: - tags: - category: input - inner-only: true -description: "Test" -tags: - category: output - outer-only: true -"#; - let yaml = quarto_yaml::parse(yaml_str).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::String(s) => { - let tags = s.annotations.tags.as_ref().unwrap(); - // Outer "category" should override inner - assert_eq!(tags.get("category"), Some(&serde_json::json!("output"))); - // Both inner-only and outer-only should be present - assert_eq!(tags.get("inner-only"), Some(&serde_json::json!(true))); - assert_eq!(tags.get("outer-only"), Some(&serde_json::json!(true))); - } - _ => panic!("Expected String schema"), - } - } - - #[test] - fn test_additional_completions_basic() { - let yaml = quarto_yaml::parse( - r#" -schema: - string: - completions: ["a", "b"] -additionalCompletions: ["c", "d"] -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::String(s) = schema { - 
assert_eq!( - s.annotations.completions, - Some(vec![ - "a".to_string(), - "b".to_string(), - "c".to_string(), - "d".to_string() - ]) - ); - // additional_completions should be cleared after merge - assert_eq!(s.annotations.additional_completions, None); - } else { - panic!("Expected String schema"); - } - } - - #[test] - fn test_additional_completions_overwrite() { - let yaml = quarto_yaml::parse( - r#" -schema: - string: - completions: ["a", "b"] -additionalCompletions: ["c", "d"] -completions: ["e", "f"] -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::String(s) = schema { - // completions overwrites everything - assert_eq!( - s.annotations.completions, - Some(vec!["e".to_string(), "f".to_string()]) - ); - } else { - panic!("Expected String schema"); - } - } - - #[test] - fn test_additional_completions_without_wrapper() { - let yaml = quarto_yaml::parse( - r#" -string: - additionalCompletions: ["x", "y"] -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - if let Schema::String(s) = schema { - // Without schema wrapper, additionalCompletions is stored but not merged - assert_eq!(s.annotations.completions, None); - assert_eq!( - s.annotations.additional_completions, - Some(vec!["x".to_string(), "y".to_string()]) - ); - } else { - panic!("Expected String schema"); - } - } - - // Tests for schema inheritance (super field) - - #[test] - fn test_object_with_super_single() { - let yaml = quarto_yaml::parse( - r#" -object: - super: - resolveRef: base-schema - properties: - name: string -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - match schema { - Schema::Object(obj) => { - assert!(obj.base_schema.is_some()); - let bases = obj.base_schema.unwrap(); - assert_eq!(bases.len(), 1); - match &bases[0] { - Schema::Ref(r) => { - assert_eq!(r.reference, "base-schema"); - assert!(r.eager); - } - _ => panic!("Expected Ref schema"), - } - assert!(obj.properties.contains_key("name")); - } - _ 
=> panic!("Expected Object schema"), - } - } - - #[test] - fn test_object_with_super_array() { - let yaml = quarto_yaml::parse( - r#" -object: - super: - - resolveRef: base1 - - resolveRef: base2 - properties: - name: string -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - match schema { - Schema::Object(obj) => { - assert!(obj.base_schema.is_some()); - let bases = obj.base_schema.unwrap(); - assert_eq!(bases.len(), 2); - match &bases[0] { - Schema::Ref(r) => { - assert_eq!(r.reference, "base1"); - assert!(r.eager); - } - _ => panic!("Expected Ref schema for base1"), - } - match &bases[1] { - Schema::Ref(r) => { - assert_eq!(r.reference, "base2"); - assert!(r.eager); - } - _ => panic!("Expected Ref schema for base2"), - } - } - _ => panic!("Expected Object schema"), - } - } - - #[test] - fn test_object_without_super() { - let yaml = quarto_yaml::parse( - r#" -object: - properties: - name: string -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - match schema { - Schema::Object(obj) => { - assert!(obj.base_schema.is_none()); - assert!(obj.properties.contains_key("name")); - } - _ => panic!("Expected Object schema"), - } - } - - #[test] - fn test_super_with_inline_schema() { - let yaml = quarto_yaml::parse( - r#" -object: - super: - object: - properties: - base_field: string - properties: - derived_field: number -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - match schema { - Schema::Object(obj) => { - assert!(obj.base_schema.is_some()); - let bases = obj.base_schema.unwrap(); - assert_eq!(bases.len(), 1); - match &bases[0] { - Schema::Object(base_obj) => { - assert!(base_obj.properties.contains_key("base_field")); - } - _ => panic!("Expected Object schema for base"), - } - assert!(obj.properties.contains_key("derived_field")); - } - _ => panic!("Expected Object schema"), - } - } -} diff --git a/crates/quarto-yaml-validation/src/schema/parser.rs 
b/crates/quarto-yaml-validation/src/schema/parser.rs deleted file mode 100644 index a24e81ce5..000000000 --- a/crates/quarto-yaml-validation/src/schema/parser.rs +++ /dev/null @@ -1,154 +0,0 @@ -//! Schema parsing entry point -//! -//! This module provides the main entry point for parsing schemas from YAML: -//! - from_yaml(): Main parsing function -//! - parse_short_form(): Handle short string forms like "boolean", "string" -//! - parse_object_form(): Handle object forms like {boolean: {...}} -//! - parse_inline_enum(): Handle inline enum arrays like [val1, val2, val3] - -use crate::error::{SchemaError, SchemaResult}; -use quarto_yaml::{SourceInfo, YamlWithSourceInfo}; -use yaml_rust2::Yaml; - -use super::Schema; -use super::helpers::yaml_to_json_value; -use super::parsers::*; -use super::types::{EnumSchema, NullSchema}; - -/// Parse a Schema from YamlWithSourceInfo. -/// -/// This supports all quarto-cli schema syntaxes: -/// - Short forms: "boolean", "string", "number", etc. -/// - Object forms: {boolean: {...}}, {string: {...}}, etc. -/// - Inline arrays: [val1, val2, val3] (for enums) -/// -/// # Example -/// -/// ``` -/// use quarto_yaml_validation::Schema; -/// use quarto_yaml; -/// -/// let yaml = quarto_yaml::parse("boolean").unwrap(); -/// let schema = Schema::from_yaml(&yaml).unwrap(); -/// ``` -pub(super) fn from_yaml(yaml: &YamlWithSourceInfo) -> SchemaResult { - match &yaml.yaml { - // Short form: "boolean", "string", etc. - Yaml::String(s) => parse_short_form(s.as_str(), &yaml.source_info), - - // Object form: {boolean: {...}}, {enum: [...]}, etc. 
- Yaml::Hash(_) => parse_object_form(yaml), - - // Array form: [val1, val2, val3] - inline enum - Yaml::Array(_) => parse_inline_enum(yaml), - - // Null can be a schema type too - Yaml::Null => Ok(Schema::Null(NullSchema { - annotations: Default::default(), - })), - - _ => Err(SchemaError::InvalidStructure { - message: format!("Expected schema, got {:?}", yaml.yaml), - location: Some(yaml.source_info.clone()), - }), - } -} - -/// Parse short form: "boolean", "string", "number", "any", "null", "path" -fn parse_short_form(s: &str, _location: &SourceInfo) -> SchemaResult { - match s { - "boolean" => Ok(Schema::Boolean(super::types::BooleanSchema { - annotations: Default::default(), - })), - "number" => Ok(Schema::Number(super::types::NumberSchema { - annotations: Default::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - })), - "string" | "path" => Ok(Schema::String(super::types::StringSchema { - annotations: Default::default(), - min_length: None, - max_length: None, - pattern: None, - })), - "null" => Ok(Schema::Null(NullSchema { - annotations: Default::default(), - })), - "any" => Ok(Schema::Any(super::types::AnySchema { - annotations: Default::default(), - })), - _ => Err(SchemaError::InvalidType(s.to_string())), - } -} - -/// Parse object form: {boolean: {...}}, {string: {...}}, etc. 
-fn parse_object_form(yaml: &YamlWithSourceInfo) -> SchemaResult { - let entries = yaml - .as_hash() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "Expected hash for object form schema".to_string(), - location: Some(yaml.source_info.clone()), - })?; - - if entries.is_empty() { - return Err(SchemaError::InvalidStructure { - message: "Empty schema object".to_string(), - location: Some(yaml.source_info.clone()), - }); - } - - // Peek at first key to determine schema type - let first_entry = &entries[0]; - let key = first_entry - .key - .yaml - .as_str() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "Schema type key must be a string".to_string(), - location: Some(first_entry.key.source_info.clone()), - })?; - - match key { - "boolean" => parse_boolean_schema(&first_entry.value), - "number" => parse_number_schema(&first_entry.value), - "string" | "path" => parse_string_schema(&first_entry.value), - "null" => parse_null_schema(&first_entry.value), - "enum" => parse_enum_schema(&first_entry.value), - "any" => parse_any_schema(&first_entry.value), - "anyOf" => parse_anyof_schema(&first_entry.value), - "allOf" => parse_allof_schema(&first_entry.value), - "array" => parse_array_schema(&first_entry.value), - "arrayOf" => parse_arrayof_schema(&first_entry.value), - "maybeArrayOf" => parse_maybe_arrayof_schema(&first_entry.value), - "object" => parse_object_schema(&first_entry.value), - "record" => parse_record_schema(&first_entry.value), - "schema" => parse_schema_wrapper(yaml), // Note: pass whole yaml, not just value - "ref" | "$ref" => parse_ref_schema(&first_entry.value, false), // Lazy reference - "resolveRef" => parse_ref_schema(&first_entry.value, true), // Eager reference - _ => Err(SchemaError::InvalidType(key.to_string())), - } -} - -/// Parse inline enum array: [val1, val2, val3] -fn parse_inline_enum(yaml: &YamlWithSourceInfo) -> SchemaResult { - let items = yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: 
"Expected array for inline enum".to_string(), - location: Some(yaml.source_info.clone()), - })?; - - // Convert YamlWithSourceInfo items to serde_json::Value for enum values - let values: SchemaResult> = items - .iter() - .map(|item| yaml_to_json_value(&item.yaml, &item.source_info)) - .collect(); - - Ok(Schema::Enum(EnumSchema { - annotations: Default::default(), - values: values?, - })) -} diff --git a/crates/quarto-yaml-validation/src/schema/parsers/arrays.rs b/crates/quarto-yaml-validation/src/schema/parsers/arrays.rs deleted file mode 100644 index 2a3597b32..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/arrays.rs +++ /dev/null @@ -1,103 +0,0 @@ -//! Array schema parser -//! -//! This module handles parsing of array schemas which validate array/list values. -//! Arrays can have: -//! - items: Schema for array elements -//! - minItems/maxItems: Length constraints -//! - uniqueItems: Whether elements must be unique -//! -//! Also handles quarto-cli's arrayOf shorthand syntax: -//! - arrayOf: - Simple form -//! 
- arrayOf: { schema: , length: N } - Fixed-length arrays - -use crate::error::SchemaResult; -use quarto_yaml::YamlWithSourceInfo; - -use crate::schema::Schema; -use crate::schema::annotations::parse_annotations; -use crate::schema::helpers::{get_hash_bool, get_hash_usize}; -use crate::schema::parser::from_yaml; -use crate::schema::types::ArraySchema; - -/// Parse an array schema -/// -/// Format: -/// ```yaml -/// array: -/// items: -/// minItems: 1 -/// maxItems: 10 -/// uniqueItems: true -/// ``` -pub(in crate::schema) fn parse_array_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - let items = if let Some(items_yaml) = yaml.get_hash_value("items") { - Some(Box::new(from_yaml(items_yaml)?)) - } else { - None - }; - let min_items = get_hash_usize(yaml, "minItems")?; - let max_items = get_hash_usize(yaml, "maxItems")?; - let unique_items = get_hash_bool(yaml, "uniqueItems")?; - - Ok(Schema::Array(ArraySchema { - annotations, - items, - min_items, - max_items, - unique_items, - })) -} - -/// Parse arrayOf schema (quarto-cli shorthand) -/// -/// Simple form: -/// ```yaml -/// arrayOf: string -/// ``` -/// -/// Complex form with length: -/// ```yaml -/// arrayOf: -/// schema: string -/// length: 2 -/// ``` -/// -/// The `length` property sets both minItems and maxItems to the same value. 
-pub(in crate::schema) fn parse_arrayof_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - - // Check if this is the complex form with a `schema` key - if let Some(schema_yaml) = yaml.get_hash_value("schema") { - // Complex form: arrayOf: { schema: , length: N } - let items = Some(Box::new(from_yaml(schema_yaml)?)); - let length = get_hash_usize(yaml, "length")?; - - // If length is specified, set both min_items and max_items - let (min_items, max_items) = if let Some(len) = length { - (Some(len), Some(len)) - } else { - (None, None) - }; - - Ok(Schema::Array(ArraySchema { - annotations, - items, - min_items, - max_items, - unique_items: None, - })) - } else { - // Simple form: arrayOf: - // The entire YAML value is the schema - let items = Some(Box::new(from_yaml(yaml)?)); - - Ok(Schema::Array(ArraySchema { - annotations: Default::default(), - items, - min_items: None, - max_items: None, - unique_items: None, - })) - } -} diff --git a/crates/quarto-yaml-validation/src/schema/parsers/combinators.rs b/crates/quarto-yaml-validation/src/schema/parsers/combinators.rs deleted file mode 100644 index 244b1efe6..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/combinators.rs +++ /dev/null @@ -1,477 +0,0 @@ -//! Combinator schema parsers -//! -//! This module handles parsing of schema combinators: -//! - anyOf: Validates if any subschema matches -//! - allOf: Validates if all subschemas match -//! - maybeArrayOf: Quarto extension that expands to anyOf(T, arrayOf(T)) -//! -//! Both support inline array form and explicit object form with annotations. 
- -use crate::error::{SchemaError, SchemaResult}; -use quarto_yaml::YamlWithSourceInfo; -use std::collections::HashMap; - -use crate::schema::Schema; -use crate::schema::annotations::parse_annotations; -use crate::schema::parser::from_yaml; -use crate::schema::types::{AllOfSchema, AnyOfSchema, ArraySchema, SchemaAnnotations}; - -/// Parse an anyOf schema -/// -/// Validates if any of the subschemas matches. Supports: -/// - Inline form: anyOf: [schema1, schema2, ...] -/// - Explicit form: anyOf: { schemas: [...], description: "..." } -pub(in crate::schema) fn parse_anyof_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - - // Handle both array form and object form with schemas: field - let schemas = if let Some(schemas_yaml) = yaml.get_hash_value("schemas") { - // Explicit form: anyOf: { schemas: [...] } - let items = schemas_yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "anyOf schemas must be an array".to_string(), - location: Some(schemas_yaml.source_info.clone()), - })?; - - let result: SchemaResult> = items.iter().map(from_yaml).collect(); - result? - } else { - // Inline form: anyOf: [schema1, schema2, ...] - let items = yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "Expected array for anyOf".to_string(), - location: Some(yaml.source_info.clone()), - })?; - - let result: SchemaResult> = items.iter().map(from_yaml).collect(); - result? 
- }; - - Ok(Schema::AnyOf(AnyOfSchema { - annotations, - schemas, - })) -} - -#[cfg(test)] -mod tests { - use super::*; - use quarto_yaml::YamlWithSourceInfo; - use yaml_rust2::Yaml; - use yaml_rust2::yaml::Hash; - - fn source_info() -> quarto_yaml::SourceInfo { - quarto_yaml::SourceInfo::for_test() - } - - // ==================== parse_anyof_schema tests ==================== - - #[test] - fn test_anyof_inline_array_valid() { - // anyOf: [string, boolean] - let yaml = quarto_yaml::parse( - r#" -anyOf: - - string - - boolean -"#, - ) - .unwrap(); - - let anyof_value = yaml.get_hash_value("anyOf").unwrap(); - let result = parse_anyof_schema(anyof_value).unwrap(); - - if let Schema::AnyOf(s) = result { - assert_eq!(s.schemas.len(), 2); - } else { - panic!("Expected AnyOf schema"); - } - } - - #[test] - fn test_anyof_explicit_form_valid() { - // anyOf: { schemas: [string, boolean], description: "test" } - let yaml = quarto_yaml::parse( - r#" -anyOf: - schemas: - - string - - boolean - description: "Either string or boolean" -"#, - ) - .unwrap(); - - let anyof_value = yaml.get_hash_value("anyOf").unwrap(); - let result = parse_anyof_schema(anyof_value).unwrap(); - - if let Schema::AnyOf(s) = result { - assert_eq!(s.schemas.len(), 2); - assert_eq!( - s.annotations.description, - Some("Either string or boolean".to_string()) - ); - } else { - panic!("Expected AnyOf schema"); - } - } - - #[test] - fn test_anyof_schemas_not_array_error() { - // anyOf: { schemas: "not an array" } - let yaml = quarto_yaml::parse( - r#" -anyOf: - schemas: "not an array" -"#, - ) - .unwrap(); - - let anyof_value = yaml.get_hash_value("anyOf").unwrap(); - let result = parse_anyof_schema(anyof_value); - - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("anyOf schemas must be an array")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_anyof_inline_not_array_error() { - // anyOf: "not an array" (inline form but not an array) - // This requires constructing a YAML where anyOf points to a scalar - let mut hash = Hash::new(); - hash.insert( - Yaml::String("anyOf".to_string()), - Yaml::String("not an array".to_string()), - ); - - let key_node = - YamlWithSourceInfo::new_scalar(Yaml::String("anyOf".to_string()), source_info()); - let value_node = - YamlWithSourceInfo::new_scalar(Yaml::String("not an array".to_string()), source_info()); - - let entry = quarto_yaml::YamlHashEntry::new( - key_node, - value_node, - source_info(), - source_info(), - source_info(), - ); - - let yaml = YamlWithSourceInfo::new_hash(Yaml::Hash(hash), source_info(), vec![entry]); - let anyof_value = yaml.get_hash_value("anyOf").unwrap(); - - let result = parse_anyof_schema(anyof_value); - - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("Expected array for anyOf")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - // ==================== parse_allof_schema tests ==================== - - #[test] - fn test_allof_inline_array_valid() { - // allOf: [string, number] - let yaml = quarto_yaml::parse( - r#" -allOf: - - string - - number -"#, - ) - .unwrap(); - - let allof_value = yaml.get_hash_value("allOf").unwrap(); - let result = parse_allof_schema(allof_value).unwrap(); - - if let Schema::AllOf(s) = result { - assert_eq!(s.schemas.len(), 2); - } else { - panic!("Expected AllOf schema"); - } - } - - #[test] - fn test_allof_explicit_form_valid() { - // allOf: { schemas: [string, number], description: "test" } - let yaml = quarto_yaml::parse( - r#" -allOf: - schemas: - - string - - number - description: "Must match both string and number constraints" -"#, - ) - .unwrap(); - - let allof_value = yaml.get_hash_value("allOf").unwrap(); - let result = parse_allof_schema(allof_value).unwrap(); - - if let Schema::AllOf(s) = result { - assert_eq!(s.schemas.len(), 2); - assert_eq!( - s.annotations.description, - Some("Must match both string and number constraints".to_string()) - ); - } else { - panic!("Expected AllOf schema"); - } - } - - #[test] - fn test_allof_schemas_not_array_error() { - // allOf: { schemas: "not an array" } - let yaml = quarto_yaml::parse( - r#" -allOf: - schemas: "not an array" -"#, - ) - .unwrap(); - - let allof_value = yaml.get_hash_value("allOf").unwrap(); - let result = parse_allof_schema(allof_value); - - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("allOf schemas must be an array")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - #[test] - fn test_allof_inline_not_array_error() { - // allOf: "not an array" (inline form but not an array) - let mut hash = Hash::new(); - hash.insert( - Yaml::String("allOf".to_string()), - Yaml::String("not an array".to_string()), - ); - - let key_node = - YamlWithSourceInfo::new_scalar(Yaml::String("allOf".to_string()), source_info()); - let value_node = - YamlWithSourceInfo::new_scalar(Yaml::String("not an array".to_string()), source_info()); - - let entry = quarto_yaml::YamlHashEntry::new( - key_node, - value_node, - source_info(), - source_info(), - source_info(), - ); - - let yaml = YamlWithSourceInfo::new_hash(Yaml::Hash(hash), source_info(), vec![entry]); - let allof_value = yaml.get_hash_value("allOf").unwrap(); - - let result = parse_allof_schema(allof_value); - - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - SchemaError::InvalidStructure { message, .. 
} => { - assert!(message.contains("Expected array for allOf")); - } - _ => panic!("Expected InvalidStructure error"), - } - } - - // ==================== parse_maybe_arrayof_schema tests ==================== - - #[test] - fn test_maybe_arrayof_basic() { - // maybeArrayOf: string -> anyOf([string, arrayOf(string)]) - let yaml = quarto_yaml::parse("maybeArrayOf: string").unwrap(); - let maybearray_value = yaml.get_hash_value("maybeArrayOf").unwrap(); - - let result = parse_maybe_arrayof_schema(maybearray_value).unwrap(); - - if let Schema::AnyOf(s) = result { - assert_eq!(s.schemas.len(), 2); - // First schema should be string - assert!(matches!(s.schemas[0], Schema::String(_))); - // Second schema should be array of string - assert!(matches!(s.schemas[1], Schema::Array(_))); - } else { - panic!("Expected AnyOf schema"); - } - } - - #[test] - fn test_maybe_arrayof_has_complete_from_tag() { - // maybeArrayOf should add "complete-from": ["anyOf", 0] tag - let yaml = quarto_yaml::parse("maybeArrayOf: boolean").unwrap(); - let maybearray_value = yaml.get_hash_value("maybeArrayOf").unwrap(); - - let result = parse_maybe_arrayof_schema(maybearray_value).unwrap(); - - if let Schema::AnyOf(s) = result { - assert!(s.annotations.tags.is_some()); - let tags = s.annotations.tags.as_ref().unwrap(); - assert!(tags.contains_key("complete-from")); - assert_eq!( - tags.get("complete-from"), - Some(&serde_json::json!(["anyOf", 0])) - ); - } else { - panic!("Expected AnyOf schema"); - } - } - - #[test] - fn test_maybe_arrayof_with_complex_schema() { - // maybeArrayOf with a more complex inner schema (number with constraints) - let yaml = quarto_yaml::parse( - r#" -maybeArrayOf: - number: - minimum: 0 -"#, - ) - .unwrap(); - let maybearray_value = yaml.get_hash_value("maybeArrayOf").unwrap(); - - let result = parse_maybe_arrayof_schema(maybearray_value).unwrap(); - - if let Schema::AnyOf(s) = result { - assert_eq!(s.schemas.len(), 2); - // First schema should be a number schema - 
assert!(matches!(s.schemas[0], Schema::Number(_))); - // Second schema should be array - if let Schema::Array(arr) = &s.schemas[1] { - // Array items should be the same number schema - assert!(arr.items.is_some()); - } else { - panic!("Expected Array schema as second option"); - } - } else { - panic!("Expected AnyOf schema"); - } - } - - #[test] - fn test_maybe_arrayof_array_schema_has_no_constraints() { - // The arrayOf part should have no min/max items or uniqueItems constraints - let yaml = quarto_yaml::parse("maybeArrayOf: string").unwrap(); - let maybearray_value = yaml.get_hash_value("maybeArrayOf").unwrap(); - - let result = parse_maybe_arrayof_schema(maybearray_value).unwrap(); - - if let Schema::AnyOf(s) = result { - if let Schema::Array(arr) = &s.schemas[1] { - assert!(arr.min_items.is_none()); - assert!(arr.max_items.is_none()); - assert!(arr.unique_items.is_none()); - } else { - panic!("Expected Array schema"); - } - } else { - panic!("Expected AnyOf schema"); - } - } -} - -/// Parse an allOf schema -/// -/// Validates if all of the subschemas match. Supports: -/// - Inline form: allOf: [schema1, schema2, ...] -/// - Explicit form: allOf: { schemas: [...], description: "..." } -pub(in crate::schema) fn parse_allof_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - - // Similar to anyOf - let schemas = if let Some(schemas_yaml) = yaml.get_hash_value("schemas") { - let items = schemas_yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "allOf schemas must be an array".to_string(), - location: Some(schemas_yaml.source_info.clone()), - })?; - - let result: SchemaResult> = items.iter().map(from_yaml).collect(); - result? 
- } else { - let items = yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "Expected array for allOf".to_string(), - location: Some(yaml.source_info.clone()), - })?; - - let result: SchemaResult> = items.iter().map(from_yaml).collect(); - result? - }; - - Ok(Schema::AllOf(AllOfSchema { - annotations, - schemas, - })) -} - -/// Parse a maybeArrayOf schema (quarto-cli extension) -/// -/// This expands `maybeArrayOf: T` into `anyOf([T, arrayOf(T)])` with a special tag -/// to indicate completions should come from the first option (the scalar form). -/// -/// Format: -/// ```yaml -/// maybeArrayOf: string -/// ``` -/// -/// Expands to: -/// ```yaml -/// anyOf: -/// - string -/// - arrayOf: string -/// tags: -/// complete-from: ["anyOf", 0] -/// ``` -pub(in crate::schema) fn parse_maybe_arrayof_schema( - yaml: &YamlWithSourceInfo, -) -> SchemaResult { - // Parse the inner schema - let inner_schema = from_yaml(yaml)?; - - // Create arrayOf version of the schema - let array_schema = Schema::Array(ArraySchema { - annotations: Default::default(), - items: Some(Box::new(inner_schema.clone())), - min_items: None, - max_items: None, - unique_items: None, - }); - - // Create anyOf with both versions - let schemas = vec![inner_schema, array_schema]; - - // Add "complete-from" tag - let mut tags = HashMap::new(); - tags.insert("complete-from".to_string(), serde_json::json!(["anyOf", 0])); - - let annotations = SchemaAnnotations { - tags: Some(tags), - ..Default::default() - }; - - Ok(Schema::AnyOf(AnyOfSchema { - annotations, - schemas, - })) -} diff --git a/crates/quarto-yaml-validation/src/schema/parsers/enum.rs b/crates/quarto-yaml-validation/src/schema/parsers/enum.rs deleted file mode 100644 index 8409f3e33..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/enum.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! Enum schema parser -//! -//! This module handles parsing of enum schemas which define a fixed set -//! of allowed values. 
Supports both inline and explicit forms: -//! - Inline: enum: [val1, val2, val3] -//! - Explicit: enum: { values: [...], description: "..." } - -use crate::error::{SchemaError, SchemaResult}; -use quarto_yaml::YamlWithSourceInfo; - -use crate::schema::Schema; -use crate::schema::annotations::parse_annotations; -use crate::schema::helpers::yaml_to_json_value; -use crate::schema::types::EnumSchema; - -/// Parse an enum schema -/// -/// Handles both inline array form and explicit object form with annotations -pub(in crate::schema) fn parse_enum_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - - // Handle both inline array and explicit object form - let values = if let Some(values_yaml) = yaml.get_hash_value("values") { - // Explicit form: enum: { values: [...] } - let items = values_yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "enum values must be an array".to_string(), - location: Some(values_yaml.source_info.clone()), - })?; - - let result: SchemaResult> = items - .iter() - .map(|item| yaml_to_json_value(&item.yaml, &item.source_info)) - .collect(); - result? - } else { - // Inline form: enum: [val1, val2, val3] - let items = yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "Expected array for inline enum".to_string(), - location: Some(yaml.source_info.clone()), - })?; - - let result: SchemaResult> = items - .iter() - .map(|item| yaml_to_json_value(&item.yaml, &item.source_info)) - .collect(); - result? - }; - - Ok(Schema::Enum(EnumSchema { - annotations, - values, - })) -} diff --git a/crates/quarto-yaml-validation/src/schema/parsers/mod.rs b/crates/quarto-yaml-validation/src/schema/parsers/mod.rs deleted file mode 100644 index b4c17257d..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/mod.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! Schema parser modules -//! -//! This module contains all the individual schema type parsers, -//! 
organized by category: -//! - primitive: Basic types (boolean, number, string, null, any) -//! - enum: Enumeration types -//! - ref: Reference types -//! - combinators: anyOf, allOf -//! - arrays: Array types -//! - objects: Object types -//! - wrappers: Schema wrappers (future) - -pub(super) mod arrays; -pub(super) mod combinators; -pub(super) mod r#enum; -pub(super) mod objects; -pub(super) mod primitive; -pub(super) mod r#ref; -pub(super) mod wrappers; - -// Re-export parser functions for use within the schema module -pub(super) use arrays::{parse_array_schema, parse_arrayof_schema}; -pub(super) use combinators::{parse_allof_schema, parse_anyof_schema, parse_maybe_arrayof_schema}; -pub(super) use r#enum::parse_enum_schema; -pub(super) use objects::{parse_object_schema, parse_record_schema}; -pub(super) use primitive::{ - parse_any_schema, parse_boolean_schema, parse_null_schema, parse_number_schema, - parse_string_schema, -}; -pub(super) use r#ref::parse_ref_schema; -pub(super) use wrappers::parse_schema_wrapper; diff --git a/crates/quarto-yaml-validation/src/schema/parsers/objects.rs b/crates/quarto-yaml-validation/src/schema/parsers/objects.rs deleted file mode 100644 index 0778761d0..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/objects.rs +++ /dev/null @@ -1,404 +0,0 @@ -//! Object schema parser -//! -//! This module handles parsing of object schemas which validate key-value mappings. -//! Objects can have: -//! - properties: Named property schemas -//! - patternProperties: Pattern-based property schemas -//! - additionalProperties: Schema for unspecified properties -//! - required: List of required property names -//! - closed: Quarto extension - disallow properties not in schema -//! - minProperties/maxProperties: Property count constraints -//! - namingConvention: Quarto extension - naming convention for property keys -//! -//! Also handles quarto-cli's record syntax, which is shorthand for a closed object -//! 
with all properties required - -use crate::error::{SchemaError, SchemaResult}; -use quarto_yaml::YamlWithSourceInfo; -use std::collections::HashMap; - -use crate::schema::Schema; -use crate::schema::annotations::parse_annotations; -use crate::schema::helpers::{get_hash_bool, get_hash_usize}; -use crate::schema::parser::from_yaml; -use crate::schema::types::{NamingConvention, ObjectSchema}; - -/// Normalize naming convention string to canonical form -/// -/// Supports multiple input formats and normalizes them to one of: -/// - "capitalizationCase" (camelCase) -/// - "underscore_case" (snake_case) -/// - "dash-case" (kebab-case) -/// - "ignore" -fn normalize_convention(input: &str, location: &quarto_yaml::SourceInfo) -> SchemaResult { - match input { - "ignore" => Ok("ignore".to_string()), - - // camelCase / capitalizationCase variants - "camelCase" - | "capitalizationCase" - | "camel-case" - | "camel_case" - | "capitalization-case" - | "capitalization_case" => Ok("capitalizationCase".to_string()), - - // snake_case / underscoreCase variants - "snakeCase" | "underscoreCase" | "snake-case" | "snake_case" | "underscore-case" - | "underscore_case" => Ok("underscore_case".to_string()), - - // kebab-case / dashCase variants - "dashCase" | "kebabCase" | "dash-case" | "dash_case" | "kebab-case" | "kebab_case" => { - Ok("dash-case".to_string()) - } - - _ => Err(SchemaError::InvalidStructure { - message: format!("Unknown naming convention: '{}'", input), - location: Some(location.clone()), - }), - } -} - -/// Parse an object schema -/// -/// Format: -/// ```yaml -/// object: -/// properties: -/// name: string -/// age: number -/// patternProperties: -/// "^x-": string -/// additionalProperties: boolean -/// required: [name] -/// closed: true -/// minProperties: 1 -/// maxProperties: 10 -/// ``` -pub(in crate::schema) fn parse_object_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - - // Parse properties - let properties = if 
let Some(props_yaml) = yaml.get_hash_value("properties") { - let entries = props_yaml - .as_hash() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "properties must be an object".to_string(), - location: Some(props_yaml.source_info.clone()), - })?; - - let mut props = HashMap::new(); - for entry in entries { - let key = entry - .key - .yaml - .as_str() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "property key must be a string".to_string(), - location: Some(entry.key.source_info.clone()), - })?; - let schema = from_yaml(&entry.value)?; - props.insert(key.to_string(), schema); - } - props - } else { - HashMap::new() - }; - - // Parse patternProperties - let pattern_properties = - if let Some(pattern_props_yaml) = yaml.get_hash_value("patternProperties") { - let entries = - pattern_props_yaml - .as_hash() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "patternProperties must be an object".to_string(), - location: Some(pattern_props_yaml.source_info.clone()), - })?; - - let mut props = HashMap::new(); - for entry in entries { - let key = entry - .key - .yaml - .as_str() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "patternProperty key must be a string".to_string(), - location: Some(entry.key.source_info.clone()), - })?; - let schema = from_yaml(&entry.value)?; - props.insert(key.to_string(), schema); - } - props - } else { - HashMap::new() - }; - - // Parse additionalProperties - let additional_properties = - if let Some(additional_yaml) = yaml.get_hash_value("additionalProperties") { - Some(Box::new(from_yaml(additional_yaml)?)) - } else { - None - }; - - // Parse required - let required = if let Some(required_yaml) = yaml.get_hash_value("required") { - // Check if it's the string "all" - if let Some(req_str) = required_yaml.yaml.as_str() { - if req_str == "all" { - // Expand to all property keys - properties.keys().cloned().collect() - } else { - return Err(SchemaError::InvalidStructure { - message: format!( - 
"Invalid required value: '{}' (expected 'all' or array)", - req_str - ), - location: Some(required_yaml.source_info.clone()), - }); - } - } else { - // Handle array form - let items = required_yaml - .as_array() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "required must be 'all' or an array".to_string(), - location: Some(required_yaml.source_info.clone()), - })?; - - let result: SchemaResult> = items - .iter() - .map(|item| { - item.yaml.as_str().map(|s| s.to_string()).ok_or_else(|| { - SchemaError::InvalidStructure { - message: "required items must be strings".to_string(), - location: Some(item.source_info.clone()), - } - }) - }) - .collect(); - result? - } - } else { - Vec::new() - }; - - let min_properties = get_hash_usize(yaml, "minProperties")?; - let max_properties = get_hash_usize(yaml, "maxProperties")?; - let closed = get_hash_bool(yaml, "closed")?.unwrap_or(false); - - // Parse propertyNames - let property_names = if let Some(property_names_yaml) = yaml.get_hash_value("propertyNames") { - Some(Box::new(from_yaml(property_names_yaml)?)) - } else { - None - }; - - // Parse namingConvention - let naming_convention = if let Some(nc_yaml) = yaml.get_hash_value("namingConvention") { - if let Some(s) = nc_yaml.yaml.as_str() { - // Single string value - Some(NamingConvention::Single(normalize_convention( - s, - &nc_yaml.source_info, - )?)) - } else if let Some(arr) = nc_yaml.as_array() { - // Array of strings - let conventions: SchemaResult> = arr - .iter() - .map(|item| { - item.yaml - .as_str() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "namingConvention items must be strings".to_string(), - location: Some(item.source_info.clone()), - }) - .and_then(|s| normalize_convention(s, &item.source_info)) - }) - .collect(); - Some(NamingConvention::Multiple(conventions?)) - } else { - return Err(SchemaError::InvalidStructure { - message: "namingConvention must be a string or array of strings".to_string(), - location: 
Some(nc_yaml.source_info.clone()), - }); - } - } else { - None - }; - - // Parse super/baseSchema for inheritance - let base_schema = if let Some(super_yaml) = yaml.get_hash_value("super") { - if let Some(arr) = super_yaml.as_array() { - // Array form: super: [schema1, schema2] - let schemas: SchemaResult> = arr.iter().map(from_yaml).collect(); - Some(schemas?) - } else { - // Single schema form: super: { resolveRef: ... } - Some(vec![from_yaml(super_yaml)?]) - } - } else { - None - }; - - Ok(Schema::Object(ObjectSchema { - annotations, - properties, - pattern_properties, - additional_properties, - required, - min_properties, - max_properties, - closed, - property_names, - naming_convention, - base_schema, - })) -} - -/// Parse a record schema (quarto-cli shorthand) -/// -/// This is syntactic sugar that expands to a closed object with all properties required. -/// -/// Form 1: -/// ```yaml -/// record: -/// properties: -/// key1: string -/// key2: number -/// ``` -/// -/// Form 2 (shorthand): -/// ```yaml -/// record: -/// key1: string -/// key2: number -/// ``` -/// -/// Form 3 (with keySchema/valueSchema): -/// ```yaml -/// record: -/// keySchema: -/// string: -/// pattern: "^[a-z]+$" -/// valueSchema: number -/// ``` -/// -/// Forms 1 & 2 expand to: -/// ```yaml -/// object: -/// properties: { ... 
} -/// closed: true -/// required: all # All property keys -/// ``` -/// -/// Form 3 expands to: -/// ```yaml -/// object: -/// propertyNames: -/// additionalProperties: -/// ``` -pub(in crate::schema) fn parse_record_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - - // Check for keySchema/valueSchema form first (Form 3) - let has_key_schema = yaml.get_hash_value("keySchema").is_some(); - let has_value_schema = yaml.get_hash_value("valueSchema").is_some(); - - if has_key_schema || has_value_schema { - // Form 3: record with keySchema and/or valueSchema - let property_names = if let Some(key_schema_yaml) = yaml.get_hash_value("keySchema") { - Some(Box::new(from_yaml(key_schema_yaml)?)) - } else { - None - }; - - let additional_properties = - if let Some(value_schema_yaml) = yaml.get_hash_value("valueSchema") { - Some(Box::new(from_yaml(value_schema_yaml)?)) - } else { - None - }; - - return Ok(Schema::Object(ObjectSchema { - annotations, - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties, - required: Vec::new(), - min_properties: None, - max_properties: None, - closed: false, - property_names, - naming_convention: None, - base_schema: None, - })); - } - - // Check if this is Form 1 (has "properties" key) or Form 2 (direct properties) - let properties = if let Some(props_yaml) = yaml.get_hash_value("properties") { - // Form 1: record: { properties: { ... 
} } - let entries = props_yaml - .as_hash() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "record properties must be an object".to_string(), - location: Some(props_yaml.source_info.clone()), - })?; - - let mut props = HashMap::new(); - for entry in entries { - let key = entry - .key - .yaml - .as_str() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "property key must be a string".to_string(), - location: Some(entry.key.source_info.clone()), - })?; - let schema = from_yaml(&entry.value)?; - props.insert(key.to_string(), schema); - } - props - } else { - // Form 2: record: { key1: schema1, key2: schema2 } - // The entire yaml value is the properties hash - let entries = yaml - .as_hash() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "record must be an object".to_string(), - location: Some(yaml.source_info.clone()), - })?; - - let mut props = HashMap::new(); - for entry in entries { - let key = entry - .key - .yaml - .as_str() - .ok_or_else(|| SchemaError::InvalidStructure { - message: "property key must be a string".to_string(), - location: Some(entry.key.source_info.clone()), - })?; - let schema = from_yaml(&entry.value)?; - props.insert(key.to_string(), schema); - } - props - }; - - // All properties are required - let required: Vec = properties.keys().cloned().collect(); - - Ok(Schema::Object(ObjectSchema { - annotations, - properties, - pattern_properties: HashMap::new(), - additional_properties: None, - required, - min_properties: None, - max_properties: None, - closed: true, // Records are always closed - property_names: None, - naming_convention: None, - base_schema: None, - })) -} diff --git a/crates/quarto-yaml-validation/src/schema/parsers/primitive.rs b/crates/quarto-yaml-validation/src/schema/parsers/primitive.rs deleted file mode 100644 index 36f250a68..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/primitive.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! Primitive type schema parsers -//! -//! 
This module contains parsers for basic/primitive schema types: -//! - boolean -//! - number -//! - string (including "path" alias) -//! - null -//! - any - -use crate::error::SchemaResult; -use quarto_yaml::YamlWithSourceInfo; - -use crate::schema::Schema; -use crate::schema::annotations::parse_annotations; -use crate::schema::helpers::{get_hash_number, get_hash_string, get_hash_usize}; -use crate::schema::types::{AnySchema, BooleanSchema, NullSchema, NumberSchema, StringSchema}; - -/// Parse a boolean schema -pub(in crate::schema) fn parse_boolean_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - Ok(Schema::Boolean(BooleanSchema { annotations })) -} - -/// Parse a number schema (integer or float) -pub(in crate::schema) fn parse_number_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - let minimum = get_hash_number(yaml, "minimum")?; - let maximum = get_hash_number(yaml, "maximum")?; - let exclusive_minimum = get_hash_number(yaml, "exclusiveMinimum")?; - let exclusive_maximum = get_hash_number(yaml, "exclusiveMaximum")?; - let multiple_of = get_hash_number(yaml, "multipleOf")?; - - Ok(Schema::Number(NumberSchema { - annotations, - minimum, - maximum, - exclusive_minimum, - exclusive_maximum, - multiple_of, - })) -} - -/// Parse a string schema (also handles "path" alias) -pub(in crate::schema) fn parse_string_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - let min_length = get_hash_usize(yaml, "minLength")?; - let max_length = get_hash_usize(yaml, "maxLength")?; - let pattern = get_hash_string(yaml, "pattern")?; - - Ok(Schema::String(StringSchema { - annotations, - min_length, - max_length, - pattern, - })) -} - -/// Parse a null schema -pub(in crate::schema) fn parse_null_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - Ok(Schema::Null(NullSchema { annotations })) 
-} - -/// Parse an any schema (accepts any value) -pub(in crate::schema) fn parse_any_schema(yaml: &YamlWithSourceInfo) -> SchemaResult { - let annotations = parse_annotations(yaml)?; - Ok(Schema::Any(AnySchema { annotations })) -} diff --git a/crates/quarto-yaml-validation/src/schema/parsers/ref.rs b/crates/quarto-yaml-validation/src/schema/parsers/ref.rs deleted file mode 100644 index 0ab846d8e..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/ref.rs +++ /dev/null @@ -1,38 +0,0 @@ -//! Reference schema parser -//! -//! This module handles parsing of $ref schemas which reference other -//! schemas by their $id. -//! -//! Formats: -//! - ref: "schema-id" or $ref: "schema-id" - Lazy reference (resolved during validation) -//! - resolveRef: "schema-id" - Eager reference (resolved during parsing) - -use crate::error::{SchemaError, SchemaResult}; -use quarto_yaml::YamlWithSourceInfo; - -use crate::schema::Schema; -use crate::schema::types::RefSchema; - -/// Parse a reference schema -/// -/// References are simple string values pointing to another schema's $id. -/// The `eager` parameter indicates whether this is a resolveRef (true) or ref/$ref (false). -pub(in crate::schema) fn parse_ref_schema( - yaml: &YamlWithSourceInfo, - eager: bool, -) -> SchemaResult { - let reference = - yaml.yaml - .as_str() - .map(|s| s.to_string()) - .ok_or_else(|| SchemaError::InvalidStructure { - message: "ref must be a string".to_string(), - location: Some(yaml.source_info.clone()), - })?; - - Ok(Schema::Ref(RefSchema { - annotations: Default::default(), - reference, - eager, - })) -} diff --git a/crates/quarto-yaml-validation/src/schema/parsers/wrappers.rs b/crates/quarto-yaml-validation/src/schema/parsers/wrappers.rs deleted file mode 100644 index e6b8fd269..000000000 --- a/crates/quarto-yaml-validation/src/schema/parsers/wrappers.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! Schema wrapper parsers -//! -//! This module handles meta-schema patterns that wrap other schemas: -//! 
- schema: Explicit schema wrapper for adding properties without type nesting -//! - pattern: (Future) Pattern-based string matching as a schema type - -use crate::error::SchemaResult; -use quarto_yaml::YamlWithSourceInfo; - -use crate::schema::Schema; -use crate::schema::annotations::{merge_annotations, parse_annotations}; -use crate::schema::parser::from_yaml; - -/// Parse a schema wrapper -/// -/// The `schema` key allows adding properties (description, completions, etc.) -/// to a schema without nesting under a type key. -/// -/// Format: -/// ```yaml -/// schema: -/// anyOf: -/// - boolean -/// - string -/// description: "A boolean or string" -/// completions: ["true", "false", "auto"] -/// ``` -/// -/// This is equivalent to: -/// ```yaml -/// anyOf: -/// - boolean -/// - string -/// description: "A boolean or string" -/// completions: ["true", "false", "auto"] -/// ``` -/// -/// But allows cleaner separation when the schema is complex. -pub(in crate::schema) fn parse_schema_wrapper(yaml: &YamlWithSourceInfo) -> SchemaResult { - // Extract the inner schema - let schema_yaml = yaml.get_hash_value("schema").ok_or_else(|| { - crate::error::SchemaError::InvalidStructure { - message: "schema wrapper requires 'schema' key".to_string(), - location: Some(yaml.source_info.clone()), - } - })?; - - // Parse the inner schema (gets inner annotations) - let inner_schema = from_yaml(schema_yaml)?; - - // Parse annotations from the OUTER wrapper - let outer_annotations = parse_annotations(yaml)?; - - // Merge outer with inner (outer overrides inner) - let inner_annotations = inner_schema.annotations().clone(); - let merged_annotations = merge_annotations(inner_annotations, outer_annotations); - - // Apply merged annotations to the schema - Ok(inner_schema.with_annotations(merged_annotations)) -} diff --git a/crates/quarto-yaml-validation/src/schema/types.rs b/crates/quarto-yaml-validation/src/schema/types.rs deleted file mode 100644 index 12d1dbdb3..000000000 --- 
a/crates/quarto-yaml-validation/src/schema/types.rs +++ /dev/null @@ -1,164 +0,0 @@ -//! Schema type definitions -//! -//! This module contains all the schema struct definitions that represent -//! different validation types in Quarto's simplified JSON Schema subset. -//! -//! Each schema type struct contains: -//! - annotations: Common metadata like description, documentation, etc. -//! - type-specific fields: Constraints and validation rules specific to that type - -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; - -use super::Schema; - -/// Naming convention for object property names (Quarto extension) -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(untagged)] -pub enum NamingConvention { - /// Single naming convention - Single(String), - /// Multiple allowed naming conventions (property must match at least one) - Multiple(Vec), -} - -/// Annotations that can be attached to any schema -#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub struct SchemaAnnotations { - /// Schema identifier for references - #[serde(rename = "$id", skip_serializing_if = "Option::is_none")] - pub id: Option, - - /// Short description - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - - /// Detailed documentation - #[serde(skip_serializing_if = "Option::is_none")] - pub documentation: Option, - - /// Custom error message to display on validation failure - #[serde(rename = "errorMessage", skip_serializing_if = "Option::is_none")] - pub error_message: Option, - - /// Whether this schema should be hidden in IDE completions - #[serde(skip_serializing_if = "Option::is_none")] - pub hidden: Option, - - /// Completion suggestions for IDE support - #[serde(skip_serializing_if = "Option::is_none")] - pub completions: Option>, - - /// Additional completions to merge with existing completions (Quarto extension) - #[serde( - rename = "additionalCompletions", - skip_serializing_if = "Option::is_none" - )] - 
pub additional_completions: Option>, - - /// Tags for categorization (e.g., "engine: knitr") - #[serde(skip_serializing_if = "Option::is_none")] - pub tags: Option>, -} - -/// Boolean type schema -#[derive(Debug, Clone, PartialEq)] -pub struct BooleanSchema { - pub annotations: SchemaAnnotations, -} - -/// Number type schema (integer or float) -#[derive(Debug, Clone, PartialEq)] -pub struct NumberSchema { - pub annotations: SchemaAnnotations, - pub minimum: Option, - pub maximum: Option, - pub exclusive_minimum: Option, - pub exclusive_maximum: Option, - pub multiple_of: Option, -} - -/// String type schema -#[derive(Debug, Clone, PartialEq)] -pub struct StringSchema { - pub annotations: SchemaAnnotations, - pub min_length: Option, - pub max_length: Option, - pub pattern: Option, -} - -/// Null type schema -#[derive(Debug, Clone, PartialEq)] -pub struct NullSchema { - pub annotations: SchemaAnnotations, -} - -/// Enum type schema -#[derive(Debug, Clone, PartialEq)] -pub struct EnumSchema { - pub annotations: SchemaAnnotations, - pub values: Vec, -} - -/// Any type schema (no validation) -#[derive(Debug, Clone, PartialEq)] -pub struct AnySchema { - pub annotations: SchemaAnnotations, -} - -/// AnyOf schema (validates if any subschema matches) -#[derive(Debug, Clone, PartialEq)] -pub struct AnyOfSchema { - pub annotations: SchemaAnnotations, - pub schemas: Vec, -} - -/// AllOf schema (validates if all subschemas match) -#[derive(Debug, Clone, PartialEq)] -pub struct AllOfSchema { - pub annotations: SchemaAnnotations, - pub schemas: Vec, -} - -/// Array type schema -#[derive(Debug, Clone, PartialEq)] -pub struct ArraySchema { - pub annotations: SchemaAnnotations, - pub items: Option>, - pub min_items: Option, - pub max_items: Option, - pub unique_items: Option, -} - -/// Object type schema -#[derive(Debug, Clone, PartialEq)] -pub struct ObjectSchema { - pub annotations: SchemaAnnotations, - pub properties: HashMap, - pub pattern_properties: HashMap, - pub 
additional_properties: Option>, - pub required: Vec, - pub min_properties: Option, - pub max_properties: Option, - /// Quarto extension: if true, object cannot have properties not in schema - pub closed: bool, - /// Schema that property names (keys) must match - pub property_names: Option>, - /// Quarto extension: naming convention(s) for property names - pub naming_convention: Option, - /// Base schemas for inheritance (via `super` field in YAML) - /// - /// Can contain Schema::Ref with eager=true (to be resolved during compilation) - /// or actual ObjectSchema instances (if already resolved). - /// When present, schemas should be merged during compilation phase. - pub base_schema: Option>, -} - -/// Reference to another schema -#[derive(Debug, Clone, PartialEq)] -pub struct RefSchema { - pub annotations: SchemaAnnotations, - pub reference: String, - /// Whether this reference should be resolved eagerly (true for resolveRef, false for ref/$ref) - pub eager: bool, -} diff --git a/crates/quarto-yaml-validation/src/snapshots/quarto_yaml_validation__diagnostic__tests__all_three_formats_snapshot.snap b/crates/quarto-yaml-validation/src/snapshots/quarto_yaml_validation__diagnostic__tests__all_three_formats_snapshot.snap deleted file mode 100644 index 974e72dbf..000000000 --- a/crates/quarto-yaml-validation/src/snapshots/quarto_yaml_validation__diagnostic__tests__all_three_formats_snapshot.snap +++ /dev/null @@ -1,53 +0,0 @@ ---- -source: crates/quarto-yaml-validation/src/diagnostic.rs -expression: combined ---- -=== compact === -test.yaml:1:6 [Q-1-11] age: Expected number, got string (Hint: Use a numeric value without quotes?) - -=== json === -{ - "code": "Q-1-11", - "error_kind": { - "data": { - "expected": "number", - "got": "string" - }, - "type": "TypeMismatch" - }, - "hints": [ - "Use a numeric value without quotes?" 
- ], - "instance_path": [ - { - "type": "Key", - "value": "age" - } - ], - "message": "Expected number, got string", - "schema_path": [ - "object", - "number" - ], - "source_range": { - "end_column": 18, - "end_line": 1, - "end_offset": 17, - "filename": "test.yaml", - "start_column": 6, - "start_line": 1, - "start_offset": 5 - } -} - -=== human (ANSI stripped) === -Error: [Q-1-11] YAML Validation Failed - ╭─[ test.yaml:1:6 ] - │ - 1 │ age: "not a number" - │ ──────┬───── - │ ╰─────── Expected number, got string -───╯ -✖ At document path: `age` -ℹ Schema constraint: object > number -ℹ Use a numeric value without quotes? diff --git a/crates/quarto-yaml-validation/src/tests.rs b/crates/quarto-yaml-validation/src/tests.rs deleted file mode 100644 index 426a131da..000000000 --- a/crates/quarto-yaml-validation/src/tests.rs +++ /dev/null @@ -1,110 +0,0 @@ -// Tests for YAML validation - -#[cfg(test)] -mod integration_tests { - use crate::schema::*; - use crate::validator::validate; - use quarto_source_map::SourceContext; - use quarto_yaml::{SourceInfo, YamlWithSourceInfo}; - use yaml_rust2::Yaml; - - fn make_yaml_bool(value: bool) -> YamlWithSourceInfo { - YamlWithSourceInfo::new_scalar(Yaml::Boolean(value), SourceInfo::for_test()) - } - - fn make_yaml_string(value: &str) -> YamlWithSourceInfo { - YamlWithSourceInfo::new_scalar(Yaml::String(value.to_string()), SourceInfo::for_test()) - } - - fn make_yaml_number(value: i64) -> YamlWithSourceInfo { - YamlWithSourceInfo::new_scalar(Yaml::Integer(value), SourceInfo::for_test()) - } - - fn make_source_ctx() -> SourceContext { - SourceContext::new() - } - - #[test] - fn test_boolean_validation() { - let registry = SchemaRegistry::new(); - let schema = Schema::Boolean(BooleanSchema { - annotations: SchemaAnnotations::default(), - }); - - let yaml = make_yaml_bool(true); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_ok()); - - let yaml = make_yaml_string("not a boolean"); - assert!(validate(&yaml, &schema, 
®istry, &make_source_ctx()).is_err()); - } - - #[test] - fn test_string_validation() { - let registry = SchemaRegistry::new(); - let schema = Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: Some(3), - max_length: Some(10), - pattern: None, - }); - - let yaml = make_yaml_string("hello"); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_ok()); - - let yaml = make_yaml_string("hi"); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_err()); - - let yaml = make_yaml_string("this is too long"); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_err()); - } - - #[test] - fn test_number_validation() { - let registry = SchemaRegistry::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: Some(0.0), - maximum: Some(100.0), - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }); - - let yaml = make_yaml_number(50); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_ok()); - - let yaml = make_yaml_number(-1); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_err()); - - let yaml = make_yaml_number(101); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_err()); - } - - #[test] - fn test_enum_validation() { - let registry = SchemaRegistry::new(); - let schema = Schema::Enum(EnumSchema { - annotations: SchemaAnnotations::default(), - values: vec![ - serde_json::Value::String("red".to_string()), - serde_json::Value::String("green".to_string()), - serde_json::Value::String("blue".to_string()), - ], - }); - - let yaml = make_yaml_string("red"); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_ok()); - - let yaml = make_yaml_string("yellow"); - assert!(validate(&yaml, &schema, ®istry, &make_source_ctx()).is_err()); - } - - #[test] - fn test_schema_true_and_false() { - let registry = SchemaRegistry::new(); - let yaml = make_yaml_bool(true); - - let 
schema_true = Schema::True; - assert!(validate(&yaml, &schema_true, ®istry, &make_source_ctx()).is_ok()); - } -} diff --git a/crates/quarto-yaml-validation/src/validator.rs b/crates/quarto-yaml-validation/src/validator.rs deleted file mode 100644 index b501e1e0f..000000000 --- a/crates/quarto-yaml-validation/src/validator.rs +++ /dev/null @@ -1,2284 +0,0 @@ -// YAML validation engine - -use crate::error::{ - InstancePath, PathSegment, SchemaPath, ValidationError, ValidationErrorKind, ValidationResult, -}; -use crate::schema::{Schema, SchemaRegistry}; -use quarto_source_map::SourceContext; -use quarto_yaml::YamlWithSourceInfo; -use regex::Regex; -use std::collections::HashSet; -use yaml_rust2::Yaml; - -/// Validates a YAML value against a schema -pub fn validate( - value: &YamlWithSourceInfo, - schema: &Schema, - registry: &SchemaRegistry, - source_ctx: &SourceContext, -) -> ValidationResult<()> { - let mut context = ValidationContext::new(registry, source_ctx); - validate_generic(value, schema, &mut context) -} - -/// Validation context tracks state during validation -pub struct ValidationContext<'a> { - /// Reference to the schema registry for $ref resolution - registry: &'a SchemaRegistry, - /// Source context for mapping offsets to line/column - source_ctx: &'a SourceContext, - /// Current instance path (e.g., ["format", "html", "toc"]) - instance_path: InstancePath, - /// Current schema path (e.g., ["properties", "format"]) - schema_path: SchemaPath, - /// Stack of `errorMessage` overrides, one frame per schema node currently - /// being validated (innermost last). Each frame is the node's own - /// `errorMessage` (or `None`). `add_error` copies the top frame into the - /// produced error's `custom_hint`, so the override binds strictly to the - /// schema node where the failure occurs — a child node without its own - /// `errorMessage` masks (does not inherit) an enclosing one. 
- custom_hint_stack: Vec>, - /// Collected validation errors - errors: Vec, -} - -impl<'a> ValidationContext<'a> { - /// Create a new validation context - pub fn new(registry: &'a SchemaRegistry, source_ctx: &'a SourceContext) -> Self { - Self { - registry, - source_ctx, - instance_path: InstancePath::new(), - schema_path: SchemaPath::new(), - custom_hint_stack: Vec::new(), - errors: Vec::new(), - } - } - - /// Add an error to the context - pub fn add_error(&mut self, kind: ValidationErrorKind, node: &YamlWithSourceInfo) { - let mut error = ValidationError::new(kind, self.instance_path.clone()) - .with_schema_path(self.schema_path.clone()) - .with_yaml_node(node.clone(), self.source_ctx); - // Bind the override of the schema node currently being validated (if - // any) to this error. - error.custom_hint = self.custom_hint_stack.last().cloned().flatten(); - self.errors.push(error); - } - - /// Execute a function with the current schema node's `errorMessage` pushed - /// as a frame. A `None` frame is pushed when the node has no `errorMessage`, - /// which masks any enclosing override — overrides bind strictly to the node - /// that declares them. 
- pub fn with_custom_hint(&mut self, hint: Option<&str>, f: F) -> R - where - F: FnOnce(&mut Self) -> R, - { - self.custom_hint_stack.push(hint.map(str::to_string)); - let result = f(self); - self.custom_hint_stack.pop(); - result - } - - /// Execute a function with a new instance path segment - pub fn with_instance_path(&mut self, segment: PathSegment, f: F) -> R - where - F: FnOnce(&mut Self) -> R, - { - self.instance_path.push_key(segment.to_string()); - let result = f(self); - self.instance_path.pop(); - result - } - - /// Execute a function with a new schema path segment - pub fn with_schema_path(&mut self, segment: impl Into, f: F) -> R - where - F: FnOnce(&mut Self) -> R, - { - self.schema_path.push(segment); - let result = f(self); - self.schema_path.pop(); - result - } - - /// Get the collected errors - pub fn errors(&self) -> &[ValidationError] { - &self.errors - } - - /// Check if validation failed - pub fn has_errors(&self) -> bool { - !self.errors.is_empty() - } -} - -/// Navigate through a YamlWithSourceInfo tree using an instance path -/// -/// This function is critical for error reporting - it finds the exact YAML node -/// corresponding to a validation error. 
-/// -/// # Arguments -/// * `path` - The instance path to follow (e.g., ["format", "html", "toc"]) -/// * `annotation` - The YAML tree to navigate -/// * `return_key` - If true, return the key node; if false, return the value node -/// * `path_index` - Current position in the path (used for recursion) -pub fn navigate<'a>( - path: &InstancePath, - annotation: &'a YamlWithSourceInfo, - return_key: bool, - path_index: usize, -) -> Option<&'a YamlWithSourceInfo> { - // Base case: we've reached the end of the path - if path_index >= path.segments().len() { - return Some(annotation); - } - - let segment = &path.segments()[path_index]; - - // Check if this is a hash/mapping - if let Some(entries) = annotation.as_hash() { - // For mappings, search backwards (like TypeScript version) - match segment { - PathSegment::Key(search_key) => { - for entry in entries.iter().rev() { - if let Yaml::String(ref key_str) = entry.key.yaml - && key_str == search_key - { - let target = if return_key && path_index == path.segments().len() - 1 { - &entry.key - } else { - &entry.value - }; - return navigate(path, target, return_key, path_index + 1); - } - } - None - } - PathSegment::Index(_) => { - // Index doesn't make sense for a mapping - None - } - } - } - // Check if this is an array/sequence - else if let Some(items) = annotation.as_array() { - match segment { - PathSegment::Index(index) => { - if *index < items.len() { - navigate(path, &items[*index], return_key, path_index + 1) - } else { - None - } - } - PathSegment::Key(_) => { - // Key doesn't make sense for a sequence - None - } - } - } - // Scalar - can't navigate into it - else { - None - } -} - -/// Main validation dispatcher -fn validate_generic( - value: &YamlWithSourceInfo, - schema: &Schema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - // Bind this schema node's `errorMessage` (if any) for the duration of its - // validation, so any failure raised here picks it up as a custom hint. 
- let hint = schema.annotations().error_message.as_deref(); - context.with_custom_hint(hint, |context| { - validate_generic_inner(value, schema, context) - }) -} - -fn validate_generic_inner( - value: &YamlWithSourceInfo, - schema: &Schema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - match schema { - Schema::True => Ok(()), - Schema::Boolean(s) => { - context.with_schema_path("boolean", |ctx| validate_boolean(value, s, ctx)) - } - Schema::Number(s) => { - context.with_schema_path("number", |ctx| validate_number(value, s, ctx)) - } - Schema::String(s) => { - context.with_schema_path("string", |ctx| validate_string(value, s, ctx)) - } - Schema::Null(s) => context.with_schema_path("null", |ctx| validate_null(value, s, ctx)), - Schema::Enum(s) => context.with_schema_path("enum", |ctx| validate_enum(value, s, ctx)), - Schema::Any(_) => Ok(()), - Schema::AnyOf(s) => context.with_schema_path("anyOf", |ctx| validate_any_of(value, s, ctx)), - Schema::AllOf(s) => context.with_schema_path("allOf", |ctx| validate_all_of(value, s, ctx)), - Schema::Array(s) => context.with_schema_path("array", |ctx| validate_array(value, s, ctx)), - Schema::Object(s) => { - context.with_schema_path("object", |ctx| validate_object(value, s, ctx)) - } - Schema::Ref(s) => { - // Resolve the reference - if let Some(resolved) = context.registry.resolve(&s.reference) { - validate_generic(value, resolved, context) - } else { - context.add_error( - ValidationErrorKind::UnresolvedReference { - ref_id: s.reference.clone(), - }, - value, - ); - Err(context.errors[0].clone()) - } - } - } -} - -/// Validate a boolean value -fn validate_boolean( - value: &YamlWithSourceInfo, - _schema: &crate::schema::BooleanSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - match &value.yaml { - Yaml::Boolean(_) => Ok(()), - _ => { - context.add_error( - ValidationErrorKind::TypeMismatch { - expected: "boolean".to_string(), - got: yaml_type_name(&value.yaml).to_string(), - }, - 
value, - ); - Err(context.errors[0].clone()) - } - } -} - -/// Validate a number value -fn validate_number( - value: &YamlWithSourceInfo, - schema: &crate::schema::NumberSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - let num = match &value.yaml { - Yaml::Integer(n) => *n as f64, - Yaml::Real(_) => value.yaml.as_f64().unwrap_or(f64::NAN), - _ => { - context.add_error( - ValidationErrorKind::TypeMismatch { - expected: "number".to_string(), - got: yaml_type_name(&value.yaml).to_string(), - }, - value, - ); - return Err(context.errors[0].clone()); - } - }; - - // Check minimum - if let Some(min) = schema.minimum - && num < min - { - context.add_error( - ValidationErrorKind::NumberOutOfRange { - value: num, - minimum: Some(min), - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check maximum - if let Some(max) = schema.maximum - && num > max - { - context.add_error( - ValidationErrorKind::NumberOutOfRange { - value: num, - minimum: None, - maximum: Some(max), - exclusive_minimum: None, - exclusive_maximum: None, - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check exclusive minimum - if let Some(min) = schema.exclusive_minimum - && num <= min - { - context.add_error( - ValidationErrorKind::NumberOutOfRange { - value: num, - minimum: None, - maximum: None, - exclusive_minimum: Some(min), - exclusive_maximum: None, - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check exclusive maximum - if let Some(max) = schema.exclusive_maximum - && num >= max - { - context.add_error( - ValidationErrorKind::NumberOutOfRange { - value: num, - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: Some(max), - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check multiple of - if let Some(multiple) = schema.multiple_of - && (num % multiple).abs() > f64::EPSILON - { - 
context.add_error( - ValidationErrorKind::NumberNotMultipleOf { - value: num, - multiple_of: multiple, - }, - value, - ); - return Err(context.errors[0].clone()); - } - - Ok(()) -} - -/// Validate a string value -fn validate_string( - value: &YamlWithSourceInfo, - schema: &crate::schema::StringSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - let s = match &value.yaml { - Yaml::String(s) => s, - _ => { - context.add_error( - ValidationErrorKind::TypeMismatch { - expected: "string".to_string(), - got: yaml_type_name(&value.yaml).to_string(), - }, - value, - ); - return Err(context.errors[0].clone()); - } - }; - - // Check min length - if let Some(min) = schema.min_length - && s.len() < min - { - context.add_error( - ValidationErrorKind::StringLengthInvalid { - length: s.len(), - min_length: Some(min), - max_length: None, - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check max length - if let Some(max) = schema.max_length - && s.len() > max - { - context.add_error( - ValidationErrorKind::StringLengthInvalid { - length: s.len(), - min_length: None, - max_length: Some(max), - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check pattern - if let Some(pattern) = &schema.pattern { - let re = Regex::new(pattern).map_err(|e| { - // Invalid regex is a schema error, not a validation error. - // This is a programming error in the schema definition itself. - // We use Other here because this isn't really a validation failure - // of the YAML document - it's a problem with the schema. 
- ValidationError::new( - ValidationErrorKind::Other { - message: format!("Invalid regex pattern '{}': {}", pattern, e), - }, - context.instance_path.clone(), - ) - })?; - - if !re.is_match(s) { - context.add_error( - ValidationErrorKind::StringPatternMismatch { - value: s.clone(), - pattern: pattern.clone(), - }, - value, - ); - return Err(context.errors[0].clone()); - } - } - - Ok(()) -} - -/// Validate a null value -fn validate_null( - value: &YamlWithSourceInfo, - _schema: &crate::schema::NullSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - match &value.yaml { - Yaml::Null => Ok(()), - _ => { - context.add_error( - ValidationErrorKind::TypeMismatch { - expected: "null".to_string(), - got: yaml_type_name(&value.yaml).to_string(), - }, - value, - ); - Err(context.errors[0].clone()) - } - } -} - -/// Validate an enum value -fn validate_enum( - value: &YamlWithSourceInfo, - schema: &crate::schema::EnumSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - // Convert YAML value to JSON value for comparison - let json_value = yaml_to_json_value(&value.yaml); - - for allowed in &schema.values { - if &json_value == allowed { - return Ok(()); - } - } - - context.add_error( - ValidationErrorKind::InvalidEnumValue { - value: format!("{}", json_value), - allowed: schema.values.iter().map(|v| format!("{}", v)).collect(), - }, - value, - ); - Err(context.errors[0].clone()) -} - -/// Validate anyOf (at least one schema must match) -fn validate_any_of( - value: &YamlWithSourceInfo, - schema: &crate::schema::AnyOfSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - let original_error_count = context.errors.len(); - - for subschema in schema.schemas.iter() { - let mut sub_context = ValidationContext::new(context.registry, context.source_ctx); - sub_context.instance_path = context.instance_path.clone(); - sub_context.schema_path = context.schema_path.clone(); - - if validate_generic(value, subschema, &mut 
sub_context).is_ok() { - // Success! Clear any errors from failed attempts - context.errors.truncate(original_error_count); - return Ok(()); - } - - // This subschema failed, but continue trying others - context.errors.extend(sub_context.errors); - } - - // All subschemas failed - // TODO: Implement error pruning to select the "best" error - Err(context.errors[original_error_count].clone()) -} - -/// Validate allOf (all schemas must match) -fn validate_all_of( - value: &YamlWithSourceInfo, - schema: &crate::schema::AllOfSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - for subschema in &schema.schemas { - validate_generic(value, subschema, context)?; - } - Ok(()) -} - -/// Validate an array value -fn validate_array( - value: &YamlWithSourceInfo, - schema: &crate::schema::ArraySchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - let items = match value.as_array() { - Some(items) => items, - None => { - context.add_error( - ValidationErrorKind::TypeMismatch { - expected: "array".to_string(), - got: yaml_type_name(&value.yaml).to_string(), - }, - value, - ); - return Err(context.errors[0].clone()); - } - }; - - // Check min items - if let Some(min) = schema.min_items - && items.len() < min - { - context.add_error( - ValidationErrorKind::ArrayLengthInvalid { - length: items.len(), - min_items: Some(min), - max_items: None, - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check max items - if let Some(max) = schema.max_items - && items.len() > max - { - context.add_error( - ValidationErrorKind::ArrayLengthInvalid { - length: items.len(), - min_items: None, - max_items: Some(max), - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Check unique items - if let Some(true) = schema.unique_items { - let mut seen = HashSet::new(); - for item in items { - let json_value = yaml_to_json_value(&item.yaml); - if !seen.insert(format!("{:?}", json_value)) { - 
context.add_error(ValidationErrorKind::ArrayItemsNotUnique, value); - return Err(context.errors[0].clone()); - } - } - } - - // Validate each item - if let Some(item_schema) = &schema.items { - for (i, item) in items.iter().enumerate() { - context.with_instance_path(PathSegment::Index(i), |ctx| { - validate_generic(item, item_schema, ctx) - })?; - } - } - - Ok(()) -} - -/// Validate an object value -fn validate_object( - value: &YamlWithSourceInfo, - schema: &crate::schema::ObjectSchema, - context: &mut ValidationContext, -) -> ValidationResult<()> { - let entries = match value.as_hash() { - Some(entries) => entries, - None => { - context.add_error( - ValidationErrorKind::TypeMismatch { - expected: "object".to_string(), - got: yaml_type_name(&value.yaml).to_string(), - }, - value, - ); - return Err(context.errors[0].clone()); - } - }; - - // Extract keys, rejecting duplicates. The source-tracked hash view - // preserves every entry (the collapsed `Yaml::Hash` would not), so a - // repeated key is detectable here. Report it on the second occurrence, - // pointing at that key's span. - let mut keys = HashSet::new(); - for entry in entries { - if let Yaml::String(ref key) = entry.key.yaml - && !keys.insert(key.clone()) - { - context.add_error( - ValidationErrorKind::DuplicateKey { key: key.clone() }, - &entry.key, - ); - return Err(context.errors[0].clone()); - } - } - - // Check required properties - for required in &schema.required { - if !keys.contains(required) { - // Advertise what the absent property should have been, mirroring the - // message the user would have seen had it been present with a bad - // value: enum values for an enum subschema, otherwise the type(s). 
- let (allowed, expected_type) = match schema.properties.get(required) { - Some(Schema::Enum(e)) if !e.values.is_empty() => ( - Some(e.values.iter().map(|v| format!("{}", v)).collect()), - None, - ), - Some(prop) => (None, expected_type_description(prop)), - None => (None, None), - }; - context.add_error( - ValidationErrorKind::MissingRequiredProperty { - property: required.clone(), - allowed, - expected_type, - }, - value, - ); - return Err(context.errors[0].clone()); - } - } - - // Check min/max properties - if let Some(min) = schema.min_properties - && entries.len() < min - { - context.add_error( - ValidationErrorKind::ObjectPropertyCountInvalid { - count: entries.len(), - min_properties: Some(min), - max_properties: None, - }, - value, - ); - return Err(context.errors[0].clone()); - } - - if let Some(max) = schema.max_properties - && entries.len() > max - { - context.add_error( - ValidationErrorKind::ObjectPropertyCountInvalid { - count: entries.len(), - min_properties: None, - max_properties: Some(max), - }, - value, - ); - return Err(context.errors[0].clone()); - } - - // Validate each property - for entry in entries { - if let Yaml::String(ref key) = entry.key.yaml { - // Check if property is defined in schema - let property_schema = schema.properties.get(key); - - if let Some(prop_schema) = property_schema { - context.with_instance_path(PathSegment::Key(key.clone()), |ctx| { - validate_generic(&entry.value, prop_schema, ctx) - })?; - } else if schema.closed { - // Closed object - no additional properties allowed - context.add_error( - ValidationErrorKind::UnknownProperty { - property: key.clone(), - }, - value, - ); - return Err(context.errors[0].clone()); - } else if let Some(additional) = &schema.additional_properties { - // Validate against additional properties schema - context.with_instance_path(PathSegment::Key(key.clone()), |ctx| { - validate_generic(&entry.value, additional, ctx) - })?; - } - } - } - - Ok(()) -} - -/// Get a human-readable type name 
for a YAML value -fn yaml_type_name(value: &Yaml) -> &'static str { - match value { - Yaml::Null | Yaml::BadValue => "null", - Yaml::Boolean(_) => "boolean", - Yaml::Integer(_) => "integer", - Yaml::Real(_) => "float", - Yaml::String(_) => "string", - Yaml::Array(_) => "array", - Yaml::Hash(_) => "object", - Yaml::Alias(_) => "alias", - } -} - -/// Brief description of the type(s) a property subschema permits, reusing the -/// `Schema::type_name()` vocabulary (which matches `TypeMismatch`'s `expected`). -/// -/// Returns `None` for schemas that carry no useful concrete type to advertise -/// (`any`, `true`, `allOf`, unresolved `$ref`); `Enum` is handled separately via -/// its permitted values, so it also returns `None` here. -fn expected_type_description(schema: &Schema) -> Option { - match schema { - Schema::Boolean(_) - | Schema::Number(_) - | Schema::String(_) - | Schema::Null(_) - | Schema::Array(_) - | Schema::Object(_) => Some(schema.type_name().to_string()), - Schema::AnyOf(s) => { - let names: Vec = s - .schemas - .iter() - .filter_map(expected_type_description) - .collect(); - if names.is_empty() { - None - } else { - Some(names.join(" or ")) - } - } - Schema::Enum(_) | Schema::AllOf(_) | Schema::Any(_) | Schema::True | Schema::Ref(_) => None, - } -} - -/// Convert YAML value to JSON value for comparison -fn yaml_to_json_value(value: &Yaml) -> serde_json::Value { - match value { - Yaml::Null | Yaml::BadValue => serde_json::Value::Null, - Yaml::Boolean(b) => serde_json::Value::Bool(*b), - Yaml::Integer(n) => serde_json::Value::Number((*n).into()), - Yaml::Real(s) => { - if let Ok(f) = s.parse::() { - serde_json::Number::from_f64(f) - .map_or(serde_json::Value::Null, serde_json::Value::Number) - } else { - serde_json::Value::Null - } - } - Yaml::String(s) => serde_json::Value::String(s.clone()), - Yaml::Array(items) => { - serde_json::Value::Array(items.iter().map(yaml_to_json_value).collect()) - } - Yaml::Hash(entries) => { - let mut map = 
serde_json::Map::new(); - for (key, value) in entries { - if let Yaml::String(key_str) = key { - map.insert(key_str.clone(), yaml_to_json_value(value)); - } - } - serde_json::Value::Object(map) - } - Yaml::Alias(_) => serde_json::Value::Null, // Aliases should be resolved before validation - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::schema::{ - AllOfSchema, AnyOfSchema, AnySchema, ArraySchema, BooleanSchema, EnumSchema, NullSchema, - NumberSchema, ObjectSchema, RefSchema, SchemaAnnotations, StringSchema, - }; - use quarto_yaml::{SourceInfo, YamlHashEntry}; - use std::collections::HashMap; - use yaml_rust2::Yaml; - - // Helper to create a simple YAML scalar - fn yaml_scalar(yaml: Yaml) -> YamlWithSourceInfo { - YamlWithSourceInfo::new_scalar(yaml, SourceInfo::for_test()) - } - - // Helper to create a YAML array - fn yaml_array(items: Vec) -> YamlWithSourceInfo { - let children: Vec = items - .into_iter() - .map(|y| YamlWithSourceInfo::new_scalar(y, SourceInfo::for_test())) - .collect(); - let yaml_items: Vec = children.iter().map(|c| c.yaml.clone()).collect(); - YamlWithSourceInfo::new_array(Yaml::Array(yaml_items), SourceInfo::for_test(), children) - } - - // Helper to create a YAML object - fn yaml_object(entries: Vec<(&str, Yaml)>) -> YamlWithSourceInfo { - let hash_entries: Vec = entries - .into_iter() - .map(|(k, v)| YamlHashEntry { - key: YamlWithSourceInfo::new_scalar( - Yaml::String(k.to_string()), - SourceInfo::for_test(), - ), - value: YamlWithSourceInfo::new_scalar(v, SourceInfo::for_test()), - key_span: SourceInfo::for_test(), - value_span: SourceInfo::for_test(), - entry_span: SourceInfo::for_test(), - }) - .collect(); - let mut yaml_hash = yaml_rust2::yaml::Hash::new(); - for entry in &hash_entries { - if let Yaml::String(ref k) = entry.key.yaml { - yaml_hash.insert(Yaml::String(k.clone()), entry.value.yaml.clone()); - } - } - YamlWithSourceInfo::new_hash(Yaml::Hash(yaml_hash), SourceInfo::for_test(), hash_entries) - } - - // 
==================== Boolean Tests ==================== - - #[test] - fn test_validate_boolean() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Boolean(BooleanSchema { - annotations: SchemaAnnotations::default(), - }); - - let yaml = yaml_scalar(Yaml::Boolean(true)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - let yaml = yaml_scalar(Yaml::Boolean(false)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_boolean_wrong_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Boolean(BooleanSchema { - annotations: SchemaAnnotations::default(), - }); - - let yaml = yaml_scalar(Yaml::String("not a boolean".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== Number Tests ==================== - - #[test] - fn test_validate_number_integer() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }); - - let yaml = yaml_scalar(Yaml::Integer(42)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_number_real() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }); - - let yaml = yaml_scalar(Yaml::Real("3.14".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_number_yaml_infinity_and_nan() { - // End-to-end: parse real YAML text and validate against a `number` 
- // schema. The YAML core-schema float spellings (`.inf`, `-.inf`, …) - // must validate as numbers. Regression for tidyverse/data-dict#47, - // where `range: [-.inf, 8.3]` was rejected with "got string". - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let number_schema = || { - Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }) - }; - - for text in [ - ".inf", "+.inf", "-.inf", ".Inf", ".INF", ".nan", ".NaN", ".NAN", - ] { - let yaml = quarto_yaml::parse(text).unwrap(); - assert!( - validate(&yaml, &number_schema(), ®istry, &source_ctx).is_ok(), - "{text:?} should validate as a number" - ); - } - - // And the data-dict shape: an array of numbers including -.inf. - let array_schema = Schema::Array(ArraySchema { - annotations: SchemaAnnotations::default(), - items: Some(Box::new(number_schema())), - min_items: None, - max_items: None, - unique_items: None, - }); - let yaml = quarto_yaml::parse("[-.inf, 8.3]").unwrap(); - assert!( - validate(&yaml, &array_schema, ®istry, &source_ctx).is_ok(), - "[-.inf, 8.3] should validate as an array of numbers" - ); - } - - #[test] - fn test_validate_number_wrong_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }); - - let yaml = yaml_scalar(Yaml::String("not a number".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_number_minimum() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: Some(10.0), - maximum: None, - 
exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }); - - // Valid: at minimum - let yaml = yaml_scalar(Yaml::Integer(10)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Valid: above minimum - let yaml = yaml_scalar(Yaml::Integer(15)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: below minimum - let yaml = yaml_scalar(Yaml::Integer(5)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_number_maximum() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: Some(100.0), - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }); - - // Valid: at maximum - let yaml = yaml_scalar(Yaml::Integer(100)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Valid: below maximum - let yaml = yaml_scalar(Yaml::Integer(50)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: above maximum - let yaml = yaml_scalar(Yaml::Integer(150)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_number_exclusive_minimum() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: Some(10.0), - exclusive_maximum: None, - multiple_of: None, - }); - - // Valid: above exclusive minimum - let yaml = yaml_scalar(Yaml::Integer(11)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: at exclusive minimum - let yaml = yaml_scalar(Yaml::Integer(10)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - - // Invalid: below exclusive minimum - let yaml = yaml_scalar(Yaml::Integer(5)); 
- assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_number_exclusive_maximum() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: Some(100.0), - multiple_of: None, - }); - - // Valid: below exclusive maximum - let yaml = yaml_scalar(Yaml::Integer(99)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: at exclusive maximum - let yaml = yaml_scalar(Yaml::Integer(100)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - - // Invalid: above exclusive maximum - let yaml = yaml_scalar(Yaml::Integer(150)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_number_multiple_of() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: Some(5.0), - }); - - // Valid: multiple of 5 - let yaml = yaml_scalar(Yaml::Integer(15)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Valid: zero is multiple of anything - let yaml = yaml_scalar(Yaml::Integer(0)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: not a multiple of 5 - let yaml = yaml_scalar(Yaml::Integer(7)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== String Tests ==================== - - #[test] - fn test_validate_string() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: None, 
- }); - - let yaml = yaml_scalar(Yaml::String("hello".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_string_wrong_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: None, - }); - - let yaml = yaml_scalar(Yaml::Integer(42)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_string_min_length() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: Some(5), - max_length: None, - pattern: None, - }); - - // Valid: exactly min length - let yaml = yaml_scalar(Yaml::String("hello".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Valid: above min length - let yaml = yaml_scalar(Yaml::String("hello world".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: below min length - let yaml = yaml_scalar(Yaml::String("hi".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_string_max_length() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: Some(10), - pattern: None, - }); - - // Valid: exactly max length - let yaml = yaml_scalar(Yaml::String("0123456789".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Valid: below max length - let yaml = yaml_scalar(Yaml::String("hello".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: above max length - let yaml = 
yaml_scalar(Yaml::String("this is too long".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_string_pattern() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: Some("^[a-z]+$".to_string()), - }); - - // Valid: matches pattern - let yaml = yaml_scalar(Yaml::String("hello".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: doesn't match pattern - let yaml = yaml_scalar(Yaml::String("Hello123".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== Null Tests ==================== - - #[test] - fn test_validate_null() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Null(NullSchema { - annotations: SchemaAnnotations::default(), - }); - - let yaml = yaml_scalar(Yaml::Null); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_null_wrong_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Null(NullSchema { - annotations: SchemaAnnotations::default(), - }); - - let yaml = yaml_scalar(Yaml::String("not null".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== Enum Tests ==================== - - #[test] - fn test_validate_enum() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Enum(EnumSchema { - annotations: SchemaAnnotations::default(), - values: vec![ - serde_json::json!("red"), - serde_json::json!("green"), - serde_json::json!("blue"), - ], - }); - - // Valid: matches enum value - let yaml = yaml_scalar(Yaml::String("red".to_string())); - 
assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - let yaml = yaml_scalar(Yaml::String("green".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_enum_invalid() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Enum(EnumSchema { - annotations: SchemaAnnotations::default(), - values: vec![ - serde_json::json!("red"), - serde_json::json!("green"), - serde_json::json!("blue"), - ], - }); - - // Invalid: not in enum - let yaml = yaml_scalar(Yaml::String("yellow".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_enum_integer() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Enum(EnumSchema { - annotations: SchemaAnnotations::default(), - values: vec![ - serde_json::json!(1), - serde_json::json!(2), - serde_json::json!(3), - ], - }); - - // Valid: matches enum value - let yaml = yaml_scalar(Yaml::Integer(2)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: not in enum - let yaml = yaml_scalar(Yaml::Integer(5)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== Array Tests ==================== - - #[test] - fn test_validate_array() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Array(ArraySchema { - annotations: SchemaAnnotations::default(), - items: None, - min_items: None, - max_items: None, - unique_items: None, - }); - - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_array_wrong_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Array(ArraySchema { - annotations: 
SchemaAnnotations::default(), - items: None, - min_items: None, - max_items: None, - unique_items: None, - }); - - let yaml = yaml_scalar(Yaml::String("not an array".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_array_min_items() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Array(ArraySchema { - annotations: SchemaAnnotations::default(), - items: None, - min_items: Some(2), - max_items: None, - unique_items: None, - }); - - // Valid: at min items - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2)]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: below min items - let yaml = yaml_array(vec![Yaml::Integer(1)]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_array_max_items() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Array(ArraySchema { - annotations: SchemaAnnotations::default(), - items: None, - min_items: None, - max_items: Some(3), - unique_items: None, - }); - - // Valid: at max items - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: above max items - let yaml = yaml_array(vec![ - Yaml::Integer(1), - Yaml::Integer(2), - Yaml::Integer(3), - Yaml::Integer(4), - ]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_array_unique_items() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Array(ArraySchema { - annotations: SchemaAnnotations::default(), - items: None, - min_items: None, - max_items: None, - unique_items: Some(true), - }); - - // Valid: all unique - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]); - 
assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: duplicates - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(1)]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_array_items_schema() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Array(ArraySchema { - annotations: SchemaAnnotations::default(), - items: Some(Box::new(Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }))), - min_items: None, - max_items: None, - unique_items: None, - }); - - // Valid: all items are numbers - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: contains non-number - let yaml = yaml_array(vec![ - Yaml::Integer(1), - Yaml::String("not a number".to_string()), - ]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== Object Tests ==================== - - #[test] - fn test_validate_object() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec![], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - let yaml = yaml_object(vec![("name", Yaml::String("test".to_string()))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_object_wrong_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = 
Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec![], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - let yaml = yaml_scalar(Yaml::String("not an object".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_object_required() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["name".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // Valid: has required property - let yaml = yaml_object(vec![("name", Yaml::String("test".to_string()))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: missing required property - let yaml = yaml_object(vec![("other", Yaml::String("test".to_string()))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_object_required_enum_reports_allowed_values() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - - // A required `version` property whose subschema is `enum: ["0.1.0"]`. 
- let mut properties = HashMap::new(); - properties.insert( - "version".to_string(), - Schema::Enum(EnumSchema { - annotations: SchemaAnnotations::default(), - values: vec![serde_json::Value::String("0.1.0".to_string())], - }), - ); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["version".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // Missing the required enum property: the error should advertise the - // allowed values, mirroring InvalidEnumValue. - let yaml = yaml_object(vec![("other", Yaml::String("test".to_string()))]); - let err = validate(&yaml, &schema, ®istry, &source_ctx) - .expect_err("missing required property should fail validation"); - match &err.kind { - ValidationErrorKind::MissingRequiredProperty { - property, - allowed, - expected_type, - } => { - assert_eq!(property, "version"); - // Enum values render via `format!("{}", json)`, so strings keep - // their quotes — matching the `InvalidEnumValue` rendering. - assert_eq!( - allowed.as_deref(), - Some(["\"0.1.0\"".to_string()].as_slice()) - ); - assert_eq!(*expected_type, None); - } - other => panic!("expected MissingRequiredProperty, got {:?}", other), - } - assert!( - err.kind.message().contains("must be one of: \"0.1.0\""), - "message was: {}", - err.kind.message() - ); - } - - #[test] - fn test_validate_object_required_non_enum_reports_expected_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - - // A required non-enum property advertises its expected type instead. 
- let mut properties = HashMap::new(); - properties.insert( - "name".to_string(), - Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["name".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - let yaml = yaml_object(vec![("other", Yaml::String("test".to_string()))]); - let err = validate(&yaml, &schema, ®istry, &source_ctx) - .expect_err("missing required property should fail validation"); - match &err.kind { - ValidationErrorKind::MissingRequiredProperty { - property, - allowed, - expected_type, - } => { - assert_eq!(property, "name"); - assert_eq!(*allowed, None); - assert_eq!(expected_type.as_deref(), Some("string")); - } - other => panic!("expected MissingRequiredProperty, got {:?}", other), - } - assert_eq!( - err.kind.message(), - "Missing required property 'name' (expected string)" - ); - } - - #[test] - fn test_validate_object_required_number_reports_expected_type() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - - // A required number property advertises "number" as its expected type. 
- let mut properties = HashMap::new(); - properties.insert( - "count".to_string(), - Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }), - ); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["count".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - let yaml = yaml_object(vec![("other", Yaml::String("test".to_string()))]); - let err = validate(&yaml, &schema, ®istry, &source_ctx) - .expect_err("missing required property should fail validation"); - match &err.kind { - ValidationErrorKind::MissingRequiredProperty { - property, - allowed, - expected_type, - } => { - assert_eq!(property, "count"); - assert_eq!(*allowed, None); - assert_eq!(expected_type.as_deref(), Some("number")); - } - other => panic!("expected MissingRequiredProperty, got {:?}", other), - } - assert_eq!( - err.kind.message(), - "Missing required property 'count' (expected number)" - ); - } - - #[test] - fn test_validate_object_required_anyof_reports_expected_types() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - - // An anyOf subschema advertises the union of its member types. 
- let mut properties = HashMap::new(); - properties.insert( - "x".to_string(), - Schema::AnyOf(AnyOfSchema { - annotations: SchemaAnnotations::default(), - schemas: vec![ - Schema::Boolean(BooleanSchema { - annotations: SchemaAnnotations::default(), - }), - Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ], - }), - ); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec!["x".to_string()], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - let yaml = yaml_object(vec![("other", Yaml::String("test".to_string()))]); - let err = validate(&yaml, &schema, ®istry, &source_ctx) - .expect_err("missing required property should fail validation"); - assert_eq!( - err.kind.message(), - "Missing required property 'x' (expected boolean or string)" - ); - } - - #[test] - fn test_validate_object_min_properties() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec![], - min_properties: Some(2), - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // Valid: at min properties - let yaml = yaml_object(vec![("a", Yaml::Integer(1)), ("b", Yaml::Integer(2))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: below min properties - let yaml = yaml_object(vec![("a", Yaml::Integer(1))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_object_max_properties() { - let registry = 
SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec![], - min_properties: None, - max_properties: Some(2), - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // Valid: at max properties - let yaml = yaml_object(vec![("a", Yaml::Integer(1)), ("b", Yaml::Integer(2))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: above max properties - let yaml = yaml_object(vec![ - ("a", Yaml::Integer(1)), - ("b", Yaml::Integer(2)), - ("c", Yaml::Integer(3)), - ]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_object_closed() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - - let mut properties = HashMap::new(); - properties.insert( - "name".to_string(), - Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ); - - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec![], - min_properties: None, - max_properties: None, - closed: true, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // Valid: only known property - let yaml = yaml_object(vec![("name", Yaml::String("test".to_string()))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: unknown property in closed object - let yaml = yaml_object(vec![ - ("name", Yaml::String("test".to_string())), - ("unknown", Yaml::Integer(42)), - ]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_object_property_schema() { - let 
registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - - let mut properties = HashMap::new(); - properties.insert( - "count".to_string(), - Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: Some(0.0), - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }), - ); - - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties, - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec![], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // Valid: count is a valid number - let yaml = yaml_object(vec![("count", Yaml::Integer(5))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: count is negative - let yaml = yaml_object(vec![("count", Yaml::Integer(-1))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_object_additional_properties() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: Some(Box::new(Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }))), - required: vec![], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // Valid: additional property is a number - let yaml = yaml_object(vec![("anything", Yaml::Integer(42))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: additional property is not a number - let yaml = 
yaml_object(vec![("anything", Yaml::String("not a number".to_string()))]); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_object_duplicate_keys() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Object(ObjectSchema { - annotations: SchemaAnnotations::default(), - properties: HashMap::new(), - pattern_properties: HashMap::new(), - additional_properties: None, - required: vec![], - min_properties: None, - max_properties: None, - closed: false, - property_names: None, - naming_convention: None, - base_schema: None, - }); - - // A mapping with a repeated key must be rejected, not silently deduped. - let yaml = yaml_object(vec![ - ("examples", Yaml::String("a".to_string())), - ("examples", Yaml::String("c".to_string())), - ]); - let result = validate(&yaml, &schema, ®istry, &source_ctx); - let err = result.expect_err("duplicate key should fail validation"); - assert_eq!( - err.kind, - ValidationErrorKind::DuplicateKey { - key: "examples".to_string(), - } - ); - assert_eq!(err.error_code(), "Q-1-20"); - } - - // ==================== AnyOf Tests ==================== - - #[test] - fn test_validate_any_of() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::AnyOf(AnyOfSchema { - annotations: SchemaAnnotations::default(), - schemas: vec![ - Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: None, - }), - Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }), - ], - }); - - // Valid: matches first schema (string) - let yaml = yaml_scalar(Yaml::String("hello".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Valid: matches second schema (number) - let yaml = 
yaml_scalar(Yaml::Integer(42)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: matches neither - let yaml = yaml_scalar(Yaml::Boolean(true)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== AllOf Tests ==================== - - #[test] - fn test_validate_all_of() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::AllOf(AllOfSchema { - annotations: SchemaAnnotations::default(), - schemas: vec![ - Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: Some(0.0), - maximum: None, - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }), - Schema::Number(NumberSchema { - annotations: SchemaAnnotations::default(), - minimum: None, - maximum: Some(100.0), - exclusive_minimum: None, - exclusive_maximum: None, - multiple_of: None, - }), - ], - }); - - // Valid: matches both schemas (0 <= x <= 100) - let yaml = yaml_scalar(Yaml::Integer(50)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: fails first schema (< 0) - let yaml = yaml_scalar(Yaml::Integer(-5)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - - // Invalid: fails second schema (> 100) - let yaml = yaml_scalar(Yaml::Integer(150)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== Schema::True and Schema::Any Tests ==================== - - #[test] - fn test_validate_true() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::True; - - // True schema accepts anything - let yaml = yaml_scalar(Yaml::String("anything".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - let yaml = yaml_scalar(Yaml::Integer(42)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - let yaml = yaml_scalar(Yaml::Null); - assert!(validate(&yaml, 
&schema, ®istry, &source_ctx).is_ok()); - } - - #[test] - fn test_validate_any() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Any(AnySchema { - annotations: SchemaAnnotations::default(), - }); - - // Any schema accepts anything - let yaml = yaml_scalar(Yaml::String("anything".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - let yaml = yaml_scalar(Yaml::Integer(42)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - } - - // ==================== Ref Tests ==================== - - #[test] - fn test_validate_ref() { - let mut registry = SchemaRegistry::new(); - - // Register a schema - registry.register( - "string-schema".to_string(), - Schema::String(StringSchema { - annotations: SchemaAnnotations::default(), - min_length: None, - max_length: None, - pattern: None, - }), - ); - - let source_ctx = SourceContext::new(); - let schema = Schema::Ref(RefSchema { - annotations: SchemaAnnotations::default(), - reference: "string-schema".to_string(), - eager: false, - }); - - // Valid: matches referenced schema - let yaml = yaml_scalar(Yaml::String("hello".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_ok()); - - // Invalid: doesn't match referenced schema - let yaml = yaml_scalar(Yaml::Integer(42)); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - #[test] - fn test_validate_ref_unresolved() { - let registry = SchemaRegistry::new(); - let source_ctx = SourceContext::new(); - let schema = Schema::Ref(RefSchema { - annotations: SchemaAnnotations::default(), - reference: "nonexistent".to_string(), - eager: false, - }); - - // Error: unresolved reference - let yaml = yaml_scalar(Yaml::String("anything".to_string())); - assert!(validate(&yaml, &schema, ®istry, &source_ctx).is_err()); - } - - // ==================== Navigate Tests ==================== - - #[test] - fn test_navigate_empty_path() { - let yaml = 
yaml_scalar(Yaml::String("test".to_string())); - let path = InstancePath::new(); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_some()); - } - - #[test] - fn test_navigate_object_key() { - let yaml = yaml_object(vec![ - ("name", Yaml::String("test".to_string())), - ("age", Yaml::Integer(30)), - ]); - - let mut path = InstancePath::new(); - path.push_key("name".to_string()); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_some()); - if let Some(node) = result { - assert_eq!(node.yaml, Yaml::String("test".to_string())); - } - } - - #[test] - fn test_navigate_array_index() { - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]); - - let mut path = InstancePath::new(); - path.push_index(1); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_some()); - if let Some(node) = result { - assert_eq!(node.yaml, Yaml::Integer(2)); - } - } - - #[test] - fn test_navigate_nested() { - // Create nested structure: { "items": [1, 2, 3] } - let items_array = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]); - let hash_entries = vec![YamlHashEntry { - key: YamlWithSourceInfo::new_scalar( - Yaml::String("items".to_string()), - SourceInfo::for_test(), - ), - value: items_array, - key_span: SourceInfo::for_test(), - value_span: SourceInfo::for_test(), - entry_span: SourceInfo::for_test(), - }]; - let mut yaml_hash = yaml_rust2::yaml::Hash::new(); - yaml_hash.insert( - Yaml::String("items".to_string()), - Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]), - ); - let yaml = YamlWithSourceInfo::new_hash( - Yaml::Hash(yaml_hash), - SourceInfo::for_test(), - hash_entries, - ); - - let mut path = InstancePath::new(); - path.push_key("items".to_string()); - path.push_index(2); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_some()); - if let Some(node) = result { - assert_eq!(node.yaml, Yaml::Integer(3)); - } - } - - #[test] - fn 
test_navigate_key_not_found() { - let yaml = yaml_object(vec![("name", Yaml::String("test".to_string()))]); - - let mut path = InstancePath::new(); - path.push_key("nonexistent".to_string()); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_none()); - } - - #[test] - fn test_navigate_index_out_of_bounds() { - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2)]); - - let mut path = InstancePath::new(); - path.push_index(10); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_none()); - } - - #[test] - fn test_navigate_index_on_object() { - let yaml = yaml_object(vec![("name", Yaml::String("test".to_string()))]); - - let mut path = InstancePath::new(); - path.push_index(0); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_none()); - } - - #[test] - fn test_navigate_key_on_array() { - let yaml = yaml_array(vec![Yaml::Integer(1), Yaml::Integer(2)]); - - let mut path = InstancePath::new(); - path.push_key("name".to_string()); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_none()); - } - - #[test] - fn test_navigate_into_scalar() { - let yaml = yaml_scalar(Yaml::String("test".to_string())); - - let mut path = InstancePath::new(); - path.push_key("name".to_string()); - - let result = navigate(&path, &yaml, false, 0); - assert!(result.is_none()); - } - - // ==================== yaml_type_name Tests ==================== - - #[test] - fn test_yaml_type_name() { - assert_eq!(yaml_type_name(&Yaml::Null), "null"); - assert_eq!(yaml_type_name(&Yaml::Boolean(true)), "boolean"); - assert_eq!(yaml_type_name(&Yaml::Integer(42)), "integer"); - assert_eq!(yaml_type_name(&Yaml::Real("3.14".to_string())), "float"); - assert_eq!(yaml_type_name(&Yaml::String("test".to_string())), "string"); - assert_eq!(yaml_type_name(&Yaml::Array(vec![])), "array"); - assert_eq!( - yaml_type_name(&Yaml::Hash(yaml_rust2::yaml::Hash::new())), - "object" - ); - assert_eq!(yaml_type_name(&Yaml::BadValue), 
"null"); - assert_eq!(yaml_type_name(&Yaml::Alias(0)), "alias"); - } - - // ==================== yaml_to_json_value Tests ==================== - - #[test] - fn test_yaml_to_json_value() { - assert_eq!(yaml_to_json_value(&Yaml::Null), serde_json::Value::Null); - assert_eq!( - yaml_to_json_value(&Yaml::Boolean(true)), - serde_json::Value::Bool(true) - ); - assert_eq!( - yaml_to_json_value(&Yaml::Integer(42)), - serde_json::json!(42) - ); - assert_eq!( - yaml_to_json_value(&Yaml::String("test".to_string())), - serde_json::json!("test") - ); - assert_eq!(yaml_to_json_value(&Yaml::BadValue), serde_json::Value::Null); - assert_eq!(yaml_to_json_value(&Yaml::Alias(0)), serde_json::Value::Null); - } - - #[test] - fn test_yaml_to_json_value_real() { - let result = yaml_to_json_value(&Yaml::Real("1.234".to_string())); - if let serde_json::Value::Number(n) = result { - assert!((n.as_f64().unwrap() - 1.234).abs() < 0.001); - } else { - panic!("Expected Number"); - } - } - - #[test] - fn test_yaml_to_json_value_array() { - let yaml = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]); - assert_eq!(yaml_to_json_value(&yaml), serde_json::json!([1, 2])); - } - - #[test] - fn test_yaml_to_json_value_hash() { - let mut hash = yaml_rust2::yaml::Hash::new(); - hash.insert(Yaml::String("key".to_string()), Yaml::Integer(42)); - let yaml = Yaml::Hash(hash); - assert_eq!(yaml_to_json_value(&yaml), serde_json::json!({"key": 42})); - } -} diff --git a/crates/quarto-yaml-validation/test-fixtures/schemas/definitions.yml b/crates/quarto-yaml-validation/test-fixtures/schemas/definitions.yml deleted file mode 100644 index 1dd5f5ed0..000000000 --- a/crates/quarto-yaml-validation/test-fixtures/schemas/definitions.yml +++ /dev/null @@ -1,3405 +0,0 @@ -- id: date - anyOf: - - string - - object: - properties: - value: string - format: string - required: [value] - -- id: date-format - schema: string - -- id: math-methods - enum: - values: [plain, webtex, gladtex, mathml, mathjax, katex] - -- id: 
pandoc-format-request-headers - arrayOf: - arrayOf: - schema: string - length: 2 - -- id: pandoc-format-output-file - anyOf: - - path - - enum: - values: [null] - hidden: true - -- id: pandoc-format-filters - arrayOf: - anyOf: - - path - - object: - properties: - type: string - path: path - required: [path] - - object: - properties: - type: string - path: path - at: - enum: - [ - pre-ast, - post-ast, - pre-quarto, - post-quarto, - pre-render, - post-render, - ] - required: [path, at] - - record: - type: - enum: [citeproc] - -- id: pandoc-shortcodes - arrayOf: path - -- id: page-column - enum: - [ - body, - body-outset, - body-outset-left, - body-outset-right, - page, - page-left, - page-right, - page-inset, - page-inset-left, - page-inset-right, - screen, - screen-left, - screen-right, - screen-inset, - screen-inset-shaded, - screen-inset-left, - screen-inset-right, - margin, - ] - -- id: contents-auto - object: - properties: - auto: - anyOf: - - boolean - - maybeArrayOf: string - description: - short: Automatically generate sidebar contents. - long: | - Automatically generate sidebar contents. Pass `true` to include all documents - in the site, a directory name to include only documents in that directory, - or a glob (or list of globs) to include documents based on a pattern. - - Subdirectories will create sections (use an `index.qmd` in the directory to - provide its title). Order will be alphabetical unless a numeric `order` field - is provided in document metadata. - -- id: navigation-item - anyOf: - - path - - ref: navigation-item-object - -- id: navigation-item-object - object: - closed: true - properties: - aria-label: - string: - description: "Accessible label for the item." - file: - hidden: true - string: - description: | - Alias for href - href: - string: - description: | - Link to file contained with the project or external URL - icon: - string: - description: - short: Name of bootstrap icon (e.g. 
`github`, `twitter`, `share`) - long: | - Name of bootstrap icon (e.g. `github`, `twitter`, `share`) - See for a list of available icons - id: - # "core identification" - # this field is only used in typescript - schema: string - hidden: true - menu: - arrayOf: - schema: - ref: navigation-item - text: - string: - description: | - Text to display for item (defaults to the - document title if not provided) - url: - hidden: true - string: - description: | - Alias for href - rel: - string: - description: | - Value for rel attribute. Multiple space-separated values are permitted. - See - for a details. - target: - string: - description: | - Value for target attribute. - See - for details. - -- id: giscus-themes - enum: - values: - [ - light, - light_high_contrast, - light_protanopia, - light_tritanopia, - dark, - dark_high_contrast, - dark_protanopia, - dark_tritanopia, - dark_dimmed, - transparent_dark, - cobalt, - purple_dark, - noborder_light, - noborder_dark, - noborder_gray, - preferred_color_scheme, - ] - -- id: giscus-configuration - object: - closed: true - properties: - repo: - string: - description: - short: The Github repo that will be used to store comments. - long: | - The Github repo that will be used to store comments. - - In order to work correctly, the repo must be public, with the giscus app installed, and - the discussions feature must be enabled. - repo-id: - string: - description: - short: The Github repository identifier. - long: | - The Github repository identifier. - - You can quickly find this by using the configuration tool at [https://giscus.app](https://giscus.app). - If this is not provided, Quarto will attempt to discover it at render time. - category: - string: - description: - short: The discussion category where new discussions will be created. - long: | - The discussion category where new discussions will be created. 
It is recommended - to use a category with the **Announcements** type so that new discussions - can only be created by maintainers and giscus. - category-id: - string: - description: - short: The Github category identifier. - long: | - The Github category identifier. - - You can quickly find this by using the configuration tool at [https://giscus.app](https://giscus.app). - If this is not provided, Quarto will attempt to discover it at render time. - mapping: - schema: - anyOf: - - string - - number - completions: - - pathname - - url - - title - - og:title - description: - short: The mapping between the page and the embedded discussion. - long: | - The mapping between the page and the embedded discussion. - - - `pathname`: The discussion title contains the page path - - `url`: The discussion title contains the page url - - `title`: The discussion title contains the page title - - `og:title`: The discussion title contains the `og:title` metadata value - - any other string or number: Any other strings will be passed through verbatim and a discussion title - containing that value will be used. Numbers will be treated - as a discussion number and automatic discussion creation is not supported. - reactions-enabled: - boolean: - description: Display reactions for the discussion's main post before the comments. - loading: - enum: [lazy] - description: "Specify `loading: lazy` to defer loading comments until the user scrolls near the comments container." - input-position: - enum: [top, bottom] - description: Place the comment input box above or below the comments. - theme: - anyOf: - - string - - ref: giscus-themes - - object: - closed: true - properties: - light: - anyOf: - - string - - ref: giscus-themes - description: The light theme name. - dark: - anyOf: - - string - - ref: giscus-themes - description: The dark theme name. - - description: - short: The giscus theme to use when displaying comments. - long: | - The giscus theme to use when displaying comments. 
Light and dark themes are supported. If a single theme is provided by name, it will be used as light and dark theme. To use different themes, use `light` and `dark` key: - - ```yaml - website: - comments: - giscus: - theme: - light: light # giscus theme used for light website theme - dark: dark_dimmed # giscus theme used for dark website theme - ``` - language: - string: - description: The language that should be used when displaying the commenting interface. - required: [repo] - -- id: document-comments-configuration - anyOf: - - enum: [false] - - object: - closed: true - properties: - utterances: - object: - closed: true - properties: - repo: - string: - description: The Github repo that will be used to store comments. - label: - string: - description: The label that will be assigned to issues created by Utterances. - theme: - string: - description: - short: The Github theme that should be used for Utterances. - long: | - The Github theme that should be used for Utterances - (`github-light`, `github-dark`, `github-dark-orange`, - `icy-dark`, `dark-blue`, `photon-dark`, `body-light`, - or `gruvbox-dark`) - completions: - - github-light - - github-dark - - github-dark-orange - - icy-dark - - dark-blue - - photon-dark - - body-light - - gruvbox-dark - issue-term: - string: - description: - short: How posts should be mapped to Github issues - long: | - How posts should be mapped to Github issues - (`pathname`, `url`, `title` or `og:title`) - completions: - - pathname - - url - - title - - og:title - required: [repo] - giscus: - ref: giscus-configuration - hypothesis: - anyOf: - - boolean - - object: - closed: true - properties: - client-url: - string: - description: Override the default hypothesis client url with a custom client url. - openSidebar: - boolean: - default: false - description: Controls whether the sidebar opens automatically on startup. 
- showHighlights: - anyOf: - - boolean - - enum: ["always", "whenSidebarOpen", "never"] - default: "always" - description: Controls whether the in-document highlights are shown by default (`always`, `whenSidebarOpen` or `never`) - theme: - enum: ["classic", "clean"] - default: classic - description: Controls the overall look of the sidebar (`classic` or `clean`) - enableExperimentalNewNoteButton: - boolean: - default: false - description: | - Controls whether the experimental New Note button - should be shown in the notes tab in the sidebar. - usernameUrl: - schema: string - description: | - Specify a URL to direct a user to, - in a new tab. when they click on the annotation author - link in the header of an annotation. - services: - arrayOf: - object: - properties: - apiUrl: - string: - description: The base URL of the service API. - authority: - string: - description: The domain name which the annotation service is associated with. - grantToken: - string: - description: An OAuth 2 grant token which the client can send to the service in order to get an access token for making authenticated requests to the service. - allowLeavingGroups: - boolean: - default: true - description: A flag indicating whether users should be able to leave groups of which they are a member. - enableShareLinks: - boolean: - default: true - description: A flag indicating whether annotation cards should show links that take the user to see an annotation in context. - groups: - anyOf: - - enum: ["$rpc:requestGroups"] - - arrayOf: string - description: An array of Group IDs or the literal string `$rpc:requestGroups` - icon: - string: - description: The URL to an image for the annotation service. This image will appear to the left of the name of the currently selected group. - required: [apiUrl, authority, grantToken] - description: | - Alternative annotation services which the client should - connect to instead of connecting to the public Hypothesis - service at hypothes.is. 
- branding: - object: - properties: - accentColor: - string: - description: Secondary color for elements of the commenting UI. - appBackgroundColor: - string: - description: The main background color of the commenting UI. - ctaBackgroundColor: - string: - description: The background color for call to action buttons. - selectionFontFamily: - string: - description: The font family for selection text in the annotation card. - annotationFontFamily: - string: - description: The font family for the actual annotation value that the user writes about the page or selection. - description: Settings to adjust the commenting sidebar's look and feel. - externalContainerSelector: - string: - description: A CSS selector specifying the containing element into which the sidebar iframe will be placed. - focus: - object: - properties: - user: - object: - properties: - username: - string: - description: The username of the user to focus on. - userid: - string: - description: The userid of the user to focus on. - displayName: - string: - description: The display name of the user to focus on. - required: [user] - description: Defines a focused filter set for the available annotations on a page. - requestConfigFromFrame: - object: - properties: - origin: - string: - description: Host url and port number of receiving iframe - ancestorLevel: - number: - description: Number of nested iframes deep the client is relative from the receiving iframe. - assetRoot: - string: - description: The root URL from which assets are loaded. - sidebarAppUrl: - string: - description: The URL for the sidebar application which displays annotations. - default: "https://hypothes.is/app.html" - -- id: social-metadata - object: - closed: true - properties: - title: - string: - description: - short: "The title of the page" - long: | - The title of the page. Note that by default Quarto will automatically - use the title metadata from the page. 
Specify this field if you’d like - to override the title for this provider. - description: - string: - description: - short: "A short description of the content." - long: | - A short description of the content. Note that by default Quarto will - automatically use the description metadata from the page. Specify this - field if you’d like to override the description for this provider. - image: - path: - description: - short: "The path to a preview image for the content." - long: | - The path to a preview image for the content. By default, Quarto will use - the `image` value from the format metadata. If you provide an - image, you may also optionally provide an `image-width` and `image-height`. - image-alt: - path: - description: - short: "The alt text for the preview image." - long: | - The alt text for the preview image. By default, Quarto will use - the `image-alt` value from the format metadata. If you provide an - image, you may also optionally provide an `image-width` and `image-height`. - image-width: - number: - description: "Image width (pixels)" - image-height: - number: - description: "Image height (pixels)" - -- id: page-footer-region - anyOf: - - string - - arrayOf: - ref: navigation-item - -- id: sidebar-contents - anyOf: - - string - - ref: contents-auto - - arrayOf: - anyOf: - - ref: navigation-item - - path - - object: - closed: true - properties: - section: - anyOf: - - string - - null - contents: - ref: sidebar-contents - - ref: contents-auto - -- id: project-preview - object: - closed: true - properties: - port: - number: - description: Port to listen on (defaults to random value between 3000 and 8000) - host: - string: - description: Hostname to bind to (defaults to 127.0.0.1) - serve: - description: Use an exernal application to preview the project. 
- schema: - ref: project-serve - browser: - boolean: - description: Open a web browser to view the preview (defaults to true) - watch-inputs: - boolean: - description: Re-render input files when they change (defaults to true) - navigate: - boolean: - description: Navigate the browser automatically when outputs are updated (defaults to true) - timeout: - number: - description: Time (in seconds) after which to exit if there are no active clients - -- id: project-serve - object: - closed: true - properties: - cmd: - string: - description: | - Serve project preview using the specified command. - Interpolate the `--port` into the command using `{port}`. - args: - string: - description: Additional command line arguments for preview command. - env: - object: - description: Environment variables to set for preview command. - ready: - string: - description: Regular expression for detecting when the server is ready. - required: [cmd, ready] - -- id: publish - description: Sites published from project - schema: - object: - closed: true - properties: - netlify: - arrayOf: - ref: publish-record - description: "Sites published to Netlify" - -- id: publish-record - object: - closed: true - properties: - id: - string: - description: "Unique identifier for site" - url: - string: - description: "Published URL for site" - -- id: twitter-card-config - object: - super: - resolveRef: social-metadata - closed: true - properties: - card-style: - enum: [summary, summary_large_image] - description: - short: "Card style" - long: | - Card style (`summary` or `summary_large_image`). - - If this is not provided, the best style will automatically - selected based upon other metadata. You can learn more about Twitter Card - styles [here](https://developer.twitter.com/en/docs/twitter-for-websites/cards/overview/abouts-cards). 
- creator: - string: - description: "`@username` of the content creator (must be a quoted string)" - site: - string: - description: "`@username` of the website (must be a quoted string)" - -- id: open-graph-config - object: - super: - resolveRef: social-metadata - closed: true - properties: - locale: - string: - description: "Locale of open graph metadata" - site-name: - string: - description: - short: "Name that should be displayed for the overall site" - long: | - Name that should be displayed for the overall site. If not explicitly - provided in the `open-graph` metadata, Quarto will use the website or - book `title` by default. - -- id: page-footer - object: - properties: - left: - ref: page-footer-region - description: "Footer left content" - right: - ref: page-footer-region - description: "Footer right content" - center: - ref: page-footer-region - description: "Footer center content" - border: - anyOf: - - boolean - - string - description: "Footer border (`true`, `false`, or a border color)" - background: - schema: string - description: "Footer background color" - foreground: - schema: string - description: "Footer foreground color" - closed: true - -- id: base-website - object: - closed: true - properties: - title: - string: - description: "Website title" - description: - string: - description: "Website description" - favicon: - string: - description: "The path to the favicon for this website" - site-url: - string: - description: "Base URL for published website" - site-path: - string: - description: | - Path to site (defaults to `/`). Not required if you specify `site-url`. 
- repo-url: - string: - description: "Base URL for website source code repository" - repo-link-target: - string: - description: "The value of the target attribute for repo links" - repo-link-rel: - string: - description: "The value of the rel attribute for repo links" - repo-subdir: - string: - description: "Subdirectory of repository containing website" - repo-branch: - string: - description: "Branch of website source code (defaults to `main`)" - issue-url: - string: - description: "URL to use for the 'report an issue' repository action." - repo-actions: - maybeArrayOf: - enum: [none, edit, source, issue] - description: - short: "Links to source repository actions" - long: "Links to source repository actions (`none` or one or more of `edit`, `source`, `issue`)" - reader-mode: - boolean: - description: | - Displays a 'reader-mode' tool which allows users to hide the sidebar and table of contents when viewing a page. - google-analytics: - anyOf: - - string - - object: - properties: - tracking-id: - schema: string - description: "The Google tracking Id or measurement Id of this website." - storage: - enum: [cookies, none] - description: - short: "Storage options for Google Analytics data" - long: | - Storage option for Google Analytics data using on of these two values: - - `cookies`: Use cookies to store unique user and session identification (default). - - `none`: Do not use cookies to store unique user and session identification. - - For more about choosing storage options see [Storage](https://quarto.org/docs/websites/website-tools.html#storage). - anonymize-ip: - schema: boolean - description: - short: "Anonymize the user ip address." - long: | - Anonymize the user ip address. For more about this feature, see - [IP Anonymization (or IP masking) in Google Analytics](https://support.google.com/analytics/answer/2763052?hl=en). - version: - enum: [3, 4] - description: - short: "The version number of Google Analytics to use." 
- long: | - The version number of Google Analytics to use. - - - `3`: Use analytics.js - - `4`: use gtag. - - This is automatically detected based upon the `tracking-id`, but you may specify it. - description: "Enable Google Analytics for this website" - announcement: - anyOf: - - string - - object: - properties: - content: - schema: string - description: "The content of the announcement" - dismissable: - schema: boolean - description: "Whether this announcement may be dismissed by the user." - icon: - schema: string - description: - short: "The icon to display in the announcement" - long: | - Name of bootstrap icon (e.g. `github`, `twitter`, `share`) for the announcement. - See for a list of available icons - position: - schema: - enum: ["above-navbar", "below-navbar"] - description: - short: "The position of the announcement." - long: | - The position of the announcement. One of `above-navbar` (default) or `below-navbar`. - type: - schema: - enum: - [ - primary, - secondary, - success, - danger, - warning, - info, - light, - dark, - ] - description: - short: "The type of announcement. Affects the appearance of the announcement." - long: | - The type of announcement. One of `primary`, `secondary`, `success`, `danger`, `warning`, - `info`, `light` or `dark`. Affects the appearance of the announcement. - - description: Provides an announcement displayed at the top of the page. - cookie-consent: - anyOf: - - enum: [express, implied] - - boolean - - object: - properties: - type: - enum: [implied, express] - description: - short: "The type of consent that should be requested" - long: | - The type of consent that should be requested, using one of these two values: - - - `implied` (default): This will notify the user that the site uses cookies and permit them to change preferences, but not block cookies unless the user changes their preferences. 
- - - `express`: This will block cookies until the user expressly agrees to allow them (or continue blocking them if the user doesn’t agree). - style: - enum: [simple, headline, interstitial, standalone] - description: - short: "The style of the consent banner that is displayed" - long: | - The style of the consent banner that is displayed: - - - `simple` (default): A simple dialog in the lower right corner of the website. - - - `headline`: A full width banner across the top of the website. - - - `interstitial`: An semi-transparent overlay of the entire website. - - - `standalone`: An opaque overlay of the entire website. - palette: - enum: [light, dark] - description: "Whether to use a dark or light appearance for the consent banner (`light` or `dark`)." - policy-url: - schema: string - description: "The url to the website’s cookie or privacy policy." - language: - schema: string - description: - short: "The language to be used when diplaying the cookie consent prompt (defaults to document language)." - long: | - The language to be used when diplaying the cookie consent prompt specified using an IETF language tag. - - If not specified, the document language will be used. - prefs-text: - schema: string - description: - short: "The text to display for the cookie preferences link in the website footer." - description: - short: "Request cookie consent before enabling scripts that set cookies" - long: | - Quarto includes the ability to request cookie consent before enabling scripts that set cookies, using [Cookie Consent](https://www.cookieconsent.com/). - - The user’s cookie preferences will automatically control Google Analytics (if enabled) and can be used to control custom scripts you add as well. For more information see [Custom Scripts and Cookie Consent](https://quarto.org/docs/websites/website-tools.html#custom-scripts-and-cookie-consent). 
- search: - anyOf: - - boolean - - object: - properties: - location: - enum: [navbar, sidebar] - description: "Location for search widget (`navbar` or `sidebar`)" - type: - enum: [overlay, textbox] - description: "Type of search UI (`overlay` or `textbox`)" - limit: - schema: number - description: "Number of matches to display (defaults to 20)" - collapse-after: - schema: number - description: "Matches after which to collapse additional results" - copy-button: - schema: boolean - description: "Provide button for copying search link" - merge-navbar-crumbs: - schema: boolean - default: true - description: "When false, do not merge navbar crumbs into the crumbs in `search.json`." - keyboard-shortcut: - maybeArrayOf: - string: - description: "One or more keys that will act as a shortcut to launch search (single characters)" - show-item-context: - schema: - anyOf: - - enum: ["tree", "parent", "root"] - - boolean - description: "Whether to include search result parents when displaying items in search results (when possible)." - algolia: - object: - properties: - index-name: - schema: string - description: "The name of the index to use when performing a search" - application-id: - schema: string - description: "The unique ID used by Algolia to identify your application" - search-only-api-key: - schema: string - description: "The Search-Only API key to use to connect to Algolia" - analytics-events: - boolean: - description: "Enable tracking of Algolia analytics events" - show-logo: - boolean: - description: "Enable the display of the Algolia logo in the search results footer." 
- index-fields: - object: - properties: - href: - schema: string - description: "Field that contains the URL of index entries" - title: - schema: string - description: "Field that contains the title of index entries" - text: - schema: string - description: "Field that contains the text of index entries" - section: - schema: string - description: "Field that contains the section of index entries" - closed: true - params: - object: - description: "Additional parameters to pass when executing a search" - closed: true - description: "Use external Algolia search index" - closed: true - description: "Provide full text search for website" - - navbar: - anyOf: - - boolean - - object: - properties: - title: - anyOf: - - string - - boolean - description: "The navbar title. Uses the project title if none is specified." - logo: - ref: logo-light-dark-specifier - description: "Specification of image that will be displayed to the left of the title." - logo-alt: - string: - description: "Alternate text for the logo image." - logo-href: - string: - description: "Target href from navbar logo / title. By default, the logo and title link to the root page of the site (/index.html)." - background: - string: - description: "The navbar's background color (named or hex color)." - completions: - - primary - - secondary - - success - - danger - - warning - - info - - light - - dark - foreground: - string: - description: "The navbar's foreground color (named or hex color)." - completions: - - primary - - secondary - - success - - danger - - warning - - info - - light - - dark - search: - boolean: - description: "Include a search box in the navbar." - pinned: - boolean: - description: "Always show the navbar (keeping it pinned)." - default: false - collapse: - boolean: - description: "Collapse the navbar into a menu when the display becomes narrow." 
- default: true - collapse-below: - enum: [sm, md, lg, xl, xxl] - description: "The responsive breakpoint below which the navbar will collapse into a menu (`sm`, `md`, `lg` (default), `xl`, `xxl`)." - default: "lg" - left: - arrayOf: - ref: navigation-item - description: "List of items for the left side of the navbar." - right: - arrayOf: - ref: navigation-item - description: "List of items for the right side of the navbar." - toggle-position: - schema: - enum: [left, right] - description: "The position of the collapsed navbar toggle when in responsive mode" - default: "left" - tools-collapse: - boolean: - description: "Collapse tools into the navbar menu when the display becomes narrow." - default: false - - description: "Top navigation options" - - sidebar: - anyOf: - - boolean - - maybeArrayOf: - object: - properties: - id: - string: - description: "The identifier for this sidebar." - title: - anyOf: - - string - - boolean - description: "The sidebar title. Uses the project title if none is specified." - logo: - ref: logo-light-dark-specifier - description: "Specification of image that will be displayed in the sidebar." - logo-alt: - string: - description: "Alternate text for the logo image." - logo-href: - string: - description: "Target href from navbar logo / title. By default, the logo and title link to the root page of the site (/index.html)." - search: - boolean: - description: "Include a search control in the sidebar." - tools: - arrayOf: - ref: navigation-item-object - description: "List of sidebar tools" - contents: - ref: sidebar-contents - description: "List of items for the sidebar" - style: - enum: ["docked", "floating"] - description: "The style of sidebar (`docked` or `floating`)." - default: "floating" - background: - schema: string - completions: - - primary - - secondary - - success - - danger - - warning - - info - - light - - dark - description: "The sidebar's background color (named or hex color)." 
- foreground: - schema: string - completions: - - primary - - secondary - - success - - danger - - warning - - info - - light - - dark - description: "The sidebar's foreground color (named or hex color)." - border: - boolean: - description: "Whether to show a border on the sidebar (defaults to true for 'docked' sidebars)" - alignment: - enum: ["left", "right", "center"] - description: "Alignment of the items within the sidebar (`left`, `right`, or `center`)" - collapse-level: - number: - description: "The depth at which the sidebar contents should be collapsed by default." - default: 2 - pinned: - boolean: - description: "When collapsed, pin the collapsed sidebar to the top of the page." - header: - maybeArrayOf: string - description: "Markdown to place above sidebar content (text or file path)" - footer: - maybeArrayOf: string - description: "Markdown to place below sidebar content (text or file path)" - description: "Side navigation options" - body-header: - string: - description: "Markdown to insert at the beginning of each page’s body (below the title and author block)." - body-footer: - string: - description: "Markdown to insert below each page’s body." 
- margin-header: - maybeArrayOf: string - description: "Markdown to place above margin content (text or file path)" - margin-footer: - maybeArrayOf: string - description: "Markdown to place below margin content (text or file path)" - page-navigation: - boolean: - description: "Provide next and previous article links in footer" - back-to-top-navigation: - boolean: - description: "Provide a 'back to top' navigation button" - bread-crumbs: - boolean: - description: "Whether to show navigation breadcrumbs for pages more than 1 level deep" - page-footer: - anyOf: - - string - - ref: page-footer - description: "Shared page footer" - image: - path: - description: | - Default site thumbnail image for `twitter` /`open-graph` - image-alt: - path: - description: | - Default site thumbnail image alt text for `twitter` /`open-graph` - comments: - schema: - ref: document-comments-configuration - - open-graph: - anyOf: - - boolean - - ref: open-graph-config - description: "Publish open graph metadata" - twitter-card: - anyOf: - - boolean - - ref: twitter-card-config - description: "Publish twitter card metadata" - other-links: - schema: - ref: other-links - tags: - formats: [$html-doc] - description: "A list of other links to appear below the TOC." - code-links: - schema: - anyOf: - - boolean - - ref: code-links-schema - tags: - formats: [$html-doc] - description: "A list of code links to appear with this document." - drafts: - schema: - maybeArrayOf: path - description: "A list of input documents that should be treated as drafts" - draft-mode: - schema: - enum: ["visible", "unlinked", "gone"] - description: - short: "How to handle drafts that are encountered." - long: | - How to handle drafts that are encountered. - - `visible` - the draft will visible and fully available - `unlinked` - the draft will be rendered, but will not appear in navigation, search, or listings. - `gone` - the draft will have no content and will not be linked to (default). 
- -- id: book-schema - schema: - object: - closed: true - super: - resolveRef: base-website - properties: - title: - string: - description: "Book title" - subtitle: - string: - description: "Book subtitle" - author: - maybeArrayOf: - anyOf: [string, object] - description: "Author or authors of the book" - date: - string: - description: "Book publication date" - date-format: - string: - description: "Format string for dates in the book" - abstract: - string: - description: "Book abstract" - description: - string: - description: "Description metadata for HTML version of book" - chapters: - schema: - ref: chapter-list - description: "Book part and chapter files" - hidden: true - appendices: - schema: - ref: chapter-list - description: "Book appendix files" - hidden: true - references: - path: - description: "Book references file" - output-file: - path: - description: "Base name for single-file output (e.g. PDF, ePub, docx)" - cover-image: - path: - description: "Cover image (used in HTML and ePub formats)" - cover-image-alt: - string: - description: "Alternative text for cover image (used in HTML format)" - sharing: - maybeArrayOf: - enum: [twitter, facebook, linkedin] - description: | - Sharing buttons to include on navbar or sidebar - (one or more of `twitter`, `facebook`, `linkedin`) - downloads: - maybeArrayOf: - enum: [pdf, epub, docx] - description: | - Download buttons for other formats to include on navbar or sidebar - (one or more of `pdf`, `epub`, and `docx`) - tools: - arrayOf: - schema: - ref: navigation-item - description: "Custom tools for navbar or sidebar" - doi: - string: - tags: - formats: [$html-doc] - description: The Digital Object Identifier for this book. 
- -- id: chapter-item - anyOf: - - ref: navigation-item - - object: - properties: - part: - string: - description: "Part title or path to input file" - chapters: - arrayOf: - ref: navigation-item - description: "Path to chapter input file" - required: [part] - -- id: chapter-list - arrayOf: - ref: chapter-item - -- id: other-links - arrayOf: - object: - properties: - text: - string: - description: "The text for the link." - href: - string: - description: "The href for the link." - icon: - string: - description: "The bootstrap icon name for the link." - rel: - string: - description: "The rel attribute value for the link." - target: - string: - description: "The target attribute value for the link." - required: [text, href] - -- id: crossref-labels-schema - string: - completions: - - alpha - - arabic - - roman - -- id: epub-contributor - anyOf: - - string - - maybeArrayOf: - object: - closed: true - properties: - role: - string: - description: - short: The role of this creator or contributor. - long: | - The role of this creator or contributor using - [MARC relators](https://loc.gov/marc/relators/relaterm.html). Human readable - translations to commonly used relators (e.g. 'author', 'editor') will - attempt to be automatically translated. - file-as: - string: - description: An alternate version of the creator or contributor text used for alphabatizing. - text: - string: - description: The text describing the creator or contributor (for example, creator name). 
- -- id: format-language - object: - properties: - toc-title-document: string - toc-title-website: string - related-formats-title: string - related-notebooks-title: string - callout-tip-title: string - callout-note-title: string - callout-warning-title: string - callout-important-title: string - callout-caution-title: string - section-title-abstract: string - section-title-footnotes: string - section-title-appendices: string - code-summary: string - code-tools-menu-caption: string - code-tools-show-all-code: string - code-tools-hide-all-code: string - code-tools-view-source: string - code-tools-source-code: string - search-no-results-text: string - copy-button-tooltip: string - copy-button-tooltip-success: string - repo-action-links-edit: string - repo-action-links-source: string - repo-action-links-issue: string - search-matching-documents-text: string - search-copy-link-title: string - search-hide-matches-text: string - search-more-match-text: string - search-more-matches-text: string - search-clear-button-title: string - search-text-placeholder: string - search-detached-cancel-button-title: string - search-submit-button-title: string - crossref-fig-title: string - crossref-tbl-title: string - crossref-lst-title: string - crossref-thm-title: string - crossref-lem-title: string - crossref-cor-title: string - crossref-prp-title: string - crossref-cnj-title: string - crossref-def-title: string - crossref-exm-title: string - crossref-exr-title: string - crossref-fig-prefix: string - crossref-tbl-prefix: string - crossref-lst-prefix: string - crossref-ch-prefix: string - crossref-apx-prefix: string - crossref-sec-prefix: string - crossref-eq-prefix: string - crossref-thm-prefix: string - crossref-lem-prefix: string - crossref-cor-prefix: string - crossref-prp-prefix: string - crossref-cnj-prefix: string - crossref-def-prefix: string - crossref-exm-prefix: string - crossref-exr-prefix: string - crossref-lof-title: string - crossref-lot-title: string - 
crossref-lol-title: string - errorDescription: "be a format language description object" - -- id: website-about - object: - closed: true - required: [template] - properties: - id: - string: - description: - short: "The target id for the about page." - long: | - The target id of this about page. When the about page is rendered, it will - place read the contents of a `div` with this id into the about template that you - have selected (and replace the contents with the rendered about content). - - If no such `div` is defined on the page, a `div` with this id will be created - and appended to the end of the page. - template: - anyOf: - - enum: [jolla, trestles, solana, marquee, broadside] - - path - description: - short: "The template to use to layout this about page." - long: | - The template to use to layout this about page. Choose from: - - - `jolla` - - `trestles` - - `solana` - - `marquee` - - `broadside` - image: - path: - description: - short: "The path to the main image on the about page." - long: | - The path to the main image on the about page. If not specified, - the `image` provided for the document itself will be used. - image-alt: - path: - description: "The alt text for the main image on the about page." - image-title: - path: - description: "The title for the main image on the about page." - image-width: - string: - description: - short: "A valid CSS width for the about page image." - long: | - A valid CSS width for the about page image. - image-shape: - enum: [rectangle, round, rounded] - description: - short: "The shape of the image on the about page." - long: | - The shape of the image on the about page. - - - `rectangle` - - `round` - - `rounded` - links: - arrayOf: - ref: navigation-item - -- id: website-listing - object: - closed: true - properties: - id: - string: - description: - short: "The id of this listing." - long: | - The id of this listing. When the listing is rendered, it will - place the contents into a `div` with this id. 
If no such `div` is defined on the - page, a `div` with this id will be created and appended to the end of the page. - - If no `id` is provided for a listing, Quarto will synthesize one when rendering the page. - type: - enum: [default, table, grid, custom] - description: - short: "The type of listing to create." - long: | - The type of listing to create. Choose one of: - - - `default`: A blog style list of items - - `table`: A table of items - - `grid`: A grid of item cards - - `custom`: A custom template, provided by the `template` field - contents: - maybeArrayOf: - anyOf: - - string - - ref: website-listing-contents-object - description: "The files or path globs of Quarto documents or YAML files that should be included in the listing." - sort: - anyOf: - - boolean - - maybeArrayOf: string - description: - short: "Sort items in the listing by these fields." - long: | - Sort items in the listing by these fields. The sort key is made up of a - field name followed by a direction `asc` or `desc`. - - For example: - `date asc` - - Use `sort:false` to use the unsorted original order of items. - max-items: - number: - description: The maximum number of items to include in this listing. - page-size: - number: - description: The number of items to display on a page. - sort-ui: - anyOf: - - boolean - - arrayOf: string - description: - short: "Shows or hides the sorting control for the listing." - long: | - Shows or hides the sorting control for the listing. To control the - fields that will be displayed in the sorting control, provide a list - of field names. - filter-ui: - anyOf: - - boolean - - arrayOf: string - description: - short: "Shows or hides the filtering control for the listing." - long: | - Shows or hides the filtering control for the listing. To control the - fields that will be used to filter the listing, provide a list - of field names. By default all fields of the listing will be used - when filtering. 
- categories: - anyOf: - - boolean - - enum: [numbered, unnumbered, cloud] - description: - short: "Display item categories from this listing in the margin of the page." - long: | - Display item categories from this listing in the margin of the page. - - - `numbered`: Category list with number of items - - `unnumbered`: Category list - - `cloud`: Word cloud style categories - - feed: - anyOf: - - boolean - - object: - closed: true - properties: - items: - number: - description: | - The number of items to include in your feed. Defaults to 20. - type: - enum: [full, partial, metadata] - description: - short: Whether to include full or partial content in the feed. - long: | - Whether to include full or partial content in the feed. - - - `full` (default): Include the complete content of the document in the feed. - - `partial`: Include only the first paragraph of the document in the feed. - - `metadata`: Use only the title, description, and other document metadata in the feed. - title: - string: - description: - short: The title for this feed. - long: | - The title for this feed. Defaults to the site title provided the Quarto project. - image: - path: - description: - short: The path to an image for this feed. - long: | - The path to an image for this feed. If not specified, the image for the page the listing - appears on will be used, otherwise an image will be used if specified for the site - in the Quarto project. - description: - string: - description: - short: The description of this feed. - long: | - The description of this feed. If not specified, the description for the page the - listing appears on will be used, otherwise the description - of the site will be used if specified in the Quarto project. - language: - string: - description: - short: The language of the feed. - long: | - The language of the feed. Omitted if not specified. 
- See [https://www.rssboard.org/rss-language-codes](https://www.rssboard.org/rss-language-codes) - for a list of valid language codes. - categories: - maybeArrayOf: - string: - description: A list of categories for which to create separate RSS feeds containing only posts with that category - xml-stylesheet: - path: - description: The path to an XML stylesheet (XSL file) used to style the RSS feed. - description: Enables an RSS feed for the listing. - date-format: - string: - description: - short: "The date format to use when displaying dates (e.g. d-M-yyy)." - long: | - The date format to use when displaying dates (e.g. d-M-yyy). - Learn more about supported date formatting values [here](https://quarto.org/docs/reference/dates.html). - max-description-length: - number: - description: - short: "The maximum length (in characters) of the description displayed in the listing." - long: | - The maximum length (in characters) of the description displayed in the listing. - Defaults to 175. - image-placeholder: - string: - description: "The default image to use if an item in the listing doesn't have an image." - image-lazy-loading: - boolean: - description: "If false, images in the listing will be loaded immediately. If true, images will be loaded as they come into view." - default: true - image-align: - enum: [left, right] - description: In `default` type listings, whether to place the image on the right or left side of the post content (`left` or `right`). - image-height: - string: - description: - short: "The height of the image being displayed." - long: | - The height of the image being displayed (a CSS height string). - - The width is automatically determined and the image will fill the rectangle without scaling (cropped to fill). - grid-columns: - number: - description: - short: "In `grid` type listings, the number of columns in the grid display." - long: | - In grid type listings, the number of columns in the grid display. - Defaults to 3. 
- grid-item-border: - boolean: - description: - short: "In `grid` type listings, whether to display a border around the item card." - long: | - In grid type listings, whether to display a border around the item card. Defaults to `true`. - grid-item-align: - enum: [left, right, center] - description: - short: "In `grid` type listings, the alignment of the content within the card." - long: | - In grid type listings, the alignment of the content within the card (`left` (default), `right`, or `center`). - table-striped: - boolean: - description: - short: "In `table` type listings, display the table rows with alternating background colors." - long: | - In table type listings, display the table rows with alternating background colors. - Defaults to `false`. - table-hover: - boolean: - description: - short: "In `table` type listings, highlight rows of the table when the user hovers the mouse over them." - long: | - In table type listings, highlight rows of the table when the user hovers the mouse over them. - Defaults to false. - template: - path: - description: - short: "The path to a custom listing template." - long: | - The path to a custom listing template. - template-params: - schema: object - description: "Parameters that are passed to the custom template." - fields: - arrayOf: string - description: - short: "The list of fields to include in this listing" - long: | - The list of fields to include in this listing. - field-display-names: - object: - description: - short: "A mapping of display names for listing fields." - long: | - A mapping that provides display names for specific fields. For example, to display the title column as ‘Report’ in a table listing you would write: - - ```yaml - listing: - field-display-names: - title: "Report" - ``` - field-types: - object: - description: - short: "Provides the date type for the field of a listing item." - long: | - Provides the date type for the field of a listing item. 
Unknown fields are treated - as strings unless a type is provided. Valid types are `date`, `number`. - field-links: - arrayOf: string - description: - short: "This list of fields to display as links in a table listing." - long: | - The list of fields to display as hyperlinks to the source document - when the listing type is a table. By default, only the `title` or - `filename` is displayed as a link. - field-required: - arrayOf: string - description: - short: "Fields that items in this listing must have populated." - long: | - Fields that items in this listing must have populated. - If a listing is rendered and one more items in this listing - is missing a required field, an error will occur and the render will. - include: - maybeArrayOf: object - description: "Items with matching field values will be included in the listing." - exclude: - maybeArrayOf: object - description: "Items with matching field values will be excluded from the listing." - -- id: website-listing-contents-object - object: - properties: - author: - maybeArrayOf: string - date: string - title: string - subtitle: string - -- id: csl-date - anyOf: - - string - - maybeArrayOf: number - - object: - properties: - year: - number: - description: The year - month: - number: - description: The month - day: - number: - description: The day - -- id: csl-person - anyOf: - - maybeArrayOf: string - - maybeArrayOf: - object: - properties: - family-name: - string: - description: The family name. - given-name: - string: - description: The given name. - -- id: csl-number - anyOf: - - number - - string - -- id: csl-item-shared - object: - properties: - abstract-url: - string: - description: A url to the abstract for this item. - accessed: - ref: csl-date - description: Date the item has been accessed. - annote: - string: - description: - short: Short markup, decoration, or annotation to the item (e.g., to indicate items included in a review). 
- long: | - Short markup, decoration, or annotation to the item (e.g., to indicate items included in a review); - - For descriptive text (e.g., in an annotated bibliography), use `note` instead - archive: - string: - description: Archive storing the item - archive-collection: - string: - description: Collection the item is part of within an archive. - archive_collection: - schema: string - hidden: true - archive-location: - string: - description: Storage location within an archive (e.g. a box and folder number). - archive_location: - schema: string - hidden: true - archive-place: - string: - description: Geographic location of the archive. - authority: - string: - description: Issuing or judicial authority (e.g. "USPTO" for a patent, "Fairfax Circuit Court" for a legal case). - available-date: - ref: csl-date - description: - short: Date the item was initially available - long: | - Date the item was initially available (e.g. the online publication date of a journal - article before its formal publication date; the date a treaty was made available for signing). - call-number: - string: - description: Call number (to locate the item in a library). - chair: - ref: csl-person - description: The person leading the session containing a presentation (e.g. the organizer of the `container-title` of a `speech`). - chapter-number: - ref: csl-number - description: Chapter number (e.g. chapter number in a book; track number on an album). - citation-key: - string: - description: - short: Identifier of the item in the input data file (analogous to BiTeX entrykey). - long: | - Identifier of the item in the input data file (analogous to BiTeX entrykey); - - Use this variable to facilitate conversion between word-processor and plain-text writing systems; - For an identifer intended as formatted output label for a citation - (e.g. 
“Ferr78”), use `citation-label` instead - citation-label: - string: - description: - short: Label identifying the item in in-text citations of label styles (e.g. "Ferr78"). - long: | - Label identifying the item in in-text citations of label styles (e.g. "Ferr78"); - - May be assigned by the CSL processor based on item metadata; For the identifier of the item - in the input data file, use `citation-key` instead - citation-number: - schema: - ref: csl-number - description: Index (starting at 1) of the cited reference in the bibliography (generated by the CSL processor). - hidden: true - collection-editor: - ref: csl-person - description: Editor of the collection holding the item (e.g. the series editor for a book). - collection-number: - ref: csl-number - description: Number identifying the collection holding the item (e.g. the series number for a book) - collection-title: - string: - description: Title of the collection holding the item (e.g. the series title for a book; the lecture series title for a presentation). - compiler: - ref: csl-person - description: Person compiling or selecting material for an item from the works of various persons or bodies (e.g. for an anthology). - composer: - ref: csl-person - description: Composer (e.g. of a musical score). - container-author: - ref: csl-person - description: Author of the container holding the item (e.g. the book author for a book chapter). - container-title: - string: - description: - short: Title of the container holding the item. - long: | - Title of the container holding the item (e.g. 
the book title for a book chapter, - the journal title for a journal article; the album title for a recording; - the session title for multi-part presentation at a conference) - container-title-short: - string: - description: Short/abbreviated form of container-title; - hidden: true - contributor: - ref: csl-person - description: A minor contributor to the item; typically cited using “with” before the name when listed in a bibliography. - curator: - ref: csl-person - description: Curator of an exhibit or collection (e.g. in a museum). - dimensions: - string: - description: Physical (e.g. size) or temporal (e.g. running time) dimensions of the item. - director: - ref: csl-person - description: Director (e.g. of a film). - division: - string: - description: Minor subdivision of a court with a `jurisdiction` for a legal item - DOI: - schema: string - hidden: true - edition: - ref: csl-number - description: (Container) edition holding the item (e.g. "3" when citing a chapter in the third edition of a book). - editor: - ref: csl-person - description: The editor of the item. - editorial-director: - ref: csl-person - description: Managing editor ("Directeur de la Publication" in French). - editor-translator: - ref: csl-person - description: - short: Combined editor and translator of a work. - long: | - Combined editor and translator of a work. - - The citation processory must be automatically generate if editor and translator variables - are identical; May also be provided directly in item data. - event: - schema: string - hidden: true - event-date: - ref: csl-date - description: Date the event related to an item took place. - event-title: - string: - description: Name of the event related to the item (e.g. the conference name when citing a conference paper; the meeting where presentation was made). - event-place: - string: - description: Geographic location of the event related to the item (e.g. "Amsterdam, The Netherlands"). 
- executive-producer: - ref: csl-person - description: Executive producer of the item (e.g. of a television series). - first-reference-note-number: - schema: - ref: csl-number - description: - short: Number of a preceding note containing the first reference to the item. - long: | - Number of a preceding note containing the first reference to the item - - Assigned by the CSL processor; Empty in non-note-based styles or when the item hasn't - been cited in any preceding notes in a document - hidden: true - fulltext-url: - string: - description: A url to the full text for this item. - genre: - string: - description: - short: Type, class, or subtype of the item - long: | - Type, class, or subtype of the item (e.g. "Doctoral dissertation" for a PhD thesis; "NIH Publication" for an NIH technical report); - - Do not use for topical descriptions or categories (e.g. "adventure" for an adventure movie) - guest: - ref: csl-person - description: Guest (e.g. on a TV show or podcast). - host: - ref: csl-person - description: Host of the item (e.g. of a TV show or podcast). - id: - anyOf: - - string - - number - description: A value which uniquely identifies this item. - illustrator: - ref: csl-person - description: Illustrator (e.g. of a children’s book or graphic novel). - interviewer: - ref: csl-person - description: Interviewer (e.g. of an interview). - isbn: - string: - description: International Standard Book Number (e.g. "978-3-8474-1017-1"). - ISBN: - schema: string - hidden: true - issn: - string: - description: International Standard Serial Number. - ISSN: - schema: string - hidden: true - issue: - ref: csl-number - description: - short: Issue number of the item or container holding the item - long: | - Issue number of the item or container holding the item (e.g. "5" when citing a - journal article from journal volume 2, issue 5); - - Use `volume-title` for the title of the issue, if any. - issued: - ref: csl-date - description: Date the item was issued/published. 
- jurisdiction: - string: - description: Geographic scope of relevance (e.g. "US" for a US patent; the court hearing a legal case). - keyword: - string: - description: Keyword(s) or tag(s) attached to the item. - language: - string: - description: - short: The language of the item (used only for citation of the item). - long: | - The language of the item (used only for citation of the item). - - Should be entered as an ISO 639-1 two-letter language code (e.g. "en", "zh"), - optionally with a two-letter locale code (e.g. "de-DE", "de-AT"). - - This does not change the language of the item, instead it documents - what language the item uses (which may be used in citing the item). - license: - string: - description: - short: The license information applicable to an item. - long: | - The license information applicable to an item (e.g. the license an article - or software is released under; the copyright information for an item; - the classification status of a document) - locator: - ref: csl-number - description: - short: A cite-specific pinpointer within the item. - long: | - A cite-specific pinpointer within the item (e.g. a page number within a book, - or a volume in a multi-volume work). - - Must be accompanied in the input data by a label indicating the locator type - (see the Locators term list). - - medium: - string: - description: Description of the item’s format or medium (e.g. "CD", "DVD", "Album", etc.) - narrator: - ref: csl-person - description: Narrator (e.g. of an audio book). - note: - string: - description: Descriptive text or notes about an item (e.g. in an annotated bibliography). - number: - ref: csl-number - description: Number identifying the item (e.g. a report number). - number-of-pages: - ref: csl-number - description: Total number of pages of the cited item. - number-of-volumes: - ref: csl-number - description: Total number of volumes, used when citing multi-volume books and such. 
- organizer: - ref: csl-person - description: Organizer of an event (e.g. organizer of a workshop or conference). - original-author: - ref: csl-person - description: - short: The original creator of a work. - long: | - The original creator of a work (e.g. the form of the author name - listed on the original version of a book; the historical author of a work; - the original songwriter or performer for a musical piece; the original - developer or programmer for a piece of software; the original author of an - adapted work such as a book adapted into a screenplay) - original-date: - ref: csl-date - description: Issue date of the original version. - original-publisher: - string: - description: Original publisher, for items that have been republished by a different publisher. - original-publisher-place: - string: - description: Geographic location of the original publisher (e.g. "London, UK"). - original-title: - string: - description: Title of the original version (e.g. "Война и мир", the untranslated Russian title of "War and Peace"). - page: - ref: csl-number - description: Range of pages the item (e.g. a journal article) covers in a container (e.g. a journal issue). - page-first: - ref: csl-number - description: First page of the range of pages the item (e.g. a journal article) covers in a container (e.g. a journal issue). - page-last: - ref: csl-number - description: Last page of the range of pages the item (e.g. a journal article) covers in a container (e.g. a journal issue). - part-number: - ref: csl-number - description: - short: Number of the specific part of the item being cited (e.g. part 2 of a journal article). - long: | - Number of the specific part of the item being cited (e.g. part 2 of a journal article). - - Use `part-title` for the title of the part, if any. - part-title: - string: - description: Title of the specific part of an item being cited. - pdf-url: - string: - description: A url to the pdf for this item. 
- performer: - ref: csl-person - description: Performer of an item (e.g. an actor appearing in a film; a muscian performing a piece of music). - pmcid: - string: - description: PubMed Central reference number. - PMCID: - schema: string - hidden: true - pmid: - string: - description: PubMed reference number. - PMID: - schema: string - hidden: true - printing-number: - ref: csl-number - description: Printing number of the item or container holding the item. - producer: - ref: csl-person - description: Producer (e.g. of a television or radio broadcast). - public-url: - string: - description: A public url for this item. - publisher: - string: - description: The publisher of the item. - publisher-place: - string: - description: The geographic location of the publisher. - recipient: - ref: csl-person - description: Recipient (e.g. of a letter). - reviewed-author: - ref: csl-person - description: Author of the item reviewed by the current item. - reviewed-genre: - string: - description: Type of the item being reviewed by the current item (e.g. book, film). - reviewed-title: - string: - description: Title of the item reviewed by the current item. - scale: - string: - description: Scale of e.g. a map or model. - script-writer: - ref: csl-person - description: Writer of a script or screenplay (e.g. of a film). - section: - ref: csl-number - description: Section of the item or container holding the item (e.g. "§2.0.1" for a law; "politics" for a newspaper article). - series-creator: - ref: csl-person - description: Creator of a series (e.g. of a television series). - source: - string: - description: Source from whence the item originates (e.g. a library catalog or database). - status: - string: - description: Publication status of the item (e.g. "forthcoming"; "in press"; "advance online publication"; "retracted") - submitted: - ref: csl-date - description: Date the item (e.g. a manuscript) was submitted for publication. 
- supplement-number: - ref: csl-number - description: Supplement number of the item or container holding the item (e.g. for secondary legal items that are regularly updated between editions). - title-short: - string: - description: Short/abbreviated form of`title`. - hidden: true - translator: - ref: csl-person - description: Translator - type: - enum: - [ - "article", - "article-journal", - "article-magazine", - "article-newspaper", - "bill", - "book", - "broadcast", - "chapter", - "classic", - "collection", - "dataset", - "document", - "entry", - "entry-dictionary", - "entry-encyclopedia", - "event", - "figure", - "graphic", - "hearing", - "interview", - "legal_case", - "legislation", - "manuscript", - "map", - "motion_picture", - "musical_score", - "pamphlet", - "paper-conference", - "patent", - "performance", - "periodical", - "personal_communication", - "post", - "post-weblog", - "regulation", - "report", - "review", - "review-book", - "software", - "song", - "speech", - "standard", - "thesis", - "treaty", - "webpage", - ] - description: The [type](https://docs.citationstyles.org/en/stable/specification.html#appendix-iii-types) of the item. - url: - string: - description: Uniform Resource Locator (e.g. "https://aem.asm.org/cgi/content/full/74/9/2766") - URL: - schema: string - hidden: true - version: - ref: csl-number - description: Version of the item (e.g. "2.0.9" for a software program). - volume: - ref: csl-number - description: - short: Volume number of the item (e.g. “2” when citing volume 2 of a book) or the container holding the item. - long: | - Volume number of the item (e.g. "2" when citing volume 2 of a book) or the container holding the - item (e.g. "2" when citing a chapter from volume 2 of a book). - - Use `volume-title` for the title of the volume, if any. - volume-title: - string: - description: - short: Title of the volume of the item or container holding the item. - long: | - Title of the volume of the item or container holding the item. 
- - Also use for titles of periodical special issues, special sections, and the like. - year-suffix: - string: - description: Disambiguating year suffix in author-date styles (e.g. "a" in "Doe, 1999a"). - -- id: csl-item - object: - super: - resolveRef: csl-item-shared - closed: true - properties: - abstract: - string: - description: Abstract of the item (e.g. the abstract of a journal article) - author: - ref: csl-person - description: The author(s) of the item. - doi: - string: - description: Digital Object Identifier (e.g. "10.1128/AEM.02591-07") - references: - string: - description: - short: Resources related to the procedural history of a legal case or legislation. - long: | - Resources related to the procedural history of a legal case or legislation; - - Can also be used to refer to the procedural history of other items (e.g. - "Conference canceled" for a presentation accepted as a conference that was subsequently - canceled; details of a retraction or correction notice) - title: - string: - description: The primary title of the item. - id: - anyOf: - - string - - number - description: Citation identifier for the item (e.g. "item1"). Will be autogenerated if not provided. - -- id: citation-item - object: - super: - resolveRef: csl-item - closed: true - properties: - article-id: - maybeArrayOf: - anyOf: - - string - - object: - properties: - type: - string: - description: The type of identifier - value: - string: - description: The value for the identifier - description: The unique identifier for this article. - elocation-id: - string: - description: Bibliographic identifier for a document that does not have traditional printed page numbers. - eissn: - string: - description: Electronic International Standard Serial Number. - pissn: - string: - description: Print International Standard Serial Number. - art-access-id: - string: - description: Generic article accession identifier. 
- publisher-location: - string: - description: The location of the publisher of this item. - subject: - string: - description: The name of a subject or topic describing the article. - categories: - maybeArrayOf: - string: - description: A list of subjects or topics describing the article. - container-id: - maybeArrayOf: - anyOf: - - string - - object: - properties: - type: - string: - description: The type of identifier (e.g. `nlm-ta` or `pmc`). - value: - string: - description: The value for the identifier - description: - short: External identifier of a publication or journal. - long: | - External identifier, typically assigned to a journal by - a publisher, archive, or library to provide a unique identifier for - the journal or publication. - jats-type: - string: - description: The type used for the JATS `article` tag. - -- id: smart-include - anyOf: - - record: - text: - string: - description: Textual content to add to includes - - record: - file: - string: - description: Name of file with content to add to includes - -- id: semver - string: - # from https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string - pattern: "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" - description: Version number according to Semantic Versioning - -- id: quarto-date - anyOf: - - string - - object: - closed: true - properties: - format: string - value: string - required: [value] - -- id: project-profile - schema: - object: - closed: true - properties: - default: - maybeArrayOf: string - description: | - Default profile to apply if QUARTO_PROFILE is not defined. - group: - maybeArrayOf: - arrayOf: string - description: | - Define a profile group for which at least one profile is always active. 
- description: Specify a default profile and profile groups - -- id: bad-parse-schema - schema: - object: - propertyNames: - string: - pattern: "^[^\\s]+$" - -- id: quarto-dev-schema - schema: - object: - properties: - _quarto: - hidden: true - object: - properties: - trace-filters: string - tests: object - -- id: notebook-view-schema - schema: - object: - properties: - notebook: - string: - description: "The path to the locally referenced notebook." - title: - description: "The title of the notebook when viewed." - anyOf: - - string - - boolean - url: - string: - description: "The url to use when viewing this notebook." - download-url: - string: - description: "The url to use when downloading the notebook from the preview" - required: [notebook] - -- id: code-links-schema - schema: - anyOf: - - boolean - - maybeArrayOf: - anyOf: - - object: - properties: - icon: - string: - description: The bootstrap icon for this code link. - text: - string: - description: The text for this code link. - href: - string: - description: The href for this code link. - rel: - string: - description: The rel used in the `a` tag for this code link. - target: - string: - description: The target used in the `a` tag for this code link. - - enum: ["repo", "binder", "devcontainer"] - -- id: manuscript-schema - schema: - object: - closed: true - properties: - "article": - path: - description: "The input document that will serve as the root document for this manuscript" - "code-links": - schema: - ref: code-links-schema - description: "Code links to display for this manuscript." 
- "manuscript-url": - string: - description: "The deployed url for this manuscript" - "meca-bundle": - anyOf: - - boolean - - string - description: "Whether to generate a MECA bundle for this manuscript" - "notebooks": - arrayOf: - anyOf: - - string - - ref: notebook-view-schema - "resources": - maybeArrayOf: - schema: path - description: "Additional file resources to be copied to output directory" - "environment": - maybeArrayOf: - schema: path - description: "Files that specify the execution environment (e.g. renv.lock, requirements.text, etc...)" - -- id: brand-meta - description: > - Metadata for a brand, including the brand name and important links. - object: - closed: false - properties: - name: - description: The brand name. - anyOf: - - string - - object: - properties: - full: - string: - description: The full, official or legal name of the company or brand. - short: - string: - description: The short, informal, or common name of the company or brand. - link: - description: > - Important links for the brand, including social media links. - If a single string, it is the brand's home page or website. - Additional fields are allowed for internal use. - anyOf: - - string - - object: - properties: - home: - string: - description: The brand's home page or website. - mastodon: - string: - description: The brand's Mastodon URL. - bluesky: - string: - description: The brand's Bluesky URL. - github: - string: - description: The brand's GitHub URL. - linkedin: - string: - description: The brand's LinkedIn URL. - twitter: - string: - description: The brand's Twitter URL. - facebook: - string: - description: The brand's Facebook URL. - -- id: brand-string-light-dark - anyOf: - - string - - object: - closed: true - properties: - light: - schema: string - description: > - A link or path to the brand's light-colored logo or icon. - dark: - schema: string - description: > - A link or path to the brand's dark-colored logo or icon. 
- -- id: brand-logo-explicit-resource - object: - closed: true - properties: - path: path - alt: - schema: string - description: > - Alternative text for the logo, used for accessibility. - required: [path] - -- id: brand-logo-resource - anyOf: - - string - - ref: brand-logo-explicit-resource - -- id: brand-logo-single - description: > - Provide definitions and defaults for brand's logo in various formats and sizes. - object: - closed: true - properties: - images: - description: A dictionary of named logo resources. - schema: - object: - additionalProperties: - schema: { ref: brand-logo-resource } - small: - description: > - A link or path to the brand's small-sized logo or icon. - schema: string - medium: - description: > - A link or path to the brand's medium-sized logo. - schema: string - large: - description: > - A link or path to the brand's large- or full-sized logo. - schema: string - -- id: brand-logo-unified - description: > - Provide definitions and defaults for brand's logo in various formats and sizes. - object: - closed: true - properties: - images: - description: A dictionary of named logo resources. - schema: - object: - additionalProperties: - schema: { ref: brand-logo-resource } - small: - description: > - A link or path to the brand's small-sized logo or icon, or a link or path - to both the light and dark versions. - schema: - ref: brand-string-light-dark - medium: - description: > - A link or path to the brand's medium-sized logo, or a link or path - to both the light and dark versions. - schema: - ref: brand-string-light-dark - large: - description: > - A link or path to the brand's large- or full-sized logo, or a link or path - to both the light and dark versions. - schema: - ref: brand-string-light-dark - -- id: brand-named-logo - description: Names of customizeable logos - enum: [small, medium, large] - -- id: logo-options - object: - closed: false # e.g. 
to allow typst location, padding, padding-*, width - properties: - path: - schema: path - description: > - Path or brand.yml logo resource name. - alt: - schema: string - description: > - Alternative text for the logo, used for accessibility. - required: [path] - -- id: logo-specifier - anyOf: - - string - - schema: - ref: logo-options - -- id: logo-options-path-optional - object: - closed: false # e.g. to allow typst location, padding, padding-*, width - properties: - path: - schema: path - description: > - Path or brand.yml logo resource name. - alt: - schema: string - description: > - Alternative text for the logo, used for accessibility. - -- id: logo-specifier-path-optional - anyOf: - - string - - schema: - ref: logo-options-path-optional - -- id: logo-light-dark-specifier - description: > - Any of the ways a logo can be specified: string, object, or light/dark object of - string or object - anyOf: - - ref: logo-specifier - - object: - closed: true - properties: - light: - schema: - ref: logo-specifier - description: > - Specification of a light logo - dark: - schema: - ref: logo-specifier - description: > - Specification of a dark logo - -- id: logo-light-dark-specifier-path-optional - description: > - Any of the ways a logo can be specified: string, object, or light/dark object of - string or object - anyOf: - - ref: logo-specifier-path-optional - - object: - closed: true - properties: - light: - schema: - ref: logo-specifier-path-optional - description: > - Specification of a light logo - dark: - schema: - ref: logo-specifier-path-optional - description: > - Specification of a dark logo - -# normalized version of logo-light-dark-specifier -- id: normalized-logo-light-dark-specifier - description: > - Any of the ways a logo can be specified: string, object, or light/dark object of - string or object - object: - closed: true - properties: - light: - schema: - ref: logo-options - description: > - Options for a light logo - dark: - schema: - ref: logo-options - 
description: > - Options for a dark logo - -- id: brand-color-value - schema: string - -- id: brand-color-single - description: > - The brand's custom color palette and theme. - object: - closed: true - properties: - palette: - description: > - The brand's custom color palette. Any number of colors can be defined, - each color having a custom name. - object: - additionalProperties: - schema: - ref: brand-color-value - foreground: - description: The foreground color, used for text. - schema: - ref: brand-color-value - default: black - background: - description: The background color, used for the page background. - schema: - ref: brand-color-value - default: white - primary: - description: > - The primary accent color, i.e. the main theme color. Typically used for - hyperlinks, active states, primary action buttons, etc. - schema: - ref: brand-color-value - secondary: - description: > - The secondary accent color. Typically used for lighter text or disabled states. - schema: - ref: brand-color-value - tertiary: - description: > - The tertiary accent color. Typically an even lighter color, used for hover states, - accents, and wells. - schema: - ref: brand-color-value - success: - description: The color used for positive or successful actions and information. - schema: - ref: brand-color-value - info: - description: The color used for neutral or informational actions and information. - schema: - ref: brand-color-value - warning: - description: The color used for warning or cautionary actions and information. - schema: - ref: brand-color-value - danger: - description: The color used for errors, dangerous actions, or negative information. - schema: - ref: brand-color-value - light: - description: > - A bright color, used as a high-contrast foreground color on dark elements - or low-contrast background color on light elements. 
- schema: - ref: brand-color-value - dark: - description: > - A dark color, used as a high-contrast foreground color on light elements - or high-contrast background color on light elements. - schema: - ref: brand-color-value - link: - description: > - The color used for hyperlinks. If not defined, the `primary` color is used. - schema: - ref: brand-color-value - -- id: brand-color-light-dark - anyOf: - - ref: brand-color-value - - object: - closed: true - properties: - light: - schema: - ref: brand-color-value - description: > - A link or path to the brand's light-colored logo or icon. - dark: - schema: - ref: brand-color-value - description: > - A link or path to the brand's dark-colored logo or icon. - -- id: brand-color-unified - description: > - The brand's custom color palette and theme. - object: - closed: true - properties: - palette: - description: > - The brand's custom color palette. Any number of colors can be defined, - each color having a custom name. - object: - additionalProperties: - schema: - ref: brand-color-value - foreground: - description: The foreground color, used for text. - schema: - ref: brand-color-light-dark - default: black - background: - description: The background color, used for the page background. - schema: - ref: brand-color-light-dark - default: white - primary: - description: > - The primary accent color, i.e. the main theme color. Typically used for - hyperlinks, active states, primary action buttons, etc. - schema: - ref: brand-color-light-dark - secondary: - description: > - The secondary accent color. Typically used for lighter text or disabled states. - schema: - ref: brand-color-light-dark - tertiary: - description: > - The tertiary accent color. Typically an even lighter color, used for hover states, - accents, and wells. - schema: - ref: brand-color-light-dark - success: - description: The color used for positive or successful actions and information. 
- schema: - ref: brand-color-light-dark - info: - description: The color used for neutral or informational actions and information. - schema: - ref: brand-color-light-dark - warning: - description: The color used for warning or cautionary actions and information. - schema: - ref: brand-color-light-dark - danger: - description: The color used for errors, dangerous actions, or negative information. - schema: - ref: brand-color-light-dark - light: - description: > - A bright color, used as a high-contrast foreground color on dark elements - or low-contrast background color on light elements. - schema: - ref: brand-color-light-dark - dark: - description: > - A dark color, used as a high-contrast foreground color on light elements - or high-contrast background color on light elements. - schema: - ref: brand-color-light-dark - link: - description: > - The color used for hyperlinks. If not defined, the `primary` color is used. - schema: - ref: brand-color-light-dark - -- id: brand-maybe-named-color - description: > - A color, which may be a named brand color. - anyOf: - - ref: brand-named-theme-color - - schema: string - -- id: brand-maybe-named-color-light-dark - anyOf: - - ref: brand-maybe-named-color - - object: - closed: true - properties: - light: - schema: - ref: brand-maybe-named-color - description: > - A link or path to the brand's light-colored logo or icon. - dark: - schema: - ref: brand-maybe-named-color - description: > - A link or path to the brand's dark-colored logo or icon. - -- id: brand-named-theme-color - description: > - A named brand color, taken either from `color.theme` or `color.palette` (in that order). - enum: - [ - foreground, - background, - primary, - secondary, - tertiary, - success, - info, - warning, - danger, - light, - dark, - link, - ] - -- id: brand-typography-single - description: Typography definitions for the brand. - object: - closed: true - properties: - fonts: - description: Font files and definitions for the brand. 
- arrayOf: - ref: brand-font - base: - description: > - The base font settings for the brand. These are used as the default for - all text. - ref: brand-typography-options-base - headings: - description: Settings for headings, or a string specifying the font family only. - ref: brand-typography-options-headings-single - monospace: - description: Settings for monospace text, or a string specifying the font family only. - ref: brand-typography-options-monospace-single - monospace-inline: - description: Settings for inline code, or a string specifying the font family only. - ref: brand-typography-options-monospace-inline-single - monospace-block: - description: Settings for code blocks, or a string specifying the font family only. - ref: brand-typography-options-monospace-block-single - link: - description: Settings for links. - ref: brand-typography-options-link-single - -- id: brand-typography-unified - description: Typography definitions for the brand. - object: - closed: true - properties: - fonts: - description: Font files and definitions for the brand. - arrayOf: - ref: brand-font - base: - description: > - The base font settings for the brand. These are used as the default for - all text. - ref: brand-typography-options-base - headings: - description: Settings for headings, or a string specifying the font family only. - ref: brand-typography-options-headings-unified - monospace: - description: Settings for monospace text, or a string specifying the font family only. - ref: brand-typography-options-monospace-unified - monospace-inline: - description: Settings for inline code, or a string specifying the font family only. - ref: brand-typography-options-monospace-inline-unified - monospace-block: - description: Settings for code blocks, or a string specifying the font family only. - ref: brand-typography-options-monospace-block-unified - link: - description: Settings for links. 
- ref: brand-typography-options-link-unified - -- id: brand-typography-options-base - description: Base typographic options. - anyOf: - - string - - object: - closed: true - properties: - family: string - size: string - weight: - ref: brand-font-weight - line-height: - ref: line-height-number-string - -- id: brand-typography-options-headings-single - description: Typographic options for headings. - anyOf: - - string - - object: - closed: true - properties: - family: string - weight: - ref: brand-font-weight - style: - ref: brand-font-style - color: - ref: brand-maybe-named-color - line-height: - ref: line-height-number-string - -- id: brand-typography-options-headings-unified - description: Typographic options for headings. - anyOf: - - string - - object: - closed: true - properties: - family: string - weight: - ref: brand-font-weight - style: - ref: brand-font-style - color: - ref: brand-maybe-named-color-light-dark - line-height: - ref: line-height-number-string - -- id: brand-typography-options-monospace-single - description: Typographic options for monospace elements. - anyOf: - - string - - object: - closed: true - properties: - family: string - size: string - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color - background-color: - ref: brand-maybe-named-color - -- id: brand-typography-options-monospace-unified - description: Typographic options for monospace elements. - anyOf: - - string - - object: - closed: true - properties: - family: string - size: string - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color-light-dark - background-color: - ref: brand-maybe-named-color-light-dark - -- id: brand-typography-options-monospace-inline-single - description: Typographic options for inline monospace elements. 
- anyOf: - - string - - object: - closed: true - properties: - family: string - size: string - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color - background-color: - ref: brand-maybe-named-color - -- id: brand-typography-options-monospace-inline-unified - description: Typographic options for inline monospace elements. - anyOf: - - string - - object: - closed: true - properties: - family: string - size: string - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color-light-dark - background-color: - ref: brand-maybe-named-color-light-dark - -- id: line-height-number-string - description: Line height - anyOf: [number, string] - -- id: brand-typography-options-monospace-block-single - description: Typographic options for block monospace elements. - anyOf: - - string - - object: - closed: true - properties: - family: string - size: string - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color - background-color: - ref: brand-maybe-named-color - line-height: - ref: line-height-number-string - -- id: brand-typography-options-monospace-block-unified - description: Typographic options for block monospace elements. - anyOf: - - string - - object: - closed: true - properties: - family: string - size: string - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color-light-dark - background-color: - ref: brand-maybe-named-color-light-dark - line-height: - ref: line-height-number-string - -- id: brand-typography-options-link-single - description: Typographic options for inline monospace elements. - anyOf: - - string - - object: - closed: true - properties: - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color - background-color: - ref: brand-maybe-named-color - decoration: string - -- id: brand-typography-options-link-unified - description: Typographic options for inline monospace elements. 
- anyOf: - - string - - object: - closed: true - properties: - weight: - ref: brand-font-weight - color: - ref: brand-maybe-named-color-light-dark - background-color: - ref: brand-maybe-named-color-light-dark - decoration: string - -- id: brand-named-typography-elements - description: Names of customizeable typography elements - enum: [base, headings, monospace, monospace-inline, monospace-block, link] - -- id: brand-font - description: Font files and definitions for the brand. - anyOf: - - ref: brand-font-google - - ref: brand-font-bunny - - ref: brand-font-file - - ref: brand-font-system - # a font definition missing source information, - # from which we will assume a default source - # - # in Quarto, the default source for typst is `google` - # and the default source for html formats is `bunny` - -- id: brand-font-weight - description: A font weight. - enum: - [ - 100, - 200, - 300, - 400, - 500, - 600, - 700, - 800, - 900, - thin, - extra-light, - ultra-light, - light, - normal, - regular, - medium, - semi-bold, - demi-bold, - bold, - extra-bold, - ultra-bold, - black, - ] - default: 400 - -- id: brand-font-style - description: A font style. - enum: [normal, italic, oblique] - default: normal - -- id: brand-font-common - schema: - object: - closed: true - properties: - family: - description: The font family name, which must match the name of the font on the foundry website. - schema: string - weight: - description: The font weights to include. - maybeArrayOf: - ref: brand-font-weight - default: [400, 700] - style: - description: The font styles to include. - maybeArrayOf: - ref: brand-font-style - default: [normal, italic] - display: - description: > - The font display method, determines how a font face is font face is shown - depending on its download status and readiness for use. - enum: [auto, block, swap, fallback, optional] - default: swap - -- id: brand-font-system - description: A system font definition. 
- object: - super: - resolveRef: brand-font-common - closed: true - properties: - source: - enum: [system] - required: [source] - -- id: brand-font-google - description: A font definition from Google Fonts. - object: - super: - resolveRef: brand-font-common - closed: true - properties: - source: - enum: [google] - required: [source] - -- id: brand-font-bunny - description: A font definition from fonts.bunny.net. - object: - super: - resolveRef: brand-font-common - closed: true - properties: - source: - enum: [bunny] - required: [source] - -- id: brand-font-file - description: A method for providing font files directly, either locally or from an online location. - object: - closed: true - properties: - source: - enum: [file] - family: - description: The font family name. - schema: string - files: - arrayOf: - anyOf: - - path - - schema: - object: - properties: - path: - schema: path - description: > - The path to the font file. This can be a local path or a URL. - weight: - ref: brand-font-weight - style: - ref: brand-font-style - required: - [path] - # We might want to eventually support these but not on 1.0 - # - # display: - # description: > - # The font display method, determines how a font face is font face is shown - # depending on its download status and readiness for use. - # enum: [auto, block, swap, fallback, optional] - # default: swap - # unicode-range: - # description: > - # The range of unicode characters included in the font. Examples: - # - `U+0025-00FF` includes all characters from `U+0025` to `U+00FF`. - # - `U+0131,U+0152-0153` includes the characters `U+0131`, `U+0152`, and `U+0153`. - # schema: - # string: - # pattern: '^U\+[0-9A-F]{4}(?:-[0-9A-F]{4})?(?:,U\+[0-9A-F]{4}(?:-[0-9A-F]{4})?)*$' - description: > - The font files to include. These can be local or online. - Local file paths should be relative to the `brand.yml` file. - Online paths should be complete URLs. 
- required: [files, family, source] - -- id: brand-font-family - description: > - A locally-installed font family name. When used, the end-user is responsible - for ensuring that the font is installed on their system. - schema: string - -- id: brand-single - object: - closed: true - properties: - meta: - ref: brand-meta - logo: - ref: brand-logo-single - color: - ref: brand-color-single - typography: - ref: brand-typography-single - defaults: - ref: brand-defaults - -- id: brand-unified - object: - closed: true - properties: - meta: - ref: brand-meta - logo: - ref: brand-logo-unified - color: - ref: brand-color-unified - typography: - ref: brand-typography-unified - defaults: - ref: brand-defaults - -- id: brand-path-only-light-dark - anyOf: - - string - - object: - closed: true - properties: - light: string - dark: string - description: | - A path to a brand.yml file, or an object with light and dark paths to brand.yml - -- id: brand-path-bool-light-dark - anyOf: - - string # a file path - - boolean # if false, don't use branding on this document - - object: - closed: true - properties: - light: - anyOf: - - string - - ref: brand-single - description: > - The path to a light brand file or an inline light brand definition. - dark: - anyOf: - - string - - ref: brand-single - description: > - The path to a dark brand file or an inline dark brand definition. - - ref: brand-unified - description: | - Branding information to use for this document. If a string, the path to a brand file. - If false, don't use branding on this document. If an object, an inline (unified) brand - definition, or an object with light and dark brand paths or definitions. 
- -- id: brand-defaults - object: - properties: - bootstrap: - ref: brand-defaults-bootstrap - quarto: - schema: object - -- id: brand-defaults-bootstrap - object: - properties: - defaults: - schema: - object: - additionalProperties: - schema: - anyOf: - - string - - boolean - - number -# - id: quarto-extension diff --git a/crates/quarto-yaml-validation/test-fixtures/schemas/document-execute.yml b/crates/quarto-yaml-validation/test-fixtures/schemas/document-execute.yml deleted file mode 100644 index cef9fbf46..000000000 --- a/crates/quarto-yaml-validation/test-fixtures/schemas/document-execute.yml +++ /dev/null @@ -1,150 +0,0 @@ -- name: engine - schema: - string: - completions: [jupyter, knitr, julia] - description: "Engine used for executable code blocks." - -- name: jupyter - schema: - anyOf: - - boolean - - string - - object: - hidden: true # don't complete through a single-key object - properties: - kernelspec: - object: - properties: - display_name: - string: - description: The name to display in the UI. - language: - string: - description: The name of the language the kernel implements. - name: - string: - description: The name of the kernel. - required: all - description: Configures the Jupyter engine. - -- name: julia - schema: - object: - properties: - exeflags: - schema: - arrayOf: string - description: Arguments to pass to the Julia worker process. - env: - schema: - arrayOf: string - description: Environment variables to pass to the Julia worker process. - description: Configures the Julia engine. - -- name: knitr - schema: - anyOf: - - boolean - - object: - closed: true - properties: - opts_knit: - object: - description: Knit options. - opts_chunk: - object: - description: Knitr chunk options. - description: Set Knitr options. - -- name: cache - tags: - execute-only: true - schema: - anyOf: - - boolean - - enum: [refresh] - default: false - description: - short: "Cache results of computations." 
- long: | - Cache results of computations (using the [knitr cache](https://yihui.org/knitr/demo/cache/) - for R documents, and [Jupyter Cache](https://jupyter-cache.readthedocs.io/en/latest/) - for Jupyter documents). - - Note that cache invalidation is triggered by changes in chunk source code - (or other cache attributes you've defined). - - - `true`: Cache results - - `false`: Do not cache results - - `refresh`: Force a refresh of the cache even if has not been otherwise invalidated. - -- name: freeze - tags: - execute-only: true - schema: - anyOf: - - boolean - - enum: [auto] - default: false - description: - short: "Re-use previous computational output when rendering" - long: | - Control the re-use of previous computational output when rendering. - - - `true`: Never recompute previously generated computational output during a global project render - - `false` (default): Recompute previously generated computational output - - `auto`: Re-compute previously generated computational output only in case their source file changes - -- name: server - hidden: true - schema: - anyOf: - - enum: [shiny] - - object: - properties: - type: - enum: [shiny] - description: Type of server to run behind the document (e.g. `shiny`) - ojs-export: - maybeArrayOf: string - description: OJS variables to export to server. - ojs-import: - maybeArrayOf: string - description: Server reactive values to import into OJS. - description: Document server - -- name: daemon - hidden: true - schema: - anyOf: [number, boolean] - default: 300 - description: - short: "Run Jupyter kernels within a peristent daemon (to mitigate kernel startup time)." - long: | - Run Jupyter kernels within a peristent daemon (to mitigate kernel startup time). - By default a daemon with a timeout of 300 seconds will be used. Set `daemon` - to another timeout value or to `false` to disable it altogether. 
- -- name: daemon-restart - schema: boolean - hidden: true - default: false - description: "Restart any running Jupyter daemon before rendering." - -- name: enabled - schema: boolean - default: true - hidden: true - description: "Enable code cell execution." - -- name: ipynb - schema: boolean - default: false - hidden: true - description: "Execute code cell execution in Jupyter notebooks." - -- name: debug - hidden: true - schema: boolean - default: false - description: "Show code-execution related debug information." diff --git a/crates/quarto-yaml-validation/test-fixtures/schemas/document-text.yml b/crates/quarto-yaml-validation/test-fixtures/schemas/document-text.yml deleted file mode 100644 index 9fd120520..000000000 --- a/crates/quarto-yaml-validation/test-fixtures/schemas/document-text.yml +++ /dev/null @@ -1,109 +0,0 @@ -- name: wrap - tags: - formats: - ["!$pdf-all", "!$office-all", "!$odt-all", "!$html-all", "!$docbook-all"] - schema: - enum: [auto, none, preserve] - default: auto - description: - short: "Determine how text is wrapped in the output (`auto`, `none`, or `preserve`)." - long: | - Determine how text is wrapped in the output (the source code, not the rendered - version). - - - `auto` (default): Pandoc will attempt to wrap lines to the column width specified by `columns` (default 72). - - `none`: Pandoc will not wrap lines at all. - - `preserve`: Pandoc will attempt to preserve the wrapping from the source - document. Where there are nonsemantic newlines in the source, there will be - nonsemantic newlines in the output as well. - -- name: columns - tags: - formats: - [ - "!$pdf-all", - "!$office-all", - "!$odt-all", - "!$html-all", - "!$docbook-all", - "typst", - ] - schema: number - description: - short: For text formats, specify length of lines in characters. For `typst`, number of columns for body text. - long: | - Specify length of lines in characters. This affects text wrapping in generated source - code (see `wrap`). 
It also affects calculation of column widths for plain text - tables. - - For `typst`, number of columns for body text. - -- name: tab-stop - tags: - formats: - ["!$pdf-all", "!$office-all", "!$odt-all", "!$html-all", "!$docbook-all"] - schema: number - description: - short: "Specify the number of spaces per tab (default is 4)." - long: | - Specify the number of spaces per tab (default is 4). Note that tabs - within normal textual input are always converted to spaces. Tabs - within code are also converted, however this can be disabled with - `preserve-tabs: false`. - -- name: preserve-tabs - tags: - formats: - ["!$pdf-all", "!$office-all", "!$odt-all", "!$html-all", "!$docbook-all"] - schema: boolean - default: false - description: - short: | - Preserve tabs within code instead of converting them to spaces. - long: | - Preserve tabs within code instead of converting them to spaces. - (By default, pandoc converts tabs to spaces before parsing its input.) - Note that this will only affect tabs in literal code spans and code blocks. - Tabs in regular text are always treated as spaces. - -- name: eol - tags: - formats: - ["!$pdf-all", "!$office-all", "!$odt-all", "!$html-all", "!$docbook-all"] - schema: - enum: [lf, crlf, native] - description: - short: "Manually specify line endings (`lf`, `crlf`, or `native`)." - long: | - Manually specify line endings: - - - `crlf`: Use Windows line endings - - `lf`: Use macOS/Linux/UNIX line endings - - `native` (default): Use line endings appropriate to the OS on which pandoc is being run). - -- name: strip-comments - schema: boolean - tags: - formats: [$markdown-all, textile, $html-files] - description: - short: "Strip out HTML comments in source, rather than passing them on to output." - long: | - Strip out HTML comments in the Markdown source, - rather than passing them on to Markdown, Textile or HTML - output as raw HTML. 
This does not apply to HTML comments - inside raw HTML blocks when the `markdown_in_html_blocks` - extension is not set. - -- name: ascii - tags: - formats: [$html-all, $pdf-all, $markdown-all, ms] - schema: boolean - description: - short: "Use only ASCII characters in output." - long: | - Use only ASCII characters in output. Currently supported for XML - and HTML formats (which use entities instead of UTF-8 when this - option is selected), CommonMark, gfm, and Markdown (which use - entities), roff ms (which use hexadecimal escapes), and to a - limited degree LaTeX (which uses standard commands for accented - characters when possible). roff man output uses ASCII by default. diff --git a/crates/quarto-yaml-validation/test-fixtures/schemas/document-website.yml b/crates/quarto-yaml-validation/test-fixtures/schemas/document-website.yml deleted file mode 100644 index dd2a2db74..000000000 --- a/crates/quarto-yaml-validation/test-fixtures/schemas/document-website.yml +++ /dev/null @@ -1,79 +0,0 @@ -- name: search - schema: boolean - tags: - formats: [$html-doc] - default: true - description: Setting this to false prevents this document from being included in searches. - -- name: repo-actions - schema: - anyOf: - - boolean - - maybeArrayOf: - enum: [none, edit, source, issue] - description: - short: "Links to source repository actions" - long: "Links to source repository actions (`none` or one or more of `edit`, `source`, `issue`)" - - tags: - formats: [$html-doc] - description: | - Setting this to false prevents the `repo-actions` from appearing on this page. - Other possible values are `none` or one or more of `edit`, `source`, and `issue`, *e.g.* `[edit, source, issue]`. - -- name: aliases - schema: - arrayOf: string - tags: - formats: [$html-doc] - description: URLs that alias this document, when included in a website. 
- -- name: image - schema: - anyOf: - - path - - boolean - tags: - formats: [$html-doc] - description: - short: The path to a preview image for this document. - long: | - The path to a preview image for this content. By default, - Quarto will use the image value from the site: metadata. - If you provide an image, you may also optionally provide - an image-width and image-height to improve - the appearance of your Twitter Card. - - If image is not provided, Quarto will automatically attempt - to locate a preview image. - -- name: image-height - schema: string - tags: - formats: [$html-doc] - description: The height of the preview image for this document. - -- name: image-width - schema: string - tags: - formats: [$html-doc] - description: The width of the preview image for this document. - -- name: image-alt - schema: string - tags: - formats: [$html-doc] - description: The alt text for preview image on this page. - -- name: image-lazy-loading - schema: boolean - tags: - formats: [$html-doc] - description: - short: If true, the preview image will only load when it comes into view. - long: | - Enables lazy loading for the preview image. If true, the preview image element - will have `loading="lazy"`, and will only load when it comes into view. - - If false, the preview image will load immediately. 
- default: true diff --git a/crates/quarto-yaml-validation/test-fixtures/schemas/schema.yml b/crates/quarto-yaml-validation/test-fixtures/schemas/schema.yml deleted file mode 100644 index 49924d2b8..000000000 --- a/crates/quarto-yaml-validation/test-fixtures/schemas/schema.yml +++ /dev/null @@ -1,378 +0,0 @@ -- id: schema/scalar - anyOf: - - number - - boolean - - string - - enum: [null] - -- id: schema/description - anyOf: - - string - - object: - closed: true - properties: - short: string - long: string - -- id: schema/base - object: - closed: true - properties: - additionalCompletions: - arrayOf: string - completions: - arrayOf: string - id: string - hidden: boolean - tags: object - errorDescription: string - description: - ref: schema/description - default: any - -- id: schema/enum - object: - closed: true - super: - resolveRef: schema/base - required: - - enum - properties: - enum: - anyOf: - - arrayOf: - ref: schema/scalar - - object: - closed: true - super: - resolveRef: schema/base - properties: - values: - arrayOf: - ref: schema/scalar - -- id: schema/null - anyOf: - - enum: ["null"] - - object: - closed: true - required: - - "null" - properties: - "null": - anyOf: - - ref: schema/base - -- id: schema/explicit-schema - object: - closed: true - required: - - "schema" - super: - resolveRef: schema/base - properties: - schema: - ref: schema/schema - -- id: schema/explicit-pattern-string - object: - closed: true - super: - resolveRef: schema/base - required: - - "pattern" - properties: - pattern: string - -- id: schema/string - anyOf: - - enum: ["string", "path"] - - ref: schema/explicit-pattern-string - - object: - closed: true - super: - resolveRef: schema/base - required: - - "path" - properties: - path: - anyOf: - - ref: schema/explicit-pattern-string - - ref: schema/base - - object: - closed: true - super: - resolveRef: schema/base - required: - - "string" - properties: - string: - anyOf: - - ref: schema/explicit-pattern-string - - ref: schema/base - -- 
id: schema/number - anyOf: - - enum: ["number"] - - object: - closed: true - super: - resolveRef: schema/base - required: - - number - properties: - number: - anyOf: - - ref: schema/schema - - ref: schema/base - -- id: schema/boolean - anyOf: - - enum: ["boolean"] - - object: - closed: true - required: - - boolean - super: - resolveRef: schema/base - properties: - boolean: - anyOf: - - ref: schema/schema - - ref: schema/base - -- id: schema/resolve-ref - object: - closed: true - required: - - "resolveRef" - properties: - resolveRef: string - -- id: schema/ref - object: - closed: true - required: - - "ref" - properties: - ref: string - description: - ref: schema/description - # NB: schema/ref is _not_ a schema/base. - - #object: - # super: - # resolveRef: schema/base - # properties: - # ref: string - -- id: schema/maybe-array-of - object: - closed: true - required: - - maybeArrayOf - super: - resolveRef: schema/base - properties: - maybeArrayOf: - ref: schema/schema - -- id: schema/array-of - object: - closed: true - super: - resolveRef: schema/base - required: - - "arrayOf" - properties: - arrayOf: - anyOf: - - ref: schema/schema - - object: - closed: true - super: - resolveRef: schema/base - properties: - length: number - schema: - ref: schema/schema - -- id: schema/all-of - object: - closed: true - super: - resolveRef: schema/base - required: - - "allOf" - properties: - allOf: - anyOf: - - arrayOf: - ref: schema/schema - - object: - super: - resolveRef: schema/base - properties: - schemas: - arrayOf: - ref: schema/schema - -- id: schema/any-of - object: - closed: true - super: - resolveRef: schema/base - required: - - "anyOf" - properties: - anyOf: - anyOf: - - arrayOf: - ref: schema/schema - - object: - super: - resolveRef: schema/base - properties: - schemas: - arrayOf: - ref: schema/schema - -- id: schema/record - object: - closed: true - super: - resolveRef: schema/base - required: - - "record" - properties: - record: - anyOf: - - object: - 
additionalProperties: - ref: schema/schema - - object: - closed: true - super: - resolveRef: schema/base - properties: - properties: - object: - additionalProperties: - ref: schema/schema - required: [properties] - -- id: schema/object - anyOf: - - enum: ["object"] - - object: - closed: true - super: - resolveRef: schema/base - required: - - "object" - properties: - object: - object: - super: - resolveRef: schema/base - closed: true - properties: - namingConvention: - anyOf: - - enum: ["ignore"] - - arrayOf: - enum: - # because this particular setup is especially likely to lead to typos, - # we support all naming conventions in describing naming conventions - - "camelCase" - - "camel-case" - - "camel_case" - - "capitalizationCase" - - "capitalization-case" - - "capitalization_case" - - "underscoreCase" - - "underscore-case" - - "underscore_case" - - "snakeCase" - - "snake-case" - - "snake_case" - - "dashCase" - - "dash-case" - - "dash_case" - - "kebabCase" - - "kebab-case" - - "kebab_case" - properties: - object: - additionalProperties: - ref: schema/schema - patternProperties: - object: - additionalProperties: - ref: schema/schema - propertyNames: - ref: schema/schema - additionalProperties: - ref: schema/schema - super: - maybeArrayOf: - ref: schema/schema - required: - anyOf: - - enum: ["all"] - - arrayOf: string - closed: boolean - description: - ref: schema/description - completions: - arrayOf: string - -- id: schema/schema - anyOf: - - ref: schema/enum - - ref: schema/null - - ref: schema/explicit-schema - - ref: schema/string - - ref: schema/number - - ref: schema/boolean - - ref: schema/ref - - ref: schema/resolve-ref - - ref: schema/any-of - - ref: schema/array-of - - ref: schema/maybe-array-of - - ref: schema/all-of - - ref: schema/record - - ref: schema/object - - enum: [null, "any"] - description: "be a yaml schema" - -- id: schema/schema-field - object: - properties: - name: string - schema: - ref: schema/schema - hidden: boolean - alias: string - 
disabled: - maybeArrayOf: string - enabled: - maybeArrayOf: string - description: - ref: schema/description - tags: object - errorMessage: string - namingConvention: ignore - required: ["name", "schema", "description"] - propertyNames: - enum: - - name - - schema - - hidden - - alias - - disabled - - enabled - - description - - tags - - errorMessage - - default # no schema, but entry allowed diff --git a/crates/quarto-yaml-validation/tests/integration/comprehensive_schemas.rs b/crates/quarto-yaml-validation/tests/integration/comprehensive_schemas.rs deleted file mode 100644 index df3e3cef1..000000000 --- a/crates/quarto-yaml-validation/tests/integration/comprehensive_schemas.rs +++ /dev/null @@ -1,222 +0,0 @@ -//! Comprehensive tests parsing ALL schemas from quarto-cli schema files -//! -//! These tests attempt to parse every schema definition from the quarto-cli -//! schema files to ensure compatibility and identify any unsupported patterns. - -use quarto_yaml_validation::Schema; -use std::collections::HashMap; - -/// Helper to extract and parse schemas from a field-based YAML file -/// (document-execute.yml, document-text.yml, etc.) 
-fn parse_field_schemas(content: &str, file_name: &str) -> (usize, usize, Vec<(String, String)>) { - let yaml = quarto_yaml::parse(content).expect("Failed to parse YAML"); - let items = yaml.as_array().expect("Expected array at root"); - - let mut success_count = 0; - let mut total_count = 0; - let mut failures = Vec::new(); - - for item in items { - let name = item - .get_hash_value("name") - .and_then(|v| v.yaml.as_str()) - .unwrap_or(""); - - if let Some(schema_yaml) = item.get_hash_value("schema") { - total_count += 1; - match Schema::from_yaml(schema_yaml) { - Ok(_) => success_count += 1, - Err(e) => failures.push((name.to_string(), format!("{:?}", e))), - } - } - } - - eprintln!( - "{}: Successfully parsed {}/{} schemas", - file_name, success_count, total_count - ); - - if !failures.is_empty() { - eprintln!(" Failures:"); - for (name, error) in &failures { - eprintln!(" - {}: {}", name, error); - } - } - - (success_count, total_count, failures) -} - -#[test] -fn test_parse_all_document_execute_schemas() { - let content = include_str!("../../test-fixtures/schemas/document-execute.yml"); - let (success, total, failures) = parse_field_schemas(content, "document-execute.yml"); - - // We expect high success rate now that P0/P1 features are implemented - assert!( - success > total * 9 / 10, - "Too many failures in document-execute.yml: {}/{} succeeded. Failures: {:?}", - success, - total, - failures - ); -} - -#[test] -fn test_parse_all_document_text_schemas() { - let content = include_str!("../../test-fixtures/schemas/document-text.yml"); - let (success, total, failures) = parse_field_schemas(content, "document-text.yml"); - - assert!( - success > total * 9 / 10, - "Too many failures in document-text.yml: {}/{} succeeded. 
Failures: {:?}", - success, - total, - failures - ); -} - -#[test] -fn test_parse_all_document_website_schemas() { - let content = include_str!("../../test-fixtures/schemas/document-website.yml"); - let (success, total, failures) = parse_field_schemas(content, "document-website.yml"); - - assert!( - success > total * 9 / 10, - "Too many failures in document-website.yml: {}/{} succeeded. Failures: {:?}", - success, - total, - failures - ); -} - -#[test] -fn test_parse_key_definitions_schemas() { - // Rather than trying to parse all 101 definitions generically, - // test key patterns that use our P0/P1 features - - // Test arrayOf patterns - let yaml1 = quarto_yaml::parse(r#"arrayOf: path"#).unwrap(); - assert!( - Schema::from_yaml(&yaml1).is_ok(), - "pandoc-shortcodes pattern" - ); - - let yaml2 = quarto_yaml::parse( - r#" -arrayOf: - arrayOf: - schema: string - length: 2 -"#, - ) - .unwrap(); - assert!( - Schema::from_yaml(&yaml2).is_ok(), - "pandoc-format-request-headers pattern" - ); - - // Test maybeArrayOf pattern - let yaml3 = quarto_yaml::parse(r#"maybeArrayOf: string"#).unwrap(); - assert!(Schema::from_yaml(&yaml3).is_ok(), "contents-auto pattern"); - - // Test record pattern - let yaml4 = quarto_yaml::parse( - r#" -record: - type: - enum: [citeproc] -"#, - ) - .unwrap(); - assert!( - Schema::from_yaml(&yaml4).is_ok(), - "pandoc-format-filters record pattern" - ); - - // Test complex anyOf with object - let yaml5 = quarto_yaml::parse( - r#" -anyOf: - - string - - object: - properties: - value: string - format: string - required: [value] -"#, - ) - .unwrap(); - assert!(Schema::from_yaml(&yaml5).is_ok(), "date pattern"); - - eprintln!("✓ All key definitions.yml patterns parsed successfully"); -} - -#[test] -fn test_comprehensive_statistics() { - // Parse all field-based files and gather statistics - let mut stats: HashMap = HashMap::new(); - - let files = vec![ - ( - "document-execute.yml", - include_str!("../../test-fixtures/schemas/document-execute.yml"), - 
), - ( - "document-text.yml", - include_str!("../../test-fixtures/schemas/document-text.yml"), - ), - ( - "document-website.yml", - include_str!("../../test-fixtures/schemas/document-website.yml"), - ), - ]; - - let mut total_success = 0; - let mut total_schemas = 0; - let mut all_failures = Vec::new(); - - for (name, content) in files { - let (success, total, mut failures) = parse_field_schemas(content, name); - - stats.insert(name.to_string(), (success, total)); - total_success += success; - total_schemas += total; - - for (id, err) in failures.drain(..) { - all_failures.push((name.to_string(), id, err)); - } - } - - eprintln!("\n=== Comprehensive Test Statistics (Field-Based Files) ==="); - eprintln!("Total schemas parsed: {}/{}", total_success, total_schemas); - eprintln!( - "Overall success rate: {:.1}%", - (total_success as f64 / total_schemas as f64) * 100.0 - ); - eprintln!("\nPer-file breakdown:"); - for (file, (success, total)) in &stats { - eprintln!( - " {}: {}/{} ({:.1}%)", - file, - success, - total, - (*success as f64 / *total as f64) * 100.0 - ); - } - - if !all_failures.is_empty() { - eprintln!("\n=== All Failures ({}) ===", all_failures.len()); - for (file, id, error) in &all_failures { - eprintln!(" {}:{}: {}", file, id, error); - } - } - - // With P0/P1 features implemented, we expect >90% success on field-based files - assert!( - total_success > total_schemas * 9 / 10, - "Overall success rate too low: {}/{} ({:.1}%)", - total_success, - total_schemas, - (total_success as f64 / total_schemas as f64) * 100.0 - ); -} diff --git a/crates/quarto-yaml-validation/tests/integration/main.rs b/crates/quarto-yaml-validation/tests/integration/main.rs deleted file mode 100644 index 7976696c9..000000000 --- a/crates/quarto-yaml-validation/tests/integration/main.rs +++ /dev/null @@ -1,10 +0,0 @@ -//! quarto-yaml-validation integration test binary. -//! See bd-xvdop / claude-notes/plans/2026-05-28-integration-test-consolidation.md. 
- -pub mod comprehensive_schemas; -pub mod real_schemas; -pub mod schema_compilation; -pub mod schema_inheritance; -pub mod validation_diagnostic; - -fn main() {} diff --git a/crates/quarto-yaml-validation/tests/integration/real_schemas.rs b/crates/quarto-yaml-validation/tests/integration/real_schemas.rs deleted file mode 100644 index c0ff8d050..000000000 --- a/crates/quarto-yaml-validation/tests/integration/real_schemas.rs +++ /dev/null @@ -1,216 +0,0 @@ -//! Integration tests using real quarto-cli schema files -//! -//! These tests verify that our YAML schema parser can successfully parse -//! actual schema files from quarto-cli without errors. - -use quarto_yaml_validation::Schema; - -/// Test specific schemas from definitions.yml that use our P0/P1 features -#[test] -fn test_parse_definitions_yml() { - // Test pandoc-format-request-headers (nested arrayOf with length) - let yaml1 = quarto_yaml::parse( - r#" -arrayOf: - arrayOf: - schema: string - length: 2 -"#, - ) - .unwrap(); - assert!( - Schema::from_yaml(&yaml1).is_ok(), - "Failed to parse pandoc-format-request-headers pattern" - ); - - // Test pandoc-shortcodes (simple arrayOf) - let yaml2 = quarto_yaml::parse( - r#" -arrayOf: path -"#, - ) - .unwrap(); - assert!( - Schema::from_yaml(&yaml2).is_ok(), - "Failed to parse pandoc-shortcodes pattern" - ); - - // Test pandoc-format-filters (arrayOf with anyOf and record) - let yaml3 = quarto_yaml::parse( - r#" -arrayOf: - anyOf: - - path - - object: - properties: - type: string - path: path - required: [path] - - record: - type: - enum: [citeproc] -"#, - ) - .unwrap(); - assert!( - Schema::from_yaml(&yaml3).is_ok(), - "Failed to parse pandoc-format-filters pattern" - ); - - // Test contents-auto (maybeArrayOf) - let yaml4 = quarto_yaml::parse( - r#" -maybeArrayOf: string -"#, - ) - .unwrap(); - assert!( - Schema::from_yaml(&yaml4).is_ok(), - "Failed to parse contents-auto auto field pattern" - ); - - // Test date-format (schema wrapper) - let yaml5 = 
quarto_yaml::parse( - r#" -schema: string -"#, - ) - .unwrap(); - assert!( - Schema::from_yaml(&yaml5).is_ok(), - "Failed to parse date-format pattern" - ); -} - -/// Test parsing document-text.yml which contains many schema wrapper patterns -#[test] -fn test_parse_document_text_yml() { - let yaml_content = include_str!("../../test-fixtures/schemas/document-text.yml"); - let yaml = quarto_yaml::parse(yaml_content).expect("Failed to parse YAML"); - - // The file is an array of field definitions - let items = yaml.as_array().expect("Expected array at root"); - - // Parse each field definition - for item in items { - let name = item - .get_hash_value("name") - .and_then(|v| v.yaml.as_str()) - .unwrap_or(""); - - // Each item should have a 'schema' field - if let Some(schema_yaml) = item.get_hash_value("schema") { - let schema_result = Schema::from_yaml(schema_yaml); - assert!( - schema_result.is_ok(), - "Failed to parse schema for field '{}': {:?}", - name, - schema_result.err() - ); - } - } -} - -/// Test specific patterns from definitions.yml -#[test] -fn test_definitions_arrayof_patterns() { - // Test nested arrayOf with length constraint (pandoc-format-request-headers) - let yaml = quarto_yaml::parse( - r#" -arrayOf: - arrayOf: - schema: string - length: 2 -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - // Should parse as Array containing Array containing String - match schema { - Schema::Array(outer) => { - assert!(outer.items.is_some()); - if let Some(inner_box) = outer.items { - match *inner_box { - Schema::Array(inner) => { - assert_eq!(inner.min_items, Some(2)); - assert_eq!(inner.max_items, Some(2)); - assert!(inner.items.is_some()); - } - _ => panic!("Expected inner Array schema"), - } - } - } - _ => panic!("Expected outer Array schema"), - } -} - -/// Test maybeArrayOf pattern from definitions.yml -#[test] -fn test_definitions_maybe_arrayof() { - let yaml = quarto_yaml::parse( - r#" -maybeArrayOf: string -"#, - ) - .unwrap(); - 
- let schema = Schema::from_yaml(&yaml).unwrap(); - // Should parse as anyOf: [string, array of string] - match schema { - Schema::AnyOf(s) => { - assert_eq!(s.schemas.len(), 2); - // First should be string, second should be array - assert!(matches!(s.schemas[0], Schema::String(_))); - assert!(matches!(s.schemas[1], Schema::Array(_))); - } - _ => panic!("Expected AnyOf schema"), - } -} - -/// Test record pattern from definitions.yml -#[test] -fn test_definitions_record() { - let yaml = quarto_yaml::parse( - r#" -record: - type: - enum: [citeproc] -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - // Should parse as closed object with all properties required - match schema { - Schema::Object(s) => { - assert!(s.closed); - assert_eq!(s.properties.len(), 1); - assert_eq!(s.required.len(), 1); - assert!(s.required.contains(&"type".to_string())); - assert!(s.properties.contains_key("type")); - } - _ => panic!("Expected Object schema"), - } -} - -/// Test schema wrapper from document-text.yml -#[test] -fn test_document_text_schema_wrapper() { - let yaml = quarto_yaml::parse( - r#" -schema: - enum: [lf, crlf, native] -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - // Should parse as enum - match schema { - Schema::Enum(s) => { - assert_eq!(s.values.len(), 3); - } - _ => panic!("Expected Enum schema"), - } -} diff --git a/crates/quarto-yaml-validation/tests/integration/schema_compilation.rs b/crates/quarto-yaml-validation/tests/integration/schema_compilation.rs deleted file mode 100644 index f1a1682ec..000000000 --- a/crates/quarto-yaml-validation/tests/integration/schema_compilation.rs +++ /dev/null @@ -1,408 +0,0 @@ -use quarto_yaml_validation::{Schema, SchemaRegistry}; - -/// Test compiling a schema with inheritance -#[test] -fn test_compile_with_inheritance() { - let mut registry = SchemaRegistry::new(); - - // Register base schema - let base_yaml = quarto_yaml::parse( - r#" -object: - properties: - id: string - 
created_at: string - required: [id] -"#, - ) - .unwrap(); - let base = Schema::from_yaml(&base_yaml).unwrap(); - registry.register("base".to_string(), base); - - // Parse derived schema with inheritance - let derived_yaml = quarto_yaml::parse( - r#" -object: - super: - resolveRef: base - properties: - name: string - email: string - required: [name] -"#, - ) - .unwrap(); - let derived = Schema::from_yaml(&derived_yaml).unwrap(); - - // Before compilation, derived has base_schema - match &derived { - Schema::Object(obj) => { - assert!(obj.base_schema.is_some()); - assert_eq!(obj.properties.len(), 2); // Only derived props - } - _ => panic!("Expected Object schema"), - } - - // Compile - let compiled = derived.compile(®istry).unwrap(); - - // After compilation, schema has merged properties - match compiled { - Schema::Object(obj) => { - assert!(obj.base_schema.is_none()); // Inheritance resolved - assert_eq!(obj.properties.len(), 4); // Base + derived props - assert!(obj.properties.contains_key("id")); - assert!(obj.properties.contains_key("created_at")); - assert!(obj.properties.contains_key("name")); - assert!(obj.properties.contains_key("email")); - assert_eq!(obj.required.len(), 2); // [id, name] - } - _ => panic!("Expected Object schema"), - } -} - -/// Test compiling eager vs lazy references -#[test] -fn test_compile_eager_vs_lazy_refs() { - let mut registry = SchemaRegistry::new(); - - // Register a schema - let target_yaml = quarto_yaml::parse( - r#" -object: - properties: - value: string -"#, - ) - .unwrap(); - registry.register( - "target".to_string(), - Schema::from_yaml(&target_yaml).unwrap(), - ); - - // Schema with both eager and lazy refs - let yaml = quarto_yaml::parse( - r#" -object: - properties: - eager_prop: - resolveRef: target - lazy_prop: - ref: target -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - // Compile - let compiled = schema.compile(®istry).unwrap(); - - match compiled { - Schema::Object(obj) => { - // Eager 
ref should be resolved to actual object - match obj.properties.get("eager_prop") { - Some(Schema::Object(eager_obj)) => { - assert!(eager_obj.properties.contains_key("value")); - } - _ => panic!("Expected eager_prop to be resolved to Object"), - } - - // Lazy ref should still be a ref - match obj.properties.get("lazy_prop") { - Some(Schema::Ref(lazy_ref)) => { - assert_eq!(lazy_ref.reference, "target"); - assert!(!lazy_ref.eager); - } - _ => panic!("Expected lazy_prop to remain as Ref"), - } - } - _ => panic!("Expected Object schema"), - } -} - -/// Test compiling nested schemas (anyOf, allOf, array) -#[test] -fn test_compile_nested_schemas() { - let mut registry = SchemaRegistry::new(); - - // Register base schemas - let string_schema_yaml = quarto_yaml::parse("string").unwrap(); - registry.register( - "string-schema".to_string(), - Schema::from_yaml(&string_schema_yaml).unwrap(), - ); - - let number_schema_yaml = quarto_yaml::parse("number").unwrap(); - registry.register( - "number-schema".to_string(), - Schema::from_yaml(&number_schema_yaml).unwrap(), - ); - - // Schema with nested eager refs - let yaml = quarto_yaml::parse( - r#" -object: - properties: - flexible: - anyOf: - - resolveRef: string-schema - - resolveRef: number-schema - list: - array: - items: - resolveRef: string-schema -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - // Compile - let compiled = schema.compile(®istry).unwrap(); - - match compiled { - Schema::Object(obj) => { - // anyOf schemas should be resolved - match obj.properties.get("flexible") { - Some(Schema::AnyOf(anyof)) => { - assert_eq!(anyof.schemas.len(), 2); - assert!(matches!(anyof.schemas[0], Schema::String(_))); - assert!(matches!(anyof.schemas[1], Schema::Number(_))); - } - _ => panic!("Expected AnyOf schema"), - } - - // array items should be resolved - match obj.properties.get("list") { - Some(Schema::Array(arr)) => { - assert!(arr.items.is_some()); - assert!(matches!( - 
arr.items.as_ref().unwrap().as_ref(), - Schema::String(_) - )); - } - _ => panic!("Expected Array schema"), - } - } - _ => panic!("Expected Object schema"), - } -} - -/// Test that primitives compile to themselves -#[test] -fn test_compile_primitives() { - let registry = SchemaRegistry::new(); - - let primitives = vec!["boolean", "string", "number", "null", "any"]; - - for primitive in primitives { - let yaml = quarto_yaml::parse(primitive).unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - let compiled = schema.compile(®istry).unwrap(); - - // Primitives should compile to themselves - assert_eq!(schema, compiled); - } -} - -/// Test error: missing eager reference -#[test] -fn test_compile_error_missing_eager_ref() { - let registry = SchemaRegistry::new(); // Empty registry - - let yaml = quarto_yaml::parse( - r#" -object: - super: - resolveRef: non-existent - properties: - name: string -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - let result = schema.compile(®istry); - assert!(result.is_err()); - let err_msg = result.unwrap_err().to_string(); - assert!(err_msg.contains("not found in registry")); - assert!(err_msg.contains("non-existent")); -} - -/// Test that lazy refs with missing targets don't error during compilation -#[test] -fn test_compile_preserves_lazy_ref_to_missing_target() { - let registry = SchemaRegistry::new(); // Empty registry - - let yaml = quarto_yaml::parse( - r#" -object: - properties: - person: - ref: non-existent -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - // Should NOT error - lazy refs are not resolved during compilation - let compiled = schema.compile(®istry).unwrap(); - - match compiled { - Schema::Object(obj) => match obj.properties.get("person") { - Some(Schema::Ref(r)) => { - assert_eq!(r.reference, "non-existent"); - assert!(!r.eager); - } - _ => panic!("Expected Ref schema"), - }, - _ => panic!("Expected Object schema"), - } -} - -/// Test circular lazy 
references are preserved -#[test] -fn test_compile_circular_lazy_refs() { - let mut registry = SchemaRegistry::new(); - - // Register person schema with circular reference to itself - let person_yaml = quarto_yaml::parse( - r#" -object: - properties: - name: string - parent: - ref: person -"#, - ) - .unwrap(); - let person_schema = Schema::from_yaml(&person_yaml).unwrap(); - registry.register("person".to_string(), person_schema); - - // Compile the registered schema - let person_from_registry = registry.resolve("person").unwrap(); - let compiled = person_from_registry.compile(®istry).unwrap(); - - // Should succeed - lazy refs are not resolved - match compiled { - Schema::Object(obj) => { - assert!(obj.properties.contains_key("name")); - assert!(obj.properties.contains_key("parent")); - - // parent should still be a ref - match obj.properties.get("parent") { - Some(Schema::Ref(r)) => { - assert_eq!(r.reference, "person"); - assert!(!r.eager); - } - _ => panic!("Expected Ref schema for parent"), - } - } - _ => panic!("Expected Object schema"), - } -} - -/// Test multiple inheritance compilation -#[test] -fn test_compile_multiple_inheritance() { - let mut registry = SchemaRegistry::new(); - - // Register base schemas - let base1_yaml = quarto_yaml::parse( - r#" -object: - properties: - field1: string - required: [field1] -"#, - ) - .unwrap(); - registry.register("base1".to_string(), Schema::from_yaml(&base1_yaml).unwrap()); - - let base2_yaml = quarto_yaml::parse( - r#" -object: - properties: - field2: number - required: [field2] -"#, - ) - .unwrap(); - registry.register("base2".to_string(), Schema::from_yaml(&base2_yaml).unwrap()); - - // Derived with multiple bases - let derived_yaml = quarto_yaml::parse( - r#" -object: - super: - - resolveRef: base1 - - resolveRef: base2 - properties: - field3: boolean -"#, - ) - .unwrap(); - let derived = Schema::from_yaml(&derived_yaml).unwrap(); - - // Compile - let compiled = derived.compile(®istry).unwrap(); - - match 
compiled { - Schema::Object(obj) => { - assert_eq!(obj.properties.len(), 3); - assert!(obj.properties.contains_key("field1")); - assert!(obj.properties.contains_key("field2")); - assert!(obj.properties.contains_key("field3")); - assert_eq!(obj.required.len(), 2); - assert!(obj.required.contains(&"field1".to_string())); - assert!(obj.required.contains(&"field2".to_string())); - } - _ => panic!("Expected Object schema"), - } -} - -/// Test compiling schema without any refs or inheritance -#[test] -fn test_compile_simple_schema() { - let registry = SchemaRegistry::new(); - - let yaml = quarto_yaml::parse( - r#" -object: - properties: - name: string - age: number - required: [name] -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - - let compiled = schema.compile(®istry).unwrap(); - - // Should be structurally identical (no refs or inheritance to resolve) - match compiled { - Schema::Object(obj) => { - assert_eq!(obj.properties.len(), 2); - assert_eq!(obj.required.len(), 1); - } - _ => panic!("Expected Object schema"), - } -} - -/// Test compiling enum schema -#[test] -fn test_compile_enum() { - let registry = SchemaRegistry::new(); - - let yaml = quarto_yaml::parse( - r#" -enum: [option1, option2, option3] -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&yaml).unwrap(); - let compiled = schema.compile(®istry).unwrap(); - - // Enums should compile to themselves - assert_eq!(schema, compiled); -} diff --git a/crates/quarto-yaml-validation/tests/integration/schema_inheritance.rs b/crates/quarto-yaml-validation/tests/integration/schema_inheritance.rs deleted file mode 100644 index 216bfdce5..000000000 --- a/crates/quarto-yaml-validation/tests/integration/schema_inheritance.rs +++ /dev/null @@ -1,304 +0,0 @@ -use quarto_yaml_validation::{Schema, SchemaRegistry, merge_object_schemas}; - -/// Test based on real quarto-cli definitions.yml schema -/// social-metadata → twitter-card-config inheritance -#[test] -fn test_twitter_card_inheritance() { 
- let mut registry = SchemaRegistry::new(); - - // Register base schema (social-metadata) - let base_yaml = quarto_yaml::parse( - r#" -object: - properties: - title: - string: - description: "Title for social media" - description: - string: - description: "Description for social media" - image: - string: - description: "Image URL" - required: [title] -"#, - ) - .unwrap(); - - let base_schema = Schema::from_yaml(&base_yaml).unwrap(); - registry.register("social-metadata".to_string(), base_schema); - - // Parse derived schema (twitter-card-config) - let derived_yaml = quarto_yaml::parse( - r#" -object: - super: - resolveRef: social-metadata - closed: true - properties: - card-style: - enum: [summary, summary_large_image] -"#, - ) - .unwrap(); - - let derived_schema = Schema::from_yaml(&derived_yaml).unwrap(); - - // Extract base_schema and merge - match derived_schema { - Schema::Object(ref obj) => { - assert!(obj.base_schema.is_some()); - - let merged = - merge_object_schemas(obj.base_schema.as_ref().unwrap(), obj, ®istry).unwrap(); - - // Verify merged schema has properties from both - assert!(merged.properties.contains_key("title")); - assert!(merged.properties.contains_key("description")); - assert!(merged.properties.contains_key("image")); - assert!(merged.properties.contains_key("card-style")); - - // Verify required from base - assert!(merged.required.contains(&"title".to_string())); - - // Verify closed from derived - assert!(merged.closed); - } - _ => panic!("Expected Object schema"), - } -} - -/// Test multiple inheritance -#[test] -fn test_multiple_inheritance() { - let mut registry = SchemaRegistry::new(); - - // Register base1 - let base1_yaml = quarto_yaml::parse( - r#" -object: - properties: - field1: string - required: [field1] -"#, - ) - .unwrap(); - registry.register("base1".to_string(), Schema::from_yaml(&base1_yaml).unwrap()); - - // Register base2 - let base2_yaml = quarto_yaml::parse( - r#" -object: - properties: - field2: number - required: 
[field2] -"#, - ) - .unwrap(); - registry.register("base2".to_string(), Schema::from_yaml(&base2_yaml).unwrap()); - - // Parse derived with multiple bases - let derived_yaml = quarto_yaml::parse( - r#" -object: - super: - - resolveRef: base1 - - resolveRef: base2 - properties: - field3: boolean -"#, - ) - .unwrap(); - - let derived_schema = Schema::from_yaml(&derived_yaml).unwrap(); - - match derived_schema { - Schema::Object(ref obj) => { - let merged = - merge_object_schemas(obj.base_schema.as_ref().unwrap(), obj, ®istry).unwrap(); - - assert_eq!(merged.properties.len(), 3); - assert!(merged.properties.contains_key("field1")); - assert!(merged.properties.contains_key("field2")); - assert!(merged.properties.contains_key("field3")); - - assert_eq!(merged.required.len(), 2); - assert!(merged.required.contains(&"field1".to_string())); - assert!(merged.required.contains(&"field2".to_string())); - } - _ => panic!("Expected Object schema"), - } -} - -/// Test property override -#[test] -fn test_property_override() { - let mut registry = SchemaRegistry::new(); - - // Base has 'name' as string - let base_yaml = quarto_yaml::parse( - r#" -object: - properties: - name: - string: - description: "Base description" -"#, - ) - .unwrap(); - registry.register("base".to_string(), Schema::from_yaml(&base_yaml).unwrap()); - - // Derived overrides 'name' with different constraints - let derived_yaml = quarto_yaml::parse( - r#" -object: - super: - resolveRef: base - properties: - name: - string: - pattern: "^[A-Z]" - description: "Derived description" -"#, - ) - .unwrap(); - - let derived_schema = Schema::from_yaml(&derived_yaml).unwrap(); - - match derived_schema { - Schema::Object(ref obj) => { - let merged = - merge_object_schemas(obj.base_schema.as_ref().unwrap(), obj, ®istry).unwrap(); - - // Derived should win - match merged.properties.get("name") { - Some(Schema::String(s)) => { - assert_eq!(s.pattern, Some("^[A-Z]".to_string())); - assert_eq!( - s.annotations.description, - 
Some("Derived description".to_string()) - ); - } - _ => panic!("Expected string schema for name"), - } - } - _ => panic!("Expected Object schema"), - } -} - -/// Test that base schema without inheritance works -#[test] -fn test_no_inheritance() { - let _registry = SchemaRegistry::new(); - - let yaml = quarto_yaml::parse( - r#" -object: - properties: - name: string - age: number - required: [name] -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::Object(ref obj) => { - // No base_schema, nothing to merge - assert!(obj.base_schema.is_none()); - assert_eq!(obj.properties.len(), 2); - assert_eq!(obj.required.len(), 1); - } - _ => panic!("Expected Object schema"), - } -} - -/// Test inline object as super (not just ref) -#[test] -fn test_inline_super() { - let registry = SchemaRegistry::new(); - - let yaml = quarto_yaml::parse( - r#" -object: - super: - object: - properties: - base_prop: string - required: [base_prop] - properties: - derived_prop: number -"#, - ) - .unwrap(); - - let schema = Schema::from_yaml(&yaml).unwrap(); - - match schema { - Schema::Object(ref obj) => { - assert!(obj.base_schema.is_some()); - - let merged = - merge_object_schemas(obj.base_schema.as_ref().unwrap(), obj, ®istry).unwrap(); - - assert!(merged.properties.contains_key("base_prop")); - assert!(merged.properties.contains_key("derived_prop")); - assert!(merged.required.contains(&"base_prop".to_string())); - } - _ => panic!("Expected Object schema"), - } -} - -/// Test that required: "all" works with inheritance -#[test] -fn test_required_all_with_inheritance() { - let mut registry = SchemaRegistry::new(); - - let base_yaml = quarto_yaml::parse( - r#" -object: - properties: - id: string - name: string - required: [id] -"#, - ) - .unwrap(); - registry.register("base".to_string(), Schema::from_yaml(&base_yaml).unwrap()); - - let derived_yaml = quarto_yaml::parse( - r#" -object: - super: - resolveRef: base - properties: - email: string - 
phone: string - required: all -"#, - ) - .unwrap(); - - let derived_schema = Schema::from_yaml(&derived_yaml).unwrap(); - - match derived_schema { - Schema::Object(ref obj) => { - // required: all should expand to [email, phone] for derived props only - assert_eq!(obj.required.len(), 2); - assert!(obj.required.contains(&"email".to_string())); - assert!(obj.required.contains(&"phone".to_string())); - - let merged = - merge_object_schemas(obj.base_schema.as_ref().unwrap(), obj, ®istry).unwrap(); - - // After merge, should have all three required fields - assert_eq!(merged.required.len(), 3); - assert!(merged.required.contains(&"id".to_string())); - assert!(merged.required.contains(&"email".to_string())); - assert!(merged.required.contains(&"phone".to_string())); - } - _ => panic!("Expected Object schema"), - } -} diff --git a/crates/quarto-yaml-validation/tests/integration/validation_diagnostic.rs b/crates/quarto-yaml-validation/tests/integration/validation_diagnostic.rs deleted file mode 100644 index b54a5094a..000000000 --- a/crates/quarto-yaml-validation/tests/integration/validation_diagnostic.rs +++ /dev/null @@ -1,487 +0,0 @@ -//! Integration tests for ValidationDiagnostic -//! -//! Tests JSON structure, text output, and overall integration. 
- -use quarto_source_map::SourceContext; -use quarto_yaml_validation::{Schema, ValidationDiagnostic, validate}; -use serde_json::Value; - -/// Helper to create a SourceContext with a test file -fn create_test_context(filename: &str, content: &str) -> SourceContext { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - - let mut ctx = SourceContext::new(); - - // Compute FileId from filename hash (same as quarto-yaml) - let mut hasher = DefaultHasher::new(); - filename.hash(&mut hasher); - let file_id = quarto_source_map::FileId(hasher.finish() as usize); - - ctx.add_file_with_id(file_id, filename.to_string(), Some(content.to_string())); - ctx -} - -#[test] -fn test_json_structure_type_mismatch() { - // Create a schema with nested object expecting age to be a number - let schema_yaml = quarto_yaml::parse( - r#" -object: - properties: - age: - number: - minimum: 0 - maximum: 100 -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - // Create invalid document with string instead of number for age - let doc_content = r#"age: "not a number""#; - let doc = quarto_yaml::parse_file(doc_content, "test.yaml").unwrap(); - - // Create SourceContext - let source_ctx = create_test_context("test.yaml", doc_content); - - // Validate (should fail) - let registry = quarto_yaml_validation::SchemaRegistry::new(); - let result = validate(&doc, &schema, ®istry, &source_ctx); - - assert!(result.is_err(), "Validation should fail for type mismatch"); - - let error = result.unwrap_err(); - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - - // Test JSON structure - let json = diagnostic.to_json(); - - // Check error_kind is structured (not just a string) - assert!( - json.get("error_kind").is_some(), - "Should have error_kind field" - ); - assert!( - json["error_kind"].is_object(), - "error_kind should be an object" - ); - assert_eq!(json["error_kind"]["type"], "TypeMismatch"); - 
assert_eq!(json["error_kind"]["data"]["expected"], "number"); - assert_eq!(json["error_kind"]["data"]["got"], "string"); - - // Check error code - assert_eq!(json["code"], "Q-1-11"); - - // Check message is present for convenience - assert!(json.get("message").is_some()); - assert!( - json["message"] - .as_str() - .unwrap() - .contains("Expected number") - ); - - // Check instance_path points to "age" property - assert!(json["instance_path"].is_array()); - let instance_path = json["instance_path"].as_array().unwrap(); - assert_eq!(instance_path.len(), 1); - assert_eq!(instance_path[0]["type"], "Key"); - assert_eq!(instance_path[0]["value"], "age"); - - // Check schema_path - assert!(json["schema_path"].is_array()); - - // Check source_range has filename (not file_id!) - let source_range = json.get("source_range").expect("Should have source_range"); - assert_eq!(source_range["filename"], "test.yaml"); - assert!(source_range["start_offset"].is_number()); - assert!(source_range["end_offset"].is_number()); - assert!(source_range["start_line"].is_number()); - assert!(source_range["start_column"].is_number()); - assert!(source_range["end_line"].is_number()); - assert!(source_range["end_column"].is_number()); - - // Verify line numbers are 1-indexed - assert!(source_range["start_line"].as_u64().unwrap() >= 1); - assert!(source_range["start_column"].as_u64().unwrap() >= 1); -} - -#[test] -fn test_json_structure_missing_property() { - // Schema requiring "name" property - let schema_yaml = quarto_yaml::parse( - r#" -object: - properties: - name: - string: {} - age: - number: {} - required: - - name -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - // Document missing "name" - let doc_content = r#"age: 25"#; - let doc = quarto_yaml::parse_file(doc_content, "person.yaml").unwrap(); - - let source_ctx = create_test_context("person.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let result = 
validate(&doc, &schema, ®istry, &source_ctx); - assert!(result.is_err()); - - let error = result.unwrap_err(); - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - let json = diagnostic.to_json(); - - // Check structured error_kind - assert_eq!(json["error_kind"]["type"], "MissingRequiredProperty"); - assert_eq!(json["error_kind"]["data"]["property"], "name"); - - // Check error code - assert_eq!(json["code"], "Q-1-10"); - - // Check hints are present - assert!(json.get("hints").is_some()); - let hints = json["hints"].as_array().unwrap(); - assert!(!hints.is_empty()); - assert!(hints[0].as_str().unwrap().contains("name")); -} - -#[test] -fn test_json_structure_nested_path() { - // Schema with nested structure - let schema_yaml = quarto_yaml::parse( - r#" -object: - properties: - user: - object: - properties: - name: - string: {} - email: - string: - pattern: "^[^@]+@[^@]+\\.[^@]+$" -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - // Document with invalid email - let doc_content = r#" -user: - name: "John" - email: "invalid-email" -"#; - let doc = quarto_yaml::parse_file(doc_content, "config.yaml").unwrap(); - - let source_ctx = create_test_context("config.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let result = validate(&doc, &schema, ®istry, &source_ctx); - assert!(result.is_err()); - - let error = result.unwrap_err(); - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - let json = diagnostic.to_json(); - - // Check instance_path shows nested structure - let instance_path = json["instance_path"].as_array().unwrap(); - assert_eq!(instance_path.len(), 2); - assert_eq!(instance_path[0]["type"], "Key"); - assert_eq!(instance_path[0]["value"], "user"); - assert_eq!(instance_path[1]["type"], "Key"); - assert_eq!(instance_path[1]["value"], "email"); - - // Check source_range points to the email value - let source_range = 
&json["source_range"]; - assert_eq!(source_range["filename"], "config.yaml"); - // Line should be around 4 (0-indexed: line 3) - assert!(source_range["start_line"].as_u64().unwrap() >= 3); -} - -#[test] -fn test_text_output_has_ariadne() { - // Create schema and invalid document - let schema_yaml = quarto_yaml::parse( - r#" -number: - minimum: 1 - maximum: 100 -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - let doc_content = r#"count: 500"#; - let doc = quarto_yaml::parse_file(doc_content, "data.yaml").unwrap(); - - let source_ctx = create_test_context("data.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let result = validate(&doc, &schema, ®istry, &source_ctx); - assert!(result.is_err()); - - let error = result.unwrap_err(); - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - - // Test text output - let text = diagnostic.to_text(&source_ctx); - - // Should have ariadne box-drawing characters - assert!( - text.contains("─") || text.contains("│") || text.contains("╭") || text.contains("╯"), - "Should have ariadne box-drawing characters" - ); - - // Should have filename - assert!(text.contains("data.yaml"), "Should contain filename"); - - // Should have error code - assert!(text.contains("Q-1-"), "Should contain error code"); - - // Should have line:column reference - assert!(text.contains(":1:"), "Should contain line:column reference"); -} - -#[test] -fn test_json_round_trip_serialization() { - // Test that JSON output is valid and can be parsed - let schema_yaml = quarto_yaml::parse( - r#" -string: - minLength: 5 -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - let doc_content = r#"name: "ab""#; - let doc = quarto_yaml::parse_file(doc_content, "test.yaml").unwrap(); - - let source_ctx = create_test_context("test.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let result = 
validate(&doc, &schema, ®istry, &source_ctx); - assert!(result.is_err()); - - let error = result.unwrap_err(); - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - let json = diagnostic.to_json(); - - // Serialize to string and parse back - let json_str = serde_json::to_string_pretty(&json).unwrap(); - let parsed: Value = serde_json::from_str(&json_str).unwrap(); - - // Verify key fields are preserved - assert_eq!(parsed["code"], json["code"]); - assert_eq!(parsed["message"], json["message"]); - assert_eq!(parsed["error_kind"], json["error_kind"]); -} - -#[test] -fn test_multiple_errors_same_file() { - // Schema with multiple constraints - let schema_yaml = quarto_yaml::parse( - r#" -object: - properties: - name: - string: - minLength: 3 - age: - number: - minimum: 0 - maximum: 150 - required: - - name - - age -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - // Document with only age (missing name) - let doc_content = r#"age: 25"#; - let doc = quarto_yaml::parse_file(doc_content, "user.yaml").unwrap(); - - let source_ctx = create_test_context("user.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let result = validate(&doc, &schema, ®istry, &source_ctx); - assert!(result.is_err()); - - // For now, we only get one error (first failure) - // But the architecture supports multiple errors - let error = result.unwrap_err(); - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - let json = diagnostic.to_json(); - - // Verify the error has proper source_range pointing to same file - assert_eq!(json["source_range"]["filename"], "user.yaml"); -} - -#[test] -fn test_custom_error_message_overrides_pattern_hint() { - // A string schema with a non-obvious pattern plus an authored errorMessage. 
- let custom_hint = r#"Must be "naive" or a standard time zone in the form Area/Location"#; - let schema_yaml = quarto_yaml::parse(&format!( - r#" -string: - pattern: "^(naive|UTC)$" - errorMessage: '{custom_hint}' -"# - )) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - let doc_content = r#"PST"#; - let doc = quarto_yaml::parse_file(doc_content, "tz.yaml").unwrap(); - let source_ctx = create_test_context("tz.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let result = validate(&doc, &schema, ®istry, &source_ctx); - assert!( - result.is_err(), - "Validation should fail for pattern mismatch" - ); - - let error = result.unwrap_err(); - assert_eq!( - error.custom_hint.as_deref(), - Some(custom_hint), - "custom_hint should be populated from the schema's errorMessage" - ); - - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - - // The authored message replaces the generic pattern hint. - let hints = diagnostic.hints(); - assert_eq!( - hints, - vec![custom_hint.to_string()], - "the authored errorMessage should be the only hint" - ); - assert!( - !hints - .iter() - .any(|h| h.contains("matches the expected format")), - "the generic pattern hint must not appear" - ); - - // The factual primary message is left intact. - assert!( - diagnostic.message().contains("does not match pattern"), - "primary message should still report the factual failure, got: {}", - diagnostic.message() - ); - - // JSON output carries the authored hint too. - let json = diagnostic.to_json(); - assert_eq!(json["hints"][0], custom_hint); - - // Text output includes the authored message. 
- let text = diagnostic.to_text(&source_ctx); - assert!( - text.contains(custom_hint), - "text output should include the authored errorMessage, got:\n{}", - text - ); -} - -#[test] -fn test_custom_error_message_applies_to_any_failure_at_node() { - // The override should apply to whatever failure occurs at the annotated - // node, not just pattern mismatches. Here a type mismatch (number, not - // string) trips the same authored message. - let schema_yaml = quarto_yaml::parse( - r#" -string: - pattern: "^[a-z]+$" - errorMessage: 'must be a lowercase identifier' -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - let doc_content = r#"42"#; - let doc = quarto_yaml::parse_file(doc_content, "id.yaml").unwrap(); - let source_ctx = create_test_context("id.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let result = validate(&doc, &schema, ®istry, &source_ctx); - assert!(result.is_err()); - - let error = result.unwrap_err(); - assert_eq!( - error.custom_hint.as_deref(), - Some("must be a lowercase identifier") - ); - - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - assert_eq!( - diagnostic.hints(), - vec!["must be a lowercase identifier".to_string()] - ); -} - -#[test] -fn test_no_custom_error_message_uses_generic_hint() { - // Without errorMessage, the generic hint is still produced. 
- let schema_yaml = quarto_yaml::parse( - r#" -string: - pattern: "^[0-9]+$" -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - let doc_content = r#"abc"#; - let doc = quarto_yaml::parse_file(doc_content, "p.yaml").unwrap(); - let source_ctx = create_test_context("p.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let error = validate(&doc, &schema, ®istry, &source_ctx).unwrap_err(); - assert_eq!(error.custom_hint, None); - - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - assert_eq!( - diagnostic.hints(), - vec!["Check that the string matches the expected format?".to_string()] - ); -} - -#[test] -fn test_custom_error_message_innermost_node_wins() { - // An object property's own errorMessage should win over the outer - // object's — the override binds to the schema node where the failure - // occurs. - let schema_yaml = quarto_yaml::parse( - r#" -object: - errorMessage: 'outer object message' - properties: - tz: - string: - pattern: "^UTC$" - errorMessage: 'inner tz message' -"#, - ) - .unwrap(); - let schema = Schema::from_yaml(&schema_yaml).unwrap(); - - let doc_content = "tz: PST"; - let doc = quarto_yaml::parse_file(doc_content, "doc.yaml").unwrap(); - let source_ctx = create_test_context("doc.yaml", doc_content); - let registry = quarto_yaml_validation::SchemaRegistry::new(); - - let error = validate(&doc, &schema, ®istry, &source_ctx).unwrap_err(); - assert_eq!(error.custom_hint.as_deref(), Some("inner tz message")); -} diff --git a/crates/quarto-yaml/Cargo.toml b/crates/quarto-yaml/Cargo.toml deleted file mode 100644 index 7e034a687..000000000 --- a/crates/quarto-yaml/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[package] -name = "quarto-yaml" -version.workspace = true -edition.workspace = true -authors.workspace = true -license.workspace = true -repository.workspace = true - -[dependencies] -yaml-rust2 = { workspace = true } -serde = { workspace = true 
} -thiserror = { workspace = true } -quarto-source-map = { workspace = true } - -[dev-dependencies] -regex = "1" - -[[bench]] -name = "memory_overhead" -harness = false - -[[bench]] -name = "scaling_overhead" -harness = false - -[lints] -workspace = true diff --git a/crates/quarto-yaml/README.md b/crates/quarto-yaml/README.md deleted file mode 100644 index 7c5190667..000000000 --- a/crates/quarto-yaml/README.md +++ /dev/null @@ -1,154 +0,0 @@ -# quarto-yaml - -YAML parsing with source location tracking for the Quarto Rust port. - -## Overview - -This crate provides `YamlWithSourceInfo`, which wraps `yaml-rust2::Yaml` with source location information for every node in the YAML tree. This enables precise error reporting and source tracking through transformations. - -## Design Philosophy - -Uses the **owned data approach**: wraps owned `Yaml` values with a parallel children structure for source tracking. This follows rust-analyzer's precedent of using owned data for tree structures. - -**Trade-offs:** -- Simple API with no lifetime parameters -- Compatible with config merging across different lifetimes -- Enables LSP caching (serializable) -- ~3x memory overhead (acceptable for config files <10KB) - -## Features - -- ✅ Parse YAML with complete source tracking -- ✅ Access raw `yaml-rust2::Yaml` for direct manipulation -- ✅ Source-tracked children for error reporting -- ✅ Type-safe access methods -- ⚠️ Basic alias support (converted to Null) -- ⚠️ Tags parsed but not exposed -- 🔴 Single document only (no multi-document streams yet) - -## Usage - -```rust -use quarto_yaml::{parse, parse_file}; - -// Parse from string -let yaml = parse(r#" -title: My Document -author: John Doe -tags: - - rust - - yaml -"#).unwrap(); - -// Parse with filename -let yaml = parse_file(content, "config.yaml").unwrap(); - -// Access raw Yaml -println!("Title: {:?}", yaml.yaml["title"]); - -// Source-tracked access -if let Some(title) = yaml.get_hash_value("title") { - println!("Title at 
{}:{}", - title.source_info.line, - title.source_info.col - ); -} - -// Navigate arrays -if let Some(tags) = yaml.get_hash_value("tags") { - for tag in tags.as_array().unwrap() { - println!("{} at line {}", - tag.yaml.as_str().unwrap(), - tag.source_info.line - ); - } -} -``` - -## API Overview - -### Core Types - -- **`YamlWithSourceInfo`** - Main wrapper with owned Yaml + source tracking -- **`SourceInfo`** - Source location (file, line, col, offset, length) -- **`YamlHashEntry`** - Hash entry with source spans for key, value, and entry - -### Functions - -- `parse(content: &str) -> Result` -- `parse_file(content: &str, filename: &str) -> Result` - -### Methods on YamlWithSourceInfo - -- `get_hash_value(&self, key: &str) -> Option<&YamlWithSourceInfo>` -- `get_array_item(&self, index: usize) -> Option<&YamlWithSourceInfo>` -- `as_array(&self) -> Option<&[YamlWithSourceInfo]>` -- `as_hash(&self) -> Option<&[YamlHashEntry]>` -- `is_scalar()`, `is_array()`, `is_hash()` - Type checking -- `len()`, `is_empty()` - Child count - -## Implementation Details - -### Data Structure - -```rust -pub struct YamlWithSourceInfo { - pub yaml: Yaml, // Direct access to raw Yaml - pub source_info: SourceInfo, // This node's location - children: Children, // Source-tracked children (private) -} -``` - -### Parser - -Uses yaml-rust2's `MarkedEventReceiver` API to build the tree: -- Event-based parsing (push parser) -- Stack-based tree construction -- Marker provides source positions - -## Limitations - -1. **Scalar lengths**: Currently approximate (uses value length) -2. **Aliases**: Converted to Null (anchor tracking not implemented) -3. **Tags**: Parsed but not exposed in API -4. 
**Multi-document**: Only first document parsed - -## Future Work - -See `claude-notes/implementation-plan.md` for roadmap: - -**Phase 2**: Parser improvements (accurate spans, aliases, tags) -**Phase 3**: Public API enhancements (merging, validation) -**Phase 4**: Advanced features (multi-document, streaming) -**Phase 5**: Integration (unified SourceInfo, LSP support) - -## Dependencies - -- `yaml-rust2 = "0.9"` - YAML parsing with markers -- `serde = "1.0"` - For future serialization -- `thiserror = "1.0"` - Error types - -## Testing - -```bash -cd crates/quarto-yaml -cargo test -``` - -All 14 tests passing ✅ - -## Documentation - -```bash -cargo doc --open -``` - -## License - -MIT (same as Kyoto project) - -## Notes - -This crate is part of the Kyoto project - a Rust port of Quarto CLI. See the main project for context and architecture decisions. - -For implementation notes, see `claude-notes/` directory. diff --git a/crates/quarto-yaml/YAML-1.2-REQUIREMENT.md b/crates/quarto-yaml/YAML-1.2-REQUIREMENT.md deleted file mode 100644 index a5c9198e3..000000000 --- a/crates/quarto-yaml/YAML-1.2-REQUIREMENT.md +++ /dev/null @@ -1,113 +0,0 @@ -# YAML 1.2 Requirement - -## Critical Constraint - -**We CANNOT use `serde_yaml` until it supports YAML 1.2.** - -## Background - -### YAML Version Differences - -- **YAML 1.1** (used by `yaml-rust` and `serde_yaml`): Older spec with ambiguous boolean parsing - - `yes`, `no`, `on`, `off` are parsed as booleans - - This breaks many real-world documents where `no` is meant to be a string - -- **YAML 1.2** (used by `yaml-rust2` and `quarto-yaml`): Fixed ambiguities - - Only `true`, `false` (and some case variants) are booleans - - `yes`, `no`, `on`, `off` are strings by default - - Much more predictable for users - -### Why This Matters for Quarto - -Quarto documents often contain YAML like: - -```yaml -author: - name: John Doe - orcid: no # Should be the string "no", not boolean false -``` - -With YAML 1.1 parsers, this would 
incorrectly parse `no` as `false`. - -## Current State - -- **quarto-yaml**: Uses `yaml-rust2` ✅ (YAML 1.2) -- **quarto-yaml-validation**: Uses `serde_yaml` ❌ (YAML 1.1) for Schema deserialization - -## Problem - -The current `Schema` deserialization in `quarto-yaml-validation/src/schema.rs` uses serde: - -```rust -impl<'de> Deserialize<'de> for Schema { - fn deserialize(deserializer: D) -> Result - // This uses serde_yaml, which only supports YAML 1.1 -} -``` - -This means: -1. **User documents** are parsed with YAML 1.2 (correct) -2. **Schema files** are parsed with YAML 1.1 (incorrect) - -This inconsistency is problematic because: -- Users expect consistent YAML parsing behavior -- Schema files may themselves contain ambiguous values like `no` in examples -- Quarto extensions will define their own schemas and expect YAML 1.2 - -## Solution - -**Use `YamlWithSourceInfo` for loading schemas, not serde deserialization.** - -Instead of: -```rust -// Current (WRONG - uses YAML 1.1) -let schema: Schema = serde_yaml::from_str(yaml_str)?; -``` - -Do: -```rust -// Correct (uses YAML 1.2) -let yaml = quarto_yaml::parse(yaml_str, Some(file_path))?; -let schema = Schema::from_yaml(&yaml)?; // Manual conversion -``` - -Benefits: -1. ✅ Consistent YAML 1.2 parsing for both documents and schemas -2. ✅ Source location tracking for schema files (enables better error messages) -3. ✅ No dependency on `serde_yaml` (one less dependency) -4. ✅ Extensions can use the same infrastructure - -Trade-offs: -- More manual code to convert `YamlWithSourceInfo` → `Schema` -- Cannot leverage serde's automatic deserialization -- But: More control over error messages and validation - -## Implementation Plan - -1. Remove `serde::Deserialize` implementation from `Schema` enum -2. Add `Schema::from_yaml(yaml: &YamlWithSourceInfo) -> Result` method -3. Add helper methods for parsing each schema type -4. Update tests to use `quarto_yaml::parse()` instead of `serde_yaml` -5. 
Add source location tracking to schema parsing errors - -## Timeline - -This should be done **before** implementing the `validate-yaml` binary, since it affects the fundamental architecture. - -## Related Files - -- `/crates/quarto-yaml-validation/src/schema.rs` - Schema deserialization (needs rewrite) -- `/claude-notes/yaml-schema-from-yaml-design.md` - Design document (needs revision) - -## Future: serde_yaml YAML 1.2 Support - -If `serde_yaml` ever adds YAML 1.2 support, we could: -1. Keep the `from_yaml()` approach for source tracking -2. Optionally add serde deserialization back as a convenience method -3. But `from_yaml()` should remain the primary API - -## References - -- yaml-rust2: https://docs.rs/yaml-rust2/ (YAML 1.2) -- serde_yaml: https://docs.rs/serde_yaml/ (YAML 1.1) -- YAML 1.2 spec: https://yaml.org/spec/1.2/spec.html diff --git a/crates/quarto-yaml/benches/memory_overhead.rs b/crates/quarto-yaml/benches/memory_overhead.rs deleted file mode 100644 index 63b492a11..000000000 --- a/crates/quarto-yaml/benches/memory_overhead.rs +++ /dev/null @@ -1,267 +0,0 @@ -//! Memory overhead benchmark for YamlWithSourceInfo vs raw Yaml -//! -//! This benchmark measures the actual memory overhead of our owned data approach -//! compared to using yaml-rust2::Yaml directly. -//! -//! 
Run with: cargo bench --bench memory_overhead - -use quarto_yaml::parse; -use std::mem; -use yaml_rust2::YamlLoader; - -/// Calculate approximate memory usage of a Yaml tree -fn estimate_yaml_memory(yaml: &yaml_rust2::Yaml) -> usize { - let mut size = mem::size_of::(); - - match yaml { - yaml_rust2::Yaml::Real(s) | yaml_rust2::Yaml::String(s) => { - size += s.capacity(); - } - yaml_rust2::Yaml::Array(arr) => { - size += arr.capacity() * mem::size_of::(); - for item in arr { - size += estimate_yaml_memory(item); - } - } - yaml_rust2::Yaml::Hash(hash) => { - // HashMap overhead is complex, approximate - size += hash.capacity() * (mem::size_of::() * 2); - for (k, v) in hash { - size += estimate_yaml_memory(k); - size += estimate_yaml_memory(v); - } - } - _ => {} - } - - size -} - -/// Calculate approximate memory usage of a YamlWithSourceInfo tree -fn estimate_yaml_with_source_memory(yaml: &quarto_yaml::YamlWithSourceInfo) -> usize { - let mut size = mem::size_of::(); - - // Add the underlying Yaml - size += estimate_yaml_memory(&yaml.yaml); - - // Add SourceInfo - // Note: SourceInfo size is already included in sizeof(YamlWithSourceInfo) - // For basic parsing, SourceInfo uses Original variant with FileId (just a usize) - - // Add children - if let Some(children) = yaml.as_array() { - // Note: using len() not capacity() since we only have a slice - size += std::mem::size_of_val(children); - for child in children { - size += estimate_yaml_with_source_memory(child); - } - } else if let Some(entries) = yaml.as_hash() { - // Note: using len() not capacity() since we only have a slice - size += std::mem::size_of_val(entries); - for entry in entries { - size += estimate_yaml_with_source_memory(&entry.key); - size += estimate_yaml_with_source_memory(&entry.value); - // Add the 3 SourceInfo structs in YamlHashEntry - size += mem::size_of::() * 3; - } - } - - size -} - -/// Test case with name, YAML content, and description -struct TestCase { - name: &'static str, - yaml: 
&'static str, - description: &'static str, -} - -const TEST_CASES: &[TestCase] = &[ - TestCase { - name: "simple_scalar", - yaml: "hello world", - description: "Single scalar value", - }, - TestCase { - name: "small_hash", - yaml: r#" -title: My Document -author: John Doe -date: 2024-01-01 -"#, - description: "Small hash with 3 string values", - }, - TestCase { - name: "small_array", - yaml: r#" -- item1 -- item2 -- item3 -- item4 -- item5 -"#, - description: "Small array with 5 items", - }, - TestCase { - name: "nested_structure", - yaml: r#" -project: - title: My Project - version: 1.0.0 - authors: - - name: Alice - email: alice@example.com - - name: Bob - email: bob@example.com - config: - port: 8080 - debug: true - features: - - feature1 - - feature2 - - feature3 -"#, - description: "Nested structure with arrays and hashes", - }, - TestCase { - name: "quarto_document", - yaml: r#" -title: "My Research Paper" -author: "Jane Smith" -date: "2024-01-01" -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: true - pdf: - documentclass: article - margin-left: 1in - margin-right: 1in -execute: - echo: true - warning: false - error: false -bibliography: references.bib -csl: apa.csl -"#, - description: "Typical Quarto document metadata", - }, - TestCase { - name: "quarto_project", - yaml: r#" -project: - type: website - output-dir: _site - -website: - title: "My Website" - navbar: - left: - - text: "Home" - href: index.qmd - - text: "About" - href: about.qmd - - text: "Blog" - href: blog/index.qmd - right: - - icon: github - href: https://github.com/user/repo - -format: - html: - theme: - light: flatly - dark: darkly - css: styles.css - toc: true - -execute: - freeze: auto -"#, - description: "Quarto project configuration", - }, -]; - -fn main() { - println!("Memory Overhead Analysis: YamlWithSourceInfo vs raw Yaml"); - println!("==========================================================\n"); - - println!("Size of base types:"); - println!( - " 
yaml_rust2::Yaml: {} bytes", - mem::size_of::() - ); - println!( - " YamlWithSourceInfo: {} bytes", - mem::size_of::() - ); - println!( - " SourceInfo: {} bytes", - mem::size_of::() - ); - println!( - " YamlHashEntry: {} bytes", - mem::size_of::() - ); - println!(); - - let mut total_raw = 0usize; - let mut total_tracked = 0usize; - - for test in TEST_CASES { - println!("Test: {} - {}", test.name, test.description); - println!("{}", "-".repeat(60)); - - // Parse with yaml-rust2 - let raw_docs = YamlLoader::load_from_str(test.yaml).expect("Failed to parse YAML"); - let raw_yaml = &raw_docs[0]; - let raw_size = estimate_yaml_memory(raw_yaml); - - // Parse with YamlWithSourceInfo - let tracked_yaml = parse(test.yaml).expect("Failed to parse YAML with source tracking"); - let tracked_size = estimate_yaml_with_source_memory(&tracked_yaml); - - let overhead = tracked_size as f64 / raw_size as f64; - let diff = tracked_size - raw_size; - - println!(" Raw Yaml size: {:>8} bytes", raw_size); - println!(" YamlWithSourceInfo size: {:>8} bytes", tracked_size); - println!( - " Overhead: {:>8} bytes ({:.2}x)", - diff, overhead - ); - println!(); - - total_raw += raw_size; - total_tracked += tracked_size; - } - - println!("=========================================================="); - println!("TOTALS across all test cases:"); - println!(" Total raw: {:>8} bytes", total_raw); - println!(" Total tracked: {:>8} bytes", total_tracked); - let total_overhead = total_tracked as f64 / total_raw as f64; - println!(" Average overhead: {:.2}x", total_overhead); - println!(); - - // Analysis - println!("Analysis:"); - if total_overhead < 2.0 { - println!(" ✅ Overhead is better than expected (<2x)"); - } else if total_overhead < 3.0 { - println!(" ✅ Overhead is within expected range (2-3x)"); - } else if total_overhead < 4.0 { - println!(" ⚠️ Overhead is slightly higher than expected (3-4x)"); - } else { - println!(" ❌ Overhead is significantly higher than expected (>4x)"); - } - - 
println!(); - println!("Notes:"); - println!(" - These are estimates based on size_of and capacity"); - println!(" - Actual memory usage may differ due to allocator overhead"); - println!(" - For typical Quarto configs (<10KB raw), overhead is acceptable"); - println!(" - The overhead provides precise error reporting and LSP support"); -} diff --git a/crates/quarto-yaml/benches/scaling_overhead.rs b/crates/quarto-yaml/benches/scaling_overhead.rs deleted file mode 100644 index 60bab058c..000000000 --- a/crates/quarto-yaml/benches/scaling_overhead.rs +++ /dev/null @@ -1,305 +0,0 @@ -//! Scaling analysis: verify overhead grows linearly with data size -//! -//! This benchmark tests whether memory overhead grows linearly (O(n)) or -//! superlinearly (O(n²), O(n log n), etc.) with increasing YAML data size. -//! -//! If overhead ratio stays constant as size increases → Linear (good!) -//! If overhead ratio increases as size increases → Superlinear (bad!) -//! -//! Run with: cargo bench --bench scaling_overhead - -use quarto_yaml::parse; -use std::mem; -use yaml_rust2::YamlLoader; - -/// Calculate approximate memory usage of a Yaml tree -fn estimate_yaml_memory(yaml: &yaml_rust2::Yaml) -> usize { - let mut size = mem::size_of::(); - - match yaml { - yaml_rust2::Yaml::Real(s) | yaml_rust2::Yaml::String(s) => { - size += s.capacity(); - } - yaml_rust2::Yaml::Array(arr) => { - size += arr.capacity() * mem::size_of::(); - for item in arr { - size += estimate_yaml_memory(item); - } - } - yaml_rust2::Yaml::Hash(hash) => { - size += hash.capacity() * (mem::size_of::() * 2); - for (k, v) in hash { - size += estimate_yaml_memory(k); - size += estimate_yaml_memory(v); - } - } - _ => {} - } - - size -} - -/// Calculate approximate memory usage of a YamlWithSourceInfo tree -fn estimate_yaml_with_source_memory(yaml: &quarto_yaml::YamlWithSourceInfo) -> usize { - let mut size = mem::size_of::(); - - size += estimate_yaml_memory(&yaml.yaml); - // Note: SourceInfo size is already 
included in sizeof(YamlWithSourceInfo) - // For basic parsing, SourceInfo uses Original variant with FileId (just a usize) - - if let Some(children) = yaml.as_array() { - size += std::mem::size_of_val(children); - for child in children { - size += estimate_yaml_with_source_memory(child); - } - } else if let Some(entries) = yaml.as_hash() { - size += std::mem::size_of_val(entries); - for entry in entries { - size += estimate_yaml_with_source_memory(&entry.key); - size += estimate_yaml_with_source_memory(&entry.value); - size += mem::size_of::() * 3; - } - } - - size -} - -struct ScalingResult { - size: usize, - raw_bytes: usize, - tracked_bytes: usize, - overhead_ratio: f64, -} - -/// Generate a flat array of N string items -fn generate_flat_array(n: usize) -> String { - let mut yaml = String::from("[\n"); - for i in 0..n { - yaml.push_str(&format!(" \"item_{}\",\n", i)); - } - yaml.push_str("]\n"); - yaml -} - -/// Generate a flat hash with N key-value pairs -fn generate_flat_hash(n: usize) -> String { - let mut yaml = String::new(); - for i in 0..n { - yaml.push_str(&format!("key_{}: \"value_{}\"\n", i, i)); - } - yaml -} - -/// Generate a nested structure with depth D and breadth B -/// (D levels deep, B children at each level) -fn generate_nested_structure(depth: usize, breadth: usize) -> String { - fn generate_level( - current_depth: usize, - max_depth: usize, - breadth: usize, - indent: usize, - ) -> String { - let ind = " ".repeat(indent); - - if current_depth >= max_depth { - return format!("{}value\n", ind); - } - - let mut yaml = String::new(); - for i in 0..breadth { - yaml.push_str(&format!("{}child_{}:\n", ind, i)); - yaml.push_str(&generate_level( - current_depth + 1, - max_depth, - breadth, - indent + 1, - )); - } - yaml - } - - generate_level(0, depth, breadth, 0) -} - -/// Generate a mixed structure: top-level hash with N keys, each having a small nested structure -fn generate_mixed_structure(n: usize) -> String { - let mut yaml = String::new(); - 
for i in 0..n { - yaml.push_str(&format!( - "section_{}:\n title: \"Section {}\"\n enabled: true\n items:\n - item1\n - item2\n - item3\n", - i, i - )); - } - yaml -} - -fn test_scaling(name: &str, generator: impl Fn(usize) -> String, sizes: &[usize]) { - println!("\n{}", "=".repeat(70)); - println!("Scaling Test: {}", name); - println!("{}", "=".repeat(70)); - println!( - "{:>6} {:>12} {:>12} {:>12} {:>8}", - "Size", "Raw (bytes)", "Tracked", "Overhead", "Ratio" - ); - println!("{}", "-".repeat(70)); - - let mut results = Vec::new(); - - for &size in sizes { - let yaml_content = generator(size); - - // Parse with yaml-rust2 - let raw_docs = YamlLoader::load_from_str(&yaml_content).expect("Failed to parse YAML"); - let raw_yaml = &raw_docs[0]; - let raw_bytes = estimate_yaml_memory(raw_yaml); - - // Parse with YamlWithSourceInfo - let tracked_yaml = parse(&yaml_content).expect("Failed to parse YAML with source tracking"); - let tracked_bytes = estimate_yaml_with_source_memory(&tracked_yaml); - - let overhead = tracked_bytes - raw_bytes; - let ratio = tracked_bytes as f64 / raw_bytes as f64; - - println!( - "{:>6} {:>12} {:>12} {:>12} {:>8.2}x", - size, raw_bytes, tracked_bytes, overhead, ratio - ); - - results.push(ScalingResult { - size, - raw_bytes, - tracked_bytes, - overhead_ratio: ratio, - }); - } - - // Analyze scaling behavior - println!("\nScaling Analysis:"); - - if results.len() >= 2 { - let first = &results[0]; - let last = &results[results.len() - 1]; - - let size_ratio = last.size as f64 / first.size as f64; - let raw_ratio = last.raw_bytes as f64 / first.raw_bytes as f64; - let tracked_ratio = last.tracked_bytes as f64 / first.tracked_bytes as f64; - - println!(" Size increased: {:.1}x", size_ratio); - println!(" Raw memory increased: {:.1}x", raw_ratio); - println!(" Tracked memory increased: {:.1}x", tracked_ratio); - - // Check if overhead ratio is stable - let ratio_change = (last.overhead_ratio - first.overhead_ratio).abs(); - let 
ratio_change_pct = (ratio_change / first.overhead_ratio) * 100.0; - - println!( - "\n Overhead ratio change: {:.2}x → {:.2}x (Δ{:.1}%)", - first.overhead_ratio, last.overhead_ratio, ratio_change_pct - ); - - if ratio_change_pct < 10.0 { - println!(" ✅ Overhead is STABLE - scales linearly!"); - } else if ratio_change_pct < 25.0 { - println!(" ⚠️ Overhead grows slightly - possibly O(n log n)"); - } else { - println!(" ❌ Overhead grows significantly - possibly superlinear!"); - } - - // Check raw and tracked growth rates - let raw_per_item = last.raw_bytes as f64 / last.size as f64; - let tracked_per_item = last.tracked_bytes as f64 / last.size as f64; - - println!("\n At largest size:"); - println!(" Raw bytes per item: {:.1} bytes", raw_per_item); - println!(" Tracked bytes per item: {:.1} bytes", tracked_per_item); - println!( - " Overhead per item: {:.1} bytes", - tracked_per_item - raw_per_item - ); - } -} - -fn main() { - println!("Scaling Overhead Analysis: YamlWithSourceInfo"); - println!("============================================================="); - println!("Testing whether overhead grows linearly with data size"); - println!(); - - // Test 1: Flat arrays - let array_sizes = vec![10, 50, 100, 250, 500, 1000]; - test_scaling("Flat Array", generate_flat_array, &array_sizes); - - // Test 2: Flat hashes - let hash_sizes = vec![10, 50, 100, 250, 500, 1000]; - test_scaling("Flat Hash", generate_flat_hash, &hash_sizes); - - // Test 3: Mixed structures (realistic Quarto configs) - let mixed_sizes = vec![5, 10, 20, 50, 100]; - test_scaling("Mixed Structure", generate_mixed_structure, &mixed_sizes); - - // Test 4: Nested structures (depth=5, varying breadth) - println!("\n{}", "=".repeat(70)); - println!("Nested Structure Scaling (depth=5, varying breadth)"); - println!("{}", "=".repeat(70)); - println!( - "{:>8} {:>12} {:>12} {:>12} {:>8}", - "Breadth", "Raw (bytes)", "Tracked", "Overhead", "Ratio" - ); - println!("{}", "-".repeat(70)); - - let breadths = 
vec![2, 3, 4, 5]; - let mut nested_results = Vec::new(); - - for breadth in &breadths { - let yaml_content = generate_nested_structure(5, *breadth); - - let raw_docs = YamlLoader::load_from_str(&yaml_content).expect("Failed to parse YAML"); - let raw_yaml = &raw_docs[0]; - let raw_bytes = estimate_yaml_memory(raw_yaml); - - let tracked_yaml = parse(&yaml_content).expect("Failed to parse YAML with source tracking"); - let tracked_bytes = estimate_yaml_with_source_memory(&tracked_yaml); - - let overhead = tracked_bytes - raw_bytes; - let ratio = tracked_bytes as f64 / raw_bytes as f64; - - println!( - "{:>8} {:>12} {:>12} {:>12} {:>8.2}x", - breadth, raw_bytes, tracked_bytes, overhead, ratio - ); - - nested_results.push((breadth, raw_bytes, tracked_bytes, ratio)); - } - - println!("\nNested Structure Analysis:"); - if nested_results.len() >= 2 { - let first = nested_results.first().unwrap(); - let last = nested_results.last().unwrap(); - - let total_nodes_first = first.0.pow(5); // breadth^depth - let total_nodes_last = last.0.pow(5); - - println!( - " Total nodes: {} → {}", - total_nodes_first, total_nodes_last - ); - println!(" Overhead ratio: {:.2}x → {:.2}x", first.3, last.3); - - let ratio_change_pct = ((last.3 - first.3) / first.3) * 100.0; - if ratio_change_pct.abs() < 10.0 { - println!(" ✅ Overhead is STABLE even with deep nesting!"); - } else { - println!(" ⚠️ Overhead changes with nesting depth"); - } - } - - // Final summary - println!("\n{}", "=".repeat(70)); - println!("CONCLUSION"); - println!("{}", "=".repeat(70)); - println!("If overhead ratios stay roughly constant (within 10-25%)"); - println!("across all tests, then overhead scales linearly O(n)."); - println!(); - println!("This means larger configs use proportionally more memory,"); - println!("but don't suffer from superlinear growth."); -} diff --git a/crates/quarto-yaml/claude-notes/implementation-plan.md b/crates/quarto-yaml/claude-notes/implementation-plan.md deleted file mode 100644 index 
984ba83f9..000000000 --- a/crates/quarto-yaml/claude-notes/implementation-plan.md +++ /dev/null @@ -1,167 +0,0 @@ -# quarto-yaml Implementation Plan - -## Overview - -This crate implements `YamlWithSourceInfo`, a data structure that wraps `yaml-rust2::Yaml` with source location tracking. This uses the **owned data approach** as decided in the design discussion (see `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md`). - -## Architecture Decision: Owned Data - -Following rust-analyzer's precedent, we use owned `Yaml` values with a parallel children structure for source tracking. Trade-off: ~3x memory overhead for simplicity and compatibility with config merging across different lifetimes. - -## Core Data Structures - -### 1. YamlWithSourceInfo - -```rust -pub struct YamlWithSourceInfo { - /// The complete yaml-rust2::Yaml value (owned) - pub yaml: Yaml, - - /// Source location for this node - pub source_info: SourceInfo, - - /// Source-tracked children (parallel structure) - children: Children, -} -``` - -### 2. Children Enum - -```rust -enum Children { - None, - Array(Vec), - Hash(Vec), -} -``` - -### 3. YamlHashEntry - -```rust -pub struct YamlHashEntry { - pub key: YamlWithSourceInfo, - pub value: YamlWithSourceInfo, - pub key_span: SourceInfo, // Span of just the key - pub value_span: SourceInfo, // Span of just the value - pub entry_span: SourceInfo, // Span of key + value -} -``` - -## SourceInfo Type - -For Phase 1, we'll use a simple SourceInfo type: - -```rust -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct SourceInfo { - /// Optional filename - pub file: Option, - - /// Byte offset in source - pub offset: usize, - - /// Line number (1-based) - pub line: usize, - - /// Column number (1-based) - pub col: usize, - - /// Length in bytes - pub len: usize, -} -``` - -Later this will be replaced by the unified SourceInfo from the main project. 
- -## Implementation Phases - -### Phase 1: Core Data Structures (Current) -- [x] Create crate structure -- [ ] Define SourceInfo type -- [ ] Define YamlWithSourceInfo, Children, YamlHashEntry -- [ ] Implement basic constructors - -### Phase 2: Parser Implementation -- [ ] Implement MarkedEventReceiver trait -- [ ] Build tree from events -- [ ] Track source positions -- [ ] Handle errors - -### Phase 3: Public API -- [ ] `parse(content: &str) -> Result` -- [ ] `parse_file(content: &str, filename: &str) -> Result` -- [ ] Access methods: `get_hash_value()`, `get_array_item()`, etc. -- [ ] Error type with source positions - -### Phase 4: Testing -- [ ] Unit tests for data structures -- [ ] Parser tests with various YAML structures -- [ ] Source position tracking tests -- [ ] Error handling tests - -### Phase 5: Documentation -- [ ] API documentation -- [ ] Usage examples -- [ ] Integration guide - -## Parser Design - -The parser will use yaml-rust2's `MarkedEventReceiver` API: - -```rust -struct YamlBuilder { - stack: Vec, - source: String, - filename: Option, -} - -impl MarkedEventReceiver for YamlBuilder { - fn on_event(&mut self, event: Event, marker: Marker) { - // Build tree with source tracking - } -} -``` - -## Testing Strategy - -### Test Categories - -1. **Basic YAML structures** - - Scalars (string, int, float, bool) - - Arrays - - Hashes - - Nested structures - -2. **Source position tracking** - - Verify line/column accuracy - - Test multi-line values - - Test nested structures - -3. **Error handling** - - Invalid YAML - - Parse errors with positions - -4. **Edge cases** - - Empty documents - - Documents with only comments - - Multi-document streams (initially unsupported) - -## Dependencies - -- `yaml-rust2 = "0.9"` - YAML parsing with position tracking -- `serde = "1.0"` - For future SourceInfo serialization -- `thiserror = "1.0"` - Error types - -## Future Enhancements - -1. **Config merging** - Merge multiple YamlWithSourceInfo objects -2. 
**Validation** - Schema validation with source positions -3. **Unified SourceInfo** - Replace with project-wide SourceInfo type -4. **YAML tags** - Support for !expr and custom tags -5. **Multi-document** - Support YAML streams - -## References - -- Design document: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/yaml-with-source-info-design.md` -- Session log: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md` -- rust-analyzer patterns: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/rust-analyzer-owned-data-patterns.md` diff --git a/crates/quarto-yaml/claude-notes/implementation-status.md b/crates/quarto-yaml/claude-notes/implementation-status.md deleted file mode 100644 index 07ed812b0..000000000 --- a/crates/quarto-yaml/claude-notes/implementation-status.md +++ /dev/null @@ -1,206 +0,0 @@ -# quarto-yaml Implementation Status - -## Overview - -The `quarto-yaml` crate is now **functional** with basic parsing capabilities. It successfully parses YAML documents and tracks source locations for all nodes. 
- -## Completed Features - -### Core Data Structures ✅ - -- **SourceInfo**: Tracks source locations with file, offset, line, column, and length -- **YamlWithSourceInfo**: Main wrapper around yaml-rust2::Yaml with source tracking -- **YamlHashEntry**: Represents hash entries with source tracking for keys, values, and entire entries -- **Children enum**: Internal structure for tracking child nodes (Array/Hash/None) - -### Parser Implementation ✅ - -- **MarkedEventReceiver**: Implemented for YamlBuilder -- **Event-based parsing**: Handles all yaml-rust2 events -- **Tree construction**: Builds YamlWithSourceInfo tree from events -- **Source tracking**: Records source positions for all nodes - -### Public API ✅ - -- `parse(content: &str)` - Parse YAML from string -- `parse_file(content: &str, filename: &str)` - Parse with filename -- `get_hash_value(&self, key: &str)` - Access hash values -- `get_array_item(&self, index: usize)` - Access array elements -- `as_array()`, `as_hash()` - Access children -- `is_scalar()`, `is_array()`, `is_hash()` - Type checking - -### Tests ✅ - -All 14 tests passing: -- Scalar parsing (string, integer, boolean) -- Array parsing -- Hash parsing -- Nested structures -- Source info tracking -- Filename association - -## Architecture Decisions - -### Owned Data Approach ✅ - -Following rust-analyzer's precedent, we use **owned yaml-rust2::Yaml** values with a parallel Children structure for source tracking. - -**Trade-offs:** -- ✅ Simple API (no lifetime parameters) -- ✅ Enables config merging across different lifetimes -- ✅ Compatible with LSP caching -- ⚠️ ~3x memory overhead (acceptable for configs <10KB) - -### Design Pattern ✅ - -```rust -pub struct YamlWithSourceInfo { - pub yaml: Yaml, // Complete owned Yaml tree - pub source_info: SourceInfo, // This node's location - children: Children, // Source-tracked children -} -``` - -This provides **dual access**: -1. Direct Yaml access for code that doesn't need source tracking -2. 
Source-tracked access through children for error reporting - -## Known Limitations - -### 1. Scalar Length Computation ⚠️ - -Currently uses value length, not accounting for: -- Quotes and escapes -- Multi-line strings -- Block scalars - -**TODO**: Compute accurate lengths from source positions - -### 2. Alias Support 🔴 - -Aliases are currently converted to Null values. - -**TODO**: Track anchors and resolve aliases properly - -### 3. Tag Support 🔴 - -YAML tags (like `!expr`) are parsed but not exposed in the API. - -**TODO**: Add tag field to YamlWithSourceInfo - -### 4. Multi-Document Support 🔴 - -Currently only parses the first document in a stream. - -**TODO**: Support multi-document parsing if needed - -## Code Quality - -### Warnings ⚠️ - -Two dead_code warnings (acceptable for now): -- `source` field in YamlBuilder (may be needed for accurate length computation) -- `Complete` variant in BuildNode (may be used in future refactoring) - -### Test Coverage ✅ - -Good coverage of: -- Basic types (scalar, array, hash) -- Nested structures -- Source tracking -- Edge cases - -## Next Steps - -### Phase 1: Core Improvements - -1. **Accurate source spans** - Compute real lengths from markers -2. **Alias support** - Track and resolve anchors -3. **Tag support** - Expose tags in API - -### Phase 2: Advanced Features - -4. **Config merging** - Implement merge operations with source tracking -5. **Validation** - Schema validation with source-aware errors -6. **Error reporting** - Better error messages with source context - -### Phase 3: Integration - -7. **Unified SourceInfo** - Replace with project-wide SourceInfo type -8. **quarto-markdown integration** - Use for YAML metadata in documents -9. 
**LSP support** - Provide hover/completion data - -## Usage Example - -```rust -use quarto_yaml::{parse_file, YamlWithSourceInfo}; - -let yaml = parse_file(r#" -title: My Document -author: John Doe -tags: - - rust - - yaml -"#, "config.yaml").unwrap(); - -// Direct Yaml access -println!("Title: {:?}", yaml.yaml["title"]); - -// Source-tracked access -if let Some(title) = yaml.get_hash_value("title") { - println!("Title at {}:{}", - title.source_info.line, - title.source_info.col - ); -} - -// Navigate structure -if let Some(tags) = yaml.get_hash_value("tags") { - for (i, tag) in tags.as_array().unwrap().iter().enumerate() { - println!("Tag {}: {} at line {}", - i, - tag.yaml.as_str().unwrap(), - tag.source_info.line - ); - } -} -``` - -## File Structure - -``` -crates/quarto-yaml/ -├── Cargo.toml -├── claude-notes/ -│ ├── implementation-plan.md # Original plan -│ └── implementation-status.md # This file -└── src/ - ├── lib.rs # Public API - ├── error.rs # Error types - ├── source_info.rs # SourceInfo struct - ├── yaml_with_source_info.rs # Core data structures - └── parser.rs # Parser implementation -``` - -## Dependencies - -- `yaml-rust2 = "0.9"` - YAML parsing with position tracking -- `serde = "1.0"` - For future SourceInfo serialization -- `thiserror = "1.0"` - Error types - -## Timeline - -**Total time: ~2-3 hours** - -- Planning: 30min -- Data structures: 1h -- Parser implementation: 1h -- Testing and debugging: 30min - -## Conclusion - -The `quarto-yaml` crate is now ready for basic use! It successfully parses YAML with source tracking, providing a solid foundation for config parsing, validation, and LSP features. - -The owned data approach has proven to be simple and effective, with no lifetime complexity and clean APIs. The memory overhead is acceptable for typical config file sizes. - -Next steps should focus on improving source span accuracy, adding alias/tag support, and implementing config merging operations. 
diff --git a/crates/quarto-yaml/claude-notes/memory-overhead-analysis.md b/crates/quarto-yaml/claude-notes/memory-overhead-analysis.md deleted file mode 100644 index 571e138dc..000000000 --- a/crates/quarto-yaml/claude-notes/memory-overhead-analysis.md +++ /dev/null @@ -1,221 +0,0 @@ -# Memory Overhead Analysis - -## Executive Summary - -**Measured overhead: 6.38x** (not the 3x estimated) - -However, this is still **acceptable** for Quarto's use case: -- Typical config files are <10KB -- 10KB × 6.38 = ~64KB total memory -- Provides precise error reporting and LSP support -- Memory is cheap, developer time is expensive - -## Benchmark Results - -### Base Type Sizes - -``` -yaml_rust2::Yaml: 56 bytes -YamlWithSourceInfo: 144 bytes (2.57x larger) -SourceInfo: 56 bytes -YamlHashEntry: 456 bytes (!!!) -``` - -### Test Cases - -| Test Case | Raw Yaml | YamlWithSourceInfo | Overhead | -|-----------|----------|---------------------|----------| -| Simple scalar | 67 bytes | 267 bytes | **3.99x** | -| Small hash (3 items) | 772 bytes | 4,424 bytes | **5.73x** | -| Small array (5 items) | 809 bytes | 2,866 bytes | **3.54x** | -| Nested structure | 4,402 bytes | 27,924 bytes | **6.34x** | -| Quarto document | 4,991 bytes | 32,175 bytes | **6.45x** | -| Quarto project | 8,275 bytes | 55,576 bytes | **6.72x** | -| **TOTAL** | **19,316 bytes** | **123,232 bytes** | **6.38x** | - -## Why Higher Than Expected? - -### 1. YamlHashEntry is Heavy (456 bytes!) - -Each hash entry contains: -- `key: YamlWithSourceInfo` (144 bytes) -- `value: YamlWithSourceInfo` (144 bytes) -- `key_span: SourceInfo` (56 bytes) -- `value_span: SourceInfo` (56 bytes) -- `entry_span: SourceInfo` (56 bytes) - -**Total: 456 bytes per entry** - -### 2. 
Recursive Duplication - -`YamlWithSourceInfo` contains: -- `yaml: Yaml` (56 bytes) - the original tree -- `source_info: SourceInfo` (56 bytes) -- `children: Children` (enum with Vec) - -The `children` field duplicates the entire tree structure, creating recursive overhead. - -### 3. SourceInfo is Not Small - -At 56 bytes, `SourceInfo` is as large as `Yaml` itself: -- `file: Option` (24 bytes) -- `offset: usize` (8 bytes) -- `line: usize` (8 bytes) -- `col: usize` (8 bytes) -- `len: usize` (8 bytes) - -### 4. Overhead Increases with Nesting - -Deeper structures have higher overhead because each level duplicates: -- The Yaml value -- SourceInfo for the node -- Children structure with more YamlWithSourceInfo nodes - -## Is This A Problem? - -### No, for several reasons: - -#### 1. Absolute Numbers Are Small - -Even "large" Quarto project configs: -- Raw: 8KB → With tracking: 56KB -- Still fits in L1 cache on modern CPUs -- Negligible compared to typical application memory usage - -#### 2. Temporary Data Structure - -Config parsing is a one-time operation: -- Parse → Validate → Extract values → Drop YamlWithSourceInfo -- Not held in memory throughout application lifetime -- Only kept for error reporting context - -#### 3. Value Proposition - -The overhead buys us: -- ✅ Precise error messages with line/col -- ✅ LSP hover showing where config came from -- ✅ Config merging with source tracking -- ✅ Validation errors pointing to exact location -- ✅ "Jump to definition" for config values - -#### 4. Proven At Scale - -rust-analyzer uses similar approach: -- Owned SyntaxNode with refcounting -- Handles entire Rust codebases (100K+ LOC) -- Memory overhead acceptable - -## Optimization Opportunities - -If we needed to reduce overhead (we don't), we could: - -### 1. 
Remove Redundant SourceInfo from YamlHashEntry - -Currently: -```rust -pub struct YamlHashEntry { - pub key: YamlWithSourceInfo, // has source_info - pub value: YamlWithSourceInfo, // has source_info - pub key_span: SourceInfo, // duplicate! - pub value_span: SourceInfo, // duplicate! - pub entry_span: SourceInfo, -} -``` - -Could just use: -```rust -pub struct YamlHashEntry { - pub key: YamlWithSourceInfo, // use key.source_info - pub value: YamlWithSourceInfo, // use value.source_info - pub entry_span: SourceInfo, // only this is unique -} -``` - -**Savings**: 112 bytes per hash entry → ~30% reduction for hashes - -### 2. Box SourceInfo - -```rust -pub struct YamlWithSourceInfo { - pub yaml: Yaml, - pub source_info: Box, // 8 bytes pointer vs 56 bytes struct - children: Children, -} -``` - -**Savings**: 48 bytes per node, but adds indirection (slower access) - -### 3. Interned Filenames - -Instead of `file: Option` in every SourceInfo: -```rust -pub struct SourceInfo { - pub file_id: Option, // index into global string table - // ... -} -``` - -**Savings**: ~16 bytes per node with filename - -### 4. Compact SourceInfo - -```rust -#[repr(C)] -pub struct CompactSourceInfo { - pub file_id: u16, // 65K files should be enough - pub offset: u32, // 4GB should be enough - pub line: u16, // 65K lines should be enough - pub col: u16, // 65K columns should be enough - pub len: u16, // 65K byte spans should be enough -} -// Total: 12 bytes vs 56 bytes -``` - -**Savings**: 44 bytes per node → ~70% reduction in SourceInfo overhead - -### 5. Single Allocation for Tree - -Like rust-analyzer's arena allocation: -- Allocate entire tree in single Vec -- Use indices instead of pointers -- Better cache locality - -**Savings**: Reduces allocator overhead, improves cache performance - -## Recommendation - -**Do nothing.** The current overhead is acceptable because: - -1. **Absolute cost is low** (~60KB for typical configs) -2. **Temporary data** (parsed, used, dropped) -3. 
**High value** (precise error reporting, LSP support) -4. **Simple implementation** (no lifetime complexity) -5. **Proven approach** (rust-analyzer does similar) - -If we later discover memory pressure (unlikely), we have clear optimization paths. - -## Updating Documentation - -Need to update these claims: - -### Before -"~3x memory overhead (acceptable for configs <10KB)" - -### After -"~6x memory overhead, but still acceptable: -- 10KB config → ~60KB in memory -- Temporary data structure (parse, validate, drop) -- Provides precise error reporting and LSP support" - -## Conclusion - -The **6.38x overhead is higher than estimated but still acceptable** for Quarto's use case. - -The owned data approach remains the right choice: -- ✅ Simple API (no lifetime parameters) -- ✅ Config merging across different lifetimes -- ✅ LSP caching support -- ✅ Memory cost is negligible for typical configs -- ✅ Follows rust-analyzer precedent - -**Status**: No changes needed. Ship it! 🚢 diff --git a/crates/quarto-yaml/claude-notes/scaling-analysis.md b/crates/quarto-yaml/claude-notes/scaling-analysis.md deleted file mode 100644 index c93ef5bb9..000000000 --- a/crates/quarto-yaml/claude-notes/scaling-analysis.md +++ /dev/null @@ -1,238 +0,0 @@ -# Scaling Analysis: Linear vs Superlinear Growth - -## Executive Summary - -✅ **Overhead scales LINEARLY with data size** - no superlinear growth detected. - -The overhead ratio stabilizes around 4-6x for realistic workloads, with only small variations (2-13%) as data size increases 100x. 
- -## Test Results - -### Test 1: Flat Array (10 → 1000 items) - -``` -Size Raw Tracked Ratio -10 1,592 5,496 3.45x -50 6,840 26,536 3.88x -100 13,624 52,836 3.88x ← Stabilizes -250 30,392 132,036 4.34x -500 60,728 264,036 4.35x -1000 121,400 528,036 4.35x ← Stable -``` - -**Analysis**: -- Overhead ratio: 3.45x → 4.35x (26% change) -- Size increased: 100x -- Memory increased: Raw 76x, Tracked 96x -- **Verdict**: Small fixed cost at tiny sizes, then **linear** (ratio stabilizes at 4.35x) - -### Test 2: Flat Hash (10 → 1000 key-value pairs) - -``` -Size Raw Tracked Ratio -10 2,874 14,544 5.06x -50 12,618 70,288 5.57x -100 25,190 140,360 5.57x ← Stabilizes -250 83,072 369,992 4.45x -500 166,998 740,168 4.43x -1000 334,850 1,480,520 4.42x ← Stable -``` - -**Analysis**: -- Overhead ratio: 5.06x → 4.42x (12.6% change, actually *decreasing*) -- Size increased: 100x -- Memory increased: Raw 117x, Tracked 102x -- **Verdict**: **Linear** - ratio stabilizes, slight decrease due to amortization - -### Test 3: Mixed Structure (5 → 100 sections, most realistic) - -``` -Size Raw Tracked Ratio -5 7,005 42,860 6.12x -10 13,954 85,464 6.12x ← Same! -20 27,862 170,722 6.13x -50 68,018 424,928 6.25x -100 135,990 849,650 6.25x ← Stable -``` - -**Analysis**: -- Overhead ratio: 6.12x → 6.25x (**2.1% change** - excellent!) -- Size increased: 20x -- Memory increased: Raw 19.4x, Tracked 19.8x -- **Verdict**: ✅ **Perfectly linear!** This is closest to real Quarto configs - -### Test 4: Nested Structures (depth=5, breadth 2 → 5) - -``` -Breadth Total Nodes Raw Tracked Ratio -2 32 18,010 146,128 8.11x -3 243 85,124 801,526 9.42x -4 1,024 434,836 3,597,208 8.27x -5 3,125 1,092,680 9,674,890 8.85x -``` - -**Analysis**: -- Overhead ratio: 8.11x → 8.85x (9.1% change) -- Nodes increased: 98x (32 → 3,125) -- **Verdict**: ✅ **Linear** even with deep nesting - -## Why Flat Array Shows 26% Change? - -The "26% change" in flat arrays is **not** superlinear growth. 
It's **fixed costs amortizing**: - -### Small Size (10 items): 3.45x overhead -- Fixed overhead (YamlWithSourceInfo struct, Children enum, etc.) is significant -- Relative to tiny data size, fixed costs dominate - -### Large Size (1000 items): 4.35x overhead -- Same fixed overhead, but now spread over 1000 items -- Per-item overhead dominates, fixed costs negligible -- **Ratio stabilizes** at 4.35x - -This is **exactly what we want** - it means overhead is primarily per-item, not per-size-squared or worse. - -## Mathematical Verification - -For linear scaling, memory should follow: `M(n) = a + b·n` - -Where: -- `a` = fixed overhead -- `b` = per-item overhead -- `n` = number of items - -Looking at flat array results: - -``` -n=100: M = 52,836 -n=1000: M = 528,036 - -Per-item overhead: (528,036 - 52,836) / (1000 - 100) = 528 bytes/item -``` - -This matches the "528.0 bytes per item" reported at n=1000. ✅ - -## Practical Implications - -### For Quarto Configs - -Typical Quarto project config (~100 keys): -- Raw: ~136 KB -- Tracked: ~850 KB -- Overhead: 6.25x (stable ratio) - -Large Quarto project (1000 keys) - unlikely but possible: -- Raw: ~1.3 MB -- Tracked: ~8.5 MB -- Overhead: Still 6.25x (same ratio!) - -**No superlinear explosion** - memory grows proportionally. - -### Worst Case: Deep Nesting - -Even with pathological depth=5, breadth=5 (3,125 nodes): -- Raw: 1.1 MB -- Tracked: 9.7 MB -- Overhead: 8.85x - -This is still linear - the higher ratio (8.85x vs 6.25x) is because hash entries are expensive (456 bytes each), but it doesn't grow superlinearly. - -## Comparison to Alternatives - -### If We Had O(n²) Scaling (hypothetical bad case): - -``` -Size Linear (actual) Quadratic (bad) -10 5,496 ~5,000 -100 52,836 ~500,000 (10x worse!) -1000 528,036 ~50,000,000 (100x worse!) -``` - -We're seeing **linear**, not quadratic. 
🎉 - -### If We Had O(n log n) Scaling: - -``` -Size Linear (actual) n log n (bad) -10 5,496 ~5,000 -100 52,836 ~100,000 (2x worse) -1000 528,036 ~3,000,000 (6x worse) -``` - -We're not seeing this either - ratio stays constant. - -## Why This Matters - -### Memory Usage is Predictable - -- 10 KB config → ~60 KB tracked (6x) -- 100 KB config → ~600 KB tracked (6x) -- 1 MB config → ~6 MB tracked (6x) - -**Predictable scaling** means no surprises with large configs. - -### No Performance Cliffs - -With superlinear growth, you'd hit a "cliff" where: -- Small configs work fine -- Medium configs slow down noticeably -- Large configs become unusable - -**Linear scaling** means smooth, predictable performance across all sizes. - -### Validation for Design - -The owned-data approach with parallel children structure: -- ✅ Scales linearly (verified) -- ✅ Predictable memory usage -- ✅ No pathological cases -- ✅ Simple implementation -- ✅ No lifetime complexity - -## Detailed Scaling Behavior - -### Per-Item Overhead by Structure Type - -| Structure Type | Bytes per Item | Notes | -|---------------|----------------|-------| -| Flat Array | 528 | YamlWithSourceInfo + SourceInfo | -| Flat Hash | 1,480 | Includes YamlHashEntry (456 bytes!) | -| Mixed (realistic) | 8,497 | Nested hashes + arrays + scalars | -| Deep Nested | ~3,100 | More hash entries at each level | - -Hash entries are expensive (456 bytes each) because they store: -- 2× YamlWithSourceInfo (288 bytes) -- 3× SourceInfo (168 bytes) - -But even with expensive entries, scaling remains **linear**. - -## Conclusion - -✅ **Overhead scales linearly O(n)** - verified across multiple test cases: -- Flat arrays: Stable at 4.35x (after initial warmup) -- Flat hashes: Stable at 4.42x -- Mixed structures: **2.1% variation** (excellent!) 
-- Deep nesting: 9.1% variation (good) - -✅ **No superlinear growth** - memory increases proportionally with data size - -✅ **Predictable behavior** - can estimate memory usage for any config size - -✅ **Design validated** - owned data approach works well at scale - -**Recommendation**: The current implementation is production-ready. The linear scaling means we won't encounter performance cliffs or memory explosions with larger configs. - -## Benchmark Tool - -Run the scaling analysis: -```bash -cd crates/quarto-yaml -cargo bench --bench scaling_overhead -``` - -Tests: -- Flat arrays: 10 → 1000 items -- Flat hashes: 10 → 1000 pairs -- Mixed structures: 5 → 100 sections (realistic Quarto configs) -- Nested structures: depth=5, breadth 2→5 (3,125 nodes max) - -All tests confirm **linear scaling**. 🚀 diff --git a/crates/quarto-yaml/src/error.rs b/crates/quarto-yaml/src/error.rs deleted file mode 100644 index 2371e7073..000000000 --- a/crates/quarto-yaml/src/error.rs +++ /dev/null @@ -1,148 +0,0 @@ -//! Error types for YAML parsing with source locations. - -use crate::SourceInfo; -use std::fmt; - -/// Result type alias for quarto-yaml operations. -pub type Result = std::result::Result; - -/// Errors that can occur during YAML parsing. -#[derive(Debug, Clone, PartialEq)] -pub enum Error { - /// YAML syntax error - ParseError { - message: String, - location: Option, - }, - - /// Unexpected end of input - UnexpectedEof { location: Option }, - - /// Invalid YAML structure - InvalidStructure { - message: String, - location: Option, - }, -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Error::ParseError { message, location } => { - write!(f, "Parse error: {}", message)?; - // TODO: Proper location display requires SourceContext to map offsets to row/column. - // For now, we only show the error message without location details. 
- // To fix: refactor Error type to carry SourceContext or resolve locations before creating errors. - if let Some(_loc) = location { - // Location information available but cannot display without SourceContext - } - Ok(()) - } - Error::UnexpectedEof { location } => { - write!(f, "Unexpected end of input")?; - // TODO: Proper location display requires SourceContext to map offsets to row/column. - // For now, we only show the error message without location details. - // To fix: refactor Error type to carry SourceContext or resolve locations before creating errors. - if let Some(_loc) = location { - // Location information available but cannot display without SourceContext - } - Ok(()) - } - Error::InvalidStructure { message, location } => { - write!(f, "Invalid YAML structure: {}", message)?; - // TODO: Proper location display requires SourceContext to map offsets to row/column. - // For now, we only show the error message without location details. - // To fix: refactor Error type to carry SourceContext or resolve locations before creating errors. 
- if let Some(_loc) = location { - // Location information available but cannot display without SourceContext - } - Ok(()) - } - } - } -} - -impl std::error::Error for Error {} - -impl From for Error { - fn from(err: yaml_rust2::ScanError) -> Self { - Error::ParseError { - message: err.to_string(), - location: None, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use quarto_source_map::FileId; - - #[test] - fn test_parse_error_display_no_location() { - let error = Error::ParseError { - message: "unexpected token".to_string(), - location: None, - }; - assert_eq!(error.to_string(), "Parse error: unexpected token"); - } - - #[test] - fn test_parse_error_display_with_location() { - let location = SourceInfo::original(FileId(0), 10, 20); - let error = Error::ParseError { - message: "invalid syntax".to_string(), - location: Some(location), - }; - // Location is not displayed currently (see TODO in code) - assert_eq!(error.to_string(), "Parse error: invalid syntax"); - } - - #[test] - fn test_unexpected_eof_display_no_location() { - let error = Error::UnexpectedEof { location: None }; - assert_eq!(error.to_string(), "Unexpected end of input"); - } - - #[test] - fn test_unexpected_eof_display_with_location() { - let location = SourceInfo::original(FileId(0), 100, 100); - let error = Error::UnexpectedEof { - location: Some(location), - }; - assert_eq!(error.to_string(), "Unexpected end of input"); - } - - #[test] - fn test_invalid_structure_display_no_location() { - let error = Error::InvalidStructure { - message: "expected mapping".to_string(), - location: None, - }; - assert_eq!( - error.to_string(), - "Invalid YAML structure: expected mapping" - ); - } - - #[test] - fn test_invalid_structure_display_with_location() { - let location = SourceInfo::original(FileId(0), 50, 60); - let error = Error::InvalidStructure { - message: "duplicate key".to_string(), - location: Some(location), - }; - assert_eq!(error.to_string(), "Invalid YAML structure: duplicate key"); - } 
- - #[test] - fn test_error_is_std_error() { - let error = Error::ParseError { - message: "test".to_string(), - location: None, - }; - // Verify that Error implements std::error::Error - fn assert_error(_: &T) {} - assert_error(&error); - } -} diff --git a/crates/quarto-yaml/src/lib.rs b/crates/quarto-yaml/src/lib.rs deleted file mode 100644 index a09f5907b..000000000 --- a/crates/quarto-yaml/src/lib.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! # quarto-yaml -//! -//! YAML parsing with source location tracking. -//! -//! This crate provides `YamlWithSourceInfo`, which wraps `yaml-rust2::Yaml` with -//! source location information for every node in the YAML tree. This enables -//! precise error reporting and source tracking through transformations. -//! -//! ## Design -//! -//! Uses the **owned data approach**: wraps owned `Yaml` values with a parallel -//! children structure for source tracking. Trade-off: ~3x memory overhead for -//! simplicity and compatibility with config merging across different lifetimes. -//! -//! Follows rust-analyzer's precedent of using owned data with reference counting -//! for tree structures. -//! -//! ## Example -//! -//! ```rust,no_run -//! use quarto_yaml::parse; -//! -//! let content = r#" -//! title: My Document -//! author: John Doe -//! "#; -//! -//! let yaml = parse(content).unwrap(); -//! // Access with source location tracking -//! if let Some(title) = yaml.get_hash_value("title") { -//! println!("Title at offset {}", title.source_info.start_offset()); -//! } -//! 
``` - -mod error; -mod parser; -mod yaml_with_source_info; - -pub use error::{Error, Result}; -pub use parser::{file_id_for_filename, parse, parse_file, parse_with_parent}; -pub use quarto_source_map::SourceInfo; // Re-export from quarto-source-map -pub use yaml_with_source_info::{YamlHashEntry, YamlWithSourceInfo}; diff --git a/crates/quarto-yaml/src/parser.rs b/crates/quarto-yaml/src/parser.rs deleted file mode 100644 index 75cd0cbc7..000000000 --- a/crates/quarto-yaml/src/parser.rs +++ /dev/null @@ -1,1487 +0,0 @@ -//! YAML parser that builds YamlWithSourceInfo trees. - -use crate::{Error, Result, SourceInfo, YamlHashEntry, YamlWithSourceInfo}; -use yaml_rust2::Yaml; -use yaml_rust2::parser::{Event, MarkedEventReceiver, Parser}; -use yaml_rust2::scanner::Marker; - -/// Parse YAML from a string, producing a YamlWithSourceInfo tree. -/// -/// This parses a single YAML document. If the input contains multiple documents, -/// only the first one will be parsed. -/// -/// # Example -/// -/// ```rust -/// use quarto_yaml::parse; -/// -/// let yaml = parse("title: My Document").unwrap(); -/// assert!(yaml.is_hash()); -/// ``` -/// -/// # Errors -/// -/// Returns an error if the YAML is invalid or if parsing fails. -pub fn parse(content: &str) -> Result { - parse_impl(content, None, None) -} - -/// Parse YAML from a string with an associated filename. -/// -/// The filename is included in source location information for better -/// error reporting. -/// -/// # Example -/// -/// ```rust -/// use quarto_yaml::parse_file; -/// -/// let yaml = parse_file("title: My Document", "config.yaml").unwrap(); -/// // Filename tracking will be added in a future update -/// assert!(yaml.source_info.end_offset() > 0); -/// ``` -/// -/// # Errors -/// -/// Returns an error if the YAML is invalid or if parsing fails. -pub fn parse_file(content: &str, filename: &str) -> Result { - parse_impl(content, Some(filename), None) -} - -/// Parse YAML that was extracted from a parent document. 
-/// -/// This function is used when parsing YAML that is a substring of a larger -/// document (e.g., YAML frontmatter extracted from a .qmd file). The resulting -/// YamlWithSourceInfo will have Substring mappings that track back to the -/// parent document. -/// -/// # Arguments -/// -/// * `content` - The YAML string to parse -/// * `parent` - Source information for the parent document from which this YAML was extracted -/// -/// # Example -/// -/// ```rust,no_run -/// use quarto_yaml::{parse_with_parent, SourceInfo}; -/// use quarto_source_map::{FileId, Location, Range}; -/// -/// // Create parent source info for a .qmd file -/// let parent = SourceInfo::from_range( -/// FileId(1), -/// Range { -/// start: Location { offset: 0, row: 0, column: 0 }, -/// end: Location { offset: 1000, row: 50, column: 0 }, -/// } -/// ); -/// -/// // Parse YAML frontmatter (extracted from parent document at offset 10-50) -/// let yaml_content = "title: My Document\nauthor: John"; -/// let yaml = parse_with_parent(yaml_content, parent).unwrap(); -/// -/// // The yaml now has Substring mappings tracking back to the parent -/// ``` -/// -/// # Errors -/// -/// Returns an error if the YAML is invalid or if parsing fails. -pub fn parse_with_parent(content: &str, parent: SourceInfo) -> Result { - parse_impl(content, None, Some(parent)) -} - -/// Derive the [`quarto_source_map::FileId`] used by [`parse_file`] -/// from a filename string. -/// -/// This is the same hash that `parse_file` (and `parse_impl` below) -/// computes when no explicit parent SourceInfo is supplied. Exposed -/// so callers building a [`quarto_source_map::SourceContext`] for -/// ariadne rendering can look up the right `FileId` for a given -/// on-disk file without re-hashing inline. -/// -/// Stability: this is part of the public API and is relied upon by -/// the diagnostic layer to bind file content to FileIds at render -/// time. Don't change the hash recipe without bumping consumers. 
-pub fn file_id_for_filename(filename: &str) -> quarto_source_map::FileId { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - - let mut hasher = DefaultHasher::new(); - filename.hash(&mut hasher); - quarto_source_map::FileId(hasher.finish() as usize) -} - -fn parse_impl( - content: &str, - filename: Option<&str>, - parent: Option, -) -> Result { - // If parent is not provided but filename is, create a parent SourceInfo for the file - let parent = parent.or_else(|| { - filename.map(|name| { - let file_id = file_id_for_filename(name); - - // Create SourceInfo for the entire file content - use quarto_source_map::{Location, Range}; - SourceInfo::from_range( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: content.len(), - row: content.lines().count().saturating_sub(1), - column: content.lines().last().map_or(0, |l| l.len()), - }, - }, - ) - }) - }); - - let mut parser = Parser::new_from_str(content); - let mut builder = YamlBuilder::new(content, parent); - - parser - .load(&mut builder, false) // false = single document only - .map_err(Error::from)?; - - builder.result() -} - -/// Helper function to create a contiguous span from start to end positions. -/// This is used for entry_span which should cover from key start to value end. -fn create_contiguous_span(start_info: &SourceInfo, end_info: &SourceInfo) -> SourceInfo { - // Extract the actual start and end offsets, handling the different SourceInfo variants - match (start_info, end_info) { - ( - SourceInfo::Original { - file_id: start_file, - start_offset: start, - .. - }, - SourceInfo::Original { - file_id: end_file, - end_offset: end, - .. 
- }, - ) => { - // Both are Original from the same file - create a single Original span - assert_eq!( - start_file, end_file, - "Key and value must be from the same file" - ); - SourceInfo::original(*start_file, *start, *end) - } - ( - SourceInfo::Substring { - parent: start_parent, - start_offset: start, - .. - }, - SourceInfo::Substring { - end_offset: end, .. - }, - ) => { - // Both are Substrings - they should have the same parent - // Use the first parent (they should be equivalent even if not the same Rc) - SourceInfo::substring((**start_parent).clone(), *start, *end) - } - _ => { - // Mixed types or Concat - fall back to combine which creates a Concat - // This shouldn't happen in normal YAML parsing but handle it gracefully - start_info.combine(end_info) - } - } -} - -/// Builder that implements MarkedEventReceiver to construct YamlWithSourceInfo. -struct YamlBuilder<'a> { - /// The source text being parsed - source: &'a str, - - /// Optional parent source info for substring tracking - parent: Option, - - /// Stack of nodes being constructed - stack: Vec, - - /// The completed root node - root: Option, -} - -/// A node being constructed during parsing. -enum BuildNode { - /// Building a sequence - Sequence { - start_marker: Marker, - items: Vec, - }, - - /// Building a mapping - Mapping { - start_marker: Marker, - entries: Vec<(YamlWithSourceInfo, Option)>, - }, -} - -impl<'a> YamlBuilder<'a> { - fn new(source: &'a str, parent: Option) -> Self { - Self { - source, - parent, - stack: Vec::new(), - root: None, - } - } - - fn result(self) -> Result { - self.root.ok_or_else(|| Error::ParseError { - message: "No YAML document found".into(), - location: None, - }) - } - - fn push_complete(&mut self, node: YamlWithSourceInfo) { - if self.stack.is_empty() { - // This is the root - self.root = Some(node); - return; - } - - // Add to the parent node - match self.stack.last_mut().unwrap() { - BuildNode::Sequence { items, .. 
} => { - items.push(node); - } - BuildNode::Mapping { entries, .. } => { - if let Some((_, value)) = entries.last_mut() { - if value.is_none() { - *value = Some(node); - } else { - // This is a new key - entries.push((node, None)); - } - } else { - // First key - entries.push((node, None)); - } - } - } - } - - fn make_source_info(&self, marker: &Marker, len: usize) -> SourceInfo { - let start_offset = marker.index(); - let end_offset = start_offset + len; - - if let Some(ref parent) = self.parent { - // We're parsing a substring - create a Substring mapping - SourceInfo::substring(parent.clone(), start_offset, end_offset) - } else { - // We're parsing an original file - create an Original mapping - use quarto_source_map::{Location, Range}; - - let start_row = marker.line(); // yaml-rust2 uses 0-based - let start_column = marker.col(); // yaml-rust2 uses 0-based - - SourceInfo::from_range( - quarto_source_map::FileId(0), // Dummy FileId for now - Range { - start: Location { - offset: start_offset, - row: start_row, - column: start_column, - }, - end: Location { - offset: end_offset, - // TODO: Calculate accurate end row/column based on content - row: start_row, - column: start_column + len, - }, - }, - ) - } - } - - fn make_source_info_at_offset(&self, start_offset: usize, len: usize) -> SourceInfo { - let end_offset = start_offset + len; - - if let Some(ref parent) = self.parent { - // We're parsing a substring - create a Substring mapping - SourceInfo::substring(parent.clone(), start_offset, end_offset) - } else { - // We're parsing an original file - create an Original mapping - // We don't have row/column info without a marker, so we need to compute it - // from the content - use quarto_source_map::{Location, Range}; - - // For now, create a minimal SourceInfo without accurate row/column - // This should still work correctly because SourceContext can map offsets - SourceInfo::from_range( - quarto_source_map::FileId(0), - Range { - start: Location { - offset: 
start_offset, - row: 0, // Will be computed from offset by SourceContext - column: 0, - }, - end: Location { - offset: end_offset, - row: 0, - column: 0, - }, - }, - ) - } - } - - fn compute_scalar_len(&self, _marker: &Marker, value: &str) -> usize { - // For now, use the value length - // TODO: This should be computed more accurately from the source - // considering quotes, escapes, etc. - value.len() - } - - /// Find the byte offset of a tag before a scalar value. - /// - /// When yaml-rust2 emits a Scalar event with a tag, the marker points to the - /// start of the VALUE, not the tag. We need to search backwards in the source - /// to find where the tag actually is. - /// - /// For example, in "key: !expr x + 1", if marker points to "x", we need to - /// find "!expr" which comes before it. - /// - /// Returns the byte offset of the '!' character. - fn find_tag_start_offset(&self, value_marker: &Marker, tag_suffix: &str) -> Option { - let value_pos = value_marker.index(); - - // The tag format is: ! - let tag_text = format!("!{}", tag_suffix); - let tag_len = tag_text.len(); - - // Search backwards from value_pos for the tag - // We need at least enough characters for the tag - if value_pos < tag_len { - return None; - } - - // Look in a reasonable window before the value (tag + some whitespace) - let search_start = value_pos.saturating_sub(tag_len + 10); - let search_end = value_pos; - - if search_end > self.source.len() { - return None; - } - - let search_slice = &self.source[search_start..search_end]; - - // Find the last occurrence of the tag in this slice - if let Some(relative_pos) = search_slice.rfind(&tag_text) { - let absolute_pos = search_start + relative_pos; - Some(absolute_pos) - } else { - None - } - } - - /// Create SourceInfo for a tag at a specific byte offset. 
- fn make_tag_source_info(&self, tag_start_offset: usize, tag_len: usize) -> SourceInfo { - let end_offset = tag_start_offset + tag_len; - - if let Some(ref parent) = self.parent { - // We're parsing a substring - create a Substring mapping - SourceInfo::substring(parent.clone(), tag_start_offset, end_offset) - } else { - // We're parsing an original file - create an Original mapping - // For row/column, we'd need to scan the source, but for now use approximations - SourceInfo::original(quarto_source_map::FileId(0), tag_start_offset, end_offset) - } - } -} - -impl<'a> MarkedEventReceiver for YamlBuilder<'a> { - fn on_event(&mut self, ev: Event, marker: Marker) { - match ev { - Event::Nothing => {} - - Event::StreamStart => {} - Event::StreamEnd => {} - Event::DocumentStart => {} - Event::DocumentEnd => {} - - Event::Scalar(value, _style, _anchor_id, tag) => { - // Capture tag information if present - let tag_info = tag.as_ref().map(|t| { - // The marker points to the start of the VALUE, not the tag - // We need to find where the tag actually is in the source - let tag_len = 1 + t.suffix.len(); // ! 
+ suffix - - // Find the tag position by searching backwards in the source - if let Some(tag_offset) = self.find_tag_start_offset(&marker, &t.suffix) { - let tag_source_info = self.make_tag_source_info(tag_offset, tag_len); - (t.suffix.clone(), tag_source_info) - } else { - // Fallback: if we can't find the tag, use the marker position - // This will be wrong but at least we won't panic - let tag_source_info = self.make_source_info(&marker, tag_len); - (t.suffix.clone(), tag_source_info) - } - }); - - // Compute source info for the value itself - // The marker points to the start of the value - let len = self.compute_scalar_len(&marker, &value); - let source_info = self.make_source_info(&marker, len); - - // Create the Yaml value - let yaml = parse_scalar_value(&value); - let node = YamlWithSourceInfo::new_scalar_with_tag(yaml, source_info, tag_info); - - self.push_complete(node); - } - - Event::SequenceStart(_anchor_id, _tag) => { - self.stack.push(BuildNode::Sequence { - start_marker: marker, - items: Vec::new(), - }); - } - - Event::SequenceEnd => { - let build_node = self.stack.pop().expect("SequenceEnd without SequenceStart"); - - if let BuildNode::Sequence { - start_marker, - items, - } = build_node - { - // Compute the length from start to current marker - let len = marker.index().saturating_sub(start_marker.index()); - let source_info = self.make_source_info(&start_marker, len); - - // Build the Yaml::Array - let yaml_items: Vec = items.iter().map(|n| n.yaml.clone()).collect(); - let yaml = Yaml::Array(yaml_items); - - let node = YamlWithSourceInfo::new_array(yaml, source_info, items); - self.push_complete(node); - } else { - panic!("Expected Sequence build node"); - } - } - - Event::MappingStart(_anchor_id, _tag) => { - self.stack.push(BuildNode::Mapping { - start_marker: marker, - entries: Vec::new(), - }); - } - - Event::MappingEnd => { - let build_node = self.stack.pop().expect("MappingEnd without MappingStart"); - - if let BuildNode::Mapping { - 
start_marker, - entries, - } = build_node - { - // Build the hash entries - let mut hash_entries = Vec::new(); - let mut yaml_pairs = Vec::new(); - - for (key, value) in entries { - let value = value.expect("Mapping entry without value"); - - // Create YamlHashEntry - let key_span = key.source_info.clone(); - let value_span = value.source_info.clone(); - - // Entry span from key start to value end - // Create a contiguous span (not a Concat) from key start to value end - let entry_span = create_contiguous_span(&key_span, &value_span); - - hash_entries.push(YamlHashEntry::new( - key.clone(), - value.clone(), - key_span, - value_span, - entry_span, - )); - - yaml_pairs.push((key.yaml.clone(), value.yaml.clone())); - } - - // Compute source_info for the entire object - // If we have entries, use the first key's start and the current marker's end - // Otherwise, use start_marker to current marker - let source_info = if let Some(first_entry) = hash_entries.first() { - // Get the start offset from the first key - let first_key_start = first_entry.key.source_info.start_offset(); - // Compute length from first key start to current marker - let len = marker.index().saturating_sub(first_key_start); - // Create SourceInfo starting from first key - self.make_source_info_at_offset(first_key_start, len) - } else { - // Empty object: use start_marker to current marker - let len = marker.index().saturating_sub(start_marker.index()); - self.make_source_info(&start_marker, len) - }; - - // Build the Yaml::Hash - let yaml = Yaml::Hash(yaml_pairs.into_iter().collect()); - - let node = YamlWithSourceInfo::new_hash(yaml, source_info, hash_entries); - self.push_complete(node); - } else { - panic!("Expected Mapping build node"); - } - } - - Event::Alias(_anchor_id) => { - // For now, we don't support aliases - // We could add support later by tracking anchors - let source_info = self.make_source_info(&marker, 0); - let node = YamlWithSourceInfo::new_scalar(Yaml::Null, source_info); - 
self.push_complete(node); - } - } - } -} - -/// Parse a scalar string value into the appropriate Yaml type. -/// -/// This handles type inference: integers, floats, booleans, null, and strings. -fn parse_scalar_value(value: &str) -> Yaml { - // Try to parse as integer - if let Ok(i) = value.parse::() { - return Yaml::Integer(i); - } - - // Try to parse as float, including the YAML 1.2 core-schema float - // spellings (`.inf`, `-.inf`, `+.inf`, `.nan`, and case variants) that - // Rust's `f64::from_str` does not accept. - if is_yaml_float(value) { - return Yaml::Real(value.to_string()); - } - - // Check for boolean - match value { - "true" | "True" | "TRUE" | "yes" | "Yes" | "YES" | "on" | "On" | "ON" => { - return Yaml::Boolean(true); - } - "false" | "False" | "FALSE" | "no" | "No" | "NO" | "off" | "Off" | "OFF" => { - return Yaml::Boolean(false); - } - "null" | "Null" | "NULL" | "~" | "" => { - return Yaml::Null; - } - _ => {} - } - - // Default to string - Yaml::String(value.to_string()) -} - -/// Returns `true` if `value` is a YAML 1.2 core-schema float. -fn is_yaml_float(value: &str) -> bool { - match value { - ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => true, - "-.inf" | "-.Inf" | "-.INF" => true, - ".nan" | ".NaN" | ".NAN" => true, - _ => value.bytes().any(|b| b.is_ascii_digit()) && value.parse::().is_ok(), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_scalar() { - let yaml = parse("hello").unwrap(); - assert!(yaml.is_scalar()); - assert_eq!(yaml.yaml.as_str(), Some("hello")); - } - - #[test] - fn test_parse_integer() { - let yaml = parse("42").unwrap(); - assert!(yaml.is_scalar()); - assert_eq!(yaml.yaml.as_i64(), Some(42)); - } - - #[test] - fn test_parse_yaml_float_special_forms() { - // YAML 1.2 core-schema float spellings must resolve to Yaml::Real, - // not Yaml::String. See tidyverse/data-dict#47. 
- for (text, expected) in [ - (".inf", f64::INFINITY), - ("+.inf", f64::INFINITY), - (".Inf", f64::INFINITY), - (".INF", f64::INFINITY), - ("+.INF", f64::INFINITY), - ("-.inf", f64::NEG_INFINITY), - ("-.Inf", f64::NEG_INFINITY), - ("-.INF", f64::NEG_INFINITY), - ] { - let yaml = parse(text).unwrap(); - assert!( - matches!(yaml.yaml, Yaml::Real(_)), - "{text:?} should parse as Yaml::Real, got {:?}", - yaml.yaml - ); - assert_eq!( - yaml.yaml.as_f64(), - Some(expected), - "{text:?} should evaluate to {expected}" - ); - } - - for text in [".nan", ".NaN", ".NAN"] { - let yaml = parse(text).unwrap(); - assert!( - matches!(yaml.yaml, Yaml::Real(_)), - "{text:?} should parse as Yaml::Real, got {:?}", - yaml.yaml - ); - assert!( - yaml.yaml.as_f64().unwrap().is_nan(), - "{text:?} should evaluate to NaN" - ); - } - - // Bare `inf` / `nan` (no leading dot) are NOT YAML floats — they stay strings. - for text in ["inf", "nan", "infinity"] { - let yaml = parse(text).unwrap(); - assert!( - matches!(yaml.yaml, Yaml::String(_)), - "{text:?} should stay a Yaml::String, got {:?}", - yaml.yaml - ); - } - } - - #[test] - fn test_parse_boolean() { - let yaml = parse("true").unwrap(); - assert!(yaml.is_scalar()); - assert_eq!(yaml.yaml.as_bool(), Some(true)); - } - - #[test] - fn test_parse_array() { - let yaml = parse("[1, 2, 3]").unwrap(); - assert!(yaml.is_array()); - assert_eq!(yaml.len(), 3); - - let items = yaml.as_array().unwrap(); - assert_eq!(items[0].yaml.as_i64(), Some(1)); - assert_eq!(items[1].yaml.as_i64(), Some(2)); - assert_eq!(items[2].yaml.as_i64(), Some(3)); - } - - #[test] - fn test_parse_hash() { - let yaml = parse("title: My Document\nauthor: John Doe").unwrap(); - assert!(yaml.is_hash()); - assert_eq!(yaml.len(), 2); - - let title = yaml.get_hash_value("title").unwrap(); - assert_eq!(title.yaml.as_str(), Some("My Document")); - - let author = yaml.get_hash_value("author").unwrap(); - assert_eq!(author.yaml.as_str(), Some("John Doe")); - } - - #[test] - fn 
test_nested_structure() { - let yaml = parse( - r#" -project: - title: My Project - authors: - - Alice - - Bob -"#, - ) - .unwrap(); - - assert!(yaml.is_hash()); - - let project = yaml.get_hash_value("project").unwrap(); - assert!(project.is_hash()); - - let authors = project.get_hash_value("authors").unwrap(); - assert!(authors.is_array()); - assert_eq!(authors.len(), 2); - } - - #[test] - fn test_source_info_tracking() { - let yaml = parse("title: My Document").unwrap(); - - // Check that source info is present - // Note: row/column are 0-indexed in the new system - assert!(yaml.source_info.start_offset() < yaml.source_info.end_offset()); - - let title = yaml.get_hash_value("title").unwrap(); - // Verify the title value has a valid range - assert!(title.source_info.start_offset() < title.source_info.end_offset()); - } - - #[test] - fn test_parse_with_filename() { - let yaml = parse_file("title: Test", "config.yaml").unwrap(); - assert!(yaml.source_info.end_offset() > 0); - - // Verify that we're now using Substring mapping for files - match &yaml.source_info { - SourceInfo::Substring { .. } => { - // Expected: Substring mapping to parent file - } - _ => panic!("Expected Substring mapping for file parsing"), - } - } - - #[test] - fn test_parse_with_parent_simple() { - use quarto_source_map::{FileId, Location, Range}; - - // Simulate extracting YAML from a .qmd file at offset 100-150 - let parent = SourceInfo::from_range( - FileId(42), - Range { - start: Location { - offset: 100, - row: 5, - column: 0, - }, - end: Location { - offset: 150, - row: 8, - column: 0, - }, - }, - ); - - let yaml_content = "title: My Document\nauthor: John"; - let yaml = parse_with_parent(yaml_content, parent).unwrap(); - - // Verify root has Substring mapping - match &yaml.source_info { - SourceInfo::Substring { parent: p, .. } => { - // Parent should point to our original parent - match p.as_ref() { - SourceInfo::Original { file_id, .. 
} => { - assert_eq!(file_id.0, 42); - } - _ => panic!("Expected parent to have Original mapping"), - } - } - _ => panic!("Expected Substring mapping"), - } - } - - #[test] - fn test_parse_with_parent_nested() { - use quarto_source_map::{FileId, Location, Range}; - - // Parent file - let parent = SourceInfo::from_range( - FileId(1), - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 500, - row: 20, - column: 0, - }, - }, - ); - - let yaml_content = r#" -project: - title: My Project - authors: - - Alice - - Bob -"#; - let yaml = parse_with_parent(yaml_content, parent).unwrap(); - - // Get nested values - let project = yaml - .get_hash_value("project") - .expect("project key not found"); - let title = project - .get_hash_value("title") - .expect("title key not found"); - let authors = project - .get_hash_value("authors") - .expect("authors key not found"); - - // All should have Substring mappings - assert!(matches!(project.source_info, SourceInfo::Substring { .. })); - assert!(matches!(title.source_info, SourceInfo::Substring { .. })); - assert!(matches!(authors.source_info, SourceInfo::Substring { .. })); - - // Array elements should also have Substring mappings - if let Some(items) = authors.as_array() { - assert_eq!(items.len(), 2); - assert!(matches!(items[0].source_info, SourceInfo::Substring { .. })); - assert!(matches!(items[1].source_info, SourceInfo::Substring { .. 
})); - } else { - panic!("Expected array for authors"); - } - } - - #[test] - fn test_substring_offset_tracking() { - use quarto_source_map::{FileId, Location, Range}; - - // Parent document - let parent_content = "---\ntitle: Test\nauthor: John\n---\n\nDocument content"; - let parent = SourceInfo::from_range( - FileId(1), - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: parent_content.len(), - row: 4, - column: 0, - }, - }, - ); - - // YAML frontmatter (offset 4-31 in parent) - let yaml_content = "title: Test\nauthor: John"; - let yaml = parse_with_parent(yaml_content, parent).unwrap(); - - // Get title value - let title = yaml.get_hash_value("title").expect("title not found"); - - // Verify the title has a valid substring range - match &title.source_info { - SourceInfo::Substring { start_offset, .. } => { - // Offset should be relative to the yaml_content string - assert!(*start_offset < yaml_content.len()); - } - _ => panic!("Expected Substring mapping for title"), - } - - // Check that range makes sense - assert!(title.source_info.start_offset() < title.source_info.end_offset()); - } - - #[test] - fn test_parse_anonymous_no_substring() { - // Parse without filename or parent - should use Original mapping - let yaml = parse("title: Test").unwrap(); - - match &yaml.source_info { - SourceInfo::Original { file_id, .. } => { - assert_eq!(file_id.0, 0); // Anonymous FileId - } - _ => panic!("Expected Original mapping for anonymous parse"), - } - } - - /// Helper function to resolve a SourceInfo through the mapping chain to get - /// the absolute offset in the original file. - fn resolve_to_original_offset(info: &SourceInfo) -> (usize, quarto_source_map::FileId) { - match info { - SourceInfo::Original { - file_id, - start_offset, - .. - } => (*start_offset, *file_id), - SourceInfo::Substring { - parent, - start_offset, - .. 
- } => { - let (parent_offset, file_id) = resolve_to_original_offset(parent); - (parent_offset + start_offset, file_id) - } - _ => panic!("Unsupported mapping type for offset resolution"), - } - } - - #[test] - fn test_hash_key_and_value_locations() { - // Test that we can track both key and value locations in YAML hashes - let yaml_content = "hello: world\nfoo: bar\ncount: 42"; - let yaml = parse(yaml_content).unwrap(); - - assert!(yaml.is_hash()); - let entries = yaml.as_hash().expect("Should be a hash"); - - // Test 1: Verify "hello" key and "world" value locations - let hello_entry = entries - .iter() - .find(|e| e.key.yaml.as_str() == Some("hello")) - .expect("Should have 'hello' key"); - - // Verify key location - assert_eq!(hello_entry.key.yaml.as_str(), Some("hello")); - let key_offset = hello_entry.key_span.start_offset(); - let key_str = &yaml_content[key_offset..key_offset + 5]; - assert_eq!(key_str, "hello", "Key location should point to 'hello'"); - - // Verify value location - assert_eq!(hello_entry.value.yaml.as_str(), Some("world")); - let value_offset = hello_entry.value_span.start_offset(); - let value_str = &yaml_content[value_offset..value_offset + 5]; - assert_eq!(value_str, "world", "Value location should point to 'world'"); - - // Verify they are different locations - assert_ne!( - key_offset, value_offset, - "Key and value should have different offsets" - ); - - // Test 2: Verify "foo" key and "bar" value locations - let foo_entry = entries - .iter() - .find(|e| e.key.yaml.as_str() == Some("foo")) - .expect("Should have 'foo' key"); - - let foo_key_offset = foo_entry.key_span.start_offset(); - let foo_key_str = &yaml_content[foo_key_offset..foo_key_offset + 3]; - assert_eq!(foo_key_str, "foo", "Key location should point to 'foo'"); - - let bar_value_offset = foo_entry.value_span.start_offset(); - let bar_value_str = &yaml_content[bar_value_offset..bar_value_offset + 3]; - assert_eq!(bar_value_str, "bar", "Value location should point to 
'bar'"); - - // Test 3: Verify "count" key and "42" value locations - let count_entry = entries - .iter() - .find(|e| e.key.yaml.as_str() == Some("count")) - .expect("Should have 'count' key"); - - let count_key_offset = count_entry.key_span.start_offset(); - let count_key_str = &yaml_content[count_key_offset..count_key_offset + 5]; - assert_eq!( - count_key_str, "count", - "Key location should point to 'count'" - ); - - assert_eq!(count_entry.value.yaml.as_i64(), Some(42)); - let count_value_offset = count_entry.value_span.start_offset(); - let count_value_str = &yaml_content[count_value_offset..count_value_offset + 2]; - assert_eq!(count_value_str, "42", "Value location should point to '42'"); - - // Test 4: Verify entry spans include both key and value - // The entry span should start at the key and end after the value - assert!( - hello_entry.entry_span.start_offset() <= key_offset, - "Entry span should start at or before the key" - ); - assert!( - hello_entry.entry_span.end_offset() >= value_offset + 5, - "Entry span should end at or after the value" - ); - } - - #[test] - fn test_qmd_frontmatter_extraction() { - use quarto_source_map::{FileId, Location, Range}; - - // Simulate a realistic .qmd file - let qmd_content = r#"--- -title: "My Research Paper" -author: "Jane Smith" -date: "2024-01-15" -format: - html: - theme: cosmo - toc: true - pdf: - documentclass: article ---- - -# Introduction - -This is my research paper with some **bold** text. - -## Methods - -We used the following approach... 
-"#; - - // Extract YAML frontmatter using regex (simple approach - just for testing) - let re = regex::Regex::new(r"(?s)^---\n(.*?)\n---").unwrap(); - let captures = re - .captures(qmd_content) - .expect("Failed to find YAML frontmatter"); - - let yaml_match = captures.get(1).expect("No YAML content found"); - let yaml_start = yaml_match.start(); - let yaml_end = yaml_match.end(); - let yaml_content = yaml_match.as_str(); - - // Create parent SourceInfo for the entire .qmd file - let parent = SourceInfo::from_range( - FileId(123), // Simulated FileId for test.qmd - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: qmd_content.len(), - row: qmd_content.lines().count().saturating_sub(1), - column: qmd_content.lines().last().unwrap_or("").len(), - }, - }, - ); - - // Create parent SourceInfo for just the YAML portion - let yaml_parent = SourceInfo::substring(parent.clone(), yaml_start, yaml_end); - - // Parse the YAML with parent tracking - let yaml = parse_with_parent(yaml_content, yaml_parent).unwrap(); - - // Verify the YAML was parsed correctly - assert!(yaml.is_hash()); - let title = yaml.get_hash_value("title").expect("title not found"); - assert_eq!(title.yaml.as_str(), Some("My Research Paper")); - - // Verify that the title's location maps back through the substring chain - match &title.source_info { - SourceInfo::Substring { - parent: p, - start_offset, - .. - } => { - // The offset should be within the YAML content - assert!(*start_offset < yaml_content.len()); - - // The parent should be another Substring pointing to the .qmd file - match p.as_ref() { - SourceInfo::Substring { - parent: grandparent, - start_offset: yaml_offset, - .. - } => { - // This should point to the original .qmd file - assert_eq!(*yaml_offset, yaml_start); - - // Grandparent should be the Original .qmd file - match grandparent.as_ref() { - SourceInfo::Original { file_id, .. 
} => { - assert_eq!(file_id.0, 123); - } - _ => panic!("Expected Original mapping for .qmd file"), - } - } - _ => panic!("Expected Substring mapping for YAML within .qmd"), - } - } - _ => panic!("Expected Substring mapping for title"), - } - - // Verify nested structures also have correct mappings - let format = yaml.get_hash_value("format").expect("format not found"); - assert!(format.is_hash()); - - let html = format.get_hash_value("html").expect("html not found"); - assert!(html.is_hash()); - - let theme = html.get_hash_value("theme").expect("theme not found"); - assert_eq!(theme.yaml.as_str(), Some("cosmo")); - - // The theme value should also have Substring mapping through the chain - match &theme.source_info { - SourceInfo::Substring { .. } => { - // Good - it has substring mapping - } - _ => panic!("Expected Substring mapping for deeply nested theme value"), - } - - // Verify that the 'toc' boolean value is correctly located - let toc = html.get_hash_value("toc").expect("toc not found"); - assert_eq!(toc.yaml.as_bool(), Some(true)); - - // Calculate where "true" appears in the original .qmd file - let toc_true_in_qmd = qmd_content - .find("toc: true") - .expect("toc: true not found in qmd"); - let toc_value_offset = toc_true_in_qmd + "toc: ".len(); - - // The toc value should be located within the YAML frontmatter region - assert!( - toc_value_offset >= yaml_start && toc_value_offset < yaml_end, - "toc value offset {} should be within YAML range {}-{}", - toc_value_offset, - yaml_start, - yaml_end - ); - - // ===== NOW TEST OFFSET RESOLUTION ===== - - // Test 1: Verify the title value resolves to correct position in .qmd file - let (resolved_title_offset, resolved_file_id) = - resolve_to_original_offset(&title.source_info); - assert_eq!( - resolved_file_id.0, 123, - "Title should resolve to FileId 123" - ); - - // Extract the exact string at the resolved position - let title_expected = "\"My Research Paper\""; // YAML parser includes quotes - let 
resolved_title_str = - &qmd_content[resolved_title_offset..resolved_title_offset + title_expected.len()]; - assert_eq!( - resolved_title_str, title_expected, - "Resolved title offset should point to exactly '{}'", - title_expected - ); - - // Test 2: Verify the theme value "cosmo" resolves correctly - let (resolved_cosmo_offset, resolved_file_id) = - resolve_to_original_offset(&theme.source_info); - assert_eq!( - resolved_file_id.0, 123, - "Theme should resolve to FileId 123" - ); - - // Extract the exact string at the resolved position - let cosmo_expected = "cosmo"; - let resolved_cosmo_str = - &qmd_content[resolved_cosmo_offset..resolved_cosmo_offset + cosmo_expected.len()]; - assert_eq!( - resolved_cosmo_str, cosmo_expected, - "Resolved theme offset should point to exactly '{}'", - cosmo_expected - ); - - // Test 3: Verify the author value resolves correctly - let author = yaml.get_hash_value("author").expect("author not found"); - assert_eq!(author.yaml.as_str(), Some("Jane Smith")); - - let (resolved_author_offset, resolved_file_id) = - resolve_to_original_offset(&author.source_info); - assert_eq!( - resolved_file_id.0, 123, - "Author should resolve to FileId 123" - ); - - // Extract the exact string at the resolved position - let author_expected = "\"Jane Smith\""; // YAML parser includes quotes - let resolved_author_str = - &qmd_content[resolved_author_offset..resolved_author_offset + author_expected.len()]; - assert_eq!( - resolved_author_str, author_expected, - "Resolved author offset should point to exactly '{}'", - author_expected - ); - - // Test 4: Verify the YAML root offset resolution - let (resolved_yaml_offset, _) = resolve_to_original_offset(&yaml.source_info); - - // The resolved position should be within the YAML frontmatter - assert!( - resolved_yaml_offset >= yaml_start && resolved_yaml_offset < yaml_end, - "YAML root offset {} should be within YAML content range {}-{}", - resolved_yaml_offset, - yaml_start, - yaml_end - ); - - // Extract and 
verify the exact string - the YAML root should start at the first key - let yaml_root_expected = "title: \"My Research P"; // First key and start of value - let resolved_yaml_str = - &qmd_content[resolved_yaml_offset..resolved_yaml_offset + yaml_root_expected.len()]; - assert_eq!( - resolved_yaml_str, yaml_root_expected, - "Resolved YAML root offset should point to exactly '{}'", - yaml_root_expected - ); - - // Test 5: Verify nested hash entry offsets - let pdf = format.get_hash_value("pdf").expect("pdf not found"); - let documentclass = pdf - .get_hash_value("documentclass") - .expect("documentclass not found"); - assert_eq!(documentclass.yaml.as_str(), Some("article")); - - let (resolved_article_offset, resolved_file_id) = - resolve_to_original_offset(&documentclass.source_info); - assert_eq!( - resolved_file_id.0, 123, - "Documentclass should resolve to FileId 123" - ); - - // Extract the exact string at the resolved position - let article_expected = "article"; - let resolved_article_str = - &qmd_content[resolved_article_offset..resolved_article_offset + article_expected.len()]; - assert_eq!( - resolved_article_str, article_expected, - "Resolved documentclass offset should point to exactly '{}'", - article_expected - ); - - // Test 6: Verify that hash entry key spans resolve correctly - if let Some(entries) = yaml.as_hash() { - for entry in entries { - let (entry_key_start, entry_file_id) = resolve_to_original_offset(&entry.key_span); - assert_eq!( - entry_file_id.0, 123, - "Entry key should resolve to FileId 123" - ); - - // All top-level keys should be within the YAML frontmatter region - assert!( - entry_key_start >= yaml_start && entry_key_start < yaml_end, - "Entry key at offset {} should be within YAML range {}-{}", - entry_key_start, - yaml_start, - yaml_end - ); - - // Verify the key actually points to the key string - let key_str = entry.key.yaml.as_str().unwrap_or(""); - if !key_str.is_empty() && entry_key_start + key_str.len() <= qmd_content.len() { 
- let resolved_key_str = - &qmd_content[entry_key_start..entry_key_start + key_str.len()]; - assert_eq!( - resolved_key_str, key_str, - "Entry key '{}' should resolve to exact position", - key_str - ); - } - } - } - - // All tests passed - offset resolution works correctly through the double-substring chain! - } - - #[test] - fn test_object_source_range_starts_at_first_key() { - let yaml_content = "title: \"My Research Paper\"\nauthor: \"John Doe\"\n"; - let parsed = parse_file(yaml_content, "test.yaml").expect("parse failed"); - - // The root should be an object - assert!(parsed.is_hash()); - - // Check the SourceInfo of the object - let source_info = &parsed.source_info; - - // The object should span from offset 0 (start of "title") to the end - // NOT from offset 5 (the colon) - assert_eq!( - source_info.start_offset(), - 0, - "Object should start at offset 0 (beginning of first key), not at the colon" - ); - - // The end should be at the end of the content - assert_eq!( - source_info.end_offset(), - yaml_content.len(), - "Object should end at end of content" - ); - } - - // =========== TAG TESTS =========== - - #[test] - fn test_parse_scalar_with_tag() { - let yaml = parse("key: !expr x + 1").unwrap(); - let value = yaml.get_hash_value("key").expect("key not found"); - - assert!(value.tag.is_some()); - let (tag_suffix, _tag_source) = value.tag.as_ref().unwrap(); - assert_eq!(tag_suffix, "expr"); - } - - #[test] - fn test_parse_scalar_with_prefer_tag() { - let yaml = parse("theme: !prefer cosmo").unwrap(); - let value = yaml.get_hash_value("theme").expect("theme not found"); - - assert!(value.tag.is_some()); - let (tag_suffix, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag_suffix, "prefer"); - assert_eq!(value.yaml.as_str(), Some("cosmo")); - } - - #[test] - fn test_parse_scalar_with_concat_tag() { - let yaml = parse("items: !concat [a, b]").unwrap(); - // Note: !concat on a sequence - the tag is on the sequence itself - let value = 
yaml.get_hash_value("items").expect("items not found"); - - // The tag is currently only captured for scalars, not sequences - // This test documents current behavior - assert!(value.is_array()); - } - - #[test] - fn test_parse_scalar_with_md_tag() { - let yaml = parse("description: !md \"**bold** text\"").unwrap(); - let value = yaml - .get_hash_value("description") - .expect("description not found"); - - assert!(value.tag.is_some()); - let (tag_suffix, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag_suffix, "md"); - } - - #[test] - fn test_parse_scalar_with_str_tag() { - let yaml = parse("title: !str \"My Title\"").unwrap(); - let value = yaml.get_hash_value("title").expect("title not found"); - - assert!(value.tag.is_some()); - let (tag_suffix, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag_suffix, "str"); - } - - #[test] - fn test_parse_scalar_with_path_tag() { - let yaml = parse("file: !path ./data/file.csv").unwrap(); - let value = yaml.get_hash_value("file").expect("file not found"); - - assert!(value.tag.is_some()); - let (tag_suffix, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag_suffix, "path"); - } - - #[test] - fn test_parse_scalar_with_glob_tag() { - let yaml = parse("sources: !glob \"*.qmd\"").unwrap(); - let value = yaml.get_hash_value("sources").expect("sources not found"); - - assert!(value.tag.is_some()); - let (tag_suffix, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag_suffix, "glob"); - } - - #[test] - fn test_combined_tag_with_underscore_works() { - // Combined tags like !prefer_md ARE supported using underscore as separator. - // This is the recommended syntax for combining merge ops with interpretation hints. 
- let result = parse("title: !prefer_md \"**My Title**\""); - assert!(result.is_ok(), "Combined tags with underscore should parse"); - - let yaml = result.unwrap(); - let value = yaml.get_hash_value("title").unwrap(); - let (tag, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag, "prefer_md"); - } - - #[test] - fn test_combined_tag_with_comma_not_supported() { - // Note: Combined tags like !prefer,md are NOT supported by standard YAML parsers. - // The comma is not valid in a tag without proper URI escaping. - // Use underscore instead: !prefer_md - let result = parse("title: !prefer,md \"**My Title**\""); - assert!(result.is_err(), "Combined tags with comma should not parse"); - } - - #[test] - fn test_tag_source_info_location() { - let yaml_content = "key: !expr value"; - let yaml = parse(yaml_content).unwrap(); - let value = yaml.get_hash_value("key").expect("key not found"); - - let (_, tag_source) = value.tag.as_ref().expect("tag should be present"); - - // The tag should start at "!" 
(offset 5) - let tag_start = tag_source.start_offset(); - assert_eq!(&yaml_content[tag_start..tag_start + 1], "!"); - - // The tag should cover "!expr" (5 characters) - let tag_len = tag_source.end_offset() - tag_source.start_offset(); - assert_eq!(tag_len, 5); // "!expr" - } - - #[test] - fn test_no_tag_when_absent() { - let yaml = parse("key: value").unwrap(); - let value = yaml.get_hash_value("key").expect("key not found"); - - assert!(value.tag.is_none()); - } - - #[test] - fn test_alternative_tag_separator_syntaxes() { - // Test which separators work for combined tags - - // Underscore separator - should work - let result = parse("title: !prefer_md test"); - assert!(result.is_ok(), "Underscore separator should work"); - let yaml = result.unwrap(); - let value = yaml.get_hash_value("title").unwrap(); - let (tag, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag, "prefer_md"); - - // Dash/hyphen separator - should work - let result = parse("title: !prefer-md test"); - assert!(result.is_ok(), "Dash separator should work"); - let yaml = result.unwrap(); - let value = yaml.get_hash_value("title").unwrap(); - let (tag, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag, "prefer-md"); - - // Dot separator - should work - let result = parse("title: !prefer.md test"); - assert!(result.is_ok(), "Dot separator should work"); - let yaml = result.unwrap(); - let value = yaml.get_hash_value("title").unwrap(); - let (tag, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag, "prefer.md"); - - // Colon separator - works - let result = parse("title: !prefer:md test"); - assert!(result.is_ok(), "Colon separator should work"); - let yaml = result.unwrap(); - let value = yaml.get_hash_value("title").unwrap(); - let (tag, _) = value.tag.as_ref().unwrap(); - assert_eq!(tag, "prefer:md"); - - // Exclamation mark separator - does NOT work (treated as handle) - let result = parse("title: !md!prefer test"); - assert!( - result.is_err(), - "Bang separator should not work (treated as YAML 
handle)" - ); - } - - #[test] - fn test_multiple_tagged_values() { - let yaml = parse( - r#" -title: !str "Plain Title" -description: !md "**Bold** description" -file: !path ./data.csv -"#, - ) - .unwrap(); - - let title = yaml.get_hash_value("title").expect("title not found"); - assert_eq!(title.tag.as_ref().map(|(t, _)| t.as_str()), Some("str")); - - let desc = yaml - .get_hash_value("description") - .expect("description not found"); - assert_eq!(desc.tag.as_ref().map(|(t, _)| t.as_str()), Some("md")); - - let file = yaml.get_hash_value("file").expect("file not found"); - assert_eq!(file.tag.as_ref().map(|(t, _)| t.as_str()), Some("path")); - } -} diff --git a/crates/quarto-yaml/src/yaml_with_source_info.rs b/crates/quarto-yaml/src/yaml_with_source_info.rs deleted file mode 100644 index c0071046d..000000000 --- a/crates/quarto-yaml/src/yaml_with_source_info.rs +++ /dev/null @@ -1,312 +0,0 @@ -//! YAML value with source location tracking. - -use crate::SourceInfo; -use yaml_rust2::Yaml; - -/// A YAML value with source location information. -/// -/// This structure wraps a `yaml-rust2::Yaml` value with source location tracking -/// for the value itself and all its children. Uses the **owned data approach**: -/// stores an owned `Yaml` value with a parallel `Children` structure for source -/// tracking. -/// -/// ## Design Trade-offs -/// -/// - **Memory**: ~3x overhead (owned Yaml + source-tracked children) -/// - **Simplicity**: No lifetime parameters, clean API -/// - **Config merging**: Can merge configs from different lifetimes -/// - **LSP caching**: Can serialize/deserialize for caching -/// -/// Follows rust-analyzer's precedent of using owned data for tree structures. 
-/// -/// ## Example -/// -/// ```rust,no_run -/// use quarto_yaml::{parse, YamlWithSourceInfo}; -/// use yaml_rust2::Yaml; -/// -/// let yaml = parse("title: My Document").unwrap(); -/// if let Some(title) = yaml.get_hash_value("title") { -/// println!("Title: {:?}", title.yaml); -/// println!("Location: offset {}", title.source_info.start_offset()); -/// } -/// ``` -#[derive(Debug, Clone)] -pub struct YamlWithSourceInfo { - /// The complete yaml-rust2::Yaml value (owned). - /// - /// This provides direct access to the raw Yaml for code that doesn't - /// need source tracking. It's a complete, independent Yaml tree. - pub yaml: Yaml, - - /// Source location for this node. - pub source_info: SourceInfo, - - /// YAML tag information (e.g., !path, !glob, !str). - /// - /// If present, contains the tag suffix (e.g., "path" for !path) and - /// the source location of the tag itself. Used to bypass markdown parsing - /// for tagged strings and enable error reporting on tags. - pub tag: Option<(String, SourceInfo)>, - - /// Source-tracked children (parallel structure). - /// - /// This mirrors the structure of `yaml` but includes source location - /// information for each child. The structure matches the `yaml` field: - /// - None for scalars and Null - /// - Array for sequences - /// - Hash for mappings - children: Children, -} - -/// Source-tracked children of a YAML node. -/// -/// This is a parallel structure to the children in `Yaml`, providing -/// source location information for each child element. -#[derive(Debug, Clone)] -enum Children { - /// No children (for scalars, Null, BadValue) - None, - - /// Array elements with source tracking - Array(Vec), - - /// Hash entries with source tracking - Hash(Vec), -} - -/// A key-value pair in a YAML hash/mapping with source tracking. -/// -/// Tracks source locations for the key, value, and the entire entry. 
-#[derive(Debug, Clone)] -pub struct YamlHashEntry { - /// The key with source tracking - pub key: YamlWithSourceInfo, - - /// The value with source tracking - pub value: YamlWithSourceInfo, - - /// Source location of just the key - pub key_span: SourceInfo, - - /// Source location of just the value - pub value_span: SourceInfo, - - /// Source location of the entire entry (key + value) - pub entry_span: SourceInfo, -} - -impl YamlWithSourceInfo { - /// Create a new YamlWithSourceInfo for a scalar or leaf node. - pub fn new_scalar(yaml: Yaml, source_info: SourceInfo) -> Self { - Self { - yaml, - source_info, - tag: None, - children: Children::None, - } - } - - /// Create a new YamlWithSourceInfo for a scalar with tag information. - pub fn new_scalar_with_tag( - yaml: Yaml, - source_info: SourceInfo, - tag: Option<(String, SourceInfo)>, - ) -> Self { - Self { - yaml, - source_info, - tag, - children: Children::None, - } - } - - /// Create a new YamlWithSourceInfo for an array/sequence. - pub fn new_array( - yaml: Yaml, - source_info: SourceInfo, - children: Vec, - ) -> Self { - Self { - yaml, - source_info, - tag: None, - children: Children::Array(children), - } - } - - /// Create a new YamlWithSourceInfo for a hash/mapping. - pub fn new_hash(yaml: Yaml, source_info: SourceInfo, entries: Vec) -> Self { - Self { - yaml, - source_info, - tag: None, - children: Children::Hash(entries), - } - } - - /// Check if this is a scalar value (not array or hash). - pub fn is_scalar(&self) -> bool { - matches!(self.children, Children::None) - } - - /// Check if this is an array. - pub fn is_array(&self) -> bool { - matches!(self.children, Children::Array(_)) - } - - /// Check if this is a hash. - pub fn is_hash(&self) -> bool { - matches!(self.children, Children::Hash(_)) - } - - /// Get array children if this is an array. 
- pub fn as_array(&self) -> Option<&[YamlWithSourceInfo]> { - match &self.children { - Children::Array(items) => Some(items), - _ => None, - } - } - - /// Get hash entries if this is a hash. - pub fn as_hash(&self) -> Option<&[YamlHashEntry]> { - match &self.children { - Children::Hash(entries) => Some(entries), - _ => None, - } - } - - /// Get a value from a hash by key (string comparison). - /// - /// This searches through hash entries and compares keys as strings. - /// Returns None if this is not a hash or the key is not found. - pub fn get_hash_value(&self, key: &str) -> Option<&YamlWithSourceInfo> { - match &self.children { - Children::Hash(entries) => entries.iter().find_map(|entry| { - if entry.key.yaml.as_str() == Some(key) { - Some(&entry.value) - } else { - None - } - }), - _ => None, - } - } - - /// Get an array element by index. - pub fn get_array_item(&self, index: usize) -> Option<&YamlWithSourceInfo> { - match &self.children { - Children::Array(items) => items.get(index), - _ => None, - } - } - - /// Get the number of children (array length or hash entry count). - pub fn len(&self) -> usize { - match &self.children { - Children::None => 0, - Children::Array(items) => items.len(), - Children::Hash(entries) => entries.len(), - } - } - - /// Check if this node has no children. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Consume self and return array children if this is an array. - /// - /// Returns a tuple of (items, source_info) where items are the owned - /// YamlWithSourceInfo elements and source_info is the SourceInfo for - /// the whole array. - pub fn into_array(self) -> Option<(Vec, SourceInfo)> { - match self.children { - Children::Array(items) => Some((items, self.source_info)), - _ => None, - } - } - - /// Consume self and return hash entries if this is a hash. 
- /// - /// Returns a tuple of (entries, source_info) where entries are the owned - /// YamlHashEntry elements and source_info is the SourceInfo for - /// the whole hash. - pub fn into_hash(self) -> Option<(Vec, SourceInfo)> { - match self.children { - Children::Hash(entries) => Some((entries, self.source_info)), - _ => None, - } - } -} - -impl YamlHashEntry { - /// Create a new YamlHashEntry. - pub fn new( - key: YamlWithSourceInfo, - value: YamlWithSourceInfo, - key_span: SourceInfo, - value_span: SourceInfo, - entry_span: SourceInfo, - ) -> Self { - Self { - key, - value, - key_span, - value_span, - entry_span, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_scalar_creation() { - let yaml = Yaml::String("test".into()); - let info = SourceInfo::for_test(); - let node = YamlWithSourceInfo::new_scalar(yaml.clone(), info.clone()); - - assert_eq!(node.yaml, yaml); - assert_eq!(node.source_info, info); - assert!(node.is_scalar()); - assert!(!node.is_array()); - assert!(!node.is_hash()); - assert_eq!(node.len(), 0); - } - - #[test] - fn test_array_creation() { - let child1 = - YamlWithSourceInfo::new_scalar(Yaml::String("a".into()), SourceInfo::for_test()); - let child2 = - YamlWithSourceInfo::new_scalar(Yaml::String("b".into()), SourceInfo::for_test()); - - let yaml = Yaml::Array(vec![Yaml::String("a".into()), Yaml::String("b".into())]); - let node = - YamlWithSourceInfo::new_array(yaml, SourceInfo::for_test(), vec![child1, child2]); - - assert!(node.is_array()); - assert_eq!(node.len(), 2); - assert!(node.as_array().is_some()); - assert_eq!(node.as_array().unwrap().len(), 2); - } - - #[test] - fn test_get_array_item() { - let child1 = - YamlWithSourceInfo::new_scalar(Yaml::String("a".into()), SourceInfo::for_test()); - let child2 = - YamlWithSourceInfo::new_scalar(Yaml::String("b".into()), SourceInfo::for_test()); - - let yaml = Yaml::Array(vec![Yaml::String("a".into()), Yaml::String("b".into())]); - let node = - 
YamlWithSourceInfo::new_array(yaml, SourceInfo::for_test(), vec![child1, child2]); - - assert_eq!(node.get_array_item(0).unwrap().yaml.as_str(), Some("a")); - assert_eq!(node.get_array_item(1).unwrap().yaml.as_str(), Some("b")); - assert!(node.get_array_item(2).is_none()); - } -} diff --git a/crates/validate-yaml/Cargo.toml b/crates/validate-yaml/Cargo.toml deleted file mode 100644 index 2bc730ea7..000000000 --- a/crates/validate-yaml/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -[package] -name = "validate-yaml" -version.workspace = true -edition.workspace = true -authors.workspace = true -license.workspace = true -repository.workspace = true - -[dependencies] -# Workspace crates -quarto-yaml.workspace = true -quarto-yaml-validation.workspace = true -quarto-error-reporting.workspace = true -quarto-source-map.workspace = true - -# Error handling -anyhow.workspace = true - -# CLI argument parsing -clap.workspace = true - -# JSON serialization -serde_json.workspace = true - -[lints] -workspace = true diff --git a/crates/validate-yaml/README.md b/crates/validate-yaml/README.md deleted file mode 100644 index 811ec17bf..000000000 --- a/crates/validate-yaml/README.md +++ /dev/null @@ -1,114 +0,0 @@ -# validate-yaml - -A command-line tool for validating YAML documents against schemas. 
- -## Usage - -```bash -validate-yaml --schema --input -``` - -## Example - -Given a schema file `schema.yaml`: - -```yaml -object: - properties: - title: - string: - description: "Document title" - author: - string: - description: "Document author" - required: - - title - - author -``` - -And a document `document.yaml`: - -```yaml -title: "My Document" -author: "John Doe" -``` - -Run validation: - -```bash -validate-yaml --schema schema.yaml --input document.yaml -``` - -### Success Output - -``` -✓ Validation successful - Input: document.yaml - Schema: schema.yaml -``` - -### Failure Output - -``` -Error: YAML Validation Failed (Q-1-10) - -Problem: Missing required property 'author' - - ✖ At document root - ℹ Schema constraint: object - ✖ In file `document.yaml` at line 2, column 6 - - ? Add the `author` property to your YAML document? - -See https://quarto.org/docs/errors/yaml/Q-1-10 for more information -``` - -## Features - -- **YAML 1.2 Support**: Uses yaml-rust2 for consistent YAML 1.2 parsing -- **Structured Error Messages**: Tidyverse-style error reporting with: - - Error codes (Q-1-xxx) for searchability - - Clear problem statements - - Contextual details with visual bullets (✖ error, ℹ info) - - Actionable hints for fixing issues - - Documentation links for each error code -- **Source Location Tracking**: Error messages include file, line, and column information -- **Schema Validation**: Supports Quarto's simplified JSON Schema subset including: - - Basic types (boolean, number, string, null, any) - - Enums - - Objects with properties and required fields - - Arrays - - AnyOf and AllOf combinators - - Schema references ($ref) - -## Error Codes - -Validation errors include searchable error codes: - -- **Q-1-10**: Missing required property -- **Q-1-11**: Type mismatch (expected one type, got another) -- **Q-1-12**: Invalid enum value -- **Q-1-13**: Array length constraint violation -- **Q-1-14**: String pattern mismatch -- **Q-1-15**: Number range violation 
-- **Q-1-16**: Object property count violation -- **Q-1-17**: Unresolved schema reference -- **Q-1-18**: Unknown property in closed object -- **Q-1-19**: Array uniqueness violation -- **Q-1-99**: Generic validation error - -Each error code links to detailed documentation at `https://quarto.org/docs/errors/yaml/Q-1-XX`. - -## Exit Codes - -- `0`: Validation successful -- `1`: Validation failed or error occurred - -## Test Data - -The `test-data/` directory contains example schemas and documents for testing: - -- `simple-schema.yaml`: Example schema with basic types -- `valid-document.yaml`: Document that passes validation -- `invalid-document.yaml`: Document that fails validation (missing required property) -- `type-mismatch-document.yaml`: Document with type errors diff --git a/crates/validate-yaml/src/main.rs b/crates/validate-yaml/src/main.rs deleted file mode 100644 index c6e03139b..000000000 --- a/crates/validate-yaml/src/main.rs +++ /dev/null @@ -1,169 +0,0 @@ -use anyhow::{Context, Result}; -use clap::{Parser, ValueEnum}; -use quarto_yaml_validation::{Schema, SchemaRegistry, ValidationDiagnostic, validate}; -use std::fs; -use std::path::PathBuf; -use std::process; - -/// How to render validation output -#[derive(Clone, Debug, Default, PartialEq, Eq, ValueEnum)] -enum OutputFormat { - /// Rich ariadne-style diagnostics with source snippets (default) - #[default] - Human, - /// Structured JSON, one object per error - Json, - /// One compact line per error: `file:line:col [CODE] path: message (hint: ...)`. - /// Optimized for token-efficient consumption, e.g. feeding errors to an LLM. 
- Compact, -} - -/// Validate a YAML document against a schema -#[derive(Parser, Debug)] -#[command(name = "validate-yaml")] -#[command(about = "Validate YAML documents against schemas", long_about = None)] -struct Args { - /// Path to the YAML document to validate - #[arg(long, value_name = "FILE")] - input: PathBuf, - - /// Path to the YAML schema file - #[arg(long, value_name = "FILE")] - schema: PathBuf, - - /// Output format - #[arg(long, value_enum, default_value_t = OutputFormat::Human)] - format: OutputFormat, - - /// Deprecated alias for `--format json` - #[arg(long)] - json: bool, -} - -fn main() { - if let Err(e) = run() { - eprintln!("Error: {:#}", e); - process::exit(1); - } -} - -fn run() -> Result<()> { - let args = Args::parse(); - - // Read the schema file - let schema_content = fs::read_to_string(&args.schema) - .with_context(|| format!("Failed to read schema file: {}", args.schema.display()))?; - - let schema_filename = args - .schema - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or("schema.yaml"); - - // Parse the schema file - let schema_yaml = quarto_yaml::parse_file(&schema_content, schema_filename).map_err(|e| { - anyhow::anyhow!( - "Failed to parse schema file {}: {}", - args.schema.display(), - e - ) - })?; - - let schema = Schema::from_yaml(&schema_yaml).map_err(|e| { - anyhow::anyhow!( - "Failed to load schema from {}: {}", - args.schema.display(), - e - ) - })?; - - // Read the input document - let input_content = fs::read_to_string(&args.input) - .with_context(|| format!("Failed to read input file: {}", args.input.display()))?; - - let input_filename = args - .input - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or("input.yaml"); - - // Parse the input document - let input_yaml = quarto_yaml::parse_file(&input_content, input_filename).map_err(|e| { - anyhow::anyhow!("Failed to parse input file {}: {}", args.input.display(), e) - })?; - - // Create a SourceContext and register the input file - // This enables proper file 
name and line/column tracking in error messages - let mut source_ctx = quarto_source_map::SourceContext::new(); - - // Compute the same FileId that quarto-yaml uses (hash of filename) - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - let mut hasher = DefaultHasher::new(); - input_filename.hash(&mut hasher); - let expected_file_id = quarto_source_map::FileId(hasher.finish() as usize); - - // Register the file with the computed FileId - let file_id = source_ctx.add_file_with_id( - expected_file_id, - args.input.to_string_lossy().to_string(), - Some(input_content.clone()), - ); - - // Verify we got the expected file_id - debug_assert_eq!( - file_id, expected_file_id, - "FileId mismatch: quarto-yaml will use {:?} but SourceContext has {:?}", - expected_file_id, file_id - ); - - // Create a schema registry (empty for now, but needed for $ref resolution) - let registry = SchemaRegistry::new(); - - // `--json` is a deprecated alias for `--format json`. - let format = if args.json { - OutputFormat::Json - } else { - args.format.clone() - }; - - // Validate the document against the schema - match validate(&input_yaml, &schema, ®istry, &source_ctx) { - Ok(()) => { - match format { - OutputFormat::Json => println!(r#"{{"success": true}}"#), - OutputFormat::Compact => println!("ok"), - OutputFormat::Human => { - println!("✓ Validation successful"); - println!(" Input: {}", args.input.display()); - println!(" Schema: {}", args.schema.display()); - } - } - Ok(()) - } - Err(error) => { - // Convert ValidationError to ValidationDiagnostic - let diagnostic = ValidationDiagnostic::from_validation_error(&error, &source_ctx); - - match format { - OutputFormat::Json => { - // JSON error output with structured paths and source ranges - let json = serde_json::json!({ - "success": false, - "errors": [diagnostic.to_json()] - }); - println!("{}", serde_json::to_string_pretty(&json)?); - } - OutputFormat::Compact => { - // One compact line, optimized for LLM 
consumption - eprintln!("{}", diagnostic.to_compact()); - } - OutputFormat::Human => { - // Human-readable error output with ariadne-style rendering - eprint!("{}", diagnostic.to_text(&source_ctx)); - } - } - process::exit(1); - } - } -} diff --git a/crates/validate-yaml/test-data/invalid-document.yaml b/crates/validate-yaml/test-data/invalid-document.yaml deleted file mode 100644 index e0d4b2da1..000000000 --- a/crates/validate-yaml/test-data/invalid-document.yaml +++ /dev/null @@ -1,4 +0,0 @@ -title: "My Research Paper" -# Missing required 'author' field -year: "not a number" -published: "not a boolean" diff --git a/crates/validate-yaml/test-data/simple-schema.yaml b/crates/validate-yaml/test-data/simple-schema.yaml deleted file mode 100644 index f29afc351..000000000 --- a/crates/validate-yaml/test-data/simple-schema.yaml +++ /dev/null @@ -1,19 +0,0 @@ -object: - properties: - title: - string: - description: "Document title" - author: - string: - description: "Document author" - year: - number: - description: "Publication year" - minimum: 1900 - maximum: 2100 - published: - boolean: - description: "Whether the document is published" - required: - - title - - author diff --git a/crates/validate-yaml/test-data/type-mismatch-document.yaml b/crates/validate-yaml/test-data/type-mismatch-document.yaml deleted file mode 100644 index a33e079ae..000000000 --- a/crates/validate-yaml/test-data/type-mismatch-document.yaml +++ /dev/null @@ -1,4 +0,0 @@ -title: "My Research Paper" -author: "John Doe" -year: "not a number" -published: "not a boolean" diff --git a/crates/validate-yaml/test-data/valid-document.yaml b/crates/validate-yaml/test-data/valid-document.yaml deleted file mode 100644 index 8af3f6756..000000000 --- a/crates/validate-yaml/test-data/valid-document.yaml +++ /dev/null @@ -1,4 +0,0 @@ -title: "My Research Paper" -author: "John Doe" -year: 2024 -published: true diff --git a/crates/wasm-quarto-hub-client/Cargo.lock b/crates/wasm-quarto-hub-client/Cargo.lock index 
cacc8db4f..99677ea82 100644 --- a/crates/wasm-quarto-hub-client/Cargo.lock +++ b/crates/wasm-quarto-hub-client/Cargo.lock @@ -2417,7 +2417,9 @@ dependencies = [ [[package]] name = "quarto-yaml" -version = "0.7.0" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c32ab7b39ffa5c43c8aa6abf7eff392678acee63bad5d21680fe295e69b7c2e0" dependencies = [ "quarto-source-map", "serde",