diff --git a/lean/cslib/lake-manifest.json b/lean/cslib/lake-manifest.json index 76ba7e0..1a4bb3f 100644 --- a/lean/cslib/lake-manifest.json +++ b/lean/cslib/lake-manifest.json @@ -1,81 +1,41 @@ -{"version": "1.1.0", +{"version": "1.2.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover/doc-gen4", + [{"url": "https://github.com/leanprover/cslib", "type": "git", "subDir": null, "scope": "", - "rev": "7be6082d00bb0cfc1e136505dba7718cd5fcaa84", - "name": "«doc-gen4»", - "manifestFile": "lake-manifest.json", - "inputRev": "v4.28.0-rc1", - "inherited": false, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover/cslib", - "type": "git", - "subDir": null, - "scope": "", - "rev": "00d68a39497c0954bd52ca5237eea2aa152a5387", + "rev": "48e29fde33e1d1650a1615ba4d1440a1c36a23c3", "name": "Cslib", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": false, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover/lean4-cli", + {"url": "https://github.com/leanprover/doc-gen4", "type": "git", "subDir": null, "scope": "", - "rev": "28e0856d4424863a85b18f38868c5420c55f9bae", - "name": "Cli", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/fgdorais/lean4-unicode-basic", - "type": "git", - "subDir": null, - "scope": "", - "rev": "8668e1ab7c987fb8ed1349f14c3b7b60bd5f27b6", - "name": "UnicodeBasic", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/dupuisf/BibtexQuery", - "type": "git", - "subDir": null, - "scope": "", - "rev": "1c5c543d2637aebf90c80aead2d401ae88db13cc", - "name": "BibtexQuery", - "manifestFile": "lake-manifest.json", - "inputRev": "master", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/acmepjz/md4lean", - "type": "git", - "subDir": null, - "scope": "", - "rev": "38ac5945d744903ffcc473ce1030223991b11cf6", - "name": "MD4Lean", + "rev": "2c96309e0de1b916d1a0b74c8119fddc7bf7da51", + "name": "«doc-gen4»", "manifestFile": "lake-manifest.json", "inputRev": "main", - "inherited": true, + "inherited": false, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/mathlib4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "5352afccd6866369be9de43f5b7ec47203555f44", + "rev": "ae0c6140a8312511dbb5bc265a0146dd418eca8d", "name": "mathlib", "manifestFile": "lake-manifest.json", - "inputRev": "v4.28.0-rc1", + "inputRev": "master", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/plausible", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "7311586e1a56af887b1081d05e80c11b6c41d212", + "rev": "f449eabb8f7e3feef0366856c20e28a6d2c97ee3", "name": "plausible", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -85,7 +45,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "5ce7f0a355f522a952a3d678d696bd563bb4fd28", + "rev": "c5d5b8fe6e5158def25cd28eb94e4141ad97c843", "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -95,7 +55,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "875ad9d88ed684e39c16bdea260e6ecfa15afd60", + "rev": "86503d416c875fdcf3b6b6c54c22581e96c6bda7", "name": "importGraph", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -105,41 +65,92 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "6d65c6e0a25b8a52c13c3adeb63ecde3bfbb6294", + "rev": "82d457fb3bdd9efadbae06608ff337d689efdddf", "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.86", + "inputRev": "v0.0.97", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "f08e838d4f9aea519f3cde06260cfb686fd4bab0", + "rev": "f74c7555aaa94eadd7b7bff9170f7983f92aac21", "name": "aesop", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "v4.30.0-rc1", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/quote4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "23324752757bf28124a518ec284044c8db79fee5", + "rev": "7aa86cb20b8458748dc24d55dab2d7ea01161057", "name": "Qq", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "v4.30.0-rc1", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/batteries", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "cabbb5a025bfbbc50af9184ed2dfdde6ea4f53a7", + "rev": "bf597c77bf9b8e66720d724928207f5911533113", "name": "batteries", "manifestFile": "lake-manifest.json", + "inputRev": "v4.30.0-rc1", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/lean4-cli", + "type": "git", + "subDir": null, + "scope": "leanprover", + "rev": "f7d0ca7c926cdde0562af20394dd25d028b839a5", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.30.0-rc1", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/leansqlite", + "type": "git", + "subDir": null, + "scope": "", + "rev": "4dfd48ca35193382ddf4c64eef5cdd4f6091bef3", + "name": "leansqlite", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/fgdorais/lean4-unicode-basic", + "type": "git", + "subDir": null, + "scope": "", + "rev": "c8bc12a81fd4ee633719f7fd3f587eca7d0e87ab", + "name": "UnicodeBasic", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/dupuisf/BibtexQuery", + "type": "git", + "subDir": null, + "scope": "", + "rev": "5d31b64fb703c5d77f6ef4d1fb958f9bdf1ea539", + "name": "BibtexQuery", + "manifestFile": "lake-manifest.json", + "inputRev": "nightly-testing", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/acmepjz/md4lean", + "type": "git", + "subDir": null, + "scope": "", + "rev": "6a3fb240133bcb7e1a066fdc784b3fdc304e3fc5", + "name": "MD4Lean", + "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, - "configFile": "lakefile.toml"}], + "configFile": "lakefile.lean"}], "name": "«cslib-extractor»", - "lakeDir": ".lake"} + "lakeDir": ".lake", + "fixedToolchain": false} diff --git a/lean/cslib/lakefile.lean b/lean/cslib/lakefile.lean index e48cda9..3f04e15 100644 --- a/lean/cslib/lakefile.lean +++ b/lean/cslib/lakefile.lean @@ -7,8 +7,8 @@ package «cslib-extractor» where lean_lib «CslibExtract» where roots := #[`CslibExtract] +require «doc-gen4» from git + "https://github.com/leanprover/doc-gen4" @ "main" + require Cslib from git "https://github.com/leanprover/cslib" @ "main" - -require «doc-gen4» from git - "https://github.com/leanprover/doc-gen4" @ "v4.28.0-rc1" diff --git a/lean/cslib/lean-toolchain b/lean/cslib/lean-toolchain index 4bafde2..2210cba 100644 --- a/lean/cslib/lean-toolchain +++ b/lean/cslib/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.28.0-rc1 +leanprover/lean4:v4.30.0-rc1 diff --git a/lean/flt/lake-manifest.json b/lean/flt/lake-manifest.json index 0faeab2..1a668fe 100644 --- a/lean/flt/lake-manifest.json +++ b/lean/flt/lake-manifest.json @@ -1,65 +1,25 @@ -{"version": "1.1.0", +{"version": "1.2.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover/doc-gen4", + [{"url": "https://github.com/ImperialCollegeLondon/FLT", "type": "git", "subDir": null, "scope": "", - "rev": "01e143329f2f79abbb8559344e836c221ae84cfd", - "name": "«doc-gen4»", - "manifestFile": "lake-manifest.json", - "inputRev": "v4.27.0", - "inherited": false, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/ImperialCollegeLondon/FLT", - "type": "git", - "subDir": null, - "scope": "", - "rev": "782d8b962dd96be0c767b7b99e390e32c52ea583", + "rev": "260e400807ff88e84d008fb66792ec2dfdf83154", "name": "FLT", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": false, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover/lean4-cli", + {"url": "https://github.com/leanprover/doc-gen4", "type": "git", "subDir": null, "scope": "", - "rev": "55c37290ff6186e2e965d68cf853a57c0702db82", - "name": "Cli", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/fgdorais/lean4-unicode-basic", - "type": "git", - "subDir": null, - "scope": "", - "rev": "b09d573cbacf5a8e767292e0edeec787a81689ce", - "name": "UnicodeBasic", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/dupuisf/BibtexQuery", - "type": "git", - "subDir": null, - "scope": "", - "rev": "c8a6a4dae7c7f42949a1058427a75c20447e990f", - "name": "BibtexQuery", - "manifestFile": "lake-manifest.json", - "inputRev": "master", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/acmepjz/md4lean", - "type": "git", - "subDir": null, - "scope": "", - "rev": "38ac5945d744903ffcc473ce1030223991b11cf6", - "name": "MD4Lean", + "rev": "2c96309e0de1b916d1a0b74c8119fddc7bf7da51", + "name": "«doc-gen4»", "manifestFile": "lake-manifest.json", "inputRev": "main", - "inherited": true, + "inherited": false, "configFile": "lakefile.lean"}, {"url": "https://github.com/PatrickMassot/checkdecls.git", "type": "git", @@ -75,17 +35,17 @@ "type": "git", "subDir": null, "scope": "", - "rev": "a3a10db0e9d66acbebf76c5e6a135066525ac900", + "rev": "8f9d9cff6bd728b17a24e163c9402775d9e6a365", "name": "mathlib", "manifestFile": "lake-manifest.json", - "inputRev": null, + "inputRev": "master", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/plausible", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "009dc1e6f2feb2c96c081537d80a0905b2c6498f", + "rev": "55c8532eb21ec9f6d565d51d96b8ca50bd1fbef3", "name": "plausible", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -95,7 +55,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "5ce7f0a355f522a952a3d678d696bd563bb4fd28", + "rev": "c5d5b8fe6e5158def25cd28eb94e4141ad97c843", "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -105,7 +65,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "8f497d55985a189cea8020d9dc51260af1e41ad2", + "rev": "85b59af46828c029a9168f2f9c35119bd0721e6e", "name": "importGraph", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -115,17 +75,17 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "c04225ee7c0585effbd933662b3151f01b600e40", + "rev": "be3b2e63b1bbf496c478cef98b86972a37c1417d", "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.85", + "inputRev": "v0.0.87", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "cb837cc26236ada03c81837bebe0acd9c70ced7d", + "rev": "f642a64c76df8ba9cb53dba3b919425a0c2aeaf1", "name": "aesop", "manifestFile": "lake-manifest.json", "inputRev": "master", @@ -135,7 +95,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "bd58c9efe2086d56ca361807014141a860ddbf8c", + "rev": "b8f98e9087e02c8553945a2c5abf07cec8e798c3", "name": "Qq", "manifestFile": "lake-manifest.json", "inputRev": "master", @@ -145,11 +105,62 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "b25b36a7caf8e237e7d1e6121543078a06777c8a", + "rev": "495c008c3e3f4fb4256ff5582ddb3abf3198026f", "name": "batteries", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, - "configFile": "lakefile.toml"}], + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/lean4-cli", + "type": "git", + "subDir": null, + "scope": "leanprover", + "rev": "4f10f47646cb7d5748d6f423f4a07f98f7bbcc9e", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.28.0", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/leansqlite", + "type": "git", + "subDir": null, + "scope": "", + "rev": "4dfd48ca35193382ddf4c64eef5cdd4f6091bef3", + "name": "leansqlite", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/fgdorais/lean4-unicode-basic", + "type": "git", + "subDir": null, + "scope": "", + "rev": "c8bc12a81fd4ee633719f7fd3f587eca7d0e87ab", + "name": "UnicodeBasic", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/dupuisf/BibtexQuery", + "type": "git", + "subDir": null, + "scope": "", + "rev": "5d31b64fb703c5d77f6ef4d1fb958f9bdf1ea539", + "name": "BibtexQuery", + "manifestFile": "lake-manifest.json", + "inputRev": "nightly-testing", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/acmepjz/md4lean", + "type": "git", + "subDir": null, + "scope": "", + "rev": "6a3fb240133bcb7e1a066fdc784b3fdc304e3fc5", + "name": "MD4Lean", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}], "name": "«flt-extractor»", - "lakeDir": ".lake"} + "lakeDir": ".lake", + "fixedToolchain": false} diff --git a/lean/flt/lakefile.lean b/lean/flt/lakefile.lean index 412308c..f21999c 100644 --- a/lean/flt/lakefile.lean +++ b/lean/flt/lakefile.lean @@ -7,8 +7,8 @@ package «flt-extractor» where lean_lib «FLTExtract» where roots := #[`FLTExtract] +require «doc-gen4» from git + "https://github.com/leanprover/doc-gen4" @ "main" + require FLT from git "https://github.com/ImperialCollegeLondon/FLT" @ "main" - -require «doc-gen4» from git - "https://github.com/leanprover/doc-gen4" @ "v4.27.0" diff --git a/lean/flt/lean-toolchain b/lean/flt/lean-toolchain index 5249182..79ac861 100644 --- a/lean/flt/lean-toolchain +++ b/lean/flt/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.27.0 +leanprover/lean4:v4.30.0-rc1 \ No newline at end of file diff --git a/lean/formal-conjectures/lake-manifest.json b/lean/formal-conjectures/lake-manifest.json index 3f35390..d9f2d91 100644 --- a/lean/formal-conjectures/lake-manifest.json +++ b/lean/formal-conjectures/lake-manifest.json @@ -1,91 +1,51 @@ -{"version": "1.1.0", +{"version": "1.2.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover/doc-gen4", + [{"url": "https://github.com/google-deepmind/formal-conjectures", "type": "git", "subDir": null, "scope": "", - "rev": "58b48e75bd19f785927e06912dea610e5e48f1fa", - "name": "«doc-gen4»", - "manifestFile": "lake-manifest.json", - "inputRev": "v4.22.0", - "inherited": false, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/google-deepmind/formal-conjectures", - "type": "git", - "subDir": null, - "scope": "", - "rev": "f01550cf4f910c7018f593f47979ab1cc9e8c70e", + "rev": "0ab27014f1755dc2812ba4193add6b4b159a0022", "name": "formal_conjectures", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": false, "configFile": "lakefile.toml"}, - {"url": "https://github.com/mhuisi/lean4-cli", - "type": "git", - "subDir": null, - "scope": "", - "rev": "6667b921594697980586296511fab6a359e802d1", - "name": "Cli", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/fgdorais/lean4-unicode-basic", - "type": "git", - "subDir": null, - "scope": "", - "rev": "d3195374a885cf2b0bfa66063deb493686029f95", - "name": "UnicodeBasic", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/dupuisf/BibtexQuery", - "type": "git", - "subDir": null, - "scope": "", - "rev": "dbfe2b7630c5f7c5c1cf71e7747ffc0a30337f69", - "name": "BibtexQuery", - "manifestFile": "lake-manifest.json", - "inputRev": "dbfe2b7630c5f7c5c1cf71e7747ffc0a30337f69", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/acmepjz/md4lean", + {"url": "https://github.com/leanprover/doc-gen4", "type": "git", "subDir": null, "scope": "", - "rev": "feac4e0c356b0928657bf3b54fa83ae952f53257", - "name": "MD4Lean", + "rev": "2c96309e0de1b916d1a0b74c8119fddc7bf7da51", + "name": "«doc-gen4»", "manifestFile": "lake-manifest.json", "inputRev": "main", - "inherited": true, + "inherited": false, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/mathlib4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "79e94a093aff4a60fb1b1f92d9681e407124c2ca", + "rev": "a3a10db0e9d66acbebf76c5e6a135066525ac900", "name": "mathlib", "manifestFile": "lake-manifest.json", - "inputRev": "v4.22.0", + "inputRev": "v4.27.0", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/plausible", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "b100ad4c5d74a464f497aaa8e7c74d86bf39a56f", + "rev": "009dc1e6f2feb2c96c081537d80a0905b2c6498f", "name": "plausible", "manifestFile": "lake-manifest.json", - "inputRev": "v4.22.0", + "inputRev": "main", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/LeanSearchClient", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "99657ad92e23804e279f77ea6dbdeebaa1317b98", + "rev": "5ce7f0a355f522a952a3d678d696bd563bb4fd28", "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -95,51 +55,102 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "eb164a46de87078f27640ee71e6c3841defc2484", + "rev": "8f497d55985a189cea8020d9dc51260af1e41ad2", "name": "importGraph", "manifestFile": "lake-manifest.json", - "inputRev": "v4.22.0", + "inputRev": "main", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/ProofWidgets4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "1253a071e6939b0faf5c09d2b30b0bfc79dae407", + "rev": "c04225ee7c0585effbd933662b3151f01b600e40", "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.68", + "inputRev": "v0.0.85", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "1256a18522728c2eeed6109b02dd2b8f207a2a3c", + "rev": "cb837cc26236ada03c81837bebe0acd9c70ced7d", "name": "aesop", "manifestFile": "lake-manifest.json", - "inputRev": "v4.22.0", + "inputRev": "master", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/quote4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "917bfa5064b812b7fbd7112d018ea0b4def25ab3", + "rev": "bd58c9efe2086d56ca361807014141a860ddbf8c", "name": "Qq", "manifestFile": "lake-manifest.json", - "inputRev": "v4.22.0", + "inputRev": "master", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/batteries", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "240676e9568c254a69be94801889d4b13f3b249f", + "rev": "b25b36a7caf8e237e7d1e6121543078a06777c8a", "name": "batteries", "manifestFile": "lake-manifest.json", - "inputRev": "v4.22.0", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/lean4-cli", + "type": "git", + "subDir": null, + "scope": "leanprover", + "rev": "55c37290ff6186e2e965d68cf853a57c0702db82", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.27.0", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/leansqlite", + "type": "git", + "subDir": null, + "scope": "", + "rev": "4dfd48ca35193382ddf4c64eef5cdd4f6091bef3", + "name": "leansqlite", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/fgdorais/lean4-unicode-basic", + "type": "git", + "subDir": null, + "scope": "", + "rev": "c8bc12a81fd4ee633719f7fd3f587eca7d0e87ab", + "name": "UnicodeBasic", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/dupuisf/BibtexQuery", + "type": "git", + "subDir": null, + "scope": "", + "rev": "5d31b64fb703c5d77f6ef4d1fb958f9bdf1ea539", + "name": "BibtexQuery", + "manifestFile": "lake-manifest.json", + "inputRev": "nightly-testing", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/acmepjz/md4lean", + "type": "git", + "subDir": null, + "scope": "", + "rev": "6a3fb240133bcb7e1a066fdc784b3fdc304e3fc5", + "name": "MD4Lean", + "manifestFile": "lake-manifest.json", + "inputRev": "main", "inherited": true, - "configFile": "lakefile.toml"}], + "configFile": "lakefile.lean"}], "name": "«formal-conjectures-extractor»", - "lakeDir": ".lake"} + "lakeDir": ".lake", + "fixedToolchain": false} diff --git a/lean/formal-conjectures/lakefile.lean b/lean/formal-conjectures/lakefile.lean index 032c248..d136505 100644 --- a/lean/formal-conjectures/lakefile.lean +++ b/lean/formal-conjectures/lakefile.lean @@ -4,8 +4,8 @@ open Lake DSL package «formal-conjectures-extractor» where -- Workspace for extracting formal-conjectures documentation +require «doc-gen4» from git + "https://github.com/leanprover/doc-gen4" @ "main" + require «formal_conjectures» from git "https://github.com/google-deepmind/formal-conjectures" @ "main" - -require «doc-gen4» from git - "https://github.com/leanprover/doc-gen4" @ "v4.22.0" diff --git a/lean/formal-conjectures/lean-toolchain b/lean/formal-conjectures/lean-toolchain index 6ac6d4c..79ac861 100644 --- a/lean/formal-conjectures/lean-toolchain +++ b/lean/formal-conjectures/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.22.0 +leanprover/lean4:v4.30.0-rc1 \ No newline at end of file diff --git a/lean/mathlib/lake-manifest.json b/lean/mathlib/lake-manifest.json index 68cc3c8..d094a2d 100644 --- a/lean/mathlib/lake-manifest.json +++ b/lean/mathlib/lake-manifest.json @@ -1,71 +1,31 @@ -{"version": "1.1.0", +{"version": "1.2.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover/doc-gen4", + [{"url": "https://github.com/leanprover-community/mathlib4.git", "type": "git", "subDir": null, "scope": "", - "rev": "7be6082d00bb0cfc1e136505dba7718cd5fcaa84", - "name": "«doc-gen4»", - "manifestFile": "lake-manifest.json", - "inputRev": "v4.28.0-rc1", - "inherited": false, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover-community/mathlib4.git", - "type": "git", - "subDir": null, - "scope": "", - "rev": "0e16bffea0a1b2664111fb1cfeb46c94e4a92604", + "rev": "fd68cd664924f41a676da81ded2c886d0488af03", "name": "mathlib", "manifestFile": "lake-manifest.json", "inputRev": null, "inherited": false, "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover/lean4-cli", - "type": "git", - "subDir": null, - "scope": "", - "rev": "28e0856d4424863a85b18f38868c5420c55f9bae", - "name": "Cli", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/fgdorais/lean4-unicode-basic", + {"url": "https://github.com/leanprover/doc-gen4", "type": "git", "subDir": null, "scope": "", - "rev": "8668e1ab7c987fb8ed1349f14c3b7b60bd5f27b6", - "name": "UnicodeBasic", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/dupuisf/BibtexQuery", - "type": "git", - "subDir": null, - "scope": "", - "rev": "1c5c543d2637aebf90c80aead2d401ae88db13cc", - "name": "BibtexQuery", - "manifestFile": "lake-manifest.json", - "inputRev": "master", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/acmepjz/md4lean", - "type": "git", - "subDir": null, - "scope": "", - "rev": "38ac5945d744903ffcc473ce1030223991b11cf6", - "name": "MD4Lean", + "rev": "2c96309e0de1b916d1a0b74c8119fddc7bf7da51", + "name": "«doc-gen4»", "manifestFile": "lake-manifest.json", "inputRev": "main", - "inherited": true, + "inherited": false, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/plausible", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "7311586e1a56af887b1081d05e80c11b6c41d212", + "rev": "a3b459a8312125758e51c354b93d54ba620efda6", "name": "plausible", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -75,7 +35,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "5ce7f0a355f522a952a3d678d696bd563bb4fd28", + "rev": "c5d5b8fe6e5158def25cd28eb94e4141ad97c843", "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -85,7 +45,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "875ad9d88ed684e39c16bdea260e6ecfa15afd60", + "rev": "4411c5f89c797401c609b3a946c8874569e69731", "name": "importGraph", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -95,41 +55,92 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "6d65c6e0a25b8a52c13c3adeb63ecde3bfbb6294", + "rev": "82d457fb3bdd9efadbae06608ff337d689efdddf", "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.86", + "inputRev": "v0.0.97", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "f08e838d4f9aea519f3cde06260cfb686fd4bab0", + "rev": "f74c7555aaa94eadd7b7bff9170f7983f92aac21", "name": "aesop", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "v4.30.0-rc1", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/quote4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "23324752757bf28124a518ec284044c8db79fee5", + "rev": "7aa86cb20b8458748dc24d55dab2d7ea01161057", "name": "Qq", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "v4.30.0-rc1", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/batteries", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "cabbb5a025bfbbc50af9184ed2dfdde6ea4f53a7", + "rev": "bf597c77bf9b8e66720d724928207f5911533113", "name": "batteries", "manifestFile": "lake-manifest.json", + "inputRev": "v4.30.0-rc1", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/lean4-cli", + "type": "git", + "subDir": null, + "scope": "leanprover", + "rev": "f7d0ca7c926cdde0562af20394dd25d028b839a5", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.30.0-rc1", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/leansqlite", + "type": "git", + "subDir": null, + "scope": "", + "rev": "4dfd48ca35193382ddf4c64eef5cdd4f6091bef3", + "name": "leansqlite", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/fgdorais/lean4-unicode-basic", + "type": "git", + "subDir": null, + "scope": "", + "rev": "c8bc12a81fd4ee633719f7fd3f587eca7d0e87ab", + "name": "UnicodeBasic", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/dupuisf/BibtexQuery", + "type": "git", + "subDir": null, + "scope": "", + "rev": "5d31b64fb703c5d77f6ef4d1fb958f9bdf1ea539", + "name": "BibtexQuery", + "manifestFile": "lake-manifest.json", + "inputRev": "nightly-testing", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/acmepjz/md4lean", + "type": "git", + "subDir": null, + "scope": "", + "rev": "6a3fb240133bcb7e1a066fdc784b3fdc304e3fc5", + "name": "MD4Lean", + "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, - "configFile": "lakefile.toml"}], + "configFile": "lakefile.lean"}], "name": "«mathlib-extractor»", - "lakeDir": ".lake"} + "lakeDir": ".lake", + "fixedToolchain": false} diff --git a/lean/mathlib/lakefile.lean b/lean/mathlib/lakefile.lean index f6d3ba3..f2b6f63 100644 --- a/lean/mathlib/lakefile.lean +++ b/lean/mathlib/lakefile.lean @@ -7,8 +7,8 @@ package «mathlib-extractor» where lean_lib «MathExtract» where roots := #[`MathExtract] +require «doc-gen4» from git + "https://github.com/leanprover/doc-gen4" @ "main" + require mathlib from git "https://github.com/leanprover-community/mathlib4.git" - -require «doc-gen4» from git - "https://github.com/leanprover/doc-gen4" @ "v4.28.0-rc1" diff --git a/lean/mathlib/lean-toolchain b/lean/mathlib/lean-toolchain index 4bafde2..2210cba 100644 --- a/lean/mathlib/lean-toolchain +++ b/lean/mathlib/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.28.0-rc1 +leanprover/lean4:v4.30.0-rc1 diff --git a/lean/physlean/PhysExtract.lean b/lean/physlean/PhysExtract.lean index bbfe63d..cbfc3a6 100644 --- a/lean/physlean/PhysExtract.lean +++ b/lean/physlean/PhysExtract.lean @@ -1 +1,2 @@ -import PhysLean +import Physlib +import QuantumInfo diff --git a/lean/physlean/lake-manifest.json b/lean/physlean/lake-manifest.json index e0230f9..bda3b9e 100644 --- a/lean/physlean/lake-manifest.json +++ b/lean/physlean/lake-manifest.json @@ -1,81 +1,41 @@ -{"version": "1.1.0", +{"version": "1.2.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover/doc-gen4", + [{"url": "https://github.com/HEPLean/PhysLean", "type": "git", "subDir": null, "scope": "", - "rev": "77ef3eb515ad6bd125c596f0b164349d4a7d5bf5", - "name": "«doc-gen4»", - "manifestFile": "lake-manifest.json", - "inputRev": "v4.26.0", - "inherited": false, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/HEPLean/PhysLean", - "type": "git", - "subDir": null, - "scope": "", - "rev": "7ba876c3f5fcd9f67fa2ee9326dd5d0b1e70079a", + "rev": "9ca1ee1d0cac43391399fcdc9e9fca8c94c17057", "name": "PhysLean", "manifestFile": "lake-manifest.json", "inputRev": null, "inherited": false, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover/lean4-cli", - "type": "git", - "subDir": null, - "scope": "", - "rev": "933fce7e893f65969714c60cdb4bd8376786044e", - "name": "Cli", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/fgdorais/lean4-unicode-basic", - "type": "git", - "subDir": null, - "scope": "", - "rev": "84b88f7ac9adf382b9668f852cee82487d616792", - "name": "UnicodeBasic", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, "configFile": "lakefile.lean"}, - {"url": "https://github.com/dupuisf/BibtexQuery", + {"url": "https://github.com/leanprover/doc-gen4", "type": "git", "subDir": null, "scope": "", - "rev": "3ab4379b2b92448717de66b7d3e254ac1487aede", - "name": "BibtexQuery", - "manifestFile": "lake-manifest.json", - "inputRev": "master", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/acmepjz/md4lean", - "type": "git", - "subDir": null, - "scope": "", - "rev": "38ac5945d744903ffcc473ce1030223991b11cf6", - "name": "MD4Lean", + "rev": "2c96309e0de1b916d1a0b74c8119fddc7bf7da51", + "name": "«doc-gen4»", "manifestFile": "lake-manifest.json", "inputRev": "main", - "inherited": true, + "inherited": false, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/mathlib4.git", "type": "git", "subDir": null, "scope": "", - "rev": "2df2f0150c275ad53cb3c90f7c98ec15a56a1a67", + "rev": "8a178386ffc0f5fef0b77738bb5449d50efeea95", "name": "mathlib", "manifestFile": "lake-manifest.json", - "inputRev": "v4.26.0", + "inputRev": "v4.29.0", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/plausible", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "160af9e8e7d4ae448f3c92edcc5b6a8522453f11", + "rev": "83e90935a17ca19ebe4b7893c7f7066e266f50d3", "name": "plausible", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -85,7 +45,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "3591c3f664ac3719c4c86e4483e21e228707bfa2", + "rev": "c5d5b8fe6e5158def25cd28eb94e4141ad97c843", "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -95,7 +55,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "e9f31324f15ead11048b1443e62c5deaddd055d2", + "rev": "48d5698bc464786347c1b0d859b18f938420f060", "name": "importGraph", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -105,17 +65,17 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "b4fb2aa5290ebf61bc5f80a5375ba642f0a49192", + "rev": "3c52dee17f0cd89c1ec14de78920d1bdaa3d26b3", "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.83", + "inputRev": "v0.0.95", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "2f6d238744c4cb07fdc91240feaf5d4221a27931", + "rev": "7152850e7b216a0d409701617721b6e469d34bf6", "name": "aesop", "manifestFile": "lake-manifest.json", "inputRev": "master", @@ -125,7 +85,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "9312503909aa8e8bb392530145cc1677a6298574", + "rev": "707efb56d0696634e9e965523a1bbe9ac6ce141d", "name": "Qq", "manifestFile": "lake-manifest.json", "inputRev": "master", @@ -135,11 +95,62 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "24241822ef9d3e7f6a3bcc53ad136e12663db8f3", + "rev": "756e3321fd3b02a85ffda19fef789916223e578c", "name": "batteries", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, - "configFile": "lakefile.toml"}], + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/lean4-cli", + "type": "git", + "subDir": null, + "scope": "leanprover", + "rev": "7802da01beb530bf051ab657443f9cd9bc3e1a29", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.29.0", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/kim-em/leansqlite", + "type": "git", + "subDir": null, + "scope": "", + "rev": "d14544c72b593af6a66131bc34cdab16bf7c0940", + "name": "leansqlite", + "manifestFile": "lake-manifest.json", + "inputRev": "suppress-reducibility-warning", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/fgdorais/lean4-unicode-basic", + "type": "git", + "subDir": null, + "scope": "", + "rev": "9539e34e5cb2d52a6454d9b6218f6b6835cad071", + "name": "UnicodeBasic", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/dupuisf/BibtexQuery", + "type": "git", + "subDir": null, + "scope": "", + "rev": "5d31b64fb703c5d77f6ef4d1fb958f9bdf1ea539", + "name": "BibtexQuery", + "manifestFile": "lake-manifest.json", + "inputRev": "nightly-testing", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/acmepjz/md4lean", + "type": "git", + "subDir": null, + "scope": "", + "rev": "6a3fb240133bcb7e1a066fdc784b3fdc304e3fc5", + "name": "MD4Lean", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}], "name": "«physlean-extractor»", - "lakeDir": ".lake"} + "lakeDir": ".lake", + "fixedToolchain": false} diff --git a/lean/physlean/lakefile.lean b/lean/physlean/lakefile.lean index b6bbade..b819436 100644 --- a/lean/physlean/lakefile.lean +++ b/lean/physlean/lakefile.lean @@ -4,11 +4,12 @@ open Lake DSL package «physlean-extractor» where -- Workspace for extracting PhysLean documentation +@[default_target] lean_lib «PhysExtract» where roots := #[`PhysExtract] +require «doc-gen4» from git + "https://github.com/leanprover/doc-gen4" @ "v4.29.0" + require PhysLean from git "https://github.com/HEPLean/PhysLean" - -require «doc-gen4» from git - "https://github.com/leanprover/doc-gen4" @ "v4.26.0" diff --git a/lean/physlean/lean-toolchain b/lean/physlean/lean-toolchain index e59446d..79ac861 100644 --- a/lean/physlean/lean-toolchain +++ b/lean/physlean/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.26.0 +leanprover/lean4:v4.30.0-rc1 \ No newline at end of file diff --git a/scripts/generate_docs_data.py b/scripts/generate_docs_data.py index ed04a03..8b31194 100644 --- a/scripts/generate_docs_data.py +++ b/scripts/generate_docs_data.py @@ -193,7 +193,7 @@ class ModuleDict(TypedDict): def resolve_annotation(annotation: str | Expr | None) -> str: """Converts a griffe annotation to its string representation.""" - if isinstance(annotation, (Expr, str)): + if isinstance(annotation, Expr | str): return str(annotation) return "" @@ -391,16 +391,14 @@ def parse_docstring(docstring_object: object | None) -> DocstringSections: parse_admonition_section(section, sections_data) elif isinstance( section, - ( - DocstringSectionDeprecated, - DocstringSectionWarns, - DocstringSectionYields, - DocstringSectionReceives, - DocstringSectionOtherParameters, - DocstringSectionClasses, - DocstringSectionFunctions, - DocstringSectionModules, - ), + DocstringSectionDeprecated + | DocstringSectionWarns + | DocstringSectionYields + | DocstringSectionReceives + | DocstringSectionOtherParameters + | DocstringSectionClasses + | DocstringSectionFunctions + | DocstringSectionModules, ): sections_data[kind] = parse_generic_section(section) else: diff --git a/src/lean_explore/extract/doc_gen4.py b/src/lean_explore/extract/doc_gen4.py index 621e55a..e3e62ae 100644 --- a/src/lean_explore/extract/doc_gen4.py +++ b/src/lean_explore/extract/doc_gen4.py @@ -6,6 +6,7 @@ import logging import os +import re import shutil import subprocess import time @@ -23,6 +24,29 @@ logger = logging.getLogger(__name__) +def _uses_sqlite_docgen(lean_version: str) -> bool: + """Return whether the matching doc-gen4 release writes api-docs.db. + + Doc-gen4's SQLite output landed after `v4.29.0-rc1` and is present starting + with `v4.29.0-rc2`. + """ + match = extract_lean_version(lean_version) + version_match = re.match(r"^v(\d+)\.(\d+)\.(\d+)(?:-rc(\d+))?$", match) + if version_match is None: + return False + + major, minor, patch, rc = version_match.groups() + version = (int(major), int(minor), int(patch)) + if version > (4, 29, 0): + return True + if version < (4, 29, 0): + return False + + if rc is None: + return True + return int(rc) >= 2 + + def _clear_workspace_cache(workspace_path: Path) -> None: """Clear entire Lake cache to force complete rebuild. @@ -47,8 +71,8 @@ def _clear_workspace_cache(workspace_path: Path) -> None: shutil.rmtree(lake_dir) -def _get_doc_lib_names(package_name: str) -> list[str]: - """Get the library names to run doc-gen4 on for a package. +def _get_library_names(package_name: str) -> list[str]: + """Get the library names to build and run doc-gen4 on for a package. Some packages have custom extract wrappers, others use upstream libraries directly. """ @@ -62,6 +86,57 @@ def _get_doc_lib_names(package_name: str) -> list[str]: return lib_names.get(package_name, [f"{package_name.title()}Extract"]) +def _run_lake_build_target( + workspace_path: Path, + package_name: str, + target: str, + env: dict[str, str], + allow_failure: bool = False, +) -> bool: + """Run ``lake build `` streaming output to the logger. + + Args: + workspace_path: Path to the Lake workspace directory. + package_name: Name of the package for log messages. + target: The Lake build target to run. + env: Environment variables to pass to the subprocess. + allow_failure: Whether to continue when the target fails. + + Returns: + ``True`` if the target built successfully, otherwise ``False``. + + Raises: + RuntimeError: If the target fails and ``allow_failure`` is ``False``. + """ + logger.info("[%s] Running lake build %s...", package_name, target) + process = subprocess.Popen( + ["lake", "build", target], + cwd=workspace_path, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + env=env, + ) + if process.stdout: + for line in process.stdout: + logger.info(line.rstrip()) + + returncode = process.wait() + if returncode == 0: + return True + + if allow_failure: + logger.warning( + "[%s] lake build %s failed (continuing with generated docs)", + package_name, + target, + ) + return False + + raise RuntimeError(f"lake build failed for {package_name} target {target}") + + def _setup_workspace(package_config: PackageConfig) -> tuple[str, str]: """Fetch toolchain from GitHub and update lakefile. @@ -155,44 +230,18 @@ def _run_lake_for_package(package_name: str, verbose: bool = False) -> None: if result.returncode != 0: logger.warning("[%s] Cache fetch failed (non-fatal)", package_name) - logger.info("[%s] Running lake build...", package_name) - process = subprocess.Popen( - ["lake", "build"], - cwd=workspace_path, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - env=env, - ) - if process.stdout: - for line in process.stdout: - logger.info(line.rstrip()) - if process.wait() != 0: - raise RuntimeError(f"lake build failed for {package_name}") - - lib_names = _get_doc_lib_names(package_name) + # The :docs facet transitively depends on :docInfo, which depends on the + # library oleans, so a single lake build per library is sufficient. The + # explicit library build beforehand is redundant and doubles the work. + lib_names = _get_library_names(package_name) for lib_name in lib_names: - logger.info("[%s] Running doc-gen4 (%s:docs)...", package_name, lib_name) - - process = subprocess.Popen( - ["lake", "build", f"{lib_name}:docs"], - cwd=workspace_path, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - env=env, + _run_lake_build_target( + workspace_path, + package_name, + f"{lib_name}:docs", + env, + allow_failure=True, ) - if process.stdout: - for line in process.stdout: - logger.info(line.rstrip()) - returncode = process.wait() - if returncode != 0: - logger.warning( - "[%s] doc-gen4 had failures for %s (continuing with generated docs)", - package_name, lib_name, - ) async def run_doc_gen4( @@ -227,12 +276,29 @@ async def run_doc_gen4( workspace_path = Path("lean") / package_name logger.info("\n%s\nPackage: %s\n%s", "=" * 50, package_name, "=" * 50) + toolchain = None + ref = None if fresh: - _clear_workspace_cache(workspace_path) + if setup: + toolchain, ref = _setup_workspace(config) + logger.info("Toolchain: %s, ref: %s", toolchain, ref) + + # Doc-gen4 switched from BMP files to api-docs.db in v4.29.0-rc2. + # The SQLite format handles incremental updates, while legacy BMP + # output requires a cache clear to avoid stale files. + if toolchain and _uses_sqlite_docgen(toolchain): + logger.info( + "[%s] Skipping cache clear " + "(api-docs.db handles incremental updates)", + package_name, + ) + else: + _clear_workspace_cache(workspace_path) if setup: - toolchain, ref = _setup_workspace(config) - logger.info("Toolchain: %s, ref: %s", toolchain, ref) + if toolchain is None or ref is None: + toolchain, ref = _setup_workspace(config) + logger.info("Toolchain: %s, ref: %s", toolchain, ref) _run_lake_for_package(package_name, verbose) diff --git a/src/lean_explore/extract/doc_parser.py b/src/lean_explore/extract/doc_parser.py index 707e62a..2ca2aa4 100644 --- a/src/lean_explore/extract/doc_parser.py +++ b/src/lean_explore/extract/doc_parser.py @@ -1,12 +1,17 @@ """Parser for Lean doc-gen4 output files. -This module parses doc-gen4 JSON data and extracts Lean source code -to produce Declaration objects ready for database insertion. +This module parses doc-gen4 output and extracts Lean source code to produce +Declaration objects ready for database insertion. + +Supports two doc-gen4 output formats: +- SQLite database (api-docs.db): Used by doc-gen4 >= v4.29.0-rc2 +- BMP JSON files (.bmp): Used by doc-gen4 < v4.29.0-rc2 """ import json import logging import re +import sqlite3 from pathlib import Path from rich.progress import ( @@ -26,6 +31,148 @@ logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# RenderedCode BLOB parser +# +# Doc-gen4 stores declaration type signatures in the `name_info.type` column +# as a binary BLOB using leansqlite's ToBinary serialization format. +# +# The type is RenderedCode = TaggedText RenderedCode.Tag where: +# TaggedText: text(0) String | tag(1) Tag TaggedText | append(2) Array +# Tag: keyword(0) | string(1) | const(2) Name | sort-none(3) +# | sort-type(4) | sort-prop(5) | sort-sort(6) | otherExpr(7) +# Name: anonymous(0) | str(1) Name String | num(2) Name Nat +# +# Encoding primitives (big-endian, leansqlite Classes.lean): +# Nat – variable-length 7-bit chunks, high bit = continuation +# String – Nat(utf8_byte_length) + raw UTF-8 bytes +# Array – Nat(count) + elements +# --------------------------------------------------------------------------- + + +class _BlobReader: + """Minimal reader for leansqlite ToBinary format.""" + + __slots__ = ("_data", "_cursor") + + def __init__(self, data: bytes) -> None: + self._data = data + self._cursor = 0 + + def _read_byte(self) -> int: + if self._cursor >= len(self._data): + raise ValueError("Unexpected end of BLOB data") + value = self._data[self._cursor] + self._cursor += 1 + return value + + def _read_nat(self) -> int: + """Read a variable-length natural number (7-bit chunks, MSB = more).""" + result = 0 + shift = 0 + while True: + byte = self._read_byte() + if byte >= 128: + result |= (byte & 0x7F) << shift + else: + result |= byte << shift + break + shift += 7 + return result + + def _read_string(self) -> str: + byte_length = self._read_nat() + if self._cursor + byte_length > len(self._data): + raise ValueError("String extends past end of BLOB data") + raw = self._data[self._cursor : self._cursor + byte_length] + self._cursor += byte_length + return raw.decode("utf-8") + + def _read_name(self) -> str: + """Read a Lean Name and return its dot-separated string form.""" + tag = self._read_byte() + if tag == 0: # anonymous + return "" + if tag == 1: # str parent s + parent = self._read_name() + component = self._read_string() + return f"{parent}.{component}" if parent else component + if tag == 2: # num parent n + parent = self._read_name() + number = self._read_nat() + return f"{parent}.{number}" if parent else str(number) + raise ValueError(f"Invalid Name tag: {tag}") + + def _skip_name(self) -> None: + """Skip over a Name without allocating strings.""" + tag = self._read_byte() + if tag == 0: + return + if tag == 1: + self._skip_name() + byte_length = self._read_nat() + self._cursor += byte_length + return + if tag == 2: + self._skip_name() + self._read_nat() + return + raise ValueError(f"Invalid Name tag: {tag}") + + +def _extract_names_from_rendered_code(blob: bytes) -> list[str]: + """Extract referenced declaration names from a RenderedCode BLOB. + + Walks the TaggedText tree and collects Lean Names from every + RenderedCode.Tag.const node (tag byte 2). + + Args: + blob: Raw bytes of the RenderedCode BLOB from name_info.type. + + Returns: + De-duplicated list of fully-qualified Lean names referenced in the type. + """ + reader = _BlobReader(blob) + names: list[str] = [] + seen: set[str] = set() + + def walk_tagged_text() -> None: + tag = reader._read_byte() + if tag == 0: # text + reader._read_string() + elif tag == 1: # tag + walk_tag() + walk_tagged_text() + elif tag == 2: # append + count = reader._read_nat() + for _ in range(count): + walk_tagged_text() + else: + raise ValueError(f"Invalid TaggedText tag: {tag}") + + def walk_tag() -> None: + tag = reader._read_byte() + if tag <= 1 or (3 <= tag <= 7): + # keyword(0), string(1), sort-none(3), sort-type(4), + # sort-prop(5), sort-sort(6), otherExpr(7) — no payload + return + if tag == 2: # const + name = reader._read_name() + if name and name not in seen: + names.append(name) + seen.add(name) + return + raise ValueError(f"Invalid RenderedCode.Tag tag: {tag}") + + try: + walk_tagged_text() + except (ValueError, IndexError): + logger.debug("Failed to parse RenderedCode BLOB (%d bytes)", len(blob)) + return [] + + return names + + def _strip_lean_comments(source_text: str) -> str: """Strip Lean comments from source text for comparison. @@ -270,6 +417,16 @@ def _extract_source_text( package_name.replace("-", "").lower(), ]: if variant in package_cache: + if variant == "lean4" and file_path_string.startswith("src/lean/"): + adjusted_path = file_path_string[9:] + candidates.append(package_cache[variant] / adjusted_path) + continue + if variant == "lean4" and file_path_string.startswith("src/lake/"): + adjusted_path = file_path_string[9:] + candidates.append( + package_cache[variant].parent / "lake" / adjusted_path + ) + continue if variant == "lean4" and file_path_string.startswith("src/"): adjusted_path = file_path_string[4:] else: @@ -292,6 +449,251 @@ def _extract_source_text( ) +def _read_lean_toolchain_version(workspace_path: Path) -> str | None: + """Read the Lean version from a workspace's lean-toolchain file. + + Args: + workspace_path: Path to the package workspace (e.g., lean/mathlib). + + Returns: + Version string like 'v4.29.0-rc6', or None if not found. + """ + toolchain_file = workspace_path / "lean-toolchain" + if not toolchain_file.exists(): + return None + try: + content = toolchain_file.read_text().strip() + match = re.search(r"v\d+\.\d+\.\d+(?:-rc\d+)?", content) + return match.group() if match else None + except OSError: + return None + + +def _construct_source_link( + module_name: str, + module_source_url: str | None, + start_line: int, + end_line: int, + lean_version: str | None = None, +) -> str | None: + """Construct a GitHub source link from module URL and line range. + + Args: + module_name: Lean module name from api-docs.db. + module_source_url: GitHub URL to the module file from api-docs.db. + start_line: Start line number in the source file. + end_line: End line number in the source file. + lean_version: Lean toolchain version (e.g., 'v4.29.0-rc6') used as + the git ref for core module fallback URLs. + + Returns: + GitHub URL with line range fragment, or None if no source URL exists. + """ + if module_source_url: + return f"{module_source_url}#L{start_line}-L{end_line}" + + git_ref = lean_version or "master" + module_path = module_name.replace(".", "/") + root = module_name.split(".", 1)[0] + if root in {"Init", "Lean", "Std"}: + return ( + f"https://github.com/leanprover/lean4/blob/{git_ref}/" + f"src/lean/{module_path}.lean#L{start_line}-L{end_line}" + ) + if root == "Lake": + return ( + f"https://github.com/leanprover/lean4/blob/{git_ref}/" + f"src/lake/{module_path}.lean#L{start_line}-L{end_line}" + ) + + return None + + +def _parse_declarations_from_sqlite( + database_path: Path, + lean_root: Path, + package_cache: dict[str, Path], + allowed_module_prefixes: list[str], + lean_version: str | None = None, +) -> list[Declaration]: + """Parse declarations from a doc-gen4 SQLite database (api-docs.db). + + Doc-gen4 >= v4.29.0-rc2 outputs declaration data to a SQLite database + instead of individual BMP JSON files. This function reads that database + and produces the same Declaration objects as the BMP parser. + + Args: + database_path: Path to the api-docs.db SQLite database. + lean_root: Root directory of the Lean project. + package_cache: Dictionary mapping package names to their directories. + allowed_module_prefixes: Module prefixes to extract (e.g., ["Mathlib"]). + lean_version: Lean toolchain version for core module source links. + + Returns: + List of parsed Declaration objects. + """ + declarations = [] + + connection = sqlite3.connect(str(database_path)) + connection.row_factory = sqlite3.Row + + try: + # Query declarations with source ranges and both docstring types. + # Doc-gen4 stores docstrings as either markdown text or Verso binary + # BLOBs (never both). We prefer markdown; Verso BLOBs require a + # complex deserializer so we detect but cannot extract them yet. + query = """ + SELECT + n.module_name, + n.position, + n.kind, + n.name, + n.type, + r.start_line, + r.end_line, + d.text AS docstring, + v.content AS verso_docstring, + m.source_url + FROM name_info n + JOIN declaration_ranges r + ON n.module_name = r.module_name AND n.position = r.position + LEFT JOIN declaration_markdown_docstrings d + ON n.module_name = d.module_name AND n.position = d.position + LEFT JOIN declaration_verso_docstrings v + ON n.module_name = v.module_name AND n.position = v.position + JOIN modules m + ON n.module_name = m.name + WHERE n.render = 1 + ORDER BY n.module_name, n.position + """ + rows = connection.execute(query).fetchall() + + logger.info("Found %d declarations in api-docs.db", len(rows)) + + skipped_no_source = 0 + skipped_prefix = 0 + skipped_constructor = 0 + source_errors = 0 + verso_only_docstrings = 0 + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TaskProgressColumn(), + TimeRemainingColumn(), + ) as progress: + task = progress.add_task( + "[cyan]Parsing api-docs.db...", total=len(rows) + ) + + for row in rows: + module_name = row["module_name"] + declaration_name = row["name"] + + # Filter by module prefix + matches_prefix = any( + module_name == prefix or module_name.startswith(prefix + ".") + for prefix in allowed_module_prefixes + ) + if not matches_prefix: + skipped_prefix += 1 + progress.update(task, advance=1) + continue + + # Skip auto-generated .mk constructors + if declaration_name.endswith(".mk"): + skipped_constructor += 1 + progress.update(task, advance=1) + continue + + source_url = row["source_url"] + start_line = row["start_line"] + end_line = row["end_line"] + + source_link = _construct_source_link( + module_name, source_url, start_line, end_line, + lean_version=lean_version, + ) + if not source_link: + skipped_no_source += 1 + progress.update(task, advance=1) + continue + + # Extract source text from local files + try: + source_text = _extract_source_text( + source_link, lean_root, package_cache + ) + except (FileNotFoundError, ValueError) as error: + source_errors += 1 + if source_errors <= 10: + logger.debug( + "Could not extract source for %s: %s", + declaration_name, error, + ) + progress.update(task, advance=1) + continue + + # Extract dependency names from the type signature BLOB + type_blob = row["type"] + if type_blob: + dep_names = _extract_names_from_rendered_code( + bytes(type_blob) + ) + # Filter out self-references + dep_names = [ + d for d in dep_names if d != declaration_name + ] + dependencies = dep_names or None + else: + dependencies = None + + # Use markdown docstring; detect Verso-only cases + docstring = row["docstring"] + if not docstring and row["verso_docstring"]: + verso_only_docstrings += 1 + + declarations.append( + Declaration( + name=declaration_name, + module=module_name, + docstring=docstring, + source_text=source_text, + source_link=source_link, + dependencies=dependencies, + ) + ) + + progress.update(task, advance=1) + + if skipped_prefix > 0: + logger.info( + "Skipped %d declarations outside allowed prefixes", + skipped_prefix, + ) + if skipped_constructor > 0: + logger.info("Skipped %d .mk constructors", skipped_constructor) + if skipped_no_source > 0: + logger.info("Skipped %d declarations without source URL", skipped_no_source) + if verso_only_docstrings > 0: + logger.warning( + "%d declarations have Verso-only docstrings " + "(not yet supported, stored as docstring=None)", + verso_only_docstrings, + ) + if source_errors > 0: + logger.warning( + "Could not extract source text for %d declarations", + source_errors, + ) + + finally: + connection.close() + + return declarations + + def _parse_declarations_from_files( bmp_files: list[Path], lean_root: Path, @@ -310,6 +712,7 @@ def _parse_declarations_from_files( List of parsed Declaration objects. """ declarations = [] + source_errors = 0 with Progress( SpinnerColumn(), @@ -338,23 +741,33 @@ def _parse_declarations_from_files( for declaration_data in data.get("declarations", []): information = declaration_data["info"] - source_text = _extract_source_text( - information["sourceLink"], lean_root, package_cache - ) + declaration_name = information["name"] + + # Skip auto-generated .mk constructors + if declaration_name.endswith(".mk"): + continue + + try: + source_text = _extract_source_text( + information["sourceLink"], lean_root, package_cache + ) + except (FileNotFoundError, ValueError) as error: + source_errors += 1 + if source_errors <= 10: + logger.debug( + "Could not extract source for %s: %s", + declaration_name, error, + ) + continue header_html = declaration_data.get("header", "") dependencies = _extract_dependencies_from_html(header_html) # Filter out self-references from dependencies - declaration_name = information["name"] filtered_dependencies = [ d for d in dependencies if d != declaration_name ] - # Skip auto-generated .mk constructors - if declaration_name.endswith(".mk"): - continue - declarations.append( Declaration( name=declaration_name, @@ -368,6 +781,12 @@ def _parse_declarations_from_files( progress.update(task, advance=1) + if source_errors > 0: + logger.warning( + "Could not extract source text for %d declarations", + source_errors, + ) + return declarations @@ -428,12 +847,88 @@ async def _insert_declarations_batch( return inserted_count +_REQUIRED_DOCGEN_TABLES = {"name_info", "declaration_ranges", "modules"} + + +def _validate_docgen_sqlite(database_path: Path) -> bool: + """Check that a doc-gen4 api-docs.db is a valid, usable SQLite database. + + Verifies the file is non-empty, opens as SQLite, and contains the tables + that the extraction pipeline requires. + + Args: + database_path: Path to the api-docs.db file. + + Returns: + True if the database is valid and contains the required tables. + """ + if database_path.stat().st_size == 0: + logger.warning("api-docs.db exists but is empty: %s", database_path) + return False + + try: + connection = sqlite3.connect(str(database_path)) + try: + cursor = connection.execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ) + tables = {row[0] for row in cursor.fetchall()} + finally: + connection.close() + except sqlite3.DatabaseError as error: + logger.warning("api-docs.db is not a valid SQLite file: %s", error) + return False + + missing = _REQUIRED_DOCGEN_TABLES - tables + if missing: + logger.warning( + "api-docs.db is missing required tables %s: %s", + missing, database_path, + ) + return False + + return True + + +def _detect_docgen_format(workspace_path: Path) -> str: + """Detect which doc-gen4 output format a workspace uses. + + Doc-gen4 >= v4.29.0-rc2 writes to a SQLite database (api-docs.db). + Earlier versions write individual BMP JSON files to doc-data/. + + The SQLite file is validated before returning "sqlite" to guard against + zero-byte, corrupt, or incompatible databases left by crashed builds. + + Args: + workspace_path: Path to the package workspace (e.g., lean/mathlib). + + Returns: + "sqlite" if a valid api-docs.db exists, "bmp" if BMP files exist, + "none" otherwise. + """ + api_docs_db = workspace_path / ".lake" / "build" / "api-docs.db" + if api_docs_db.exists(): + if _validate_docgen_sqlite(api_docs_db): + return "sqlite" + logger.warning( + "Invalid api-docs.db at %s, checking for BMP fallback", + api_docs_db, + ) + + doc_data_dir = workspace_path / ".lake" / "build" / "doc-data" + if doc_data_dir.exists(): + bmp_files = list(doc_data_dir.glob("**/*.bmp")) + if bmp_files: + return "bmp" + + return "none" + + async def extract_declarations(engine: AsyncEngine, batch_size: int = 1000) -> None: """Extract all declarations from doc-gen4 data and load into database. - Looks for BMP files in each package's .lake/build/doc-data directory. - Extracts only declarations matching the package's configured module_prefixes, - ensuring each package's declarations come from its own workspace. + Automatically detects whether each package uses the newer SQLite format + (api-docs.db from doc-gen4 >= v4.29.0-rc2) or the legacy BMP JSON format. Args: engine: SQLAlchemy async engine for database connection. @@ -445,21 +940,13 @@ async def extract_declarations(engine: AsyncEngine, batch_size: int = 1000) -> N lean_root = Path("lean") all_declarations = [] - # Process each workspace separately with its own package cache for package_name in get_extraction_order(): package_config = PACKAGE_REGISTRY[package_name] - doc_data_dir = lean_root / package_name / ".lake" / "build" / "doc-data" - - if not doc_data_dir.exists(): - logger.warning( - "No doc-data directory for %s: %s", package_name, doc_data_dir - ) - continue - - bmp_files = sorted(doc_data_dir.glob("**/*.bmp")) - logger.info("Found %d BMP files in %s", len(bmp_files), package_name) + workspace_path = lean_root / package_name + docgen_format = _detect_docgen_format(workspace_path) - if not bmp_files: + if docgen_format == "none": + logger.warning("No doc-gen4 output found for %s", package_name) continue # Build workspace-specific package cache to avoid version mismatches @@ -469,9 +956,31 @@ async def extract_declarations(engine: AsyncEngine, batch_size: int = 1000) -> N package_name, len(package_cache), ) - declarations = _parse_declarations_from_files( - bmp_files, lean_root, package_cache, package_config.module_prefixes - ) + if docgen_format == "sqlite": + api_docs_path = workspace_path / ".lake" / "build" / "api-docs.db" + lean_version = _read_lean_toolchain_version(workspace_path) + logger.info( + "[%s] Using SQLite format (api-docs.db)", package_name + ) + declarations = _parse_declarations_from_sqlite( + api_docs_path, + lean_root, + package_cache, + package_config.module_prefixes, + lean_version=lean_version, + ) + else: + doc_data_dir = workspace_path / ".lake" / "build" / "doc-data" + bmp_files = sorted(doc_data_dir.glob("**/*.bmp")) + logger.info( + "[%s] Using BMP format (%d files)", + package_name, len(bmp_files), + ) + declarations = _parse_declarations_from_files( + bmp_files, lean_root, package_cache, + package_config.module_prefixes, + ) + logger.info( "Extracted %d declarations from %s (prefixes: %s)", len(declarations), package_name, package_config.module_prefixes, @@ -479,7 +988,9 @@ async def extract_declarations(engine: AsyncEngine, batch_size: int = 1000) -> N all_declarations.extend(declarations) if not all_declarations: - raise FileNotFoundError("No declarations extracted from any package workspace") + raise FileNotFoundError( + "No declarations extracted from any package workspace" + ) logger.info("Total declarations extracted: %d", len(all_declarations)) diff --git a/src/lean_explore/extract/package_registry.py b/src/lean_explore/extract/package_registry.py index a076e80..f9e0dd4 100644 --- a/src/lean_explore/extract/package_registry.py +++ b/src/lean_explore/extract/package_registry.py @@ -17,7 +17,7 @@ "physlean": PackageConfig( name="physlean", git_url="https://github.com/HEPLean/PhysLean", - module_prefixes=["PhysLean"], + module_prefixes=["Physlib", "QuantumInfo"], version_strategy=VersionStrategy.TAGGED, depends_on=["mathlib"], ), diff --git a/src/lean_explore/extract/package_utils.py b/src/lean_explore/extract/package_utils.py index f5912c0..ab5d5b8 100644 --- a/src/lean_explore/extract/package_utils.py +++ b/src/lean_explore/extract/package_utils.py @@ -82,7 +82,12 @@ def get_package_toolchain(package_configuration: PackageConfig) -> tuple[str, st def update_lakefile_docgen_version(lakefile_path: Path, lean_version: str) -> None: - """Update the doc-gen4 version in a lakefile to match the Lean version. + """Update the doc-gen4 version in a lakefile to match the Lean toolchain. + + Doc-gen4 releases are tagged to match Lean toolchain versions, so pinning + to the same version ensures compatibility. The doc-gen4 ``require`` must + appear before the main package ``require`` so that the main package's + transitive dependency versions take precedence during resolution. Args: lakefile_path: Path to lakefile.lean diff --git a/tests/extract/__main___test.py b/tests/extract/__main___test.py index 84607c6..aa9ee4d 100644 --- a/tests/extract/__main___test.py +++ b/tests/extract/__main___test.py @@ -50,32 +50,65 @@ class TestDocGen4Step: @pytest.mark.external async def test_run_doc_gen4_step_success(self, temp_directory): """Test successful doc-gen4 execution.""" - lean_directory = temp_directory / "lean" - lean_directory.mkdir() - - # Mock subprocess to avoid actually running lake - with patch("lean_explore.extract.doc_gen4.subprocess.Popen") as mock_popen: - mock_process = MagicMock() - mock_process.stdout = iter(["Building...\n", "Complete!\n"]) - mock_process.wait.return_value = 0 - mock_popen.return_value = mock_process + with patch( + "lean_explore.extract.doc_gen4.get_extraction_order", + return_value=["mathlib"], + ): + with patch( + "lean_explore.extract.doc_gen4._setup_workspace", + return_value=("leanprover/lean4:v4.29.0-rc2", "v4.29.0-rc2"), + ): + with patch( + "lean_explore.extract.doc_gen4.subprocess.run" + ) as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout="", + stderr="", + ) + with patch( + "lean_explore.extract.doc_gen4.subprocess.Popen" + ) as mock_popen: + mock_process = MagicMock() + mock_process.stdout = iter(["Building...\n", "Complete!\n"]) + mock_process.wait.return_value = 0 + mock_popen.return_value = mock_process - await _run_doc_gen4_step() + await _run_doc_gen4_step() - # Called twice: "lake build" and "lake build LeanExtract:docs" - assert mock_popen.call_count == 2 + assert mock_run.call_count == 2 + # Called twice: "lake build" and "lake build MathExtract:docs" + assert mock_popen.call_count == 2 @pytest.mark.external async def test_run_doc_gen4_step_failure(self): """Test doc-gen4 execution failure.""" - with patch("lean_explore.extract.doc_gen4.subprocess.Popen") as mock_popen: - mock_process = MagicMock() - mock_process.stdout = iter(["Error!\n"]) - mock_process.wait.return_value = 1 - mock_popen.return_value = mock_process - - with pytest.raises(RuntimeError, match="failed"): - await _run_doc_gen4_step() + with patch( + "lean_explore.extract.doc_gen4.get_extraction_order", + return_value=["mathlib"], + ): + with patch( + "lean_explore.extract.doc_gen4._setup_workspace", + return_value=("leanprover/lean4:v4.29.0-rc2", "v4.29.0-rc2"), + ): + with patch( + "lean_explore.extract.doc_gen4.subprocess.run" + ) as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout="", + stderr="", + ) + with patch( + "lean_explore.extract.doc_gen4.subprocess.Popen" + ) as mock_popen: + mock_process = MagicMock() + mock_process.stdout = iter(["Error!\n"]) + mock_process.wait.return_value = 1 + mock_popen.return_value = mock_process + + with pytest.raises(RuntimeError, match="failed"): + await _run_doc_gen4_step() class TestExtractionStep: diff --git a/tests/extract/doc_gen4_test.py b/tests/extract/doc_gen4_test.py new file mode 100644 index 0000000..b1c8c49 --- /dev/null +++ b/tests/extract/doc_gen4_test.py @@ -0,0 +1,107 @@ +"""Tests for doc-gen4 workspace orchestration.""" + +from unittest.mock import MagicMock, patch + +from lean_explore.extract.doc_gen4 import ( + _run_lake_for_package, + _uses_sqlite_docgen, + run_doc_gen4, +) +from lean_explore.extract.package_registry import PACKAGE_REGISTRY + + +class TestDocGen4VersionDetection: + """Tests for doc-gen4 format detection by Lean version.""" + + def test_uses_sqlite_docgen_from_rc2(self): + """Test the SQLite cutoff at v4.29.0-rc2.""" + assert _uses_sqlite_docgen("leanprover/lean4:v4.29.0-rc2") + assert _uses_sqlite_docgen("leanprover/lean4:v4.29.0") + assert not _uses_sqlite_docgen("leanprover/lean4:v4.29.0-rc1") + assert not _uses_sqlite_docgen("leanprover/lean4:v4.28.0") + + +class TestRunDocGen4FreshHandling: + """Tests for fresh-cache handling in run_doc_gen4.""" + + async def test_fresh_legacy_workspace_clears_cache(self): + """Test that legacy doc-gen4 workspaces still clear the cache.""" + with patch( + "lean_explore.extract.doc_gen4.get_extraction_order", + return_value=["mathlib"], + ): + with patch( + "lean_explore.extract.doc_gen4._setup_workspace", + return_value=("leanprover/lean4:v4.29.0-rc1", "v4.29.0-rc1"), + ): + with patch( + "lean_explore.extract.doc_gen4._clear_workspace_cache" + ) as mock_clear: + with patch( + "lean_explore.extract.doc_gen4._run_lake_for_package" + ) as mock_run: + await run_doc_gen4(fresh=True) + + mock_clear.assert_called_once() + mock_run.assert_called_once_with("mathlib", False) + + async def test_fresh_sqlite_workspace_skips_cache_clear(self): + """Test that SQLite doc-gen4 workspaces keep the cache.""" + with patch( + "lean_explore.extract.doc_gen4.get_extraction_order", + return_value=["mathlib"], + ): + with patch( + "lean_explore.extract.doc_gen4._setup_workspace", + return_value=("leanprover/lean4:v4.29.0-rc2", "v4.29.0-rc2"), + ): + with patch( + "lean_explore.extract.doc_gen4._clear_workspace_cache" + ) as mock_clear: + with patch( + "lean_explore.extract.doc_gen4._run_lake_for_package" + ) as mock_run: + await run_doc_gen4(fresh=True) + + mock_clear.assert_not_called() + mock_run.assert_called_once_with("mathlib", False) + + +class TestLakeBuildTargets: + """Tests for Lake build target selection.""" + + def test_physlean_runs_docs_target(self): + """Test that PhysLean runs the :docs target for its wrapper library.""" + with patch("lean_explore.extract.doc_gen4._run_lake_update_with_retry"): + with patch("lean_explore.extract.doc_gen4.subprocess.run") as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout="", + stderr="", + ) + with patch( + "lean_explore.extract.doc_gen4.subprocess.Popen" + ) as mock_popen: + mock_process = MagicMock() + mock_process.stdout = iter([]) + mock_process.wait.return_value = 0 + mock_popen.return_value = mock_process + + _run_lake_for_package("physlean") + + assert mock_popen.call_args_list[0].args[0] == [ + "lake", + "build", + "PhysExtract:docs", + ] + + +class TestPackageRegistry: + """Tests for package registry metadata.""" + + def test_physlean_uses_upstream_module_roots(self): + """Test that PhysLean filters on the upstream module roots.""" + assert PACKAGE_REGISTRY["physlean"].module_prefixes == [ + "Physlib", + "QuantumInfo", + ] diff --git a/tests/extract/doc_parser_test.py b/tests/extract/doc_parser_test.py index 68a0842..08ab60b 100644 --- a/tests/extract/doc_parser_test.py +++ b/tests/extract/doc_parser_test.py @@ -5,20 +5,28 @@ """ import json +import sqlite3 from unittest.mock import AsyncMock, patch import pytest from sqlalchemy import select from lean_explore.extract.doc_parser import ( + _BlobReader, _build_package_cache, + _construct_source_link, + _detect_docgen_format, _extract_dependencies_from_html, + _extract_names_from_rendered_code, _extract_source_text, _filter_auto_generated_projections, _insert_declarations_batch, _parse_declarations_from_files, + _parse_declarations_from_sqlite, + _read_lean_toolchain_version, _read_source_lines, _strip_lean_comments, + _validate_docgen_sqlite, extract_declarations, ) from lean_explore.extract.types import Declaration @@ -155,6 +163,253 @@ def test_extract_source_text_file_not_found(self, temp_directory): with pytest.raises(FileNotFoundError): _extract_source_text(source_link, lean_root, package_cache) + def test_extract_source_text_from_lake_toolchain(self, temp_directory): + """Test extracting source text from the Lake toolchain path.""" + lean_root = temp_directory / "lean" + toolchain_root = temp_directory / "toolchain" / "src" + lean_source_dir = toolchain_root / "lean" + lake_source_dir = toolchain_root / "lake" + lean_source_dir.mkdir(parents=True) + lake_file = lake_source_dir / "Lake" / "Config" / "Monad.lean" + lake_file.parent.mkdir(parents=True) + lake_file.write_text("def Lake.Config.Monad.run := 1\n") + + package_cache = {"lean4": lean_source_dir} + source_link = ( + "https://github.com/leanprover/lean4/blob/toolchain/" + "src/lake/Lake/Config/Monad.lean#L1-L1" + ) + + result = _extract_source_text(source_link, lean_root, package_cache) + + assert "Lake.Config.Monad.run" in result + + +def _encode_nat(n: int) -> bytes: + """Encode a natural number in leansqlite's variable-length format.""" + chunks = [] + while n >= 128: + chunks.append((n & 0x7F) | 0x80) + n >>= 7 + chunks.append(n) + return bytes(chunks) + + +def _encode_string(s: str) -> bytes: + """Encode a string: Nat(utf8_byte_length) + UTF-8 bytes.""" + encoded = s.encode("utf-8") + return _encode_nat(len(encoded)) + encoded + + +def _encode_name(name: str) -> bytes: + """Encode a dotted Lean name (e.g. 'Nat.add') into binary.""" + if not name: + return b"\x00" # anonymous + parts = name.split(".") + result = b"\x00" # start with anonymous + for part in parts: + # Name.str = tag 1 + parent + string + result = b"\x01" + result + _encode_string(part) + return result + + +def _make_const_tag(name: str) -> bytes: + """Build a RenderedCode.Tag.const(name) blob fragment.""" + return b"\x02" + _encode_name(name) + + +def _make_text_node(s: str) -> bytes: + """Build a TaggedText.text(s) blob fragment.""" + return b"\x00" + _encode_string(s) + + +def _make_tag_node(tag_bytes: bytes, inner: bytes) -> bytes: + """Build a TaggedText.tag(tag, inner) blob fragment.""" + return b"\x01" + tag_bytes + inner + + +def _make_append_node(children: list[bytes]) -> bytes: + """Build a TaggedText.append(children) blob fragment.""" + return b"\x02" + _encode_nat(len(children)) + b"".join(children) + + +class TestRenderedCodeBlobParser: + """Tests for RenderedCode BLOB parsing to extract dependency names.""" + + def test_extract_single_const(self): + """Test extracting a single const name from a type BLOB.""" + # TaggedText.tag(Tag.const("Nat"), TaggedText.text("Nat")) + blob = _make_tag_node(_make_const_tag("Nat"), _make_text_node("Nat")) + names = _extract_names_from_rendered_code(blob) + assert names == ["Nat"] + + def test_extract_multiple_consts(self): + """Test extracting multiple const names from a type BLOB.""" + # append([tag(const Nat, text Nat), text " → ", tag(const Bool, text Bool)]) + blob = _make_append_node([ + _make_tag_node(_make_const_tag("Nat"), _make_text_node("Nat")), + _make_text_node(" → "), + _make_tag_node(_make_const_tag("Bool"), _make_text_node("Bool")), + ]) + names = _extract_names_from_rendered_code(blob) + assert names == ["Nat", "Bool"] + + def test_extract_dotted_name(self): + """Test extracting a dotted name like Nat.add.""" + blob = _make_tag_node( + _make_const_tag("Nat.add"), _make_text_node("Nat.add") + ) + names = _extract_names_from_rendered_code(blob) + assert names == ["Nat.add"] + + def test_deduplicates_names(self): + """Test that duplicate const references are deduplicated.""" + blob = _make_append_node([ + _make_tag_node(_make_const_tag("Nat"), _make_text_node("Nat")), + _make_text_node(" → "), + _make_tag_node(_make_const_tag("Nat"), _make_text_node("Nat")), + ]) + names = _extract_names_from_rendered_code(blob) + assert names == ["Nat"] + + def test_skips_non_const_tags(self): + """Test that keyword, string, sort, otherExpr tags are skipped.""" + blob = _make_append_node([ + _make_tag_node(b"\x00", _make_text_node("def")), # keyword + _make_text_node(" "), + _make_tag_node(_make_const_tag("Nat"), _make_text_node("Nat")), + _make_tag_node(b"\x07", _make_text_node("x")), # otherExpr + ]) + names = _extract_names_from_rendered_code(blob) + assert names == ["Nat"] + + def test_empty_blob_returns_empty(self): + """Test that an empty or invalid BLOB returns empty list.""" + assert _extract_names_from_rendered_code(b"") == [] + assert _extract_names_from_rendered_code(b"\xff") == [] + + def test_text_only_returns_empty(self): + """Test that a BLOB with only text returns no names.""" + blob = _make_text_node("hello world") + names = _extract_names_from_rendered_code(blob) + assert names == [] + + def test_nested_tagged_text(self): + """Test parsing nested tag nodes.""" + # tag(otherExpr, tag(const("List"), text("List"))) + inner = _make_tag_node(_make_const_tag("List"), _make_text_node("List")) + blob = _make_tag_node(b"\x07", inner) # otherExpr wrapping + names = _extract_names_from_rendered_code(blob) + assert names == ["List"] + + def test_blob_reader_nat_encoding(self): + """Test that Nat encoding/decoding matches leansqlite format.""" + reader = _BlobReader(_encode_nat(0)) + assert reader._read_nat() == 0 + + reader = _BlobReader(_encode_nat(127)) + assert reader._read_nat() == 127 + + reader = _BlobReader(_encode_nat(128)) + assert reader._read_nat() == 128 + + reader = _BlobReader(_encode_nat(300)) + assert reader._read_nat() == 300 + + reader = _BlobReader(_encode_nat(100000)) + assert reader._read_nat() == 100000 + + def test_blob_reader_name_roundtrip(self): + """Test that Name encoding produces correct dot-separated output.""" + reader = _BlobReader(_encode_name("Mathlib.Data.Nat.Basic")) + assert reader._read_name() == "Mathlib.Data.Nat.Basic" + + reader = _BlobReader(_encode_name("")) + assert reader._read_name() == "" + + def test_sort_tags_handled(self): + """Test that sort tag variants (3-6) are handled without error.""" + for sort_byte in [3, 4, 5, 6]: + blob = _make_tag_node( + bytes([sort_byte]), _make_text_node("Type") + ) + names = _extract_names_from_rendered_code(blob) + assert names == [] + + +class TestSqliteHelpers: + """Tests for SQLite-specific parsing helpers.""" + + def test_construct_source_link_for_core_module_with_version(self): + """Test source links for core modules use the toolchain version.""" + result = _construct_source_link( + "Init.Data.Nat.Basic", None, 12, 15, + lean_version="v4.29.0-rc6", + ) + + assert ( + result + == "https://github.com/leanprover/lean4/blob/v4.29.0-rc6/" + "src/lean/Init/Data/Nat/Basic.lean#L12-L15" + ) + + def test_construct_source_link_for_lake_module_with_version(self): + """Test source links for Lake modules use the toolchain version.""" + result = _construct_source_link( + "Lake.Config.Monad", None, 7, 9, + lean_version="v4.29.0-rc6", + ) + + assert ( + result + == "https://github.com/leanprover/lean4/blob/v4.29.0-rc6/" + "src/lake/Lake/Config/Monad.lean#L7-L9" + ) + + def test_construct_source_link_falls_back_to_master(self): + """Test that missing lean_version falls back to 'master'.""" + result = _construct_source_link( + "Init.Data.Nat.Basic", None, 12, 15, + ) + + assert ( + result + == "https://github.com/leanprover/lean4/blob/master/" + "src/lean/Init/Data/Nat/Basic.lean#L12-L15" + ) + + def test_construct_source_link_prefers_source_url(self): + """Test that a non-None source_url is used directly.""" + url = "https://github.com/leanprover-community/mathlib4/blob/abc123/Foo.lean" + result = _construct_source_link("Foo.Bar", url, 1, 10) + + assert result == f"{url}#L1-L10" + + def test_read_lean_toolchain_version(self, temp_directory): + """Test reading version from a lean-toolchain file.""" + workspace = temp_directory / "workspace" + workspace.mkdir() + toolchain = workspace / "lean-toolchain" + toolchain.write_text("leanprover/lean4:v4.29.0-rc6\n") + + assert _read_lean_toolchain_version(workspace) == "v4.29.0-rc6" + + def test_read_lean_toolchain_version_release(self, temp_directory): + """Test reading a release version (no -rc suffix).""" + workspace = temp_directory / "workspace" + workspace.mkdir() + toolchain = workspace / "lean-toolchain" + toolchain.write_text("leanprover/lean4:v4.29.0\n") + + assert _read_lean_toolchain_version(workspace) == "v4.29.0" + + def test_read_lean_toolchain_version_missing(self, temp_directory): + """Test returns None when lean-toolchain doesn't exist.""" + workspace = temp_directory / "workspace" + workspace.mkdir() + + assert _read_lean_toolchain_version(workspace) is None + class TestDependencyExtraction: """Tests for dependency extraction from HTML.""" @@ -273,6 +528,458 @@ def test_parse_declarations_filters_packages(self, temp_directory): assert len(declarations) == 0 + def test_parse_declarations_from_sqlite_filters_non_rendered( + self, temp_directory + ): + """Test SQLite parsing only keeps rendered declarations.""" + lean_root = temp_directory / "lean" + database_path = temp_directory / "api-docs.db" + + source_dir = lean_root / "mathlib" / ".lake" / "packages" / "mathlib4" + source_file = source_dir / "Mathlib" / "Data" / "Nat" / "Basic.lean" + source_file.parent.mkdir(parents=True) + source_file.write_text( + "theorem Nat.visible : True := trivial\n" + "def Nat.hidden : Nat := 0\n" + ) + + connection = sqlite3.connect(database_path) + connection.executescript( + """ + CREATE TABLE modules (name TEXT PRIMARY KEY, source_url TEXT); + CREATE TABLE name_info ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + kind TEXT, + name TEXT NOT NULL, + type BLOB NOT NULL, + sorried INTEGER NOT NULL, + render INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_ranges ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + start_line INTEGER NOT NULL, + start_column INTEGER NOT NULL, + start_utf16 INTEGER NOT NULL, + end_line INTEGER NOT NULL, + end_column INTEGER NOT NULL, + end_utf16 INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_markdown_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + text TEXT NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_verso_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + content BLOB NOT NULL, + PRIMARY KEY (module_name, position) + ); + """ + ) + connection.execute( + "INSERT INTO modules (name, source_url) VALUES (?, ?)", + ( + "Mathlib.Data.Nat.Basic", + "https://github.com/leanprover-community/mathlib4/blob/master/" + "Mathlib/Data/Nat/Basic.lean", + ), + ) + connection.executemany( + """ + INSERT INTO name_info + (module_name, position, kind, name, type, sorried, render) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + [ + ( + "Mathlib.Data.Nat.Basic", + 1, + "theorem", + "Nat.visible", + _make_text_node("True"), + 0, + 1, + ), + ( + "Mathlib.Data.Nat.Basic", + 2, + "definition", + "Nat.hidden", + _make_text_node("Nat"), + 0, + 0, + ), + ], + ) + connection.executemany( + """ + INSERT INTO declaration_ranges + (module_name, position, start_line, start_column, start_utf16, + end_line, end_column, end_utf16) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + [ + ("Mathlib.Data.Nat.Basic", 1, 1, 0, 0, 1, 32, 32), + ("Mathlib.Data.Nat.Basic", 2, 2, 0, 0, 2, 22, 22), + ], + ) + connection.execute( + """ + INSERT INTO declaration_markdown_docstrings + (module_name, position, text) + VALUES (?, ?, ?) + """, + ("Mathlib.Data.Nat.Basic", 1, "Visible theorem"), + ) + connection.commit() + connection.close() + + declarations = _parse_declarations_from_sqlite( + database_path, + lean_root, + _build_package_cache(lean_root), + allowed_module_prefixes=["Mathlib"], + ) + + assert [declaration.name for declaration in declarations] == ["Nat.visible"] + assert declarations[0].docstring == "Visible theorem" + + def test_parse_declarations_from_sqlite_extracts_dependencies( + self, temp_directory + ): + """Test that SQLite parsing extracts dependencies from type BLOBs.""" + lean_root = temp_directory / "lean" + database_path = temp_directory / "api-docs.db" + + source_dir = lean_root / "mathlib" / ".lake" / "packages" / "mathlib4" + source_file = source_dir / "Mathlib" / "Data" / "Nat" / "Basic.lean" + source_file.parent.mkdir(parents=True) + source_file.write_text("def Nat.myFunc (n : Nat) : Bool := true\n") + + # Build a type BLOB: "Nat → Bool" + type_blob = _make_append_node([ + _make_tag_node(_make_const_tag("Nat"), _make_text_node("Nat")), + _make_text_node(" → "), + _make_tag_node(_make_const_tag("Bool"), _make_text_node("Bool")), + ]) + + connection = sqlite3.connect(database_path) + connection.executescript( + """ + CREATE TABLE modules (name TEXT PRIMARY KEY, source_url TEXT); + CREATE TABLE name_info ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + kind TEXT, + name TEXT NOT NULL, + type BLOB NOT NULL, + sorried INTEGER NOT NULL, + render INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_ranges ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + start_line INTEGER NOT NULL, + start_column INTEGER NOT NULL, + start_utf16 INTEGER NOT NULL, + end_line INTEGER NOT NULL, + end_column INTEGER NOT NULL, + end_utf16 INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_markdown_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + text TEXT NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_verso_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + content BLOB NOT NULL, + PRIMARY KEY (module_name, position) + ); + """ + ) + connection.execute( + "INSERT INTO modules (name, source_url) VALUES (?, ?)", + ( + "Mathlib.Data.Nat.Basic", + "https://github.com/leanprover-community/mathlib4/blob/master/" + "Mathlib/Data/Nat/Basic.lean", + ), + ) + connection.execute( + """ + INSERT INTO name_info + (module_name, position, kind, name, type, sorried, render) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ("Mathlib.Data.Nat.Basic", 1, "definition", "Nat.myFunc", type_blob, 0, 1), + ) + connection.execute( + """ + INSERT INTO declaration_ranges + (module_name, position, start_line, start_column, start_utf16, + end_line, end_column, end_utf16) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + ("Mathlib.Data.Nat.Basic", 1, 1, 0, 0, 1, 40, 40), + ) + connection.commit() + connection.close() + + declarations = _parse_declarations_from_sqlite( + database_path, + lean_root, + _build_package_cache(lean_root), + allowed_module_prefixes=["Mathlib"], + ) + + assert len(declarations) == 1 + assert declarations[0].name == "Nat.myFunc" + assert declarations[0].dependencies == ["Nat", "Bool"] + + def test_parse_declarations_from_sqlite_detects_verso_docstrings( + self, temp_directory, caplog + ): + """Test that Verso-only docstrings are detected and logged.""" + lean_root = temp_directory / "lean" + database_path = temp_directory / "api-docs.db" + + source_dir = ( + lean_root / "mathlib" / ".lake" / "packages" / "mathlib4" + ) + source_file = source_dir / "Mathlib" / "Data" / "Nat" / "Basic.lean" + source_file.parent.mkdir(parents=True) + source_file.write_text( + "def Nat.withVerso : Nat := 0\n" + "def Nat.withMarkdown : Nat := 1\n" + ) + + connection = sqlite3.connect(database_path) + connection.executescript( + """ + CREATE TABLE modules (name TEXT PRIMARY KEY, source_url TEXT); + CREATE TABLE name_info ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + kind TEXT, + name TEXT NOT NULL, + type BLOB NOT NULL, + sorried INTEGER NOT NULL, + render INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_ranges ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + start_line INTEGER NOT NULL, + start_column INTEGER NOT NULL, + start_utf16 INTEGER NOT NULL, + end_line INTEGER NOT NULL, + end_column INTEGER NOT NULL, + end_utf16 INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_markdown_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + text TEXT NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_verso_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + content BLOB NOT NULL, + PRIMARY KEY (module_name, position) + ); + """ + ) + connection.execute( + "INSERT INTO modules (name, source_url) VALUES (?, ?)", + ( + "Mathlib.Data.Nat.Basic", + "https://github.com/leanprover-community/mathlib4" + "/blob/master/Mathlib/Data/Nat/Basic.lean", + ), + ) + connection.executemany( + """ + INSERT INTO name_info + (module_name, position, kind, name, type, sorried, render) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + [ + ( + "Mathlib.Data.Nat.Basic", 1, "definition", + "Nat.withVerso", _make_text_node("Nat"), 0, 1, + ), + ( + "Mathlib.Data.Nat.Basic", 2, "definition", + "Nat.withMarkdown", _make_text_node("Nat"), 0, 1, + ), + ], + ) + connection.executemany( + """ + INSERT INTO declaration_ranges + (module_name, position, start_line, start_column, + start_utf16, end_line, end_column, end_utf16) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + [ + ("Mathlib.Data.Nat.Basic", 1, 1, 0, 0, 1, 27, 27), + ("Mathlib.Data.Nat.Basic", 2, 2, 0, 0, 2, 30, 30), + ], + ) + # Declaration 1: Verso-only docstring (binary BLOB) + connection.execute( + """ + INSERT INTO declaration_verso_docstrings + (module_name, position, content) + VALUES (?, ?, ?) + """, + ("Mathlib.Data.Nat.Basic", 1, b"\x00\x01\x02"), + ) + # Declaration 2: Markdown docstring + connection.execute( + """ + INSERT INTO declaration_markdown_docstrings + (module_name, position, text) + VALUES (?, ?, ?) + """, + ("Mathlib.Data.Nat.Basic", 2, "A markdown docstring"), + ) + connection.commit() + connection.close() + + import logging + + with caplog.at_level(logging.WARNING): + declarations = _parse_declarations_from_sqlite( + database_path, + lean_root, + _build_package_cache(lean_root), + allowed_module_prefixes=["Mathlib"], + ) + + assert len(declarations) == 2 + # Verso-only: docstring is None + verso_decl = next( + d for d in declarations if d.name == "Nat.withVerso" + ) + assert verso_decl.docstring is None + # Markdown: docstring is present + md_decl = next( + d for d in declarations if d.name == "Nat.withMarkdown" + ) + assert md_decl.docstring == "A markdown docstring" + # Warning logged about Verso-only docstrings + assert "1 declarations have Verso-only docstrings" in caplog.text + + def test_parse_declarations_from_sqlite_uses_core_fallback_source_link( + self, temp_directory + ): + """Test SQLite parsing for core modules without a stored source URL.""" + lean_root = temp_directory / "lean" + database_path = temp_directory / "api-docs.db" + + toolchain_root = temp_directory / "toolchain" / "src" + lean_source_dir = toolchain_root / "lean" + init_file = lean_source_dir / "Init" / "Data" / "Nat" / "Basic.lean" + init_file.parent.mkdir(parents=True) + init_file.write_text("theorem Nat.core : True := trivial\n") + + connection = sqlite3.connect(database_path) + connection.executescript( + """ + CREATE TABLE modules (name TEXT PRIMARY KEY, source_url TEXT); + CREATE TABLE name_info ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + kind TEXT, + name TEXT NOT NULL, + type BLOB NOT NULL, + sorried INTEGER NOT NULL, + render INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_ranges ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + start_line INTEGER NOT NULL, + start_column INTEGER NOT NULL, + start_utf16 INTEGER NOT NULL, + end_line INTEGER NOT NULL, + end_column INTEGER NOT NULL, + end_utf16 INTEGER NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_markdown_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + text TEXT NOT NULL, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_verso_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + content BLOB NOT NULL, + PRIMARY KEY (module_name, position) + ); + """ + ) + connection.execute( + "INSERT INTO modules (name, source_url) VALUES (?, ?)", + ("Init.Data.Nat.Basic", None), + ) + connection.execute( + """ + INSERT INTO name_info + (module_name, position, kind, name, type, sorried, render) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + "Init.Data.Nat.Basic", 1, "theorem", "Nat.core", + _make_text_node("True"), 0, 1, + ), + ) + connection.execute( + """ + INSERT INTO declaration_ranges + (module_name, position, start_line, start_column, start_utf16, + end_line, end_column, end_utf16) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + ("Init.Data.Nat.Basic", 1, 1, 0, 0, 1, 31, 31), + ) + connection.commit() + connection.close() + + declarations = _parse_declarations_from_sqlite( + database_path, + lean_root, + {"lean4": lean_source_dir}, + allowed_module_prefixes=["Init"], + lean_version="v4.29.0-rc6", + ) + + assert len(declarations) == 1 + assert declarations[0].name == "Nat.core" + assert ( + declarations[0].source_link + == "https://github.com/leanprover/lean4/blob/v4.29.0-rc6/" + "src/lean/Init/Data/Nat/Basic.lean#L1-L1" + ) + class TestDeclarationInsertion: """Tests for database insertion.""" @@ -641,3 +1348,125 @@ def test_single_declaration(self): assert len(filtered) == 1 assert removed_count == 0 + + +class TestDocgenFormatDetection: + """Tests for doc-gen4 output format detection and SQLite validation.""" + + def test_detect_sqlite_with_valid_db(self, temp_directory): + """Test detection returns 'sqlite' for a valid api-docs.db.""" + workspace = temp_directory / "pkg" + db_path = workspace / ".lake" / "build" / "api-docs.db" + db_path.parent.mkdir(parents=True) + + connection = sqlite3.connect(db_path) + connection.executescript( + """ + CREATE TABLE modules (name TEXT PRIMARY KEY, source_url TEXT); + CREATE TABLE name_info ( + module_name TEXT, position INTEGER, kind TEXT, name TEXT, + type BLOB, sorried INTEGER, render INTEGER, + PRIMARY KEY (module_name, position) + ); + CREATE TABLE declaration_ranges ( + module_name TEXT, position INTEGER, + start_line INTEGER, start_column INTEGER, start_utf16 INTEGER, + end_line INTEGER, end_column INTEGER, end_utf16 INTEGER, + PRIMARY KEY (module_name, position) + ); + """ + ) + connection.close() + + assert _detect_docgen_format(workspace) == "sqlite" + + def test_detect_bmp_format(self, temp_directory): + """Test detection returns 'bmp' when only BMP files exist.""" + workspace = temp_directory / "pkg" + doc_data = workspace / ".lake" / "build" / "doc-data" + doc_data.mkdir(parents=True) + (doc_data / "declaration-data-Foo.bmp").write_text("{}") + + assert _detect_docgen_format(workspace) == "bmp" + + def test_detect_none_when_empty(self, temp_directory): + """Test detection returns 'none' when no doc output exists.""" + workspace = temp_directory / "pkg" + workspace.mkdir(parents=True) + + assert _detect_docgen_format(workspace) == "none" + + def test_detect_falls_back_to_bmp_for_empty_db(self, temp_directory): + """Test that an empty api-docs.db falls back to BMP if available.""" + workspace = temp_directory / "pkg" + db_path = workspace / ".lake" / "build" / "api-docs.db" + db_path.parent.mkdir(parents=True) + db_path.write_bytes(b"") + + doc_data = workspace / ".lake" / "build" / "doc-data" + doc_data.mkdir(parents=True) + (doc_data / "declaration-data-Foo.bmp").write_text("{}") + + assert _detect_docgen_format(workspace) == "bmp" + + def test_detect_returns_none_for_empty_db_without_bmp(self, temp_directory): + """Test that an empty api-docs.db with no BMP fallback returns 'none'.""" + workspace = temp_directory / "pkg" + db_path = workspace / ".lake" / "build" / "api-docs.db" + db_path.parent.mkdir(parents=True) + db_path.write_bytes(b"") + + assert _detect_docgen_format(workspace) == "none" + + def test_detect_falls_back_for_corrupt_db(self, temp_directory): + """Test that a corrupt (non-SQLite) file falls back gracefully.""" + workspace = temp_directory / "pkg" + db_path = workspace / ".lake" / "build" / "api-docs.db" + db_path.parent.mkdir(parents=True) + db_path.write_bytes(b"this is not a sqlite database") + + assert _detect_docgen_format(workspace) == "none" + + def test_detect_falls_back_for_db_missing_tables(self, temp_directory): + """Test that a SQLite DB missing required tables falls back.""" + workspace = temp_directory / "pkg" + db_path = workspace / ".lake" / "build" / "api-docs.db" + db_path.parent.mkdir(parents=True) + + connection = sqlite3.connect(db_path) + connection.execute( + "CREATE TABLE modules (name TEXT PRIMARY KEY, source_url TEXT)" + ) + connection.close() + + # Has modules but missing name_info and declaration_ranges + assert _detect_docgen_format(workspace) == "none" + + def test_validate_docgen_sqlite_valid(self, temp_directory): + """Test validation passes for a well-formed database.""" + db_path = temp_directory / "api-docs.db" + connection = sqlite3.connect(db_path) + connection.executescript( + """ + CREATE TABLE modules (name TEXT PRIMARY KEY); + CREATE TABLE name_info (module_name TEXT, position INTEGER); + CREATE TABLE declaration_ranges (module_name TEXT, position INTEGER); + """ + ) + connection.close() + + assert _validate_docgen_sqlite(db_path) is True + + def test_validate_docgen_sqlite_empty_file(self, temp_directory): + """Test validation rejects an empty file.""" + db_path = temp_directory / "api-docs.db" + db_path.write_bytes(b"") + + assert _validate_docgen_sqlite(db_path) is False + + def test_validate_docgen_sqlite_corrupt_file(self, temp_directory): + """Test validation rejects a non-SQLite file.""" + db_path = temp_directory / "api-docs.db" + db_path.write_bytes(b"not a database") + + assert _validate_docgen_sqlite(db_path) is False