diff --git a/config/config.yaml b/config/config.yaml
index 70bcd68..58e291f 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -28,8 +28,7 @@ scenarios:
       release_type: "gbOpen"
     BEL:
       subtype: 1
-      source: "geoboundaries"
-      release_type: "gbOpen"
+      source: "gadm"
     CHE:
       subtype: "country"
       source: "overture"
diff --git a/figures/rulegraph.png b/figures/rulegraph.png
index 52a9524..e039410 100644
Binary files a/figures/rulegraph.png and b/figures/rulegraph.png differ
diff --git a/workflow/rules/download.smk b/workflow/rules/download.smk
index 171616b..6d28fd8 100644
--- a/workflow/rules/download.smk
+++ b/workflow/rules/download.smk
@@ -1,6 +1,21 @@
 """Rules to used to download resource files."""
 
 
+rule download_duckdb_extensions:
+    output:
+        path="/automatic/overture/duckdb_extensions.txt",
+    log:
+        "/download_duckdb_extensions.log",
+    localrule: True
+    conda:
+        "../envs/shape.yaml"
+    threads: 1
+    message:
+        "Downloading DuckDB extensions."
+    script:
+        "../scripts/download_duckdb_extensions.py"
+
+
 rule download_geoboundaries:
     output:
         path="/automatic/geoboundaries/download/{country}_{subtype}_{release_type}.parquet",
diff --git a/workflow/rules/harmonise.smk b/workflow/rules/harmonise.smk
index e59987f..517bdf0 100644
--- a/workflow/rules/harmonise.smk
+++ b/workflow/rules/harmonise.smk
@@ -17,6 +17,8 @@ rule harmonise_geoboundaries:
 
 
 rule download_harmonised_overture:
+    input:
+        duckdb_extensions=rules.download_duckdb_extensions.output.path,
     output:
         path="/automatic/overture/harmonise/{country}_{subtype}.parquet",
     log:
diff --git a/workflow/scripts/download_duckdb_extensions.py b/workflow/scripts/download_duckdb_extensions.py
new file mode 100644
index 0000000..050513f
--- /dev/null
+++ b/workflow/scripts/download_duckdb_extensions.py
@@ -0,0 +1,37 @@
+"""Download DuckDB extensions needed by datasources using it."""
+
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import duckdb
+
+if TYPE_CHECKING:
+    snakemake: Any
+
+# Extensions to install, in installation order.
+EXTENSIONS = ("spatial", "httpfs")
+
+
+def main() -> None:
+    """Install the DuckDB extensions and write a marker file.
+
+    The marker lists one installed extension per line and is the
+    output other rules depend on before loading the extensions.
+    """
+    # The context manager closes the connection after installing.
+    with duckdb.connect() as connection:
+        for extension in EXTENSIONS:
+            connection.install_extension(extension)
+
+    # Reached only if every install succeeded, so list them all.
+    output_path = Path(snakemake.output.path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text("\n".join(EXTENSIONS) + "\n", encoding="utf-8")
+
+
+if __name__ == "__main__":
+    # Kept open on purpose: the traceback of an uncaught error is
+    # printed to sys.stderr after main() has already unwound.
+    sys.stderr = open(snakemake.log[0], "w")
+    main()
diff --git a/workflow/scripts/download_harmonised_overture.py b/workflow/scripts/download_harmonised_overture.py
index 02db71c..4c1839b 100644
--- a/workflow/scripts/download_harmonised_overture.py
+++ b/workflow/scripts/download_harmonised_overture.py
@@ -98,7 +98,6 @@ def download_country_overture(country: str, subtype: str, version: str, path: st
     # Setup SQL connection to the remote dataset
     connection = duckdb.connect()
     for extension in ["spatial", "httpfs"]:
-        connection.install_extension(extension)
         connection.load_extension(extension)
 
     connection.sql("SET s3_region='us-west-2'")