views-platform · Polichinel · Apr 7, 2026 · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026
diff --git a/.github/workflows/update_catalogs.yml b/.github/workflows/update_catalogs.yml
@@ -2,7 +2,6 @@ name: Update Model Catalogs
 on:
   push:
     branches:
-      - create_catalogs_01 # for testing on this branch
       - main
       - development
     paths:
@@ -33,13 +32,10 @@ jobs:
 
       - name: Generate catalog if models directory has changed
         run: |
-          python create_catalogs.py  
+          set -e
+          python create_catalogs.py
           python update_readme.py
-          if [ $? -ne 0 ]; then
-            echo "Generating catalogs failed."
-            exit 1
-          fi
-          echo "Model catalog is updated. Model READMEs are updated."           
+          echo "Model catalog is updated. Model READMEs are updated."
           git status
 
       - name: Configure Git

diff --git a/.gitignore b/.gitignore
@@ -257,4 +257,8 @@ cython_debug/
 
 /models/emerging_principles/temp-views-r2darts2/
 /models/emerging_principles/output_non_sweep_00.md
-/models/emerging_principles/temp_output_sweep00.md
+/models/emerging_principles/temp_output_sweep00.md
+
+# Un-ignore tracked files that match blanket rules above
+!**/requirements.txt
+!meta/partitions.json
diff --git a/apis/un_fao/main.py b/apis/un_fao/main.py
@@ -1,11 +1,8 @@
 import wandb
-import warnings
 from pathlib import Path
 from views_faoapi.managers.model import APIPathManager
 from views_faoapi.managers.api import FAOApiManager
 
-warnings.filterwarnings("ignore")
-
 try:
     model_path = APIPathManager(Path(__file__))
 except FileNotFoundError as fnf_error:

diff --git a/build_ensemble_scaffold.py b/build_ensemble_scaffold.py
@@ -50,7 +50,7 @@ def __init__(self, ensemble_name: str):
         self._subdirs = self._model.get_directories().values()
         self._scripts = self._model.get_scripts().values()
 
-    def build_model_scripts(self):
+    def build_model_scripts(self, *, pipeline_config=None):
         """
         Generates the necessary model scripts for deployment, hyperparameters, and metadata configurations.
 
@@ -61,9 +61,15 @@ def build_model_scripts(self):
         - config_meta.py
         - main.py
 
+        Args:
+            pipeline_config: PipelineConfig instance for version range lookup.
+                Defaults to PipelineConfig().
+
         Raises:
             FileNotFoundError: If the model directory does not exist.
         """
+        pipeline_config = pipeline_config or PipelineConfig()
+
         if not self._model.model_dir.exists():
             raise FileNotFoundError(
                 f"Model directory {self._model.model_dir} does not exist. Please call build_model_directory() first. Aborting script generation."
@@ -80,7 +86,7 @@ def build_model_scripts(self):
         )
         template_main.generate(script_path=self._model.model_dir / "main.py")
         template_run_sh.generate(script_path=self._model.model_dir / "run.sh")
-        template_requirement_txt.generate(script_path=self.requirements_path, pipeline_core_version_range=PipelineConfig().views_pipeline_core_version_range)
+        template_requirement_txt.generate(script_path=self.requirements_path, pipeline_core_version_range=pipeline_config.views_pipeline_core_version_range)
 
 
 if __name__ == "__main__":

diff --git a/build_model_scaffold.py b/build_model_scaffold.py
@@ -139,7 +139,7 @@ def build_model_directory(self) -> Path:
         #     logging.error(f"Did not create requirements.txt: {requirements_path}")
         return self._model.model_dir
 
-    def build_model_scripts(self):
+    def build_model_scripts(self, *, input_fn=None, get_version_fn=None):
         """
         Generates various model configuration and script files required for the model.
 
@@ -155,9 +155,17 @@ def build_model_scripts(self):
         9. Generates the main script for the model.
         10. Reminds the user to update the queryset file.
 
+        Args:
+            input_fn: Callable for user prompts. Defaults to builtin input().
+            get_version_fn: Callable(package_name) -> version string.
+                Defaults to PackageManager.get_latest_release_version_from_github.
+
         Raises:
             FileNotFoundError: If the model directory does not exist.
         """
+        input_fn = input_fn or input
+        get_version_fn = get_version_fn or PackageManager.get_latest_release_version_from_github
+
         if not self._model.model_dir.exists():
             raise FileNotFoundError(
                 f"Model directory {self._model.model_dir} does not exist. Please call build_model_directory() first. Aborting script generation."
@@ -166,7 +174,7 @@ def build_model_scripts(self):
             script_path=self._model.configs / "config_deployment.py"
         )
         self._model_algorithm = str(
-            input(
+            input_fn(
                 "Enter the algorithm of the model (e.g. XGBModel, LightGBMModel, HurdleModel, HydraNet): "
             )
         )
@@ -190,20 +198,19 @@ def build_model_scripts(self):
         template_config_partitions.generate(script_path=self._model.configs / "config_partitions.py")
         template_main.generate(script_path=self._model.model_dir / "main.py")
 
-        self.package_name = str(input("Enter the name of the architecture package: "))
-        while (PackageManager.validate_package_name(self.package_name) == False):
+        self.package_name = str(input_fn("Enter the name of the architecture package: "))
+        while not PackageManager.validate_package_name(self.package_name):
             error = "Invalid input. Please use the format 'views-packagename' in lowercase, e.g., 'views-stepshifter'."
             logging.error(error)
-            self.package_name = str(input("Enter the name of the architecture package: "))
+            self.package_name = str(input_fn("Enter the name of the architecture package: "))
         template_run_sh.generate(script_path=self._model.model_dir / "run.sh", package_name=self.package_name)
         try:
-            _latest_package_release_version = PackageManager.get_latest_release_version_from_github(self.package_name)
+            _latest_package_release_version = get_version_fn(self.package_name)
         except Exception as e:
             logging.error(f"Error fetching latest release version for {self.package_name}: {e}. Using default version 0.1.0.")
             _latest_package_release_version = None
         template_requirement_txt.generate(script_path=self.requirements_path, package_name=self.package_name, package_version_range=_latest_package_release_version)
 
-
         print(f"\033[91m\033[1mRemember to update the queryset file at {self._model.queryset_path}!\033[0m")
 
     def assess_model_directory(self) -> dict:

diff --git a/create_catalogs.py b/create_catalogs.py
@@ -1,10 +1,10 @@
 import os
 import importlib.util
 import logging
+import tempfile
 logging.basicConfig(
     level=logging.ERROR, format="%(asctime)s %(name)s - %(levelname)s - %(message)s"
 )
-logger = logging.getLogger(__name__)
 
 
 from pathlib import Path
@@ -150,8 +150,13 @@ def update_readme_with_tables(
         content, "ENSEMBLE_TABLE", ensemble_table
     )
 
-    with open(readme_path, "w") as file:
-        file.write(content)
+    dir_name = os.path.dirname(os.path.abspath(readme_path))
+    with tempfile.NamedTemporaryFile(
+        mode="w", dir=dir_name, suffix=".tmp", delete=False
+    ) as tmp:
+        tmp.write(content)
+        tmp_path = tmp.name
+    os.replace(tmp_path, readme_path)
 
 
 def replace_table_in_section(content, section_name, new_table):

diff --git a/docs/ADRs/001_ontology.md b/docs/ADRs/001_ontology.md
@@ -20,7 +20,7 @@ The repository recognizes the following ontological categories:
 ### Domain Entities
 | Category | Location | Description |
 |----------|----------|-------------|
-| **Models** | `models/*/` | Individual forecasting model launchers (~66). Each is a thin `main.py` + config directory that delegates to an external architecture package. |
+| **Models** | `models/*/` | Individual forecasting model launchers (66 active). Each is a thin `main.py` + config directory that delegates to an external architecture package. |
 | **Ensembles** | `ensembles/*/` | Ensemble aggregation launchers (5). Aggregate predictions from constituent models. |
 
 ### Configuration Entities

diff --git a/docs/ADRs/004_evolution.md b/docs/ADRs/004_evolution.md
@@ -0,0 +1,121 @@
+
+# ADR-004: Rules for Evolution and Stability
+
+**Status:** Accepted  
+**Date:** 2026-04-05  
+**Deciders:** Project maintainers  
+**Informed:** All contributors  
+
+---
+
+## Context
+
+The preceding ADRs establish:
+
+- **ADR-001:** the ontology of the repository (what exists)
+- **ADR-002:** the topology of the repository (how components may relate)
+- **ADR-003:** semantic authority (who owns meaning and how it is declared)
+
+Together, these decisions define the system's structure and semantics at a point in time.
+
+What they do **not** yet define is how the system is allowed to **change over time**:
+- which components are expected to be stable
+- which components may evolve freely
+- what constitutes a breaking change
+- when compatibility guarantees apply
+- when a new ADR is required
+
+In views-models, these questions are now concrete:
+
+- 68 models and 5 ensembles share identical partition boundaries across 73 files; a boundary change is a coordinated multi-file update (Risk R1).
+- External consumers (the VIEWS platform, UN FAO API) depend on `white_mustang` ensemble output; breaking changes have real downstream cost.
+- The config key vocabulary (`regression_targets`, `prediction_format`, `rolling_origin_stride`) is enforced by tests; adding or renaming required keys is a breaking change to all 68+ models.
+- Contributors regularly express uncertainty about what is safe to change; the answer ranges from "freely" (hyperparameters) to "never without coordination" (partition boundaries).
+
+Several of the trigger conditions named in the original (deferred) ADR-004 template are therefore now met.
+
+---
+
+## Decision
+
+The repository adopts a three-tier stability classification for its components:
+
+### Tier 1 — Stable (change requires ADR or explicit team decision)
+
+| Component | Examples | Rationale |
+|---|---|---|
+| Partition boundaries | `(121, 444)`, `(445, 492)`, `(493, 540)` | Cross-model comparability depends on identical splits |
+| Required config keys | `name`, `algorithm`, `level`, `steps`, `time_steps`, `deployment_status` | Enforced by `test_config_completeness.py`; adding/removing breaks all models |
+| Config file set | The 6 config files per model | Enforced by `test_model_structure.py`; scaffold builder generates this set |
+| CLI argument contract | `-r`, `-t`, `-e`, `-f`, `--sweep` | All `run.sh` and integration tests depend on this interface |
+| Deployment status vocabulary | `shadow`, `deployed`, `baseline`, `deprecated` | Enforced by test; production gating depends on it |
+
+### Tier 2 — Conventional (change requires updating all models + tests)
+
+| Component | Examples | Rationale |
+|---|---|---|
+| Model naming convention | `adjective_noun` lowercase | Enforced by `test_model_structure.py`; catalog scripts depend on pattern |
+| Directory structure | `configs/`, `artifacts/`, `data/`, `main.py`, `run.sh` | Enforced by tests; scaffold builder generates this layout |
+| CLI import pattern | `from views_pipeline_core.cli import ForecastingModelArgs` | Enforced by `test_cli_pattern.py` |
+| Ensemble dependency declarations | `config_meta["models"]` list | Enforced by `test_ensemble_configs.py` |
+
+### Tier 3 — Volatile (changed freely by model owners)
+
+| Component | Examples | Rationale |
+|---|---|---|
+| Hyperparameters | All keys in `config_hyperparameters.py` beyond `steps`/`time_steps` | Algorithm-specific; model owner's domain |
+| Querysets | Feature selection and transformation chains in `config_queryset.py` | Model owner's domain |
+| W&B experiment tracking | Run names, tags, logging frequency | Operational convenience |
+| Model-specific README content | Beyond scaffold-generated sections | Documentation convenience |
+
+---
+
+## Rationale
+
+The three-tier model makes the cost of change explicit:
+
+- **Stable** components have high coordination cost and downstream impact. Changes require an ADR or explicit team decision, plus updates to all affected models and tests.
+- **Conventional** components have moderate coordination cost. Changes propagate across the model zoo but don't affect external consumers.
+- **Volatile** components are model-local. No coordination required.
+
+This classification reflects the existing reality (tests already enforce Stable and Conventional tiers) while making the rules discoverable for contributors.
+
+---
+
+## Consequences
+
+### Positive
+- Contributors can immediately determine whether a change is safe to make unilaterally
+- The cost of adding new required config keys is made explicit before the change is attempted
+- Partition boundary changes are recognized as architectural events, not routine updates
+
+### Negative
+- Stable components resist change even when change is desirable — the coordination cost is real
+- The 73-file partition duplication (intentional per ADR-002) amplifies the cost of Stable-tier changes
+- Model owners may be tempted to treat Conventional components as Volatile; tests are the enforcement mechanism
+
+---
+
+## Implementation Notes
+
+- Stability tiers are enforced primarily by the test suite, not by tooling
+- The integration test runner (`run_integration_tests.sh`) provides behavioral verification but is not part of CI, so every Stable-tier change should include an explicit integration test run
+- ADR-001 already defines a stability classification consistent with these tiers; this ADR makes the rules actionable
+
+---
+
+## Open Questions
+
+- Should partition boundary changes require a formal migration tool (updating all 73 files atomically)?
+- Should there be a deprecation protocol for removing models (currently only `electric_relaxation` is deprecated)?
+- Should Tier 2 changes require a PR review from a specific set of maintainers?
+
+---
+
+## References
+
+- [ADR-001](001_ontology.md) — Ontology stability levels
+- [ADR-002](002_topology.md) — Self-contained config files (why duplication is intentional)
+- [ADR-003](003_authority.md) — Authority of declarations
+- [ADR-005](005_testing.md) — Testing enforces tiers
+- [ADR-009](009_boundary_contracts.md) — Boundary contracts define the Stable-tier interface
diff --git a/docs/ADRs/005_testing.md b/docs/ADRs/005_testing.md
@@ -25,12 +25,12 @@ We adopt a three-team testing taxonomy:
 |------|---------|----------------------|
 | **Green** (Correctness) | Verify the system works as intended | `test_config_completeness.py` — required keys exist, values are valid |
 | **Beige** (Convention) | Catch configuration drift and convention violations | `test_model_structure.py` — naming, file presence; `test_config_partitions.py` — delegation to shared module; `test_cli_pattern.py` — CLI import consistency |
-| **Red** (Adversarial) | Expose failure modes by testing edge cases | Not yet implemented — future work |
+| **Red** (Adversarial) | Expose failure modes by testing edge cases | `test_failure_modes.py` — config loading error paths |
 
 ### Test Design Principles
 
 1. **Tests must run without ML dependencies** — Tests parse source code and use `importlib.util` to load config modules, avoiding dependency on `views_pipeline_core`, `ingester3`, or algorithm packages.
-2. **Tests are parametrized over all models** — Every test runs against all ~66 models, catching drift immediately.
+2. **Tests are parametrized over all models** — Every test runs against all 66 models, catching drift immediately.
 3. **Tests run fast** — The full suite completes in ~2 seconds.
 
 ### Current Test Suite
@@ -42,6 +42,10 @@ We adopt a three-team testing taxonomy:
 | `tests/test_model_structure.py` | Beige | Naming convention, required files, config directory structure |
 | `tests/test_cli_pattern.py` | Beige | New CLI import pattern, no explicit `wandb.login()` |
 | `tests/test_catalogs.py` | Green | No `exec()` usage, markdown generation correctness |
+| `tests/test_ensemble_configs.py` | Green | Ensemble structure, required keys, constituent model existence and level consistency |
+| `tests/test_darts_reproducibility.py` | Green | DARTS reproducibility gate parameter completeness (skipped without `views_r2darts2`) |
+| `tests/test_algorithm_coherence.py` | Beige | Algorithm-to-package mapping, requirements.txt consistency with main.py imports |
+| `tests/test_failure_modes.py` | Red | Config loading error paths: syntax errors, missing functions, non-existent files |
 
 ### Test Requirements for Changes
 
@@ -53,11 +57,11 @@ We adopt a three-team testing taxonomy:
 
 ## Known Gaps
 
-- No red-team (adversarial) tests yet
 - Catalog generation function tests require `views_pipeline_core` (skipped in most dev environments)
-- No cross-validation between `config_meta.algorithm` and `main.py` manager import
-- No ensemble config tests
-- Tests are not wired into CI (`.github/workflows/`)
+- DARTS reproducibility tests require `views_r2darts2` (and are skipped without it); there are no equivalent reproducibility tests for stepshifter or baseline
+- Tests are not wired into CI (`.github/workflows/`) — see Risk Register C-03
+- No static validation of queryset correctness — see Risk Register C-02
+- Red-team coverage is limited to config loading infrastructure; no adversarial tests for runtime behavior
 
 ---