Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions .github/workflows/update_catalogs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ name: Update Model Catalogs
on:
push:
branches:
- create_catalogs_01 # for testing on this branch
- main
- development
paths:
Expand Down Expand Up @@ -33,13 +32,10 @@ jobs:

- name: Generate catalog if models directory has changed
run: |
python create_catalogs.py
set -e
python create_catalogs.py
python update_readme.py
if [ $? -ne 0 ]; then
echo "Generating catalogs failed."
exit 1
fi
echo "Model catalog is updated. Model READMEs are updated."
echo "Model catalog is updated. Model READMEs are updated."
git status

- name: Configure Git
Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -257,4 +257,8 @@ cython_debug/

/models/emerging_principles/temp-views-r2darts2/
/models/emerging_principles/output_non_sweep_00.md
/models/emerging_principles/temp_output_sweep00.md
/models/emerging_principles/temp_output_sweep00.md

# Un-ignore tracked files that match blanket rules above
!**/requirements.txt
!meta/partitions.json
3 changes: 0 additions & 3 deletions apis/un_fao/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import wandb
import warnings
from pathlib import Path
from views_faoapi.managers.model import APIPathManager
from views_faoapi.managers.api import FAOApiManager

warnings.filterwarnings("ignore")

try:
model_path = APIPathManager(Path(__file__))
except FileNotFoundError as fnf_error:
Expand Down
10 changes: 8 additions & 2 deletions build_ensemble_scaffold.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, ensemble_name: str):
self._subdirs = self._model.get_directories().values()
self._scripts = self._model.get_scripts().values()

def build_model_scripts(self):
def build_model_scripts(self, *, pipeline_config=None):
"""
Generates the necessary model scripts for deployment, hyperparameters, and metadata configurations.

Expand All @@ -61,9 +61,15 @@ def build_model_scripts(self):
- config_meta.py
- main.py

Args:
pipeline_config: PipelineConfig instance for version range lookup.
Defaults to PipelineConfig().

Raises:
FileNotFoundError: If the model directory does not exist.
"""
pipeline_config = pipeline_config or PipelineConfig()

if not self._model.model_dir.exists():
raise FileNotFoundError(
f"Model directory {self._model.model_dir} does not exist. Please call build_model_directory() first. Aborting script generation."
Expand All @@ -80,7 +86,7 @@ def build_model_scripts(self):
)
template_main.generate(script_path=self._model.model_dir / "main.py")
template_run_sh.generate(script_path=self._model.model_dir / "run.sh")
template_requirement_txt.generate(script_path=self.requirements_path, pipeline_core_version_range=PipelineConfig().views_pipeline_core_version_range)
template_requirement_txt.generate(script_path=self.requirements_path, pipeline_core_version_range=pipeline_config.views_pipeline_core_version_range)


if __name__ == "__main__":
Expand Down
21 changes: 14 additions & 7 deletions build_model_scaffold.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def build_model_directory(self) -> Path:
# logging.error(f"Did not create requirements.txt: {requirements_path}")
return self._model.model_dir

def build_model_scripts(self):
def build_model_scripts(self, *, input_fn=None, get_version_fn=None):
"""
Generates various model configuration and script files required for the model.

Expand All @@ -155,9 +155,17 @@ def build_model_scripts(self):
9. Generates the main script for the model.
10. Reminds the user to update the queryset file.

Args:
input_fn: Callable for user prompts. Defaults to builtin input().
get_version_fn: Callable(package_name) -> version string.
Defaults to PackageManager.get_latest_release_version_from_github.

Raises:
FileNotFoundError: If the model directory does not exist.
"""
input_fn = input_fn or input
get_version_fn = get_version_fn or PackageManager.get_latest_release_version_from_github

if not self._model.model_dir.exists():
raise FileNotFoundError(
f"Model directory {self._model.model_dir} does not exist. Please call build_model_directory() first. Aborting script generation."
Expand All @@ -166,7 +174,7 @@ def build_model_scripts(self):
script_path=self._model.configs / "config_deployment.py"
)
self._model_algorithm = str(
input(
input_fn(
"Enter the algorithm of the model (e.g. XGBModel, LightGBMModel, HurdleModel, HydraNet): "
)
)
Expand All @@ -190,20 +198,19 @@ def build_model_scripts(self):
template_config_partitions.generate(script_path=self._model.configs / "config_partitions.py")
template_main.generate(script_path=self._model.model_dir / "main.py")

self.package_name = str(input("Enter the name of the architecture package: "))
while (PackageManager.validate_package_name(self.package_name) == False):
self.package_name = str(input_fn("Enter the name of the architecture package: "))
while not PackageManager.validate_package_name(self.package_name):
error = "Invalid input. Please use the format 'views-packagename' in lowercase, e.g., 'views-stepshifter'."
logging.error(error)
self.package_name = str(input("Enter the name of the architecture package: "))
self.package_name = str(input_fn("Enter the name of the architecture package: "))
template_run_sh.generate(script_path=self._model.model_dir / "run.sh", package_name=self.package_name)
try:
_latest_package_release_version = PackageManager.get_latest_release_version_from_github(self.package_name)
_latest_package_release_version = get_version_fn(self.package_name)
except Exception as e:
logging.error(f"Error fetching latest release version for {self.package_name}: {e}. Using default version 0.1.0.")
_latest_package_release_version = None
template_requirement_txt.generate(script_path=self.requirements_path, package_name=self.package_name, package_version_range=_latest_package_release_version)


print(f"\033[91m\033[1mRemember to update the queryset file at {self._model.queryset_path}!\033[0m")

def assess_model_directory(self) -> dict:
Expand Down
11 changes: 8 additions & 3 deletions create_catalogs.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import importlib.util
import logging
import tempfile
logging.basicConfig(
level=logging.ERROR, format="%(asctime)s %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


from pathlib import Path
Expand Down Expand Up @@ -150,8 +150,13 @@ def update_readme_with_tables(
content, "ENSEMBLE_TABLE", ensemble_table
)

with open(readme_path, "w") as file:
file.write(content)
dir_name = os.path.dirname(os.path.abspath(readme_path))
with tempfile.NamedTemporaryFile(
mode="w", dir=dir_name, suffix=".tmp", delete=False
) as tmp:
tmp.write(content)
tmp_path = tmp.name
os.replace(tmp_path, readme_path)


def replace_table_in_section(content, section_name, new_table):
Expand Down
2 changes: 1 addition & 1 deletion docs/ADRs/001_ontology.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The repository recognizes the following ontological categories:
### Domain Entities
| Category | Location | Description |
|----------|----------|-------------|
| **Models** | `models/*/` | Individual forecasting model launchers (~66). Each is a thin `main.py` + config directory that delegates to an external architecture package. |
| **Models** | `models/*/` | Individual forecasting model launchers (66 active). Each is a thin `main.py` + config directory that delegates to an external architecture package. |
| **Ensembles** | `ensembles/*/` | Ensemble aggregation launchers (5). Aggregate predictions from constituent models. |

### Configuration Entities
Expand Down
121 changes: 121 additions & 0 deletions docs/ADRs/004_evolution.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@

# ADR-004: Rules for Evolution and Stability

**Status:** Accepted
**Date:** 2026-04-05
**Deciders:** Project maintainers
**Informed:** All contributors

---

## Context

The preceding ADRs establish:

- **ADR-001:** the ontology of the repository (what exists)
- **ADR-002:** the topology of the repository (how components may relate)
- **ADR-003:** semantic authority (who owns meaning and how it is declared)

Together, these decisions define the system's structure and semantics at a point in time.

What they do **not** yet define is how the system is allowed to **change over time**:
- which components are expected to be stable
- which components may evolve freely
- what constitutes a breaking change
- when compatibility guarantees apply
- when a new ADR is required

In views-models, these questions are now concrete:

- 68 models and 5 ensembles share identical partition boundaries across 73 files; a boundary change is a coordinated multi-file update (Risk R1).
- External consumers (the VIEWS platform, UN FAO API) depend on `white_mustang` ensemble output; breaking changes have real downstream cost.
- The config key vocabulary (`regression_targets`, `prediction_format`, `rolling_origin_stride`) is enforced by tests; adding or renaming required keys is a breaking change to all 68+ models.
- Contributors regularly express uncertainty about what is safe to change (for example, hyperparameters can be changed freely, whereas partition boundaries must never be changed without coordination).

Multiple trigger conditions from the original deferred ADR-004 template are now met.

---

## Decision

The repository adopts a three-tier stability classification for its components:

### Tier 1 — Stable (change requires ADR or explicit team decision)

| Component | Examples | Rationale |
|---|---|---|
| Partition boundaries | `(121, 444)`, `(445, 492)`, `(493, 540)` | Cross-model comparability depends on identical splits |
| Required config keys | `name`, `algorithm`, `level`, `steps`, `time_steps`, `deployment_status` | Enforced by `test_config_completeness.py`; adding/removing breaks all models |
| Config file set | The 6 config files per model | Enforced by `test_model_structure.py`; scaffold builder generates this set |
| CLI argument contract | `-r`, `-t`, `-e`, `-f`, `--sweep` | All `run.sh` and integration tests depend on this interface |
| Deployment status vocabulary | `shadow`, `deployed`, `baseline`, `deprecated` | Enforced by test; production gating depends on it |

### Tier 2 — Conventional (change requires updating all models + tests)

| Component | Examples | Rationale |
|---|---|---|
| Model naming convention | `adjective_noun` lowercase | Enforced by `test_model_structure.py`; catalog scripts depend on pattern |
| Directory structure | `configs/`, `artifacts/`, `data/`, `main.py`, `run.sh` | Enforced by tests; scaffold builder generates this layout |
| CLI import pattern | `from views_pipeline_core.cli import ForecastingModelArgs` | Enforced by `test_cli_pattern.py` |
| Ensemble dependency declarations | `config_meta["models"]` list | Enforced by `test_ensemble_configs.py` |

### Tier 3 — Volatile (changed freely by model owners)

| Component | Examples | Rationale |
|---|---|---|
| Hyperparameters | All keys in `config_hyperparameters.py` beyond `steps`/`time_steps` | Algorithm-specific; model owner's domain |
| Querysets | Feature selection and transformation chains in `config_queryset.py` | Model owner's domain |
| W&B experiment tracking | Run names, tags, logging frequency | Operational convenience |
| Model-specific README content | Beyond scaffold-generated sections | Documentation convenience |

---

## Rationale

The three-tier model makes the cost of change explicit:

- **Stable** components have high coordination cost and downstream impact. Changes require an ADR or explicit team decision, plus updates to all affected models and tests.
- **Conventional** components have moderate coordination cost. Changes propagate across the model zoo but don't affect external consumers.
- **Volatile** components are model-local. No coordination required.

This classification reflects the existing reality (tests already enforce Stable and Conventional tiers) while making the rules discoverable for contributors.

---

## Consequences

### Positive
- Contributors can immediately determine whether a change is safe to make unilaterally
- The cost of adding new required config keys is made explicit before the change is attempted
- Partition boundary changes are recognized as architectural events, not routine updates

### Negative
- Stable components resist change even when change is desirable — the coordination cost is real
- The 73-file partition duplication (intentional per ADR-002) amplifies the cost of Stable-tier changes
- Model owners may be tempted to treat Conventional components as Volatile; the tests are what enforce the distinction between the two tiers

---

## Implementation Notes

- Stability tiers are enforced primarily by the test suite, not by tooling
- The integration test runner (`run_integration_tests.sh`) provides behavioral verification but is not run in CI; Stable-tier changes should therefore include an integration test run
- ADR-001 already defines a stability classification consistent with these tiers; this ADR makes the rules actionable

---

## Open Questions

- Should partition boundary changes require a formal migration tool (updating all 73 files atomically)?
- Should there be a deprecation protocol for removing models (currently only `electric_relaxation` is deprecated)?
- Should Tier 2 changes require a PR review from a specific set of maintainers?

---

## References

- [ADR-001](001_ontology.md) — Ontology stability levels
- [ADR-002](002_topology.md) — Self-contained config files (why duplication is intentional)
- [ADR-003](003_authority.md) — Authority of declarations
- [ADR-005](005_testing.md) — Testing enforces tiers
- [ADR-009](009_boundary_contracts.md) — Boundary contracts define the Stable-tier interface
16 changes: 10 additions & 6 deletions docs/ADRs/005_testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ We adopt a three-team testing taxonomy:
|------|---------|----------------------|
| **Green** (Correctness) | Verify the system works as intended | `test_config_completeness.py` — required keys exist, values are valid |
| **Beige** (Convention) | Catch configuration drift and convention violations | `test_model_structure.py` — naming, file presence; `test_config_partitions.py` — delegation to shared module; `test_cli_pattern.py` — CLI import consistency |
| **Red** (Adversarial) | Expose failure modes by testing edge cases | Not yet implemented — future work |
| **Red** (Adversarial) | Expose failure modes by testing edge cases | `test_failure_modes.py` — config loading error paths |

### Test Design Principles

1. **Tests must run without ML dependencies** — Tests parse source code and use `importlib.util` to load config modules, avoiding dependency on `views_pipeline_core`, `ingester3`, or algorithm packages.
2. **Tests are parametrized over all models** — Every test runs against all ~66 models, catching drift immediately.
2. **Tests are parametrized over all models** — Every test runs against all 66 models, catching drift immediately.
3. **Tests run fast** — The full suite completes in ~2 seconds.

### Current Test Suite
Expand All @@ -42,6 +42,10 @@ We adopt a three-team testing taxonomy:
| `tests/test_model_structure.py` | Beige | Naming convention, required files, config directory structure |
| `tests/test_cli_pattern.py` | Beige | New CLI import pattern, no explicit `wandb.login()` |
| `tests/test_catalogs.py` | Green | No `exec()` usage, markdown generation correctness |
| `tests/test_ensemble_configs.py` | Green | Ensemble structure, required keys, constituent model existence and level consistency |
| `tests/test_darts_reproducibility.py` | Green | DARTS reproducibility gate parameter completeness (skipped without `views_r2darts2`) |
| `tests/test_algorithm_coherence.py` | Beige | Algorithm-to-package mapping, requirements.txt consistency with main.py imports |
| `tests/test_failure_modes.py` | Red | Config loading error paths: syntax errors, missing functions, non-existent files |

### Test Requirements for Changes

Expand All @@ -53,11 +57,11 @@ We adopt a three-team testing taxonomy:

## Known Gaps

- No red-team (adversarial) tests yet
- Catalog generation function tests require `views_pipeline_core` (skipped in most dev environments)
- No cross-validation between `config_meta.algorithm` and `main.py` manager import
- No ensemble config tests
- Tests are not wired into CI (`.github/workflows/`)
- DARTS reproducibility tests require `views_r2darts2` (skipped without it); there are no equivalent reproducibility tests for stepshifter or baseline
- Tests are not wired into CI (`.github/workflows/`) — see Risk Register C-03
- No static validation of queryset correctness — see Risk Register C-02
- Red-team coverage is limited to config loading infrastructure; no adversarial tests for runtime behavior

---

Expand Down
Loading
Loading