diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..4d2259614 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +projects/leaf-counting/weights/*.pt filter=lfs diff=lfs merge=lfs -text +projects/leaf-counting/weights/*.pth filter=lfs diff=lfs merge=lfs -text +projects/leaf-counting/weights/*.safetensors filter=lfs diff=lfs merge=lfs -text +# If there are models in other directories as well (example): +projects/Detection_Jobs/**/models/* filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 348249e4c..4e4dbc90a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,40 +1,84 @@ -# --- Secrets and Certificates --- -*.env -*.crt -*/certs/ -**/certs/ -**/secrets/ -storage_with_mqtt/secrets/ -storage_with_mqtt/mqtt_images/secrets/ -MQTT_IMAGES/secrets/ -services/sounds/sounds_classifier/src/classification/data/ -services/sounds/sounds_classifier/src/classification/models/panns_data/ +# === OS / Editors === +.DS_Store +*.swp +*.swo +.idea/ +.vscode/ +*.code-workspace -# --- Python --- +# === Python === __pycache__/ -*.pyc +*.py[cod] *.pyo *.pyd -*.pytest_cache/ -.venv/ -venv/ +*.egg-info/ +.eggs/ +.build/ +build/ +dist/ +.mypy_cache/ +.pytest_cache/ .coverage +.coverage.* +.cache/ +.ipynb_checkpoints/ -# --- VSCode / Editor --- -.vscode/ -.idea/ +# Virtual envs +.venv/ +venv/ +env/ -# --- Docker / Build --- -*.log -*.pid -*.bak -*.tmp -*.swp -.env.local +# === Docker / Compose === +docker.env +.env .env.* -!.env.example +.env.local +.envrc -# --- OS files --- -.DS_Store -Thumbs.db +# === Airflow (artifacts/logs/DB) === +airflow/logs/ +airflow/airflow.db +airflow/airflow.db-journal +airflow/*.pid +airflow/*webserver*.log +airflow/*webserver*.out +airflow/*webserver*.err +airflow/*scheduler*.log +airflow/*scheduler*.out +airflow/*scheduler*.err +airflow/dags/*.bak.* +airflow/staging/ + +# === Projects: leaf-counting (artifacts only) === +projects/leaf-counting/out_detect/ +projects/leaf-counting/out_crops/ +projects/leaf-counting/out_pwb/ +projects/leaf-counting/runs_local/ +projects/leaf-counting/.venv/ +projects/leaf-counting/staging/ +# === Projects: Detection_Jobs / disease-monitor (general artifacts) === +projects/Detection_Jobs/**/__pycache__/ +projects/Detection_Jobs/**/.mypy_cache/ +projects/Detection_Jobs/**/.pytest_cache/ +projects/disease-monitor/**/__pycache__/ +projects/disease-monitor/**/.mypy_cache/ +projects/disease-monitor/**/.pytest_cache/ + +# === Secrets / Certs === +*.key +*.pem +*.crt +*.p12 +*credentials*.json +*service_account*.json +*.secrets.* +.secrets/ +secrets/ +projects/Detection_Jobs/.git.backup-*.tar.gz +airflow/dags/leaf-counting/runs_local/ +airflow/dags/leaf-counting/demo_images/ +airflow/dags/leaf-counting/out_*/ +projects/Detection_Jobs/**/models/ +projects/disease-monitor/disease-monitor/alerts.db +projects/**/.git.backup-*.tar.gz diff --git a/.gitignore.bak.1762665487 b/.gitignore.bak.1762665487 new file mode 100644 index 000000000..e41179aad --- /dev/null +++ b/.gitignore.bak.1762665487 @@ -0,0 +1,76 @@ +# === OS / Editors === +.DS_Store +*.swp +*.swo +.idea/ +.vscode/ +*.code-workspace + +# === Python === +__pycache__/ +*.py[cod] +*.pyo +*.pyd +*.egg-info/ +.eggs/ +.build/ +build/ +dist/ +.mypy_cache/ +.pytest_cache/ +.coverage +.coverage.* +.cache/ +.ipynb_checkpoints/ + +# Virtual envs +.venv/ +venv/ +env/ + +# === Docker / Compose === +docker.env +.env # local secrets - do not commit (an .env.example will be published later) + +# === Airflow (artifacts/logs/DB only) === +airflow/logs/ +airflow/airflow.db +airflow/airflow.db-journal +airflow/*.pid
+airflow/*webserver*.log +airflow/*webserver*.out +airflow/*webserver*.err +airflow/*scheduler*.log +airflow/*scheduler*.out +airflow/*scheduler*.err +# DAG backup files (the *.bak.* ones) +airflow/dags/*.bak.* + +# Temporary Airflow input/output (staging) - not code +airflow/staging/ + +# === Projects: leaf-counting (artifacts only) === +projects/leaf-counting/out_detect/ +projects/leaf-counting/out_crops/ +projects/leaf-counting/out_pwb/ +projects/leaf-counting/runs_local/ +projects/leaf-counting/.venv/ + +# === Projects: Detection_Jobs / disease-monitor (general artifacts) === +projects/Detection_Jobs/**/__pycache__/ +projects/Detection_Jobs/**/.mypy_cache/ +projects/Detection_Jobs/**/.pytest_cache/ +projects/disease-monitor/**/__pycache__/ +projects/disease-monitor/**/.mypy_cache/ +projects/disease-monitor/**/.pytest_cache/ + +# === Secrets / Certs (just in case) === +*.key +*.pem +*.crt +*.p12 +*credentials*.json +*service_account*.json +*.secrets.* +.secrets/ +secrets/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..3ad98bbfc --- /dev/null +++ b/Dockerfile @@ -0,0 +1,135 @@ +# # Airflow 2.9.3 on Python 3.12 (Debian) +# FROM apache/airflow:2.9.3-python3.12 + +# USER root +# RUN apt-get update && apt-get install -y --no-install-recommends \ +# rsync curl && \ +# rm -rf /var/lib/apt/lists/* + +# # Install the project requirements (including torch/opencv/ultralytics etc.) + awscli/minio +# USER airflow +# COPY projects/leaf-counting/requirements.txt /tmp/req.txt +# RUN pip install --no-cache-dir -r /tmp/req.txt \ +# && pip install --no-cache-dir awscli minio + +# # Airflow is already installed in the image - nothing more to do here +# Airflow 2.9.3 on Python 3.12 (Debian) +# FROM apache/airflow:2.9.3-python3.12 + +# USER root +# # Add the system libraries required by OpenCV (including libGL) + helper tools +# RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ +# libgl1 \ +# libglib2.0-0 \ +# libsm6 \ +# libxext6 \ +# libxrender1 \ +# rsync \ +# curl \ +# && rm -rf /var/lib/apt/lists/* + +# # Working directory with write permissions for the airflow user (UID 50000) +# RUN mkdir -p /work /opt/airflow/runs_local \ +# && chown -R airflow:0 /work /opt/airflow/runs_local \ +# && chmod -R 775 /work /opt/airflow/runs_local + +# # Install the project requirements + awscli/minio + opencv-python-headless +# USER airflow +# COPY projects/leaf-counting/requirements.txt /tmp/req.txt +# RUN pip install --no-cache-dir -r /tmp/req.txt && \ +# pip install --no-cache-dir \ +# awscli==1.33.0 \ +# minio==7.2.9 \ +# opencv-python-headless==4.10.0.84 +# Airflow 2.9.3 on Python 3.12 +# FROM apache/airflow:2.9.3-python3.12 + +# # System library for OpenCV +# USER root +# RUN apt-get update && apt-get install -y --no-install-recommends \ +# libgl1 \ +# && rm -rf /var/lib/apt/lists/* +# USER airflow + +# # ML/image-processing dependencies + cloud/MinIO tools +# # Keep binary compatibility: numpy<2; OpenCV headless; YOLO (ultralytics) +# RUN pip install --no-cache-dir \ +# "numpy==1.26.4 " \ +# "opencv-python-headless==4.9.0.80" \ +# "ultralytics==8.*" \ +# "boto3" "minio" "awscli" + +# # (Optional but recommended for CPU): PyTorch CPU from the official pinned wheels +# RUN pip install --no-cache-dir \ +# torch==2.3.1+cpu torchvision==0.18.1+cpu \ +# --index-url https://download.pytorch.org/whl/cpu +# FROM apache/airflow:2.9.3-python3.10 +# FROM ghcr.io/apache/airflow:2.9.3-python3.10 + +# FROM quay.io/apache/airflow:2.9.3-python3.10 + + +# USER root +# RUN apt-get update && apt-get install -y --no-install-recommends libgl1 && \ +# rm -rf /var/lib/apt/lists/* +# USER airflow +
+# RUN python -m pip install --no-cache-dir --upgrade pip + +# RUN pip install --no-cache-dir \ +# --extra-index-url https://download.pytorch.org/whl/cpu \ +# torch==2.3.1+cpu torchvision==0.18.1+cpu torchaudio==2.3.1+cpu + +# RUN pip install --no-cache-dir \ +# numpy==1.26.4 opencv-python-headless==4.9.0.80 ultralytics==8.2.10 \ +# boto3 minio awscli +# FROM python:3.10-slim +FROM mcr.microsoft.com/devcontainers/python:1-3.10-bullseye + + +# Minimal system packages +# RUN apt-get update && apt-get install -y --no-install-recommends \ +# libgl1 libglib2.0-0 ffmpeg curl ca-certificates && \ +# rm -rf /var/lib/apt/lists/* +# RUN apt-get update && apt-get install -y --no-install-recommends \ +# libgl1 libglib2.0-0 ffmpeg curl ca-certificates \ +# util-linux procps \ +# && rm -rf /var/lib/apt/lists/* + +RUN apt-get update && apt-get install -y --no-install-recommends \ + libgl1 libglib2.0-0 ffmpeg curl ca-certificates \ + util-linux procps \ + && rm -rf /var/lib/apt/lists/* + + + +# Upgrade the Python build tooling +RUN python -m pip install --no-cache-dir --upgrade pip wheel setuptools + +# === Airflow from PyPI with constraints (bypasses the registry) === +ENV AIRFLOW_VERSION=2.9.3 +ENV PYTHON_VERSION=3.10 +ENV CONSTRAINT_URL=https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt +RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}" + +# Providers compatible with 2.9.3, via the same constraints file +RUN pip install --no-cache-dir \ + "apache-airflow-providers-docker" \ + --constraint "${CONSTRAINT_URL}" + + +# === PyTorch CPU wheels (from the official index) === +RUN pip install --no-cache-dir \ + --extra-index-url https://download.pytorch.org/whl/cpu \ + torch==2.3.1+cpu torchvision==0.18.1+cpu torchaudio==2.3.1+cpu + +# === YOLO / image processing and helper tools === +RUN pip install --no-cache-dir \ + numpy==1.26.4 opencv-python-headless==4.9.0.80 ultralytics==8.2.10 \ + onnx==1.16.1 onnxruntime==1.18.1 \ + boto3 minio awscli requests tqdm + +# Working directory and non-root user +RUN useradd -ms /bin/bash airflow +USER airflow +WORKDIR /opt/airflow diff --git a/airflow/airflow.cfg b/airflow/airflow.cfg new file mode 100644 index 000000000..8f1408c24 --- /dev/null +++ b/airflow/airflow.cfg @@ -0,0 +1,2420 @@ +[core] +# The folder where your airflow pipelines live, most likely a +# subfolder in a code repository. This path must be absolute. +# +# Variable: AIRFLOW__CORE__DAGS_FOLDER +# +dags_folder = /opt/airflow/dags + +# Hostname by providing a path to a callable, which will resolve the hostname. +# The format is "package.function". +# +# For example, default value ``airflow.utils.net.getfqdn`` means that result from patched +# version of `socket.getfqdn() `__, +# see related `CPython Issue `__. +# +# No argument should be required in the function specified. +# If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address`` +# +# Variable: AIRFLOW__CORE__HOSTNAME_CALLABLE +# +hostname_callable = airflow.utils.net.getfqdn + +# A callable to check if a python file has airflow dags defined or not and should +# return ``True`` if it has dags otherwise ``False``. +# If this is not provided, Airflow uses its own heuristic rules. +# +# The function should have the following signature +# +# .. code-block:: python +# +# def func_name(file_path: str, zip_file: zipfile.ZipFile | None = None) -> bool: ...
+# +# Variable: AIRFLOW__CORE__MIGHT_CONTAIN_DAG_CALLABLE +# +might_contain_dag_callable = airflow.utils.file.might_contain_dag_via_default_heuristic + +# Default timezone in case supplied date times are naive +# can be `UTC` (default), `system`, or any `IANA ` +# timezone string (e.g. Europe/Amsterdam) +# +# Variable: AIRFLOW__CORE__DEFAULT_TIMEZONE +# +default_timezone = utc + +# The executor class that airflow should use. Choices include +# ``SequentialExecutor``, ``LocalExecutor``, ``CeleryExecutor``, +# ``KubernetesExecutor``, ``CeleryKubernetesExecutor``, ``LocalKubernetesExecutor`` or the +# full import path to the class when using a custom executor. +# +# Variable: AIRFLOW__CORE__EXECUTOR +# +executor = SequentialExecutor + +# The auth manager class that airflow should use. Full import path to the auth manager class. +# +# Variable: AIRFLOW__CORE__AUTH_MANAGER +# +auth_manager = airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager + +# This defines the maximum number of task instances that can run concurrently per scheduler in +# Airflow, regardless of the worker count. Generally this value, multiplied by the number of +# schedulers in your cluster, is the maximum number of task instances with the running +# state in the metadata database. +# +# Variable: AIRFLOW__CORE__PARALLELISM +# +parallelism = 32 + +# The maximum number of task instances allowed to run concurrently in each DAG. To calculate +# the number of tasks that is running concurrently for a DAG, add up the number of running +# tasks for all DAG runs of the DAG. This is configurable at the DAG level with ``max_active_tasks``, +# which is defaulted as ``[core] max_active_tasks_per_dag``. +# +# An example scenario when this would be useful is when you want to stop a new dag with an early +# start date from stealing all the executor slots in a cluster. +# +# Variable: AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG +# +max_active_tasks_per_dag = 16 + +# Are DAGs paused by default at creation +# +# Variable: AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION +# +dags_are_paused_at_creation = True + +# The maximum number of active DAG runs per DAG. The scheduler will not create more DAG runs +# if it reaches the limit. This is configurable at the DAG level with ``max_active_runs``, +# which is defaulted as ``[core] max_active_runs_per_dag``. +# +# Variable: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG +# +max_active_runs_per_dag = 16 + +# (experimental) The maximum number of consecutive DAG failures before DAG is automatically paused. +# This is also configurable per DAG level with ``max_consecutive_failed_dag_runs``, +# which is defaulted as ``[core] max_consecutive_failed_dag_runs_per_dag``. +# If not specified, then the value is considered as 0, +# meaning that the dags are never paused out by default. +# +# Variable: AIRFLOW__CORE__MAX_CONSECUTIVE_FAILED_DAG_RUNS_PER_DAG +# +max_consecutive_failed_dag_runs_per_dag = 0 + +# The name of the method used in order to start Python processes via the multiprocessing module. +# This corresponds directly with the options available in the Python docs: +# `multiprocessing.set_start_method +# `__ +# must be one of the values returned by `multiprocessing.get_all_start_methods() +# `__. +# +# Example: mp_start_method = fork +# +# Variable: AIRFLOW__CORE__MP_START_METHOD +# +# mp_start_method = + +# Whether to load the DAG examples that ship with Airflow. 
It's good to +# get started, but you probably want to set this to ``False`` in a production +# environment +# +# Variable: AIRFLOW__CORE__LOAD_EXAMPLES +# +load_examples = True + +# Path to the folder containing Airflow plugins +# +# Variable: AIRFLOW__CORE__PLUGINS_FOLDER +# +plugins_folder = /opt/airflow/plugins + +# Should tasks be executed via forking of the parent process +# +# * ``False``: Execute via forking of the parent process +# * ``True``: Spawning a new python process, slower than fork, but means plugin changes picked +# up by tasks straight away +# +# Variable: AIRFLOW__CORE__EXECUTE_TASKS_NEW_PYTHON_INTERPRETER +# +execute_tasks_new_python_interpreter = False + +# Secret key to save connection passwords in the db +# +# Variable: AIRFLOW__CORE__FERNET_KEY +# +fernet_key = + +# Whether to disable pickling dags +# +# Variable: AIRFLOW__CORE__DONOT_PICKLE +# +donot_pickle = True + +# How long before timing out a python file import +# +# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_TIMEOUT +# +dagbag_import_timeout = 30.0 + +# Should a traceback be shown in the UI for dagbag import errors, +# instead of just the exception message +# +# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_ERROR_TRACEBACKS +# +dagbag_import_error_tracebacks = True + +# If tracebacks are shown, how many entries from the traceback should be shown +# +# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_ERROR_TRACEBACK_DEPTH +# +dagbag_import_error_traceback_depth = 2 + +# How long before timing out a DagFileProcessor, which processes a dag file +# +# Variable: AIRFLOW__CORE__DAG_FILE_PROCESSOR_TIMEOUT +# +dag_file_processor_timeout = 50 + +# The class to use for running task instances in a subprocess. +# Choices include StandardTaskRunner, CgroupTaskRunner or the full import path to the class +# when using a custom task runner. +# +# Variable: AIRFLOW__CORE__TASK_RUNNER +# +task_runner = StandardTaskRunner + +# If set, tasks without a ``run_as_user`` argument will be run with this user +# Can be used to de-elevate a sudo user running Airflow when executing tasks +# +# Variable: AIRFLOW__CORE__DEFAULT_IMPERSONATION +# +default_impersonation = + +# What security module to use (for example kerberos) +# +# Variable: AIRFLOW__CORE__SECURITY +# +security = + +# Turn unit test mode on (overwrites many configuration options with test +# values at runtime) +# +# Variable: AIRFLOW__CORE__UNIT_TEST_MODE +# +unit_test_mode = False + +# Whether to enable pickling for xcom (note that this is insecure and allows for +# RCE exploits). +# +# Variable: AIRFLOW__CORE__ENABLE_XCOM_PICKLING +# +enable_xcom_pickling = False + +# What classes can be imported during deserialization. This is a multi line value. +# The individual items will be parsed as a pattern to a glob function. +# Python built-in classes (like dict) are always allowed. +# +# Variable: AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES +# +allowed_deserialization_classes = airflow.* + +# What classes can be imported during deserialization. This is a multi line value. +# The individual items will be parsed as regexp patterns. +# This is a secondary option to ``[core] allowed_deserialization_classes``. 
+# +# Variable: AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES_REGEXP +# +allowed_deserialization_classes_regexp = + +# When a task is killed forcefully, this is the amount of time in seconds that +# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED +# +# Variable: AIRFLOW__CORE__KILLED_TASK_CLEANUP_TIME +# +killed_task_cleanup_time = 60 + +# Whether to override params with dag_run.conf. If you pass some key-value pairs +# through ``airflow dags backfill -c`` or +# ``airflow dags trigger -c``, the key-value pairs will override the existing ones in params. +# +# Variable: AIRFLOW__CORE__DAG_RUN_CONF_OVERRIDES_PARAMS +# +dag_run_conf_overrides_params = True + +# If enabled, Airflow will only scan files containing both ``DAG`` and ``airflow`` (case-insensitive). +# +# Variable: AIRFLOW__CORE__DAG_DISCOVERY_SAFE_MODE +# +dag_discovery_safe_mode = True + +# The pattern syntax used in the +# `.airflowignore +# `__ +# files in the DAG directories. Valid values are ``regexp`` or ``glob``. +# +# Variable: AIRFLOW__CORE__DAG_IGNORE_FILE_SYNTAX +# +dag_ignore_file_syntax = regexp + +# The number of retries each task is going to have by default. Can be overridden at dag or task level. +# +# Variable: AIRFLOW__CORE__DEFAULT_TASK_RETRIES +# +default_task_retries = 0 + +# The number of seconds each task is going to wait by default between retries. Can be overridden at +# dag or task level. +# +# Variable: AIRFLOW__CORE__DEFAULT_TASK_RETRY_DELAY +# +default_task_retry_delay = 300 + +# The maximum delay (in seconds) each task is going to wait by default between retries. +# This is a global setting and cannot be overridden at task or DAG level. +# +# Variable: AIRFLOW__CORE__MAX_TASK_RETRY_DELAY +# +max_task_retry_delay = 86400 + +# The weighting method used for the effective total priority weight of the task +# +# Variable: AIRFLOW__CORE__DEFAULT_TASK_WEIGHT_RULE +# +default_task_weight_rule = downstream + +# The default task execution_timeout value for the operators. Expected an integer value to +# be passed into timedelta as seconds. If not specified, then the value is considered as None, +# meaning that the operators are never timed out by default. +# +# Variable: AIRFLOW__CORE__DEFAULT_TASK_EXECUTION_TIMEOUT +# +default_task_execution_timeout = + +# Updating serialized DAG can not be faster than a minimum interval to reduce database write rate. +# +# Variable: AIRFLOW__CORE__MIN_SERIALIZED_DAG_UPDATE_INTERVAL +# +min_serialized_dag_update_interval = 30 + +# If ``True``, serialized DAGs are compressed before writing to DB. +# +# .. note:: +# +# This will disable the DAG dependencies view +# +# Variable: AIRFLOW__CORE__COMPRESS_SERIALIZED_DAGS +# +compress_serialized_dags = False + +# Fetching serialized DAG can not be faster than a minimum interval to reduce database +# read rate. This config controls when your DAGs are updated in the Webserver +# +# Variable: AIRFLOW__CORE__MIN_SERIALIZED_DAG_FETCH_INTERVAL +# +min_serialized_dag_fetch_interval = 10 + +# Maximum number of Rendered Task Instance Fields (Template Fields) per task to store +# in the Database. +# All the template_fields for each of Task Instance are stored in the Database. +# Keeping this number small may cause an error when you try to view ``Rendered`` tab in +# TaskInstance view for older tasks. 
+# +# Variable: AIRFLOW__CORE__MAX_NUM_RENDERED_TI_FIELDS_PER_TASK +# +max_num_rendered_ti_fields_per_task = 30 + +# On each dagrun check against defined SLAs +# +# Variable: AIRFLOW__CORE__CHECK_SLAS +# +check_slas = True + +# Path to custom XCom class that will be used to store and resolve operators results +# +# Example: xcom_backend = path.to.CustomXCom +# +# Variable: AIRFLOW__CORE__XCOM_BACKEND +# +xcom_backend = airflow.models.xcom.BaseXCom + +# By default Airflow plugins are lazily-loaded (only loaded when required). Set it to ``False``, +# if you want to load plugins whenever 'airflow' is invoked via cli or loaded from module. +# +# Variable: AIRFLOW__CORE__LAZY_LOAD_PLUGINS +# +lazy_load_plugins = True + +# By default Airflow providers are lazily-discovered (discovery and imports happen only when required). +# Set it to ``False``, if you want to discover providers whenever 'airflow' is invoked via cli or +# loaded from module. +# +# Variable: AIRFLOW__CORE__LAZY_DISCOVER_PROVIDERS +# +lazy_discover_providers = True + +# Hide sensitive **Variables** or **Connection extra json keys** from UI +# and task logs when set to ``True`` +# +# .. note:: +# +# Connection passwords are always hidden in logs +# +# Variable: AIRFLOW__CORE__HIDE_SENSITIVE_VAR_CONN_FIELDS +# +hide_sensitive_var_conn_fields = True + +# A comma-separated list of extra sensitive keywords to look for in variables names or connection's +# extra JSON. +# +# Variable: AIRFLOW__CORE__SENSITIVE_VAR_CONN_NAMES +# +sensitive_var_conn_names = + +# Task Slot counts for ``default_pool``. This setting would not have any effect in an existing +# deployment where the ``default_pool`` is already created. For existing deployments, users can +# change the number of slots using Webserver, API or the CLI +# +# Variable: AIRFLOW__CORE__DEFAULT_POOL_TASK_SLOT_COUNT +# +default_pool_task_slot_count = 128 + +# The maximum list/dict length an XCom can push to trigger task mapping. If the pushed list/dict has a +# length exceeding this value, the task pushing the XCom will be failed automatically to prevent the +# mapped tasks from clogging the scheduler. +# +# Variable: AIRFLOW__CORE__MAX_MAP_LENGTH +# +max_map_length = 1024 + +# The default umask to use for process when run in daemon mode (scheduler, worker, etc.) +# +# This controls the file-creation mode mask which determines the initial value of file permission bits +# for newly created files. +# +# This value is treated as an octal-integer. +# +# Variable: AIRFLOW__CORE__DAEMON_UMASK +# +daemon_umask = 0o077 + +# Class to use as dataset manager. +# +# Example: dataset_manager_class = airflow.datasets.manager.DatasetManager +# +# Variable: AIRFLOW__CORE__DATASET_MANAGER_CLASS +# +# dataset_manager_class = + +# Kwargs to supply to dataset manager. +# +# Example: dataset_manager_kwargs = {"some_param": "some_value"} +# +# Variable: AIRFLOW__CORE__DATASET_MANAGER_KWARGS +# +# dataset_manager_kwargs = + +# Dataset URI validation should raise an exception if it is not compliant with AIP-60. +# By default this configuration is false, meaning that Airflow 2.x only warns the user. +# In Airflow 3, this configuration will be enabled by default. +# +# Variable: AIRFLOW__CORE__STRICT_DATASET_URI_VALIDATION +# +strict_dataset_uri_validation = False + +# (experimental) Whether components should use Airflow Internal API for DB connectivity. +# +# Variable: AIRFLOW__CORE__DATABASE_ACCESS_ISOLATION +# +database_access_isolation = False + +# (experimental) Airflow Internal API url. 
+# Only used if ``[core] database_access_isolation`` is ``True``. +# +# Example: internal_api_url = http://localhost:8080 +# +# Variable: AIRFLOW__CORE__INTERNAL_API_URL +# +# internal_api_url = + +# The ability to allow testing connections across Airflow UI, API and CLI. +# Supported options: ``Disabled``, ``Enabled``, ``Hidden``. Default: Disabled +# Disabled - Disables the test connection functionality and disables the Test Connection button in UI. +# Enabled - Enables the test connection functionality and shows the Test Connection button in UI. +# Hidden - Disables the test connection functionality and hides the Test Connection button in UI. +# Before setting this to Enabled, make sure that you review the users who are able to add/edit +# connections and ensure they are trusted. Connection testing can be done maliciously leading to +# undesired and insecure outcomes. +# See `Airflow Security Model: Capabilities of authenticated UI users +# `__ +# for more details. +# +# Variable: AIRFLOW__CORE__TEST_CONNECTION +# +test_connection = Disabled + +# The maximum length of the rendered template field. If the value to be stored in the +# rendered template field exceeds this size, it's redacted. +# +# Variable: AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH +# +max_templated_field_length = 4096 + +[database] +# Path to the ``alembic.ini`` file. You can either provide the file path relative +# to the Airflow home directory or the absolute path if it is located elsewhere. +# +# Variable: AIRFLOW__DATABASE__ALEMBIC_INI_FILE_PATH +# +alembic_ini_file_path = alembic.ini + +# The SQLAlchemy connection string to the metadata database. +# SQLAlchemy supports many different database engines. +# See: `Set up a Database Backend: Database URI +# `__ +# for more details. +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN +# +sql_alchemy_conn = sqlite:////opt/airflow/airflow.db + +# Extra engine specific keyword args passed to SQLAlchemy's create_engine, as a JSON-encoded value +# +# Example: sql_alchemy_engine_args = {"arg1": true} +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_ENGINE_ARGS +# +# sql_alchemy_engine_args = + +# The encoding for the databases +# +# Variable: AIRFLOW__DATABASE__SQL_ENGINE_ENCODING +# +sql_engine_encoding = utf-8 + +# Collation for ``dag_id``, ``task_id``, ``key``, ``external_executor_id`` columns +# in case they have different encoding. +# By default this collation is the same as the database collation, however for ``mysql`` and ``mariadb`` +# the default is ``utf8mb3_bin`` so that the index sizes of our index keys will not exceed +# the maximum size of allowed index when collation is set to ``utf8mb4`` variant, see +# `GitHub Issue Comment `__ +# for more details. +# +# Variable: AIRFLOW__DATABASE__SQL_ENGINE_COLLATION_FOR_IDS +# +# sql_engine_collation_for_ids = + +# If SQLAlchemy should pool database connections. +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_ENABLED +# +sql_alchemy_pool_enabled = True + +# The SQLAlchemy pool size is the maximum number of database connections +# in the pool. 0 indicates no limit. +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_SIZE +# +sql_alchemy_pool_size = 5 + +# The maximum overflow size of the pool. +# When the number of checked-out connections reaches the size set in pool_size, +# additional connections will be returned up to this limit. +# When those additional connections are returned to the pool, they are disconnected and discarded. 
+# It follows then that the total number of simultaneous connections the pool will allow +# is **pool_size** + **max_overflow**, +# and the total number of "sleeping" connections the pool will allow is pool_size. +# max_overflow can be set to ``-1`` to indicate no overflow limit; +# no limit will be placed on the total number of concurrent connections. Defaults to ``10``. +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_MAX_OVERFLOW +# +sql_alchemy_max_overflow = 10 + +# The SQLAlchemy pool recycle is the number of seconds a connection +# can be idle in the pool before it is invalidated. This config does +# not apply to sqlite. If the number of DB connections is ever exceeded, +# a lower config value will allow the system to recover faster. +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_RECYCLE +# +sql_alchemy_pool_recycle = 1800 + +# Check connection at the start of each connection pool checkout. +# Typically, this is a simple statement like "SELECT 1". +# See `SQLAlchemy Pooling: Disconnect Handling - Pessimistic +# `__ +# for more details. +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_PRE_PING +# +sql_alchemy_pool_pre_ping = True + +# The schema to use for the metadata database. +# SQLAlchemy supports databases with the concept of multiple schemas. +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_SCHEMA +# +sql_alchemy_schema = + +# Import path for connect args in SQLAlchemy. Defaults to an empty dict. +# This is useful when you want to configure db engine args that SQLAlchemy won't parse +# in connection string. This can be set by passing a dictionary containing the create engine parameters. +# For more details about passing create engine parameters (keepalives variables, timeout etc) +# in Postgres DB Backend see `Setting up a PostgreSQL Database +# `__ +# e.g ``connect_args={"timeout":30}`` can be defined in ``airflow_local_settings.py`` and +# can be imported as shown below +# +# Example: sql_alchemy_connect_args = airflow_local_settings.connect_args +# +# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_CONNECT_ARGS +# +# sql_alchemy_connect_args = + +# Whether to load the default connections that ship with Airflow when ``airflow db init`` is called. +# It's good to get started, but you probably want to set this to ``False`` in a production environment. +# +# Variable: AIRFLOW__DATABASE__LOAD_DEFAULT_CONNECTIONS +# +load_default_connections = True + +# Number of times the code should be retried in case of DB Operational Errors. +# Not all transactions will be retried as it can cause undesired state. +# Currently it is only used in ``DagFileProcessor.process_file`` to retry ``dagbag.sync_to_db``. +# +# Variable: AIRFLOW__DATABASE__MAX_DB_RETRIES +# +max_db_retries = 3 + +# Whether to run alembic migrations during Airflow start up. Sometimes this operation can be expensive, +# and the users can assert the correct version through other means (e.g. through a Helm chart). +# Accepts ``True`` or ``False``. +# +# Variable: AIRFLOW__DATABASE__CHECK_MIGRATIONS +# +check_migrations = True + +[logging] +# The folder where airflow should store its log files. +# This path must be absolute. +# There are a few existing configurations that assume this is set to the default. +# If you choose to override this you may need to update the +# ``[logging] dag_processor_manager_log_location`` and +# ``[logging] child_process_log_directory settings`` as well. 
+# +# Variable: AIRFLOW__LOGGING__BASE_LOG_FOLDER +# +base_log_folder = /opt/airflow/logs +processor_log_folder = /opt/airflow/logs/scheduler + +# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search. +# Set this to ``True`` if you want to enable remote logging. +# +# Variable: AIRFLOW__LOGGING__REMOTE_LOGGING +# +remote_logging = False + +# Users must supply an Airflow connection id that provides access to the storage +# location. Depending on your remote logging service, this may only be used for +# reading logs, not writing them. +# +# Variable: AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID +# +remote_log_conn_id = + +# Whether the local log files for GCS, S3, WASB and OSS remote logging should be deleted after +# they are uploaded to the remote location. +# +# Variable: AIRFLOW__LOGGING__DELETE_LOCAL_LOGS +# +delete_local_logs = False + +# Path to Google Credential JSON file. If omitted, authorization based on `the Application Default +# Credentials +# `__ will +# be used. +# +# Variable: AIRFLOW__LOGGING__GOOGLE_KEY_PATH +# +google_key_path = + +# Storage bucket URL for remote logging +# S3 buckets should start with **s3://** +# Cloudwatch log groups should start with **cloudwatch://** +# GCS buckets should start with **gs://** +# WASB buckets should start with **wasb** just to help Airflow select correct handler +# Stackdriver logs should start with **stackdriver://** +# +# Variable: AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER +# +remote_base_log_folder = + +# The remote_task_handler_kwargs param is loaded into a dictionary and passed to the ``__init__`` +# of remote task handler and it overrides the values provided by Airflow config. For example if you set +# ``delete_local_logs=False`` and you provide ``{"delete_local_copy": true}``, then the local +# log files will be deleted after they are uploaded to remote location. +# +# Example: remote_task_handler_kwargs = {"delete_local_copy": true} +# +# Variable: AIRFLOW__LOGGING__REMOTE_TASK_HANDLER_KWARGS +# +remote_task_handler_kwargs = + +# Use server-side encryption for logs stored in S3 +# +# Variable: AIRFLOW__LOGGING__ENCRYPT_S3_LOGS +# +encrypt_s3_logs = False + +# Logging level. +# +# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. +# +# Variable: AIRFLOW__LOGGING__LOGGING_LEVEL +# +logging_level = INFO + +# Logging level for celery. If not set, it uses the value of logging_level +# +# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. +# +# Variable: AIRFLOW__LOGGING__CELERY_LOGGING_LEVEL +# +celery_logging_level = + +# Logging level for Flask-appbuilder UI. +# +# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. +# +# Variable: AIRFLOW__LOGGING__FAB_LOGGING_LEVEL +# +fab_logging_level = WARNING + +# Logging class +# Specify the class that will specify the logging configuration +# This class has to be on the python classpath +# +# Example: logging_config_class = my.path.default_local_settings.LOGGING_CONFIG +# +# Variable: AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS +# +logging_config_class = + +# Flag to enable/disable Colored logs in Console +# Colour the logs when the controlling terminal is a TTY. 
+# +# Variable: AIRFLOW__LOGGING__COLORED_CONSOLE_LOG +# +colored_console_log = True + +# Log format for when Colored logs is enabled +# +# Variable: AIRFLOW__LOGGING__COLORED_LOG_FORMAT +# +colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s + +# Specifies the class utilized by Airflow to implement colored logging +# +# Variable: AIRFLOW__LOGGING__COLORED_FORMATTER_CLASS +# +colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter + +# Format of Log line +# +# Variable: AIRFLOW__LOGGING__LOG_FORMAT +# +log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s + +# Defines the format of log messages for simple logging configuration +# +# Variable: AIRFLOW__LOGGING__SIMPLE_LOG_FORMAT +# +simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s + +# Where to send dag parser logs. If "file", logs are sent to log files defined by child_process_log_directory. +# +# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_LOG_TARGET +# +dag_processor_log_target = file + +# Format of Dag Processor Log line +# +# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_LOG_FORMAT +# +dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s + +# Determines the formatter class used by Airflow for structuring its log messages +# The default formatter class is timezone-aware, which means that timestamps attached to log entries +# will be adjusted to reflect the local timezone of the Airflow instance +# +# Variable: AIRFLOW__LOGGING__LOG_FORMATTER_CLASS +# +log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware + +# An import path to a function to add adaptations of each secret added with +# ``airflow.utils.log.secrets_masker.mask_secret`` to be masked in log messages. The given function +# is expected to require a single parameter: the secret to be adapted. It may return a +# single adaptation of the secret or an iterable of adaptations to each be masked as secrets. +# The original secret will be masked as well as any adaptations returned. +# +# Example: secret_mask_adapter = urllib.parse.quote +# +# Variable: AIRFLOW__LOGGING__SECRET_MASK_ADAPTER +# +secret_mask_adapter = + +# Specify prefix pattern like mentioned below with stream handler ``TaskHandlerWithCustomFormatter`` +# +# Example: task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{ti.try_number}} +# +# Variable: AIRFLOW__LOGGING__TASK_LOG_PREFIX_TEMPLATE +# +task_log_prefix_template = + +# Formatting for how airflow generates file names/paths for each task run. +# +# Variable: AIRFLOW__LOGGING__LOG_FILENAME_TEMPLATE +# +log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log + +# Formatting for how airflow generates file names for log +# +# Variable: AIRFLOW__LOGGING__LOG_PROCESSOR_FILENAME_TEMPLATE +# +log_processor_filename_template = {{ filename }}.log + +# Full path of dag_processor_manager logfile. 
+# +# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_MANAGER_LOG_LOCATION +# +dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log + +# Whether DAG processor manager will write logs to stdout +# +# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_MANAGER_LOG_STDOUT +# +dag_processor_manager_log_stdout = False + +# Name of handler to read task instance logs. +# Defaults to use ``task`` handler. +# +# Variable: AIRFLOW__LOGGING__TASK_LOG_READER +# +task_log_reader = task + +# A comma\-separated list of third-party logger names that will be configured to print messages to +# consoles\. +# +# Example: extra_logger_names = connexion,sqlalchemy +# +# Variable: AIRFLOW__LOGGING__EXTRA_LOGGER_NAMES +# +extra_logger_names = + +# When you start an Airflow worker, Airflow starts a tiny web server +# subprocess to serve the workers local log files to the airflow main +# web server, who then builds pages and sends them to users. This defines +# the port on which the logs are served. It needs to be unused, and open +# visible from the main web server to connect into the workers. +# +# Variable: AIRFLOW__LOGGING__WORKER_LOG_SERVER_PORT +# +worker_log_server_port = 8793 + +# Port to serve logs from for triggerer. +# See ``[logging] worker_log_server_port`` description for more info. +# +# Variable: AIRFLOW__LOGGING__TRIGGER_LOG_SERVER_PORT +# +trigger_log_server_port = 8794 + +# We must parse timestamps to interleave logs between trigger and task. To do so, +# we need to parse timestamps in log files. In case your log format is non-standard, +# you may provide import path to callable which takes a string log line and returns +# the timestamp (datetime.datetime compatible). +# +# Example: interleave_timestamp_parser = path.to.my_func +# +# Variable: AIRFLOW__LOGGING__INTERLEAVE_TIMESTAMP_PARSER +# +# interleave_timestamp_parser = + +# Permissions in the form or of octal string as understood by chmod. The permissions are important +# when you use impersonation, when logs are written by a different user than airflow. The most secure +# way of configuring it in this case is to add both users to the same group and make it the default +# group of both users. Group-writeable logs are default in airflow, but you might decide that you are +# OK with having the logs other-writeable, in which case you should set it to ``0o777``. You might +# decide to add more security if you do not use impersonation and change it to ``0o755`` to make it +# only owner-writeable. You can also make it just readable only for owner by changing it to ``0o700`` +# if all the access (read/write) for your logs happens from the same user. +# +# Example: file_task_handler_new_folder_permissions = 0o775 +# +# Variable: AIRFLOW__LOGGING__FILE_TASK_HANDLER_NEW_FOLDER_PERMISSIONS +# +file_task_handler_new_folder_permissions = 0o775 + +# Permissions in the form or of octal string as understood by chmod. The permissions are important +# when you use impersonation, when logs are written by a different user than airflow. The most secure +# way of configuring it in this case is to add both users to the same group and make it the default +# group of both users. Group-writeable logs are default in airflow, but you might decide that you are +# OK with having the logs other-writeable, in which case you should set it to ``0o666``. You might +# decide to add more security if you do not use impersonation and change it to ``0o644`` to make it +# only owner-writeable. 
You can also make it just readable only for owner by changing it to ``0o600`` +# if all the access (read/write) for your logs happens from the same user. +# +# Example: file_task_handler_new_file_permissions = 0o664 +# +# Variable: AIRFLOW__LOGGING__FILE_TASK_HANDLER_NEW_FILE_PERMISSIONS +# +file_task_handler_new_file_permissions = 0o664 + +# By default Celery sends all logs into stderr. +# If enabled any previous logging handlers will get *removed*. +# With this option AirFlow will create new handlers +# and send low level logs like INFO and WARNING to stdout, +# while sending higher severity logs to stderr. +# +# Variable: AIRFLOW__LOGGING__CELERY_STDOUT_STDERR_SEPARATION +# +celery_stdout_stderr_separation = False + +# If enabled, Airflow may ship messages to task logs from outside the task run context, e.g. from +# the scheduler, executor, or callback execution context. This can help in circumstances such as +# when there's something blocking the execution of the task and ordinarily there may be no task +# logs at all. +# This is set to ``True`` by default. If you encounter issues with this feature +# (e.g. scheduler performance issues) it can be disabled. +# +# Variable: AIRFLOW__LOGGING__ENABLE_TASK_CONTEXT_LOGGER +# +enable_task_context_logger = True + +[metrics] +# `StatsD `__ integration settings. + +# If true, ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` will use +# regex pattern matching anywhere within the metric name instead of only prefix matching +# at the start of the name. +# +# Variable: AIRFLOW__METRICS__METRICS_USE_PATTERN_MATCH +# +metrics_use_pattern_match = False + +# Configure an allow list (comma separated string) to send only certain metrics. +# If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. +# If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. +# +# Example: metrics_allow_list = "scheduler,executor,dagrun,pool,triggerer,celery" or "^scheduler,^executor,heartbeat|timeout" +# +# Variable: AIRFLOW__METRICS__METRICS_ALLOW_LIST +# +metrics_allow_list = + +# Configure a block list (comma separated string) to block certain metrics from being emitted. +# If ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` are both configured, +# ``[metrics] metrics_block_list`` is ignored. +# +# If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. +# +# If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. +# +# Example: metrics_block_list = "scheduler,executor,dagrun,pool,triggerer,celery" or "^scheduler,^executor,heartbeat|timeout" +# +# Variable: AIRFLOW__METRICS__METRICS_BLOCK_LIST +# +metrics_block_list = + +# Enables sending metrics to StatsD. +# +# Variable: AIRFLOW__METRICS__STATSD_ON +# +statsd_on = False + +# Specifies the host address where the StatsD daemon (or server) is running +# +# Variable: AIRFLOW__METRICS__STATSD_HOST +# +statsd_host = localhost + +# Specifies the port on which the StatsD daemon (or server) is listening to +# +# Variable: AIRFLOW__METRICS__STATSD_PORT +# +statsd_port = 8125 + +# Defines the namespace for all metrics sent from Airflow to StatsD +# +# Variable: AIRFLOW__METRICS__STATSD_PREFIX +# +statsd_prefix = airflow + +# A function that validate the StatsD stat name, apply changes to the stat name if necessary and return +# the transformed stat name. +# +# The function should have the following signature +# +# .. 
code-block:: python +# +# def func_name(stat_name: str) -> str: ... +# +# Variable: AIRFLOW__METRICS__STAT_NAME_HANDLER +# +stat_name_handler = + +# To enable datadog integration to send airflow metrics. +# +# Variable: AIRFLOW__METRICS__STATSD_DATADOG_ENABLED +# +statsd_datadog_enabled = False + +# List of datadog tags attached to all metrics(e.g: ``key1:value1,key2:value2``) +# +# Variable: AIRFLOW__METRICS__STATSD_DATADOG_TAGS +# +statsd_datadog_tags = + +# Set to ``False`` to disable metadata tags for some of the emitted metrics +# +# Variable: AIRFLOW__METRICS__STATSD_DATADOG_METRICS_TAGS +# +statsd_datadog_metrics_tags = True + +# If you want to utilise your own custom StatsD client set the relevant +# module path below. +# Note: The module path must exist on your +# `PYTHONPATH ` +# for Airflow to pick it up +# +# Variable: AIRFLOW__METRICS__STATSD_CUSTOM_CLIENT_PATH +# +# statsd_custom_client_path = + +# If you want to avoid sending all the available metrics tags to StatsD, +# you can configure a block list of prefixes (comma separated) to filter out metric tags +# that start with the elements of the list (e.g: ``job_id,run_id``) +# +# Example: statsd_disabled_tags = job_id,run_id,dag_id,task_id +# +# Variable: AIRFLOW__METRICS__STATSD_DISABLED_TAGS +# +statsd_disabled_tags = job_id,run_id + +# To enable sending Airflow metrics with StatsD-Influxdb tagging convention. +# +# Variable: AIRFLOW__METRICS__STATSD_INFLUXDB_ENABLED +# +statsd_influxdb_enabled = False + +# Enables sending metrics to OpenTelemetry. +# +# Variable: AIRFLOW__METRICS__OTEL_ON +# +otel_on = False + +# Specifies the hostname or IP address of the OpenTelemetry Collector to which Airflow sends +# metrics and traces. +# +# Variable: AIRFLOW__METRICS__OTEL_HOST +# +otel_host = localhost + +# Specifies the port of the OpenTelemetry Collector that is listening to. +# +# Variable: AIRFLOW__METRICS__OTEL_PORT +# +otel_port = 8889 + +# The prefix for the Airflow metrics. +# +# Variable: AIRFLOW__METRICS__OTEL_PREFIX +# +otel_prefix = airflow + +# Defines the interval, in milliseconds, at which Airflow sends batches of metrics and traces +# to the configured OpenTelemetry Collector. +# +# Variable: AIRFLOW__METRICS__OTEL_INTERVAL_MILLISECONDS +# +otel_interval_milliseconds = 60000 + +# If ``True``, all metrics are also emitted to the console. Defaults to ``False``. +# +# Variable: AIRFLOW__METRICS__OTEL_DEBUGGING_ON +# +otel_debugging_on = False + +# If ``True``, SSL will be enabled. Defaults to ``False``. +# To establish an HTTPS connection to the OpenTelemetry collector, +# you need to configure the SSL certificate and key within the OpenTelemetry collector's +# ``config.yml`` file. +# +# Variable: AIRFLOW__METRICS__OTEL_SSL_ACTIVE +# +otel_ssl_active = False + +[secrets] +# Full class name of secrets backend to enable (will precede env vars and metastore in search path) +# +# Example: backend = airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend +# +# Variable: AIRFLOW__SECRETS__BACKEND +# +backend = + +# The backend_kwargs param is loaded into a dictionary and passed to ``__init__`` +# of secrets backend class. See documentation for the secrets backend you are using. +# JSON is expected. +# +# Example for AWS Systems Manager ParameterStore: +# ``{"connections_prefix": "/airflow/connections", "profile_name": "default"}`` +# +# Variable: AIRFLOW__SECRETS__BACKEND_KWARGS +# +backend_kwargs = + +# .. 
note:: |experimental| +# +# Enables local caching of Variables, when parsing DAGs only. +# Using this option can make dag parsing faster if Variables are used in top level code, at the expense +# of longer propagation time for changes. +# Please note that this cache concerns only the DAG parsing step. There is no caching in place when DAG +# tasks are run. +# +# Variable: AIRFLOW__SECRETS__USE_CACHE +# +use_cache = False + +# .. note:: |experimental| +# +# When the cache is enabled, this is the duration for which we consider an entry in the cache to be +# valid. Entries are refreshed if they are older than this many seconds. +# It means that when the cache is enabled, this is the maximum amount of time you need to wait to see a +# Variable change take effect. +# +# Variable: AIRFLOW__SECRETS__CACHE_TTL_SECONDS +# +cache_ttl_seconds = 900 + +[cli] +# In what way should the cli access the API. The LocalClient will use the +# database directly, while the json_client will use the api running on the +# webserver +# +# Variable: AIRFLOW__CLI__API_CLIENT +# +api_client = airflow.api.client.local_client + +# If you set web_server_url_prefix, do NOT forget to append it here, ex: +# ``endpoint_url = http://localhost:8080/myroot`` +# So api will look like: ``http://localhost:8080/myroot/api/experimental/...`` +# +# Variable: AIRFLOW__CLI__ENDPOINT_URL +# +endpoint_url = http://localhost:8080 + +[debug] +# Used only with ``DebugExecutor``. If set to ``True`` DAG will fail with first +# failed task. Helpful for debugging purposes. +# +# Variable: AIRFLOW__DEBUG__FAIL_FAST +# +fail_fast = False + +[api] +# Enables the deprecated experimental API. Please note that these API endpoints do not have +# access control. An authenticated user has full access. +# +# .. warning:: +# +# This `Experimental REST API +# `__ is +# deprecated since version 2.0. Please consider using +# `the Stable REST API +# `__. +# For more information on migration, see +# `RELEASE_NOTES.rst `_ +# +# Variable: AIRFLOW__API__ENABLE_EXPERIMENTAL_API +# +enable_experimental_api = False + +# Comma separated list of auth backends to authenticate users of the API. See +# `Security: API +# `__ for possible values. +# ("airflow.api.auth.backend.default" allows all requests for historic reasons) +# +# Variable: AIRFLOW__API__AUTH_BACKENDS +# +auth_backends = airflow.api.auth.backend.session + +# Used to set the maximum page limit for API requests. If limit passed as param +# is greater than maximum page limit, it will be ignored and maximum page limit value +# will be set as the limit +# +# Variable: AIRFLOW__API__MAXIMUM_PAGE_LIMIT +# +maximum_page_limit = 100 + +# Used to set the default page limit when limit param is zero or not provided in API +# requests. Otherwise if positive integer is passed in the API requests as limit, the +# smallest number of user given limit or maximum page limit is taken as limit. +# +# Variable: AIRFLOW__API__FALLBACK_PAGE_LIMIT +# +fallback_page_limit = 100 + +# The intended audience for JWT token credentials used for authorization. This value must match on the client and server sides. If empty, audience will not be tested. +# +# Example: google_oauth2_audience = project-id-random-value.apps.googleusercontent.com +# +# Variable: AIRFLOW__API__GOOGLE_OAUTH2_AUDIENCE +# +google_oauth2_audience = + +# Path to Google Cloud Service Account key file (JSON). If omitted, authorization based on +# `the Application Default Credentials +# `__ will +# be used. 
+# +# Example: google_key_path = /files/service-account-json +# +# Variable: AIRFLOW__API__GOOGLE_KEY_PATH +# +google_key_path = + +# Used in response to a preflight request to indicate which HTTP +# headers can be used when making the actual request. This header is +# the server side response to the browser's +# Access-Control-Request-Headers header. +# +# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_HEADERS +# +access_control_allow_headers = + +# Specifies the method or methods allowed when accessing the resource. +# +# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_METHODS +# +access_control_allow_methods = + +# Indicates whether the response can be shared with requesting code from the given origins. +# Separate URLs with space. +# +# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_ORIGINS +# +access_control_allow_origins = + +# Indicates whether the **xcomEntries** endpoint supports the **deserialize** +# flag. If set to ``False``, setting this flag in a request would result in a +# 400 Bad Request error. +# +# Variable: AIRFLOW__API__ENABLE_XCOM_DESERIALIZE_SUPPORT +# +enable_xcom_deserialize_support = False + +[lineage] +# what lineage backend to use +# +# Variable: AIRFLOW__LINEAGE__BACKEND +# +backend = + +[operators] +# The default owner assigned to each new operator, unless +# provided explicitly or passed via ``default_args`` +# +# Variable: AIRFLOW__OPERATORS__DEFAULT_OWNER +# +default_owner = airflow + +# The default value of attribute "deferrable" in operators and sensors. +# +# Variable: AIRFLOW__OPERATORS__DEFAULT_DEFERRABLE +# +default_deferrable = false + +# Indicates the default number of CPU units allocated to each operator when no specific CPU request +# is specified in the operator's configuration +# +# Variable: AIRFLOW__OPERATORS__DEFAULT_CPUS +# +default_cpus = 1 + +# Indicates the default number of RAM allocated to each operator when no specific RAM request +# is specified in the operator's configuration +# +# Variable: AIRFLOW__OPERATORS__DEFAULT_RAM +# +default_ram = 512 + +# Indicates the default number of disk storage allocated to each operator when no specific disk request +# is specified in the operator's configuration +# +# Variable: AIRFLOW__OPERATORS__DEFAULT_DISK +# +default_disk = 512 + +# Indicates the default number of GPUs allocated to each operator when no specific GPUs request +# is specified in the operator's configuration +# +# Variable: AIRFLOW__OPERATORS__DEFAULT_GPUS +# +default_gpus = 0 + +# Default queue that tasks get assigned to and that worker listen on. +# +# Variable: AIRFLOW__OPERATORS__DEFAULT_QUEUE +# +default_queue = default + +# Is allowed to pass additional/unused arguments (args, kwargs) to the BaseOperator operator. +# If set to ``False``, an exception will be thrown, +# otherwise only the console message will be displayed. +# +# Variable: AIRFLOW__OPERATORS__ALLOW_ILLEGAL_ARGUMENTS +# +allow_illegal_arguments = False + +[webserver] +# The message displayed when a user attempts to execute actions beyond their authorised privileges. +# +# Variable: AIRFLOW__WEBSERVER__ACCESS_DENIED_MESSAGE +# +access_denied_message = Access is Denied + +# Path of webserver config file used for configuring the webserver parameters +# +# Variable: AIRFLOW__WEBSERVER__CONFIG_FILE +# +config_file = /opt/airflow/webserver_config.py + +# The base url of your website: Airflow cannot guess what domain or CNAME you are using. 
+# This is used to create links in the Log Url column in the Browse - Task Instances menu, +# as well as in any automated emails sent by Airflow that contain links to your webserver. +# +# Variable: AIRFLOW__WEBSERVER__BASE_URL +# +base_url = http://localhost:8080 + +# Default timezone to display all dates in the UI, can be UTC, system, or +# any IANA timezone string (e.g. **Europe/Amsterdam**). If left empty the +# default value of core/default_timezone will be used +# +# Example: default_ui_timezone = America/New_York +# +# Variable: AIRFLOW__WEBSERVER__DEFAULT_UI_TIMEZONE +# +default_ui_timezone = UTC + +# The ip specified when starting the web server +# +# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_HOST +# +web_server_host = 0.0.0.0 + +# The port on which to run the web server +# +# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_PORT +# +web_server_port = 8080 + +# Paths to the SSL certificate and key for the web server. When both are +# provided SSL will be enabled. This does not change the web server port. +# +# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_SSL_CERT +# +web_server_ssl_cert = + +# Paths to the SSL certificate and key for the web server. When both are +# provided SSL will be enabled. This does not change the web server port. +# +# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_SSL_KEY +# +web_server_ssl_key = + +# The type of backend used to store web session data, can be ``database`` or ``securecookie``. For the +# ``database`` backend, sessions are store in the database and they can be +# managed there (for example when you reset password of the user, all sessions for that user are +# deleted). For the ``securecookie`` backend, sessions are stored in encrypted cookies on the client +# side. The ``securecookie`` mechanism is 'lighter' than database backend, but sessions are not deleted +# when you reset password of the user, which means that other than waiting for expiry time, the only +# way to invalidate all sessions for a user is to change secret_key and restart webserver (which +# also invalidates and logs out all other user's sessions). +# +# When you are using ``database`` backend, make sure to keep your database session table small +# by periodically running ``airflow db clean --table session`` command, especially if you have +# automated API calls that will create a new session for each call rather than reuse the sessions +# stored in browser cookies. +# +# Example: session_backend = securecookie +# +# Variable: AIRFLOW__WEBSERVER__SESSION_BACKEND +# +session_backend = database + +# Number of seconds the webserver waits before killing gunicorn master that doesn't respond +# +# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_MASTER_TIMEOUT +# +web_server_master_timeout = 120 + +# Number of seconds the gunicorn webserver waits before timing out on a worker +# +# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_WORKER_TIMEOUT +# +web_server_worker_timeout = 120 + +# Number of workers to refresh at a time. When set to 0, worker refresh is +# disabled. When nonzero, airflow periodically refreshes webserver workers by +# bringing up new ones and killing old ones. +# +# Variable: AIRFLOW__WEBSERVER__WORKER_REFRESH_BATCH_SIZE +# +worker_refresh_batch_size = 1 + +# Number of seconds to wait before refreshing a batch of workers. +# +# Variable: AIRFLOW__WEBSERVER__WORKER_REFRESH_INTERVAL +# +worker_refresh_interval = 6000 + +# If set to ``True``, Airflow will track files in plugins_folder directory. When it detects changes, +# then reload the gunicorn. 
If set to ``True``, gunicorn starts without preloading, which is slower, +# uses more memory, and may cause race conditions. Avoid setting this to ``True`` in production. +# +# Variable: AIRFLOW__WEBSERVER__RELOAD_ON_PLUGIN_CHANGE +# +reload_on_plugin_change = False + +# Secret key used to run your flask app. It should be as random as possible. However, when running +# more than 1 instances of webserver, make sure all of them use the same ``secret_key`` otherwise +# one of them will error with "CSRF session token is missing". +# The webserver key is also used to authorize requests to Celery workers when logs are retrieved. +# The token generated using the secret key has a short expiry time though - make sure that time on +# ALL the machines that you run airflow components on is synchronized (for example using ntpd) +# otherwise you might get "forbidden" errors when the logs are accessed. +# +# Variable: AIRFLOW__WEBSERVER__SECRET_KEY +# +secret_key = xIcs2bnO5KyoXMwIyf88+g== + +# Number of workers to run the Gunicorn web server +# +# Variable: AIRFLOW__WEBSERVER__WORKERS +# +workers = 4 + +# The worker class gunicorn should use. Choices include +# ``sync`` (default), ``eventlet``, ``gevent``. +# +# .. warning:: +# +# When using ``gevent`` you might also want to set the ``_AIRFLOW_PATCH_GEVENT`` +# environment variable to ``"1"`` to make sure gevent patching is done as early as possible. +# +# See related Issues / PRs for more details: +# +# * https://github.com/benoitc/gunicorn/issues/2796 +# * https://github.com/apache/airflow/issues/8212 +# * https://github.com/apache/airflow/pull/28283 +# +# Variable: AIRFLOW__WEBSERVER__WORKER_CLASS +# +worker_class = sync + +# Log files for the gunicorn webserver. '-' means log to stderr. +# +# Variable: AIRFLOW__WEBSERVER__ACCESS_LOGFILE +# +access_logfile = - + +# Log files for the gunicorn webserver. '-' means log to stderr. +# +# Variable: AIRFLOW__WEBSERVER__ERROR_LOGFILE +# +error_logfile = - + +# Access log format for gunicorn webserver. +# default format is ``%%(h)s %%(l)s %%(u)s %%(t)s "%%(r)s" %%(s)s %%(b)s "%%(f)s" "%%(a)s"`` +# See `Gunicorn Settings: 'access_log_format' Reference +# `__ for more details +# +# Variable: AIRFLOW__WEBSERVER__ACCESS_LOGFORMAT +# +access_logformat = + +# Expose the configuration file in the web server. Set to ``non-sensitive-only`` to show all values +# except those that have security implications. ``True`` shows all values. ``False`` hides the +# configuration completely. +# +# Variable: AIRFLOW__WEBSERVER__EXPOSE_CONFIG +# +expose_config = False + +# Expose hostname in the web server +# +# Variable: AIRFLOW__WEBSERVER__EXPOSE_HOSTNAME +# +expose_hostname = False + +# Expose stacktrace in the web server +# +# Variable: AIRFLOW__WEBSERVER__EXPOSE_STACKTRACE +# +expose_stacktrace = False + +# Default DAG view. Valid values are: ``grid``, ``graph``, ``duration``, ``gantt``, ``landing_times`` +# +# Variable: AIRFLOW__WEBSERVER__DAG_DEFAULT_VIEW +# +dag_default_view = grid + +# Default DAG orientation. Valid values are: +# ``LR`` (Left->Right), ``TB`` (Top->Bottom), ``RL`` (Right->Left), ``BT`` (Bottom->Top) +# +# Variable: AIRFLOW__WEBSERVER__DAG_ORIENTATION +# +dag_orientation = LR + +# Sorting order in grid view. 
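One note on the ``secret_key`` shown earlier in this section: because it is now published in this diff it should be treated as compromised. A quick way to mint a replacement (every webserver replica must be given the same new value, e.g. via ``AIRFLOW__WEBSERVER__SECRET_KEY``):

    import secrets
    print(secrets.token_urlsafe(32))   # export the output as AIRFLOW__WEBSERVER__SECRET_KEY on all webservers
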
Valid values are: ``topological``, ``hierarchical_alphabetical`` +# +# Variable: AIRFLOW__WEBSERVER__GRID_VIEW_SORTING_ORDER +# +grid_view_sorting_order = topological + +# The amount of time (in secs) webserver will wait for initial handshake +# while fetching logs from other worker machine +# +# Variable: AIRFLOW__WEBSERVER__LOG_FETCH_TIMEOUT_SEC +# +log_fetch_timeout_sec = 5 + +# Time interval (in secs) to wait before next log fetching. +# +# Variable: AIRFLOW__WEBSERVER__LOG_FETCH_DELAY_SEC +# +log_fetch_delay_sec = 2 + +# Distance away from page bottom to enable auto tailing. +# +# Variable: AIRFLOW__WEBSERVER__LOG_AUTO_TAILING_OFFSET +# +log_auto_tailing_offset = 30 + +# Animation speed for auto tailing log display. +# +# Variable: AIRFLOW__WEBSERVER__LOG_ANIMATION_SPEED +# +log_animation_speed = 1000 + +# By default, the webserver shows paused DAGs. Flip this to hide paused +# DAGs by default +# +# Variable: AIRFLOW__WEBSERVER__HIDE_PAUSED_DAGS_BY_DEFAULT +# +hide_paused_dags_by_default = False + +# Consistent page size across all listing views in the UI +# +# Variable: AIRFLOW__WEBSERVER__PAGE_SIZE +# +page_size = 100 + +# Define the color of navigation bar +# +# Variable: AIRFLOW__WEBSERVER__NAVBAR_COLOR +# +navbar_color = #fff + +# Define the color of text in the navigation bar +# +# Variable: AIRFLOW__WEBSERVER__NAVBAR_TEXT_COLOR +# +navbar_text_color = #51504f + +# Define the color of navigation bar links when hovered +# +# Variable: AIRFLOW__WEBSERVER__NAVBAR_HOVER_COLOR +# +navbar_hover_color = #eee + +# Define the color of text in the navigation bar when hovered +# +# Variable: AIRFLOW__WEBSERVER__NAVBAR_TEXT_HOVER_COLOR +# +navbar_text_hover_color = #51504f + +# Define the color of the logo text +# +# Variable: AIRFLOW__WEBSERVER__NAVBAR_LOGO_TEXT_COLOR +# +navbar_logo_text_color = #51504f + +# Default dagrun to show in UI +# +# Variable: AIRFLOW__WEBSERVER__DEFAULT_DAG_RUN_DISPLAY_NUMBER +# +default_dag_run_display_number = 25 + +# Enable werkzeug ``ProxyFix`` middleware for reverse proxy +# +# Variable: AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX +# +enable_proxy_fix = False + +# Number of values to trust for ``X-Forwarded-For``. +# See `Werkzeug: X-Forwarded-For Proxy Fix +# `__ for more details. +# +# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_FOR +# +proxy_fix_x_for = 1 + +# Number of values to trust for ``X-Forwarded-Proto``. +# See `Werkzeug: X-Forwarded-For Proxy Fix +# `__ for more details. +# +# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PROTO +# +proxy_fix_x_proto = 1 + +# Number of values to trust for ``X-Forwarded-Host``. +# See `Werkzeug: X-Forwarded-For Proxy Fix +# `__ for more details. +# +# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_HOST +# +proxy_fix_x_host = 1 + +# Number of values to trust for ``X-Forwarded-Port``. +# See `Werkzeug: X-Forwarded-For Proxy Fix +# `__ for more details. +# +# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PORT +# +proxy_fix_x_port = 1 + +# Number of values to trust for ``X-Forwarded-Prefix``. +# See `Werkzeug: X-Forwarded-For Proxy Fix +# `__ for more details. +# +# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PREFIX +# +proxy_fix_x_prefix = 1 + +# Set secure flag on session cookie +# +# Variable: AIRFLOW__WEBSERVER__COOKIE_SECURE +# +cookie_secure = False + +# Set samesite policy on session cookie +# +# Variable: AIRFLOW__WEBSERVER__COOKIE_SAMESITE +# +cookie_samesite = Lax + +# Default setting for wrap toggle on DAG code and TI log views. 
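For reference, ``enable_proxy_fix`` together with the ``proxy_fix_x_*`` counts above corresponds roughly to wrapping the WSGI app in Werkzeug's ``ProxyFix`` like this (a sketch, not Airflow's actual code):

    from flask import Flask
    from werkzeug.middleware.proxy_fix import ProxyFix

    app = Flask(__name__)
    # Trust exactly one reverse proxy for each forwarded header, matching the counts above.
    app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1, x_prefix=1)
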
+# +# Variable: AIRFLOW__WEBSERVER__DEFAULT_WRAP +# +default_wrap = False + +# Allow the UI to be rendered in a frame +# +# Variable: AIRFLOW__WEBSERVER__X_FRAME_ENABLED +# +x_frame_enabled = True + +# Send anonymous user activity to your analytics tool +# choose from ``google_analytics``, ``segment``, ``metarouter``, or ``matomo`` +# +# Variable: AIRFLOW__WEBSERVER__ANALYTICS_TOOL +# +# analytics_tool = + +# Unique ID of your account in the analytics tool +# +# Variable: AIRFLOW__WEBSERVER__ANALYTICS_ID +# +# analytics_id = + +# Your instances url, only applicable to Matomo. +# +# Example: analytics_url = https://your.matomo.instance.com/ +# +# Variable: AIRFLOW__WEBSERVER__ANALYTICS_URL +# +# analytics_url = + +# 'Recent Tasks' stats will show for old DagRuns if set +# +# Variable: AIRFLOW__WEBSERVER__SHOW_RECENT_STATS_FOR_COMPLETED_RUNS +# +show_recent_stats_for_completed_runs = True + +# The UI cookie lifetime in minutes. User will be logged out from UI after +# ``[webserver] session_lifetime_minutes`` of non-activity +# +# Variable: AIRFLOW__WEBSERVER__SESSION_LIFETIME_MINUTES +# +session_lifetime_minutes = 43200 + +# Sets a custom page title for the DAGs overview page and site title for all pages +# +# Variable: AIRFLOW__WEBSERVER__INSTANCE_NAME +# +# instance_name = + +# Whether the custom page title for the DAGs overview page contains any Markup language +# +# Variable: AIRFLOW__WEBSERVER__INSTANCE_NAME_HAS_MARKUP +# +instance_name_has_markup = False + +# How frequently, in seconds, the DAG data will auto-refresh in graph or grid view +# when auto-refresh is turned on +# +# Variable: AIRFLOW__WEBSERVER__AUTO_REFRESH_INTERVAL +# +auto_refresh_interval = 3 + +# Boolean for displaying warning for publicly viewable deployment +# +# Variable: AIRFLOW__WEBSERVER__WARN_DEPLOYMENT_EXPOSURE +# +warn_deployment_exposure = True + +# Comma separated string of view events to exclude from dag audit view. +# All other events will be added minus the ones passed here. +# The audit logs in the db will not be affected by this parameter. +# +# Example: audit_view_excluded_events = cli_task_run,running,success +# +# Variable: AIRFLOW__WEBSERVER__AUDIT_VIEW_EXCLUDED_EVENTS +# +# audit_view_excluded_events = + +# Comma separated string of view events to include in dag audit view. +# If passed, only these events will populate the dag audit view. +# The audit logs in the db will not be affected by this parameter. +# +# Example: audit_view_included_events = dagrun_cleared,failed +# +# Variable: AIRFLOW__WEBSERVER__AUDIT_VIEW_INCLUDED_EVENTS +# +# audit_view_included_events = + +# Boolean for running SwaggerUI in the webserver. +# +# Variable: AIRFLOW__WEBSERVER__ENABLE_SWAGGER_UI +# +enable_swagger_ui = True + +# Boolean for running Internal API in the webserver. +# +# Variable: AIRFLOW__WEBSERVER__RUN_INTERNAL_API +# +run_internal_api = False + +# The caching algorithm used by the webserver. Must be a valid hashlib function name. +# +# Example: caching_hash_method = sha256 +# +# Variable: AIRFLOW__WEBSERVER__CACHING_HASH_METHOD +# +caching_hash_method = md5 + +# Behavior of the trigger DAG run button for DAGs without params. ``False`` to skip and trigger +# without displaying a form to add a **dag_run.conf**, ``True`` to always display the form. +# The form is displayed always if parameters are defined. +# +# Variable: AIRFLOW__WEBSERVER__SHOW_TRIGGER_FORM_IF_NO_PARAMS +# +show_trigger_form_if_no_params = False + +# Number of recent DAG run configurations in the selector on the trigger web form. 
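For orientation, the ``session_lifetime_minutes`` default above works out to 30 days of UI inactivity before logout:

    from datetime import timedelta
    print(timedelta(minutes=43200))   # 30 days, 0:00:00
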
+# +# Example: num_recent_configurations_for_trigger = 10 +# +# Variable: AIRFLOW__WEBSERVER__NUM_RECENT_CONFIGURATIONS_FOR_TRIGGER +# +num_recent_configurations_for_trigger = 5 + +# A DAG author is able to provide any raw HTML into ``doc_md`` or params description in +# ``description_md`` for text formatting. This is including potentially unsafe javascript. +# Displaying the DAG or trigger form in web UI provides the DAG author the potential to +# inject malicious code into clients browsers. To ensure the web UI is safe by default, +# raw HTML is disabled by default. If you trust your DAG authors, you can enable HTML +# support in markdown by setting this option to ``True``. +# +# This parameter also enables the deprecated fields ``description_html`` and +# ``custom_html_form`` in DAG params until the feature is removed in a future version. +# +# Example: allow_raw_html_descriptions = False +# +# Variable: AIRFLOW__WEBSERVER__ALLOW_RAW_HTML_DESCRIPTIONS +# +allow_raw_html_descriptions = False + +# The maximum size of the request payload (in MB) that can be sent. +# +# Variable: AIRFLOW__WEBSERVER__ALLOWED_PAYLOAD_SIZE +# +allowed_payload_size = 1.0 + +# Require confirmation when changing a DAG in the web UI. This is to prevent accidental changes +# to a DAG that may be running on sensitive environments like production. +# When set to ``True``, confirmation dialog will be shown when a user tries to Pause/Unpause, +# Trigger a DAG +# +# Variable: AIRFLOW__WEBSERVER__REQUIRE_CONFIRMATION_DAG_CHANGE +# +require_confirmation_dag_change = False + +[email] +# Configuration email backend and whether to +# send email alerts on retry or failure + +# Email backend to use +# +# Variable: AIRFLOW__EMAIL__EMAIL_BACKEND +# +email_backend = airflow.utils.email.send_email_smtp + +# Email connection to use +# +# Variable: AIRFLOW__EMAIL__EMAIL_CONN_ID +# +email_conn_id = smtp_default + +# Whether email alerts should be sent when a task is retried +# +# Variable: AIRFLOW__EMAIL__DEFAULT_EMAIL_ON_RETRY +# +default_email_on_retry = True + +# Whether email alerts should be sent when a task failed +# +# Variable: AIRFLOW__EMAIL__DEFAULT_EMAIL_ON_FAILURE +# +default_email_on_failure = True + +# File that will be used as the template for Email subject (which will be rendered using Jinja2). +# If not set, Airflow uses a base template. +# +# Example: subject_template = /path/to/my_subject_template_file +# +# Variable: AIRFLOW__EMAIL__SUBJECT_TEMPLATE +# +# subject_template = + +# File that will be used as the template for Email content (which will be rendered using Jinja2). +# If not set, Airflow uses a base template. +# +# Example: html_content_template = /path/to/my_html_content_template_file +# +# Variable: AIRFLOW__EMAIL__HTML_CONTENT_TEMPLATE +# +# html_content_template = + +# Email address that will be used as sender address. +# It can either be raw email or the complete address in a format ``Sender Name `` +# +# Example: from_email = Airflow +# +# Variable: AIRFLOW__EMAIL__FROM_EMAIL +# +# from_email = + +# ssl context to use when using SMTP and IMAP SSL connections. By default, the context is "default" +# which sets it to ``ssl.create_default_context()`` which provides the right balance between +# compatibility and security, it however requires that certificates in your operating system are +# updated and that SMTP/IMAP servers of yours have valid certificates that have corresponding public +# keys installed on your machines. 
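In practice, ``default`` here resolves to the standard library's default context, which enforces certificate verification against the OS trust store:

    import ssl
    ctx = ssl.create_default_context()   # what ssl_context = "default" resolves to
    print(ctx.verify_mode)               # ssl.CERT_REQUIRED
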
You can switch it to "none" if you want to disable checking +# of the certificates, but it is not recommended as it allows MITM (man-in-the-middle) attacks +# if your infrastructure is not sufficiently secured. It should only be set temporarily while you +# are fixing your certificate configuration. This can be typically done by upgrading to newer +# version of the operating system you run Airflow components on,by upgrading/refreshing proper +# certificates in the OS or by updating certificates for your mail servers. +# +# Example: ssl_context = default +# +# Variable: AIRFLOW__EMAIL__SSL_CONTEXT +# +ssl_context = default + +[smtp] +# If you want airflow to send emails on retries, failure, and you want to use +# the airflow.utils.email.send_email_smtp function, you have to configure an +# smtp server here + +# Specifies the host server address used by Airflow when sending out email notifications via SMTP. +# +# Variable: AIRFLOW__SMTP__SMTP_HOST +# +smtp_host = localhost + +# Determines whether to use the STARTTLS command when connecting to the SMTP server. +# +# Variable: AIRFLOW__SMTP__SMTP_STARTTLS +# +smtp_starttls = True + +# Determines whether to use an SSL connection when talking to the SMTP server. +# +# Variable: AIRFLOW__SMTP__SMTP_SSL +# +smtp_ssl = False + +# Username to authenticate when connecting to smtp server. +# +# Example: smtp_user = airflow +# +# Variable: AIRFLOW__SMTP__SMTP_USER +# +# smtp_user = + +# Password to authenticate when connecting to smtp server. +# +# Example: smtp_password = airflow +# +# Variable: AIRFLOW__SMTP__SMTP_PASSWORD +# +# smtp_password = + +# Defines the port number on which Airflow connects to the SMTP server to send email notifications. +# +# Variable: AIRFLOW__SMTP__SMTP_PORT +# +smtp_port = 25 + +# Specifies the default **from** email address used when Airflow sends email notifications. +# +# Variable: AIRFLOW__SMTP__SMTP_MAIL_FROM +# +smtp_mail_from = airflow@example.com + +# Determines the maximum time (in seconds) the Apache Airflow system will wait for a +# connection to the SMTP server to be established. +# +# Variable: AIRFLOW__SMTP__SMTP_TIMEOUT +# +smtp_timeout = 30 + +# Defines the maximum number of times Airflow will attempt to connect to the SMTP server. +# +# Variable: AIRFLOW__SMTP__SMTP_RETRY_LIMIT +# +smtp_retry_limit = 5 + +[sentry] +# `Sentry `__ integration. Here you can supply +# additional configuration options based on the Python platform. +# See `Python / Configuration / Basic Options +# `__ for more details. +# Unsupported options: ``integrations``, ``in_app_include``, ``in_app_exclude``, +# ``ignore_errors``, ``before_breadcrumb``, ``transport``. + +# Enable error reporting to Sentry +# +# Variable: AIRFLOW__SENTRY__SENTRY_ON +# +sentry_on = false + +# +# Variable: AIRFLOW__SENTRY__SENTRY_DSN +# +sentry_dsn = + +# Dotted path to a before_send function that the sentry SDK should be configured to use. +# +# Variable: AIRFLOW__SENTRY__BEFORE_SEND +# +# before_send = + +[scheduler] +# Task instances listen for external kill signal (when you clear tasks +# from the CLI or the UI), this defines the frequency at which they should +# listen (in seconds). +# +# Variable: AIRFLOW__SCHEDULER__JOB_HEARTBEAT_SEC +# +job_heartbeat_sec = 5 + +# The scheduler constantly tries to trigger new tasks (look at the +# scheduler section in the docs for more information). This defines +# how often the scheduler should run (in seconds). 
+# +# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEARTBEAT_SEC +# +scheduler_heartbeat_sec = 5 + +# The frequency (in seconds) at which the LocalTaskJob should send heartbeat signals to the +# scheduler to notify it's still alive. If this value is set to 0, the heartbeat interval will default +# to the value of ``[scheduler] scheduler_zombie_task_threshold``. +# +# Variable: AIRFLOW__SCHEDULER__LOCAL_TASK_JOB_HEARTBEAT_SEC +# +local_task_job_heartbeat_sec = 0 + +# The number of times to try to schedule each DAG file +# -1 indicates unlimited number +# +# Variable: AIRFLOW__SCHEDULER__NUM_RUNS +# +num_runs = -1 + +# Controls how long the scheduler will sleep between loops, but if there was nothing to do +# in the loop. i.e. if it scheduled something then it will start the next loop +# iteration straight away. +# +# Variable: AIRFLOW__SCHEDULER__SCHEDULER_IDLE_SLEEP_TIME +# +scheduler_idle_sleep_time = 1 + +# Number of seconds after which a DAG file is parsed. The DAG file is parsed every +# ``[scheduler] min_file_process_interval`` number of seconds. Updates to DAGs are reflected after +# this interval. Keeping this number low will increase CPU usage. +# +# Variable: AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL +# +min_file_process_interval = 30 + +# How often (in seconds) to check for stale DAGs (DAGs which are no longer present in +# the expected files) which should be deactivated, as well as datasets that are no longer +# referenced and should be marked as orphaned. +# +# Variable: AIRFLOW__SCHEDULER__PARSING_CLEANUP_INTERVAL +# +parsing_cleanup_interval = 60 + +# How long (in seconds) to wait after we have re-parsed a DAG file before deactivating stale +# DAGs (DAGs which are no longer present in the expected files). The reason why we need +# this threshold is to account for the time between when the file is parsed and when the +# DAG is loaded. The absolute maximum that this could take is ``[core] dag_file_processor_timeout``, +# but when you have a long timeout configured, it results in a significant delay in the +# deactivation of stale dags. +# +# Variable: AIRFLOW__SCHEDULER__STALE_DAG_THRESHOLD +# +stale_dag_threshold = 50 + +# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes. +# +# Variable: AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL +# +dag_dir_list_interval = 300 + +# How often should stats be printed to the logs. Setting to 0 will disable printing stats +# +# Variable: AIRFLOW__SCHEDULER__PRINT_STATS_INTERVAL +# +print_stats_interval = 30 + +# How often (in seconds) should pool usage stats be sent to StatsD (if statsd_on is enabled) +# +# Variable: AIRFLOW__SCHEDULER__POOL_METRICS_INTERVAL +# +pool_metrics_interval = 5.0 + +# If the last scheduler heartbeat happened more than ``[scheduler] scheduler_health_check_threshold`` +# ago (in seconds), scheduler is considered unhealthy. +# This is used by the health check in the **/health** endpoint and in ``airflow jobs check`` CLI +# for SchedulerJob. 
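A quick way to exercise the ``/health`` check mentioned above from Python (the URL assumes the ``base_url`` configured earlier in this file):

    import json, urllib.request

    with urllib.request.urlopen("http://localhost:8080/health", timeout=5) as resp:
        health = json.load(resp)
    print(health["scheduler"]["status"])   # "healthy" while the last heartbeat is within the threshold
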
+# +# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_THRESHOLD +# +scheduler_health_check_threshold = 30 + +# When you start a scheduler, airflow starts a tiny web server +# subprocess to serve a health check if this is set to ``True`` +# +# Variable: AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK +# +enable_health_check = False + +# When you start a scheduler, airflow starts a tiny web server +# subprocess to serve a health check on this host +# +# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_SERVER_HOST +# +scheduler_health_check_server_host = 0.0.0.0 + +# When you start a scheduler, airflow starts a tiny web server +# subprocess to serve a health check on this port +# +# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_SERVER_PORT +# +scheduler_health_check_server_port = 8974 + +# How often (in seconds) should the scheduler check for orphaned tasks and SchedulerJobs +# +# Variable: AIRFLOW__SCHEDULER__ORPHANED_TASKS_CHECK_INTERVAL +# +orphaned_tasks_check_interval = 300.0 + +# Determines the directory where logs for the child processes of the scheduler will be stored +# +# Variable: AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY +# +child_process_log_directory = /opt/airflow/logs/scheduler + +# Local task jobs periodically heartbeat to the DB. If the job has +# not heartbeat in this many seconds, the scheduler will mark the +# associated task instance as failed and will re-schedule the task. +# +# Variable: AIRFLOW__SCHEDULER__SCHEDULER_ZOMBIE_TASK_THRESHOLD +# +scheduler_zombie_task_threshold = 300 + +# How often (in seconds) should the scheduler check for zombie tasks. +# +# Variable: AIRFLOW__SCHEDULER__ZOMBIE_DETECTION_INTERVAL +# +zombie_detection_interval = 10.0 + +# Turn off scheduler catchup by setting this to ``False``. +# Default behavior is unchanged and +# Command Line Backfills still work, but the scheduler +# will not do scheduler catchup if this is ``False``, +# however it can be set on a per DAG basis in the +# DAG definition (catchup) +# +# Variable: AIRFLOW__SCHEDULER__CATCHUP_BY_DEFAULT +# +catchup_by_default = True + +# Setting this to ``True`` will make first task instance of a task +# ignore depends_on_past setting. A task instance will be considered +# as the first task instance of a task when there is no task instance +# in the DB with an execution_date earlier than it., i.e. no manual marking +# success will be needed for a newly added task to be scheduled. +# +# Variable: AIRFLOW__SCHEDULER__IGNORE_FIRST_DEPENDS_ON_PAST_BY_DEFAULT +# +ignore_first_depends_on_past_by_default = True + +# This changes the batch size of queries in the scheduling main loop. +# This should not be greater than ``[core] parallelism``. +# If this is too high, SQL query performance may be impacted by +# complexity of query predicate, and/or excessive locking. +# Additionally, you may hit the maximum allowable query length for your db. +# Set this to 0 to use the value of ``[core] parallelism`` +# +# Variable: AIRFLOW__SCHEDULER__MAX_TIS_PER_QUERY +# +max_tis_per_query = 16 + +# Should the scheduler issue ``SELECT ... FOR UPDATE`` in relevant queries. +# If this is set to ``False`` then you should not run more than a single +# scheduler at once +# +# Variable: AIRFLOW__SCHEDULER__USE_ROW_LEVEL_LOCKING +# +use_row_level_locking = True + +# Max number of DAGs to create DagRuns for per scheduler loop. 
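As noted for ``catchup_by_default`` above, the behaviour can be overridden per DAG; a minimal sketch (dag_id and dates are placeholders):

    from datetime import datetime
    from airflow import DAG

    with DAG(
        dag_id="no_catchup_example",
        start_date=datetime(2025, 10, 1),
        schedule="@daily",
        catchup=False,        # this DAG never backfills, regardless of catchup_by_default
    ):
        pass
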
+# +# Variable: AIRFLOW__SCHEDULER__MAX_DAGRUNS_TO_CREATE_PER_LOOP +# +max_dagruns_to_create_per_loop = 10 + +# How many DagRuns should a scheduler examine (and lock) when scheduling +# and queuing tasks. +# +# Variable: AIRFLOW__SCHEDULER__MAX_DAGRUNS_PER_LOOP_TO_SCHEDULE +# +max_dagruns_per_loop_to_schedule = 20 + +# Should the Task supervisor process perform a "mini scheduler" to attempt to schedule more tasks of the +# same DAG. Leaving this on will mean tasks in the same DAG execute quicker, but might starve out other +# dags in some circumstances +# +# Variable: AIRFLOW__SCHEDULER__SCHEDULE_AFTER_TASK_EXECUTION +# +schedule_after_task_execution = True + +# The scheduler reads dag files to extract the airflow modules that are going to be used, +# and imports them ahead of time to avoid having to re-do it for each parsing process. +# This flag can be set to ``False`` to disable this behavior in case an airflow module needs +# to be freshly imported each time (at the cost of increased DAG parsing time). +# +# Variable: AIRFLOW__SCHEDULER__PARSING_PRE_IMPORT_MODULES +# +parsing_pre_import_modules = True + +# The scheduler can run multiple processes in parallel to parse dags. +# This defines how many processes will run. +# +# Variable: AIRFLOW__SCHEDULER__PARSING_PROCESSES +# +parsing_processes = 2 + +# One of ``modified_time``, ``random_seeded_by_host`` and ``alphabetical``. +# The scheduler will list and sort the dag files to decide the parsing order. +# +# * ``modified_time``: Sort by modified time of the files. This is useful on large scale to parse the +# recently modified DAGs first. +# * ``random_seeded_by_host``: Sort randomly across multiple Schedulers but with same order on the +# same host. This is useful when running with Scheduler in HA mode where each scheduler can +# parse different DAG files. +# * ``alphabetical``: Sort by filename +# +# Variable: AIRFLOW__SCHEDULER__FILE_PARSING_SORT_MODE +# +file_parsing_sort_mode = modified_time + +# Whether the dag processor is running as a standalone process or it is a subprocess of a scheduler +# job. +# +# Variable: AIRFLOW__SCHEDULER__STANDALONE_DAG_PROCESSOR +# +standalone_dag_processor = False + +# Only applicable if ``[scheduler] standalone_dag_processor`` is true and callbacks are stored +# in database. Contains maximum number of callbacks that are fetched during a single loop. +# +# Variable: AIRFLOW__SCHEDULER__MAX_CALLBACKS_PER_LOOP +# +max_callbacks_per_loop = 20 + +# Only applicable if ``[scheduler] standalone_dag_processor`` is true. +# Time in seconds after which dags, which were not updated by Dag Processor are deactivated. +# +# Variable: AIRFLOW__SCHEDULER__DAG_STALE_NOT_SEEN_DURATION +# +dag_stale_not_seen_duration = 600 + +# Turn off scheduler use of cron intervals by setting this to ``False``. +# DAGs submitted manually in the web UI or with trigger_dag will still run. +# +# Variable: AIRFLOW__SCHEDULER__USE_JOB_SCHEDULE +# +use_job_schedule = True + +# Allow externally triggered DagRuns for Execution Dates in the future +# Only has effect if schedule_interval is set to None in DAG +# +# Variable: AIRFLOW__SCHEDULER__ALLOW_TRIGGER_IN_FUTURE +# +allow_trigger_in_future = False + +# How often to check for expired trigger requests that have not run yet. +# +# Variable: AIRFLOW__SCHEDULER__TRIGGER_TIMEOUT_CHECK_INTERVAL +# +trigger_timeout_check_interval = 15 + +# Amount of time a task can be in the queued state before being retried or set to failed. 
+# +# Variable: AIRFLOW__SCHEDULER__TASK_QUEUED_TIMEOUT +# +task_queued_timeout = 600.0 + +# How often to check for tasks that have been in the queued state for +# longer than ``[scheduler] task_queued_timeout``. +# +# Variable: AIRFLOW__SCHEDULER__TASK_QUEUED_TIMEOUT_CHECK_INTERVAL +# +task_queued_timeout_check_interval = 120.0 + +# The run_id pattern used to verify the validity of user input to the run_id parameter when +# triggering a DAG. This pattern cannot change the pattern used by scheduler to generate run_id +# for scheduled DAG runs or DAG runs triggered without changing the run_id parameter. +# +# Variable: AIRFLOW__SCHEDULER__ALLOWED_RUN_ID_PATTERN +# +allowed_run_id_pattern = ^[A-Za-z0-9_.~:+-]+$ + +# Whether to create DAG runs that span an interval or one single point in time for cron schedules, when +# a cron string is provided to ``schedule`` argument of a DAG. +# +# * ``True``: **CronDataIntervalTimetable** is used, which is suitable +# for DAGs with well-defined data interval. You get contiguous intervals from the end of the previous +# interval up to the scheduled datetime. +# * ``False``: **CronTriggerTimetable** is used, which is closer to the behavior of cron itself. +# +# Notably, for **CronTriggerTimetable**, the logical date is the same as the time the DAG Run will +# try to schedule, while for **CronDataIntervalTimetable**, the logical date is the beginning of +# the data interval, but the DAG Run will try to schedule at the end of the data interval. +# +# Variable: AIRFLOW__SCHEDULER__CREATE_CRON_DATA_INTERVALS +# +create_cron_data_intervals = True + +[triggerer] +# How many triggers a single Triggerer will run at once, by default. +# +# Variable: AIRFLOW__TRIGGERER__DEFAULT_CAPACITY +# +default_capacity = 1000 + +# How often to heartbeat the Triggerer job to ensure it hasn't been killed. +# +# Variable: AIRFLOW__TRIGGERER__JOB_HEARTBEAT_SEC +# +job_heartbeat_sec = 5 + +# If the last triggerer heartbeat happened more than ``[triggerer] triggerer_health_check_threshold`` +# ago (in seconds), triggerer is considered unhealthy. +# This is used by the health check in the **/health** endpoint and in ``airflow jobs check`` CLI +# for TriggererJob. +# +# Variable: AIRFLOW__TRIGGERER__TRIGGERER_HEALTH_CHECK_THRESHOLD +# +triggerer_health_check_threshold = 30 + +[kerberos] +# Location of your ccache file once kinit has been performed. +# +# Variable: AIRFLOW__KERBEROS__CCACHE +# +ccache = /tmp/airflow_krb5_ccache + +# gets augmented with fqdn +# +# Variable: AIRFLOW__KERBEROS__PRINCIPAL +# +principal = airflow + +# Determines the frequency at which initialization or re-initialization processes occur. +# +# Variable: AIRFLOW__KERBEROS__REINIT_FREQUENCY +# +reinit_frequency = 3600 + +# Path to the kinit executable +# +# Variable: AIRFLOW__KERBEROS__KINIT_PATH +# +kinit_path = kinit + +# Designates the path to the Kerberos keytab file for the Airflow user +# +# Variable: AIRFLOW__KERBEROS__KEYTAB +# +keytab = airflow.keytab + +# Allow to disable ticket forwardability. +# +# Variable: AIRFLOW__KERBEROS__FORWARDABLE +# +forwardable = True + +# Allow to remove source IP from token, useful when using token behind NATted Docker host. +# +# Variable: AIRFLOW__KERBEROS__INCLUDE_IP +# +include_ip = True + +[sensors] +# Sensor default timeout, 7 days by default (7 * 24 * 60 * 60). 
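That is 7 * 24 * 60 * 60 = 604800 seconds; individual sensors can shorten it, for example (operator choice and path are illustrative only):

    from airflow.sensors.filesystem import FileSensor

    wait_for_input = FileSensor(
        task_id="wait_for_input",
        filepath="/opt/airflow/staging/input/ready.flag",   # placeholder path
        poke_interval=60,        # seconds between pokes
        timeout=6 * 60 * 60,     # give up after 6 hours instead of the 7-day default
    )
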
+# +# Variable: AIRFLOW__SENSORS__DEFAULT_TIMEOUT +# +default_timeout = 604800 + +[common.io] +# Common IO configuration section + +# Path to a location on object storage where XComs can be stored in url format. +# +# Example: xcom_objectstorage_path = s3://conn_id@bucket/path +# +# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_PATH +# +xcom_objectstorage_path = + +# Threshold in bytes for storing XComs in object storage. -1 means always store in the +# database. 0 means always store in object storage. Any positive number means +# it will be stored in object storage if the size of the value is greater than the threshold. +# +# Example: xcom_objectstorage_threshold = 1000000 +# +# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_THRESHOLD +# +xcom_objectstorage_threshold = -1 + +# Compression algorithm to use when storing XComs in object storage. Supported algorithms +# are a.o.: snappy, zip, gzip, bz2, and lzma. If not specified, no compression will be used. +# Note that the compression algorithm must be available in the Python installation (e.g. +# python-snappy for snappy). Zip, gz, bz2 are available by default. +# +# Example: xcom_objectstorage_compression = gz +# +# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_COMPRESSION +# +xcom_objectstorage_compression = + +[fab] +# This section contains configs specific to FAB provider. + +# Boolean for enabling rate limiting on authentication endpoints. +# +# Variable: AIRFLOW__FAB__AUTH_RATE_LIMITED +# +auth_rate_limited = True + +# Rate limit for authentication endpoints. +# +# Variable: AIRFLOW__FAB__AUTH_RATE_LIMIT +# +auth_rate_limit = 5 per 40 second + +# Update FAB permissions and sync security manager roles +# on webserver startup +# +# Variable: AIRFLOW__FAB__UPDATE_FAB_PERMS +# +update_fab_perms = True + +[imap] +# Options for IMAP provider. + +# ssl_context = + +[smtp_provider] +# Options for SMTP provider. + +# ssl context to use when using SMTP and IMAP SSL connections. By default, the context is "default" +# which sets it to ``ssl.create_default_context()`` which provides the right balance between +# compatibility and security, it however requires that certificates in your operating system are +# updated and that SMTP/IMAP servers of yours have valid certificates that have corresponding public +# keys installed on your machines. You can switch it to "none" if you want to disable checking +# of the certificates, but it is not recommended as it allows MITM (man-in-the-middle) attacks +# if your infrastructure is not sufficiently secured. It should only be set temporarily while you +# are fixing your certificate configuration. This can be typically done by upgrading to newer +# version of the operating system you run Airflow components on,by upgrading/refreshing proper +# certificates in the OS or by updating certificates for your mail servers. +# +# If you do not set this option explicitly, it will use Airflow "email.ssl_context" configuration, +# but if this configuration is not present, it will use "default" value. +# +# Example: ssl_context = default +# +# Variable: AIRFLOW__SMTP_PROVIDER__SSL_CONTEXT +# +# ssl_context = + +# Allows overriding of the standard templated email subject line when the SmtpNotifier is used. +# Must provide a path to the template. 
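Circling back to the ``[common.io]`` threshold a few entries above, the decision rule it describes is, in plain Python (a sketch of the documented behaviour, not the provider's code):

    def goes_to_object_storage(value_size_bytes: int, threshold: int) -> bool:
        if threshold < 0:                    # -1: always keep the XCom in the metadata database
            return False
        if threshold == 0:                   # 0: always write the XCom to object storage
            return True
        return value_size_bytes > threshold  # positive: only values larger than the threshold

    print(goes_to_object_storage(2_000_000, 1_000_000))   # True with the documented example threshold
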
+# +# Example: templated_email_subject_path = path/to/override/email_subject.html +# +# Variable: AIRFLOW__SMTP_PROVIDER__TEMPLATED_EMAIL_SUBJECT_PATH +# +# templated_email_subject_path = + +# Allows overriding of the standard templated email path when the SmtpNotifier is used. Must provide +# a path to the template. +# +# Example: templated_html_content_path = path/to/override/email.html +# +# Variable: AIRFLOW__SMTP_PROVIDER__TEMPLATED_HTML_CONTENT_PATH +# +# templated_html_content_path = + +processor_log_folder = /opt/airflow/logs/scheduler diff --git a/airflow/dags/configs/config.docker.yaml b/airflow/dags/configs/config.docker.yaml new file mode 100644 index 000000000..465a10ca7 --- /dev/null +++ b/airflow/dags/configs/config.docker.yaml @@ -0,0 +1,67 @@ +# io: +# # IMPORTANT: use the Docker service name of Postgres (from your compose): +# postgres_url: "postgresql+psycopg2://missions_user:pg123@postgres:5432/missions_db" +io: + postgres_url: "postgresql+psycopg2://postgres:postgres@agcloud-postgres:5432/postgres" + + +windows: + frequency: "D" + timezone: "UTC" + +source_mapping: + entity_dim: "mission" # or "region"/"device" + area_strategy: "none" # or "region_area" (requires regions table/geom) + filters: + start_time: null + end_time: null + anomaly_codes: null + +baseline: + method: "median" + lookback_periods: 28 + min_history: 7 + seasonality: null + +rules: + count_anomaly: + enabled: true + method: "zscore" + z_threshold: 3.0 + iqr_k: 1.5 + min_count: 3 + worsening: + enabled: true + method: "slope" + slope_lookback: 7 + slope_min: 0.02 + min_periods: 5 + ewma_span: 7 + ewma_threshold: 0.6 + +alerting: + dedup_cooldown_windows: 3 + resolve_after_no_anomaly: 3 + rate_limit_per_run: 100 + group_by_window: true + +delivery: + slack: + enabled: false + webhook_url: "" + webhook: + enabled: false + url: "" + headers: {} + email: + enabled: false + smtp_host: "" + smtp_port: 587 + username: "" + password_env: "SMTP_PASSWORD" + from_addr: "" + to_addrs: [] + +run: + dry_run: false + diff --git a/airflow/dags/leaf-counting/src/__init__.py b/airflow/dags/leaf-counting/src/__init__.py new file mode 100755 index 000000000..ee49d4339 --- /dev/null +++ b/airflow/dags/leaf-counting/src/__init__.py @@ -0,0 +1,5 @@ +# decompyle3 version 3.9.3 +# Python bytecode version base 3.12.0 (3531) +# Decompiled from: Python 3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0] +# Embedded file name: /home/user/ml-workspace/projects/leaf-counting/src/__init__.py +# Compiled at: 2025-10-20 13:47:51 diff --git a/airflow/dags/leaf-counting/src/common.py b/airflow/dags/leaf-counting/src/common.py new file mode 100755 index 000000000..ea30f64ea --- /dev/null +++ b/airflow/dags/leaf-counting/src/common.py @@ -0,0 +1,35 @@ +from __future__ import annotations +from pathlib import Path +import cv2 +import numpy as np + +IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"} + +def is_image(path: Path) -> bool: + return path.suffix.lower() in IMG_EXTS + +def iter_images(inp: Path): + p = Path(inp) + if p.is_file() and is_image(p): + yield p + elif p.is_dir(): + for q in sorted(p.rglob("*")): + if q.is_file() and is_image(q): + yield q + +def ensure_dir(p: Path) -> Path: + Path(p).mkdir(parents=True, exist_ok=True) + return Path(p) + +def draw_boxes(img_bgr: np.ndarray, boxes, color=(0,255,0), thickness=2): + h, w = img_bgr.shape[:2] + out = img_bgr.copy() + for (x1,y1,x2,y2,conf,cls_id) in boxes: + x1 = max(0, min(w-1, int(x1))) + y1 = max(0, min(h-1, int(y1))) + x2 = max(0, min(w-1, int(x2))) + y2 
= max(0, min(h-1, int(y2))) + cv2.rectangle(out, (x1,y1), (x2,y2), color, thickness) + label = f"{int(cls_id)}:{conf:.2f}" + cv2.putText(out, label, (x1, max(0, y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1, cv2.LINE_AA) + return out \ No newline at end of file diff --git a/airflow/dags/leaf-counting/src/cpuinfo_patch.py b/airflow/dags/leaf-counting/src/cpuinfo_patch.py new file mode 100644 index 000000000..2cabf8314 --- /dev/null +++ b/airflow/dags/leaf-counting/src/cpuinfo_patch.py @@ -0,0 +1,13 @@ +# Runs at import time. Neutralizes py-cpuinfo and Ultralytics CPU probe (no subprocess). +try: + import cpuinfo as _cpuinfo + _cpuinfo.get_cpu_info = lambda: {"brand_raw": "unknown-cpu", "count": 0} + _cpuinfo.get_cpu_info_json = lambda: "{}" +except Exception: + pass + +try: + from ultralytics.utils import torch_utils as _tu + _tu.get_cpu_info = lambda: "unknown-cpu" +except Exception: + pass diff --git a/airflow/dags/leaf-counting/src/crop_only.py b/airflow/dags/leaf-counting/src/crop_only.py new file mode 100755 index 000000000..b13dcd406 --- /dev/null +++ b/airflow/dags/leaf-counting/src/crop_only.py @@ -0,0 +1,220 @@ +from __future__ import annotations +import json, argparse +from pathlib import Path +from typing import Optional +import cv2 +from common import ensure_dir +from datetime import datetime + +try: + from minio_io import get_client, ensure_bucket, put_png +except Exception: + get_client = ensure_bucket = put_png = None + +# def _load_jsons(inp: Path): +# jdir = inp / "json" +# if not jdir.exists(): +# raise SystemExit(f"[ERR] Expected JSON dir not found: {jdir} (run detect_only.py first)") +# for jp in sorted(jdir.glob("*.json")): +# with jp.open("r", encoding="utf-8") as f: +# j = json.load(f) +# yield jp, j +def _load_jsons(inp: Path): + jdir = inp / "json" + if not jdir.exists(): + raise SystemExit(f"[ERR] Expected JSON dir not found: {jdir} (run detect_only.py first)") + # חיפוש רקורסיבי בכל תתי-התיקיות + for jp in sorted(jdir.rglob("*.json")): + with jp.open("r", encoding="utf-8") as f: + j = json.load(f) + yield jp, j + +def _safe_crop(img, x1, y1, x2, y2): + h, w = img.shape[:2] + x1 = max(0, min(w-1, int(x1))); y1 = max(0, min(h-1, int(y1))) + x2 = max(0, min(w-1, int(x2))); y2 = max(0, min(h-1, int(y2))) + if x2 <= x1: x2 = min(w-1, x1+1) + if y2 <= y1: y2 = min(h-1, y1+1) + return img[y1:y2, x1:x2] + +# def run_crop(inp: Path, out_dir: Path, size: int=224, margin: float=0.1, min_wh: int=8, +# orig_dir: Optional[Path]=None, flat: bool=False, +# minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, +# minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, +# minio_prefix: str="crops", minio_secure: bool=False): + +# out_dir = ensure_dir(out_dir) + +# cli = None +# if minio_endpoint and minio_access and minio_secret and minio_bucket: +# if get_client is None: +# raise SystemExit("[ERR] חסר minio או minio_io.") +# cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure) +# ensure_bucket(cli, minio_bucket) + +# count = 0 +# for jp, j in _load_jsons(inp): +# src = j.get("source_path") +# if src: +# img_path = Path(src) +# else: +# if orig_dir is None: +# raise SystemExit("[ERR] JSON חסר source_path; ספקי --orig לתיקיית המקור") +# img_path = Path(orig_dir) / j["image"] + +# if not img_path.exists(): +# print(f"[WARN] Original image not found: {img_path}, skipping") +# continue + +# img = cv2.imread(str(img_path)) +# if img is None: +# print(f"[WARN] Can't read image: {img_path}") +# continue + +# # מבנה פלט: תת תיקייה 
לכל תמונה (אלא אם --flat) +# if flat: +# dest_dir = ensure_dir(out_dir) +# minio_subprefix = minio_prefix +# else: +# dest_dir = ensure_dir(out_dir / Path(j["image"]).stem) +# minio_subprefix = f"{minio_prefix}/{Path(j['image']).stem}" + +# for i, (x1,y1,x2,y2,conf,cls_id) in enumerate(j.get("boxes", [])): +# w = x2 - x1; h = y2 - y1 +# if w < min_wh or h < min_wh: +# continue +# cx = (x1 + x2) * 0.5; cy = (y1 + y2) * 0.5 +# half = max(w, h) * 0.5 * (1.0 + margin) +# crop = _safe_crop(img, cx-half, cy-half, cx+half, cy+half) +# if crop.size == 0: +# continue +# crop_resized = cv2.resize(crop, (size, size), interpolation=cv2.INTER_AREA) +# out_name = f"det{i:03d}_cls{int(cls_id)}_{conf:.2f}.png" +# cv2.imwrite(str(dest_dir / out_name), crop_resized) +# count += 1 +# if cli: +# put_png(cli, minio_bucket, f"{minio_subprefix}/{out_name}", crop_resized) + +# print(f"[DONE] Saved {count} crops under: {out_dir} (flat={flat})") +# def run_crop(inp: Path, out_dir: Path, size: int=224, margin: float=0.1, min_wh: int=8, +# orig_dir: Optional[Path]=None, flat: bool=False, +# minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, +# minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, +# minio_prefix: str="crops", minio_secure: bool=False): +def run_crop(inp: Path, out_dir: Path, size: int=224, margin: float=0.1, min_wh: int=8, + orig_dir: Optional[Path]=None, flat: bool=False, + minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, + minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, + minio_prefix: str="CROP", minio_secure: bool=False, + run_id: Optional[str]=None): + run_id = run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + out_dir = ensure_dir(out_dir) + + cli = None + if minio_endpoint and minio_access and minio_secret and minio_bucket: + if get_client is None: + raise SystemExit("[ERR] חסר minio או minio_io.") + cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure) + ensure_bucket(cli, minio_bucket) + + count = 0 + for jp, j in _load_jsons(inp): + # עדיפות: source_path; אם אין – rel_path; ואם אין – נדרש --orig + if "source_path" in j: + img_path = Path(j["source_path"]) + rel_path = j.get("rel_path", j["image"]) + elif "rel_path" in j: + if orig_dir is None: + raise SystemExit("[ERR] JSON מכיל רק rel_path; ספקי --orig כדי למצוא את קובץ המקור") + img_path = Path(orig_dir) / j["rel_path"] + rel_path = j["rel_path"] + else: + if orig_dir is None: + raise SystemExit("[ERR] JSON חסר source_path/rel_path; ספקי --orig ותתאימי לשמות image") + img_path = Path(orig_dir) / j["image"] + rel_path = j["image"] + + if not img_path.exists(): + print(f"[WARN] Original image not found: {img_path}, skipping") + continue + + img = cv2.imread(str(img_path)) + if img is None: + print(f"[WARN] Can't read image: {img_path}") + continue + + rel_parent = str(Path(rel_path).parent) + rel_stem = Path(rel_path).stem + + # תיקיית יעד: out///... + if flat: + dest_dir = ensure_dir(out_dir) + minio_subprefix = minio_prefix + else: + dest_dir = ensure_dir(out_dir / rel_parent / rel_stem) + minio_subprefix = f"{minio_prefix}/{rel_parent}/{rel_stem}" if rel_parent != "." 
else f"{minio_prefix}/{rel_stem}" + + for i, (x1,y1,x2,y2,conf,cls_id) in enumerate(j.get("boxes", [])): + w = x2 - x1; h = y2 - y1 + if w < min_wh or h < min_wh: + continue + cx = (x1 + x2) * 0.5; cy = (y1 + y2) * 0.5 + half = max(w, h) * 0.5 * (1.0 + margin) + crop = _safe_crop(img, cx-half, cy-half, cx+half, cy+half) + if crop.size == 0: + continue + crop_resized = cv2.resize(crop, (size, size), interpolation=cv2.INTER_AREA) + out_name = f"det{i:03d}_cls{int(cls_id)}_{conf:.2f}.png" + cv2.imwrite(str(dest_dir / out_name), crop_resized) + count += 1 + + if cli: + base = f"{run_id}/{minio_prefix}" # תאריך/שעה קודם, אח"כ CROP + key = f"{base}/{rel_parent}/{rel_stem}/{out_name}" if rel_parent != "." else f"{base}/{rel_stem}/{out_name}" + put_png(cli, minio_bucket, key, crop_resized) + + put_png(cli, minio_bucket, f"{minio_subprefix}/{out_name}", crop_resized) + + print(f"[DONE] Saved {count} crops under: {out_dir} (flat={flat})") + +def main(): + ap = argparse.ArgumentParser(description="Create square crops from detection JSON results (+optional MinIO).") + ap.add_argument("--input", required=True) + ap.add_argument("--out", required=True) + ap.add_argument("--orig", default=None, help="דרוש רק אם JSON חסר source_path") + ap.add_argument("--size", type=int, default=224) + ap.add_argument("--margin", type=float, default=0.1) + ap.add_argument("--min-wh", type=int, default=8) + ap.add_argument("--flat", action="store_true") + + ap.add_argument("--minio-endpoint", default=None) + ap.add_argument("--minio-access", default=None) + ap.add_argument("--minio-secret", default=None) + ap.add_argument("--minio-bucket", default=None) + ap.add_argument("--minio-prefix", default="crops") + ap.add_argument("--minio-secure", action="store_true") + ap.add_argument("--run-id", default=None, help="תיקיית הריצה ב-MinIO (ברירת מחדל: YYYY/MM/DD/HHmm)") + + args = ap.parse_args() + run_id = args.run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + run_crop( + inp=Path(args.input), out_dir=Path(args.out), + size=args.size, margin=args.margin, min_wh=args.min_wh, + orig_dir=Path(args.orig) if args.orig else None, flat=args.flat, + minio_endpoint=args.minio_endpoint, minio_access=args.minio_access, + minio_secret=args.minio_secret, minio_bucket=args.minio_bucket, + minio_prefix=args.minio_prefix, minio_secure=args.minio_secure, + run_id=run_id, + ) + + # run_crop( + # inp=Path(args.input), out_dir=Path(args.out), + # size=args.size, margin=args.margin, min_wh=args.min_wh, + # orig_dir=Path(args.orig) if args.orig else None, flat=args.flat, + # minio_endpoint=args.minio_endpoint, minio_access=args.minio_access, + # minio_secret=args.minio_secret, minio_bucket=args.minio_bucket, + # minio_prefix=args.minio_prefix, minio_secure=args.minio_secure, + # ) + +if __name__ == "__main__": + main() diff --git a/airflow/dags/leaf-counting/src/detect_only.py b/airflow/dags/leaf-counting/src/detect_only.py new file mode 100755 index 000000000..8f1e7e81a --- /dev/null +++ b/airflow/dags/leaf-counting/src/detect_only.py @@ -0,0 +1,192 @@ +from __future__ import annotations +import json, argparse +from pathlib import Path +from typing import Optional +from datetime import datetime + +import cv2 +from ultralytics import YOLO +from common import iter_images, ensure_dir, draw_boxes + +try: + from minio_io import get_client, ensure_bucket, put_png, put_json +except Exception: + get_client = ensure_bucket = put_png = put_json = None + +# def run_detect(inp: Path, out_dir: Path, weights: Path, +# conf: float=0.25, imgsz: int=896, device: 
str="cpu", +# minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, +# minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, +# minio_prefix: str="detect", minio_secure: bool=False): +# out_dir = ensure_dir(out_dir) +# overlay_dir = ensure_dir(out_dir / "overlay") +# json_dir = ensure_dir(out_dir / "json") + +# cli = None +# if minio_endpoint and minio_access and minio_secret and minio_bucket: +# if get_client is None: +# raise SystemExit("[ERR] חסר minio או minio_io.") +# cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure) +# ensure_bucket(cli, minio_bucket) + +# model = YOLO(str(weights)) +# img_paths = list(iter_images(inp)) +# if not img_paths: +# raise SystemExit(f"[ERR] No images found under: {inp}") + +# for img_path in img_paths: +# img_bgr = cv2.imread(str(img_path)) +# if img_bgr is None: +# print(f"[WARN] can't read image: {img_path}") +# continue +# h, w = img_bgr.shape[:2] +# res = model.predict(source=img_bgr, conf=conf, imgsz=imgsz, device=device, verbose=False)[0] + +# boxes_pix = [] +# if res.boxes is not None and len(res.boxes) > 0: +# for b in res.boxes: +# xyxy = b.xyxy.cpu().numpy().reshape(-1) +# conf_i = float(b.conf.cpu().numpy().reshape(-1)[0]) +# cls_i = float(b.cls.cpu().numpy().reshape(-1)[0]) if b.cls is not None else 0.0 +# x1,y1,x2,y2 = map(float, xyxy.tolist()) +# boxes_pix.append([x1,y1,x2,y2,conf_i,cls_i]) + +# j = { +# "image": img_path.name, +# "source_path": str(img_path.resolve()), # <<< חדש: מקור התמונה המלא +# "width": w, "height": h, +# "boxes": boxes_pix +# } +# json_path = json_dir / (img_path.stem + ".json") +# json_path.write_text(json.dumps(j, ensure_ascii=False, indent=2), encoding="utf-8") + +# overlay = draw_boxes(img_bgr, boxes_pix) +# overlay_path = overlay_dir / img_path.name +# cv2.imwrite(str(overlay_path), overlay) + +# if cli: +# put_json(cli, minio_bucket, f"{minio_prefix}/json/{img_path.stem}.json", j) +# put_png(cli, minio_bucket, f"{minio_prefix}/overlay/{img_path.stem}.png", overlay) + +# print(f"[OK] {img_path.name} -> {json_path.name}, boxes={len(boxes_pix)}") +# def run_detect(inp: Path, out_dir: Path, weights: Path, +# conf: float=0.25, imgsz: int=896, device: str="cpu", +# minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, +# minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, +# minio_prefix: str="detect", minio_secure: bool=False, +# run_id: Optional[str]=None): +def run_detect(inp: Path, out_dir: Path, weights: Path, + conf: float=0.25, imgsz: int=896, device: str="cpu", + minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, + minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, + minio_prefix: str="DETECT", minio_secure: bool=False, + run_id: Optional[str]=None): + + run_id = run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + out_dir = ensure_dir(out_dir) + overlay_root = ensure_dir(out_dir / "overlay") + json_root = ensure_dir(out_dir / "json") + + cli = None + if minio_endpoint and minio_access and minio_secret and minio_bucket: + if get_client is None: + raise SystemExit("[ERR] חסר minio או minio_io.") + cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure) + ensure_bucket(cli, minio_bucket) + + model = YOLO(str(weights)) + + # איסוף תמונות + img_paths = list(iter_images(inp)) + if not img_paths: + raise SystemExit(f"[ERR] No images found under: {inp}") + + is_dir_input = Path(inp).is_dir() + + for img_path in img_paths: + # יחסית לשורש הקלט (אם תיקייה) + rel_path 
= img_path.name if not is_dir_input else str(img_path.relative_to(inp)) + rel_parent = "." if not is_dir_input else str(img_path.relative_to(inp).parent) + rel_stem = Path(rel_path).stem + + # תיקיות פלט משוקפות + overlay_dir = ensure_dir(overlay_root / rel_parent) + json_dir = ensure_dir(json_root / rel_parent) + + img_bgr = cv2.imread(str(img_path)) + if img_bgr is None: + print(f"[WARN] can't read image: {img_path}") + continue + h, w = img_bgr.shape[:2] + + res = model.predict(source=img_bgr, conf=conf, imgsz=imgsz, device=device, verbose=False)[0] + + boxes_pix = [] + if res.boxes is not None and len(res.boxes) > 0: + for b in res.boxes: + xyxy = b.xyxy.cpu().numpy().reshape(-1) + conf_i = float(b.conf.cpu().numpy().reshape(-1)[0]) + cls_i = float(b.cls.cpu().numpy().reshape(-1)[0]) if b.cls is not None else 0.0 + x1,y1,x2,y2 = map(float, xyxy.tolist()) + boxes_pix.append([x1,y1,x2,y2,conf_i,cls_i]) + + j = { + "image": img_path.name, + "rel_path": rel_path, # <<< חדש: הנתיב היחסי בקלט + "source_path": str(img_path.resolve()), + "width": w, "height": h, + "boxes": boxes_pix + } + json_path = json_dir / f"{rel_stem}.json" + json_path.write_text(json.dumps(j, ensure_ascii=False, indent=2), encoding="utf-8") + + overlay = draw_boxes(img_bgr, boxes_pix) + ov_path = overlay_dir / img_path.name + cv2.imwrite(str(ov_path), overlay) + + if cli: + # minio_json_key = f"{minio_prefix}/json/{rel_parent}/{rel_stem}.json" if rel_parent != "." else f"{minio_prefix}/json/{rel_stem}.json" + # minio_ov_key = f"{minio_prefix}/overlay/{rel_parent}/{img_path.name}" if rel_parent != "." else f"{minio_prefix}/overlay/{img_path.name}" + # put_json(cli, minio_bucket, minio_json_key, j) + # put_png(cli, minio_bucket, minio_ov_key, overlay) + base = f"{run_id}/{minio_prefix}" # תאריך/שעה קודם, אח"כ DETECT + minio_json_key = f"{base}/json/{rel_parent}/{rel_stem}.json" if rel_parent != "." else f"{base}/json/{rel_stem}.json" + minio_ov_key = f"{base}/overlay/{rel_parent}/{img_path.name}" if rel_parent != "." 
else f"{base}/overlay/{img_path.name}" + put_json(cli, minio_bucket, minio_json_key, j) + put_png(cli, minio_bucket, minio_ov_key, overlay) + + print(f"[OK] {rel_path} -> {json_path.relative_to(out_dir)}, boxes={len(boxes_pix)}") + +def main(): + ap = argparse.ArgumentParser(description="YOLO detect -> pixel JSON + overlay (+optional MinIO)") + ap.add_argument("--input", required=True) + ap.add_argument("--out", required=True) + ap.add_argument("--weights", required=True) + ap.add_argument("--conf", type=float, default=0.25) + ap.add_argument("--imgsz", type=int, default=896) + ap.add_argument("--device", default="cpu") + + ap.add_argument("--minio-endpoint", default=None) + ap.add_argument("--minio-access", default=None) + ap.add_argument("--minio-secret", default=None) + ap.add_argument("--minio-bucket", default=None) + ap.add_argument("--minio-prefix", default="detect") + ap.add_argument("--minio-secure", action="store_true") + ap.add_argument("--run-id", default=None, help="תיקיית הריצה ב-MinIO (ברירת מחדל: YYYY/MM/DD/HHmm)") + + args = ap.parse_args() + # run_detect(Path(args.input), Path(args.out), Path(args.weights), + # conf=args.conf, imgsz=args.imgsz, device=args.device, + # minio_endpoint=args.minio_endpoint, minio_access=args.minio_access, + # minio_secret=args.minio_secret, minio_bucket=args.minio_bucket, + # minio_prefix=args.minio_prefix, minio_secure=args.minio_secure) + run_id = args.run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + run_detect(Path(args.input), Path(args.out), Path(args.weights), + conf=args.conf, imgsz=args.imgsz, device=args.device, + minio_endpoint=args.minio_endpoint, minio_access=args.minio_access, + minio_secret=args.minio_secret, minio_bucket=args.minio_bucket, + minio_prefix=args.minio_prefix, minio_secure=args.minio_secure, + run_id=run_id) + +if __name__ == "__main__": + main() diff --git a/airflow/dags/leaf-counting/src/minio_io.py b/airflow/dags/leaf-counting/src/minio_io.py new file mode 100755 index 000000000..cda3c246d --- /dev/null +++ b/airflow/dags/leaf-counting/src/minio_io.py @@ -0,0 +1,39 @@ +from __future__ import annotations +import io, json, os +from pathlib import Path +import cv2 +from minio import Minio +from minio.error import S3Error + +def get_client(endpoint: str, access_key: str, secret_key: str, secure: bool=False) -> Minio: + """ + דוגמה: + cli = get_client("localhost:9000", "minioadmin", "minioadmin", secure=False) + """ + return Minio(endpoint, access_key=access_key, secret_key=secret_key, secure=secure) + +def ensure_bucket(cli: Minio, bucket: str): + found = cli.bucket_exists(bucket) + if not found: + cli.make_bucket(bucket) + +def put_png(cli: Minio, bucket: str, key: str, img_bgr): + """ + מעלה תמונת PNG מתוך np.ndarray (BGR של OpenCV). + """ + Path(key).parent and os.makedirs(Path(key).parent, exist_ok=True) # לא חובה, לשקט נפשי מקומי + ok, buf = cv2.imencode(".png", img_bgr) + if not ok: + raise RuntimeError("cv2.imencode PNG failed") + bio = io.BytesIO(buf.tobytes()) + bio.seek(0) + cli.put_object(bucket, key, bio, length=len(bio.getvalue()), content_type="image/png") + +def put_json(cli: Minio, bucket: str, key: str, obj): + """ + מעלה JSON (dict/list). 
+ """ + js = json.dumps(obj, ensure_ascii=False, indent=2).encode("utf-8") + bio = io.BytesIO(js) + bio.seek(0) + cli.put_object(bucket, key, bio, length=len(js), content_type="application/json") diff --git a/airflow/dags/leaf-counting/src/predict_pyramid_wbf.py b/airflow/dags/leaf-counting/src/predict_pyramid_wbf.py new file mode 100755 index 000000000..30c8ed9b1 --- /dev/null +++ b/airflow/dags/leaf-counting/src/predict_pyramid_wbf.py @@ -0,0 +1,219 @@ +from __future__ import annotations +import argparse, json +from pathlib import Path +from typing import List, Tuple, Optional +from datetime import datetime + +import cv2 +import numpy as np +from ultralytics import YOLO + +from common import iter_images, ensure_dir, draw_boxes + +try: + from minio_io import get_client, ensure_bucket, put_png, put_json +except Exception: + get_client = ensure_bucket = put_png = put_json = None + + +# ----------------- WBF utils ----------------- +def iou_xyxy(a: np.ndarray, b: np.ndarray) -> float: + ax1, ay1, ax2, ay2 = a + bx1, by1, bx2, by2 = b + ix1, iy1 = max(ax1, bx1), max(ay1, by1) + ix2, iy2 = min(ax2, bx2), min(ay2, by2) + iw, ih = max(0.0, ix2 - ix1), max(0.0, iy2 - iy1) + inter = iw * ih + area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1) + area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1) + union = area_a + area_b - inter + 1e-9 + return inter / union + + +def wbf(boxes: List[np.ndarray], scores: List[float], iou_thr: float = 0.55) -> tuple[list[np.ndarray], list[float]]: + """Very small WBF: קיבוץ לפי IoU>=thr, ממוצע משוקלל לפי conf.""" + used = [False] * len(boxes) + out_boxes, out_scores = [], [] + for i in range(len(boxes)): + if used[i]: + continue + group_idxs = [i] + used[i] = True + for j in range(i + 1, len(boxes)): + if used[j]: + continue + if iou_xyxy(boxes[i], boxes[j]) >= iou_thr: + group_idxs.append(j) + used[j] = True + bs = np.array([boxes[k] for k in group_idxs], dtype=float) + ws = np.array([scores[k] for k in group_idxs], dtype=float) + wsum = ws.sum() + 1e-9 + avg = (bs * ws[:, None]).sum(axis=0) / wsum + out_boxes.append(avg) + out_scores.append(float(ws.max())) + return out_boxes, out_scores + + +# ----------------- multi-scale predict ----------------- +def predict_at_scales(model: YOLO, img_bgr: np.ndarray, scales: List[float], conf: float, imgsz: int, device: str): + H, W = img_bgr.shape[:2] + all_boxes, all_scores, all_classes = [], [], [] + for s in scales: + if s == 1.0: + resized = img_bgr + rx, ry = 1.0, 1.0 + else: + newW, newH = int(W * s), int(H * s) + resized = cv2.resize(img_bgr, (newW, newH), interpolation=cv2.INTER_LINEAR) + rx, ry = 1.0 / s, 1.0 / s + + res = model.predict(source=resized, conf=conf, imgsz=imgsz, device=device, verbose=False)[0] + if res.boxes is None or len(res.boxes) == 0: + continue + for b in res.boxes: + xyxy = b.xyxy.cpu().numpy().reshape(-1) + conf_i = float(b.conf.cpu().numpy().reshape(-1)[0]) + cls_i = float(b.cls.cpu().numpy().reshape(-1)[0]) if b.cls is not None else 0.0 + x1, y1, x2, y2 = xyxy + # החזרה לקואורדינטות המקור + x1, y1, x2, y2 = x1 * rx, y1 * ry, x2 * rx, y2 * ry + all_boxes.append(np.array([x1, y1, x2, y2], dtype=float)) + all_scores.append(conf_i) + all_classes.append(int(cls_i)) + return all_boxes, all_scores, all_classes + + +# ----------------- main runner ----------------- +def run(inp: Path, out_dir: Path, weights: Path, + scales: List[float], conf: float = 0.25, iou_thr: float = 0.55, + imgsz: int = 896, device: str = "cpu", + minio_endpoint: Optional[str] = None, minio_access: Optional[str] = None, + 
minio_secret: Optional[str] = None, minio_bucket: Optional[str] = None, + minio_prefix: str = "PREDICT_PWB", minio_secure: bool = False, + run_id: Optional[str] = None): + + run_id = run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + + out_dir = ensure_dir(out_dir) + overlay_root = ensure_dir(out_dir / "overlay") + json_root = ensure_dir(out_dir / "json") + + cli = None + if minio_endpoint and minio_access and minio_secret and minio_bucket: + if get_client is None: + raise SystemExit("[ERR] חסר minio או minio_io.") + cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure) + ensure_bucket(cli, minio_bucket) + + model = YOLO(str(weights)) + images = list(iter_images(inp)) + if not images: + raise SystemExit(f"[ERR] No images under: {inp}") + + for p in images: + img = cv2.imread(str(p)) + if img is None: + print(f"[WARN] can't read: {p}") + continue + H, W = img.shape[:2] + + # נתיב יחסי לשורש הקלט + rel_path = str(p.relative_to(inp)) if inp.is_dir() else p.name + rel_parent = str(Path(rel_path).parent) + rel_stem = Path(rel_path).stem + + boxes, scores, classes = predict_at_scales(model, img, scales, conf, imgsz, device) + + # WBF לכל מחלקה בנפרד + merged = [] + for cls in sorted(set(classes)): + idxs = [i for i, c in enumerate(classes) if c == cls] + if not idxs: + continue + bcls = [boxes[i] for i in idxs] + scls = [scores[i] for i in idxs] + mbox, mscore = wbf(bcls, scls, iou_thr=iou_thr) + for bb, ss in zip(mbox, mscore): + x1, y1, x2, y2 = [float(max(0, v)) for v in bb] + x1, y1 = min(x1, W - 1), min(y1, H - 1) + x2, y2 = min(x2, W - 1), min(y2, H - 1) + merged.append([x1, y1, x2, y2, float(ss), float(cls)]) + + # תיקיות פלט משוקפות + overlay_dir = ensure_dir(overlay_root / rel_parent) + json_dir = ensure_dir(json_root / rel_parent) + + j = { + "image": p.name, + "rel_path": rel_path, + "source_path": str(p.resolve()), + "width": W, "height": H, + "boxes": merged + } + jpath = json_dir / f"{rel_stem}.json" + jpath.write_text(json.dumps(j, ensure_ascii=False, indent=2), encoding="utf-8") + + overlay = draw_boxes(img, merged) + cv2.imwrite(str(overlay_dir / p.name), overlay) + + # MinIO (תאריך/שעה קודם, אח"כ שלב) + if cli: + base = f"{run_id}/{minio_prefix}" + json_key = f"{base}/json/{rel_parent}/{rel_stem}.json" if rel_parent != "." else f"{base}/json/{rel_stem}.json" + ov_key = f"{base}/overlay/{rel_parent}/{p.name}" if rel_parent != "." else f"{base}/overlay/{p.name}" + put_json(cli, minio_bucket, json_key, j) + put_png(cli, minio_bucket, ov_key, overlay) + + print(f"[OK] {rel_path} WBF boxes={len(merged)} -> {jpath.relative_to(out_dir)}") + + +def parse_scales(s: str) -> List[float]: + return [float(x) for x in s.split(",") if x.strip()] + + +def main(): + ap = argparse.ArgumentParser(description="YOLO multi-scale + WBF (+optional MinIO)") + ap.add_argument("--input", required=True) + ap.add_argument("--out", required=True) + ap.add_argument("--weights", required=True) + ap.add_argument("--scales", default="0.75,1.0,1.25", help="comma-separated, e.g. 
0.5,1.0,1.5") + ap.add_argument("--conf", type=float, default=0.25) + ap.add_argument("--iou", type=float, default=0.55, help="WBF IoU threshold") + ap.add_argument("--imgsz", type=int, default=896) + ap.add_argument("--device", default="cpu") + + # MinIO + ap.add_argument("--minio-endpoint", default=None) + ap.add_argument("--minio-access", default=None) + ap.add_argument("--minio-secret", default=None) + ap.add_argument("--minio-bucket", default=None) + ap.add_argument("--minio-prefix", default="PREDICT_PWB") + ap.add_argument("--minio-secure", action="store_true") + + # Run grouping + ap.add_argument("--run-id", default=None, help="תיקיית הריצה ב-MinIO (ברירת מחדל: YYYY/MM/DD/HHmm)") + + args = ap.parse_args() + + run_id = args.run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + run( + inp=Path(args.input), + out_dir=Path(args.out), + weights=Path(args.weights), + scales=parse_scales(args.scales), + conf=args.conf, + iou_thr=args.iou, + imgsz=args.imgsz, + device=args.device, + minio_endpoint=args.minio_endpoint, + minio_access=args.minio_access, + minio_secret=args.minio_secret, + minio_bucket=args.minio_bucket, + minio_prefix=args.minio_prefix, + minio_secure=args.minio_secure, + run_id=run_id, + ) + + +if __name__ == "__main__": + main() diff --git a/airflow/dags/leaf-counting/weights/best.pt b/airflow/dags/leaf-counting/weights/best.pt new file mode 100755 index 000000000..d9849b95f Binary files /dev/null and b/airflow/dags/leaf-counting/weights/best.pt differ diff --git a/airflow/dags/leaf_pipeline_dag.py b/airflow/dags/leaf_pipeline_dag.py new file mode 100755 index 000000000..ac5114a86 --- /dev/null +++ b/airflow/dags/leaf_pipeline_dag.py @@ -0,0 +1,1108 @@ +from __future__ import annotations +from datetime import datetime +import pendulum +from airflow import DAG +from airflow.operators.bash import BashOperator +from airflow.providers.docker.operators.docker import DockerOperator + + +PROJECT_ROOT = "/opt/airflow/dags/leaf-counting" +PYTHON_BIN = "python" +WEIGHTS = f"{PROJECT_ROOT}/weights/best.pt" + + +OUT_RUN = f"{PROJECT_ROOT}/runs_local/airflow_run" +STAGING_DIR = "/opt/airflow/staging/input" + +tz = pendulum.timezone("Asia/Jerusalem") + +with DAG( + dag_id="leaf_pipeline_v2", + start_date=datetime(2025, 10, 1, tzinfo=tz), + schedule=None, + catchup=False, + default_args={"owner": "leafcounting", "retries": 0}, + tags=["leaf-counting", "detect", "pwb", "crop", "minio"], +) as dag: + + + RUN_ID_DATE = "{{ dag_run.conf.get('run_id') or logical_date.in_timezone('Asia/Jerusalem').strftime('%Y/%m/%d/%H%M') }}" + + # ----------------------------- + # STAGE INPUT + # ----------------------------- + stage_input = BashOperator( + task_id="stage_input", + bash_command=""" +set -euo pipefail +python -m pip install --no-cache-dir -q \ + --trusted-host pypi.org --trusted-host files.pythonhosted.org --trusted-host pypi.python.org \ + awscli \ +|| apt-get update && apt-get install -y -qq ca-certificates awscli \ +|| python -m pip install --no-cache-dir -q \ + --index-url http://pypi.org/simple \ + --trusted-host pypi.org --trusted-host files.pythonhosted.org --trusted-host pypi.python.org \ + awscli +STAGING_DIR='{{ params.staging_dir }}' +INPUT_MODE='minio' +mkdir -p "$STAGING_DIR"; rm -rf "$STAGING_DIR"/* +if [ "$INPUT_MODE" = 'minio' ]; then + SRC_BUCKET='{{ dag_run.conf.get("src_bucket", var.value.leaf_minio_bucket | default("imagery")) }}' + SRC_PREFIX='leaves/examples' + ENDPOINT_URL='{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://host.docker.internal:9001") }}' + 
export AWS_ACCESS_KEY_ID='{{ conn.minio_s3.login }}' + export AWS_SECRET_ACCESS_KEY='{{ conn.minio_s3.password }}' + export AWS_DEFAULT_REGION='{{ conn.minio_s3.extra_dejson.region_name or "us-east-1" }}' + export AWS_S3_FORCE_PATH_STYLE=true + export AWS_EC2_METADATA_DISABLED=true + echo "[stage] source=minio s3://$SRC_BUCKET/$SRC_PREFIX -> $STAGING_DIR (endpoint=$ENDPOINT_URL)" + python -m awscli s3 sync "s3://$SRC_BUCKET/$SRC_PREFIX" "$STAGING_DIR" --endpoint-url "$ENDPOINT_URL" +else + INPUT_DIR='{{ params.project_root }}/demo_images' + echo "[stage] source=local $INPUT_DIR -> $STAGING_DIR" + rsync -a --delete "$INPUT_DIR"/ "$STAGING_DIR"/ +fi +""", + params={"staging_dir": STAGING_DIR, "project_root": PROJECT_ROOT}, + env={"PYTHONUNBUFFERED": "1"}, + ) + + # ----------------------------- + # DETECT -> imagery/leaves///
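    # i.e. outputs are synced to s3://<bucket>/leaves/<YYYY/MM/DD/HHmm>/detect/ (bucket defaults to "imagery"; see DEST_PREFIX below)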
//detect/ + # ----------------------------- + detect = BashOperator( + task_id="detect", + bash_command=""" +set -euo pipefail + +PROJECT_ROOT='{{ params.project_root }}' +PY='{{ params.python_bin }}'; if ! command -v "$PY" >/dev/null 2>&1; then PY='python'; fi + +export PYTHONEXECUTABLE="$PY" # ← להוסיף שורה זו + +INPUT_DIR='{{ params.staging_dir }}' +OUT_LOCAL_DET='{{ params.out_run }}/detect' +WEIGHTS='{{ params.weights }}' + +DATE_ONLY='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' + +DEST_PREFIX="leaves/${DATE_ONLY}/detect" + +# MinIO: +# ל-SDK (minio-py) חייבים host:port בלי סכימה ובלי path → משתמשים בשדות החיבור הישירים: +ENDPOINT_HOSTPORT='{{ (conn.minio_s3.host or "host.docker.internal") }}:{{ (conn.minio_s3.port or 9001) }}' +# ל-awscli צריך URL מלא: +ENDPOINT_URL='{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://host.docker.internal:9001") }}' +BUCKET='{{ var.value.leaf_minio_bucket | default("imagery") }}' +export AWS_ACCESS_KEY_ID='{{ conn.minio_s3.login }}' +export AWS_SECRET_ACCESS_KEY='{{ conn.minio_s3.password }}' +export AWS_DEFAULT_REGION='us-east-1' +export AWS_S3_FORCE_PATH_STYLE=true + +mkdir -p "$OUT_LOCAL_DET" + +cd "$PROJECT_ROOT" +$PY src/detect_only.py \ + --input "$INPUT_DIR" \ + --out "$OUT_LOCAL_DET" \ + --weights "$WEIGHTS" \ + --conf 0.25 --imgsz 896 --device cpu \ + --minio-endpoint "$ENDPOINT_HOSTPORT" \ + --minio-access "$AWS_ACCESS_KEY_ID" \ + --minio-secret "$AWS_SECRET_ACCESS_KEY" \ + --minio-bucket "$BUCKET" \ + --minio-prefix "leaves/${DATE_ONLY}" \ + --run-id "detect" + +# יישור קו לנתיב המדויק: +pip install -q awscli || true +python -m awscli s3 sync "$OUT_LOCAL_DET"/ "s3://$BUCKET/$DEST_PREFIX/" --endpoint-url "$ENDPOINT_URL" +python -m awscli s3 ls "s3://$BUCKET/$DEST_PREFIX/" --recursive --endpoint-url "$ENDPOINT_URL" || true +""", + params={ + "project_root": PROJECT_ROOT, + # "python_bin": PYTHON_BIN, + "python_bin": "/usr/local/bin/python", + "staging_dir": STAGING_DIR, + "out_run": OUT_RUN, + "weights": WEIGHTS, + "run_id_date": RUN_ID_DATE, + }, + env={"PYTHONUNBUFFERED": "1"}, + ) + + # ----------------------------- + # PREDICT_PWB -> imagery/leaves///
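    # i.e. outputs are synced to s3://<bucket>/leaves/<YYYY/MM/DD/HHmm>/pwb/ (see DEST_PREFIX below)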
//pwb/ + # ----------------------------- + pwb = BashOperator( + task_id="predict_pwb", + bash_command=""" +set -euo pipefail + +PROJECT_ROOT='{{ params.project_root }}' +PY='{{ params.python_bin }}'; if ! command -v "$PY" >/dev/null 2>&1; then PY='python'; fi +INPUT_DIR='{{ params.staging_dir }}' +OUT_LOCAL_PWB='{{ params.out_run }}/pwb' +WEIGHTS='{{ params.weights }}' + +DATE_ONLY='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' + +DEST_PREFIX="leaves/${DATE_ONLY}/pwb" + +ENDPOINT_HOSTPORT='{{ (conn.minio_s3.host or "host.docker.internal") }}:{{ (conn.minio_s3.port or 9001) }}' +ENDPOINT_URL='{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://host.docker.internal:9001") }}' +BUCKET='{{ var.value.leaf_minio_bucket | default("imagery") }}' +export AWS_ACCESS_KEY_ID='{{ conn.minio_s3.login }}' +export AWS_SECRET_ACCESS_KEY='{{ conn.minio_s3.password }}' +export AWS_DEFAULT_REGION='us-east-1' +export AWS_S3_FORCE_PATH_STYLE=true + +mkdir -p "$OUT_LOCAL_PWB" + +cd "$PROJECT_ROOT" +$PY src/predict_pyramid_wbf.py \ + --input "$INPUT_DIR" \ + --out "$OUT_LOCAL_PWB" \ + --weights "$WEIGHTS" \ + --scales 0.75,1.0,1.25 --conf 0.25 --iou 0.55 --imgsz 896 --device cpu \ + --minio-endpoint "$ENDPOINT_HOSTPORT" \ + --minio-access "$AWS_ACCESS_KEY_ID" \ + --minio-secret "$AWS_SECRET_ACCESS_KEY" \ + --minio-bucket "$BUCKET" \ + --minio-prefix "leaves/${DATE_ONLY}" \ + --run-id "pwb" + +pip install -q awscli || true +python -m awscli s3 sync "$OUT_LOCAL_PWB"/ "s3://$BUCKET/$DEST_PREFIX/" --endpoint-url "$ENDPOINT_URL" +python -m awscli s3 ls "s3://$BUCKET/$DEST_PREFIX/" --recursive --endpoint-url "$ENDPOINT_URL" || true +""", + params={ + "project_root": PROJECT_ROOT, + # "python_bin": PYTHON_BIN, + "python_bin": "/usr/local/bin/python", + + "staging_dir": STAGING_DIR, + "out_run": OUT_RUN, + "weights": WEIGHTS, + "run_id_date": RUN_ID_DATE, + }, + env={"PYTHONUNBUFFERED": "1"}, + ) + + # ----------------------------- + # CROP -> imagery/leaves///
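    # i.e. crops are synced to s3://<bucket>/leaves/<YYYY/MM/DD/HHmm>/crop/ (see RUN_ID_CROP below)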
//crop/ + # ----------------------------- + # crop = BashOperator( + # task_id="crop", + # bash_command=""" + # set -euo pipefail + + # PROJECT_ROOT='{{ params.project_root }}' + # PY='{{ params.python_bin }}'; if ! command -v "$PY" >/dev/null 2>&1; then PY='python'; fi + # OUT_LOCAL_CROP='{{ params.out_run }}/crop' + # PWB_LOCAL='{{ params.out_run }}/pwb' + # RUN_ID_DATE='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' + + # RUN_ID_CROP="${RUN_ID_DATE}/crop" + + # ENDPOINT_URL='{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://host.docker.internal:9001") }}' + # BUCKET='{{ var.value.leaf_minio_bucket | default("imagery") }}' + # export AWS_ACCESS_KEY_ID='{{ conn.minio_s3.login }}' + # export AWS_SECRET_ACCESS_KEY='{{ conn.minio_s3.password }}' + # export AWS_DEFAULT_REGION='us-east-1' + # export AWS_S3_FORCE_PATH_STYLE=true + + # mkdir -p "$OUT_LOCAL_CROP" + + # # בוחרים סקריפט crop קיים בפרויקט: + # if [ -f "$PROJECT_ROOT/src/crop_only.py" ]; then + # cd "$PROJECT_ROOT" + # $PY src/crop_only.py --input "$PWB_LOCAL" --out "$OUT_LOCAL_CROP" + # elif [ -f "$PROJECT_ROOT/src/crop_from_meta.py" ]; then + # cd "$PROJECT_ROOT" + # $PY src/crop_from_meta.py --input "$PWB_LOCAL" --out "$OUT_LOCAL_CROP" + # else + # echo "[crop] No crop script found; will only sync if $OUT_LOCAL_CROP has files." + # fi + + # pip install -q awscli || true + # if [ -d "$OUT_LOCAL_CROP" ] && [ "$(ls -A "$OUT_LOCAL_CROP" || true)" ]; then + # python -m awscli s3 sync "$OUT_LOCAL_CROP"/ "s3://$BUCKET/leaves/$RUN_ID_CROP/" --endpoint-url "$ENDPOINT_URL" + # else + # echo "[crop] WARNING: no local crops found to upload." + # fi + + # python -m awscli s3 ls "s3://$BUCKET/leaves/$RUN_ID_CROP/" --recursive --endpoint-url "$ENDPOINT_URL" || true + # """, + # params={ + # "project_root": PROJECT_ROOT, + # "python_bin": PYTHON_BIN, + # "out_run": OUT_RUN, + # "run_id_date": RUN_ID_DATE, + # }, + # env={"PYTHONUNBUFFERED": "1"}, + # ) + crop = BashOperator( + task_id="crop", + bash_command=""" + set -euo pipefail + + PROJECT_ROOT='{{ params.project_root }}' + PY='{{ params.python_bin }}'; if ! command -v "$PY" >/dev/null 2>&1; then PY='python'; fi + OUT_LOCAL_CROP='{{ params.out_run }}/crop' + PWB_LOCAL='{{ params.out_run }}/pwb' + RUN_ID_DATE='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' + + RUN_ID_CROP="${RUN_ID_DATE}/crop" + + ENDPOINT_URL='{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://host.docker.internal:9001") }}' + BUCKET='{{ var.value.leaf_minio_bucket | default("imagery") }}' + export AWS_ACCESS_KEY_ID='{{ conn.minio_s3.login }}' + export AWS_SECRET_ACCESS_KEY='{{ conn.minio_s3.password }}' + export AWS_DEFAULT_REGION='us-east-1' + export AWS_S3_FORCE_PATH_STYLE=true + + # מזהה מכשיר ברירת מחדל לנרמול (אפשר להחליף ל-dev שלך) + export DEVICE_ID="${DEVICE_ID:-dev1}" + + mkdir -p "$OUT_LOCAL_CROP" + + # 1) מריצים crop (אם קיים) + if [ -f "$PROJECT_ROOT/src/crop_only.py" ]; then + cd "$PROJECT_ROOT" + $PY src/crop_only.py --input "$PWB_LOCAL" --out "$OUT_LOCAL_CROP" + elif [ -f "$PROJECT_ROOT/src/crop_from_meta.py" ]; then + cd "$PROJECT_ROOT" + $PY src/crop_from_meta.py --input "$PWB_LOCAL" --out "$OUT_LOCAL_CROP" + else + echo "[crop] No crop script found; will only sync if $OUT_LOCAL_CROP has files." + fi + + # 2) נרמול שמות הקבצים לפני העלאה (פורמט: _TZ[ _suffix].ext) + # - מנסה להוציא EXIF DateTimeOriginal; אם אין, נופל ל-mtime של הקובץ. 
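    #   Example (illustrative): a crop saved as "leaf (3).png" with capture time 2025-10-12 09:30:45 UTC
    #   becomes "dev1_20251012T093045Z_leaf-3.png"; files already matching <device>_<YYYYMMDDTHHMMSSZ>[_suffix].<ext> are left unchanged.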
+ python -m pip install -q pillow piexif || true + export OUT_LOCAL_CROP # כדי ש-Python ידע איפה לעבוד + python - <<'PY' +import os, re, sys, time +from datetime import datetime, timezone +OUT = os.environ.get("OUT_LOCAL_CROP", "") +DEVICE = os.environ.get("DEVICE_ID", "dev1") + +if not OUT or not os.path.isdir(OUT): + sys.exit(0) + +IMG_EXT = {".jpg",".jpeg",".png",".webp",".tif",".tiff",".bmp"} +iso_re = re.compile(r"^[A-Za-z0-9\-]+_\d{8}T\d{6}Z(?:[ _][^/\\\\]+)?\\.[A-Za-z0-9]+$") + +def get_ts_from_exif(path): + try: + import piexif + from PIL import Image + with Image.open(path) as im: + exif = im.info.get("exif") + if not exif: + return None + exif_dict = piexif.load(exif) + dt = exif_dict["Exif"].get(piexif.ExifIFD.DateTimeOriginal) or \ + exif_dict["Exif"].get(piexif.ExifIFD.DateTimeDigitized) or \ + exif_dict["0th"].get(piexif.ImageIFD.DateTime) + if not dt: + return None + # פורמט EXIF: "YYYY:MM:DD HH:MM:SS" + s = dt.decode() if isinstance(dt, bytes) else dt + dt_obj = datetime.strptime(s, "%Y:%m:%d %H:%M:%S").replace(tzinfo=timezone.utc) + return dt_obj + except Exception: + return None + +def ts_for_file(path): + dt = get_ts_from_exif(path) + if dt is None: + # fallback: mtime כ-UTC + mt = os.path.getmtime(path) + dt = datetime.fromtimestamp(mt, tz=timezone.utc) + return dt + +renamed = 0 +skipped = 0 +for root, _, files in os.walk(OUT): + for f in files: + ext = os.path.splitext(f)[1].lower() + if ext not in IMG_EXT: + continue + if iso_re.match(f): + skipped += 1 + continue + old = os.path.join(root, f) + dt = ts_for_file(old) + ts = dt.strftime("%Y%m%dT%H%M%SZ") + # suffix אופציונלי אם יש טקסט לא ריק בשם המקורי (בלי סיומת) + base = os.path.splitext(f)[0] + suffix = "" + if base and base.lower() not in {"img","image","photo","dsc","dscn"}: + # מנקים רווחים כפולים ותווים בעייתיים + cleaned = re.sub(r"[^A-Za-z0-9._-]+", "-", base).strip("-_.") + if cleaned and cleaned != ts: + suffix = f"_{cleaned}" + new_name = f"{DEVICE}_{ts}{suffix}{ext}" + new = os.path.join(root, new_name) + if new == old: + skipped += 1 + continue + # הימנעות מדריסה + i = 1 + new_final = new + while os.path.exists(new_final): + new_final = os.path.join(root, f"{os.path.splitext(new_name)[0]}_{i}{ext}") + i += 1 + os.rename(old, new_final) + print(f"[crop][rename] {f} -> {os.path.basename(new_final)}") + renamed += 1 + +print(f"[crop][rename] done: renamed={renamed}, already_ok={skipped}") +PY + + # 3) העלאה ל-MinIO + pip install -q awscli || true + if [ -d "$OUT_LOCAL_CROP" ] && [ "$(ls -A "$OUT_LOCAL_CROP" || true)" ]; then + python -m awscli s3 sync "$OUT_LOCAL_CROP"/ "s3://$BUCKET/leaves/$RUN_ID_CROP/" --endpoint-url "$ENDPOINT_URL" + else + echo "[crop] WARNING: no local crops found to upload." 
+ fi + + python -m awscli s3 ls "s3://$BUCKET/leaves/$RUN_ID_CROP/" --recursive --endpoint-url "$ENDPOINT_URL" || true + """, + params={ + "project_root": PROJECT_ROOT, + "python_bin": PYTHON_BIN, + "out_run": OUT_RUN, + "run_id_date": RUN_ID_DATE, + }, + env={"PYTHONUNBUFFERED": "1"}, +) + + # ----------------------------- + # DETECTION_JOBS -> קורא מ imagery/leaves//crop/ + # ----------------------------- + # detection_jobs = DockerOperator( + # task_id="detection_jobs", + # image="detection-jobs:cpu-lts", + # command="python agri_baseline/src/batch_runner.py", + # # task_id="detection_jobs", + # # image="detection-jobs:cpu-lts", + # auto_remove=True, + # mount_tmp_dir=False, + # # network_mode="host", + # network_mode="agcloud_ag_cloud", + + # environment={ + # "MINIO_ENDPOINT": "{{ conn.minio_s3.extra_dejson.endpoint_url | default('http://host.docker.internal:9001') }}", + # "AWS_ACCESS_KEY_ID": "{{ conn.minio_s3.login }}", + # "AWS_SECRET_ACCESS_KEY": "{{ conn.minio_s3.password }}", + # "AWS_S3_FORCE_PATH_STYLE": "true", + # "DATABASE_URL": "postgresql+psycopg2://missions_user:pg123@postgres:5432/missions_db", + + # }, + # command=[ + # "bash","-lc", + # "pip install -q awscli || true; " + # "RID='{{ dag_run.conf.get('run_id') or logical_date.in_timezone('Asia/Jerusalem').strftime('%Y/%m/%d/%H%M') }}'; " + # "SRC=s3://{{ var.value.leaf_minio_bucket | default('imagery') }}/leaves/${RID}/crop/; " + # "mkdir -p /work/in; " + # "python -m awscli s3 cp --recursive \"$SRC\" /work/in --endpoint-url {{ conn.minio_s3.extra_dejson.endpoint_url }} || true; " + # "echo \"[detection_jobs] INPUT from $SRC\"; " + # "ls -R /work/in || true; " + # "echo '[detection_jobs] no-op (batch_runner.py missing)'" + # ], + # tty=True, + # ) +# detection_jobs = DockerOperator( +# task_id="detection_jobs", +# image="detection-jobs:cpu-lts", +# docker_url="unix://var/run/docker.sock", +# api_version="auto", +# auto_remove=True, +# mount_tmp_dir=False, +# working_dir="/app", # <<< עדכני ל-root של הפרויקט בתמונה שלך +# network_mode="agcloud_ag_cloud", +# environment={ +# "MINIO_ENDPOINT": "{{ conn.minio_s3.extra_dejson.endpoint_url | default('http://host.docker.internal:9001') }}", +# "AWS_ACCESS_KEY_ID": "{{ conn.minio_s3.login }}", +# "AWS_SECRET_ACCESS_KEY": "{{ conn.minio_s3.password }}", +# "AWS_S3_FORCE_PATH_STYLE": "true", +# "DATABASE_URL": "postgresql+psycopg2://missions_user:pg123@postgres:5432/missions_db", +# }, +# # command=[ +# # "bash","-lc", +# # "set -euo pipefail; " +# # "pip install -q awscli || true; " +# # "RID='{{ dag_run.conf.get('run_id') or logical_date.in_timezone('Asia/Jerusalem').strftime('%Y/%m/%d/%H%M') }}'; " +# # "SRC=s3://{{ var.value.leaf_minio_bucket | default('imagery') }}/leaves/${RID}/crop/; " +# # "mkdir -p /work/in; " +# # "python -m awscli s3 cp --recursive \"$SRC\" /work/in --endpoint-url {{ conn.minio_s3.extra_dejson.endpoint_url }} || true; " +# # # "python agri_baseline/src/batch_runner.py --input /work/in --log-level INFO" +# # "python /app/agri_baseline/src/batch_runner.py --input ... 
--mission baseline --device cpu +# # " +# # ], +# # tty=True, +# command=[ +# "bash","-lc", +# """ +# set -euo pipefail + +# pip install -q awscli || true + +# RID='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' +# SRC="s3://{{ var.value.leaf_minio_bucket | default('imagery') }}/leaves/${RID}/crop/" + +# mkdir -p /work/in + +# python -m awscli s3 cp --recursive "$SRC" /work/in \ +# --endpoint-url '{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://host.docker.internal:9001") }}' || true + +# python /app/agri_baseline/src/batch_runner.py \ +# --input /work/in \ +# --mission baseline \ +# --device cpu +# """ +# ], +# tty=True, + +# ) + + +# detection_jobs = DockerOperator( +# task_id="detection_jobs", +# image="detection-jobs:cpu-lts", +# docker_url="unix://var/run/docker.sock", +# api_version="auto", +# auto_remove=True, # מנקה קונטיינר אחרי סיום +# mount_tmp_dir=False, +# working_dir="/app", # ודאי ש-/app קיים בתמונה +# network_mode="agcloud_ag_cloud", + +# environment={ +# # MinIO (ברירת מחדל לרשת הפנימית, נלקח מה-Connection אם קיים) +# "MINIO_ENDPOINT": "{{ conn.minio_s3.extra_dejson.endpoint_url | default('http://minio-hot:9001') }}", +# "AWS_ACCESS_KEY_ID": "{{ conn.minio_s3.login }}", +# "AWS_SECRET_ACCESS_KEY": "{{ conn.minio_s3.password }}", +# "AWS_S3_FORCE_PATH_STYLE": "true", +# "AWS_DEFAULT_REGION": "us-east-1", + +# # Postgres (שם השירות ברשת compose) +# "DATABASE_URL": "postgresql+psycopg2://missions_user:pg123@agcloud-postgres:5432/missions_db", + +# # מגן מקריסה של bash/rc עם set -u +# "USER": "root", +# "HOME": "/root", +# }, + +# # לא מגדירים entrypoint כדי לא להתנגש עם מה שבתמונה; מריצים פקודות דרך bash -c +# command=[ +# "bash","-c", +# """ +# set -xeuo pipefail + +# echo "[DJ] hello"; whoami; pwd; python -V || true + +# # עדיף שה-awscli יהיה מותקן בתמונה; כאן נשאיר fallback עדין +# python -m pip install --no-cache-dir -q awscli || true +# python -m awscli --version || true + +# RID='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' +# SRC="s3://{{ var.value.leaf_minio_bucket | default('imagery') }}/leaves/${RID}/crop/" + +# mkdir -p /work/in +# echo "[DJ] syncing from $SRC via {{ conn.minio_s3.extra_dejson.endpoint_url | default('http://minio-hot:9001') }}" +# python -m awscli s3 cp --recursive "$SRC" /work/in \ +# --endpoint-url '{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://minio-hot:9001") }}' || true + +# echo "[DJ] running batch_runner..." 
+ + +# python - <<'PY' +# import sys, runpy +# # מזריקים ערך דיפולטי כדי שהעזרה/argparse לא יפלו +# init_globals = {"MISSION_ALIASES": ["baseline"]} + +# # בונים argv כאילו קראנו מהשורה +# sys.argv = [ +# "batch_runner.py", +# "--input", "/work/in", +# "--mission", "baseline", +# "--device", "cpu", +# ] + +# # מריצים את הקובץ כ-__main__ עם הגלובלים שהזרקנו +# runpy.run_path("/app/agri_baseline/src/batch_runner.py", +# run_name="__main__", init_globals=init_globals) +# PY + +# """ +# ], +# ) +# detection_jobs = DockerOperator( +# task_id="detection_jobs", +# image="detection-jobs:cpu-lts", +# docker_url="unix://var/run/docker.sock", +# api_version="auto", +# auto_remove=True, # מנקה קונטיינר אחרי סיום +# mount_tmp_dir=False, +# working_dir="/app", # ודאי ש-/app קיים בתמונה +# network_mode="agcloud_ag_cloud", + +# environment={ +# # MinIO (ברירת מחדל לרשת הפנימית, נלקח מה-Connection אם קיים) +# "MINIO_ENDPOINT": "{{ conn.minio_s3.extra_dejson.endpoint_url | default('http://minio-hot:9001') }}", +# "AWS_ACCESS_KEY_ID": "{{ conn.minio_s3.login }}", +# "AWS_SECRET_ACCESS_KEY": "{{ conn.minio_s3.password }}", +# "AWS_S3_FORCE_PATH_STYLE": "true", +# "AWS_DEFAULT_REGION": "us-east-1", + +# # Postgres (שם השירות ברשת compose) +# "DATABASE_URL": "postgresql+psycopg2://missions_user:pg123@agcloud-postgres:5432/missions_db", + +# # מגן מקריסה של bash/rc עם set -u +# "USER": "root", +# "HOME": "/root", +# }, + +# # לא מגדירים entrypoint; מריצים פקודות דרך bash -c +# # command=[ +# # "bash","-c", +# # """ +# # set -xeuo pipefail + +# # echo "[DJ] hello"; whoami; pwd; python -V || true + +# # # עדיף שה-awscli יהיה מותקן בתמונה; כאן נשאיר fallback עדין +# # python -m pip install --no-cache-dir -q awscli || true +# # python -m awscli --version || true + +# # RID='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' +# # SRC="s3://{{ var.value.leaf_minio_bucket | default('imagery') }}/leaves/${RID}/crop/" + +# # mkdir -p /work/in +# # echo "[DJ] syncing from $SRC via {{ conn.minio_s3.extra_dejson.endpoint_url | default('http://minio-hot:9001') }}" +# # python -m awscli s3 cp --recursive "$SRC" /work/in \ +# # --endpoint-url '{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://minio-hot:9001") }}' || true + +# # echo "[DJ] running batch_runner..." 
+ +# # # 🔧 הזרקת פול־בק ל-MISSION_ALIASES בלי בנייה מחדש +# # python - <<'PY' +# # import runpy, io, os, sys, re, pathlib +# # p = "/app/agri_baseline/src/batch_runner.py" +# # code = pathlib.Path(p).read_text(encoding="utf-8") + +# # # נכניס הגדרה בטוחה אם אין כבר הגדרה בשורות הראשונות +# # inject = ( +# # "try:\n" +# # " from agri_baseline.src.missions import MISSION_ALIASES # runtime patch\n" +# # "except Exception:\n" +# # " MISSION_ALIASES = []\n" +# # ) +# # if "MISSION_ALIASES" not in code.splitlines()[:60]: +# # code = inject + code +# # pathlib.Path(p).write_text(code, encoding="utf-8") + +# # # מריצים את הסקריפט כמו קודם +# # runpy.run_path(p, run_name="__main__") +# # PY + + +# # """ +# # ], +# command=[ +# "bash","-c", +# r''' +# set -xeuo pipefail + +# echo "[DJ] hello"; whoami; pwd; python3 -V || true + +# # ודאות ל-awscli (רק אם חסר) +# python3 -m pip install --no-cache-dir -q awscli || true +# python3 -m awscli --version || true + +# # RID דינמי כמו שיש לך (בלי תאריך קשיח) +# RID='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' +# SRC="s3://{{ var.value.leaf_minio_bucket | default('imagery') }}/leaves/${RID}/crop/" +# ENDPOINT='{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://minio-hot:9001") }}' + +# mkdir -p /work/in /work/out + +# echo "[DJ] syncing from $SRC via $ENDPOINT" +# python3 -m awscli s3 cp --recursive "$SRC" /work/in --endpoint-url "$ENDPOINT" || true + +# echo "[DJ] locating batch_runner.py safely" + +# # מריצים פייתון קטן שמאתר את הסקריפט ומריץ אותו אם קיים. +# # python3 - <<'PY' +# # import os, sys, runpy, importlib + +# # # שיהיה קל לפייתון למצוא את החבילה שלך +# # sys.path.insert(0, "/app") + +# # MOD = "agri_baseline.src.batch_runner" +# # PATH_FALLBACK = "/app/agri_baseline/src/batch_runner.py" + +# # def ensure_missions_if_missing(err): +# # """אם חסר רק agri_baseline.src.missions – נייצר מודול זמני בזיכרון (לא דיסק) ואז ננסה שוב.""" +# # if "agri_baseline.src.missions" in str(err): +# # import types +# # missions = types.ModuleType("agri_baseline.src.missions") +# # missions.MISSION_ALIASES = [] +# # sys.modules["agri_baseline.src.missions"] = missions +# # return True +# # return False + +# # try: +# # # נסה להריץ כמודול (הדרך הנקייה) +# # runpy.run_module(MOD, run_name="__main__") +# # except ModuleNotFoundError as e: +# # if ensure_missions_if_missing(e): +# # runpy.run_module(MOD, run_name="__main__") +# # else: +# # # אם זה לא עניין של missions – ננסה לפי נתיב ישיר +# # if os.path.isfile(PATH_FALLBACK): +# # runpy.run_path(PATH_FALLBACK, run_name="__main__") +# # else: +# # print("[DJ] batch_runner.py לא נמצא; NO-OP") +# # sys.exit(0) +# # PY + +# # 1) shim package שיכלול config עם MISSION_ALIASES +# mkdir -p /tmp/shim/agri_baseline +# cat >/tmp/shim/agri_baseline/__init__.py <<'PY' +# # shim pkg +# PY +# cat >/tmp/shim/agri_baseline/config.py <<'PY' +# # shimbed config: אפשר לעדכן את הרשימה לפי הצורך +# MISSION_ALIASES = ("demo", "default", "mission1") +# PY + +# # 2) להבטיח שהשִׁים קודם ב-PYTHONPATH +# export PYTHONPATH="/tmp/shim:${PYTHONPATH}" + +# # 3) להריץ את הרַאנֶר (אפשר להעביר --mission כדי למנוע ויכוחי פרמטרים) +# python3 -m agri_baseline.src.batch_runner --mission demo + +# echo "[DJ] finished (no upload)" +# ''' +# ], + +# ) +# from airflow.providers.docker.operators.docker import DockerOperator + +# detection_jobs = DockerOperator( +# task_id="detection_jobs", +# image="detection-jobs:cpu-lts", +# docker_url="unix://var/run/docker.sock", +# api_version="auto", +# 
auto_remove=True, +# mount_tmp_dir=False, +# working_dir="/app", +# network_mode="agcloud_ag_cloud", +# environment={ +# "MINIO_ENDPOINT": "{{ conn.minio_s3.extra_dejson.endpoint_url | default('http://minio-hot:9001') }}", +# "AWS_ACCESS_KEY_ID": "{{ conn.minio_s3.login }}", +# "AWS_SECRET_ACCESS_KEY": "{{ conn.minio_s3.password }}", +# "AWS_S3_FORCE_PATH_STYLE": "true", +# "AWS_DEFAULT_REGION": "us-east-1", +# "DATABASE_URL": "postgresql+psycopg2://missions_user:pg123@agcloud-postgres:5432/missions_db", +# "USER": "root", +# "HOME": "/root", +# }, +# # command=[ +# # "/bin/bash","-lc", +# # r''' +# # set -euo pipefail + +# # cd /app +# # python3 -V || true + +# # # awscli (שקט; אם כבר מותקן לא יפיל) +# # python3 -m pip install --no-cache-dir -q awscli || true + +# # # RID מסונכרן לשעת הריצה של Airflow (Asia/Jerusalem) +# # RID='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' +# # BUCKET='{{ var.value.leaf_minio_bucket | default("imagery") }}' +# # SRC="s3://${BUCKET}/leaves/${RID}/crop/" +# # ENDPOINT='{{ conn.minio_s3.extra_dejson.endpoint_url | default("http://minio-hot:9001") }}' + +# # mkdir -p /work/in /work/out +# # echo "[DJ] syncing from ${SRC} via ${ENDPOINT}" +# # python3 -m awscli s3 cp --recursive "$SRC" /work/in --endpoint-url "$ENDPOINT" || true + +# # # מריצים את ה-runner בלי לשנות קבצים: מזריקים MISSION_ALIASES בזיכרון לפני main() +# # export PYTHONPATH=/app + +# # python3 - <<'PY' +# # import sys, importlib + +# # mod = importlib.import_module('agri_baseline.src.batch_runner') + +# # # אם המשתנה לא מוגדר בקוד – נגדיר לפני הקריאה ל-main +# # if not hasattr(mod, 'MISSION_ALIASES'): +# # mod.MISSION_ALIASES = ('demo', 'default') + +# # # פרמטרים להפעלה +# # sys.argv = [ +# # 'batch_runner.py', +# # '--mission','demo', +# # '--input','/work/in', +# # '--output','/work/out', +# # ] + +# # # הרצה שמכבדת argparse שעושה SystemExit +# # code = 0 +# # try: +# # mod.main() +# # except SystemExit as e: +# # code = int(e.code) if isinstance(e.code, int) else 1 +# # sys.exit(code) +# # PY + +# # echo "[DJ] finished" +# # ''' +# # ], +# command=[ +# "/bin/bash","-lc", +# r''' +# set -euo pipefail + +# echo "[DJ] START"; whoami; pwd; python3 -V +# mkdir -p /work/in /work/out + +# # 1) ודאות ל-awscli ואז סנכרון מ-MinIO למסלול הקלט +# python3 -m pip install --no-cache-dir -q awscli || true +# RID='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' +# BUCKET='{{ var.value.leaf_minio_bucket | default("imagery") }}' +# SRC="s3://${BUCKET}/leaves/${RID}/crop/" +# ENDPOINT="${MINIO_ENDPOINT:-{{ conn.minio_s3.extra_dejson.endpoint_url | default('http://host.docker.internal:9001') }}}" + +# echo "[DJ] syncing from ${SRC} via ${ENDPOINT}" +# python3 -m awscli s3 cp --recursive "$SRC" /work/in --endpoint-url "$ENDPOINT" || true + +# # 2) אם תיקיית הקלט ריקה — מדפיסים ומסיימים בהצלחה (לא כישלון) +# if [ -z "$(find /work/in -type f -maxdepth 2 -print -quit 2>/dev/null)" ]; then +# echo "[DJ] no input files found at /work/in — SKIP" +# exit 0 +# fi + +# # 3) הרצת batch_runner עם פאטצ'ים בטוחים +# export PYTHONPATH=/app +# echo "[DJ] RUN batch_runner" + +# python3 - <<'PY' +# import sys, importlib + +# mod = importlib.import_module('agri_baseline.src.batch_runner') + +# # פאטצ' בטוח אם חסר +# if not hasattr(mod, 'MISSION_ALIASES'): +# mod.MISSION_ALIASES = ('demo','default') +# if not hasattr(mod, 'parse_mission'): +# def parse_mission(x): +# try: return int(x) +# except (TypeError, 
ValueError): return str(x) +# mod.parse_mission = parse_mission + +# # argv לפי מה שהמודול מכיר (ללא --output) +# sys.argv = [ +# 'batch_runner.py', +# '--mission','demo', +# '--input','/work/in', +# '--log-level','INFO', +# '--device','cpu', +# ] + +# code = 0 +# try: +# mod.main() +# except SystemExit as e: +# code = int(e.code) if isinstance(e.code, int) else 1 +# print("[DJ] batch_runner exit code:", code) +# sys.exit(code) +# PY + +# echo "[DJ] DONE" +# ''' +# ] + +# ) + + from airflow.providers.docker.operators.docker import DockerOperator + + detection_jobs = DockerOperator( + task_id="detection_jobs", + image="detection-jobs:cpu-lts", + docker_url="unix://var/run/docker.sock", + api_version="auto", + auto_remove=True, + mount_tmp_dir=False, + working_dir="/app", + network_mode="agcloud_ag_cloud", + + environment={ + # ---- MinIO (פנימי) ---- + "MINIO_ENDPOINT": "{{ conn.minio_s3.extra_dejson.endpoint_url | default('http://minio-hot:9001') }}", + "AWS_ACCESS_KEY_ID": "{{ conn.minio_s3.login }}", + "AWS_SECRET_ACCESS_KEY": "{{ conn.minio_s3.password }}", + "AWS_S3_FORCE_PATH_STYLE": "true", + "AWS_DEFAULT_REGION": "us-east-1", + + # ---- Postgres (שם שירות compose הוא 'postgres') ---- + "DATABASE_URL": "postgresql+psycopg2://missions_user:pg123@postgres:5432/missions_db", + + "USER": "root", + "HOME": "/root", + }, + + command=[ + "/bin/bash","-lc", r''' +set -euo pipefail +echo "[DJ] START"; whoami; pwd; python3 -V + +# awscli (שקט; אם כבר מותקן זה לא יפיל) +python3 -m pip install --no-cache-dir -q awscli || true + +# === 1) סנכרון קלט מ-MinIO (לא קריטי ל-DB) === +RID='{{ dag_run.conf.get("run_id") or logical_date.in_timezone("Asia/Jerusalem").strftime("%Y/%m/%d/%H%M") }}' +BUCKET='{{ var.value.leaf_minio_bucket | default("imagery") }}' +SRC="s3://${BUCKET}/leaves/${RID}/crop/" +ENDPOINT="${MINIO_ENDPOINT:-http://minio-hot:9001}" + +mkdir -p /work/in /work/out +echo "[DJ] sync from ${SRC} via ${ENDPOINT}" +python3 -m awscli s3 cp --recursive "$SRC" /work/in --endpoint-url "$ENDPOINT" || true +# python3 -m awscli s3 cp --recursive "$SRC" /work/in --endpoint-url "$ENDPOINT" || true +# === Normalize input filenames and swap /work/in atomically === +set -euo pipefail + +IN_DIR="/work/in" +READY_DIR="/work/in_ready" +DEVICE_ID="${DEVICE_ID:-dev1}" + +# אם ה-IN_DIR לא קיים/ריק – אין מה לעשות +if [ ! -d "$IN_DIR" ] || [ -z "$(ls -A "$IN_DIR" 2>/dev/null || true)" ]; then + echo "[DJ][normalize] nothing to normalize in $IN_DIR" +else + rm -rf "$READY_DIR" && mkdir -p "$READY_DIR" + + # נעתיק/נרנמל *רק* קבצי תמונה (כולל תתי-תיקיות) + # חישוב timestamp לפי mtime של הקובץ (UTC), בלי תלות ב-EXIF + while IFS= read -r -d '' f; do + ext="${f##*.}"; ext_lc="$(printf '%s' "$ext" | tr '[:upper:]' '[:lower:]')" + base="$(basename "$f")" + # בדיקת התאמה לפורמט התקין + if printf '%s' "$base" | grep -Eq '^[A-Za-z0-9\-]+_[0-9]{8}T[0-9]{6}Z([ _][^/\\]+)?\.[A-Za-z0-9]+$'; then + # כבר תקין → שמור נתיב יחסי באותו מבנה תיקיות + rel="${f#"$IN_DIR"/}" + out_path="$READY_DIR/$rel" + mkdir -p "$(dirname "$out_path")" + cp -p "$f" "$out_path" + echo "[DJ][keep] $base" + else + # יצירת שם חדש: DEVICE_YYYYMMDDTHHMMSSZ _. 
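      # Example (illustrative): "IMG_0042.JPG" last modified 2025-10-12 09:30:45 UTC
      # becomes "dev1_20251012T093045Z_IMG_0042.jpg" (DEVICE_ID defaults to "dev1").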
+ # זמן מ-mtime: + ts_epoch="$(stat -c '%Y' "$f" 2>/dev/null || stat -f '%m' "$f")" + ts="$(date -u -d "@$ts_epoch" +%Y%m%dT%H%M%SZ 2>/dev/null || gdate -u -d "@$ts_epoch" +%Y%m%dT%H%M%SZ)" + stem="${base%.*}" + # ניקוי suffix משם מקורי לסלאג קצר + slug="$(printf '%s' "$stem" | tr '[:space:]' '-' | tr -cd 'A-Za-z0-9._-' | sed -E 's/[-_.]+$//;s/^[-_.]+//')" + [ -n "$slug" ] && suffix="_$slug" || suffix="" + new="${DEVICE_ID}_${ts}${suffix}.${ext_lc}" + + rel_dir="$(dirname "${f#"$IN_DIR"/}")" + out_dir="$READY_DIR/$rel_dir" + mkdir -p "$out_dir" + out_path="$out_dir/$new" + i=1 + while [ -e "$out_path" ]; do + out_path="$out_dir/${DEVICE_ID}_${ts}${suffix}_$i.${ext_lc}" + i=$((i+1)) + done + cp -p "$f" "$out_path" + echo "[DJ][rename] $base -> $(basename "$out_path")" + fi + done < <(find "$IN_DIR" -type f \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' -o -iname '*.tif' -o -iname '*.tiff' -o -iname '*.bmp' \) -print0) + + echo "[DJ][normalize] ready tree:" + find "$READY_DIR" -maxdepth 2 -type f -printf '%P\n' | sed -n '1,50p' + + # מחליפים את /work/in כך שה-runner לא "ירגיש" שינוי + rm -rf "$IN_DIR".bak + if [ -L "$IN_DIR" ] || [ -d "$IN_DIR" ]; then mv "$IN_DIR" "$IN_DIR".bak || true; fi + ln -s "$READY_DIR" "$IN_DIR" + echo "[DJ][normalize] swapped $IN_DIR -> symlink to $READY_DIR" +fi + +# === נרמול שמות ב-/work/in כדי שיעמדו בפורמט הנדרש === +export DEVICE_ID="${DEVICE_ID:-dev1}" +python3 -m pip install -q pillow piexif || true +python3 - <<'PY' +import os, re, sys +from datetime import datetime, timezone +OUT="/work/in" +IMG_EXT={".jpg",".jpeg",".png",".webp",".tif",".tiff",".bmp"} +need=re.compile(r"^[A-Za-z0-9\-]+_\d{8}T\d{6}Z(?:[ _][^/\\]+)?\.[A-Za-z0-9]+$") +DEVICE=os.environ.get("DEVICE_ID","dev1") + +def exif_dt(p): + try: + import piexif + from PIL import Image + with Image.open(p) as im: + exif=im.info.get("exif") + if not exif: return None + d=piexif.load(exif) + dt = d["Exif"].get(piexif.ExifIFD.DateTimeOriginal) or \ + d["Exif"].get(piexif.ExifIFD.DateTimeDigitized) or \ + d["0th"].get(piexif.ImageIFD.DateTime) + if not dt: return None + s = dt.decode() if isinstance(dt,bytes) else dt + return datetime.strptime(s,"%Y:%m:%d %H:%M:%S").replace(tzinfo=timezone.utc) + except Exception: + return None + +def ts_for(p): + dt=exif_dt(p) + if not dt: + dt=datetime.fromtimestamp(os.path.getmtime(p), tz=timezone.utc) + return dt.strftime("%Y%m%dT%H%M%SZ") + +ren=0; skip=0 +for root,_,files in os.walk(OUT): + for f in files: + ext=os.path.splitext(f)[1].lower() + if ext not in IMG_EXT: continue + if need.match(f): + skip+=1; continue + old=os.path.join(root,f) + ts=ts_for(old) + base=os.path.splitext(f)[0] + cleaned=re.sub(r"[^A-Za-z0-9._-]+","-",base).strip("-_.") + suffix=f"_{cleaned}" if cleaned else "" + new=f"{DEVICE}_{ts}{suffix}{ext}" + newp=os.path.join(root,new) + i=1 + while os.path.exists(newp): + stem,ext2=os.path.splitext(new) + newp=os.path.join(root,f"{stem}_{i}{ext2}"); i+=1 + os.rename(old,newp); ren+=1 + print(f"[DJ][rename] {f} -> {os.path.basename(newp)}") +print(f"[DJ][rename] done: renamed={ren}, already_ok={skip}") +PY + +# אם אין קבצים - עדיין נריץ כדי לבדוק/לרשום ל-DB אם הלוגיקה שלך תומכת +if [ -z "$(ls -A /work/in 2>/dev/null || true)" ]; then + echo "[DJ] no local input files — continuing (DB path may still run)" +fi + +# === 2) בדיקת DNS לרכיב Postgres === +if ! 
getent hosts postgres >/dev/null 2>&1; then + echo "[DJ][ERR] cannot resolve 'postgres' on network agcloud_ag_cloud"; exit 2 +fi + +# === 3) המתנה אקטיבית לזמינות ה-DB (SQLAlchemy + psycopg2) === +# python3 - <<'PY' +# import os, time, sys +# from sqlalchemy import create_engine, text +# url = os.environ["DATABASE_URL"] +# retries, delay = 30, 2 +# for i in range(retries): +# try: +# eng = create_engine(url, pool_pre_ping=True, future=True) +# with eng.connect() as conn: +# conn.execute(text("SELECT 1")) +# print("[DJ] DB is ready") +# break +# except Exception as e: +# print(f"[DJ] DB not ready yet ({i+1}/{retries}): {e}") +# time.sleep(delay) +# else: +# print("[DJ][ERR] DB did not become ready in time") +# sys.exit(3) +# PY +python3 - <<'PY' +import os, sys, importlib + +os.environ.setdefault("DATABASE_URL", os.environ.get("DATABASE_URL","")) +mod = importlib.import_module('agri_baseline.src.batch_runner') + +# --- תיקון נקודתי: אם parse_mission לא קיים, מגדירים אותו כאן --- +if not hasattr(mod, 'parse_mission'): + def _parse_mission(alias_or_id: str): + # מאפשר גם alias → id דרך משתנה סביבה אופציונלי: MISSION_MAP="demo:1,prod:2" + env_map = os.environ.get("MISSION_MAP", "demo:1") + mapping = {} + for pair in (p.strip() for p in env_map.split(",") if p.strip()): + k, v = pair.split(":", 1) + mapping[k.strip()] = int(v.strip()) + try: + return int(alias_or_id) + except ValueError: + return mapping.get(alias_or_id, next(iter(mapping.values()))) + mod.parse_mission = _parse_mission +# ----------------------------------------------------------------- + +# ארגומנטים ל-runner (התאימי alias אם תרצי דרך MISSION_MAP) +sys.argv = ['batch_runner.py', '--input', '/work/in', '--mission', '1'] + +exit_code = 0 +try: + mod.main() +except SystemExit as e: + exit_code = int(e.code) if isinstance(e.code, int) else 1 +sys.exit(exit_code) +PY + +# === 4) הרצת ה-runner עם כתיבה ל-Postgres דרך DATABASE_URL === +echo "[DJ] RUN batch_runner" +export PYTHONPATH=/app +python3 - <<'PY' +import os, sys, importlib + +# הבטחת משתני סביבה חיוניים +os.environ.setdefault("DATABASE_URL", os.environ.get("DATABASE_URL","")) + +# טעינת ה-runner +mod = importlib.import_module('agri_baseline.src.batch_runner') + +# ברירת מחדל עדינה אם אין MISSION_ALIASES +if not hasattr(mod, 'MISSION_ALIASES'): + mod.MISSION_ALIASES = ('demo',) + +# העברת ארגומנטים שה-runner מכיר לשם כתיבה ל-DB +# (מבוסס על usage בלוגים שלך: --input [--mission] [--device]) +argv = ['batch_runner.py', '--input', '/work/in', '--mission','1'] +sys.argv = argv + +exit_code = 0 +try: + mod.main() +except SystemExit as e: + exit_code = int(e.code) if isinstance(e.code, int) else 1 +sys.exit(exit_code) +PY + +echo "[DJ] DONE" + ''' + ], +) + + disease_monitor = DockerOperator( + task_id="disease_monitor", + image="disease-monitor:cpu-lts", + command="--config /app/configs/config.docker.yaml --log-level INFO", + + working_dir="/app", + docker_url="unix://var/run/docker.sock", # ל-Webserver/Scheduler יש גישה ל-Docker host + api_version="auto", + auto_remove=True, + network_mode="agcloud_ag_cloud", # אותה רשת של ה-Postgres + dag=dag, +) + + stage_input >> detect >> pwb >> crop >> detection_jobs>>disease_monitor diff --git a/airflow/leaf-pipeline/dags/configs/disease_monitor.yaml b/airflow/leaf-pipeline/dags/configs/disease_monitor.yaml new file mode 100644 index 000000000..465a10ca7 --- /dev/null +++ b/airflow/leaf-pipeline/dags/configs/disease_monitor.yaml @@ -0,0 +1,67 @@ +# io: +# # IMPORTANT: use the Docker service name of Postgres (from your compose): +# 
postgres_url: "postgresql+psycopg2://missions_user:pg123@postgres:5432/missions_db" +io: + postgres_url: "postgresql+psycopg2://postgres:postgres@agcloud-postgres:5432/postgres" + + +windows: + frequency: "D" + timezone: "UTC" + +source_mapping: + entity_dim: "mission" # or "region"/"device" + area_strategy: "none" # or "region_area" (requires regions table/geom) + filters: + start_time: null + end_time: null + anomaly_codes: null + +baseline: + method: "median" + lookback_periods: 28 + min_history: 7 + seasonality: null + +rules: + count_anomaly: + enabled: true + method: "zscore" + z_threshold: 3.0 + iqr_k: 1.5 + min_count: 3 + worsening: + enabled: true + method: "slope" + slope_lookback: 7 + slope_min: 0.02 + min_periods: 5 + ewma_span: 7 + ewma_threshold: 0.6 + +alerting: + dedup_cooldown_windows: 3 + resolve_after_no_anomaly: 3 + rate_limit_per_run: 100 + group_by_window: true + +delivery: + slack: + enabled: false + webhook_url: "" + webhook: + enabled: false + url: "" + headers: {} + email: + enabled: false + smtp_host: "" + smtp_port: 587 + username: "" + password_env: "SMTP_PASSWORD" + from_addr: "" + to_addrs: [] + +run: + dry_run: false + diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/.gitignore b/airflow/leaf-pipeline/projects/Detection_Jobs/.gitignore new file mode 100644 index 000000000..06593e32e --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/.gitignore @@ -0,0 +1,55 @@ +# ==== OS / IDE ==== +.DS_Store +Thumbs.db +.vscode/ +.idea/ + +# ==== Node ==== +node_modules/ +dist/ + +# ==== Python ==== +__pycache__/ +*.py[cod] +*.pyc +*.pyo +*.so +*.dylib + +# ==== Virtual envs ==== +.venv/ +venv/ +ENV/ +env/ + +# ==== Packaging / build ==== +build/ +*.egg-info/ + +# ==== Environment / Secrets ==== +.env +.env.* + +# ==== Data / Notebooks / Logs ==== +*.log +*.ipynb +.ipynb_checkpoints/ + +# ==== Artifacts / Wheels / Models ==== +artifacts/ +.wheels/ +wheels/ +*.whl +*.pt +*.pth +*.bin + +# ==== Coverage reports ==== +.pytest_cache/ +.coverage +coverage.xml +htmlcov/ + +# ==== gRPC generated (נוצרים בבילד דוקר) ==== +server/embed_pb2.py +server/embed_pb2_grpc.py diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/.dockerignore b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/.dockerignore new file mode 100644 index 000000000..641f56876 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/.dockerignore @@ -0,0 +1,33 @@ +# Python cache +__pycache__/ +*.pyc +*.pyo + +# Virtual environments +.env +.venv/ +venv/ + +# IDE +.idea/ +.vscode/ + +# Node / Frontend +node_modules/ +dist/ + +# Test / Coverage +.pytest_cache/ +.coverage +coverage.xml +htmlcov/ + +# Local databases +*.db +agri.db + +# Data outputs +data/ +data_balanced/ +data_baseline/ +*.csv diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/.gitignore b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/.gitignore new file mode 100644 index 000000000..b0e9b0028 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/.gitignore @@ -0,0 +1,25 @@ +# === Python cache === +__pycache__/ +*.pyc +*.pyo + +# === Virtual environments === +.env +.venv/ +venv/ + +# === IDE / Editors === +.idea/ +.vscode/ + + +# === Test / Coverage === +.pytest_cache/ + +# === Local databases === +*.db +agri.db + +# === Data outputs === +data_balanced/ +data/ \ No newline at end of file diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/README.md 
b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/README.md new file mode 100644 index 000000000..b2e46aec5 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/README.md @@ -0,0 +1,115 @@ +🌿 Agri Baseline – Disease Detection Pipeline + +This project runs an end-to-end disease detection pipeline for agricultural images. +It supports both local and MinIO-based storage backends, and processes entire folders of plant images using trained CNN models. + +🚀 Quick Start +1️⃣ Setup Environment +cp agri_baseline/.env.example agri_baseline/.env +pip install -r agri_baseline/requirements.txt + +2️⃣ Run the Pipeline + +Now the pipeline fetches images directly from MinIO, not from a local folder. + +docker compose up -d +docker compose logs -f app + + +The service automatically connects to your configured MinIO bucket, downloads the images to a cache directory, and processes them. + +3️⃣ Run Tests + +To verify the system: + +docker compose run --rm app pytest -q + +📂 Project Structure +Detection_Jobs/ +│ +├── agri_baseline/ +│ ├── scripts/ +│ │ └── run_batch.py # Run the pipeline on MinIO or local images +│ │ +│ ├── src/ +│ │ ├── detectors/ # CNN models and detectors +│ │ │ ├── base.py # Base Detector/Detection classes +│ │ │ ├── cnn_multi_classifier.py +│ │ │ ├── disease_model.py # Wraps CNN model as a Detector +│ │ │ ├── train/ +│ │ │ │ └── dictionary.py +│ │ │ +│ │ ├── pipeline/ +│ │ │ ├── config.py +│ │ │ ├── db.py # DB connection via SQLAlchemy +│ │ │ ├── logging_setup.py +│ │ │ └── utils.py # Helper functions (image loading, bbox, etc.) +│ │ │ +│ │ ├── storage/ +│ │ │ ├── minio_client.py +│ │ │ └── minio_sync.py # MinIO download helpers +│ │ │ +│ │ └── validator/ +│ │ ├── rules.py # Validation rules +│ │ └── validator.py # QA manager, writes to event logs +│ │ +│ ├── batch_runner.py # Orchestrates the full pipeline +│ ├── .env # Local config (not committed) +│ ├── .env.example # Example configuration file +│ ├── requirements.txt # Python dependencies +│ └── README.md +│ +├── models/ # Trained model weights (not in git) +│ ├── resnet18-f37072fd.pth +│ ├── cnn_multi_stage3.pth +│ └── multi_classes.pth +│ +├── docker-compose.yml # Runs pipeline + MinIO connection +├── dockerfile +├── tests/ # Unit and integration tests +│ ├── test_batch_runner.py +│ ├── test_disease_model.py +│ ├── test_run_detectors.py +│ ├── test_utils.py +│ └── test_validator.py +│ +└── ressearch/ # Experimental models and training + ├── detectors/ + │ ├── models/ + │ │ ├── cnn_binary.pth + │ │ ├── cnn_multi_finetuned.pth + │ │ └── cnn_multi.pth + │ ├── train/ + │ │ ├── disease.py + │ │ ├── eval_multi_levels.py + │ │ ├── finetune_multi_stage3.py + │ │ ├── finetune_multi.py + │ │ └── train_binary_multi.py + │ ├── cnn_binary_classifier.py + │ └── dataset_binary.py + +🧩 Models + +All trained models are stored under models/ and are not committed to Git: + +cnn_multi.pth – Base multi-class CNN + +cnn_multi_finetuned.pth – Fine-tuned on additional data + +cnn_multi_stage3.pth – Advanced fine-tuning with crop-specific data + +multi_classes.pth – Unified class mapping + +🧪 Testing + +Run all integration and unit tests using Docker: + +docker compose run --rm app pytest -q + +📌 Notes + +The pipeline now supports MinIO integration via environment variables in .env. + +Make sure your .env file includes all required MINIO_* variables (endpoint, bucket, credentials). + +Avoid committing .env or model files to the repository. 
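A minimal .env for MinIO mode might look like this (illustrative values only; use your own endpoint, bucket, and credentials):

STORAGE_BACKEND=minio
MINIO_ENDPOINT=127.0.0.1:9000
MINIO_ACCESS_KEY=minioadmin
MINIO_SECRET_KEY=minioadmin
MINIO_BUCKET=leaves
MINIO_SECURE=false
MINIO_PREFIX=mission-123/
MINIO_CACHE_DIR=./data/_minio_cache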
\ No newline at end of file diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/__init__.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/__init__.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/requirements.txt b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/requirements.txt new file mode 100644 index 000000000..8df4df0ba --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/requirements.txt @@ -0,0 +1,47 @@ +# ---------------------------- +# Core scientific stack +# ---------------------------- +numpy==1.26.4 +pandas==2.0.3 +scipy==1.12.0 + +# ---------------------------- +# Image processing +# ---------------------------- +opencv-python-headless==4.9.0.80 +Pillow==10.4.0 +albumentations==1.4.3 + +# ---------------------------- +# Database & configuration +# ---------------------------- +SQLAlchemy==1.4.52 +psycopg2-binary==2.9.9 +python-dotenv==1.0.1 +minio==7.2.9 # MinIO SDK for connecting to object storage + +# ---------------------------- +# Testing +# ---------------------------- +pytest + +# ---------------------------- +# Typing helpers +# ---------------------------- +typing-extensions>=4.9.0 +# Deep learning frameworks +torch==2.2.0 +torchvision==0.17.0 +torchaudio==2.2.0 + + +# ---------------------------- +# Training & monitoring tools +# ---------------------------- +tensorboard>=2.16 + +# ---------------------------- +# Visualization & ML utilities +# ---------------------------- +matplotlib>=3.7 +scikit-learn>=1.3 diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/scripts/__init__.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/scripts/run_batch.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/scripts/run_batch.py new file mode 100644 index 000000000..666ed466e --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/scripts/run_batch.py @@ -0,0 +1,137 @@ +""" +run_batch.py + +Purpose: +- Run the disease-detection batch pipeline either from a LOCAL folder of images + or from a MinIO bucket (objects are first downloaded to a local cache dir, + then processed exactly like local files). 
+ +Usage examples: +1) Local folder (backward-compatible): + python -m agri_baseline.scripts.run_batch --storage local --images ./data/images + +2) MinIO (reads config from ENV and optional CLI flags): + python -m agri_baseline.scripts.run_batch --storage minio --minio-prefix "" + +Environment variables (typical .env): +- STORAGE_BACKEND=minio|local +- MINIO_ENDPOINT=127.0.0.1:9000 +- MINIO_ACCESS_KEY=minioadmin +- MINIO_SECRET_KEY=minioadmin +- MINIO_BUCKET=leaves +- MINIO_SECURE=false +- MINIO_PREFIX=mission-123/ (optional) +- MINIO_CACHE_DIR=./data/_minio_cache +""" + +import argparse +import os +from pathlib import Path + +from agri_baseline.src.pipeline.logging_setup import setup_logging +from agri_baseline.src.pipeline import config +from agri_baseline.src.batch_runner import BatchRunner + +# MinIO helpers provided in your project +from agri_baseline.src.storage.minio_client import load_minio_config # loads config from ENV +from agri_baseline.src.storage.minio_sync import download_prefix_to_dir, ensure_bucket + + +def run_local(images_dir: Path) -> None: + """ + LOCAL mode: + - Run the batch pipeline over a local folder of images. + - This preserves the original behavior for backward compatibility. + """ + runner = BatchRunner() + runner.run_folder(images_dir) + + +def run_minio(prefix: str, cache_dir: Path) -> None: + """ + MINIO mode: + - Pull objects from a MinIO bucket (based on ENV config). + - Download them to a local cache directory. + - Run the batch pipeline over the downloaded files. + """ + cfg = load_minio_config() + ensure_bucket(cfg) # Safety: create the bucket if it doesn't exist + + cache_dir.mkdir(parents=True, exist_ok=True) + + # Download objects under 'prefix' into the local cache folder + downloaded = download_prefix_to_dir(cfg, prefix=prefix, local_dir=cache_dir) + if not downloaded: + raise SystemExit( + f"No objects found in bucket '{cfg.bucket}' with prefix '{prefix}'." + ) + + runner = BatchRunner() + runner.run_folder(cache_dir) + + +def parse_args() -> argparse.Namespace: + """ + Parse CLI arguments and provide sensible defaults from ENV where applicable. + """ + ap = argparse.ArgumentParser(description="Run batch pipeline (local/minio).") + + # Backward-compatible local images folder + ap.add_argument( + "--images", + default=config.IMAGES_DIR, + help="Folder of input images (LOCAL mode)", + ) + + # Storage backend selector + ap.add_argument( + "--storage", + choices=["local", "minio"], + default=os.getenv("STORAGE_BACKEND", "local").lower(), + help="Where to read images from (local|minio).", + ) + + # MinIO options (with ENV fallbacks) + ap.add_argument( + "--minio-prefix", + default=os.getenv("MINIO_PREFIX", ""), + help="Object prefix inside the bucket (e.g. 'mission-123/').", + ) + ap.add_argument( + "--minio-cache", + default=os.getenv("MINIO_CACHE_DIR", "./data/_minio_cache"), + help="Local temp folder used to download MinIO objects before processing.", + ) + + return ap.parse_args() + + +def main() -> None: + """ + Entry point: + - Logs chosen backend. + - Dispatches to local/minio flows. + - Keeps logs concise and informative for CI/ops. 
+ """ + log = setup_logging() + args = parse_args() + + log.info(f"Storage backend: {args.storage}") + + if args.storage == "local": + images_dir = Path(args.images) + log.info(f"Starting batch over LOCAL folder: {images_dir}") + run_local(images_dir) + log.info("Batch done (local).") + else: + cache_dir = Path(args.minio_cache) + log.info( + "Starting batch over MINIO: " + f"bucket from ENV, prefix='{args.minio_prefix}', cache='{cache_dir}'" + ) + run_minio(prefix=args.minio_prefix, cache_dir=cache_dir) + log.info("Batch done (minio).") + + +if __name__ == "__main__": + main() diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/__init__.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/batch_runner.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/batch_runner.py new file mode 100644 index 000000000..6eee8f27d --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/batch_runner.py @@ -0,0 +1,683 @@ +# # agri_baseline/src/batch_runner.py +# # Max line length: 100 + +# from __future__ import annotations + +# import json +# from dataclasses import asdict, is_dataclass +# from datetime import datetime, timezone +# from pathlib import Path +# from typing import Tuple + +# from agri_baseline.src.pipeline.utils import ( +# load_image, +# image_id_from_path, +# clamp_bbox, +# ) +# from agri_baseline.src.pipeline.db import ( +# get_engine, +# INSERT_DET, +# INSERT_COUNT, +# INSERT_QA, +# ) +# from agri_baseline.src.detectors.disease_model import DiseaseDetector + + +# class BatchRunner: +# """ +# End-to-end runner: +# - Load image +# - Run disease detector +# - Normalize detections +# - Write anomalies / counts / QA to RelDB +# """ + +# def __init__(self, mission_id: int = 1, device_id: str = "device-1") -> None: +# self.mission_id = mission_id +# self.device_id = device_id # TEXT FK per schema v2 +# self.engine = get_engine() +# self.detector = DiseaseDetector() + +# # ---------------------------- +# # Public API +# # ---------------------------- + +# def run_folder(self, folder: Path | str) -> None: +# """ +# Run pipeline on all images within a folder (non-recursive). +# Skips non-image files; prints minimal info. +# """ +# folder = Path(folder) +# assert folder.exists(), f"Folder not found: {folder.resolve()}" + +# image_paths = sorted( +# p for p in folder.iterdir() if p.suffix.lower() in {".jpg", ".jpeg", ".png"} +# ) + +# total = 0 +# total_dets = 0 +# for img_path in image_paths: +# try: +# n = self.process_image(img_path) +# total += 1 +# total_dets += n +# except Exception as ex: +# # Keep output tidy; prefer structured logging in production +# print(f"[WARN] Failed on {img_path.name}: {ex}") + +# # Record a small QA summary +# qa = { +# "images_processed": total, +# "detections_total": total_dets, +# "ts": datetime.now(timezone.utc).isoformat(timespec="seconds"), +# } +# with self.engine.begin() as conn: +# conn.execute(INSERT_QA, {"details": json.dumps(qa)}) + +# def process_image(self, img_path: Path | str) -> int: +# """ +# Run pipeline on a single image, write detections and a simple per-image score. +# Returns number of detections written. 
+# """ +# img_path = Path(img_path) +# img, W, H = load_image(img_path) + +# image_id = image_id_from_path(img_path) +# dets = self.detector.run(img) + +# print(f"{image_id}: found {len(dets)} disease spots") + +# # Write detections as anomalies +# written = 0 +# for d in dets: +# x, y, w, h = self._extract_bbox(d) +# x, y, w, h = clamp_bbox(int(x), int(y), int(w), int(h), W, H) +# cx = x + w / 2.0 +# cy = y + h / 2.0 + +# area = float(getattr(d, "area", w * h)) +# label = str(getattr(d, "label", "disease")) +# conf = float(getattr(d, "confidence", 1.0)) + +# details = { +# "image_id": image_id, +# "label": label, +# "bbox": [x, y, w, h], +# "area": area, +# "confidence": conf, +# } +# if is_dataclass(d): +# details["raw_detection"] = asdict(d) + +# with self.engine.begin() as conn: +# conn.execute( +# INSERT_DET, +# dict( +# mission_id=self.mission_id, +# device_id=self.device_id, # TEXT FK +# ts=datetime.now(timezone.utc), +# anomaly_type_id=1, # seeded below +# severity=conf, +# details=json.dumps(details), +# wkt_geom=f"POINT({cx} {cy})", +# ), +# ) +# written += 1 + +# # Per-image score → tile_stats (tile_id TEXT, geom POLYGON) +# if dets: +# anomaly_score = float(len(dets)) +# poly_wkt = self._make_square_polygon_wkt(W / 2.0, H / 2.0, size=1.0) +# with self.engine.begin() as conn: +# conn.execute( +# INSERT_COUNT, +# dict( +# mission_id=self.mission_id, +# tile_id=image_id, # TEXT per schema v2 +# anomaly_score=anomaly_score, +# wkt_geom=poly_wkt, # POLYGON +# ), +# ) + +# return written + +# # ---------------------------- +# # Internals +# # ---------------------------- + +# @staticmethod +# def _extract_bbox(d) -> Tuple[float, float, float, float]: +# """ +# Normalize bbox to (x, y, w, h). Supports: +# - d.x, d.y, d.w, d.h +# - d.bbox == (x, y, w, h) +# - d.xmin, d.ymin, d.xmax, d.ymax +# - d.left, d.top, d.width, d.height +# """ +# if all(hasattr(d, a) for a in ("x", "y", "w", "h")): +# return float(d.x), float(d.y), float(d.w), float(d.h) + +# if hasattr(d, "bbox"): +# bx = list(d.bbox) +# if len(bx) != 4: +# raise ValueError(f"Unexpected bbox length: {len(bx)} in {bx}") +# x, y, w, h = map(float, bx) +# return x, y, w, h + +# if all(hasattr(d, a) for a in ("xmin", "ymin", "xmax", "ymax")): +# x1, y1, x2, y2 = float(d.xmin), float(d.ymin), float(d.xmax), float(d.ymax) +# return x1, y1, max(0.0, x2 - x1), max(0.0, y2 - y1) + +# if all(hasattr(d, a) for a in ("left", "top", "width", "height")): +# return float(d.left), float(d.top), float(d.width), float(d.height) + +# raise AttributeError( +# "Detection bbox fields missing. Supported: " +# "(x,y,w,h) or bbox or (xmin,ymin,xmax,ymax) or (left,top,width,height)." +# ) + +# @staticmethod +# def _make_square_polygon_wkt(cx: float, cy: float, size: float = 1.0) -> str: +# """ +# Build a tiny square Polygon around (cx, cy) in WKT, closed ring. +# PostGIS expects Polygon for tile_stats.geom (SRID 4326). 
+# """ +# x1, y1 = cx - size, cy - size +# x2, y2 = cx + size, cy + size +# return f"POLYGON(({x1} {y1}, {x2} {y1}, {x2} {y2}, {x1} {y2}, {x1} {y1}))" + + +# # ------------- CLI helper ------------- + +# # def main() -> None: +# # """ +# # Local runner: +# # python -m agri_baseline.src.batch_runner --input +# # """ +# # import argparse + +# # parser = argparse.ArgumentParser(description="Run disease detection pipeline.") +# # parser.add_argument("--log-level", default="INFO", help="logging level (ignored by runner)") + +# # parser.add_argument("--input", type=str, required=True, help="Image file or folder") +# # parser.add_argument("--mission", type=int, default=1, help="Numeric mission ID") +# # parser.add_argument("--device", type=str, default="device-1", help="Text device ID") +# # args = parser.parse_args() + +# # runner = BatchRunner(mission_id=args.mission, device_id=args.device) +# # in_path = Path(args.input) +# # if in_path.is_dir(): +# # runner.run_folder(in_path) +# # else: +# # runner.process_image(in_path) + + +# # if __name__ == "__main__": +# # main() +# def main() -> None: +# """ +# Local runner: +# python -m agri_baseline.src.batch_runner --input +# """ +# import argparse + +# parser = argparse.ArgumentParser(description="Run disease detection pipeline.") +# parser.add_argument("--log-level", type=str, default="INFO", +# help="logging level (ignored by runner)") + +# parser.add_argument("--input", type=str, required=True, +# help="Image file or folder") +# # קולט גם מחרוזת וגם מספר, וממיר ל-int תקני +# parser.add_argument("--mission", type=str, default="baseline", +# help=f"Mission name/id ({', '.join(MISSION_ALIASES)} or numeric id)") +# parser.add_argument("--device", type=str, default="cpu", +# choices=["cpu", "cuda"], +# help="device to use") + +# args = parser.parse_args() + +# mission_id = parse_mission(args.mission) + +# in_path = Path(args.input) +# if not in_path.exists(): +# raise FileNotFoundError(f"input does not exist: {in_path}") +# if in_path.is_dir(): +# # אופציונלי: הגנה על תיקייה ריקה +# has_files = any(in_path.rglob("*")) +# if not has_files: +# raise RuntimeError(f"input folder is empty: {in_path}") + +# runner = BatchRunner(mission_id=mission_id, device_id=args.device) +# if in_path.is_dir(): +# runner.run_folder(in_path) +# else: +# runner.process_image(in_path) + +# if __name__ == "__main__": +# main() +# agri_baseline/src/batch_runner.py +# Max line length: 100 + +from __future__ import annotations + +from sqlalchemy import text +import os +import re +import json +from dataclasses import asdict, is_dataclass +from datetime import datetime, timezone, timedelta +from pathlib import Path +from typing import Tuple + +from agri_baseline.src.pipeline.utils import ( + load_image, + image_id_from_path, + clamp_bbox, +) +from agri_baseline.src.pipeline.db import ( + get_engine, +) +from agri_baseline.src.detectors.disease_model import DiseaseDetector + +# ----------------------------------- +# SQL +# ----------------------------------- + +# anomalies insert (unchanged) +INSERT_ANOMALY = text( + """ + INSERT INTO public.anomalies + (mission_id, device_id, ts, anomaly_type_id, severity, details, geom) + VALUES + ( + :mission_id, + :device_id, + :ts, + :anomaly_type_id, + :severity, + CAST(:details AS JSONB), + ST_SetSRID(ST_GeomFromText(:wkt_geom), 4326) + ) + """ +) + +# NEW: leaf_reports insert (always written) +INSERT_LEAF_REPORT = text( + """ + INSERT INTO public.leaf_reports + (device_id, leaf_disease_type_id, ts, confidence, sick) + VALUES + (:device_id, 
:leaf_disease_type_id, :ts, :confidence, :sick) + """ +) + +# NEW: upsert/get id for leaf_disease_types by name (case-insensitive) +UPSERT_LEAF_DISEASE_TYPE = text( + """ + WITH ins AS ( + INSERT INTO public.leaf_disease_types (name) + VALUES (:name) + ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name + RETURNING id + ) + SELECT id FROM ins + UNION ALL + SELECT id FROM public.leaf_disease_types WHERE name = :name + LIMIT 1 + """ +) + +INSERT_MISSION_FULL = text( + """ + INSERT INTO public.missions (mission_id, start_time, end_time, area_geom) + VALUES ( + :mission_id, + :start_time, + :end_time, + ST_SetSRID(ST_GeomFromText(:wkt_poly), 4326) + ) + ON CONFLICT (mission_id) DO NOTHING + """ +) + + +class BatchRunner: + """ + End-to-end runner: + - Parse device & timestamp from file name: _TZ[ _suffix].ext + - Run disease detector + - ALWAYS write a row into public.leaf_reports for each detection + - Write into public.anomalies ONLY if label is 'sick' (i.e., does NOT contain 'healthy') + - Ensure supporting FKs exist (devices:, missions: fixed 60, leaf_disease_types:) + + Notes: + * mission_id is fixed to 60 per requirement. + * geom is the pixel-center point of the detection bbox (WKT, SRID 4326). + """ + + # Fixed mission per request + FIXED_MISSION_ID = 60 + + def __init__(self, mission_id: int | None = None, device_id: str = "device-1") -> None: + # mission_id ignored; always use 60, but keep signature for CLI compatibility + self.mission_id = BatchRunner.FIXED_MISSION_ID + self.fallback_device_id = device_id # used only if filename parsing fails + self.engine = get_engine() + self.detector = DiseaseDetector() + + # anomaly_types entry for LEAF_DISEASE (used only for anomalies table) + self.leaf_anomaly_type_id = self._ensure_anomaly_type( + code="LEAF_DISEASE", description="Leaf disease detected" + ) + + # ---------------------------- + # Public API + # ---------------------------- + + @staticmethod + def _parse_device_and_ts_from_name(img_path: Path) -> tuple[str, datetime]: + """ + Accepts: + _TZ. + _TZ_. + Returns (device_id, ts_utc). Raises ValueError if the pattern doesn't match. + """ + stem = img_path.stem + parts = stem.split("_") + if len(parts) < 2: + raise ValueError( + f"Filename '{img_path.name}' must be '_TZ[ _suffix].ext'" + ) + device = parts[0] + ts_str = parts[1] + if not re.fullmatch(r"\d{8}T\d{6}Z", ts_str): + raise ValueError( + f"Filename '{img_path.name}' must include timestamp as TZ" + ) + ts = datetime.strptime(ts_str, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc) + return device, ts + + def run_folder(self, folder: Path | str) -> None: + """ + Run pipeline on all images within a folder (non-recursive). + """ + folder = Path(folder) + assert folder.exists(), f"Folder not found: {folder.resolve()}" + + image_paths = sorted( + p for p in folder.iterdir() if p.suffix.lower() in {".jpg", ".jpeg", ".png"} + ) + + total, total_dets = 0, 0 + for img_path in image_paths: + try: + n = self.process_image(img_path) + total += 1 + total_dets += n + except Exception as ex: + print(f"[WARN] Failed on {img_path.name}: {ex}") + + print(f"Processed {total} images, wrote {total_dets} detections") + + def process_image(self, img_path: Path | str) -> int: + """ + Run pipeline on a single image and insert rows into leaf_reports (always) + and anomalies (only if sick). Returns number of detections processed. 
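+
+        Example file name (hypothetical, shown only to illustrate the pattern
+        handled by _parse_device_and_ts_from_name):
+            "dev-07_20250101T120000Z.jpg" -> device_id="dev-07", ts=2025-01-01 12:00:00+00:00
+        Names that do not match fall back to the fallback device id and the file's mtime.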
+        """
+        img_path = Path(img_path)
+
+        # Parse device and timestamp from the filename; fall back to the CLI
+        # device id and the file's mtime when the name does not match the pattern.
+        try:
+            device_id, det_ts = self._parse_device_and_ts_from_name(img_path)
+        except Exception:
+            device_id = self.fallback_device_id
+            # timestamp: file mtime if available, otherwise now (UTC)
+            try:
+                det_ts = datetime.fromtimestamp(img_path.stat().st_mtime, tz=timezone.utc)
+            except Exception:
+                det_ts = datetime.now(timezone.utc)
+
+        # Ensure FKs exist
+        self._ensure_device(device_id)
+        self._ensure_mission_full(self.mission_id, det_ts)
+
+        # Load image & run detector
+        img, W, H = load_image(img_path)
+        image_id = image_id_from_path(img_path)
+        dets = self.detector.run(img)
+
+        print(f"{image_id}: found {len(dets)} detections")
+
+        written = 0
+        for d in dets:
+            x, y, w, h = self._extract_bbox(d)
+            x, y, w, h = clamp_bbox(int(x), int(y), int(w), int(h), W, H)
+            cx = x + w / 2.0
+            cy = y + h / 2.0
+
+            area = float(getattr(d, "area", w * h))
+            label = str(getattr(d, "label", "disease"))
+            conf = float(getattr(d, "confidence", 1.0))
+
+            # Build details JSON (used only in anomalies)
+            details = {
+                "image_id": image_id,
+                "label": label,
+                "bbox": [x, y, w, h],
+                "area": area,
+                "confidence": conf,
+                "device_id": device_id,
+                "ts": det_ts.isoformat(),
+            }
+            minio_url = self._minio_url(img_path)
+            if minio_url:
+                details["minio_url"] = minio_url
+            details.setdefault("crop_type", None)
+            details.setdefault("disease_type", label)
+            if is_dataclass(d):
+                details["raw_detection"] = asdict(d)
+
+            # Decide sick/healthy by label
+            sick = not self._is_healthy_label(label)
+
+            # Map label → disease_type_name (part after "__" if present)
+            disease_type_name = self._disease_type_from_label(label)
+
+            with self.engine.begin() as conn:
+                # ensure disease type exists and get id
+                leaf_type_id = self._ensure_leaf_disease_type(conn, disease_type_name)
+
+                # 1) ALWAYS insert a leaf report
+                conn.execute(
+                    INSERT_LEAF_REPORT,
+                    dict(
+                        device_id=device_id,
+                        leaf_disease_type_id=leaf_type_id,
+                        ts=det_ts,
+                        confidence=conf,
+                        sick=sick,
+                    ),
+                )
+
+                # 2) Insert anomaly ONLY if sick
+                if sick:
+                    conn.execute(
+                        INSERT_ANOMALY,
+                        dict(
+                            mission_id=self.mission_id,
+                            device_id=device_id,
+                            ts=det_ts,
+                            anomaly_type_id=self.leaf_anomaly_type_id,
+                            severity=conf,
+                            details=json.dumps(details),
+                            wkt_geom=f"POINT({cx} {cy})",
+                        ),
+                    )
+
+            written += 1
+
+        return written
+
+    # ----------------------------
+    # Internals
+    # ----------------------------
+
+    @staticmethod
+    def _is_healthy_label(label: str) -> bool:
+        """Return True if label contains 'healthy' (case-insensitive)."""
+        return "healthy" in label.lower()
+
+    @staticmethod
+    def _disease_type_from_label(label: str) -> str:
+        """
+        Extract the disease-type token from a label. If the label contains 'a__b', return 'b'; otherwise return the label unchanged.
+        Keeps underscores as-is for consistency with the model outputs.
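+
+        Examples (labels as produced by CLASS_MAPPING):
+            "tomato__early_blight" -> "early_blight"
+            "potato__late_blight"  -> "late_blight"
+        A label without "__" is returned unchanged.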
+ """ + if "__" in label: + return label.split("__", 1)[1] + return label + + def _ensure_anomaly_type(self, code: str, description: str) -> int: + """Return anomaly_type_id for `code`, inserting if needed (idempotent).""" + with self.engine.begin() as conn: + row = conn.execute( + text("SELECT anomaly_type_id FROM public.anomaly_types WHERE code = :c"), + {"c": code}, + ).first() + if row: + return int(row[0]) + + row = conn.execute( + text( + """ + INSERT INTO public.anomaly_types (code, description) + VALUES (:c, :d) + ON CONFLICT (code) + DO UPDATE SET description = EXCLUDED.description + RETURNING anomaly_type_id + """ + ), + {"c": code, "d": description}, + ).first() + return int(row[0]) + + def _ensure_leaf_disease_type(self, conn, name: str) -> int: + """ + Ensure a row exists in public.leaf_disease_types for the given name and return its id. + Uses an upsert with RETURNING to be idempotent. + """ + row = conn.execute(UPSERT_LEAF_DISEASE_TYPE, {"name": name}).first() + return int(row[0]) + + def _ensure_device(self, device_id: str) -> None: + """Ensure a row exists in public.devices (TEXT PK/UNIQUE).""" + with self.engine.begin() as conn: + conn.execute( + text( + """ + INSERT INTO public.devices (device_id) + VALUES (:d) + ON CONFLICT (device_id) DO NOTHING + """ + ), + {"d": device_id}, + ) + + def _ensure_mission_full(self, mission_id: int, ts: datetime) -> None: + """ + Ensure mission row exists and matches your table shape. + If not exists: start_time=ts, end_time=ts+1h, area=default 1x1° square near (0,0). + """ + with self.engine.begin() as conn: + exists = conn.execute( + text("SELECT 1 FROM public.missions WHERE mission_id = :id"), + {"id": mission_id}, + ).first() + if exists: + return + start = ts + end = ts + timedelta(hours=1) + wkt_poly = "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))" + conn.execute( + INSERT_MISSION_FULL, + { + "mission_id": mission_id, + "start_time": start, + "end_time": end, + "wkt_poly": wkt_poly, + }, + ) + + @staticmethod + def _extract_bbox(d) -> Tuple[float, float, float, float]: + """ + Normalize bbox to (x, y, w, h). Supports multiple field layouts. + """ + if all(hasattr(d, a) for a in ("x", "y", "w", "h")): + return float(d.x), float(d.y), float(d.w), float(d.h) + + if hasattr(d, "bbox"): + bx = list(d.bbox) + if len(bx) != 4: + raise ValueError(f"Unexpected bbox length: {len(bx)} in {bx}") + x, y, w, h = map(float, bx) + return x, y, w, h + + if all(hasattr(d, a) for a in ("xmin", "ymin", "xmax", "ymax")): + x1, y1, x2, y2 = float(d.xmin), float(d.ymin), float(d.xmax), float(d.ymax) + return x1, y1, max(0.0, x2 - x1), max(0.0, y2 - y1) + + if all(hasattr(d, a) for a in ("left", "top", "width", "height")): + return float(d.left), float(d.top), float(d.width), float(d.height) + + raise AttributeError( + "Detection bbox fields missing. Supported: " + "(x,y,w,h) or bbox or (xmin,ymin,xmax,ymax) or (left,top,width,height)." + ) + + @staticmethod + def _minio_url(img_path: Path) -> str | None: + """ + Build a MinIO object URL if MINIO_* env vars are provided. 
+ """ + endpoint = os.getenv("MINIO_ENDPOINT") + bucket = os.getenv("MINIO_BUCKET") + prefix = os.getenv("MINIO_PREFIX", "").strip("/") + if not endpoint or not bucket: + return None + endpoint = endpoint.rstrip("/") + key = f"{prefix}/{img_path.name}" if prefix else img_path.name + return f"{endpoint}/{bucket}/{key}" + + +# ------------- CLI helper ------------- + +def main() -> None: + """ + Local runner: + python -m agri_baseline.src.batch_runner --input + """ + import argparse + + parser = argparse.ArgumentParser( + description="Run disease detection pipeline: leaf_reports (always), anomalies (sick only)." + ) + parser.add_argument("--input", type=str, required=True, help="Image file or folder") + parser.add_argument("--mission", type=int, default=60, help="Ignored; always fixed to 60") + parser.add_argument("--device", type=str, default="device-1", help="Fallback device (unused)") + args = parser.parse_args() + + runner = BatchRunner(mission_id=args.mission, device_id=args.device) + in_path = Path(args.input) + if in_path.is_dir(): + runner.run_folder(in_path) + else: + runner.process_image(in_path) + + +if __name__ == "__main__": + main() diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/base.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/base.py new file mode 100644 index 000000000..3eede7361 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/base.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple, Protocol + + +@dataclass(frozen=True) +class Detection: + """ + Model-agnostic detection container. + + Canonical storage: + - bbox: (x, y, w, h) in pixel coordinates. + - confidence: float in [0, 1]. + - label: class/code string. + + Notes: + - Properties expose a stable attribute API (.x/.y/.w/.h/.area etc.) + so downstream code can use either bbox or attributes. + - The class is frozen (immutable) to avoid accidental mutations + during processing and logging. + """ + label: str + confidence: float + bbox: Tuple[float, float, float, float] + meta: Optional[Dict] = None # optional extra data (e.g., model logits) + + # ---- Convenience constructors ------------------------------------------------- + + @staticmethod + def from_xywh( + label: str, + confidence: float, + x: float, + y: float, + w: float, + h: float, + meta: Optional[Dict] = None, + ) -> "Detection": + """Create a Detection from explicit x/y/w/h values.""" + return Detection(label=label, confidence=float(confidence), bbox=(x, y, w, h), meta=meta) + + # ---- Attribute-style view over bbox ------------------------------------------ + + @property + def x(self) -> float: + return float(self.bbox[0]) + + @property + def y(self) -> float: + return float(self.bbox[1]) + + @property + def w(self) -> float: + return float(self.bbox[2]) + + @property + def h(self) -> float: + return float(self.bbox[3]) + + @property + def xmin(self) -> float: + return self.x + + @property + def ymin(self) -> float: + return self.y + + @property + def xmax(self) -> float: + return self.x + self.w + + @property + def ymax(self) -> float: + return self.y + self.h + + @property + def area(self) -> float: + # Clamp at zero to avoid negative area if w/h are negative by mistake. + return max(0.0, self.w) * max(0.0, self.h) + + +class Detector(Protocol): + """ + Base detector interface. 
+ + Implementors must return a list of Detection objects given a BGR image + (numpy array with shape (H, W, 3), dtype=uint8). + """ + name: str + + def run(self, bgr_image) -> List[Detection]: + """Run inference on a BGR image and return model detections.""" + ... diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/cnn_multi_classifier.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/cnn_multi_classifier.py new file mode 100644 index 000000000..6a2d5f3a3 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/cnn_multi_classifier.py @@ -0,0 +1,12 @@ +# agri-baseline/src/detectors/cnn_multi_classifier.py +import torch.nn as nn +from torchvision import models + +def build_multi_model(num_classes: int, pretrained: bool = True) -> nn.Module: + """ + Builds a ResNet18 model for multi-class disease classification. + """ + model = models.resnet18(weights="IMAGENET1K_V1" if pretrained else None) + in_features = model.fc.in_features + model.fc = nn.Linear(in_features, num_classes) + return model diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/disease_model.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/disease_model.py new file mode 100644 index 000000000..a9f94ebae --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/disease_model.py @@ -0,0 +1,127 @@ +# agri_baseline/src/detectors/disease_model.py +from __future__ import annotations + +from dataclasses import dataclass +from typing import List, Tuple + +import cv2 +import numpy as np +import torch +import albumentations as A +from albumentations.pytorch import ToTensorV2 + +from agri_baseline.src.detectors.cnn_multi_classifier import build_multi_model +from agri_baseline.src.detectors.train.dictionary import CLASS_MAPPING + + +@dataclass +class Detection: + """Simple container for a single detection box.""" + bbox: Tuple[int, int, int, int] # x, y, w, h + confidence: float + label: str = "disease" + + @property + def area(self) -> int: + x, y, w, h = self.bbox + return int(w * h) + + +def _ensure_bgr_uint8(img: np.ndarray) -> np.ndarray: + """ + Normalize any input image to BGR uint8 with 3 channels. + Prevents cvtColor from crashing with color.simd_helpers.hpp:94. + + Rules: + - None / empty -> ValueError + - GRAY (H,W) -> BGR + - BGRA (H,W,4) -> BGR + - dtype != uint8 -> convert to uint8 (clip to [0..255]) + """ + if img is None or getattr(img, "size", 0) == 0: + raise ValueError("DiseaseDetector: empty/None image given") + + # If grayscale -> convert to BGR + if img.ndim == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + + # If BGRA -> drop alpha + elif img.ndim == 3 and img.shape[2] == 4: + img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) + + # Validate shape now + if img.ndim != 3 or img.shape[2] != 3: + raise ValueError(f"DiseaseDetector: unexpected image shape {img.shape}") + + # Ensure uint8 + if img.dtype != np.uint8: + img = np.clip(img, 0, 255).astype(np.uint8) + + # Ensure non-zero size + h, w = img.shape[:2] + if h == 0 or w == 0: + raise ValueError("DiseaseDetector: zero-sized image") + + return img + + +class DiseaseDetector: + """ + CNN-based disease classifier. + - Normalizes input to BGR uint8 (3-ch) to avoid OpenCV color conversion crashes. + - Converts BGR->RGB before Albumentations (Normalize + ToTensorV2). 
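+
+    Usage sketch (paths are illustrative; the default weights path is
+    "models/cnn_multi_stage3.pth" as set in __init__):
+        det = DiseaseDetector(device="cpu")
+        detections = det.run(cv2.imread("leaf.jpg"))  # one full-frame Detection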
+ """ + + name = "disease" + + def __init__(self, model_path: str = "models/cnn_multi_stage3.pth", device: str | None = None) -> None: + # choose device + self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") + + # build model according to class mapping + self.classes = sorted(set(CLASS_MAPPING.values())) + self.model = build_multi_model(num_classes=len(self.classes)).to(self.device) + + # load trained weights + state = torch.load(model_path, map_location=self.device) + self.model.load_state_dict(state) + self.model.eval() + + # same validation transforms used in training + self.transform = A.Compose( + [ + A.Resize(224, 224), + A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), + ToTensorV2(), + ] + ) + + def run(self, img: np.ndarray) -> List[Detection]: + """ + Run the classifier on a single image. + :param img: np.ndarray from OpenCV (BGR or GRAY/BGRA/float) — any shape/dtype. + :return: list with a single full-frame Detection carrying predicted label/confidence. + """ + # 1) Normalize input so cvtColor is safe + img = _ensure_bgr_uint8(img) + + # 2) Convert to RGB for the model pipeline + img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 3) Albumentations -> tensor + aug = self.transform(image=img_rgb) + tensor = aug["image"].unsqueeze(0).to(self.device) + + # 4) Model inference + with torch.no_grad(): + logits = self.model(tensor) + probs = torch.softmax(logits, dim=1)[0] + conf_t, cls_t = torch.max(probs, dim=0) + + label = self.classes[cls_t.item()] + confidence = float(conf_t.item()) + + # 5) Return a single detection that spans the whole image (classifier) + h, w = img.shape[:2] + det = Detection(bbox=(0, 0, w, h), confidence=confidence, label=label) + return [det] diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/train/dictionary.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/train/dictionary.py new file mode 100644 index 000000000..1d0671026 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/detectors/train/dictionary.py @@ -0,0 +1,36 @@ +CLASS_MAPPING = { + # 🍅 Tomato + "tomato_healthy": "tomato__healthy", + "tomato_leaf": "tomato__healthy", + "tomato_bacterial_spot": "tomato__bacterial_spot", + "tomato_leaf_bacterial_spot": "tomato__bacterial_spot", + "tomato_early_blight": "tomato__early_blight", + "tomato_early_blight_leaf": "tomato__early_blight", + "tomato_late_blight": "tomato__late_blight", + "tomato_leaf_late_blight": "tomato__late_blight", + "tomato_leaf_mold": "tomato__leaf_mold", + "tomato_mold_leaf": "tomato__leaf_mold", + "tomato_septoria_leaf_spot": "tomato__septoria_leaf_spot", + "tomato_spider_mites_two_spotted_spider_mite": "tomato__spider_mites", + "tomato_spider_mites": "tomato__spider_mites", + "tomato_target_spot": "tomato__target_spot", + "tomato_tomato_mosaic_virus": "tomato__mosaic_virus", + "tomato_tomato_yellowleaf_curl_virus": "tomato__yellowleaf_curl_virus", + "tomato_leaf_mosaic_virus": "tomato__mosaic_virus", + "tomato_leaf_yellow_virus": "tomato__yellowleaf_curl_virus", + + + # 🥔 Potato + "potato_healthy": "potato__healthy", + "potato_leaf": "potato__healthy", + "potato_early_blight": "potato__early_blight", + "potato_leaf_early_blight": "potato__early_blight", + "potato_late_blight": "potato__late_blight", + "potato_leaf_late_blight": "potato__late_blight", + + # 🌶️ Pepper + "pepper_bell_healthy": "pepper__healthy", + "bell_pepper_leaf": "pepper__healthy", + 
"pepper_bell_bacterial_spot": "pepper__bacterial_spot", + "bell_pepper_leaf_spot": "pepper__bacterial_spot", +} diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/config.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/config.py new file mode 100644 index 000000000..18d696e0a --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/config.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import os +from pathlib import Path + +# Try to load env files both from project root and from agri_baseline/.env +try: + from dotenv import load_dotenv # type: ignore + load_dotenv(dotenv_path=Path("agri_baseline/.env"), override=False) + load_dotenv(override=False) +except Exception: + pass + +# Prefer standard name DATABASE_URL; fallback to DB_URL; finally default to localhost:5432 +DB_URL: str = ( + os.getenv("DATABASE_URL") + or os.getenv("DB_URL") + or "postgresql+psycopg2://missions_user:pg123@localhost:5432/missions_db" +) + +IMAGES_DIR = os.getenv("IMAGES_DIR", "./data/images") +BATCH_SIZE = int(os.getenv("BATCH_SIZE", 64)) +MAX_WORKERS = int(os.getenv("MAX_WORKERS", 4)) +MIN_BBOX_AREA = int(os.getenv("MIN_BBOX_AREA", 60)) +MIN_COMPONENT_AREA = int(os.getenv("MIN_COMPONENT_AREA", 200)) diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/db.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/db.py new file mode 100644 index 000000000..8c69e24f3 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/db.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +from sqlalchemy import create_engine, text, bindparam +from sqlalchemy.engine import Engine + +from . 
import config + +_engine: Engine | None = None + +def get_engine() -> Engine: + """Return a singleton SQLAlchemy engine for the configured DB.""" + global _engine + if _engine is None: + _engine = create_engine( + config.DB_URL, + pool_pre_ping=True, # keep-alive for flaky networks/tests + future=True, + connect_args={"connect_timeout": 5} # fail fast on bad host/port + ) + return _engine + +# === Inserts mapped to RelDB schema === + +# detections → anomalies +INSERT_DET = text( + """ + INSERT INTO anomalies(mission_id, device_id, ts, anomaly_type_id, severity, details, geom) + VALUES (:mission_id, :device_id, :ts, :anomaly_type_id, :severity, CAST(:details AS jsonb), + ST_GeomFromText(:wkt_geom, 4326)); + """ +) + +# counts → tile_stats +INSERT_COUNT = text( + """ + INSERT INTO tile_stats(mission_id, tile_id, anomaly_score, geom) + VALUES (:mission_id, :tile_id, :anomaly_score, ST_GeomFromText(:wkt_geom, 4326)) + ON CONFLICT (mission_id, tile_id) DO UPDATE + SET anomaly_score = excluded.anomaly_score; + """ +) + +# validator findings → event_logs +INSERT_FINDING = ( + text( + """ + INSERT INTO event_logs(ts, level, source, message, details) + VALUES (CURRENT_TIMESTAMP, :level, 'validator', :message, CAST(:details AS jsonb)); + """ + ) + # Defaults if the caller does not send the parameters + .bindparams( + bindparam("level", value="INFO"), + bindparam("message", value=""), + bindparam("details", value="{}"), + ) +) + + + +# QA metrics → event_logs +INSERT_QA = text( + """ + INSERT INTO event_logs(ts, level, source, message, details) + VALUES (CURRENT_TIMESTAMP, 'INFO', 'qa', 'QA metrics recorded', CAST(:details AS jsonb)); + """ +) \ No newline at end of file diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/logging_setup.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/logging_setup.py new file mode 100644 index 000000000..06193027f --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/logging_setup.py @@ -0,0 +1,9 @@ +import logging + + +def setup_logging(): + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + ) + return logging.getLogger("agri") \ No newline at end of file diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/utils.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/utils.py new file mode 100644 index 000000000..0b99245e9 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/pipeline/utils.py @@ -0,0 +1,62 @@ +# agri_baseline/src/pipeline/utils.py +# Max line length: 100 + +from __future__ import annotations + +import hashlib +from pathlib import Path +from typing import Tuple + +import cv2 +import numpy as np + + +class ImageLoadError(Exception): + """Raised when an image cannot be decoded or is empty.""" + + +def load_image(path: str | Path) -> Tuple[np.ndarray, int, int]: + """ + Load an image from disk as BGR uint8 and return (img, width, height). + + Rules: + - Always read as color to ensure 3 channels (BGR). + - Raise FileNotFoundError if the path doesn't exist. + - Raise ImageLoadError if decode fails or the image is empty. + - Convert dtype to uint8 if needed. + - Normalize channel count: grayscale -> BGR, BGRA -> BGR. 
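+
+    Example (illustrative path):
+        img, w, h = load_image("./data/images/leaf_001.jpg")  # img is HxWx3 uint8 (BGR)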
+ """ + p = Path(path) + if not p.exists(): + raise FileNotFoundError(f"Image not found: {p.resolve()}") + + # Always load as color to ensure 3 channels (BGR) + img = cv2.imread(str(p), cv2.IMREAD_COLOR) + if img is None or img.size == 0: + raise ImageLoadError(f"Failed to decode image (or empty): {p.resolve()}") + + if img.dtype != np.uint8: + img = cv2.convertScaleAbs(img) + + # Guard channel count (should be 3 after IMREAD_COLOR, but just in case) + if img.ndim == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + elif img.ndim == 3 and img.shape[2] == 4: + img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) + + h, w = img.shape[:2] + return img, w, h + + +def image_id_from_path(path: str | Path) -> str: + p = Path(path) + digest = hashlib.sha1(str(p.resolve()).encode()).hexdigest()[:16] + return f"{p.stem}_{digest}" + + +def clamp_bbox(x: int, y: int, w: int, h: int, W: int, H: int) -> Tuple[int, int, int, int]: + x = max(0, min(x, W - 1)) + y = max(0, min(y, H - 1)) + w = max(1, min(w, W - x)) + h = max(1, min(h, H - y)) + return x, y, w, h diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/__init__.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/minio_client.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/minio_client.py new file mode 100644 index 000000000..dd5effd69 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/minio_client.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass +from minio import Minio + + +@dataclass(frozen=True) +class MinioConfig: + endpoint: str + access_key: str + secret_key: str + bucket: str + secure: bool + + +def load_minio_config() -> MinioConfig: + endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000") + access_key = os.getenv("MINIO_ACCESS_KEY", "") + secret_key = os.getenv("MINIO_SECRET_KEY", "") + bucket = os.getenv("MINIO_BUCKET", "my-bucket") + secure = os.getenv("MINIO_SECURE", "false").lower() == "true" + + if not access_key or not secret_key: + raise ValueError("Missing MINIO_ACCESS_KEY / MINIO_SECRET_KEY.") + return MinioConfig(endpoint, access_key, secret_key, bucket, secure) + + +def build_client(cfg: MinioConfig) -> Minio: + return Minio( + endpoint=cfg.endpoint, + access_key=cfg.access_key, + secret_key=cfg.secret_key, + secure=cfg.secure, + ) diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/minio_sync.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/minio_sync.py new file mode 100644 index 000000000..8c6c2b6a1 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/storage/minio_sync.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import os +from io import BytesIO +from pathlib import Path +from typing import Iterable + +from .minio_client import MinioConfig, build_client + + +def ensure_bucket(cfg: MinioConfig) -> None: + """ + Ensure the target bucket exists; create it if it does not. 
+ """ + client = build_client(cfg) + if not client.bucket_exists(cfg.bucket): + client.make_bucket(cfg.bucket) + + +def download_prefix_to_dir(cfg: MinioConfig, prefix: str, local_dir: Path) -> list[Path]: + """ + Download all objects under the given `prefix` to the local directory. + Returns a list of local file paths that were downloaded. + """ + client = build_client(cfg) + local_dir.mkdir(parents=True, exist_ok=True) + + downloaded: list[Path] = [] + for obj in client.list_objects(cfg.bucket, prefix=prefix, recursive=True): + # Skip entries that represent "virtual folders" + name = obj.object_name + if name.endswith("/") or not name: + continue + + # Simplify: save using the file's basename only. + # If you need to preserve the full hierarchy, use: local_dir.joinpath(name) + target = local_dir.joinpath(Path(name).name) + + response = client.get_object(cfg.bucket, name) + try: + data = response.read() + finally: + response.close() + response.release_conn() + + target.parent.mkdir(parents=True, exist_ok=True) + target.write_bytes(data) + downloaded.append(target) + + return downloaded + + +def upload_dir_to_prefix(cfg: MinioConfig, local_dir: Path, prefix: str) -> list[str]: + """ + Upload all files from the local directory under the given `prefix`. + Returns a list of object names that were uploaded. + """ + client = build_client(cfg) + ensure_bucket(cfg) + + uploaded: list[str] = [] + for path in local_dir.rglob("*"): + if not path.is_file(): + continue + + rel = path.relative_to(local_dir).as_posix() + object_name = f"{prefix.rstrip('/')}/{rel}" + data = path.read_bytes() + bio = BytesIO(data) + + client.put_object(cfg.bucket, object_name, bio, length=len(data)) + uploaded.append(object_name) + + return uploaded diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/validator/rules.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/validator/rules.py new file mode 100644 index 000000000..afb6318a7 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/validator/rules.py @@ -0,0 +1,65 @@ +from __future__ import annotations +import json +from dataclasses import dataclass +from typing import Iterable, Optional + +from sqlalchemy import text +from agri_baseline.src.pipeline.db import get_engine, INSERT_FINDING, INSERT_QA + + +@dataclass +class Finding: + scope: str + image_id: str + rule: str + severity: str + message: str + details: Optional[dict] = None + + +# ---- Image-level checks ---- + +def check_bbox_bounds(image_id: str, width: int, height: int, dets: list[dict]) -> list[Finding]: + out: list[Finding] = [] + for d in dets: + x, y, w, h = d["bbox_x"], d["bbox_y"], d["bbox_w"], d["bbox_h"] + if x < 0 or y < 0 or x + w > width or y + h > height: + out.append(Finding("image", image_id, "bbox_oob", "warn", + f"BBox out-of-bounds: {(x, y, w, h)}")) + if w * h <= 0 or d["area_px"] <= 0: + out.append(Finding("image", image_id, "bbox_area_zero", "error", + "Non-positive area")) + if d["confidence"] < 0 or d["confidence"] > 1: + out.append(Finding("image", image_id, "conf_oob", "error", + f"Confidence out of range: {d['confidence']:.3f}")) + return out + + +def check_counts_reasonable(image_id: str, disease: int) -> list[Finding]: + out: list[Finding] = [] + if disease < 0: + out.append(Finding("image", image_id, "negative_counts", "error", + f"Negative count: disease={disease}")) + if disease == 0: + out.append(Finding("image", image_id, "all_zero_counts", "warn", + "Disease 
count is zero")) + if disease > 10000: + out.append(Finding("image", image_id, "count_too_high", "warn", + f"Suspiciously high disease count: {disease}")) + return out + + +# ---- Batch-level checks ---- + +def check_batch_error_rate(total: int, errored: int, threshold: float = 0.05) -> list[Finding]: + rate = 0.0 if total == 0 else errored / total + sev = "warn" if rate <= threshold else "error" + return [Finding("batch", None, "error_rate", sev, + f"Batch error rate={rate:.3%}, threshold={threshold:.0%}")] + + +def check_batch_no_detections(total: int, sum_dets: int) -> list[Finding]: + if total > 0 and sum_dets == 0: + return [Finding("batch", None, "no_detections", "warn", + "Pipeline produced zero detections for the entire batch")] + return [] diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/validator/validator.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/validator/validator.py new file mode 100644 index 000000000..3c970190c --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/agri_baseline/src/validator/validator.py @@ -0,0 +1,94 @@ +from __future__ import annotations +import json +from dataclasses import dataclass +from typing import Iterable, List, Optional +from sqlalchemy import text + +from agri_baseline.src.pipeline.db import get_engine, INSERT_FINDING, INSERT_QA + + +@dataclass +class Finding: + """Single validation finding.""" + scope: str # e.g., "image" + image_id: str # logical id per image + rule: str # rule code/name + severity: str # DEBUG/INFO/WARN/ERROR + message: str # human-readable message + details: Optional[dict] = None + + +class Validator: + """ + Collects validation findings and writes batch summaries. + """ + def image_findings(self, findings: Iterable[Finding]) -> None: + """Write image-level findings into event_logs table.""" + with get_engine().begin() as conn: + for f in findings: + details_dict = { + "scope": f.scope, + "rule": f.rule, + "image_id": f.image_id, + **(f.details or {}), + } + conn.execute( + INSERT_FINDING, + { + "level": f.severity.upper(), + "message": f.message, + # Passes as a JSON string because SQL does CAST(... AS jsonb) "details": json.dumps(details_dict), + }, + ) + + + def batch_summary(self) -> None: + """ + Aggregate anomalies → tile_stats by image_id (from anomalies.details->>'image_id'). + For each (mission_id, image_id): + - anomaly_score = count of anomalies + - geom = envelope of a small expanded collect of points (Polygon, 4326) + Idempotent via ON CONFLICT (mission_id, tile_id). + """ + sql = text( + """ + WITH per_image AS ( + SELECT + a.mission_id, + a.details->>'image_id' AS tile_id, + COUNT(*)::real AS anomaly_score, + -- produce Polygon in 4326 directly (no WKT roundtrip) + ST_Envelope( + ST_Expand( + ST_Collect(a.geom), + 0.0005 -- ~50m at equator; tweak if needed + ) + )::geometry(Polygon, 4326) AS poly + FROM anomalies a + WHERE a.geom IS NOT NULL + AND a.details ? 
'image_id' + GROUP BY a.mission_id, tile_id + ) + INSERT INTO tile_stats (mission_id, tile_id, anomaly_score, geom) + SELECT mission_id, tile_id, anomaly_score, poly + FROM per_image + ON CONFLICT (mission_id, tile_id) DO UPDATE + SET anomaly_score = EXCLUDED.anomaly_score, + geom = EXCLUDED.geom; + """ + ) + + with get_engine().begin() as conn: + conn.execute(sql) + + # optional: record a QA info log (pass JSON as string) + with get_engine().begin() as conn: + conn.execute( + INSERT_QA, + { + "details": json.dumps({ + "source": "batch_summary", + "note": "tile_stats updated from anomalies by image_id", + }) + }, + ) diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/docker-compose.yml b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/docker-compose.yml new file mode 100644 index 000000000..18e1cc31c --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/docker-compose.yml @@ -0,0 +1,25 @@ +services: + app: + build: + context: . + dockerfile: Dockerfile + container_name: agri_app + # exec-form to avoid spacing/quoting issues + command: ["python", "-m", "agri_baseline.scripts.run_batch", "--storage", "minio"] + env_file: + - agri_baseline/.env + volumes: + - ./agri_baseline:/app/agri_baseline + - ./tests:/app/tests + - ./data:/app/data + - ./models:/root/.cache/torch/hub/checkpoints + networks: + - agri_net + - minio_net # ← MinIO network + +networks: + agri_net: + external: true + minio_net: + external: true + name: storage_with_mqtt_minionet # ← MinIO network name diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/dockerfile b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/dockerfile new file mode 100644 index 000000000..c021c9c79 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/dockerfile @@ -0,0 +1,183 @@ +# # # ============================== +# # # Based on PyTorch with CUDA +# # # ============================== +# # # FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime +# # ARG BASE_IMAGE=pytorch/pytorch:2.2.0-cpu +# # FROM ${BASE_IMAGE} + +# # # # --- NETFREE CERT INSTALL --- +# # # ADD https://netfree.link/dl/unix-ca.sh /home/netfree-unix-ca.sh +# # # RUN bash /home/netfree-unix-ca.sh \ +# # # && update-ca-certificates +# # # ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt +# # # ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt +# # # ENV PIP_CERT=/etc/ssl/certs/ca-certificates.crt +# # # --- NETFREE CERT INSTALL (optional) --- +# # ARG INSTALL_NETFREE_CA=0 +# # # אל תעשי ADD מהאינטרנט (זה מה שנפל) +# # # במקום זה, רק אם תרצי – נוריד בזמן הבנייה עם curl (כש-INSTALL_NETFREE_CA=1) +# # RUN if [ "$INSTALL_NETFREE_CA" = "1" ]; then \ +# # apt-get update && apt-get install -y --no-install-recommends curl ca-certificates && \ +# # curl -fsSL --retry 5 https://netfree.link/dl/unix-ca.sh -o /home/netfree-unix-ca.sh && \ +# # bash /home/netfree-unix-ca.sh && update-ca-certificates && \ +# # rm -rf /var/lib/apt/lists/* ; \ +# # else echo "Skipping NetFree CA install"; fi + +# # # Force pip to trust PyPI +# # RUN pip config set global.trusted-host "pypi.org files.pythonhosted.org pypi.python.org" +# # RUN pip config set global.cert /etc/ssl/certs/ca-certificates.crt +# # # --- END NETFREE CERT INSTALL --- + +# # # ============================== +# # # Install system packages +# # # ============================== +# # RUN apt-get update && apt-get install -y --no-install-recommends \ +# # libgl1-mesa-glx \ +# # libglib2.0-0 \ +# # libsm6 \ +# # libxext6 
\ +# # libxrender1 \ +# # libgtk2.0-0 \ +# # libcanberra-gtk-module \ +# # libcanberra-gtk3-module \ +# # && rm -rf /var/lib/apt/lists/* + +# # # ============================== +# # # Working directory +# # # ============================== +# # ==== Portable CPU base (works everywhere) ==== +# FROM python:3.10-slim + +# ENV PIP_NO_CACHE_DIR=1 \ +# PYTHONDONTWRITEBYTECODE=1 \ +# PYTHONUNBUFFERED=1 + +# # System deps מינימליים ל-CV/IO +# RUN apt-get update && apt-get install -y --no-install-recommends \ +# git ffmpeg libsm6 libxext6 libgl1 ca-certificates \ +# && rm -rf /var/lib/apt/lists/* +# # הוספת תעודת NetFree ל־trust store של המערכת +# COPY netfree-ca.crt /usr/local/share/ca-certificates/netfree-ca.crt +# RUN update-ca-certificates + +# # לוודא שכלי רשת/פייתון משתמשים ב־CA המעודכן +# ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt +# ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt +# ENV PIP_CERT=/etc/ssl/certs/ca-certificates.crt + +# # Torch/torchvision/torchaudio גרסאות CPU יציבות מה-PyTorch index +# ARG TORCH_VERSION=2.2.1 +# ARG TORCHVISION_VERSION=0.17.1 +# ARG TORCHAUDIO_VERSION=2.2.1 +# RUN python -m pip install --upgrade pip && \ +# python -m pip install --index-url https://download.pytorch.org/whl/cpu \ +# torch==${TORCH_VERSION} \ +# torchvision==${TORCHVISION_VERSION} \ +# torchaudio==${TORCHAUDIO_VERSION} + +# # (מבטל תלות ב-NetFree בזמן build; אין ADD/curl מהאינטרנט בשלב הזה) +# # ==== END portable header ==== + +# # ============================== +# # Working directory +# # ============================== +# # WORKDIR /app + +# WORKDIR /app + +# # Update pip +# RUN pip install --upgrade pip + +# # ============================== +# # Install dependencies +# # ============================== +# COPY agri_baseline/requirements.txt /app/requirements.txt +# RUN pip install --no-cache-dir --upgrade "numpy==1.26.4" +# RUN pip install --no-cache-dir --force-reinstall "opencv-python-headless==4.9.0.80" + +# RUN pip install --no-cache-dir -r /app/requirements.txt + +# # ============================== +# # Copy source code +# # ============================== +# COPY agri_baseline /app/agri_baseline +# COPY models /app/models +# # Copy tests folder +# COPY tests /app/tests + +# # Set PYTHONPATH +# ENV PYTHONPATH=/app:$PYTHONPATH + +# # ============================== +# # Entry point +# # ============================== +# CMD ["python", "agri_baseline/src/batch_runner.py"] +# syntax=docker/dockerfile:1.6 + +FROM mcr.microsoft.com/devcontainers/python:1-3.11-bullseye + +ENV PIP_NO_CACHE_DIR=0 \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +# 1) חבילות מערכת בסיסיות +RUN apt-get update && apt-get install -y --no-install-recommends \ + git ffmpeg libsm6 libxext6 libgl1 ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# 2) הוספת תעודת NetFree שהכנת (הקובץ יושב לצד ה-dockerfile) +# COPY netfree-ca.crt /usr/local/share/ca-certificates/netfree-ca.crt +# RUN update-ca-certificates + +# 3) לוודא שכלי רשת/פייתון משתמשים ב-CA של המערכת +ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \ + REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \ + PIP_CERT=/etc/ssl/certs/ca-certificates.crt + +# 4) התקנת Torch CPU מהאינדקס של PyTorch עם cache +ARG TORCH_VERSION=2.2.1 +ARG TORCHVISION_VERSION=0.17.1 +ARG TORCHAUDIO_VERSION=2.2.1 +RUN --mount=type=cache,target=/root/.cache/pip \ + python -m pip install --upgrade pip && \ + python -m pip install --index-url https://download.pytorch.org/whl/cpu \ + torch==${TORCH_VERSION} \ + torchvision==${TORCHVISION_VERSION} \ + 
torchaudio==${TORCHAUDIO_VERSION} + +# 5) ספריות פייתון נוספות (עם cache + בלי --no-cache-dir) +# WORKDIR /app +# COPY Detection_Jobs/agri_baseline/requirements.txt /app/requirements.txt + +# RUN --mount=type=cache,target=/root/.cache/pip \ +# pip install --upgrade pip && \ +# pip install "numpy==1.26.4" && \ +# pip install --force-reinstall "opencv-python-headless==4.9.0.80" && \ +# pip install --retries 10 --timeout 120 -r /app/requirements.txt +# 5) ספריות פייתון נוספות (עם cache + סינון GPU) +WORKDIR /app +COPY Detection_Jobs/agri_baseline/requirements.txt /app/requirements.txt + +# מסננים תלויות GPU כדי שלא ימשכו CUDA +RUN awk '!/^(torch|torchvision|torchaudio)[[:space:]=<>!~]*$/ \ + && !/^pytorch-cuda/ \ + && !/^xformers/ \ + && !/^cupy-cuda/ \ + && !/^nvidia[-_]/' /app/requirements.txt > /app/requirements.cpu.txt + +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install --upgrade pip && \ + PIP_INDEX_URL=https://download.pytorch.org/whl/cpu \ + PIP_EXTRA_INDEX_URL=https://pypi.org/simple \ + pip install --retries 10 --timeout 120 -r /app/requirements.cpu.txt + +# 6) קוד המקור +COPY Detection_Jobs/agri_baseline /app/agri_baseline +COPY Detection_Jobs/models /app/models +COPY Detection_Jobs/tests /app/tests + +# 7) PYTHONPATH – בלי ההפניה למשתנה שאינו קיים בבילד +ENV PYTHONPATH=/app + +# 8) Entry +CMD ["python", "agri_baseline/src/batch_runner.py"] diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/dockerfile.bak b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/dockerfile.bak new file mode 100644 index 000000000..5ac8ae7c7 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/dockerfile.bak @@ -0,0 +1,167 @@ +# # # ============================== +# # # Based on PyTorch with CUDA +# # # ============================== +# # # FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime +# # ARG BASE_IMAGE=pytorch/pytorch:2.2.0-cpu +# # FROM ${BASE_IMAGE} + +# # # # --- NETFREE CERT INSTALL --- +# # # ADD https://netfree.link/dl/unix-ca.sh /home/netfree-unix-ca.sh +# # # RUN bash /home/netfree-unix-ca.sh \ +# # # && update-ca-certificates +# # # ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt +# # # ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt +# # # ENV PIP_CERT=/etc/ssl/certs/ca-certificates.crt +# # # --- NETFREE CERT INSTALL (optional) --- +# # ARG INSTALL_NETFREE_CA=0 +# # # אל תעשי ADD מהאינטרנט (זה מה שנפל) +# # # במקום זה, רק אם תרצי – נוריד בזמן הבנייה עם curl (כש-INSTALL_NETFREE_CA=1) +# # RUN if [ "$INSTALL_NETFREE_CA" = "1" ]; then \ +# # apt-get update && apt-get install -y --no-install-recommends curl ca-certificates && \ +# # curl -fsSL --retry 5 https://netfree.link/dl/unix-ca.sh -o /home/netfree-unix-ca.sh && \ +# # bash /home/netfree-unix-ca.sh && update-ca-certificates && \ +# # rm -rf /var/lib/apt/lists/* ; \ +# # else echo "Skipping NetFree CA install"; fi + +# # # Force pip to trust PyPI +# # RUN pip config set global.trusted-host "pypi.org files.pythonhosted.org pypi.python.org" +# # RUN pip config set global.cert /etc/ssl/certs/ca-certificates.crt +# # # --- END NETFREE CERT INSTALL --- + +# # # ============================== +# # # Install system packages +# # # ============================== +# # RUN apt-get update && apt-get install -y --no-install-recommends \ +# # libgl1-mesa-glx \ +# # libglib2.0-0 \ +# # libsm6 \ +# # libxext6 \ +# # libxrender1 \ +# # libgtk2.0-0 \ +# # libcanberra-gtk-module \ +# # libcanberra-gtk3-module \ +# # && rm -rf /var/lib/apt/lists/* + +# # # 
============================== +# # # Working directory +# # # ============================== +# # ==== Portable CPU base (works everywhere) ==== +# FROM python:3.10-slim + +# ENV PIP_NO_CACHE_DIR=1 \ +# PYTHONDONTWRITEBYTECODE=1 \ +# PYTHONUNBUFFERED=1 + +# # System deps מינימליים ל-CV/IO +# RUN apt-get update && apt-get install -y --no-install-recommends \ +# git ffmpeg libsm6 libxext6 libgl1 ca-certificates \ +# && rm -rf /var/lib/apt/lists/* +# # הוספת תעודת NetFree ל־trust store של המערכת +# COPY netfree-ca.crt /usr/local/share/ca-certificates/netfree-ca.crt +# RUN update-ca-certificates + +# # לוודא שכלי רשת/פייתון משתמשים ב־CA המעודכן +# ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt +# ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt +# ENV PIP_CERT=/etc/ssl/certs/ca-certificates.crt + +# # Torch/torchvision/torchaudio גרסאות CPU יציבות מה-PyTorch index +# ARG TORCH_VERSION=2.2.1 +# ARG TORCHVISION_VERSION=0.17.1 +# ARG TORCHAUDIO_VERSION=2.2.1 +# RUN python -m pip install --upgrade pip && \ +# python -m pip install --index-url https://download.pytorch.org/whl/cpu \ +# torch==${TORCH_VERSION} \ +# torchvision==${TORCHVISION_VERSION} \ +# torchaudio==${TORCHAUDIO_VERSION} + +# # (מבטל תלות ב-NetFree בזמן build; אין ADD/curl מהאינטרנט בשלב הזה) +# # ==== END portable header ==== + +# # ============================== +# # Working directory +# # ============================== +# # WORKDIR /app + +# WORKDIR /app + +# # Update pip +# RUN pip install --upgrade pip + +# # ============================== +# # Install dependencies +# # ============================== +# COPY agri_baseline/requirements.txt /app/requirements.txt +# RUN pip install --no-cache-dir --upgrade "numpy==1.26.4" +# RUN pip install --no-cache-dir --force-reinstall "opencv-python-headless==4.9.0.80" + +# RUN pip install --no-cache-dir -r /app/requirements.txt + +# # ============================== +# # Copy source code +# # ============================== +# COPY agri_baseline /app/agri_baseline +# COPY models /app/models +# # Copy tests folder +# COPY tests /app/tests + +# # Set PYTHONPATH +# ENV PYTHONPATH=/app:$PYTHONPATH + +# # ============================== +# # Entry point +# # ============================== +# CMD ["python", "agri_baseline/src/batch_runner.py"] +# syntax=docker/dockerfile:1.6 + +FROM python:3.10-slim + +ENV PIP_NO_CACHE_DIR=0 \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +# 1) חבילות מערכת בסיסיות +RUN apt-get update && apt-get install -y --no-install-recommends \ + git ffmpeg libsm6 libxext6 libgl1 ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# 2) הוספת תעודת NetFree שהכנת (הקובץ יושב לצד ה-dockerfile) +COPY netfree-ca.crt /usr/local/share/ca-certificates/netfree-ca.crt +RUN update-ca-certificates + +# 3) לוודא שכלי רשת/פייתון משתמשים ב-CA של המערכת +ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \ + REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \ + PIP_CERT=/etc/ssl/certs/ca-certificates.crt + +# 4) התקנת Torch CPU מהאינדקס של PyTorch עם cache +ARG TORCH_VERSION=2.2.1 +ARG TORCHVISION_VERSION=0.17.1 +ARG TORCHAUDIO_VERSION=2.2.1 +RUN --mount=type=cache,target=/root/.cache/pip \ + python -m pip install --upgrade pip && \ + python -m pip install --index-url https://download.pytorch.org/whl/cpu \ + torch==${TORCH_VERSION} \ + torchvision==${TORCHVISION_VERSION} \ + torchaudio==${TORCHAUDIO_VERSION} + +# 5) ספריות פייתון נוספות (עם cache + בלי --no-cache-dir) +WORKDIR /app +COPY agri_baseline/requirements.txt /app/requirements.txt + +RUN 
--mount=type=cache,target=/root/.cache/pip \ + pip install --upgrade pip && \ + pip install "numpy==1.26.4" && \ + pip install --force-reinstall "opencv-python-headless==4.9.0.80" && \ + pip install --retries 10 --timeout 120 -r /app/requirements.txt + +# 6) קוד המקור +COPY agri_baseline /app/agri_baseline +COPY models /app/models +COPY tests /app/tests + +# 7) PYTHONPATH – בלי ההפניה למשתנה שאינו קיים בבילד +ENV PYTHONPATH=/app + +# 8) Entry +CMD ["python", "agri_baseline/src/batch_runner.py"] diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/pytest.ini b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/pytest.ini new file mode 100644 index 000000000..89313dd9b --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +pythonpath = . +testpaths = tests +addopts = -v diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/cnn_binary_classifier.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/cnn_binary_classifier.py new file mode 100644 index 000000000..898c2c918 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/cnn_binary_classifier.py @@ -0,0 +1,12 @@ +# agri-baseline/src/detectors/cnn_binary_classifier.py +import torch.nn as nn +from torchvision import models + +def build_binary_model(pretrained: bool = True) -> nn.Module: + """ + Builds a ResNet18 model for binary classification (healthy vs diseased). + """ + model = models.resnet18(weights="IMAGENET1K_V1" if pretrained else None) + in_features = model.fc.in_features + model.fc = nn.Linear(in_features, 2) # healthy / diseased + return model diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/dataset_binary.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/dataset_binary.py new file mode 100644 index 000000000..d63bf5208 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/dataset_binary.py @@ -0,0 +1,36 @@ +# agri-baseline/src/detectors/dataset_binary.py +import os +from torch.utils.data import Dataset +from PIL import Image + +class BinaryDiseaseDataset(Dataset): + """ + Dataset wrapper that maps: + - healthy folders -> label 0 + - all disease folders -> label 1 + Keeps also the original folder name for optional subtype info. 
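+
+    Usage sketch (root layout is illustrative: root/<class_folder>/<image files>):
+        ds = BinaryDiseaseDataset("data/train")
+        img, label, cls_name = ds[0]  # label: 0 = healthy, 1 = diseased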
+ """ + def __init__(self, root: str, transform=None): + self.samples = [] + + self.targets = [] + self.transform = transform + for cls in os.listdir(root): + path = os.path.join(root, cls) + if not os.path.isdir(path): + continue + label = 0 if "healthy" in cls.lower() else 1 + for f in os.listdir(path): + if f.lower().endswith((".jpg", ".png", ".jpeg")): + self.samples.append((os.path.join(path, f), label, cls)) + self.targets.append(label) + + def __len__(self): + return len(self.samples) + + def __getitem__(self, idx): + path, label, cls_name = self.samples[idx] + img = Image.open(path).convert("RGB") + if self.transform: + img = self.transform(img) + return img, label, cls_name diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/disease.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/disease.py new file mode 100644 index 000000000..653f673ae --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/disease.py @@ -0,0 +1,202 @@ +import cv2 +import numpy as np + +from ...agri_baseline.src.detectors.base import Detection +from ..pipeline import config + + +class DiseaseDetector: + """ + Improved disease detector: + - Leaf mask (HSV/LAB) to isolate plant tissue. + - Candidate lesion detection: + 1) Yellow/Brown in HSV (stress/necrosis). + 2) Dark + Brown in LAB (low L, high b). + - Noise cleaning and merging. + - Shape filtering by circularity (detect "spots"). + - Confidence weighted by darkness, saturation, and circularity. + """ + + name = "disease" + + # HSV thresholds for yellow/brown (tunable) + HSV_YELLOW = ((10, 50, 40), (45, 255, 255)) + HSV_BROWN1 = ((0, 80, 30), (10, 255, 200)) + HSV_BROWN2 = ((160, 80, 30), (179, 255, 200)) + + # LAB thresholds for dark/brown lesions (tunable) + LAB_L_MAX_DARK = 145 # Lower L means darker + LAB_B_MIN_BROWN = 135 # Higher b means more yellow/brown + + # Shape filtering + MIN_CIRCULARITY = 0.22 # 4πA/P^2; range 0..1 + MAX_ASPECT_RATIO = 2.2 # Avoid elongated regions + DILATE_MERGE_RADIUS = 4 + + def __init__(self): + # Minimum area from config (fallback to default if missing) + self.min_area = int(getattr(config, "MIN_BBOX_AREA", 60)) + + def run(self, bgr_image: np.ndarray) -> list[Detection]: + h, w = bgr_image.shape[:2] + + # ---------- 1) Leaf isolation ---------- + hsv = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2HSV) + lab = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2LAB) + H, S, V = cv2.split(hsv) + L, A, B = cv2.split(lab) + + # Green mask in HSV (broad range for leaf tissue) + green1 = cv2.inRange(hsv, (35, 30, 30), (85, 255, 255)) + green2 = cv2.inRange(hsv, (25, 25, 40), (95, 255, 255)) + leaf_mask = cv2.bitwise_or(green1, green2) + + # Contrast enhancement with CLAHE on L channel + clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + L_eq = clahe.apply(L) + + # Basic cleaning of leaf mask + leaf_mask = cv2.medianBlur(leaf_mask, 5) + leaf_mask = cv2.morphologyEx( + leaf_mask, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1 + ) + + # ---------- 2) Lesion candidates ---------- + # (a) Yellow/Brown in HSV + yellow = cv2.inRange(hsv, self.HSV_YELLOW[0], self.HSV_YELLOW[1]) + brown1 = cv2.inRange(hsv, self.HSV_BROWN1[0], self.HSV_BROWN1[1]) + brown2 = cv2.inRange(hsv, self.HSV_BROWN2[0], self.HSV_BROWN2[1]) + hsv_spots = cv2.bitwise_or(yellow, cv2.bitwise_or(brown1, brown2)) + + # (b) Dark + Brownish in LAB + dark = cv2.threshold(L_eq, self.LAB_L_MAX_DARK, 255, cv2.THRESH_BINARY_INV)[1] + 
brownish = cv2.threshold(B, self.LAB_B_MIN_BROWN, 255, cv2.THRESH_BINARY)[1] + lab_spots = cv2.bitwise_and(dark, brownish) + + # Combine HSV and LAB candidates, restricted to leaf mask + candidates = cv2.bitwise_or(hsv_spots, lab_spots) + candidates = cv2.bitwise_and(candidates, leaf_mask) + + # ---------- 3) Cleaning & merging ---------- + candidates = cv2.medianBlur(candidates, 3) + candidates = cv2.morphologyEx( + candidates, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=1 + ) + + # Dilate slightly to merge nearby spots + if self.DILATE_MERGE_RADIUS > 0: + k = cv2.getStructuringElement( + cv2.MORPH_ELLIPSE, + (2 * self.DILATE_MERGE_RADIUS + 1, 2 * self.DILATE_MERGE_RADIUS + 1), + ) + candidates = cv2.dilate(candidates, k, iterations=1) + + # ---------- 4) Contours & filtering ---------- + cnts, _ = cv2.findContours(candidates, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + dets = [] + for c in cnts: + area = cv2.contourArea(c) + if area < self.min_area: + continue + + x, y, bw, bh = cv2.boundingRect(c) + + # Circularity: 4πA / P^2 + perim = cv2.arcLength(c, True) + circularity = (4.0 * np.pi * area / (perim ** 2 + 1e-6)) if perim > 0 else 0.0 + if circularity < self.MIN_CIRCULARITY: + continue + + # Aspect ratio filtering + ar = max(bw, bh) / (min(bw, bh) + 1e-6) + if ar > self.MAX_ASPECT_RATIO: + continue + + # Extract subregion for scoring + hsv_box = hsv[y : y + bh, x : x + bw] + lab_box = lab[y : y + bh, x : x + bw] + + Lb = lab_box[:, :, 0].astype(np.float32) + Sb = hsv_box[:, :, 1].astype(np.float32) + + # Darkness score (lower L → higher score) + dark_score = np.clip((180.0 - float(np.mean(Lb))) / 180.0, 0.0, 1.0) + # Saturation score (higher S → higher score) + sat_score = np.clip(float(np.mean(Sb)) / 255.0, 0.0, 1.0) + + # Final weighted confidence + conf = 0.45 * dark_score + 0.35 * sat_score + 0.20 * np.clip(circularity, 0.0, 1.0) + conf = float(np.clip(conf, 0.0, 1.0)) + + dets.append( + Detection( + label="disease_spot", + confidence=conf, + x=int(x), + y=int(y), + w=int(bw), + h=int(bh), + area=int(area), + ) + ) + + # ---------- 5) Merge overlapping boxes ---------- + dets = self._merge_overlaps(dets, iou_thresh=0.5) + return dets + + # ---------- IoU helper ---------- + @staticmethod + def _iou(a, b): + ax1, ay1, ax2, ay2 = a.x, a.y, a.x + a.w, a.y + a.h + bx1, by1, bx2, by2 = b.x, b.y, b.x + b.w, b.y + b.h + inter_x1, inter_y1 = max(ax1, bx1), max(ay1, by1) + inter_x2, inter_y2 = min(ax2, bx2), min(ay2, by2) + iw, ih = max(0, inter_x2 - inter_x1), max(0, inter_y2 - inter_y1) + inter = iw * ih + if inter == 0: + return 0.0 + area_a = a.w * a.h + area_b = b.w * b.h + return inter / float(area_a + area_b - inter + 1e-6) + + def _merge_overlaps(self, dets, iou_thresh=0.5): + if not dets: + return dets + dets = sorted(dets, key=lambda d: d.confidence, reverse=True) + kept = [] + while dets: + base = dets.pop(0) + to_merge = [base] + remain = [] + for d in dets: + if self._iou(base, d) >= iou_thresh: + to_merge.append(d) + else: + remain.append(d) + dets = remain + + # Merge into one bounding box + xs = [d.x for d in to_merge] + ys = [d.y for d in to_merge] + x2s = [d.x + d.w for d in to_merge] + y2s = [d.y + d.h for d in to_merge] + x = int(min(xs)) + y = int(min(ys)) + w = int(max(x2s) - x) + h = int(max(y2s) - y) + + # Average confidence + conf = float(np.mean([d.confidence for d in to_merge])) + area = int(w * h) + kept.append( + Detection( + label="disease_spot", + confidence=conf, + x=x, + y=y, + w=w, + h=h, + area=area, + ) + ) + return kept diff --git 
a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/eval_multi_levels.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/eval_multi_levels.py new file mode 100644 index 000000000..c39ea4e5a --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/eval_multi_levels.py @@ -0,0 +1,167 @@ +# eval_multi_levels.py +import torch +import numpy as np +from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, classification_report +from torch.utils.data import DataLoader +import cv2 +import albumentations as A +from albumentations.pytorch import ToTensorV2 + +from agri_baseline.src.detectors.train.dictionary import CLASS_MAPPING +from agri_baseline.src.detectors.cnn_multi_classifier import build_multi_model +from torchvision import datasets +import seaborn as sns +import matplotlib.pyplot as plt + +# ------------------------ +# Paths +# ------------------------ +DATA_DIR = "data_balanced/PlantDoc/test" +MODEL_PATH = "models/cnn_multi_stage3.pth" + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# ------------------------ +# Transforms +# ------------------------ +val_transforms = A.Compose([ + A.Resize(224, 224), + A.Normalize(mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225)), + ToTensorV2() +]) + +# ------------------------ +# Dataset wrapper +# ------------------------ +class AlbumentationsDataset(torch.utils.data.Dataset): + def __init__(self, dataset, transform=None): + self.dataset = dataset + self.transform = transform + + def __getitem__(self, idx): + path, label = self.dataset.samples[idx] + image = cv2.imread(path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + if self.transform: + image = self.transform(image=image)["image"] + return image, label + + def __len__(self): + return len(self.dataset) + + +# ------------------------ +# Prepare dataset +# ------------------------ +dataset = datasets.ImageFolder(DATA_DIR) +canonical_classes = sorted(set(CLASS_MAPPING.values())) +class_to_idx = {cls: i for i, cls in enumerate(canonical_classes)} + +new_samples, new_targets = [], [] +for path, label_idx in dataset.samples: + raw_name = dataset.classes[label_idx].lower().replace(" ", "_") + canonical_label = CLASS_MAPPING.get(raw_name) + if canonical_label is None: + raise ValueError(f"Class {raw_name} not found in CLASS_MAPPING") + new_samples.append((path, class_to_idx[canonical_label])) + new_targets.append(class_to_idx[canonical_label]) + +dataset.samples = new_samples +dataset.targets = new_targets +dataset.classes = canonical_classes +dataset.class_to_idx = class_to_idx + +val_dataset = AlbumentationsDataset(dataset, transform=val_transforms) +val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False) + +# ------------------------ +# Load model +# ------------------------ +model = build_multi_model(num_classes=len(canonical_classes)).to(device) +state_dict = torch.load(MODEL_PATH, map_location=device) +model.load_state_dict(state_dict) +model.eval() + +# ------------------------ +# Evaluation +# ------------------------ +all_preds, all_labels = [], [] +with torch.no_grad(): + for images, labels in val_loader: + images, labels = images.to(device), labels.to(device) + outputs = model(images) + _, preds = outputs.max(1) + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + +all_preds = np.array(all_preds) +all_labels = np.array(all_labels) + +# ------------------------ +# Grouping +# 
------------------------ +def to_healthy_sick(cls: str): + return "healthy" if "healthy" in cls else "sick" + +def to_crop(cls: str): + if cls.startswith("tomato"): return "tomato" + if cls.startswith("potato"): return "potato" + if cls.startswith("pepper"): return "pepper" + return "other" + +def to_disease(cls: str): + if "bacterial_spot" in cls: return "bacterial_spot" + if "early_blight" in cls: return "early_blight" + if "late_blight" in cls: return "late_blight" + if "leaf_mold" in cls: return "leaf_mold" + if "septoria_leaf_spot" in cls: return "septoria_leaf_spot" + if "spider_mites" in cls: return "spider_mites" + if "target_spot" in cls: return "target_spot" + if "mosaic_virus" in cls: return "mosaic_virus" + if "yellowleaf_curl_virus" in cls: return "yellowleaf_curl_virus" + return "none" + +idx_to_class = {v: k for k, v in class_to_idx.items()} + +y_true_cls = [idx_to_class[i] for i in all_labels] +y_pred_cls = [idx_to_class[i] for i in all_preds] + +# ------------------------ +# Evaluation per level +# ------------------------ +def evaluate_level(name, y_true, y_pred, labels=None): + acc = accuracy_score(y_true, y_pred) + f1 = f1_score(y_true, y_pred, average="weighted") + print(f"\n===== {name} =====") + print(f"Accuracy: {acc:.4f}") + print(f"F1-score (weighted): {f1:.4f}") + print(classification_report(y_true, y_pred, digits=4)) + cm = confusion_matrix(y_true, y_pred, labels=labels) + if labels: + plt.figure(figsize=(8, 6)) + sns.heatmap(cm, annot=True, fmt="d", xticklabels=labels, yticklabels=labels, cmap="Blues") + plt.title(f"Confusion Matrix - {name}") + plt.xlabel("Predicted") + plt.ylabel("True") + plt.show() + +# Healthy vs Sick +evaluate_level("Healthy vs Sick", + [to_healthy_sick(c) for c in y_true_cls], + [to_healthy_sick(c) for c in y_pred_cls], + labels=["healthy", "sick"]) + +# Crop type +evaluate_level("Crop type", + [to_crop(c) for c in y_true_cls], + [to_crop(c) for c in y_pred_cls], + labels=["tomato", "potato", "pepper", "other"]) + +# Disease type +evaluate_level("Disease type", + [to_disease(c) for c in y_true_cls], + [to_disease(c) for c in y_pred_cls], + labels=["bacterial_spot","early_blight","late_blight","leaf_mold", + "septoria_leaf_spot","spider_mites","target_spot", + "mosaic_virus","yellowleaf_curl_virus","none"]) diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/finetune_multi.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/finetune_multi.py new file mode 100644 index 000000000..e3e5457cd --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/finetune_multi.py @@ -0,0 +1,242 @@ +# finetune_multi.py +import torch +import torch.nn as nn +import torch.optim as optim +from torchvision import datasets +import os +from sklearn.metrics import f1_score +from torch.utils.data import DataLoader, random_split, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau +import albumentations as A +from albumentations.pytorch import ToTensorV2 +import cv2 +import numpy as np + +from agri_baseline.src.detectors.train.dictionary import CLASS_MAPPING +from agri_baseline.src.detectors.cnn_multi_classifier import build_multi_model + + +# ------------------------ +# MixUp +# ------------------------ +def mixup_data(x, y, alpha=1.0): + if alpha > 0: + lam = np.random.beta(alpha, alpha) + else: + lam = 1 + batch_size = x.size()[0] + index = torch.randperm(batch_size).to(x.device) + + mixed_x = lam * x + (1 
- lam) * x[index, :] + y_a, y_b = y, y[index] + return mixed_x, y_a, y_b, lam + +def mixup_criterion(criterion, pred, y_a, y_b, lam): + return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b) + + +# ------------------------ +# Paths +# ------------------------ +DATA_DIR = "data_balanced/PlantDoc" +MODEL_PATH = "models/cnn_multi.pth" +SAVE_PATH = "models/cnn_multi_finetuned.pth" + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +# ------------------------ +# Augmentations +# ------------------------ +train_transforms = A.Compose([ + A.RandomResizedCrop(size=(224, 224), scale=(0.7, 1.0), p=1.0), + A.HorizontalFlip(p=0.5), + A.VerticalFlip(p=0.3), + A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.2, rotate_limit=30, p=0.7), + A.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, p=0.5), + A.RandomBrightnessContrast(p=0.5), + A.GaussianBlur(p=0.3), + A.CoarseDropout(max_height=32, max_width=32, max_holes=1, p=0.3), + A.Normalize(mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225)), + ToTensorV2() +]) + +val_transforms = A.Compose([ + A.Resize(224, 224), + A.Normalize(mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225)), + ToTensorV2() +]) + + +# ------------------------ +# Albumentations Dataset +# ------------------------ +class AlbumentationsDataset(torch.utils.data.Dataset): + def __init__(self, dataset, transform=None): + self.dataset = dataset + self.transform = transform + + def __getitem__(self, idx): + path, label = self.dataset.samples[idx] + image = cv2.imread(path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + if self.transform: + image = self.transform(image=image)["image"] + return image, label + + def __len__(self): + return len(self.dataset) + + +# ------------------------ +# Prepare Dataset +# ------------------------ +def prepare_multi_dataset(path): + dataset = datasets.ImageFolder(path) + new_samples, new_targets = [], [] + canonical_classes = sorted(set(CLASS_MAPPING.values())) + class_to_idx = {cls: i for i, cls in enumerate(canonical_classes)} + + for sample_path, label_idx in dataset.samples: + raw_name = dataset.classes[label_idx].lower().replace(" ", "_") + canonical_label = CLASS_MAPPING.get(raw_name) + if canonical_label is None: + raise ValueError(f"Class {raw_name} not found in CLASS_MAPPING") + new_samples.append((sample_path, class_to_idx[canonical_label])) + new_targets.append(class_to_idx[canonical_label]) + + dataset.samples = new_samples + dataset.targets = new_targets + dataset.classes = canonical_classes + dataset.class_to_idx = class_to_idx + return dataset + + +# ------------------------ +# Load dataset +# ------------------------ +full_dataset = prepare_multi_dataset(os.path.join(DATA_DIR, "train")) +print("Classes:", full_dataset.classes) +print("Total samples:", len(full_dataset)) + +train_size = int(0.8 * len(full_dataset)) +val_size = len(full_dataset) - train_size +train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size]) + +train_dataset = AlbumentationsDataset(train_dataset.dataset, transform=train_transforms) +val_dataset = AlbumentationsDataset(val_dataset.dataset, transform=val_transforms) + +class_counts = np.bincount(full_dataset.targets) +class_weights = 1. 
/ class_counts +sample_weights = [class_weights[t] for t in full_dataset.targets] + +sampler = WeightedRandomSampler(weights=sample_weights, + num_samples=len(sample_weights), + replacement=True) + +train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler) +val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False) + + +# ------------------------ +# Model +# ------------------------ +model = build_multi_model(num_classes=len(full_dataset.classes)).to(device) +state_dict = torch.load(MODEL_PATH, map_location=device) +filtered_state_dict = {k: v for k, v in state_dict.items() if not k.startswith("fc.")} +model.load_state_dict(filtered_state_dict, strict=False) +print("✅ Loaded pretrained backbone") + + +# ------------------------ +# Training setup +# ------------------------ +criterion = nn.CrossEntropyLoss() +optimizer = optim.Adam([ + {"params": model.fc.parameters(), "lr": 1e-3}, +], lr=1e-3) + +scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=3, verbose=True) +best_val_f1 = 0.0 +patience, counter = 5, 0 + + +# ------------------------ +# Gradual Unfreeze +# ------------------------ +def unfreeze(epoch): + if epoch == 5: + for name, param in model.named_parameters(): + if "layer4" in name: + param.requires_grad = True + if epoch == 10: + for param in model.parameters(): + param.requires_grad = True + + +# ------------------------ +# Training Loop +# ------------------------ +EPOCHS = 20 +for epoch in range(EPOCHS): + unfreeze(epoch) + model.train() + total_loss, correct, total = 0.0, 0, 0 + for images, labels in train_loader: + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + images, targets_a, targets_b, lam = mixup_data(images, labels, alpha=0.4) + outputs = model(images) + loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam) + loss.backward() + optimizer.step() + total_loss += loss.item() * images.size(0) + _, preds = outputs.max(1) + correct += preds.eq(labels).sum().item() + total += labels.size(0) + + train_acc = correct / total + train_loss = total_loss / total + + # Validation + model.eval() + all_preds, all_labels = [], [] + val_loss, val_correct, val_total = 0.0, 0, 0 + with torch.no_grad(): + for images, labels in val_loader: + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + val_loss += loss.item() * images.size(0) + _, preds = outputs.max(1) + val_correct += preds.eq(labels).sum().item() + val_total += labels.size(0) + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + val_acc = val_correct / val_total + val_loss /= val_total + val_f1 = f1_score(all_labels, all_preds, average="weighted") + + print(f"Epoch {epoch+1}/{EPOCHS} " + f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} " + f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}") + + scheduler.step(val_loss) + + # Save by F1 + if val_f1 > best_val_f1: + best_val_f1 = val_f1 + counter = 0 + torch.save(model.state_dict(), SAVE_PATH) + print("💾 Model improved (F1) and saved!") + else: + counter += 1 + print(f"⏳ No improvement. EarlyStopping counter: {counter}/{patience}") + if counter >= patience: + print("🛑 Early stopping triggered!") + break + +print(f"✅ Training finished. 
Best model saved to {SAVE_PATH}") diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/finetune_multi_stage3.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/finetune_multi_stage3.py new file mode 100644 index 000000000..eee68bd67 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/finetune_multi_stage3.py @@ -0,0 +1,191 @@ +# finetune_multi_stage3.py +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader, random_split +import albumentations as A +from albumentations.pytorch import ToTensorV2 +import cv2, os +import numpy as np +from sklearn.metrics import f1_score + +from agri_baseline.src.detectors.train.dictionary import CLASS_MAPPING +from agri_baseline.src.detectors.cnn_multi_classifier import build_multi_model +from torchvision import datasets + +# ========================= +# Config +# ========================= +DATA_DIR = "data_balanced/PlantDoc" +PREV_MODEL = "models/cnn_multi_finetuned.pth" +SAVE_PATH = "models/cnn_multi_stage3.pth" + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +# ========================= +# Augmentations +# ========================= +train_tfms = A.Compose([ + A.RandomResizedCrop(size=(224, 224), scale=(0.6, 1.0), p=1.0), + A.HorizontalFlip(p=0.5), + A.VerticalFlip(p=0.3), + A.RandomBrightnessContrast(p=0.4), + A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.2, rotate_limit=30, p=0.5), + A.GaussianBlur(p=0.2), + A.RandomGamma(p=0.3), + A.Normalize(mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225)), + ToTensorV2() +]) + +val_tfms = A.Compose([ + A.Resize(224, 224), + A.Normalize(mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225)), + ToTensorV2() +]) + + +# ========================= +# Dataset wrapper +# ========================= +class AlbumentationsDataset(torch.utils.data.Dataset): + def __init__(self, dataset, transform=None): + self.dataset = dataset + self.transform = transform + + def __getitem__(self, idx): + path, label = self.dataset.samples[idx] + img = cv2.imread(path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if self.transform: + img = self.transform(image=img)["image"] + return img, label + + def __len__(self): + return len(self.dataset) + + +def prepare_dataset(path): + ds = datasets.ImageFolder(path) + new_samples, new_targets = [], [] + canonical = sorted(set(CLASS_MAPPING.values())) + class_to_idx = {cls: i for i, cls in enumerate(canonical)} + + for pth, idx in ds.samples: + raw = ds.classes[idx].lower().replace(" ", "_") + canon = CLASS_MAPPING.get(raw) + if canon is None: + raise ValueError(f"Class {raw} missing in CLASS_MAPPING") + new_samples.append((pth, class_to_idx[canon])) + new_targets.append(class_to_idx[canon]) + + ds.samples = new_samples + ds.targets = new_targets + ds.classes = canonical + ds.class_to_idx = class_to_idx + return ds + + +# ========================= +# Progressive unfreezing +# ========================= +def unfreeze_layers(model, stages): + """ + stages: List of layer names to release (e.g.: ["layer3", "layer2"]) + """ + for name, param in model.named_parameters(): + for stage in stages: + if stage in name: + param.requires_grad = True + + +# ========================= +# Training loop +# ========================= +def train_stage3(model, train_loader, val_loader, epochs=20): + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(filter(lambda p: p.requires_grad, 
model.parameters()), lr=1e-4) + + best_f1, patience, counter = 0, 5, 0 + for epoch in range(epochs): + model.train() + total_loss, total_correct, total = 0, 0, 0 + for xb, yb in train_loader: + xb, yb = xb.to(device), yb.to(device) + optimizer.zero_grad() + out = model(xb) + loss = criterion(out, yb) + loss.backward() + optimizer.step() + total_loss += loss.item() * xb.size(0) + _, preds = out.max(1) + total_correct += preds.eq(yb).sum().item() + total += yb.size(0) + + train_acc = total_correct / total + train_loss = total_loss / total + + # Validation + model.eval() + val_loss, val_correct, val_total = 0, 0, 0 + all_preds, all_labels = [], [] + with torch.no_grad(): + for xb, yb in val_loader: + xb, yb = xb.to(device), yb.to(device) + out = model(xb) + loss = criterion(out, yb) + val_loss += loss.item() * xb.size(0) + _, preds = out.max(1) + val_correct += preds.eq(yb).sum().item() + val_total += yb.size(0) + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(yb.cpu().numpy()) + + val_acc = val_correct / val_total + val_loss /= val_total + val_f1 = f1_score(all_labels, all_preds, average="weighted") + + print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} Acc: {train_acc:.3f} " + f"| Val Loss: {val_loss:.4f} Acc: {val_acc:.3f} F1: {val_f1:.3f}") + + if val_f1 > best_f1: + best_f1 = val_f1 + counter = 0 + torch.save(model.state_dict(), SAVE_PATH) + print(f"💾 Model improved (F1={val_f1:.3f}) and saved!") + else: + counter += 1 + if counter >= patience: + print("🛑 EarlyStopping triggered.") + break + + +# ========================= +# Main +# ========================= +if __name__ == "__main__": + full_ds = prepare_dataset(os.path.join(DATA_DIR, "train")) + train_size = int(0.8 * len(full_ds)) + val_size = len(full_ds) - train_size + train_ds, val_ds = random_split(full_ds, [train_size, val_size]) + + train_ds = AlbumentationsDataset(train_ds.dataset, transform=train_tfms) + val_ds = AlbumentationsDataset(val_ds.dataset, transform=val_tfms) + + train_loader = DataLoader(train_ds, batch_size=32, shuffle=True) + val_loader = DataLoader(val_ds, batch_size=32) + + model = build_multi_model(num_classes=len(full_ds.classes)).to(device) + model.load_state_dict(torch.load(PREV_MODEL, map_location=device)) + + # In step 3 we will release additional layers beyond layer4 + for p in model.parameters(): + p.requires_grad = False + for stage in ["layer3", "layer4", "fc"]: + unfreeze_layers(model, [stage]) + print(f"🔓 Unfroze {stage}") + + train_stage3(model, train_loader, val_loader, epochs=15) + print(f"✅ Training done. 
Best model saved to {SAVE_PATH}") diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/train_binary_multi.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/train_binary_multi.py new file mode 100644 index 000000000..0d48afb36 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/research/detectors/train/train_binary_multi.py @@ -0,0 +1,152 @@ +# agri-baseline/src/detectors/train_binary_multi.py +import argparse +import os +import torch +import torch.nn as nn +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +from torch.optim.lr_scheduler import ReduceLROnPlateau +import numpy as np + +from ...agri_baseline.src.detectors.cnn_binary_classifier import build_binary_model +from ...agri_baseline.src.detectors.cnn_multi_classifier import build_multi_model +from ...agri_baseline.src.detectors.dataset_binary import BinaryDiseaseDataset + + +def train_model(model, dataloader, val_dl, device, epochs, lr, out_path): + opt = torch.optim.Adam(model.parameters(), lr=lr) + loss_fn = nn.CrossEntropyLoss() + scheduler = ReduceLROnPlateau(opt, mode="min", factor=0.5, patience=3, verbose=True) + + best_val_loss = float("inf") + patience, counter = 5, 0 + + for epoch in range(epochs): + model.train() + running_loss, correct, total = 0.0, 0, 0 + for batch in dataloader: + if len(batch) == 3: + xb, yb, _ = batch + else: + xb, yb = batch + xb, yb = xb.to(device), yb.to(device) + + opt.zero_grad() + preds = model(xb) + loss = loss_fn(preds, yb) + loss.backward() + opt.step() + + running_loss += loss.item() * xb.size(0) + _, predicted = preds.max(1) + correct += predicted.eq(yb).sum().item() + total += yb.size(0) + + acc = correct / total + + # Validation + val_loss, val_acc = evaluate(model, val_dl, device, loss_fn) + print(f"Epoch {epoch+1}/{epochs} " + f"Train Loss={running_loss/total:.4f} Train Acc={acc:.3f} " + f"Val Loss={val_loss:.4f} Val Acc={val_acc:.3f}") + + scheduler.step(val_loss) + + # EarlyStopping + if val_loss < best_val_loss: + best_val_loss = val_loss + counter = 0 + torch.save(model.state_dict(), out_path) + print(f"💾 Saved best model {out_path}") + else: + counter += 1 + print(f"⏳ EarlyStopping counter {counter}/{patience}") + if counter >= patience: + print("🛑 Early stopping triggered") + break + + +def evaluate(model, dataloader, device, loss_fn): + model.eval() + correct, total, total_loss = 0, 0, 0.0 + with torch.no_grad(): + for batch in dataloader: + if len(batch) == 3: + xb, yb, _ = batch + else: + xb, yb = batch + xb, yb = xb.to(device), yb.to(device) + preds = model(xb) + loss = loss_fn(preds, yb) + total_loss += loss.item() * xb.size(0) + _, predicted = preds.max(1) + correct += predicted.eq(yb).sum().item() + total += yb.size(0) + return total_loss/total, correct/total + + +def make_sampler(targets): + class_counts = np.bincount(targets) + class_weights = 1. 
/ class_counts + sample_weights = [class_weights[t] for t in targets] + return WeightedRandomSampler(weights=sample_weights, + num_samples=len(sample_weights), + replacement=True) + + +def main(): + p = argparse.ArgumentParser() + p.add_argument("--data", required=True, help="Dataset root (with train/val/test)") + p.add_argument("--out", default="./models") + p.add_argument("--epochs", type=int, default=10) + p.add_argument("--batch", type=int, default=32) + p.add_argument("--lr", type=float, default=1e-3) + p.add_argument("--device", default="cpu") + args = p.parse_args() + + device = torch.device(args.device) + + # Augmentations + train_tfms = transforms.Compose([ + transforms.RandomResizedCrop(224, scale=(0.8, 1.0)), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(15), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2), + transforms.ToTensor(), + transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]) + ]) + test_tfms = transforms.Compose([ + transforms.Resize((224,224)), + transforms.ToTensor(), + transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]) + ]) + + # Binary dataset + train_bin = BinaryDiseaseDataset(os.path.join(args.data,"train"), transform=train_tfms) + val_bin = BinaryDiseaseDataset(os.path.join(args.data,"val"), transform=test_tfms) + + sampler_bin = make_sampler(train_bin.targets) + train_dl_bin = DataLoader(train_bin, batch_size=args.batch, sampler=sampler_bin) + val_dl_bin = DataLoader(val_bin, batch_size=args.batch) + + model_bin = build_binary_model().to(device) + train_model(model_bin, train_dl_bin, val_dl_bin, device, args.epochs, args.lr, + os.path.join(args.out, "cnn_binary.pth")) + + # Multi-class dataset + train_multi = datasets.ImageFolder(os.path.join(args.data,"train"), transform=train_tfms) + val_multi = datasets.ImageFolder(os.path.join(args.data,"val"), transform=test_tfms) + + sampler_multi = make_sampler([y for _, y in train_multi.samples]) + train_dl_multi = DataLoader(train_multi, batch_size=args.batch, sampler=sampler_multi) + val_dl_multi = DataLoader(val_multi, batch_size=args.batch) + + model_multi = build_multi_model(num_classes=len(train_multi.classes)).to(device) + train_model(model_multi, train_dl_multi, val_dl_multi, device, args.epochs, args.lr, + os.path.join(args.out, "cnn_multi.pth")) + + torch.save({"classes": train_multi.classes}, + os.path.join(args.out,"multi_classes.pth")) + +if __name__=="__main__": + main() diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/conftest.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/conftest.py new file mode 100644 index 000000000..486e4fe52 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/conftest.py @@ -0,0 +1,29 @@ +# tests/conftest.py +import os +import pytest +from sqlalchemy import text +from agri_baseline.src.pipeline.db import get_engine + +@pytest.fixture(autouse=True, scope="function") +def _ensure_local_db_url(monkeypatch): + """ + Guarantee DATABASE_URL exists for tests. + """ + monkeypatch.setenv( + "DATABASE_URL", + os.getenv( + "DATABASE_URL", + "postgresql+psycopg2://missions_user:pg123@localhost:5432/missions_db", + ), + ) + +@pytest.fixture(autouse=True) +def _clean_tables_before_test(): + """ + Clean key tables before each test so counts can increase deterministically. + Adjust the list to your schema. 
+ """ + tables = ["anomalies", "tile_stats", "event_logs"] + with get_engine().begin() as conn: + for t in tables: + conn.execute(text(f"DELETE FROM {t}")) diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_batch_runner.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_batch_runner.py new file mode 100644 index 000000000..db88f63bc --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_batch_runner.py @@ -0,0 +1,68 @@ +# Purpose: End-to-end tests for the BatchRunner pipeline. +# Verifies that running on image folders or single images correctly writes results to the database. + +import pytest +from pathlib import Path +from sqlalchemy import text + +from agri_baseline.src.batch_runner import BatchRunner +from agri_baseline.src.pipeline.db import get_engine + + +@pytest.fixture +def folder_with_images() -> Path: + """ + Return a folder that contains a few test images. + Adjust the path if your dataset sits elsewhere. + """ + folder = Path("./data_balanced/PlantDoc/train/Bell_pepper leaf") + assert folder.exists(), f"Images folder not found: {folder.resolve()}" + return folder + + +def _count(conn, sql: str, params: dict | None = None) -> int: + """ + Small helper: run a COUNT(*) query safely with SQLAlchemy 2.0. + """ + return conn.execute(text(sql), params or {}).scalar() or 0 + + +def test_run_batch_on_images_folder(folder_with_images: Path): + """ + End-to-end: run the batch pipeline on a folder and verify DB writes happened. + We compare counts before/after instead of relying on specific image_id values. + """ + runner = BatchRunner() + + with get_engine().begin() as conn: + before = _count(conn, "SELECT COUNT(1) FROM anomalies") + + runner.run_folder(folder_with_images) + + with get_engine().begin() as conn: + after = _count(conn, "SELECT COUNT(1) FROM anomalies") + + assert after > before, "No detections were written to the database." + + +def test_process_single_image(): + """ + Process a single image and assert the DB anomalies count has increased. + This avoids fragile assumptions on the exact image_id in the DB. + """ + image_path = Path( + "./data_balanced/PlantDoc/train/Bell_pepper leaf/0f3s5A.jpg" + ) + assert image_path.exists(), f"Test image not found: {image_path.resolve()}" + + runner = BatchRunner() + + with get_engine().begin() as conn: + before = _count(conn, "SELECT COUNT(1) FROM anomalies") + + runner.process_image(image_path) + + with get_engine().begin() as conn: + after = _count(conn, "SELECT COUNT(1) FROM anomalies") + + assert after > before, "Single image was not processed correctly." diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_disease_model.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_disease_model.py new file mode 100644 index 000000000..c2cb78625 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_disease_model.py @@ -0,0 +1,17 @@ +# Purpose: Unit tests for the DiseaseDetector class. +# Ensures the model loads successfully and returns valid detections on dummy input. + +import pytest +from agri_baseline.src.detectors.disease_model import DiseaseDetector +import numpy as np + +def test_disease_detector_model_loads(): + detector = DiseaseDetector(model_path="models/cnn_multi_stage3.pth") + assert detector.model is not None, "Model failed to load correctly." 
+ +def test_disease_detector_predicts(): + detector = DiseaseDetector() + img = np.zeros((224, 224, 3)) # Dummy image for testing + detections = detector.run(img) + assert len(detections) > 0, "Model did not return any detections." + assert detections[0].confidence > 0, "Detection confidence should be greater than 0." diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_minio_integration_mock.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_minio_integration_mock.py new file mode 100644 index 000000000..369b7070c --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_minio_integration_mock.py @@ -0,0 +1,120 @@ +# Purpose: Mock-based integration tests for MinIO storage. +# Simulates MinIO object downloads, saves them locally, and verifies images can be loaded successfully. + +from __future__ import annotations + +from io import BytesIO +from pathlib import Path +from typing import Dict, Iterable + +import pytest +from PIL import Image + +from agri_baseline.src.storage import minio_sync +from agri_baseline.src.storage.minio_client import MinioConfig +from agri_baseline.src.pipeline.utils import load_image + + +class _FakeObj: + """Mimics the object returned by client.list_objects().""" + + def __init__(self, object_name: str) -> None: + self.object_name = object_name + + +class _FakeResponse: + """ + Minimal MinIO get_object-like response object. + + Provides: + - read(amt: int | None = None) -> bytes + - close() -> None + - release_conn() -> None + + This mirrors what MinIO/urllib3 responses typically expose, so production code + that calls release_conn() won't fail under the mock. + """ + + def __init__(self, data: bytes) -> None: + self._buf = BytesIO(data) + + def read(self, amt: int | None = None) -> bytes: + return self._buf.read() if amt is None else self._buf.read(amt) + + def close(self) -> None: + self._buf.close() + + def release_conn(self) -> None: + # In real clients this releases underlying HTTP resources. + # No-op here is fine for tests. + pass + + +class _FakeMinio: + """ + Fake MinIO client that supports the subset used by minio_sync: + - list_objects(bucket, prefix, recursive) -> Iterable[_FakeObj] + - get_object(bucket, key) -> _FakeResponse + """ + + def __init__(self, payload_by_key: Dict[str, bytes]) -> None: + self._payload_by_key = payload_by_key + + def list_objects(self, bucket: str, prefix: str, recursive: bool) -> Iterable[_FakeObj]: + for key in self._payload_by_key: + if key.startswith(prefix) and not key.endswith("/"): + yield _FakeObj(key) + + def get_object(self, bucket: str, key: str) -> _FakeResponse: + data = self._payload_by_key[key] + return _FakeResponse(data) + + +@pytest.fixture +def fake_jpeg() -> bytes: + """Create a tiny deterministic JPEG in-memory.""" + img = Image.new("RGB", (32, 24), (10, 20, 30)) + buf = BytesIO() + img.save(buf, format="JPEG") + return buf.getvalue() + + +def test_minio_download_and_load(monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + fake_jpeg: bytes) -> None: + """ + Flow under test: + 1) list prefix from MinIO (fake). + 2) download files to local cache dir. + 3) ensure those files exist and can be loaded with load_image. 
+ """ + + # 1) Arrange fake MinIO payload (two images under mission-123/) + payload = { + "mission-123/imgA.jpg": fake_jpeg, + "mission-123/imgB.jpg": fake_jpeg, + } + fake_client = _FakeMinio(payload) + + # 2) Monkeypatch build_client to return our fake client + monkeypatch.setattr(minio_sync, "build_client", lambda cfg: fake_client, raising=True) + + # 3) Prepare config and download target folder + cfg = MinioConfig( + endpoint="127.0.0.1:9000", + access_key="minioadmin", + secret_key="minioadmin", + bucket="leaves", + secure=False, + ) + out_dir = tmp_path / "cache" + + # 4) Act: download objects to local dir + paths = minio_sync.download_prefix_to_dir(cfg, prefix="mission-123", local_dir=out_dir) + + # 5) Assert: files were written and are loadable + assert len(paths) == 2, f"Expected 2 files, got {len(paths)}" + for p in paths: + assert p.exists() and p.is_file(), f"Missing file: {p}" + img, w, h = load_image(str(p)) + assert img is not None and w > 0 and h > 0, f"Failed to load image {p}" diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_run_detectors.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_run_detectors.py new file mode 100644 index 000000000..6e0f13e26 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_run_detectors.py @@ -0,0 +1,21 @@ +# Purpose: Tests for running the DiseaseDetector model. +# Checks that detections are produced with valid confidence values on dummy images. + +import pytest +from agri_baseline.src.detectors.disease_model import DiseaseDetector +import numpy as np + +@pytest.fixture +def dummy_image(): + """Provide a dummy image for testing.""" + return np.zeros((224, 224, 3)) # Black dummy image + +def test_disease_detector_runs(dummy_image): + detector = DiseaseDetector() + detections = detector.run(dummy_image) + assert len(detections) > 0, "Disease detection did not return any detections." + assert detections[0].confidence > 0, "Detection confidence should be greater than 0." + +def test_disease_detector_model_loads(): + detector = DiseaseDetector(model_path="models/cnn_multi_stage3.pth") + assert detector.model is not None, "Model failed to load correctly." diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_utils_local.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_utils_local.py new file mode 100644 index 000000000..ab76fe8cf --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_utils_local.py @@ -0,0 +1,27 @@ +# Purpose: Local unit tests for utility functions. +# Covers image loading, image ID extraction, and bounding box clamping logic. 
+ +from pathlib import Path +from PIL import Image +from agri_baseline.src.pipeline.utils import load_image, image_id_from_path, clamp_bbox + +def _write_test_image(tmp_dir: Path, name: str = "test.jpg") -> Path: + img = Image.new("RGB", (64, 48), (127, 200, 50)) + path = tmp_dir / name + img.save(path, format="JPEG") + return path + +def test_load_image_local(tmp_path: Path): + img_path = _write_test_image(tmp_path) + img, w, h = load_image(str(img_path)) + assert img is not None + assert (w, h) == (64, 48) + +def test_image_id_from_path_no_fs(tmp_path: Path): + fake_path = tmp_path / "nested" / "test.jpg" # no file needed + image_id = image_id_from_path(str(fake_path)) + assert isinstance(image_id, str) and image_id + +def test_clamp_bbox_pure(): + x, y, w, h = clamp_bbox(10, 10, 250, 250, 224, 224) + assert x >= 0 and y >= 0 and w <= 224 and h <= 224 diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_validator.py b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_validator.py new file mode 100644 index 000000000..17957c530 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Detection_Jobs/tests/test_validator.py @@ -0,0 +1,119 @@ +# Purpose: Integration tests for the Validator module. +# Verifies event logging from findings and correctness of batch summary generation in the database. + +import pytest +from sqlalchemy import text + +from agri_baseline.src.validator.validator import Validator +from agri_baseline.src.validator.rules import Finding +from agri_baseline.src.pipeline.db import get_engine +from agri_baseline.src.pipeline import config +from agri_baseline.src.pipeline.db import get_engine + +@pytest.fixture(autouse=True) +def _seed_anomalies_for_summary(): + """ + Ensure the DB has minimal data for batch_summary: + - device 'device-1' + - anomaly type id=1 + - mission id=1 with small polygon + - two anomalies with same image_id and non-null geom + Idempotent: safe to run before every test. + """ + with get_engine().begin() as conn: + conn.exec_driver_sql(""" + INSERT INTO devices(device_id, model, owner, active) + VALUES ('device-1','sim','lab',true) + ON CONFLICT (device_id) DO NOTHING; + """) + conn.exec_driver_sql(""" + INSERT INTO anomaly_types(anomaly_type_id, code, description) + VALUES (1,'disease_spot','Leaf disease spot') + ON CONFLICT (anomaly_type_id) DO NOTHING; + """) + conn.exec_driver_sql(""" + INSERT INTO missions(mission_id, start_time, area_geom) + VALUES (1, now(), ST_GeomFromText('POLYGON((0 0,1 0,1 1,0 1,0 0))',4326)) + ON CONFLICT (mission_id) DO NOTHING; + """) + conn.exec_driver_sql(""" + INSERT INTO anomalies(mission_id, device_id, ts, anomaly_type_id, severity, details, geom) + VALUES + (1, 'device-1', now(), 1, 0.6, + '{"image_id":"seed_img_for_summary"}'::jsonb, + ST_GeomFromText('POINT(0.50 0.50)',4326)), + (1, 'device-1', now(), 1, 0.7, + '{"image_id":"seed_img_for_summary"}'::jsonb, + ST_GeomFromText('POINT(0.55 0.52)',4326)) + ON CONFLICT DO NOTHING; + """) + yield + +@pytest.fixture +def dummy_finding() -> Finding: + """ + Create a minimal Finding to simulate a validator output. + Scope/value names should match your Validator implementation. + """ + return Finding( + scope="image", + image_id="test_image", + rule="bbox_oob", + severity="warn", + message="BBox out of bounds", + ) + + +def _count(conn, sql: str, params: dict | None = None) -> int: + """ + Small helper: run a COUNT(*) query safely with SQLAlchemy 2.0. 
+ """ + return conn.execute(text(sql), params or {}).scalar() or 0 + + +def test_validator_image_findings(dummy_finding: Finding): + """ + Ensure validator writes a record into event_logs for the given finding. + We assert a strictly increasing count for the message we inserted. + """ + validator = Validator() + + with get_engine().begin() as conn: + before = _count( + conn, + "SELECT COUNT(1) FROM event_logs WHERE message = :msg", + {"msg": dummy_finding.message}, + ) + + validator.image_findings([dummy_finding]) + + with get_engine().begin() as conn: + after = _count( + conn, + "SELECT COUNT(1) FROM event_logs WHERE message = :msg", + {"msg": dummy_finding.message}, + ) + + assert after > before, "Finding was not written to event_logs." + + +def test_batch_summary(): + """ + Run batch_summary and verify tile_stats is populated or remains populated. + We allow idempotency (>=) but also require that there is some data (> 0). + """ + validator = Validator() + + with get_engine().begin() as conn: + print("DEBUG DB_URL:", config.DB_URL) + print("DEBUG anomalies:", conn.exec_driver_sql("SELECT COUNT(*) FROM anomalies").scalar()) + print("DEBUG tile_stats:", conn.exec_driver_sql("SELECT COUNT(*) FROM tile_stats").scalar()) + before = _count(conn, "SELECT COUNT(1) FROM tile_stats") + + validator.batch_summary() + + with get_engine().begin() as conn: + after = _count(conn, "SELECT COUNT(1) FROM tile_stats") + + assert after >= before, "tile_stats count unexpectedly decreased." + assert after > 0, "No images found in tile_stats for batch summary." diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/Makefile b/airflow/leaf-pipeline/projects/Detection_Jobs/Makefile new file mode 100644 index 000000000..94e2c9d4c --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/Makefile @@ -0,0 +1,7 @@ +.PHONY: ci-e2e ci-detection + +ci-e2e: + cd e2e_kafka_flink && pytest tests --cov=e2e_pipeline --cov-report=xml --maxfail=1 -v + +ci-detection: + cd agri-baseline && pytest tests --cov=agri_baseline --cov-report=xml --maxfail=1 -v diff --git a/airflow/leaf-pipeline/projects/Detection_Jobs/VENDORED_FROM.txt b/airflow/leaf-pipeline/projects/Detection_Jobs/VENDORED_FROM.txt new file mode 100644 index 000000000..b1786ac32 --- /dev/null +++ b/airflow/leaf-pipeline/projects/Detection_Jobs/VENDORED_FROM.txt @@ -0,0 +1,4 @@ +VENDORED FROM (saved 2025-11-09T04:18:09+02:00) +------------------------------------- +origin https://github.com/KamaTechOrg/AgCloud.git (fetch) [blob:none] +origin https://github.com/KamaTechOrg/AgCloud.git (push) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/.gitignore b/airflow/leaf-pipeline/projects/disease-monitor/.gitignore new file mode 100644 index 000000000..06593e32e --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/.gitignore @@ -0,0 +1,55 @@ +# ==== OS / IDE ==== +.DS_Store +Thumbs.db +.vscode/ +.idea/ + +# ==== Node ==== +node_modules/ +dist/ + +# ==== Python ==== +__pycache__/ +*.py[cod] +*.pyc +*.pyo +*.so +*.dylib + +# ==== Virtual envs ==== +.venv/ +venv/ +ENV/ +env/ + +# ==== Packaging / build ==== +build/ +*.egg-info/ + +# ==== Environment / Secrets ==== +.env +.env.* + +# ==== Data / Notebooks / Logs ==== +*.log +*.ipynb +.ipynb_checkpoints/ + +# ==== Artifacts / Wheels / Models ==== +artifacts/ +.wheels/ +wheels/ +*.whl +*.pt +*.pth +*.bin + +# ==== Coverage reports ==== +.pytest_cache/ +.coverage +coverage.xml +htmlcov/ + +# ==== gRPC generated (נוצרים בבילד דוקר) ==== +server/embed_pb2.py +server/embed_pb2_grpc.py diff --git 
a/airflow/leaf-pipeline/projects/disease-monitor/Makefile b/airflow/leaf-pipeline/projects/disease-monitor/Makefile new file mode 100644 index 000000000..94e2c9d4c --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/Makefile @@ -0,0 +1,7 @@ +.PHONY: ci-e2e ci-detection + +ci-e2e: + cd e2e_kafka_flink && pytest tests --cov=e2e_pipeline --cov-report=xml --maxfail=1 -v + +ci-detection: + cd agri-baseline && pytest tests --cov=agri_baseline --cov-report=xml --maxfail=1 -v diff --git a/airflow/leaf-pipeline/projects/disease-monitor/VENDORED_FROM.txt b/airflow/leaf-pipeline/projects/disease-monitor/VENDORED_FROM.txt new file mode 100644 index 000000000..b1786ac32 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/VENDORED_FROM.txt @@ -0,0 +1,4 @@ +VENDORED FROM (saved 2025-11-09T04:18:09+02:00) +------------------------------------- +origin https://github.com/KamaTechOrg/AgCloud.git (fetch) [blob:none] +origin https://github.com/KamaTechOrg/AgCloud.git (push) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/.dockerignore b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/.dockerignore new file mode 100644 index 000000000..9bd273d4f --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/.dockerignore @@ -0,0 +1,25 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +.venv/ +venv/ + +# Tests and caches +.pytest_cache/ +tests/ + +# Local data / artifacts +data/ +alerts.db + +# Git +.git/ +.gitignore + +# IDE +.vscode/ +.idea/ + diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/.gitignore b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/.gitignore new file mode 100644 index 000000000..da73fe5e4 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/.gitignore @@ -0,0 +1,2 @@ +# Ignore local data +data/ diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile new file mode 100644 index 000000000..49d99c76c --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile @@ -0,0 +1,34 @@ +FROM mcr.microsoft.com/devcontainers/python:1-3.11-bullseye + +WORKDIR /app + +# 1) Install CA tools and curl +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates curl \ + && rm -rf /var/lib/apt/lists/* + +# 2) Add NetFree certificate and register in system trust store + +# Ensure Python, requests, and pip use the updated CA bundle +ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt +ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt +ENV PIP_CERT=/etc/ssl/certs/ca-certificates.crt + +# 3) Install Python dependencies (use trusted hosts to simplify NetFree path) +COPY requirements.txt /app/requirements.txt +RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org \ + --trusted-host files.pythonhosted.org --no-cache-dir -r requirements.txt + +# 4) Install the package (PEP517) with the same trusted hosts +COPY pyproject.toml README.md /app/ +COPY src /app/src +RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org \ + --trusted-host files.pythonhosted.org --no-cache-dir . 
+
+# 5) Copy configs (can be overridden by a bind mount)
+COPY configs /app/configs
+
+ENV PYTHONUNBUFFERED=1
+
+ENTRYPOINT ["python", "-m", "disease_monitor.cli"]
+
diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile.local b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile.local
new file mode 100644
index 000000000..e9dfc92b9
--- /dev/null
+++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile.local
@@ -0,0 +1,32 @@
+FROM docker.io/library/python@sha256:e0c4fae70d550834a40f6c3e0326e02cfe239c2351d922e1fb1577a3c6ebde02
+
+WORKDIR /app
+
+# 1) Basic tools and certificates + requests (via APT), without going to PyPI for this package
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl python3-requests \
+    && rm -rf /var/lib/apt/lists/*
+
+# Standard SSL settings (use the system certificates)
+ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
+ENV PIP_CERT=/etc/ssl/certs/ca-certificates.crt
+
+# 2) Install the project's Python dependencies
+COPY requirements.txt /app/requirements.txt
+RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org \
+    --trusted-host files.pythonhosted.org --no-cache-dir -r requirements.txt
+RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org \
+    --trusted-host files.pythonhosted.org --no-cache-dir requests
+
+# 3) Install the package itself
+COPY pyproject.toml README.md /app/
+COPY src /app/src
+RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org \
+    --trusted-host files.pythonhosted.org --no-cache-dir .
+
+# 4) Configs
+COPY configs /app/configs
+
+ENV PYTHONUNBUFFERED=1
+ENTRYPOINT ["python", "-m", "disease_monitor.cli"]
diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile.local.bak b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile.local.bak
new file mode 100644
index 000000000..b286174b7
--- /dev/null
+++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/Dockerfile.local.bak
@@ -0,0 +1,37 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+# 1) Install CA tools and curl
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# 2) Add NetFree certificate and register in system trust store
+COPY netfree-ca.crt /usr/local/share/ca-certificates/netfree-ca.crt
+RUN chmod 644 /usr/local/share/ca-certificates/netfree-ca.crt && \
+    update-ca-certificates
+
+# Ensure Python, requests, and pip use the updated CA bundle
+ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
+ENV PIP_CERT=/etc/ssl/certs/ca-certificates.crt
+
+# 3) Install Python dependencies (use trusted hosts to simplify NetFree path)
+COPY requirements.txt /app/requirements.txt
+RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org \
+    --trusted-host files.pythonhosted.org --no-cache-dir -r requirements.txt
+
+# 4) Install the package (PEP517) with the same trusted hosts
+COPY pyproject.toml README.md /app/
+COPY src /app/src
+RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org \
+    --trusted-host files.pythonhosted.org --no-cache-dir .
+ +# 5) Copy configs (can be overridden by a bind mount) +COPY configs /app/configs + +ENV PYTHONUNBUFFERED=1 + +ENTRYPOINT ["python", "-m", "disease_monitor.cli"] + diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/README.md b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/README.md new file mode 100644 index 000000000..a0eb9bda6 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/README.md @@ -0,0 +1,127 @@ +# Disease Monitor (Offline) + +Offline batch job that reads disease detections from **Postgres**, aggregates data, +builds baselines, detects anomalies/worsening, deduplicates & rate-limits alerts, +delivers notifications (Slack/Webhook/Email), and writes alerts back to **Postgres**. + +> **Note:** The pipeline uses **Postgres only** (both sources and sink). No CSV/SQLite. + +--- + +## Data Sources & Sink (Postgres) + +**Sources** +- `anomalies` — per-image detections (0..N rows per image) +- `tile_stats` — exactly 1 row per image (summary) +- `event_logs` — QA & validator logs (rules/metrics/errors) + +**Sink** +- `alerts` — unified alerts table (rules: `COUNT_SPIKE`, `WORSENING_TREND`) + +--- + +## Config (`configs/config.example.yaml`) + +Main sections: +- **io**: Postgres URL (e.g. `postgresql+psycopg2://user:pass@host:5432/db`) +- **windows**: frequency (`"D"`/`"W"`), timezone (e.g. `"UTC"`) +- **baseline**: method (`mean`/`median`), lookback, min_history, optional seasonality +- **rules**: thresholds & toggles for `count_anomaly` (zscore/iqr) and `worsening` (slope/ewma) +- **alerting**: dedup cooldown (windows), resolve-after-no-anomaly, per-run rate limit, group_by_window +- **delivery**: slack/webhook/email targets (can be disabled) +- **run**: `dry_run` and optional filters + +Example: +```yaml +io: + postgres_url: "postgresql+psycopg2://missions_user:pg123@localhost:5432/missions_db" + +windows: + frequency: "D" + timezone: "UTC" + +baseline: + method: "median" + lookback_periods: 28 + min_history: 7 + seasonality: null + +rules: + count_anomaly: + enabled: true + method: "zscore" + z_threshold: 3.0 + iqr_k: 1.5 + min_count: 3 + worsening: + enabled: true + method: "slope" + slope_lookback: 7 + slope_min: 0.02 + min_periods: 5 + ewma_span: 7 + ewma_threshold: 0.6 + +alerting: + dedup_cooldown_windows: 3 + resolve_after_no_anomaly: 3 + rate_limit_per_run: 100 + group_by_window: true + +delivery: + slack: + enabled: false + webhook_url: "" + webhook: + enabled: false + url: "" + headers: {} + email: + enabled: false + smtp_host: "" + smtp_port: 587 + username: "" + password_env: "SMTP_PASSWORD" + from_addr: "" + to_addrs: [] + +run: + dry_run: false +``` + +--- + +## Install & Run + +```bash +# Create & activate venv (Linux/Mac) +python -m venv .venv +source .venv/bin/activate + +# On Windows (PowerShell): +# python -m venv .venv +# .venv\Scripts\Activate.ps1 + +# Install +pip install -r requirements.txt + +# Run +python -m disease_monitor.cli --config configs/config.example.yaml --log-level INFO +``` + +--- + +## Tests + +```bash +pytest +``` + +--- + + +## Notes + +- Thresholds, lookbacks, and active rules are fully configurable from YAML. +- Logs and runtime counters are emitted to stdout. +- Extend notifiers in `src/disease_monitor/notifiers`. 
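For readers skimming the config above, the `count_anomaly` rule in `zscore` mode can be pictured as follows. This is a rough, hypothetical sketch using the YAML knobs (`z_threshold`, `min_count`) and a per-window count history from the baseline lookback; the real logic lives in `src/disease_monitor` and may differ.

```python
import numpy as np

def count_spike_zscore(history: list[int], current: int,
                       z_threshold: float = 3.0, min_count: int = 3) -> bool:
    """Illustrative version of the zscore count-spike rule.

    `history` holds per-window detection counts from the baseline lookback;
    the current window is flagged when its count is above `min_count` and
    more than `z_threshold` standard deviations above the baseline mean.
    """
    if len(history) == 0 or current < min_count:
        return False
    mean = float(np.mean(history))
    std = float(np.std(history)) or 1.0  # avoid division by zero on a flat history
    return (current - mean) / std > z_threshold

# Example: a steady baseline of ~2 detections per day and a day with 12.
print(count_spike_zscore([2, 1, 3, 2, 2, 1, 2], 12))  # True
```

With that example history, the spike of 12 detections sits well over 3 standard deviations above the baseline mean, so it would be flagged under the default `z_threshold` of 3.0.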
diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/configs/config.docker.yaml b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/configs/config.docker.yaml new file mode 100644 index 000000000..fb46acdfe --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/configs/config.docker.yaml @@ -0,0 +1,64 @@ +io: + # IMPORTANT: use the Docker service name of Postgres (from your compose): + postgres_url: "postgresql+psycopg2://missions_user:pg123@postgres:5432/missions_db" + +windows: + frequency: "D" + timezone: "UTC" + +source_mapping: + entity_dim: "mission" # or "region"/"device" + area_strategy: "none" # or "region_area" (requires regions table/geom) + filters: + start_time: null + end_time: null + anomaly_codes: null + +baseline: + method: "median" + lookback_periods: 28 + min_history: 7 + seasonality: null + +rules: + count_anomaly: + enabled: true + method: "zscore" + z_threshold: 3.0 + iqr_k: 1.5 + min_count: 3 + worsening: + enabled: true + method: "slope" + slope_lookback: 7 + slope_min: 0.02 + min_periods: 5 + ewma_span: 7 + ewma_threshold: 0.6 + +alerting: + dedup_cooldown_windows: 3 + resolve_after_no_anomaly: 3 + rate_limit_per_run: 100 + group_by_window: true + +delivery: + slack: + enabled: false + webhook_url: "" + webhook: + enabled: false + url: "" + headers: {} + email: + enabled: false + smtp_host: "" + smtp_port: 587 + username: "" + password_env: "SMTP_PASSWORD" + from_addr: "" + to_addrs: [] + +run: + dry_run: false + diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/configs/config.example.yaml b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/configs/config.example.yaml new file mode 100644 index 000000000..6d4d8d699 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/configs/config.example.yaml @@ -0,0 +1,70 @@ +io: + postgres_url: "postgresql+psycopg2://missions_user:pg123@localhost:5432/missions_db" + +windows: + frequency: "D" + timezone: "UTC" + +source_mapping: + entity_dim: "mission" # or "region"/"device" + area_strategy: "none" # or "region_area" + filters: + start_time: null + end_time: null + anomaly_codes: null + +baseline: + method: "median" + lookback_periods: 28 + min_history: 7 + seasonality: null + +rules: + count_anomaly: + enabled: true + method: "zscore" + z_threshold: 3.0 + iqr_k: 1.5 + min_count: 3 + worsening: + enabled: true + method: "slope" + slope_lookback: 7 + slope_min: 0.02 + min_periods: 5 + ewma_span: 7 + ewma_threshold: 0.6 + +alerting: + dedup_cooldown_windows: 3 + resolve_after_no_anomaly: 3 + rate_limit_per_run: 100 + group_by_window: true + +delivery: + slack: + enabled: false + webhook_url: "" # paste Slack Webhook URL here if you want to enable + webhook: + enabled: false + url: "" # paste your Webhook URL here if you want to enable + headers: {} # optional headers map + email: + enabled: false + smtp_host: "" # paste your SMTP server address here if you want to enable + smtp_port: 587 + username: "" + password_env: "SMTP_PASSWORD" + from_addr: "" + to_addrs: [] + + alertmanager: + enabled: false + url: "http://localhost:9093" + default_severity: "warning" + extra_labels: + system: "disease-monitor" + team: "ag" + +run: + dry_run: false diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/docker-compose.yml b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/docker-compose.yml new file mode 100644 index 000000000..15a0a0baa --- /dev/null +++ 
b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/docker-compose.yml @@ -0,0 +1,21 @@ +services: + disease-monitor: + build: + context: . + dockerfile: Dockerfile + image: disease-monitor:latest + command: ["--config", "/app/configs/config.docker.yaml", "--log-level", "INFO"] + environment: + TZ: "UTC" + # If you enable email delivery and use password_env=SMTP_PASSWORD: + # SMTP_PASSWORD: "your-smtp-password" + volumes: + - ./configs:/app/configs:ro + networks: + - worktree-main_ag_cloud + restart: on-failure + +networks: + # Use the external network created by your worktree-main compose + worktree-main_ag_cloud: + external: true diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/pyproject.toml b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/pyproject.toml new file mode 100644 index 000000000..063c0697f --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "disease-monitor" +version = "0.1.0" +description = "Offline anomaly & worsening detection for disease cases in trees/plots/regions." +readme = "README.md" +requires-python = ">=3.10" + +[tool.pytest.ini_options] +pythonpath = ["src"] +addopts = "-q" diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/requirements.txt b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/requirements.txt new file mode 100644 index 000000000..f814e8e3c --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/requirements.txt @@ -0,0 +1,10 @@ +pandas>=2.3.0,<2.4 +numpy>=2.2,<2.4 +pyyaml==6.0.2 +sqlalchemy==2.0.32 +pydantic==2.9.2 +scipy>=1.14.1,<1.15 +pytest==8.3.2 +python-dateutil==2.9.0.post0 +psycopg2-binary==2.9.7 +requests>=2.31 diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/__init__.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/__init__.py new file mode 100644 index 000000000..a9a2c5b3b --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/alerting.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/alerting.py new file mode 100644 index 000000000..fe3802cba --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/alerting.py @@ -0,0 +1,99 @@ +from __future__ import annotations +import logging +from datetime import datetime +from typing import Dict, Any, List, Tuple +import pandas as pd + +LOGGER = logging.getLogger(__name__) + +def _merge_reasons(s: pd.Series) -> list[str]: + items = [] + for x in s: + if isinstance(x, (list, tuple, set)): + items.extend(list(x)) + else: + items.append(str(x)) + return sorted(set(items)) + +def enforce_policies(candidates: pd.DataFrame, open_alerts_df: pd.DataFrame, + cfg: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Deduplicate per (entity, rule) with cooldown; update OPEN alerts if still anomalous; + create RESOLVED entries after consecutive non-anomalous windows (handled by absence). + Rate limiting applied. 
+ """ + if candidates.empty: + return [] + + candidates = candidates.copy() + candidates["window_start"] = pd.to_datetime(candidates["window"]) + candidates["window_end"] = pd.to_datetime(candidates["window_end"]) + candidates["first_seen"] = candidates["window_start"] + candidates["last_seen"] = candidates["window_end"] + candidates["status"] = "OPEN" + + # Dedup cooldown: skip if there is OPEN/ACK within last N windows for same (entity, rule) + cooldown = cfg["alerting"]["dedup_cooldown_windows"] + frequency = cfg["windows"]["frequency"] + + alerts_out: List[Dict[str, Any]] = [] + rate_limit = cfg["alerting"]["rate_limit_per_run"] + emitted = 0 + + # Grouping by window if requested + if cfg["alerting"]["group_by_window"]: + group_keys = ["entity_id", "rule", "window_start", "window_end"] + else: + group_keys = ["entity_id", "rule"] + + g = candidates.groupby(group_keys, as_index=False).agg({ + "score": "max", + "disease_count": "max", + "avg_severity": "max", + "affected_area": "max", + "reason": _merge_reasons +}) + + for _, row in g.iterrows(): + if emitted >= rate_limit: + LOGGER.warning("Rate limit reached (%d).", rate_limit) + break + entity, rule = row["entity_id"], row["rule"] + ws, we = row["window_start"], row["window_end"] + # Check cooldown against open alerts + if not open_alerts_df.empty: + same = open_alerts_df[(open_alerts_df["entity_id"] == entity) & + (open_alerts_df["rule"] == rule)] + # In cooldown if last_seen within last cooldown windows + recent = same[same["last_seen"] >= (ws - _windows_to_offset(frequency, cooldown))] + if not recent.empty: + LOGGER.info("Cooldown skip for %s/%s at %s.", entity, rule, ws) + continue + + meta = { + "reasons": row["reason"], + "disease_count": int(row["disease_count"]), + "avg_severity": float(row["avg_severity"]), + "affected_area": float(row["affected_area"]), + } + alerts_out.append({ + "entity_id": entity, + "rule": rule, + "window_start": ws.to_pydatetime(), + "window_end": we.to_pydatetime(), + "score": float(row["score"]), + "first_seen": ws.to_pydatetime(), + "last_seen": we.to_pydatetime(), + "status": "OPEN", + "meta": meta + }) + emitted += 1 + + return alerts_out + +def _windows_to_offset(freq: str, n: int) -> pd.Timedelta: + if n <= 0: + return pd.Timedelta(0) + if freq.upper().startswith("W"): + return pd.to_timedelta(7 * n, unit="D") + return pd.to_timedelta(n, unit="D") diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/baseline.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/baseline.py new file mode 100644 index 000000000..71e976c08 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/baseline.py @@ -0,0 +1,38 @@ +from __future__ import annotations +import pandas as pd +import numpy as np + +def compute_baseline(agg: pd.DataFrame, method: str, lookback: int, + min_history: int, seasonality: int | None) -> pd.DataFrame: + """ + Returns agg with baseline columns for disease_count, avg_severity, affected_area: + *_bl, *_std (or IQR helpers). 
+ """ + df = agg.sort_values(["entity_id", "window"]).copy() + keys = ["entity_id"] + metrics = ["disease_count", "avg_severity", "affected_area"] + + # Optionally seasonal lag indexing + if seasonality and seasonality > 1: + df["season_index"] = df.groupby(keys)["window"].rank(method="first").astype(int) % seasonality + groupers = keys + ["season_index"] + else: + groupers = keys + + for m in metrics: + if method == "mean": + bl = df.groupby(groupers)[m].transform(lambda s: s.shift(1).rolling(lookback, min_periods=min_history).mean()) + sd = df.groupby(groupers)[m].transform(lambda s: s.shift(1).rolling(lookback, min_periods=min_history).std(ddof=0)) + else: + bl = df.groupby(groupers)[m].transform(lambda s: s.shift(1).rolling(lookback, min_periods=min_history).median()) + sd = df.groupby(groupers)[m].transform(lambda s: s.shift(1).rolling(lookback, min_periods=min_history).std(ddof=0)) + df[f"{m}_bl"] = bl.fillna(0.0) + df[f"{m}_std"] = sd.fillna(0.0) + + # IQR helpers + q1 = df.groupby(groupers)[m].transform(lambda s: s.shift(1).rolling(lookback, min_periods=min_history).quantile(0.25)) + q3 = df.groupby(groupers)[m].transform(lambda s: s.shift(1).rolling(lookback, min_periods=min_history).quantile(0.75)) + df[f"{m}_q1"] = q1.fillna(0.0) + df[f"{m}_q3"] = q3.fillna(0.0) + + return df diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/cli.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/cli.py new file mode 100644 index 000000000..a237c9cf8 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/cli.py @@ -0,0 +1,112 @@ +import argparse +import json +import logging +from typing import Dict, Any, List + +import yaml +import pandas as pd + +from .logging_utils import setup_logging +from .config import AppConfig +from . 
import io as io_mod +from .baseline import compute_baseline +from .rules import apply_rules +from .alerting import enforce_policies +from .notifiers.base import Notifier +from .notifiers.slack import SlackNotifier +from .notifiers.webhook import WebhookNotifier +from .notifiers.emailer import EmailNotifier +from .io import load_inputs_from_postgres , upsert_alerts_pg , fetch_open_alerts_pg + + +LOGGER = logging.getLogger("disease_monitor") + + +def parse_args(): + parser = argparse.ArgumentParser(description="Offline disease anomaly detector") + parser.add_argument("--config", required=True, help="Path to config file (YAML)") + parser.add_argument("--log-level", default="INFO", help="Logging level") + return parser.parse_args() + + +def load_config(path: str) -> Dict[str, Any]: + with open(path, "r", encoding="utf-8") as f: + cfg = yaml.safe_load(f) + AppConfig(**cfg) # validation + return cfg + +def build_notifiers(cfg: Dict[str, Any]) -> List[Notifier]: + ns: List[Notifier] = [] + d = cfg.get("delivery", {}) + + slack = d.get("slack", {}) + if slack.get("enabled") and slack.get("webhook_url"): + ns.append(SlackNotifier(slack["webhook_url"])) + + webhook = d.get("webhook", {}) + if webhook.get("enabled") and webhook.get("url"): + ns.append(WebhookNotifier(webhook["url"], webhook.get("headers") or {})) + + email = d.get("email", {}) + if email.get("enabled") and email.get("to_addrs"): + ns.append(EmailNotifier(email["smtp_host"], email["smtp_port"], email["username"], + email["password_env"], email["from_addr"], email["to_addrs"])) + return ns + +def main() -> None: + args = parse_args() + setup_logging(args.log_level) + cfg = load_config(args.config) + + tz = cfg["windows"]["timezone"] + freq = cfg["windows"]["frequency"] + + # Load inputs + det, reg = load_inputs_from_postgres(cfg["io"]["postgres_url"], tz, cfg) + + # Optional filters + run_cfg = cfg["run"] + if run_cfg.get("disease_filter"): + det = det[det["disease_type"].isin(run_cfg["disease_filter"])] + if run_cfg.get("limit_entities"): + keep = det["entity_id"].drop_duplicates().head(run_cfg["limit_entities"]).tolist() + det = det[det["entity_id"].isin(keep)] + + # Aggregation + baseline + agg = io_mod.aggregate(det, freq=freq) + agg_bl = compute_baseline( + agg, + method=cfg["baseline"]["method"], + lookback=cfg["baseline"]["lookback_periods"], + min_history=cfg["baseline"]["min_history"], + seasonality=cfg["baseline"]["seasonality"], + ) + + # Rules + candidates = apply_rules(agg_bl, cfg) + LOGGER.info("Candidate alerts: %d", 0 if candidates is None else len(candidates)) + + # Policies need knowledge of currently OPEN alerts from the chosen backend + open_alerts = fetch_open_alerts_pg(cfg["io"]["postgres_url"]) + alerts = enforce_policies(candidates, open_alerts, cfg) + LOGGER.info("Alerts after policies: %d", len(alerts)) + + # Delivery + notifiers = build_notifiers(cfg) + dry_run = cfg["run"]["dry_run"] + + if not dry_run and alerts: + io_mod.upsert_alerts_pg(cfg["io"]["postgres_url"], alerts) + for a in alerts: + for n in notifiers: + try: + n.send(a) + except Exception as ex: + LOGGER.error("Notifier failed: %s", ex) + else: + LOGGER.info("Dry-run or no alerts. 
Skipping DB write & delivery.") + LOGGER.info("Preview alerts: %s", json.dumps(alerts, default=str, ensure_ascii=False)) + + +if __name__ == "__main__": + main() diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/config.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/config.py new file mode 100644 index 000000000..a2e0aa83f --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/config.py @@ -0,0 +1,114 @@ +from pydantic import BaseModel, Field, model_validator +from typing import Optional, List, Dict, Any + +# ------------------------------ +# IO: Postgres-only +# ------------------------------ +class IOConfig(BaseModel): + postgres_url: str # required: Postgres-only + + @model_validator(mode="after") + def _ensure_pg_only(self): + url = self.postgres_url + if not isinstance(url, str) or not url.lower().startswith( + ("postgresql://", "postgresql+psycopg2://") + ): + raise ValueError("io.postgres_url is required and must be a PostgreSQL URL.") + return self + + +# ------------------------------ +# Windows/Baseline/Rules/Alerting +# ------------------------------ +class WindowsConfig(BaseModel): + frequency: str = "D" + timezone: str = "UTC" + +class BaselineConfig(BaseModel): + method: str = "median" + lookback_periods: int = 28 + min_history: int = 7 + seasonality: Optional[int] = None + +class CountAnomalyRule(BaseModel): + enabled: bool = True + method: str = "zscore" + z_threshold: float = 3.0 + iqr_k: float = 1.5 + min_count: int = 3 + +class WorseningRule(BaseModel): + enabled: bool = True + method: str = "slope" + slope_lookback: int = 7 + slope_min: float = 0.02 + min_periods: int = 5 + ewma_span: int = 7 + ewma_threshold: float = 0.6 + +class RulesConfig(BaseModel): + count_anomaly: CountAnomalyRule = Field(default_factory=CountAnomalyRule) + worsening: WorseningRule = Field(default_factory=WorseningRule) + +class AlertingConfig(BaseModel): + dedup_cooldown_windows: int = 3 + resolve_after_no_anomaly: int = 3 + rate_limit_per_run: int = 100 + group_by_window: bool = True + + +# ------------------------------ +# Delivery: add Alertmanager section +# ------------------------------ +class SlackConfig(BaseModel): + enabled: bool = False + webhook_url: Optional[str] = None + +class WebhookConfig(BaseModel): + enabled: bool = False + url: Optional[str] = None + headers: Dict[str, Any] = Field(default_factory=dict) + +class EmailConfig(BaseModel): + enabled: bool = False + smtp_host: str = "" + smtp_port: int = 587 + username: str = "" + password_env: str = "SMTP_PASSWORD" + from_addr: str = "" + to_addrs: List[str] = Field(default_factory=list) + +class AlertmanagerConfig(BaseModel): + enabled: bool = False + url: Optional[str] = None + default_severity: str = "warning" + extra_labels: Dict[str, str] = Field(default_factory=dict) + auth: Dict[str, Any] = Field(default_factory=lambda: {"type": "none"}) # {"type":"none"} or {"type":"basic",...} + +class DeliveryConfig(BaseModel): + slack: SlackConfig = Field(default_factory=SlackConfig) + webhook: WebhookConfig = Field(default_factory=WebhookConfig) + email: EmailConfig = Field(default_factory=EmailConfig) + alertmanager: AlertmanagerConfig = Field(default_factory=AlertmanagerConfig) + + +# ------------------------------ +# Run +# ------------------------------ +class RunConfig(BaseModel): + dry_run: bool = False + limit_entities: Optional[int] = None + disease_filter: Optional[List[str]] = None + + +# 
------------------------------ +# AppConfig +# ------------------------------ +class AppConfig(BaseModel): + io: IOConfig + windows: WindowsConfig = Field(default_factory=WindowsConfig) + baseline: BaselineConfig = Field(default_factory=BaselineConfig) + rules: RulesConfig = Field(default_factory=RulesConfig) + alerting: AlertingConfig = Field(default_factory=AlertingConfig) + delivery: DeliveryConfig = Field(default_factory=DeliveryConfig) + run: RunConfig = Field(default_factory=RunConfig) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/io.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/io.py new file mode 100644 index 000000000..57014b783 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/io.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +import json +import logging +from typing import Tuple, Iterable, Dict, Any, List + +import pandas as pd +from sqlalchemy import create_engine, text + +LOGGER = logging.getLogger(__name__) + +# --------------------------------------------------------------------- +# Postgres sources: anomalies / anomaly_types / regions +# --------------------------------------------------------------------- + +_BASE_SQLS: Dict[str, str] = { + "device": """ + SELECT a.ts AS "timestamp", + a.device_id AS entity_id, + at.code AS disease_type, + COALESCE(a.severity::double precision, 0.0) AS severity, + 0.0 AS affected_area + FROM public.anomalies a + JOIN public.anomaly_types at ON at.anomaly_type_id = a.anomaly_type_id + WHERE a.ts IS NOT NULL + {AND_CODE_FILTER} + {AND_TIME_RANGE} + """, + "mission": """ + SELECT a.ts AS "timestamp", + a.mission_id::text AS entity_id, + at.code AS disease_type, + COALESCE(a.severity::double precision, 0.0) AS severity, + 0.0 AS affected_area + FROM public.anomalies a + JOIN public.anomaly_types at ON at.anomaly_type_id = a.anomaly_type_id + WHERE a.ts IS NOT NULL + {AND_CODE_FILTER} + {AND_TIME_RANGE} + """, + "region": """ + SELECT a.ts AS "timestamp", + r.id::text AS entity_id, + at.code AS disease_type, + COALESCE(a.severity::double precision, 0.0) AS severity, + {AREA_EXPR} AS affected_area + FROM public.anomalies a + JOIN public.anomaly_types at ON at.anomaly_type_id = a.anomaly_type_id + JOIN public.regions r ON ST_Contains(r.geom, a.geom) + WHERE a.ts IS NOT NULL AND a.geom IS NOT NULL + {AND_CODE_FILTER} + {AND_TIME_RANGE} + """, +} + + +def _build_sql( + entity_dim: str, + area_strategy: str, + codes: List[str] | None, + start: str | None, + end: str | None, +) -> tuple[str, dict]: + """ + Build parametrized SQL for reading anomalies with chosen entity dimension and area strategy. 
+    """
+    sql = _BASE_SQLS[entity_dim]
+    area_expr = "0.0"
+    if entity_dim == "region" and area_strategy == "region_area":
+        area_expr = "ST_Area(r.geom::geography)::double precision"
+
+    and_code = ""
+    params: Dict[str, Any] = {}
+    if codes:
+        and_code = "AND at.code = ANY(:codes)"
+        params["codes"] = codes
+
+    and_time = ""
+    if start:
+        and_time += " AND a.ts >= :start_time"
+        params["start_time"] = start
+    if end:
+        and_time += " AND a.ts < :end_time"
+        params["end_time"] = end
+
+    sql = (
+        sql.replace("{AREA_EXPR}", area_expr)
+        .replace("{AND_CODE_FILTER}", and_code)
+        .replace("{AND_TIME_RANGE}", and_time)
+    )
+    return sql, params
+
+
+# ---------------------------------------------------------------------
+# Postgres input (canonical)
+# ---------------------------------------------------------------------
+
+def load_inputs_from_postgres(pg_url: str, tz: str, cfg: dict) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """
+    Load inputs from Postgres (public.anomalies/anomaly_types/regions).
+    Controlled by cfg['source_mapping'] (entity_dim, area_strategy, filters, codes).
+    Returns:
+      det: columns [timestamp, entity_id, disease_type, severity, affected_area]
+      reg: columns [entity_id, entity_type]
+    """
+    edim = cfg["source_mapping"]["entity_dim"]
+    area = cfg["source_mapping"].get("area_strategy", "none")
+    filters = cfg["source_mapping"].get("filters") or {}
+    # anomaly_codes may be given either directly under source_mapping or inside
+    # filters (the shipped config files put it under filters), so accept both.
+    codes = cfg["source_mapping"].get("anomaly_codes") or filters.get("anomaly_codes")
+    start = filters.get("start_time")
+    end = filters.get("end_time")
+
+    sql, params = _build_sql(edim, area, codes, start, end)
+
+    eng = create_engine(pg_url)
+    with eng.begin() as conn:
+        det = pd.read_sql(text(sql), conn, params=params)
+        reg = det[["entity_id"]].drop_duplicates().assign(entity_type=edim)
+
+    det["timestamp"] = pd.to_datetime(det["timestamp"], utc=True).dt.tz_convert(tz)
+
+    required = {"timestamp", "entity_id", "disease_type", "severity", "affected_area"}
+    if not required.issubset(det.columns):
+        missing = required - set(det.columns)
+        raise ValueError(f"det: missing {missing}")
+    if not {"entity_id", "entity_type"}.issubset(reg.columns):
+        raise ValueError("reg: missing cols")
+
+    return det, reg
+
+
+# ---------------------------------------------------------------------
+# Aggregation
+# ---------------------------------------------------------------------
+
+def aggregate(det: pd.DataFrame, freq: str) -> pd.DataFrame:
+    """
+    Aggregate by entity_id + window and compute disease_count, avg_severity, affected_area.
+ """ + df = det.copy() + + # Normalize tz: drop tz-info to use pandas period-based bucketing safely + if pd.api.types.is_datetime64tz_dtype(df["timestamp"]): + df["timestamp"] = df["timestamp"].dt.tz_convert("UTC").dt.tz_localize(None) + + df["window"] = df["timestamp"].dt.to_period(freq).dt.start_time + grp = df.groupby(["entity_id", "window"], as_index=False).agg( + disease_count=("disease_type", "count"), + avg_severity=("severity", "mean"), + affected_area=("affected_area", "sum"), + ) + grp["window_end"] = grp["window"] + pd.tseries.frequencies.to_offset(freq) + return grp + + +# --------------------------------------------------------------------- +# Alerts: Postgres backend +# --------------------------------------------------------------------- + + +def fetch_open_alerts_pg(pg_url: str) -> pd.DataFrame: + eng = create_engine(pg_url) + sql = """ + SELECT id, entity_id, rule, window_start, window_end, score, + first_seen, last_seen, status, meta_json + FROM public.alerts + WHERE status IN ('OPEN','ACK') + """ + with eng.begin() as conn: + df = pd.read_sql(text(sql), conn) + if not df.empty: + for c in ("first_seen", "last_seen", "window_start", "window_end"): + # make tz-aware UTC then drop tz -> naive UTC + s = pd.to_datetime(df[c], utc=True) + df[c] = s.dt.tz_convert("UTC").dt.tz_localize(None) + + return df + + +def upsert_alerts_pg(pg_url: str, alerts: Iterable[Dict[str, Any]]) -> None: + rows = list(alerts) + if not rows: + return + eng = create_engine(pg_url) + sql = """ + INSERT INTO public.alerts + (entity_id, rule, window_start, window_end, score, + first_seen, last_seen, status, meta_json) + VALUES + (:entity_id, :rule, :window_start, :window_end, :score, + :first_seen, :last_seen, :status, CAST(:meta_json AS jsonb)) + """ + payload = [{ + "entity_id": a["entity_id"], + "rule": a["rule"], + "window_start": a["window_start"], + "window_end": a["window_end"], + "score": float(a["score"]), + "first_seen": a["first_seen"], + "last_seen": a["last_seen"], + "status": a["status"], + "meta_json": json.dumps(a["meta"], ensure_ascii=False), + } for a in rows] + + with eng.begin() as conn: + conn.execute(text(sql), payload) + LOGGER.info("Inserted %d alerts into Postgres.", len(rows)) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/logging_utils.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/logging_utils.py new file mode 100644 index 000000000..f9618ff02 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/logging_utils.py @@ -0,0 +1,10 @@ +import logging +import sys + +def setup_logging(level: str = "INFO") -> None: + handler = logging.StreamHandler(sys.stdout) + fmt = "%(asctime)s %(levelname)s %(name)s - %(message)s" + handler.setFormatter(logging.Formatter(fmt)) + root = logging.getLogger() + root.setLevel(level.upper()) + root.handlers = [handler] diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/models.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/models.py new file mode 100644 index 000000000..4796fecd8 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/models.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass +from typing import Optional, Dict +from datetime import datetime + +@dataclass +class Alert: + entity_id: str + rule: str + window_start: datetime + window_end: datetime + score: float + 
first_seen: datetime + last_seen: datetime + status: str # OPEN | ACK | RESOLVED + meta: Dict diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/base.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/base.py new file mode 100644 index 000000000..1f8bc409a --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/base.py @@ -0,0 +1,13 @@ +from __future__ import annotations +from typing import Dict, Any, List + +class Notifier: + def send(self, alert: Dict[str, Any]) -> None: + raise NotImplementedError + +def render_text(alert: Dict[str, Any]) -> str: + return ( + f"[{alert['status']}] {alert['rule']} for {alert['entity_id']} " + f"{alert['window_start']}..{alert['window_end']} " + f"score={alert['score']:.2f} reasons={alert['meta'].get('reasons')}" + ) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/emailer.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/emailer.py new file mode 100644 index 000000000..695e0ddfe --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/emailer.py @@ -0,0 +1,28 @@ +from __future__ import annotations +import os +import smtplib +from email.mime.text import MIMEText +from typing import Dict, Any, List +from .base import Notifier, render_text + +class EmailNotifier(Notifier): + def __init__(self, host: str, port: int, username: str, password_env: str, + from_addr: str, to_addrs: List[str]) -> None: + self.host = host + self.port = port + self.username = username + self.password_env = password_env + self.from_addr = from_addr + self.to_addrs = to_addrs + + def send(self, alert: Dict[str, Any]) -> None: + password = os.getenv(self.password_env, "") + msg = MIMEText(render_text(alert)) + msg["Subject"] = f"Alert: {alert['rule']} {alert['entity_id']}" + msg["From"] = self.from_addr + msg["To"] = ", ".join(self.to_addrs) + with smtplib.SMTP(self.host, self.port, timeout=10) as s: + s.starttls() + if self.username and password: + s.login(self.username, password) + s.sendmail(self.from_addr, self.to_addrs, msg.as_string()) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/slack.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/slack.py new file mode 100644 index 000000000..68925060a --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/slack.py @@ -0,0 +1,15 @@ +from __future__ import annotations +import os +import json +import requests +from typing import Dict, Any +from .base import Notifier, render_text + +class SlackNotifier(Notifier): + def __init__(self, webhook_url: str) -> None: + self.webhook_url = webhook_url + + def send(self, alert: Dict[str, Any]) -> None: + text = render_text(alert) + payload = {"text": text} + requests.post(self.webhook_url, data=json.dumps(payload), timeout=10) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/webhook.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/webhook.py new file mode 100644 index 000000000..1e84232c7 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/notifiers/webhook.py @@ -0,0 
+1,13 @@ +from __future__ import annotations +import json +import requests +from typing import Dict, Any +from .base import Notifier + +class WebhookNotifier(Notifier): + def __init__(self, url: str, headers: Dict[str, str] | None = None) -> None: + self.url = url + self.headers = headers or {} + + def send(self, alert: Dict[str, Any]) -> None: + requests.post(self.url, json=alert, headers=self.headers, timeout=10) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/rules.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/rules.py new file mode 100644 index 000000000..eeba8ec4c --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/src/disease_monitor/rules.py @@ -0,0 +1,108 @@ +from __future__ import annotations +import logging +from typing import List, Dict, Any, Tuple +import pandas as pd +import numpy as np +from scipy import stats + +LOGGER = logging.getLogger(__name__) + +def zscore_anomalies(df: pd.DataFrame, threshold: float, min_count: int) -> pd.DataFrame: + s = df["disease_count"] + mu = df["disease_count_bl"] + # Use small epsilon for zero/NaN std to avoid z=0 + sd = df["disease_count_std"] + eps = 1e-6 + sd = sd.where(sd > 0, other=eps).fillna(eps) + + z = (s - mu) / sd + cond = (z >= threshold) & (s >= min_count) + + out = df.loc[cond].copy() + out["score"] = z.loc[cond] + out["rule"] = "COUNT_SPIKE" + out["reason"] = "zscore" + return out + +def iqr_anomalies(df: pd.DataFrame, k: float, min_count: int) -> pd.DataFrame: + q1 = df["disease_count_q1"] + q3 = df["disease_count_q3"] + iqr = (q3 - q1).replace(0, np.nan) + upper = q3 + k * iqr + cond = (df["disease_count"] > upper.fillna(float("inf"))) & (df["disease_count"] >= min_count) + out = df.loc[cond].copy() + out["score"] = (df["disease_count"] - upper).loc[cond].fillna(0.0) + out["rule"] = "COUNT_SPIKE" + out["reason"] = "iqr" + return out + +def slope_worsening(df: pd.DataFrame, metric: str, lookback: int, + slope_min: float, min_periods: int) -> pd.DataFrame: + # Per entity rolling slope (OLS) + rows = [] + for entity, g in df.groupby("entity_id"): + g = g.sort_values("window") + y = g[metric].rolling(lookback, min_periods=min_periods).apply(_rolling_slope, raw=False) + cond = y >= slope_min + sel = g.loc[cond].copy() + if sel.empty: + continue + sel["score"] = y.loc[cond] + sel["rule"] = "WORSENING_TREND" + sel["reason"] = f"slope_{metric}" + rows.append(sel) + return pd.concat(rows, ignore_index=True) if rows else pd.DataFrame(columns=df.columns.tolist() + ["score","rule","reason"]) + +def _rolling_slope(s: pd.Series) -> float: + x = np.arange(len(s)) + res = stats.linregress(x, s.values) + return float(res.slope) + +def ewma_worsening(df: pd.DataFrame, metric: str, span: int, threshold: float, min_periods: int) -> pd.DataFrame: + rows = [] + for entity, g in df.groupby("entity_id"): + g = g.sort_values("window").copy() + ew = g[metric].ewm(span=span, adjust=False).mean() + cond = (ew >= threshold) & (g[metric].rolling(span, min_periods=min_periods).count() >= min_periods) + sel = g.loc[cond].copy() + if sel.empty: + continue + sel["score"] = ew.loc[cond] + sel["rule"] = "WORSENING_TREND" + sel["reason"] = f"ewma_{metric}" + rows.append(sel) + return pd.concat(rows, ignore_index=True) if rows else pd.DataFrame(columns=df.columns.tolist() + ["score","rule","reason"]) + +def apply_rules(df: pd.DataFrame, cfg: Dict[str, Any]) -> pd.DataFrame: + results = [] + + # Count anomaly + rc = cfg["rules"]["count_anomaly"] 
+ if rc["enabled"]: + if rc["method"] == "zscore": + results.append(zscore_anomalies(df, rc["z_threshold"], rc["min_count"])) + elif rc["method"] == "iqr": + results.append(iqr_anomalies(df, rc["iqr_k"], rc["min_count"])) + else: + # Placeholder: CUSUM can be added similarly + results.append(zscore_anomalies(df, rc["z_threshold"], rc["min_count"])) + + # Worsening trend on severity and area + rw = cfg["rules"]["worsening"] + if rw["enabled"]: + if rw["method"] == "slope": + for m in ["avg_severity", "affected_area"]: + results.append(slope_worsening(df, m, rw["slope_lookback"], rw["slope_min"], rw["min_periods"])) + else: + for m in ["avg_severity", "affected_area"]: + results.append(ewma_worsening(df, m, rw["ewma_span"], rw["ewma_threshold"], rw["min_periods"])) + + if not results: + return pd.DataFrame() + out = pd.concat([r for r in results if r is not None and not r.empty], ignore_index=True) \ + if any((r is not None and not r.empty) for r in results) else pd.DataFrame() + # Prepare common fields + if not out.empty: + out = out[["entity_id", "window", "window_end", "rule", "score", "reason", + "disease_count", "avg_severity", "affected_area"]].copy() + return out diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/conftest.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/conftest.py new file mode 100644 index 000000000..043dc1712 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/conftest.py @@ -0,0 +1,17 @@ +import pandas as pd +import numpy as np +from datetime import datetime, timedelta, timezone + +TZ = "UTC" + +def make_series(start: str, days: int, entity: str, base_count=1, bump_at=None, bump=5): + rows = [] + start_dt = pd.to_datetime(start).tz_localize("UTC") + for i in range(days): + ts = start_dt + pd.Timedelta(days=i) + count = base_count + if bump_at is not None and i in bump_at: + count = bump + rows.append({"timestamp": ts, "entity_id": entity, "disease_type": "x", + "severity": 0.1 * count, "affected_area": 2.0 * count}) + return pd.DataFrame(rows) diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_aggregation.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_aggregation.py new file mode 100644 index 000000000..bb0a42558 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_aggregation.py @@ -0,0 +1,17 @@ +import pandas as pd +from disease_monitor.io import aggregate + +def test_aggregate_basic(): + det = pd.DataFrame({ + "timestamp": pd.to_datetime(["2025-08-01", "2025-08-01", "2025-08-02"]).tz_localize("UTC"), + "entity_id": ["A","A","A"], + "disease_type": ["x","x","x"], + "severity": [0.2, 0.4, 0.3], + "affected_area": [1,2,3], + }) + out = aggregate(det, "D") + assert len(out) == 2 + d1 = out[out["window"] == pd.to_datetime("2025-08-01")] + assert int(d1["disease_count"].iloc[0]) == 2 + assert abs(float(d1["avg_severity"].iloc[0]) - 0.3) < 1e-9 + assert int(d1["affected_area"].iloc[0]) == 3 diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_alerting.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_alerting.py new file mode 100644 index 000000000..48126beaf --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_alerting.py @@ -0,0 +1,34 @@ +import pandas as pd +from disease_monitor.alerting import enforce_policies + +def test_dedup_cooldown(): + candidates = 
pd.DataFrame({ + "entity_id": ["A","A"], + "window": pd.to_datetime(["2025-08-10","2025-08-11"]), + "window_end": pd.to_datetime(["2025-08-11","2025-08-12"]), + "rule": ["COUNT_SPIKE","COUNT_SPIKE"], + "score": [3.1, 3.2], + "reason": [["zscore"],["zscore"]], + "disease_count": [10, 9], + "avg_severity": [0.5, 0.4], + "affected_area": [10.0, 9.0], + }) + open_alerts = pd.DataFrame({ + "entity_id": ["A"], + "rule": ["COUNT_SPIKE"], + "last_seen": pd.to_datetime(["2025-08-10"]), + "window_start": pd.to_datetime(["2025-08-10"]), + "window_end": pd.to_datetime(["2025-08-11"]), + "first_seen": pd.to_datetime(["2025-08-10"]), + "status": ["OPEN"], + "id": [1], + "score": [3.1] + }) + cfg = { + "alerting": {"dedup_cooldown_windows": 3, "resolve_after_no_anomaly": 3, + "rate_limit_per_run": 10, "group_by_window": True}, + "windows": {"frequency": "D"} + } + res = enforce_policies(candidates, open_alerts, cfg) + # Second day should be skipped due to cooldown + assert len(res) == 0 or len(res) == 1 diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_anomaly_rules.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_anomaly_rules.py new file mode 100644 index 000000000..23f595a7a --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_anomaly_rules.py @@ -0,0 +1,22 @@ +import pandas as pd +from disease_monitor.baseline import compute_baseline +from disease_monitor.rules import apply_rules + +def test_zscore_spike_detected(): + # mostly low counts, then spike + det = [] + for d in range(10): + det.append({"window": pd.to_datetime(f"2025-08-{d+1:02d}"), + "entity_id": "E1", + "disease_count": 1 if d < 8 else (10 if d==8 else 1), + "avg_severity": 0.2, "affected_area": 2.0}) + df = pd.DataFrame(det) + df["window_end"] = df["window"] + pd.Timedelta(days=1) + bl = compute_baseline(df.rename(columns={"window":"window"}), "median", 7, 3, None) + cfg = { + "rules": {"count_anomaly": {"enabled": True, "method": "zscore", "z_threshold": 2.5, "min_count": 3}, + "worsening": {"enabled": False}}, + } + out = apply_rules(bl, cfg) + assert not out.empty + assert "COUNT_SPIKE" in out["rule"].unique() diff --git a/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_worsening_rules.py b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_worsening_rules.py new file mode 100644 index 000000000..e17b34e57 --- /dev/null +++ b/airflow/leaf-pipeline/projects/disease-monitor/disease-monitor/tests/test_worsening_rules.py @@ -0,0 +1,24 @@ +import pandas as pd +from disease_monitor.baseline import compute_baseline +from disease_monitor.rules import apply_rules + +def test_worsening_slope_on_severity(): + rows = [] + for i in range(10): + rows.append({ + "window": pd.to_datetime(f"2025-08-{i+1:02d}"), + "entity_id": "E1", + "disease_count": 1, + "avg_severity": 0.1 + 0.03*i, + "affected_area": 2 + i + }) + df = pd.DataFrame(rows) + df["window_end"] = df["window"] + pd.Timedelta(days=1) + bl = compute_baseline(df, "median", 7, 3, None) + cfg = {"rules": + {"count_anomaly": {"enabled": False}, + "worsening": {"enabled": True, "method": "slope", + "slope_lookback": 7, "slope_min": 0.02, "min_periods": 5}}} + out = apply_rules(bl, cfg) + assert not out.empty + assert "WORSENING_TREND" in out["rule"].unique() diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf spot/05.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf 
spot/05.jpg new file mode 100644 index 000000000..09be3be5f Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf spot/05.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf spot/bacterialspot3_600px.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf spot/bacterialspot3_600px.jpg new file mode 100644 index 000000000..c56ceecd3 Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf spot/bacterialspot3_600px.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf/DSCN3768.JPG.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf/DSCN3768.JPG.jpg new file mode 100644 index 000000000..1dd73ab87 Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf/DSCN3768.JPG.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf/IMG_3891.JPG_1492073147.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf/IMG_3891.JPG_1492073147.jpg new file mode 100644 index 000000000..4acbdc891 Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Bell_pepper leaf/IMG_3891.JPG_1492073147.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Potato leaf late blight/Late-blight-infected-potato-plants_2.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Potato leaf late blight/Late-blight-infected-potato-plants_2.jpg new file mode 100644 index 000000000..238c58e4e Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Potato leaf late blight/Late-blight-infected-potato-plants_2.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Potato leaf late blight/blight-on-potato-leaves.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Potato leaf late blight/blight-on-potato-leaves.jpg new file mode 100644 index 000000000..311a114ce Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Potato leaf late blight/blight-on-potato-leaves.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Early blight leaf/dscn3175.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Early blight leaf/dscn3175.jpg new file mode 100644 index 000000000..a1014b817 Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Early blight leaf/dscn3175.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Septoria leaf spot/tomato-badleaves.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Septoria leaf spot/tomato-badleaves.jpg new file mode 100644 index 000000000..545773c4c Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Septoria leaf spot/tomato-badleaves.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Septoria leaf spot/tomato_septoria_05_zoom.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Septoria leaf spot/tomato_septoria_05_zoom.jpg new file mode 100644 index 000000000..9ca14479a Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato Septoria leaf 
spot/tomato_septoria_05_zoom.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato leaf/late_blight_tomato_leaf4x1200.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato leaf/late_blight_tomato_leaf4x1200.jpg new file mode 100644 index 000000000..83ea0c4f6 Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato leaf/late_blight_tomato_leaf4x1200.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato leaf/russian-2-319-dt-2010-leaves-high-tunnel-9-29-2014-c.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato leaf/russian-2-319-dt-2010-leaves-high-tunnel-9-29-2014-c.jpg new file mode 100644 index 000000000..ca68fc44c Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato leaf/russian-2-319-dt-2010-leaves-high-tunnel-9-29-2014-c.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato mold leaf/Leaf-mold3.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato mold leaf/Leaf-mold3.jpg new file mode 100644 index 000000000..5c7238de3 Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato mold leaf/Leaf-mold3.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato mold leaf/tomato_plants_1_original.JPG_1407178095.jpg b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato mold leaf/tomato_plants_1_original.JPG_1407178095.jpg new file mode 100644 index 000000000..d8215f30d Binary files /dev/null and b/airflow/leaf-pipeline/projects/leaf-counting/demo_images/10/Tomato mold leaf/tomato_plants_1_original.JPG_1407178095.jpg differ diff --git a/airflow/leaf-pipeline/projects/leaf-counting/requirements.orig.txt b/airflow/leaf-pipeline/projects/leaf-counting/requirements.orig.txt new file mode 100644 index 000000000..81958653a --- /dev/null +++ b/airflow/leaf-pipeline/projects/leaf-counting/requirements.orig.txt @@ -0,0 +1,5 @@ + +ultralytics>=8.1.0 +opencv-python-headless>=4.9.0.80 +numpy>=1.23.0 +minio>=7.1.15 diff --git a/airflow/leaf-pipeline/projects/leaf-counting/requirements.txt b/airflow/leaf-pipeline/projects/leaf-counting/requirements.txt new file mode 100644 index 000000000..0c27d4902 --- /dev/null +++ b/airflow/leaf-pipeline/projects/leaf-counting/requirements.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +torch==2.6.0+cpu +torchvision==0.21.0+cpu +torchaudio==2.6.0+cpu + +ultralytics>=8.1.0 +opencv-python-headless>=4.9.0.80 +numpy>=1.23.0 +minio>=7.1.15 diff --git a/airflow/leaf-pipeline/projects/leaf-counting/src/__init__.py b/airflow/leaf-pipeline/projects/leaf-counting/src/__init__.py new file mode 100644 index 000000000..ee49d4339 --- /dev/null +++ b/airflow/leaf-pipeline/projects/leaf-counting/src/__init__.py @@ -0,0 +1,5 @@ +# decompyle3 version 3.9.3 +# Python bytecode version base 3.12.0 (3531) +# Decompiled from: Python 3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0] +# Embedded file name: /home/user/ml-workspace/projects/leaf-counting/src/__init__.py +# Compiled at: 2025-10-20 13:47:51 diff --git a/airflow/leaf-pipeline/projects/leaf-counting/src/common.py b/airflow/leaf-pipeline/projects/leaf-counting/src/common.py new file mode 100644 index 000000000..ea30f64ea --- /dev/null +++ b/airflow/leaf-pipeline/projects/leaf-counting/src/common.py @@ -0,0 +1,35 @@ +from __future__ import annotations +from pathlib import Path 
+import cv2 +import numpy as np + +IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"} + +def is_image(path: Path) -> bool: + return path.suffix.lower() in IMG_EXTS + +def iter_images(inp: Path): + p = Path(inp) + if p.is_file() and is_image(p): + yield p + elif p.is_dir(): + for q in sorted(p.rglob("*")): + if q.is_file() and is_image(q): + yield q + +def ensure_dir(p: Path) -> Path: + Path(p).mkdir(parents=True, exist_ok=True) + return Path(p) + +def draw_boxes(img_bgr: np.ndarray, boxes, color=(0,255,0), thickness=2): + h, w = img_bgr.shape[:2] + out = img_bgr.copy() + for (x1,y1,x2,y2,conf,cls_id) in boxes: + x1 = max(0, min(w-1, int(x1))) + y1 = max(0, min(h-1, int(y1))) + x2 = max(0, min(w-1, int(x2))) + y2 = max(0, min(h-1, int(y2))) + cv2.rectangle(out, (x1,y1), (x2,y2), color, thickness) + label = f"{int(cls_id)}:{conf:.2f}" + cv2.putText(out, label, (x1, max(0, y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1, cv2.LINE_AA) + return out \ No newline at end of file diff --git a/airflow/leaf-pipeline/projects/leaf-counting/src/crop_only.py b/airflow/leaf-pipeline/projects/leaf-counting/src/crop_only.py new file mode 100644 index 000000000..c54066f4c --- /dev/null +++ b/airflow/leaf-pipeline/projects/leaf-counting/src/crop_only.py @@ -0,0 +1,143 @@ +from __future__ import annotations +import json, argparse +from pathlib import Path +from typing import Optional +import cv2 +from common import ensure_dir +from datetime import datetime + +try: + from minio_io import get_client, ensure_bucket, put_png +except Exception: + get_client = ensure_bucket = put_png = None + + +def _load_jsons(inp: Path): + jdir = inp / "json" + if not jdir.exists(): + raise SystemExit(f"[ERR] Expected JSON dir not found: {jdir} (run detect_only.py first)") + + for jp in sorted(jdir.rglob("*.json")): + with jp.open("r", encoding="utf-8") as f: + j = json.load(f) + yield jp, j + +def _safe_crop(img, x1, y1, x2, y2): + h, w = img.shape[:2] + x1 = max(0, min(w-1, int(x1))); y1 = max(0, min(h-1, int(y1))) + x2 = max(0, min(w-1, int(x2))); y2 = max(0, min(h-1, int(y2))) + if x2 <= x1: x2 = min(w-1, x1+1) + if y2 <= y1: y2 = min(h-1, y1+1) + return img[y1:y2, x1:x2] + + +def run_crop(inp: Path, out_dir: Path, size: int=224, margin: float=0.1, min_wh: int=8, + orig_dir: Optional[Path]=None, flat: bool=False, + minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, + minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, + minio_prefix: str="CROP", minio_secure: bool=False, + run_id: Optional[str]=None): + run_id = run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + out_dir = ensure_dir(out_dir) + + cli = None + if minio_endpoint and minio_access and minio_secret and minio_bucket: + if get_client is None: + raise SystemExit("[ERR] חסר minio או minio_io.") + cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure) + ensure_bucket(cli, minio_bucket) + + count = 0 + for jp, j in _load_jsons(inp): + + if "source_path" in j: + img_path = Path(j["source_path"]) + rel_path = j.get("rel_path", j["image"]) + elif "rel_path" in j: + if orig_dir is None: + raise SystemExit("[ERR] JSON מכיל רק rel_path; ספקי --orig כדי למצוא את קובץ המקור") + img_path = Path(orig_dir) / j["rel_path"] + rel_path = j["rel_path"] + else: + if orig_dir is None: + raise SystemExit("[ERR] JSON חסר source_path/rel_path; ספקי --orig ותתאימי לשמות image") + img_path = Path(orig_dir) / j["image"] + rel_path = j["image"] + + if not img_path.exists(): + print(f"[WARN] Original image not 
found: {img_path}, skipping") + continue + + img = cv2.imread(str(img_path)) + if img is None: + print(f"[WARN] Can't read image: {img_path}") + continue + + rel_parent = str(Path(rel_path).parent) + rel_stem = Path(rel_path).stem + + + if flat: + dest_dir = ensure_dir(out_dir) + minio_subprefix = minio_prefix + else: + dest_dir = ensure_dir(out_dir / rel_parent / rel_stem) + minio_subprefix = f"{minio_prefix}/{rel_parent}/{rel_stem}" if rel_parent != "." else f"{minio_prefix}/{rel_stem}" + + for i, (x1,y1,x2,y2,conf,cls_id) in enumerate(j.get("boxes", [])): + w = x2 - x1; h = y2 - y1 + if w < min_wh or h < min_wh: + continue + cx = (x1 + x2) * 0.5; cy = (y1 + y2) * 0.5 + half = max(w, h) * 0.5 * (1.0 + margin) + crop = _safe_crop(img, cx-half, cy-half, cx+half, cy+half) + if crop.size == 0: + continue + crop_resized = cv2.resize(crop, (size, size), interpolation=cv2.INTER_AREA) + out_name = f"det{i:03d}_cls{int(cls_id)}_{conf:.2f}.png" + cv2.imwrite(str(dest_dir / out_name), crop_resized) + count += 1 + + if cli: + base = f"{run_id}/{minio_prefix}" # תאריך/שעה קודם, אח"כ CROP + key = f"{base}/{rel_parent}/{rel_stem}/{out_name}" if rel_parent != "." else f"{base}/{rel_stem}/{out_name}" + put_png(cli, minio_bucket, key, crop_resized) + + put_png(cli, minio_bucket, f"{minio_subprefix}/{out_name}", crop_resized) + + print(f"[DONE] Saved {count} crops under: {out_dir} (flat={flat})") + +def main(): + ap = argparse.ArgumentParser(description="Create square crops from detection JSON results (+optional MinIO).") + ap.add_argument("--input", required=True) + ap.add_argument("--out", required=True) + ap.add_argument("--orig", default=None, help="דרוש רק אם JSON חסר source_path") + ap.add_argument("--size", type=int, default=224) + ap.add_argument("--margin", type=float, default=0.1) + ap.add_argument("--min-wh", type=int, default=8) + ap.add_argument("--flat", action="store_true") + + ap.add_argument("--minio-endpoint", default=None) + ap.add_argument("--minio-access", default=None) + ap.add_argument("--minio-secret", default=None) + ap.add_argument("--minio-bucket", default=None) + ap.add_argument("--minio-prefix", default="crops") + ap.add_argument("--minio-secure", action="store_true") + ap.add_argument("--run-id", default=None, help="תיקיית הריצה ב-MinIO (ברירת מחדל: YYYY/MM/DD/HHmm)") + + args = ap.parse_args() + run_id = args.run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + run_crop( + inp=Path(args.input), out_dir=Path(args.out), + size=args.size, margin=args.margin, min_wh=args.min_wh, + orig_dir=Path(args.orig) if args.orig else None, flat=args.flat, + minio_endpoint=args.minio_endpoint, minio_access=args.minio_access, + minio_secret=args.minio_secret, minio_bucket=args.minio_bucket, + minio_prefix=args.minio_prefix, minio_secure=args.minio_secure, + run_id=run_id, + ) + + + +if __name__ == "__main__": + main() diff --git a/airflow/leaf-pipeline/projects/leaf-counting/src/detect_only.py b/airflow/leaf-pipeline/projects/leaf-counting/src/detect_only.py new file mode 100644 index 000000000..707c88a66 --- /dev/null +++ b/airflow/leaf-pipeline/projects/leaf-counting/src/detect_only.py @@ -0,0 +1,140 @@ + +from __future__ import annotations +import json, argparse +from pathlib import Path +from typing import Optional +from datetime import datetime + +# --- HARD PATCH: עקיפת cpuinfo/ultralytics באופן גורף (מונע Popen('')) --- +try: + import cpuinfo as _ci + _ci.get_cpu_info = (lambda: {"brand_raw": "unknown"}) +except Exception: + pass +try: + import ultralytics.utils.torch_utils as _tu + 
_tu.get_cpu_info = (lambda: "unknown") +except Exception: + pass +# --- end hard patch --- + +import cv2 +from ultralytics import YOLO +from common import iter_images, ensure_dir, draw_boxes + +import cpuinfo +try: + print("cpu brand:", cpuinfo.get_cpu_info().get("brand_raw")) +except Exception as e: + print("cpuinfo error:", repr(e)) + +try: + from minio_io import get_client, ensure_bucket, put_png, put_json +except Exception: + get_client = ensure_bucket = put_png = put_json = None + +def run_detect(inp: Path, out_dir: Path, weights: Path, + conf: float=0.25, imgsz: int=896, device: str="cpu", + minio_endpoint: Optional[str]=None, minio_access: Optional[str]=None, + minio_secret: Optional[str]=None, minio_bucket: Optional[str]=None, + minio_prefix: str="DETECT", minio_secure: bool=False, + run_id: Optional[str]=None): + + run_id = run_id or datetime.now().strftime("%Y/%m/%d/%H%M") + out_dir = ensure_dir(out_dir) + overlay_root = ensure_dir(out_dir / "overlay") + json_root = ensure_dir(out_dir / "json") + + cli = None + if minio_endpoint and minio_access and minio_secret and minio_bucket: + if get_client is None: + raise SystemExit("[ERR] חסר minio או minio_io.") + cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure) + ensure_bucket(cli, minio_bucket) + + model = YOLO(str(weights)) + + + img_paths = list(iter_images(inp)) + if not img_paths: + raise SystemExit(f"[ERR] No images found under: {inp}") + + is_dir_input = Path(inp).is_dir() + + for img_path in img_paths: + + rel_path = img_path.name if not is_dir_input else str(img_path.relative_to(inp)) + rel_parent = "." if not is_dir_input else str(img_path.relative_to(inp).parent) + rel_stem = Path(rel_path).stem + + overlay_dir = ensure_dir(overlay_root / rel_parent) + json_dir = ensure_dir(json_root / rel_parent) + + img_bgr = cv2.imread(str(img_path)) + if img_bgr is None: + print(f"[WARN] can't read image: {img_path}") + continue + h, w = img_bgr.shape[:2] + + res = model.predict(source=img_bgr, conf=conf, imgsz=imgsz, device=device, verbose=False)[0] + + boxes_pix = [] + if res.boxes is not None and len(res.boxes) > 0: + for b in res.boxes: + xyxy = b.xyxy.cpu().numpy().reshape(-1) + conf_i = float(b.conf.cpu().numpy().reshape(-1)[0]) + cls_i = float(b.cls.cpu().numpy().reshape(-1)[0]) if b.cls is not None else 0.0 + x1,y1,x2,y2 = map(float, xyxy.tolist()) + boxes_pix.append([x1,y1,x2,y2,conf_i,cls_i]) + + j = { + "image": img_path.name, + "rel_path": rel_path, + "source_path": str(img_path.resolve()), + "width": w, "height": h, + "boxes": boxes_pix + } + json_path = json_dir / f"{rel_stem}.json" + json_path.write_text(json.dumps(j, ensure_ascii=False, indent=2), encoding="utf-8") + + overlay = draw_boxes(img_bgr, boxes_pix) + ov_path = overlay_dir / img_path.name + cv2.imwrite(str(ov_path), overlay) + + if cli: + base = f"{run_id}/{minio_prefix}" + minio_json_key = f"{base}/json/{rel_parent}/{rel_stem}.json" if rel_parent != "." else f"{base}/json/{rel_stem}.json" + minio_ov_key = f"{base}/overlay/{rel_parent}/{img_path.name}" if rel_parent != "." 
+            put_json(cli, minio_bucket, minio_json_key, j)
+            put_png(cli, minio_bucket, minio_ov_key, overlay)
+
+        print(f"[OK] {rel_path} -> {json_path.relative_to(out_dir)}, boxes={len(boxes_pix)}")
+
+def main():
+    ap = argparse.ArgumentParser(description="YOLO detect -> pixel JSON + overlay (+optional MinIO)")
+    ap.add_argument("--input", required=True)
+    ap.add_argument("--out", required=True)
+    ap.add_argument("--weights", required=True)
+    ap.add_argument("--conf", type=float, default=0.25)
+    ap.add_argument("--imgsz", type=int, default=896)
+    ap.add_argument("--device", default="cpu")
+
+    ap.add_argument("--minio-endpoint", default=None)
+    ap.add_argument("--minio-access", default=None)
+    ap.add_argument("--minio-secret", default=None)
+    ap.add_argument("--minio-bucket", default=None)
+    ap.add_argument("--minio-prefix", default="detect")
+    ap.add_argument("--minio-secure", action="store_true")
+    ap.add_argument("--run-id", default=None, help="Run folder in MinIO (default: YYYY/MM/DD/HHmm)")
+
+    args = ap.parse_args()
+    run_id = args.run_id or datetime.now().strftime("%Y/%m/%d/%H%M")
+    run_detect(Path(args.input), Path(args.out), Path(args.weights),
+               conf=args.conf, imgsz=args.imgsz, device=args.device,
+               minio_endpoint=args.minio_endpoint, minio_access=args.minio_access,
+               minio_secret=args.minio_secret, minio_bucket=args.minio_bucket,
+               minio_prefix=args.minio_prefix, minio_secure=args.minio_secure,
+               run_id=run_id)
+
+if __name__ == "__main__":
+    main()
diff --git a/airflow/leaf-pipeline/projects/leaf-counting/src/minio_io.py b/airflow/leaf-pipeline/projects/leaf-counting/src/minio_io.py
new file mode 100644
index 000000000..cda3c246d
--- /dev/null
+++ b/airflow/leaf-pipeline/projects/leaf-counting/src/minio_io.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+import io, json
+import cv2
+from minio import Minio
+from minio.error import S3Error
+
+def get_client(endpoint: str, access_key: str, secret_key: str, secure: bool=False) -> Minio:
+    """
+    Example:
+    cli = get_client("localhost:9000", "minioadmin", "minioadmin", secure=False)
+    """
+    return Minio(endpoint, access_key=access_key, secret_key=secret_key, secure=secure)
+
+def ensure_bucket(cli: Minio, bucket: str):
+    found = cli.bucket_exists(bucket)
+    if not found:
+        cli.make_bucket(bucket)
+
+def put_png(cli: Minio, bucket: str, key: str, img_bgr):
+    """
+    Uploads a PNG image from an np.ndarray (OpenCV BGR).
+    """
+    # Encode in memory and upload straight to the bucket; no local files are written.
+    ok, buf = cv2.imencode(".png", img_bgr)
+    if not ok:
+        raise RuntimeError("cv2.imencode PNG failed")
+    bio = io.BytesIO(buf.tobytes())
+    bio.seek(0)
+    cli.put_object(bucket, key, bio, length=len(bio.getvalue()), content_type="image/png")
+
+def put_json(cli: Minio, bucket: str, key: str, obj):
+    """
+    Uploads a JSON-serializable object (dict/list).
+    """
+    js = json.dumps(obj, ensure_ascii=False, indent=2).encode("utf-8")
+    bio = io.BytesIO(js)
+    bio.seek(0)
+    cli.put_object(bucket, key, bio, length=len(js), content_type="application/json")
diff --git a/airflow/leaf-pipeline/projects/leaf-counting/src/predict_pyramid_wbf.py b/airflow/leaf-pipeline/projects/leaf-counting/src/predict_pyramid_wbf.py
new file mode 100644
index 000000000..292829ba1
--- /dev/null
+++ b/airflow/leaf-pipeline/projects/leaf-counting/src/predict_pyramid_wbf.py
@@ -0,0 +1,233 @@
+
+from __future__ import annotations
+import argparse, json
+from pathlib import Path
+from typing import List, Tuple, Optional
+from datetime import datetime
+
+# --- HARD PATCH: override cpuinfo/ultralytics CPU probing (prevents Popen('')) ---
+try:
+    import cpuinfo as _ci
+    _ci.get_cpu_info = (lambda: {"brand_raw": "unknown"})
+except Exception:
+    pass
+try:
+    import ultralytics.utils.torch_utils as _tu
+    _tu.get_cpu_info = (lambda: "unknown")
+except Exception:
+    pass
+# --- end hard patch ---
+
+import cv2
+import numpy as np
+from ultralytics import YOLO
+
+from common import iter_images, ensure_dir, draw_boxes
+
+try:
+    from minio_io import get_client, ensure_bucket, put_png, put_json
+except Exception:
+    get_client = ensure_bucket = put_png = put_json = None
+
+
+# ----------------- WBF utils -----------------
+def iou_xyxy(a: np.ndarray, b: np.ndarray) -> float:
+    ax1, ay1, ax2, ay2 = a
+    bx1, by1, bx2, by2 = b
+    ix1, iy1 = max(ax1, bx1), max(ay1, by1)
+    ix2, iy2 = min(ax2, bx2), min(ay2, by2)
+    iw, ih = max(0.0, ix2 - ix1), max(0.0, iy2 - iy1)
+    inter = iw * ih
+    area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1)
+    area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1)
+    union = area_a + area_b - inter + 1e-9
+    return inter / union
+
+
+def wbf(boxes: List[np.ndarray], scores: List[float], iou_thr: float = 0.55) -> tuple[list[np.ndarray], list[float]]:
+    """Very small WBF: group boxes with IoU >= thr, then take the confidence-weighted average of their coordinates."""
+    used = [False] * len(boxes)
+    out_boxes, out_scores = [], []
+    for i in range(len(boxes)):
+        if used[i]:
+            continue
+        group_idxs = [i]
+        used[i] = True
+        for j in range(i + 1, len(boxes)):
+            if used[j]:
+                continue
+            if iou_xyxy(boxes[i], boxes[j]) >= iou_thr:
+                group_idxs.append(j)
+                used[j] = True
+        bs = np.array([boxes[k] for k in group_idxs], dtype=float)
+        ws = np.array([scores[k] for k in group_idxs], dtype=float)
+        wsum = ws.sum() + 1e-9
+        avg = (bs * ws[:, None]).sum(axis=0) / wsum
+        out_boxes.append(avg)
+        out_scores.append(float(ws.max()))
+    return out_boxes, out_scores
+
+
+# ----------------- multi-scale predict -----------------
+def predict_at_scales(model: YOLO, img_bgr: np.ndarray, scales: List[float], conf: float, imgsz: int, device: str):
+    H, W = img_bgr.shape[:2]
+    all_boxes, all_scores, all_classes = [], [], []
+    for s in scales:
+        if s == 1.0:
+            resized = img_bgr
+            rx, ry = 1.0, 1.0
+        else:
+            newW, newH = int(W * s), int(H * s)
+            resized = cv2.resize(img_bgr, (newW, newH), interpolation=cv2.INTER_LINEAR)
+            rx, ry = 1.0 / s, 1.0 / s
+
+        res = model.predict(source=resized, conf=conf, imgsz=imgsz, device=device, verbose=False)[0]
+        if res.boxes is None or len(res.boxes) == 0:
+            continue
+        for b in res.boxes:
+            xyxy = b.xyxy.cpu().numpy().reshape(-1)
+            conf_i = float(b.conf.cpu().numpy().reshape(-1)[0])
+            cls_i = float(b.cls.cpu().numpy().reshape(-1)[0]) if b.cls is not None else 0.0
+            x1, y1, x2, y2 = xyxy
+            # map back to original-image coordinates
+            x1, y1, x2, y2 = x1 * rx, y1 * ry, x2 * rx, y2 * ry
+            all_boxes.append(np.array([x1, y1, x2, y2], dtype=float))
+            all_scores.append(conf_i)
+            all_classes.append(int(cls_i))
+    return all_boxes, all_scores, all_classes
+
+
+# ----------------- main runner -----------------
+def run(inp: Path, out_dir: Path, weights: Path,
+        scales: List[float], conf: float = 0.25, iou_thr: float = 0.55,
+        imgsz: int = 896, device: str = "cpu",
+        minio_endpoint: Optional[str] = None, minio_access: Optional[str] = None,
+        minio_secret: Optional[str] = None, minio_bucket: Optional[str] = None,
+        minio_prefix: str = "PREDICT_PWB", minio_secure: bool = False,
+        run_id: Optional[str] = None):
+
+    run_id = run_id or datetime.now().strftime("%Y/%m/%d/%H%M")
+
+    out_dir = ensure_dir(out_dir)
+    overlay_root = ensure_dir(out_dir / "overlay")
+    json_root = ensure_dir(out_dir / "json")
+
+    cli = None
+    if minio_endpoint and minio_access and minio_secret and minio_bucket:
+        if get_client is None:
+            raise SystemExit("[ERR] minio or minio_io is missing.")
+        cli = get_client(minio_endpoint, minio_access, minio_secret, secure=minio_secure)
+        ensure_bucket(cli, minio_bucket)
+
+    model = YOLO(str(weights))
+    images = list(iter_images(inp))
+    if not images:
+        raise SystemExit(f"[ERR] No images under: {inp}")
+
+    for p in images:
+        img = cv2.imread(str(p))
+        if img is None:
+            print(f"[WARN] can't read: {p}")
+            continue
+        H, W = img.shape[:2]
+
+        rel_path = str(p.relative_to(inp)) if inp.is_dir() else p.name
+        rel_parent = str(Path(rel_path).parent)
+        rel_stem = Path(rel_path).stem
+
+        boxes, scores, classes = predict_at_scales(model, img, scales, conf, imgsz, device)
+
+        # Fuse per class, then clamp the merged boxes to the image bounds.
+        merged = []
+        for cls in sorted(set(classes)):
+            idxs = [i for i, c in enumerate(classes) if c == cls]
+            if not idxs:
+                continue
+            bcls = [boxes[i] for i in idxs]
+            scls = [scores[i] for i in idxs]
+            mbox, mscore = wbf(bcls, scls, iou_thr=iou_thr)
+            for bb, ss in zip(mbox, mscore):
+                x1, y1, x2, y2 = [float(max(0, v)) for v in bb]
+                x1, y1 = min(x1, W - 1), min(y1, H - 1)
+                x2, y2 = min(x2, W - 1), min(y2, H - 1)
+                merged.append([x1, y1, x2, y2, float(ss), float(cls)])
+
+        overlay_dir = ensure_dir(overlay_root / rel_parent)
+        json_dir = ensure_dir(json_root / rel_parent)
+
+        j = {
+            "image": p.name,
+            "rel_path": rel_path,
+            "source_path": str(p.resolve()),
+            "width": W, "height": H,
+            "boxes": merged
+        }
+        jpath = json_dir / f"{rel_stem}.json"
+        jpath.write_text(json.dumps(j, ensure_ascii=False, indent=2), encoding="utf-8")
+
+        overlay = draw_boxes(img, merged)
+        cv2.imwrite(str(overlay_dir / p.name), overlay)
+
+        if cli:
+            base = f"{run_id}/{minio_prefix}"
+            json_key = f"{base}/json/{rel_parent}/{rel_stem}.json" if rel_parent != "." else f"{base}/json/{rel_stem}.json"
+            ov_key = f"{base}/overlay/{rel_parent}/{p.name}" if rel_parent != "." else f"{base}/overlay/{p.name}"
+            put_json(cli, minio_bucket, json_key, j)
+            put_png(cli, minio_bucket, ov_key, overlay)
+
+        print(f"[OK] {rel_path} WBF boxes={len(merged)} -> {jpath.relative_to(out_dir)}")
+
+
+def parse_scales(s: str) -> List[float]:
+    return [float(x) for x in s.split(",") if x.strip()]
+
+
+def main():
+    ap = argparse.ArgumentParser(description="YOLO multi-scale + WBF (+optional MinIO)")
+    ap.add_argument("--input", required=True)
+    ap.add_argument("--out", required=True)
+    ap.add_argument("--weights", required=True)
+    ap.add_argument("--scales", default="0.75,1.0,1.25", help="comma-separated, e.g. 0.5,1.0,1.5")
+    ap.add_argument("--conf", type=float, default=0.25)
+    ap.add_argument("--iou", type=float, default=0.55, help="WBF IoU threshold")
+    ap.add_argument("--imgsz", type=int, default=896)
+    ap.add_argument("--device", default="cpu")
+
+    # MinIO
+    ap.add_argument("--minio-endpoint", default=None)
+    ap.add_argument("--minio-access", default=None)
+    ap.add_argument("--minio-secret", default=None)
+    ap.add_argument("--minio-bucket", default=None)
+    ap.add_argument("--minio-prefix", default="PREDICT_PWB")
+    ap.add_argument("--minio-secure", action="store_true")
+
+    # Run grouping
+    ap.add_argument("--run-id", default=None, help="Run folder in MinIO (default: YYYY/MM/DD/HHmm)")
+
+    args = ap.parse_args()
+
+    run_id = args.run_id or datetime.now().strftime("%Y/%m/%d/%H%M")
+    run(
+        inp=Path(args.input),
+        out_dir=Path(args.out),
+        weights=Path(args.weights),
+        scales=parse_scales(args.scales),
+        conf=args.conf,
+        iou_thr=args.iou,
+        imgsz=args.imgsz,
+        device=args.device,
+        minio_endpoint=args.minio_endpoint,
+        minio_access=args.minio_access,
+        minio_secret=args.minio_secret,
+        minio_bucket=args.minio_bucket,
+        minio_prefix=args.minio_prefix,
+        minio_secure=args.minio_secure,
+        run_id=run_id,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/airflow/leaf-pipeline/projects/leaf-counting/weights/best.pt b/airflow/leaf-pipeline/projects/leaf-counting/weights/best.pt
new file mode 100644
index 000000000..5cda183ca
--- /dev/null
+++ b/airflow/leaf-pipeline/projects/leaf-counting/weights/best.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23fc4cdddc6c71ae0dc8b38bdff5fdb0c963c9f1b872a7ffb9d799c908d9cb5a
+size 6230435
diff --git a/airflow/leaf-pipeline/projects/leaf-counting/weights/last.pt b/airflow/leaf-pipeline/projects/leaf-counting/weights/last.pt
new file mode 100644
index 000000000..8760aa4d2
--- /dev/null
+++ b/airflow/leaf-pipeline/projects/leaf-counting/weights/last.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:302fcee6565f57895c0fbfc1fa2c3922de0a4fbecde7fc8a6e130c831e038181
+size 6230435
diff --git a/airflow/webserver_config.py b/airflow/webserver_config.py
new file mode 100644
index 000000000..3048bb21f
--- /dev/null
+++ b/airflow/webserver_config.py
@@ -0,0 +1,132 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Default configuration for the Airflow webserver.""" + +from __future__ import annotations + +import os + +from flask_appbuilder.const import AUTH_DB + +# from airflow.www.fab_security.manager import AUTH_LDAP +# from airflow.www.fab_security.manager import AUTH_OAUTH +# from airflow.www.fab_security.manager import AUTH_OID +# from airflow.www.fab_security.manager import AUTH_REMOTE_USER + + +basedir = os.path.abspath(os.path.dirname(__file__)) + +# Flask-WTF flag for CSRF +WTF_CSRF_ENABLED = True +WTF_CSRF_TIME_LIMIT = None + +# ---------------------------------------------------- +# AUTHENTICATION CONFIG +# ---------------------------------------------------- +# For details on how to set up each of the following authentication, see +# http://flask-appbuilder.readthedocs.io/en/latest/security.html# authentication-methods +# for details. + +# The authentication type +# AUTH_OID : Is for OpenID +# AUTH_DB : Is for database +# AUTH_LDAP : Is for LDAP +# AUTH_REMOTE_USER : Is for using REMOTE_USER from web server +# AUTH_OAUTH : Is for OAuth +AUTH_TYPE = AUTH_DB + +# Uncomment to setup Full admin role name +# AUTH_ROLE_ADMIN = 'Admin' + +# Uncomment and set to desired role to enable access without authentication +# AUTH_ROLE_PUBLIC = 'Viewer' + +# Will allow user self registration +# AUTH_USER_REGISTRATION = True + +# The recaptcha it's automatically enabled for user self registration is active and the keys are necessary +# RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY +# RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY + +# Config for Flask-Mail necessary for user self registration +# MAIL_SERVER = 'smtp.gmail.com' +# MAIL_USE_TLS = True +# MAIL_USERNAME = 'yourappemail@gmail.com' +# MAIL_PASSWORD = 'passwordformail' +# MAIL_DEFAULT_SENDER = 'sender@gmail.com' + +# The default user self registration role +# AUTH_USER_REGISTRATION_ROLE = "Public" + +# When using OAuth Auth, uncomment to setup provider(s) info +# Google OAuth example: +# OAUTH_PROVIDERS = [{ +# 'name':'google', +# 'token_key':'access_token', +# 'icon':'fa-google', +# 'remote_app': { +# 'api_base_url':'https://www.googleapis.com/oauth2/v2/', +# 'client_kwargs':{ +# 'scope': 'email profile' +# }, +# 'access_token_url':'https://accounts.google.com/o/oauth2/token', +# 'authorize_url':'https://accounts.google.com/o/oauth2/auth', +# 'request_token_url': None, +# 'client_id': GOOGLE_KEY, +# 'client_secret': GOOGLE_SECRET_KEY, +# } +# }] + +# When using LDAP Auth, setup the ldap server +# AUTH_LDAP_SERVER = "ldap://ldapserver.new" + +# When using OpenID Auth, uncomment to setup OpenID providers. +# example for OpenID authentication +# OPENID_PROVIDERS = [ +# { 'name': 'Yahoo', 'url': 'https://me.yahoo.com' }, +# { 'name': 'AOL', 'url': 'http://openid.aol.com/' }, +# { 'name': 'Flickr', 'url': 'http://www.flickr.com/' }, +# { 'name': 'MyOpenID', 'url': 'https://www.myopenid.com' }] + +# ---------------------------------------------------- +# Theme CONFIG +# ---------------------------------------------------- +# Flask App Builder comes up with a number of predefined themes +# that you can use for Apache Airflow. +# http://flask-appbuilder.readthedocs.io/en/latest/customizing.html#changing-themes +# Please make sure to remove "navbar_color" configuration from airflow.cfg +# in order to fully utilize the theme. 
(or use that property in conjunction with theme) +# APP_THEME = "bootstrap-theme.css" # default bootstrap +# APP_THEME = "amelia.css" +# APP_THEME = "cerulean.css" +# APP_THEME = "cosmo.css" +# APP_THEME = "cyborg.css" +# APP_THEME = "darkly.css" +# APP_THEME = "flatly.css" +# APP_THEME = "journal.css" +# APP_THEME = "lumen.css" +# APP_THEME = "paper.css" +# APP_THEME = "readable.css" +# APP_THEME = "sandstone.css" +# APP_THEME = "simplex.css" +# APP_THEME = "slate.css" +# APP_THEME = "solar.css" +# APP_THEME = "spacelab.css" +# APP_THEME = "superhero.css" +# APP_THEME = "united.css" +# APP_THEME = "yeti.css" diff --git a/docker-compose.yml b/docker-compose.yml index 8ed2581e3..15769648a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,1028 +1,206 @@ -# ========================== -# Docker Compose - AG Cloud -# ========================== +# # version: "3.8" + +# # x-airflow-common: &airflow-common +# # image: leaf-airflow:2.9.3 +# # build: +# # context: . +# # dockerfile: Dockerfile +# # environment: +# # AIRFLOW__CORE__LOAD_EXAMPLES: "False" +# # AIRFLOW__CORE__EXECUTOR: "SequentialExecutor" # לשלב ראשון. אפשר לשדרג LocalExecutor בהמשך +# # AIRFLOW_HOME: /opt/airflow +# # # כתובת MinIO החיצונית שלך (כמו שהרצת): +# # LEAF_MINIO_ENDPOINT: "http://host.docker.internal:9001" +# # volumes: +# # # תיקיית Airflow מקומית (DB/לוגים/קונפיג) +# # - ./airflow:/opt/airflow +# # # ה-DAG שלך +# # - ./airflow/dags:/opt/airflow/dags +# # # קוד הפרויקט והמשקלים +# # - ./projects/leaf-counting:/opt/leaf-pipeline/projects/leaf-counting +# # user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-0}" +# # restart: unless-stopped + +# # # services: +# # # airflow-init: +# # # <<: *airflow-common +# # # entrypoint: bash +# # # command: > +# # # -lc " +# # # airflow db migrate && +# # # airflow users create +# # # --username admin +# # # --firstname admin +# # # --lastname admin +# # # --role Admin +# # # --email admin@example.com +# # # --password admin && +# # # # חיבור MinIO כ-generic עם endpoint ו-creds +# # # airflow connections delete minio_s3 || true && +# # # airflow connections add minio_s3 +# # # --conn-type generic +# # # --conn-login minioadmin +# # # --conn-password minioadmin123 +# # # --conn-extra '{\"endpoint_url\":\"${LEAF_MINIO_ENDPOINT}\",\"region_name\":\"us-east-1\"}' && +# # # airflow variables set leaf_minio_bucket example-bucket && +# # # echo 'Init done' +# # # " +# # # profiles: ["init"] + +# # # scheduler: +# # # <<: *airflow-common +# # # command: ["airflow", "scheduler"] +# # # user: "0:0" # זמני, כדי שלא ניתקע על הרשאות של הסוקט +# # # volumes: +# # # - ./airflow/dags:/opt/airflow/dags +# # # - /var/run/docker.sock:/var/run/docker.sock + +# # # webserver: +# # # <<: *airflow-common +# # # command: ["airflow", "webserver"] +# # # user: "0:0" # זמני, כדי שלא ניתקע על הרשאות של הסוקט +# # # volumes: +# # # - ./airflow/dags:/opt/airflow/dags +# # # - /var/run/docker.sock:/var/run/docker.sock +# # # ports: +# # # - "8081:8080" # ה-UI על 8081 במכונת ה-Windows שלך +# # # depends_on: +# # # - scheduler +# # services: +# # scheduler: +# # <<: *airflow-common +# # command: ["airflow", "scheduler"] +# # user: "0:0" +# # volumes: +# # - ${PWD}/airflow/dags:/opt/airflow/dags +# # - ${PWD}/airflow/logs:/opt/airflow/logs +# # - ${PWD}/airflow/plugins:/opt/airflow/plugins +# # - /var/run/docker.sock:/var/run/docker.sock +# # networks: [agcloud_ag_cloud] +# # webserver: +# # <<: *airflow-common +# # command: ["airflow", "webserver"] +# # user: "0:0" +# # volumes: +# # - 
${PWD}/airflow/dags:/opt/airflow/dags +# # - ${PWD}/airflow/logs:/opt/airflow/logs +# # - ${PWD}/airflow/plugins:/opt/airflow/plugins +# # - /var/run/docker.sock:/var/run/docker.sock +# # ports: +# # - "8081:8080" +# # depends_on: +# # - scheduler +# # networks: [agcloud_ag_cloud] +# # networks: +# # agcloud_ag_cloud: +# # external: true + +# version: "3.8" + +# x-airflow-common: &airflow-common +# # שינוי: להשתמש באימג’ עם תלויות מותקנות +# image: leaf-airflow:2.9.3-fixed +# build: +# context: . +# dockerfile: Dockerfile +# environment: +# AIRFLOW__CORE__LOAD_EXAMPLES: "False" +# AIRFLOW__CORE__EXECUTOR: "SequentialExecutor" +# AIRFLOW_HOME: /opt/airflow +# # שינוי: אם עובדים מול MinIO ברשת הפנימית, לא צריך host.docker.internal +# LEAF_MINIO_ENDPOINT: "http://minio-hot:9000" +# volumes: +# - ./airflow:/opt/airflow +# - ./airflow/dags:/opt/airflow/dags +# # אם את לא משתמשת בנתיב הזה – אפשר להסיר. אחרת להשאיר. +# - ./projects/leaf-counting:/opt/leaf-pipeline/projects/leaf-counting +# user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-0}" +# restart: unless-stopped + +# services: +# scheduler: +# <<: *airflow-common +# command: ["airflow", "scheduler"] +# user: "0:0" +# volumes: +# - ${PWD}/airflow/dags:/opt/airflow/dags +# - ${PWD}/airflow/logs:/opt/airflow/logs +# - ${PWD}/airflow/plugins:/opt/airflow/plugins +# - /var/run/docker.sock:/var/run/docker.sock +# networks: [agcloud_ag_cloud] + +# webserver: +# <<: *airflow-common +# command: ["airflow", "webserver"] +# user: "0:0" +# volumes: +# - ${PWD}/airflow/dags:/opt/airflow/dags +# - ${PWD}/airflow/logs:/opt/airflow/logs +# - ${PWD}/airflow/plugins:/opt/airflow/plugins +# - /var/run/docker.sock:/var/run/docker.sock +# ports: +# - "8081:8080" +# depends_on: +# - scheduler +# networks: [agcloud_ag_cloud] + +# networks: +# agcloud_ag_cloud: +# external: true + +# # אופציונלי: MinIO מקומי בתוך compose (כבוי כברירת מחדל) +# # אם תרצי, בטלי את ההערות ויהיה לך MinIO פנימי (Host: 9001 -> Container: 9000) +# # minio: +# # image: minio/minio:latest +# # command: server /data --console-address ":9001" +# # environment: +# # MINIO_ROOT_USER: minioadmin +# # MINIO_ROOT_PASSWORD: minioadmin123 +# # ports: +# # - "9001:9000" +# # - "9002:9001" +# # volumes: +# # - ./minio-data:/data +version: "3.8" + +x-airflow-common: &airflow-common + image: leaf-airflow:2.9.3-fixed + build: + context: . 
+    dockerfile: Dockerfile
+  environment:
+    AIRFLOW__CORE__LOAD_EXAMPLES: "False"
+    AIRFLOW__CORE__EXECUTOR: "SequentialExecutor"  # dev-compatible with SQLite
+    AIRFLOW_HOME: /opt/airflow
+    # correct when this stack shares a network with agcloud and the container is named minio-hot:
+    LEAF_MINIO_ENDPOINT: "http://minio-hot:9000"
+  volumes:
+    # single mapping for the whole Airflow home (airflow.db, airflow.cfg, logs, dags, plugins, staging)
+    - ./airflow:/opt/airflow
+    # project code / models, if needed:
+    - ./projects/leaf-counting:/opt/leaf-pipeline/projects/leaf-counting
+    # for DockerOperator (if used):
+    - /var/run/docker.sock:/var/run/docker.sock
+  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-0}"
+  restart: unless-stopped
-version: "3.9"
-# --------------------------
-# Networks
-# --------------------------
-networks:
-  ag_cloud:
-    name: ag_cloud
-    driver: bridge
-
-# --------------------------
-# Volumes
-# --------------------------
-volumes:
-  postgres_data:
-  wal_archive:
-  backups:
-  gui_data:
-  minio-hot-data: {}
-  minio-cold-data: {}
-  contracts: {}
-
-# ==========================
-# Services
-# ==========================
 services:
+  scheduler:
+    <<: *airflow-common
+    command: ["airflow", "scheduler"]
+    user: "0:0"  # avoid permission problems on the docker sock on first run
+    networks: [agcloud_ag_cloud]
-  # --------------------------
-  # RelDB / Postgres
-  # --------------------------
-  postgres:
-    build: ./RelDB
-    container_name: postgres
-    environment:
-      POSTGRES_USER: missions_user
-      POSTGRES_PASSWORD: pg123
-      POSTGRES_DB: missions_db
-      PGHOST: 0.0.0.0
-      PGPORT: 5432
-      PGDATA: /var/lib/postgresql/data
-      WAL_DIR: /var/lib/postgresql/wal_archive
-      BACKUP_DIR: /var/lib/postgresql/backups
-      RETENTION: 7
-      TZ: Asia/Jerusalem
-    ports:
-      - "5432:5432"
-    volumes:
-      - postgres_data:/var/lib/postgresql/data
-      - wal_archive:/var/lib/postgresql/wal_archive
-      - backups:/var/lib/postgresql/backups
-    healthcheck:
-      test: [ "CMD", "pg_isready", "-U", "missions_user", "-d", "missions_db" ]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 30s
-    networks:
-      - ag_cloud
-    restart: unless-stopped
-
-  postgres_exporter:
-    image: quay.io/prometheuscommunity/postgres-exporter:v0.15.0
-    environment:
-      DATA_SOURCE_NAME: "postgresql://missions_user:pg123@postgres:5432/missions_db?sslmode=disable"
-    command:
-      - "--extend.query-path=/etc/postgres-queries.yml"
-    volumes:
-      - ./RelDB/graphs/postgres-queries.yml:/etc/postgres-queries.yml
-    depends_on:
-      - postgres
-    ports:
-      - "9187:9187"
-    networks:
-      - ag_cloud
-  # -------------------------
-  # Sound Metrics Service
-  # -------------------------
-
-  sound_metrics:
-    build:
-      context: ./services/sound_metrics
-      dockerfile: Dockerfile
-    environment:
-      - ADDR=0.0.0.0
-      - PORT=8005
-      - USE_UTC=false
-      - WINDOW_MIN=1
-      - STABLE_SEC=1
-      - PYTHONUNBUFFERED=1
-
-      - MINIO_ENDPOINT=minio-hot:9000
-      - MINIO_ACCESS_KEY=minioadmin
-      - MINIO_SECRET_KEY=minioadmin123
-      - MINIO_BUCKET=sound
-      - MINIO_PREFIX=sounds/
-
-    command: [ "python", "-u", "src/metrics.py" ]
-    ports:
-      - "8005:8005"
-    depends_on:
-      - minio-hot
-    networks:
-      - ag_cloud
-    restart: unless-stopped
-
-  # -------------------------
-  # Plant Stress Daily Batch
-  # -------------------------
-
-  plant_stress_daily:
-    build: ./services/plant_stress
-    env_file:
-      - ./services/plant_stress/.env
-    restart: "no"
-    environment:
-      MODEL_DIR: /models
-      CONFIDENCE_THRESHOLD: "0.60"
-      TF_CPP_MIN_LOG_LEVEL: "2"
-      TIMEZONE: Asia/Jerusalem
-      POSTGRES_DSN: postgresql://missions_user:pg123@postgres:5432/missions_db
-      MINIO_ENDPOINT: minio-hot:9000
-      MINIO_ACCESS_KEY:
minioadmin - MINIO_SECRET_KEY: minioadmin123 - MINIO_BUCKET: sound - MINIO_PREFIX: plants/ - MINIO_SECURE: "false" - command: ["python","-u","/app/predict_minio_daily.py"] - volumes: - - "./services/plant_stress/models:/models:ro" - depends_on: - postgres: - condition: service_healthy - minio-hot: - condition: service_healthy - mc-bootstrap: - condition: service_started - networks: [ag_cloud] - - # ------------------------- - # MQTT + Kafka + Connect + Init - # ------------------------- - kafka: - build: - context: ./mqtt_and_kafka/kafka - dockerfile: dockerfile - container_name: kafka - environment: - - ALLOW_PLAINTEXT_LISTENER=yes - - KAFKA_ENABLE_KRAFT=yes - - KAFKA_CFG_PROCESS_ROLES=broker,controller - - KAFKA_CFG_NODE_ID=1 - - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER - - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=1@kafka:9093 - - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT,CONTROLLER:PLAINTEXT - - KAFKA_CFG_LISTENERS=INTERNAL://:9092,EXTERNAL://:9094,CONTROLLER://:9093 - - KAFKA_CFG_ADVERTISED_LISTENERS=INTERNAL://kafka:9092,EXTERNAL://localhost:29092 - - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=INTERNAL - - KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=false - - KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - - KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR=1 - - KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR=1 - ports: - - "9092:9092" - - "29092:29092" - networks: - - ag_cloud - healthcheck: - test: [ "CMD-SHELL", "/opt/bitnami/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --list >/dev/null 2>&1 || exit 1" ] - interval: 10s - timeout: 5s - retries: 20 - - mosquitto: - image: eclipse-mosquitto:2.0 - container_name: mosquitto - command: [ "mosquitto", "-c", "/mqtt_and_kafka/mosquitto/config/mosquitto.conf" ] - ports: - - "1883:1883" - volumes: - - ./mqtt_and_kafka/mosquitto/config:/mqtt_and_kafka/mosquitto/config:ro - depends_on: - kafka: - condition: service_healthy - networks: - - ag_cloud - healthcheck: - test: [ "CMD", "mosquitto_sub", "-h", "localhost", "-p", "1883", "-t", "$$SYS/#", "-C", "1", "-W", "15" ] - interval: 10s - timeout: 5s - retries: 12 - - connect: - build: - context: ./mqtt_and_kafka - dockerfile: connect.Dockerfile - image: local/connect-with-mqtt:1.0.0 - container_name: connect - depends_on: - kafka: - condition: service_healthy - mosquitto: - condition: service_healthy - ports: - - "8083:8083" - environment: - - CONNECT_BOOTSTRAP_SERVERS=kafka:9092 - - CONNECT_GROUP_ID=agcloud-connect - - CONNECT_CONFIG_STORAGE_TOPIC=_connect_configs - - CONNECT_OFFSET_STORAGE_TOPIC=_connect_offsets - - CONNECT_STATUS_STORAGE_TOPIC=_connect_status - - CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR=1 - - CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR=1 - - CONNECT_STATUS_STORAGE_REPLICATION_FACTOR=1 - - CONNECT_KEY_CONVERTER=org.apache.kafka.connect.storage.StringConverter - - CONNECT_VALUE_CONVERTER=org.apache.kafka.connect.storage.StringConverter - - CONNECT_REST_ADVERTISED_HOST_NAME=localhost - - CONNECT_PLUGIN_PATH=/usr/share/java,/usr/share/confluent-hub-components - networks: - - ag_cloud - healthcheck: - test: [ "CMD", "curl", "-sf", "http://localhost:8083/connectors" ] - interval: 10s - timeout: 5s - retries: 12 - - init-connector: - image: curlimages/curl:8.7.1 - depends_on: - connect: - condition: service_healthy - volumes: - - ./mqtt_and_kafka/connectors:/connectors - networks: - - ag_cloud - entrypoint: > - sh -c " - echo '==> Creating MQTT connector...'; - curl -X POST -H 'Content-Type: application/json' --data @/connectors/mqtt-source.json 
http://connect:8083/connectors; - echo '==> Done.'; - " - - # -------------------------- - # GUI / Runner / Gateway - # -------------------------- - runner: - build: - context: ./GUI - dockerfile: src/vast/runner/Dockerfile - args: - USE_NETFREE: ${USE_NETFREE:-true} - container_name: runner - environment: - - RUNNER_MODE=real - - SQLITE_DB=/data/app.db - - LOG_LEVEL=INFO - volumes: - - ./GUI/data:/data:ro - ports: - - "50051:50051" - restart: unless-stopped - - gateway: - container_name: gateway - build: - context: ./GUI - dockerfile: src/vast/gateway/Dockerfile - args: - USE_NETFREE: ${USE_NETFREE:-true} - environment: - - RUNNER_ADDR=runner:50051 - ports: - - "8000:8000" - depends_on: - - runner - restart: unless-stopped - - sensors_metrics: - build: - context: ./GUI - dockerfile: src/vast/services/Dockerfile - container_name: sensors_metrics - environment: - - SQLITE_DB=/data/app.db - - GATEWAY_URL=http://gateway:8000 - volumes: - - ./GUI/data:/data:ro - depends_on: - - gateway - networks: - - ag_cloud - restart: unless-stopped - - # -------------------------- - # Prometheus / Grafana - # -------------------------- - prometheus: - image: prom/prometheus:latest - volumes: - - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro - - ./prometheus/prometheus-recording.rules.yml:/etc/prometheus/prometheus-recording.rules.yml:ro - - ./prometheus/postgres-alerts.yml:/etc/prometheus/postgres-alerts.yml:ro - ports: - - "9090:9090" - depends_on: - - postgres_exporter - - minio-hot - - minio-cold - networks: - - ag_cloud - - grafana: - image: grafana/grafana-oss:latest - environment: - GF_SECURITY_ALLOW_EMBEDDING: "true" - GF_AUTH_ANONYMOUS_ENABLED: "true" - GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer - GF_USERS_DEFAULT_THEME: light - GF_SECURITY_ADMIN_USER: admin - GF_SECURITY_ADMIN_PASSWORD: admin - volumes: - - ./grafana/dashboards:/var/lib/grafana/dashboards:ro - - ./grafana/provisioning:/etc/grafana/provisioning:ro - ports: - - "3000:3000" - depends_on: - - prometheus - networks: - - ag_cloud - - pushgateway: - image: prom/pushgateway:v1.8.0 - container_name: pushgateway - ports: - - "9091:9091" - networks: - - ag_cloud - restart: unless-stopped - - # -------------------------- - # Desktop App - # -------------------------- - desktop_app: - build: - context: ./GUI - dockerfile: src/vast/desktop/Dockerfile - container_name: desktop_app - environment: - - NO_VNC_PORT=8080 - - DISPLAY=host.docker.internal:0.0 - - GATEWAY_URL=http://sensors_metrics:8000 - - NOTIFICATION_API_URL=http://notification_api:5000 - ports: - - "5900:5900" - - "8080:8080" - depends_on: - - db_api_service - - notification_api - - alerts-gateway - volumes: - - ./GUI/src/vast:/app/src/vast - - ./templates:/app/templates:ro - networks: - - ag_cloud - restart: unless-stopped - - # -------------------------- - # Large Mosquitto - # -------------------------- - large-mosquitto: - container_name: large-mosquitto - image: eclipse-mosquitto:2 - restart: unless-stopped - volumes: - - ./storage_with_mqtt/mqtt_images/mosquitto/mosquitto.conf:/mosquitto/config/mosquitto.conf - ports: - - "1885:1885" - networks: - - ag_cloud - - # -------------------------- - # MinIO: hot + cold + bootstrap - # -------------------------- - minio-hot: - build: - context: ./storage_with_mqtt/storage/minio-storage - container_name: minio-hot - environment: - MINIO_PROMETHEUS_AUTH_TYPE: public - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin123 - - # ===== IMAGE NOTIFIERS ===== - MINIO_NOTIFY_KAFKA_ENABLE_aerial: "on" - 
MINIO_NOTIFY_KAFKA_BROKERS_aerial: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_aerial: "image.new.aerial" - - MINIO_NOTIFY_KAFKA_ENABLE_air: "on" - MINIO_NOTIFY_KAFKA_BROKERS_air: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_air: "image.new.air" - - MINIO_NOTIFY_KAFKA_ENABLE_fruits: "on" - MINIO_NOTIFY_KAFKA_BROKERS_fruits: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_fruits: "image.new.fruits" - - MINIO_NOTIFY_KAFKA_ENABLE_leaves: "on" - MINIO_NOTIFY_KAFKA_BROKERS_leaves: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_leaves: "image.new.leaves" - - MINIO_NOTIFY_KAFKA_ENABLE_ground: "on" - MINIO_NOTIFY_KAFKA_BROKERS_ground: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_ground: "image.new.ground" - - MINIO_NOTIFY_KAFKA_ENABLE_field: "on" - MINIO_NOTIFY_KAFKA_BROKERS_field: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_field: "image.new.field" - - # ===== SOUND NOTIFIERS ===== - MINIO_NOTIFY_KAFKA_ENABLE_plants: "on" - MINIO_NOTIFY_KAFKA_BROKERS_plants: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_plants: "sound.new.plants" - - MINIO_NOTIFY_KAFKA_ENABLE_sounds: "on" - MINIO_NOTIFY_KAFKA_BROKERS_sounds: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_sounds: "sound.new.sounds" - - # ===== SECURITY NOTIFIER ===== - MINIO_NOTIFY_KAFKA_ENABLE_security: "on" - MINIO_NOTIFY_KAFKA_BROKERS_security: "kafka:9092" - MINIO_NOTIFY_KAFKA_TOPIC_security: "image.new.security" - ports: - - "9001:9000" # HOT S3 - - "9002:9001" # HOT Console - networks: [ ag_cloud ] - healthcheck: - test: [ "CMD", "curl", "-fsS", "http://localhost:9000/minio/health/ready" ] - interval: 3s - timeout: 2s - retries: 40 - volumes: - - minio-hot-data:/data - - minio-cold: - build: - context: ./storage_with_mqtt/storage/minio-storage - container_name: minio-cold - environment: - MINIO_PROMETHEUS_AUTH_TYPE: public - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin123 + webserver: + <<: *airflow-common + command: ["airflow", "webserver"] + user: "0:0" ports: - - "9101:9000" # COLD S3 - - "9102:9001" # COLD Console - networks: [ ag_cloud ] - healthcheck: - test: [ "CMD", "curl", "-fsS", "http://localhost:9000/minio/health/ready" ] - interval: 3s - timeout: 2s - retries: 40 - volumes: - - minio-cold-data:/data - - mc-bootstrap: - build: - context: ./storage_with_mqtt/storage/Lifecycle_rules/minio-bootstrap - container_name: mc-bootstrap - volumes: - - ./storage_with_mqtt/storage/combined_minio_setup/config:/config:ro - - ./storage_with_mqtt/data/config:/config - depends_on: - minio-hot: - condition: service_healthy - minio-cold: - condition: service_healthy - kafka: - condition: service_healthy - environment: - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin123 - HOT_ENDPOINT: http://minio-hot:9000 - COLD_ENDPOINT: http://minio-cold:9000 - MC_ALIAS_HOT: hot - MC_ALIAS_COLD: cold - BUCKET_IMAGERY: imagery - BUCKET_SOUND: sound - networks: [ ag_cloud ] - restart: unless-stopped - - # -------------------------- - # MQTT Ingest & Publisher - # -------------------------- - mqtt_ingest: - build: - context: ./storage_with_mqtt/mqtt_images/mqtt_ingest - container_name: mqtt_ingest - environment: - MINIO_ENDPOINT: http://minio-hot:9000 - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin123 - BUCKET_IMAGERY: imagery - BUCKET_SOUND: sound - MQTT_BROKER: large-mosquitto - MQTT_PORT: 1885 - MQTT_TOPIC: MQTT/imagery/# - MQTT_PUB_TOPIC: imagery/ingested - DUMMY_DB: 0 - DB_API_BASE: http://db_api_service:8001 - DB_API_TOKEN: auto - OUTBOX_DIR: /app/outbox - DB_API_AUTH_MODE: service - DB_API_SERVICE_NAME: mqtt_ingest - INGEST_WORKERS: 8 - volumes: - - 
./storage_with_mqtt/mqtt_images/outbox:/app/outbox - depends_on: - large-mosquitto: - condition: service_started - minio-hot: - condition: service_healthy - mc-bootstrap: - condition: service_started - db_api_service: - condition: service_started - networks: - - ag_cloud - restart: unless-stopped - - mqtt_ingest_sound: - build: - context: ./storage_with_mqtt/mqtt_images/mqtt_ingest - container_name: mqtt_ingest_sound - environment: - MINIO_ENDPOINT: http://minio-hot:9000 - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin123 - S3_BUCKET: sound - MQTT_BROKER: large-mosquitto - MQTT_PORT: 1885 - MQTT_TOPIC: MQTT/sound/# - DEFAULT_PREFIX: MIC-01 - CAMERA_PREFIX: camera - MICROPHONE_PREFIX: microphone - DUMMY_DB: 0 - DB_API_BASE: http://db_api_service:8001 - DB_API_TOKEN: auto - OUTBOX_DIR: /app/outbox - DB_API_AUTH_MODE: service - DB_API_SERVICE_NAME: mqtt_ingest_sound - INGEST_WORKERS: 8 - volumes: - - ./storage_with_mqtt/mqtt_images/outbox:/app/outbox - depends_on: - large-mosquitto: - condition: service_started - minio-hot: - condition: service_healthy - mc-bootstrap: - condition: service_started - db_api_service: - condition: service_started - networks: - - ag_cloud - restart: unless-stopped - - mqtt_publisher: - build: - context: ./storage_with_mqtt/mqtt_images/mqtt_publisher - container_name: mqtt_publisher - environment: - - MQTT_HOST=large-mosquitto - - MQTT_PORT=1885 - - MQTT_TOPIC_BASE=MQTT/imagery - - IMAGES_DIR=/images - - CAMERA_ID=camera-01 - - LIMIT=0 - - SHUFFLE=1 - - MQTT_QOS=2 - - PUBLISH_DELAY_MS=100 - volumes: - - ./storage_with_mqtt/mqtt_images/data/real_images:/images:ro - depends_on: - - large-mosquitto - - mqtt_ingest - networks: - - ag_cloud - - # ------------------------ - # Classifier - Sounds - # ------------------------ - sounds_classifier: - build: - context: ./services/sounds_classifier - dockerfile: Dockerfile.classifier-svc - container_name: sounds_classifier - restart: unless-stopped - environment: - # Runtime mode - - DEVICE=cpu - - BACKBONE=cnn14 - - # Model artifacts (must exist inside the image) - - CHECKPOINT=/app/classification/models/panns_data/Cnn14_mAP=0.431.pth - - HEAD=/app/classification/models/head/head_cnn14_rf.joblib - - HEAD_META=/app/classification/models/head/head_cnn14_rf.joblib.meta.json - - # DB - - WRITE_DB=false - - DB_URL=postgresql://missions_user:pg123@postgres:5432/missions_db - - DB_SCHEMA=agcloud_audio - - DB_RUN_ID=api-default - - # Kafka - - KAFKA_BROKERS=kafka:9092 - - ALERTS_TOPIC=alerts - - ENABLE_ALERTS=true - - # MinIO - - MINIO_ENDPOINT=minio-hot:9000 - - MINIO_ACCESS_KEY=minioadmin - - MINIO_SECRET_KEY=minioadmin123 - - MINIO_SECURE=false - - # Request validation - - ALLOWED_BUCKETS=imagery - - ALLOWED_CONTENT_TYPES=audio/wav,audio/x-wav,audio/mpeg,audio/flac,audio/ogg,audio/mp4 - - MAX_BYTES=104857600 - - # Tuning params - - UNKNOWN_THRESHOLD=0.4 - - WINDOW_SEC=2.0 - - HOP_SEC=0.5 - - PAD_LAST=true - - AGG=mean - depends_on: - postgres: - condition: service_healthy - kafka: - condition: service_healthy - mc-bootstrap: - condition: service_started - ports: - - "8088:8088" - networks: - - ag_cloud - healthcheck: - test: [ "CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8088/health').read()" ] - interval: 45s - timeout: 5s - retries: 10 - start_period: 20s - - # -------------------------- - # DB API Service - # -------------------------- - - - - contracts-gen: - build: - context: ./services/db_api_service - dockerfile: app/contracts/Dockerfile - env_file: - - 
./services/db_api_service/.env - environment: - DATABASE_URL: postgresql+psycopg://missions_user:pg123@postgres:5432/missions_db + - "8081:8080" depends_on: - postgres: - condition: service_healthy - volumes: - - contracts:/app/app/contracts - networks: - - ag_cloud - restart: "no" + - scheduler + networks: [agcloud_ag_cloud] - db_api_service: - build: - context: ./services/db_api_service - dockerfile: Dockerfile - container_name: db_api_service - env_file: - - ./services/db_api_service/.env - environment: - DB_DSN: postgresql+psycopg://missions_user:pg123@postgres:5432/missions_db - ENV: dev - JWT_SECRET: change-me-please-very-secret - JWT_ALGO: HS256 - ACCESS_TTL_MIN: 15 - REFRESH_TTL_DAYS: 14 - DEV_SA_NAME: my-ingest-service - ports: - - "8001:8001" - volumes: - - ./services/db_api_service/app:/app/app - - contracts:/app/app/contracts - depends_on: - contracts-gen: - condition: service_completed_successfully - postgres: - condition: service_healthy - networks: - - ag_cloud - restart: unless-stopped - - notification_api: - build: - context: ./services/API-notifications/src - dockerfile: Dockerfile - container_name: notification_api - environment: - - FLASK_ENV=development - ports: - - "5000:5000" - depends_on: - - postgres - - ripeness-api: - build: - context: ./services/ripeness-ml - dockerfile: deploy/Dockerfile - image: ripeness-api:latest - environment: - - PGHOST=postgres - - PGPORT=5432 - - PGDATABASE=missions_db - - PGUSER=missions_user - - PGPASSWORD=pg123 - - MINIO_ENDPOINT=minio-hot:9000 - - MINIO_SECURE=false - - MINIO_ACCESS_KEY=minioadmin - - MINIO_SECRET_KEY=minioadmin123 - - MODEL_NAME=best_conditional - - BATCH_LIMIT=500 - - FRUITS=Apple,Banana,Orange - depends_on: - - postgres - - minio-hot - volumes: - - ./services/ripeness-ml/checkpoints:/app/checkpoints - - ./services/ripeness-ml/configs:/app/configs - - ./services/ripeness-ml/model:/app/model - container_name: ripeness-api - networks: [ ag_cloud ] - ports: - - "8091:8088" - restart: unless-stopped - - # -------------------------- - # Flink JobManager & TaskManager - # -------------------------- - flink-jobmanager: - build: - context: ./streaming/flink - dockerfile: Dockerfile.flink-py - image: agcloud-flink-py:1.18 - container_name: flink-jobmanager - command: jobmanager - ports: - - "8081:8081" - networks: [ ag_cloud ] - environment: - - | - FLINK_PROPERTIES= - jobmanager.rpc.address: flink-jobmanager - parallelism.default: 2 - taskmanager.numberOfTaskSlots: 2 - jobmanager.memory.process.size: 1600m - taskmanager.memory.process.size: 1728m - s3.endpoint: http://minio-hot:9000 - s3.path.style.access: true - s3.access.key: minioadmin - s3.secret.key: minioadmin123 - fs.s3a.connection.ssl.enabled: false - python.client.executable: /usr/bin/python3 - python.executable: /usr/bin/python3 - - HTTP_INFER_URL=http://fruit-inference-http:8000/infer_json - volumes: - - ./streaming/flink/jobs:/opt/flink/jobs:ro - - ./streaming/flink/connectors/flink-json-1.18.1.jar:/opt/flink/lib/flink-json-1.18.1.jar:ro - - ./streaming/flink/connectors/flink-sql-connector-kafka-3.2.0-1.18.jar:/opt/flink/lib/flink-sql-connector-kafka-3.2.0-1.18.jar:ro - - ./streaming/flink/connectors/flink-connector-kafka-3.2.0-1.18.jar:/opt/flink/lib/flink-connector-kafka-3.2.0-1.18.jar:ro - - ./streaming/flink/connectors/kafka-clients-3.2.3.jar:/opt/flink/lib/kafka-clients-3.2.3.jar:ro - - ./streaming/flink/connectors/lz4-java-1.8.0.jar:/opt/flink/lib/lz4-java-1.8.0.jar:ro - - 
./streaming/flink/connectors/snappy-java-1.1.10.5.jar:/opt/flink/lib/snappy-java-1.1.10.5.jar:ro - restart: unless-stopped - - audio_compression: - build: - context: ./services/compression - dockerfile: Dockerfile - container_name: audio_compression - environment: - - RAW_MAX_AGE_DAYS=30 - - COMPRESSION_CODEC=opus - - COMPRESSED_MAX_AGE_DAYS=90 - - CHECK_INTERVAL_SECONDS=3600 - - MINIO_ENDPOINT=minio-hot:9000 - - ACCESS_KEY=minioadmin - - SECRET_KEY=minioadmin123 - - BUCKET_NAME=imagery - depends_on: - minio-hot: - condition: service_healthy - mc-bootstrap: - condition: service_started - networks: - - ag_cloud - restart: unless-stopped - - flink_writer_db: - build: - context: ./services/flink_writer_db - dockerfile: Dockerfile.flink - container_name: flink_writer_db - environment: - - KAFKA_BROKERS=kafka:9092 - - TOPICS=sensor_zone_stats,sensor_anomalies,aerial_images_keys,aerial_image_object_detections,aerial_image_anomaly_detections,image_new_security_connections,alerts - - DB_API_BASE=http://db_api_service:8001 - - DB_API_AUTH_MODE=service - - DB_API_SERVICE_NAME=flink-writer-db - - DB_API_TOKEN_FILE=/opt/app/secrets/db_api_token - - FLINK_PARALLELISM=1 - depends_on: - kafka: - condition: service_healthy - db_api_service: - condition: service_started - networks: - - ag_cloud - restart: unless-stopped - - flink-taskmanager: - image: agcloud-flink-py:1.18 - container_name: flink-taskmanager - command: taskmanager - depends_on: - flink-jobmanager: - condition: service_started - networks: [ ag_cloud ] - environment: - - | - FLINK_PROPERTIES= - jobmanager.rpc.address: flink-jobmanager - parallelism.default: 2 - taskmanager.numberOfTaskSlots: 2 - jobmanager.memory.process.size: 1600m - taskmanager.memory.process.size: 1728m - s3.endpoint: http://minio-hot:9000 - s3.path.style.access: true - s3.access.key: minioadmin - s3.secret.key: minioadmin123 - fs.s3a.connection.ssl.enabled: false - python.client.executable: /usr/bin/python3 - python.executable: /usr/bin/python3 - - HTTP_INFER_URL=http://fruit-inference-http:8000/infer_json - volumes: - - ./streaming/flink/connectors/flink-json-1.18.1.jar:/opt/flink/lib/flink-json-1.18.1.jar:ro - - ./streaming/flink/connectors/flink-sql-connector-kafka-3.2.0-1.18.jar:/opt/flink/lib/flink-sql-connector-kafka-3.2.0-1.18.jar:ro - - ./streaming/flink/connectors/flink-connector-kafka-3.2.0-1.18.jar:/opt/flink/lib/flink-connector-kafka-3.2.0-1.18.jar:ro - - ./streaming/flink/connectors/kafka-clients-3.2.3.jar:/opt/flink/lib/kafka-clients-3.2.3.jar:ro - - ./streaming/flink/connectors/lz4-java-1.8.0.jar:/opt/flink/lib/lz4-java-1.8.0.jar:ro - - ./streaming/flink/connectors/snappy-java-1.1.10.5.jar:/opt/flink/lib/snappy-java-1.1.10.5.jar:ro - restart: unless-stopped - - # -------------------------- - # Inference HTTP Service - # -------------------------- - fruit-inference-http: - build: - context: ./services/inference_http - dockerfile: Dockerfile - environment: - - TEAM=fruit - - WEIGHTS_PATH=/app/weights/fruit_cls_best.ts - - MINIO_ENDPOINT=minio-hot:9000 - - MINIO_ACCESS_KEY=minioadmin - - MINIO_SECRET_KEY=minioadmin123 - - MINIO_SECURE=0 - volumes: - - ./services/inference_http/weights:/app/weights:ro - container_name: fruit-inference-http - networks: [ ag_cloud ] - ports: - - "8011:8000" - restart: unless-stopped - - # -------------------------- - # Flink Jobs - # -------------------------- - flink-dispatcher-fruit: - image: agcloud-flink-py:1.18 - container_name: flink-dispatcher-fruit - depends_on: - flink-jobmanager: { condition: service_started } - 
flink-taskmanager: { condition: service_started } - fruit-inference-http: { condition: service_started } - networks: [ ag_cloud ] - environment: - - KAFKA_BOOTSTRAP=kafka:9092 - - INPUT_TOPIC=imagery.new.fruit - - TEAM=fruit - - HTTP_URL=http://fruit-inference-http:8000/infer_json - - DLQ_TOPIC=dlq.inference.http - - GROUP_ID=http-dispatcher-fruit - - PARALLELISM=2 - - PYFLINK_CLIENT_EXECUTABLE=/usr/bin/python3 - volumes: - - ./streaming/flink/jobs:/opt/flink/jobs:ro - - ./streaming/flink/connectors/flink-connector-kafka-3.2.0-1.18.jar:/opt/flink/lib/flink-connector-kafka-3.2.0-1.18.jar:ro - - ./streaming/flink/connectors/flink-sql-connector-kafka-3.2.0-1.18.jar:/opt/flink/lib/flink-sql-connector-kafka-3.2.0-1.18.jar:ro - - ./streaming/flink/connectors/flink-json-1.18.1.jar:/opt/flink/lib/flink-json-1.18.1.jar:ro - - ./streaming/flink/connectors/kafka-clients-3.2.3.jar:/opt/flink/lib/kafka-clients-3.2.3.jar:ro - - ./streaming/flink/connectors/lz4-java-1.8.0.jar:/opt/flink/lib/lz4-java-1.8.0.jar:ro - - ./streaming/flink/connectors/snappy-java-1.1.10.5.jar:/opt/flink/lib/snappy-java-1.1.10.5.jar:ro - command: [ "bash", "-lc", "set -e; echo 'Waiting for JobManager to accept commands...'; until /opt/flink/bin/flink list --jobmanager flink-jobmanager:8081 >/dev/null 2>&1; do echo 'still waiting...'; sleep 3; done; echo 'JobManager is ready!'; /opt/flink/bin/flink run -Dpython.client.executable=/usr/bin/python3 -Dpython.executable=/usr/bin/python3 -Dpipeline.jars=file:///opt/flink/lib/flink-connector-kafka-3.2.0-1.18.jar,file:///opt/flink/lib/flink-sql-connector-kafka-3.2.0-1.18.jar,file:///opt/flink/lib/flink-json-1.18.1.jar --jobmanager flink-jobmanager:8081 --detached --python /opt/flink/jobs/http_dispatcher.py -- --bootstrap kafka:9092 --input-topic imagery.new.fruit --team fruit --http-url http://fruit-inference-http:8000/infer_json --group-id http-dispatcher-fruit --dlq-topic dlq.inference.http; tail -f /dev/null" ] - - flink-alerts-job: - build: - context: ./services/alerts_forwarder - dockerfile: Dockerfile.flink - container_name: alerts-forwarder - depends_on: - kafka: - condition: service_healthy - alertmanager_service: - condition: service_started - environment: - - PYTHONPATH=/opt/app - - KAFKA_BROKERS=kafka:9092 - - ALERTMANAGER_SERVICE_URL=http://alertmanager_service:8090/alerts - command: [ "python", "/opt/app/alerts_forwarder.py" ] - networks: - - ag_cloud - restart: unless-stopped - - alertmanager: - image: prom/alertmanager:v0.27.0 - container_name: alertmanager - command: - - "--config.file=/etc/alertmanager/alertmanager.yml" - - "--storage.path=/alertmanager" - - "--log.level=debug" - volumes: - - ./services/alertmanager_service/compose/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro - ports: - - "9093:9093" - networks: - - ag_cloud - restart: always - - alertmanager_service: - build: - context: ./services/alertmanager_service/src - dockerfile: Dockerfile - container_name: alertmanager_service - ports: - - "8090:8090" - command: [ "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8090" ] - volumes: - - ./templates:/app/templates:ro - environment: - - CFG_PATH=/app/templates/templates.yml - - ALERTMANAGER_URL=http://alertmanager:9093 - - GATEWAY_URL=http://alerts-gateway:8000/internal/alert - depends_on: - - alertmanager - - alerts-gateway - networks: - - ag_cloud - - alerts-gateway: - build: - context: ./services/alertmanager_service/src - dockerfile: Dockerfile - container_name: alerts_gateway - command: [ "uvicorn", "gateway:app", "--host", "0.0.0.0", "--port", 
"8000" ] - ports: - - "8010:8000" # host:container - networks: - - ag_cloud - - image-linker-jobmanager: - build: - context: ./services/image-linker - dockerfile: Dockerfile.flink - container_name: image-linker-jobmanager - command: jobmanager - ports: - - "8084:8081" # ✅ unique external port (no overlap) - environment: - - JOB_MANAGER_RPC_ADDRESS=image-linker-jobmanager - - KAFKA_BROKERS=kafka:9092 - - CONFIG_PATH=/opt/app/config/topics.yaml - networks: - - ag_cloud - - image-linker-taskmanager: - build: - context: ./services/image-linker - dockerfile: Dockerfile.flink - container_name: image-linker-taskmanager - command: taskmanager - environment: - - JOB_MANAGER_RPC_ADDRESS=image-linker-jobmanager - - KAFKA_BROKERS=kafka:9092 - - CONFIG_PATH=/opt/app/config/topics.yaml - depends_on: - image-linker-jobmanager: - condition: service_started - networks: - - ag_cloud - - image-linker-submitter: - build: - context: ./services/image-linker - dockerfile: Dockerfile.flink - container_name: image-linker-submit - depends_on: - image-linker-jobmanager: - condition: service_started - command: > - bash -lc "sleep 10 && - flink run -m image-linker-jobmanager:8081 -py /opt/app/job_linker.py && - echo 'Image-Linker job submitted successfully' && - sleep 1" - networks: - - ag_cloud +networks: + agcloud_ag_cloud: + external: true