From 7c5a8962db403d2cd601ba8fd11a8467de34201e Mon Sep 17 00:00:00 2001 From: Veronica Martinez Date: Thu, 14 May 2026 16:58:04 -0600 Subject: [PATCH 1/5] ENH: adding logic to release api handler --- lambda_layer/database/requirements.txt | 6 +- lambda_layer/spice/requirements.txt | 6 +- poetry.lock | 76 +-- pyproject.toml | 4 +- .../constructs/sds_api_manager_construct.py | 11 +- .../SDSCode/api_lambdas/release_api.py | 594 +++++++++++++++++- .../lambda_code/SDSCode/spice_utilities.py | 2 +- .../idex_processing/requirements.txt | 6 +- .../lambda_code/processing/requirements.txt | 6 +- tests/lambda_endpoints/test_release_api.py | 351 +++++++++++ 10 files changed, 983 insertions(+), 79 deletions(-) create mode 100644 tests/lambda_endpoints/test_release_api.py diff --git a/lambda_layer/database/requirements.txt b/lambda_layer/database/requirements.txt index 77bd1083d..6336d6267 100644 --- a/lambda_layer/database/requirements.txt +++ b/lambda_layer/database/requirements.txt @@ -200,9 +200,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 jmespath==1.1.0 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d \ --hash=sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64 diff --git a/lambda_layer/spice/requirements.txt b/lambda_layer/spice/requirements.txt index e6dae531c..48c5f4d16 100644 --- a/lambda_layer/spice/requirements.txt +++ b/lambda_layer/spice/requirements.txt @@ -200,9 +200,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 jmespath==1.1.0 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d \ --hash=sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64 diff --git a/poetry.lock b/poetry.lock index 6ad51ef29..97f53fd5b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.3.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.4.0 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -141,30 +141,17 @@ files = [ docs = ["pytest"] test = ["hypothesis", "pytest", "pytest-remotedata"] -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["main"] -markers = "extra == \"test\" and sys_platform == \"win32\"" -files = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] - [[package]] name = "attrs" version = "25.4.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.9" -groups = ["main", "cdk-install"] +groups = ["cdk-install"] files = [ {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, ] -markers = {main = "extra == \"test\""} [[package]] name = "aws-cdk-asset-awscli-v1" @@ -1035,14 +1022,14 @@ files = [ [[package]] name = "imap-data-access" -version = "0.40.1" +version = "0.41.0" description = "IMAP SDC Data Access" optional = false python-versions = "*" groups = ["main", "layer-idex-processing", "layer-processing"] files = [ - {file = "imap_data_access-0.40.1-py3-none-any.whl", hash = "sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832"}, - {file = "imap_data_access-0.40.1.tar.gz", hash = "sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab"}, + {file = "imap_data_access-0.41.0-py3-none-any.whl", hash = "sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557"}, + {file = "imap_data_access-0.41.0.tar.gz", hash = "sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169"}, ] [package.dependencies] @@ -1932,19 +1919,6 @@ files = [ {file = "publication-0.0.3.tar.gz", hash = "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4"}, ] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -groups = ["main"] -markers = "extra == \"test\"" -files = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] - [[package]] name = "pycparser" version = "3.0" @@ -2026,36 +2000,33 @@ files = [ {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"}, {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"}, ] -markers = {main = "extra == \"doc\""} +markers = {main = "extra == \"doc\" or extra == \"test\""} [package.extras] windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pytest" -version = "6.2.5" +version = "8.4.2" description = "pytest: simple powerful testing with Python" optional = true -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"test\"" files = [ - {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, - {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, ] [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=19.2.0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -py = ">=1.8.2" -toml = "*" +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-cov" @@ -2732,19 +2703,6 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3_binary"] -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -optional = true -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["main"] -markers = "extra == \"test\"" -files = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] - [[package]] name = "typeguard" version = "2.13.3" @@ -2893,4 +2851,4 @@ tool = ["alembic"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "2d5991a15ea1c5157131391c2b68967b0b9fa0a2b5a0302b789d3dbd0f2d3a2a" +content-hash = "8ca7a4a385cc6021beab9b2792e60c221393777c28b75610b0faa1b22078a53a" diff --git a/pyproject.toml b/pyproject.toml index f1e98e1ea..51a70604a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ # the correct version and hash are used: # poetry export -f requirements.txt -o lambda_layer/database/requirements.txt --with layer-database dependencies = [ - "imap-data-access>=0.40.1", + "imap-data-access>=0.41.0", "boto3>=1.26.78,<2", "SQLAlchemy<=3.0.0", "numpy (>=2.4.6,<3.0.0)" @@ -43,7 +43,7 @@ poetry-plugin-export = ">=1.8" [project.optional-dependencies] dev = ["pre-commit>=3.3.3,<4", "ruff==0.2.1"] doc = ["sphinx>=7.1.0,<8", "myst-parser>=2.0.0,<3", "pydata-sphinx-theme>=0.13.3"] -test = ["pytest==6.2.5", "pytest-cov>=4.0.0,<5", "moto>=4.1.3,<5"] +test = ["pytest>=7.4.0,<9", "pytest-cov>=4.0.0,<5", "moto>=4.1.3,<5"] tool = ["alembic (>=1.18.4,<2.0.0)"] [tool.poetry.group.cdk-install.dependencies] diff --git a/sds_data_manager/constructs/sds_api_manager_construct.py b/sds_data_manager/constructs/sds_api_manager_construct.py index f3a91bd30..f5b55a2b8 100644 --- a/sds_data_manager/constructs/sds_api_manager_construct.py +++ b/sds_data_manager/constructs/sds_api_manager_construct.py @@ -53,7 +53,7 @@ def add_stable_route(api, base_path, http_method, lambda_function, prefix_list): class SdsApiManager(Construct): """Construct for API Management.""" - def __init__( + def __init__( # noqa: PLR0915 self, scope: Construct, construct_id: str, @@ -163,8 +163,10 @@ def __init__( # account only can allow upload through API key. if account_name == "prod": upload_route_prefixes = ["/api-key"] + release_route_prefixes = ["/api-key"] else: upload_route_prefixes = auth_route_prefixes + release_route_prefixes = ["/api-key"] # {proxy+} is used to allow for any pathParams after /upload/ add_stable_route( @@ -375,16 +377,19 @@ def __init__( security_groups=[rds_security_group], environment={ "SECRET_NAME": db_secret_name, + "IMAP_DATA_DIR": "/tmp", # noqa: S108 + "S3_BUCKET": data_bucket.bucket_name, + "REGION": env.region, }, layers=layers, ) - for prefix in auth_route_prefixes: - # {proxy+} is used to allow for any pathParams after /processing-jobs/ + for prefix in release_route_prefixes: api.add_route( route=f"{prefix}/release", http_method="GET", lambda_function=release_api_lambda, ) + release_api_lambda.add_to_role_policy(s3_read_policy) rds_secret = secrets.Secret.from_secret_name_v2( self, "rds_secret", db_secret_name diff --git a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py index c55b8d916..7ebfbe7f6 100644 --- a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py +++ b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py @@ -1,6 +1,596 @@ """Lambda function for release API endpoint.""" +import datetime +import json +import logging +from pathlib import Path + +import imap_data_access +from imap_data_access.file_validation import ( + AncillaryFilePath, + ScienceFilePath, + generate_imap_file_path, +) +from sqlalchemy import func, or_, select, union_all + +from ..database import database as db +from ..database import models +from ..spice_utilities import download_from_s3 + +# Logger setup +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def check_api_key(event): + """Check API key scope; only non-read keys may release files.""" + request_ctx = event.get("requestContext", {}) + auth = request_ctx.get("authorizer", {}) + auth_ctx = auth.get("lambda", {}) + scope = auth_ctx.get("scope", "") + api_key = auth_ctx.get("apiKey", "unknown") + + logger.info(f"Release request received with scope: {scope}, api_key: {api_key}") + + if scope == "read": + logger.warning("Release denied: read scope user attempted release operation") + return { + "statusCode": 403, + "body": json.dumps( + "Release operation denied. Your API key has read permissions." + ), + } + + return { + "statusCode": 200, + "body": json.dumps("API key validated successfully."), + } + + +def validate_query_params(event): + """Validate query parameters and return (is_valid, error_message).""" + query_params = event.get("queryStringParameters") or {} + + # Validate release_type and derive the released flag value. + release_type = query_params["release_type"] + valid_release_types = ["release", "unrelease", "early-release"] + if release_type not in valid_release_types: + return { + "statusCode": 400, + "body": json.dumps( + f"'{release_type}' is not a valid release_type. " + f"Valid options are: {valid_release_types}" + ), + } + + if release_type == "release" and "release_number" not in query_params: + return { + "statusCode": 400, + "body": json.dumps( + "'release_number' query parameter is required when " + "'release_type' is 'release'. Please provide a release_number " + "indicating which release batch to apply. For example, " + "withhold files with 'release_number=1' will be included in " + "the first release batch, 'release_number=2' in the second, " + "and so on." + ), + } + + # "unrelease" sets released=False; everything else sets released=True. + released_flag = release_type != "unrelease" + + valid_parameters = [ + "instrument", + "start_date", + "end_date", + "release_type", + "exclude_file", + "manifest_file", + "release_number", + ] + + for param in query_params: + if param not in valid_parameters: + return { + "statusCode": 400, + "body": json.dumps( + f"'{param}' is not a valid query parameter. " + f"Valid query parameters are: {valid_parameters}" + ), + } + + return { + "statusCode": 200, + "data": { + "release_type": release_type, + "released_flag": released_flag, + "query_params": query_params, + }, + } + + +def query_withhold_files(session, instrument, start_date, end_date, release_number): + """Query for the latest-version withhold files matching given criteria.""" + descriptor = f"withhold-data-release-{int(release_number):03d}" + release_table = models.ReleaseFiles + + # Query latest withhold file versions for given group + # ( instrument, descriptor, and date range). + max_ver_subq = ( + session.query( + release_table.instrument, + release_table.descriptor, + release_table.start_date, + release_table.end_date, + func.max(release_table.version).label("max_version"), + ) + .group_by( + release_table.instrument, + release_table.descriptor, + release_table.start_date, + release_table.end_date, + ) + .subquery() + ) + # Now query for specific withhold file matching input + withhold_files = ( + session.query(release_table) + .join( + max_ver_subq, + (release_table.instrument == max_ver_subq.c.instrument) + & (release_table.descriptor == max_ver_subq.c.descriptor) + & (release_table.start_date == max_ver_subq.c.start_date) + & (release_table.end_date == max_ver_subq.c.end_date) + & (release_table.version == max_ver_subq.c.max_version), + ) + .filter( + release_table.instrument == instrument, + release_table.end_date >= start_date, + release_table.start_date <= end_date, + release_table.descriptor == descriptor, + ) + .one_or_none() # TODO: update this logic if we want to + # support more than one withhold file per release batch + ) + return withhold_files + + +def query_latest_science_files(session, instrument, start_date, end_date): + """Query for the latest-version science file paths matching given criteria.""" + science_table = models.ScienceFiles + + max_ver_subq = ( + session.query( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + func.max(science_table.version).label("max_version"), + ) + .group_by( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + ) + .subquery() + ) + latest_science_files = [ + row.file_path + for row in ( + session.query(science_table.file_path) + .join( + max_ver_subq, + (science_table.instrument == max_ver_subq.c.instrument) + & (science_table.descriptor == max_ver_subq.c.descriptor) + & (science_table.start_date == max_ver_subq.c.start_date) + & (science_table.version == max_ver_subq.c.max_version), + ) + .filter( + science_table.instrument == instrument, + science_table.start_date >= start_date, + science_table.start_date <= end_date, + ) + .all() + ) + ] + logger.info( + f"Found {len(latest_science_files)} science file(s) for instrument={instrument}" + ) + return latest_science_files + + +def get_latest_ancillary_files( + session, + instrument: str, + start_date: datetime.datetime, + end_date: datetime.datetime, +) -> list: + """Get latest-version ancillary files for an instrument over a date range. + + The function retrieves files in two groups based on overlap with date range: + + Files with explicit end_date in their filename: + overlaps if start_date <= query_end AND end_date >= query_start + + Files without end_date in their filename and considered valid until + the next file with start_date after it appears: + overlaps if start_date <= query_end AND + (next_file_start >= query_start OR no next file) + + Parameters + ---------- + session : orm session + Database session. + instrument : str + Instrument name. + start_date : datetime.datetime + Start of query date range. + end_date : datetime.datetime + End of query date range. + + Returns + ------- + list + List of file paths ordered by file_path. + """ + ancillary_table = models.AncillaryFiles + + # ======================================== + # Step 1: Keep only latest version per + # (instrument, descriptor, start_date, end_date). + # ======================================== + row_num_col = ( + func.row_number() + .over( + partition_by=[ + ancillary_table.instrument, + ancillary_table.descriptor, + ancillary_table.start_date, + ancillary_table.end_date, + ], + order_by=ancillary_table.version.desc(), + ) + .label("row_num") + ) + ranked = session.query( + ancillary_table.file_path, + ancillary_table.instrument, + ancillary_table.descriptor, + ancillary_table.start_date, + ancillary_table.end_date, + ancillary_table.version, + row_num_col, + ).subquery() + + latest_versions = session.query(ranked).filter(ranked.c.row_num == 1).subquery() + + # ======================================== + # Step 2: Files with end_date + # ======================================== + with_end_date_query = session.query( + latest_versions.c.file_path, + latest_versions.c.instrument, + latest_versions.c.descriptor, + latest_versions.c.start_date, + latest_versions.c.end_date, + latest_versions.c.version, + ).filter( + latest_versions.c.instrument == instrument, + latest_versions.c.end_date.isnot(None), + latest_versions.c.start_date <= end_date, + latest_versions.c.end_date >= start_date, + ) + + # ======================================== + # Step 3: Files without end_date + # ======================================== + next_start_date_col = ( + func.lead(latest_versions.c.start_date) + .over( + partition_by=[ + latest_versions.c.instrument, + latest_versions.c.descriptor, + ], + order_by=latest_versions.c.start_date, + ) + .label("next_start_date") + ) + no_end_date_coverage = ( + session.query( + latest_versions.c.file_path, + latest_versions.c.instrument, + latest_versions.c.descriptor, + latest_versions.c.start_date, + latest_versions.c.end_date, + latest_versions.c.version, + next_start_date_col, + ) + .filter( + latest_versions.c.end_date.is_(None), + ) + .subquery() + ) + + # ======================================================== + # Step 4: Look for files where start_date <= query_end AND + # (next_file_start >= query_start) + # ======================================================== + no_end_date_query = session.query( + no_end_date_coverage.c.file_path, + no_end_date_coverage.c.instrument, + no_end_date_coverage.c.descriptor, + no_end_date_coverage.c.start_date, + no_end_date_coverage.c.end_date, + no_end_date_coverage.c.version, + ).filter( + no_end_date_coverage.c.instrument == instrument, + no_end_date_coverage.c.start_date <= end_date, + or_( + no_end_date_coverage.c.next_start_date.is_(None), + no_end_date_coverage.c.next_start_date > start_date, + ), + ) + + # ======================================== + # Final: UNION ALL and order by file_path. + # ======================================== + combined = union_all( + select(with_end_date_query.subquery()), + select(no_end_date_query.subquery()), + ).order_by("file_path") + + file_paths = [row.file_path for row in session.execute(combined).fetchall()] + logger.info( + f"Found {len(file_paths)} ancillary file(s) for instrument={instrument}" + ) + return file_paths + + +def download_read_file(exception_list_file_path): + """Download a manifest file from S3 and group its entries by file type. + + Parameters + ---------- + exception_list_file_path : str + S3 path to the manifest text file. Each line is an IMAP file path. + + Returns + ------- + tuple[list[str], list[str]] + A tuple of (science_files, ancillary_files) where each entry is the + file path string listed in the manifest. + """ + # Create the proper file path object based on the extension and filename + file_path = Path(exception_list_file_path) + path_obj = generate_imap_file_path(file_path.name) + + s3_file_path = ( + path_obj.construct_path() + .relative_to(imap_data_access.config["DATA_DIR"]) + .as_posix() + ) + + logger.debug(f"Downloading manifest file from S3 path: {s3_file_path}") + download_path = download_from_s3(s3_file_path) + logger.debug(f"Download path after download: {download_path}") + lines = download_path.read_text(encoding="utf-8").splitlines() + + science_files = [] + ancillary_files = [] + for line in lines: + filename = line.strip() + if not filename: + continue + file_obj = imap_data_access.file_validation.generate_imap_file_path(filename) + if isinstance(file_obj, ScienceFilePath): + science_files.append(filename) + elif isinstance(file_obj, AncillaryFilePath): + ancillary_files.append(filename) + else: + logger.warning(f"Unrecognized file type in manifest, skipping: {filename}") + + return science_files, ancillary_files + + +def release_type_handler(released_flag, query_params): + """Handle 'release' type requests.""" + start_date = datetime.datetime.strptime(query_params["start_date"], "%Y%m%d") + end_date = datetime.datetime.strptime(query_params["end_date"], "%Y%m%d") + + with db.Session() as session: + # Query for withhold files to exclude from release. + science_files_to_exclude = [] + ancillary_files_to_exclude = [] + + withhold_files = query_withhold_files( + session, + query_params["instrument"], + start_date, + end_date, + query_params.get("release_number"), + ) + if withhold_files: + science_files_to_exclude, ancillary_files_to_exclude = download_read_file( + withhold_files.file_path + ) + + science_files_to_update = query_latest_science_files( + session, + query_params["instrument"], + start_date, + end_date, + ) + if science_files_to_exclude != []: + science_files_to_update = [ + file_path + for file_path in science_files_to_update + if Path(file_path).name not in science_files_to_exclude + ] + + ancillary_files_to_update = get_latest_ancillary_files( + session, + query_params["instrument"], + start_date, + end_date, + ) + if ancillary_files_to_exclude != []: + ancillary_files_to_update = [ + file_path + for file_path in ancillary_files_to_update + if Path(file_path).name not in ancillary_files_to_exclude + ] + + # For all science and ancillary files, update released flag for all + # applicable files. + session.query(models.ScienceFiles).filter( + models.ScienceFiles.file_path.in_(science_files_to_update) + ).update( + {models.ScienceFiles.released: released_flag}, synchronize_session=False + ) + + session.query(models.AncillaryFiles).filter( + models.AncillaryFiles.file_path.in_(ancillary_files_to_update) + ).update( + {models.AncillaryFiles.released: released_flag}, synchronize_session=False + ) + + session.commit() + + +def early_release_type_handler(query_params): + """Handle early-release requests using manifest file.""" + manifest_file = query_params["manifest_file"] + + science_files, ancillary_files = download_read_file(manifest_file) + + with db.Session() as session: + if science_files: + session.query(models.ScienceFiles).filter( + models.ScienceFiles.file_path.in_(science_files) + ).update( + {models.ScienceFiles.released: True}, + synchronize_session=False, + ) + + if ancillary_files: + session.query(models.AncillaryFiles).filter( + models.AncillaryFiles.file_path.in_(ancillary_files) + ).update( + {models.AncillaryFiles.released: True}, + synchronize_session=False, + ) + + session.commit() + + logger.info( + f"Early released " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ) + + return { + "statusCode": 200, + "body": json.dumps( + f"Successfully early released " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ), + } + + +def unrelease_type_handler(query_params): + """Handle unrelease requests using manifest file.""" + manifest_file = query_params["manifest_file"] + + science_files, ancillary_files = download_read_file(manifest_file) + + with db.Session() as session: + if science_files: + session.query(models.ScienceFiles).filter( + models.ScienceFiles.file_path.in_(science_files) + ).update( + {models.ScienceFiles.released: False}, + synchronize_session=False, + ) + + if ancillary_files: + session.query(models.AncillaryFiles).filter( + models.AncillaryFiles.file_path.in_(ancillary_files) + ).update( + {models.AncillaryFiles.released: False}, + synchronize_session=False, + ) + + session.commit() + + logger.info( + f"Unreleased " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ) + + return { + "statusCode": 200, + "body": json.dumps( + f"Successfully unreleased " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ), + } + def lambda_handler(event, context): - """Lambda handler for release API.""" - return {"statusCode": 200, "body": "Release API is working!"} + """Entry point for the release API lambda. + + Required query parameters for 'release' type: + instrument : instrument name (e.g. mag, swe, lo, codice) + start_date : inclusive lower bound on file start_date (YYYYMMDD) + end_date : inclusive upper bound on file start_date (YYYYMMDD) + + Optional parameters for 'release' type: + exclude_file : S3 path to manifest text file listing files to + exclude from release. Each line in the manifest + can contain science or ancillary filename. + + Required parameters for 'early' or 'unrelease' type: + manifest_file : S3 path to manifest text file listing files to + release/unrelease. + + Parameters + ---------- + event : dict + Input event containing ``queryStringParameters``. + context : LambdaContext + Lambda runtime context object. + """ + logger.info("Received release request with event: " + json.dumps(event, indent=2)) + + # Check API key and scope. Only API keys with non-read scopes may release files. + api_key_check = check_api_key(event) + if api_key_check["statusCode"] != 200: + return api_key_check + + # Validate query parameters. + query_validation = validate_query_params(event) + if query_validation["statusCode"] != 200: + return query_validation + + released_flag = query_validation["data"]["released_flag"] + release_type = query_validation["data"]["release_type"] + query_params = query_validation["data"]["query_params"] + + if release_type == "release": + release_type_handler(released_flag, query_params) + elif release_type == "early-release": + early_release_type_handler(query_params) + elif release_type == "unrelease": + unrelease_type_handler(query_params) + + return { + "statusCode": 200, + "body": json.dumps( + f"Successful {release_type} action - " + f"updated release status to '{released_flag}'" + ), + } diff --git a/sds_data_manager/lambda_code/SDSCode/spice_utilities.py b/sds_data_manager/lambda_code/SDSCode/spice_utilities.py index caa6337f1..9f0fa2a82 100644 --- a/sds_data_manager/lambda_code/SDSCode/spice_utilities.py +++ b/sds_data_manager/lambda_code/SDSCode/spice_utilities.py @@ -59,7 +59,7 @@ def download_from_s3(s3_key: str, bucket_name: str | None = None) -> Path: logger.info(f"Downloaded {s3_key} from bucket {bucket_name} to {local_path}") return local_path except Exception as e: - logger.error() + logger.error(e) raise FileNotFoundError( f"Failed to download {s3_key} from bucket {bucket_name}: {e}" ) from e diff --git a/sds_data_manager/lambda_code/idex_processing/requirements.txt b/sds_data_manager/lambda_code/idex_processing/requirements.txt index c2e90d4fa..f0e5fae26 100644 --- a/sds_data_manager/lambda_code/idex_processing/requirements.txt +++ b/sds_data_manager/lambda_code/idex_processing/requirements.txt @@ -238,9 +238,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 imap-processing==1.0.32 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:c6dc7e945d8118e2ad5b48cc2ba98aadfe80c4b26d641b8255172aaebf28b77b \ --hash=sha256:d53ca85c1ca111354074949e479fd082e518416194817bd094513ca048bdf17b diff --git a/sds_data_manager/lambda_code/processing/requirements.txt b/sds_data_manager/lambda_code/processing/requirements.txt index 72be208fe..88c466248 100644 --- a/sds_data_manager/lambda_code/processing/requirements.txt +++ b/sds_data_manager/lambda_code/processing/requirements.txt @@ -238,9 +238,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 imap-processing==1.0.32 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:c6dc7e945d8118e2ad5b48cc2ba98aadfe80c4b26d641b8255172aaebf28b77b \ --hash=sha256:d53ca85c1ca111354074949e479fd082e518416194817bd094513ca048bdf17b diff --git a/tests/lambda_endpoints/test_release_api.py b/tests/lambda_endpoints/test_release_api.py new file mode 100644 index 000000000..2fd0e6fb6 --- /dev/null +++ b/tests/lambda_endpoints/test_release_api.py @@ -0,0 +1,351 @@ +"""Integration tests for the Release API. + +These tests verify the six core release behaviors against a live in-memory DB: + +Test Case 1 release (no descriptor) — all matching instrument+date files released +Test Case 2 release (with descriptor) — only the matching descriptor subset released +Test Case 3 early-release (no descriptor) — same bulk outcome as Test Case 1 +Test Case 4 early-release (with descriptor) — same selective outcome as Test Case 2 +Test Case 5 unrelease (no descriptor) — all matching files unreleased +Test Case 6 unrelease (with descriptor) — only matching descriptor files unreleased +""" + +import datetime + +from sds_data_manager.lambda_code.SDSCode.api_lambdas import release_api +from sds_data_manager.lambda_code.SDSCode.database import models + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _build_event(query_params, scope="full"): + """Build a minimal API Gateway event for the release API.""" + return { + "queryStringParameters": query_params, + "rawPath": "/api-key/release", + "body": "", + "requestContext": { + "authorizer": {"lambda": {"apiKey": "test-key", "scope": scope}} + }, + } + + +def _required( + instrument="hit", + start_date="20250101", + end_date="20250131", + release_type="release", +): + """Return a minimal valid query-param dict.""" + return { + "instrument": instrument, + "start_date": start_date, + "end_date": end_date, + "release_type": release_type, + } + + +def _science( + session, + *, + file_path, + instrument="hit", + descriptor="hk", + start_date="20250115", + released=False, +): + session.add( + models.ScienceFiles( + file_path=file_path, + instrument=instrument, + data_level="l0", + descriptor=descriptor, + start_date=datetime.datetime.strptime(start_date, "%Y%m%d"), + version="v001", + extension="pkts", + released=released, + ingestion_date=datetime.datetime( + 2025, 1, 20, 0, 0, 0, tzinfo=datetime.timezone.utc + ), + ) + ) + session.commit() + + +# --------------------------------------------------------------------------- +# Test Case 1 release — all files for instrument + date range +# --------------------------------------------------------------------------- + + +def test_release_all_files_in_date_range(session): + """release_type=release with no descriptor releases every matching file. + + Two files for the target instrument within the date range and one file + outside the range must remain unreleased. + """ + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250110", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250120", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250201", + ) # outside range + + result = release_api.lambda_handler( + event=_build_event(_required(start_date="20250101", end_date="20250131")), + context={}, + ) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts"] is True, ( + "In-range file should be released" + ) + assert rows["imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts"] is True, ( + "In-range file should be released" + ) + assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is False, ( + "Out-of-range file must stay unreleased" + ) + + +# --------------------------------------------------------------------------- +# Test Case 2 release — only the matching descriptor +# --------------------------------------------------------------------------- + + +def test_release_specific_descriptor(session): + """release_type=release with descriptor releases only that product. + + Two files share the same instrument and date range but have different + descriptors; only the specified descriptor must be released. + """ + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250115", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250115", + ) + + params = _required() + params["descriptor"] = "hk" + result = release_api.lambda_handler(event=_build_event(params), context={}) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts"] is True, ( + "Specified descriptor must be released" + ) + assert rows["imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts"] is False, ( + "Other descriptor must remain unreleased" + ) + + +# --------------------------------------------------------------------------- +# Test Case 3 early-release — all files for instrument + date range +# --------------------------------------------------------------------------- + + +def test_early_release_all_files_in_date_range(session): + """release_type=early-release with no descriptor releases all matching files. + + Behaviour must be identical to Test Case 1 but without requiring release_number. + """ + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250110", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250120", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250201", + ) + + result = release_api.lambda_handler( + event=_build_event( + _required( + start_date="20250101", end_date="20250131", release_type="early-release" + ) + ), + context={}, + ) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts"] is True + assert rows["imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts"] is True + assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is False + + +# --------------------------------------------------------------------------- +# Test Case 4 early-release — only the matching descriptor +# --------------------------------------------------------------------------- + + +def test_early_release_specific_descriptor(session): + """release_type=early-release with descriptor releases only that product.""" + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250115", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250115", + ) + + params = _required(release_type="early-release") + params["descriptor"] = "hk" + result = release_api.lambda_handler(event=_build_event(params), context={}) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts"] is True + assert rows["imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts"] is False + + +# --------------------------------------------------------------------------- +# Test Case 5 unrelease — all files for instrument + date range +# --------------------------------------------------------------------------- + + +def test_unrelease_all_files_in_date_range(session): + """release_type=unrelease clears released flag on all matching files. + + Pre-populate files as released=True; after the call they must be False. + Files outside the range must remain released=True. + """ + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250110", + released=True, + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250120", + released=True, + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250201", + released=True, + ) # outside range + + result = release_api.lambda_handler( + event=_build_event( + _required( + start_date="20250101", end_date="20250131", release_type="unrelease" + ) + ), + context={}, + ) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts"] is False, ( + "In-range file must be unreleased" + ) + assert rows["imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts"] is False, ( + "In-range file must be unreleased" + ) + assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is True, ( + "Out-of-range file must remain released" + ) + + +# --------------------------------------------------------------------------- +# Test Case 6 unrelease — only the matching descriptor +# --------------------------------------------------------------------------- + + +def test_unrelease_specific_descriptor(session): + """release_type=unrelease with descriptor only clears that descriptor. + + Both files start as released=True; only the specified descriptor should + be unreleased after the call. + """ + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250115", + released=True, + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250115", + released=True, + ) + + params = _required(release_type="unrelease") + params["descriptor"] = "hk" + result = release_api.lambda_handler(event=_build_event(params), context={}) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts"] is False, ( + "Specified descriptor must be unreleased" + ) + assert rows["imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts"] is True, ( + "Other descriptor must remain released" + ) From 3d9e49f430d2d7a578e9ecf3fd5349f948f84815 Mon Sep 17 00:00:00 2001 From: Tenzin Choedon Date: Wed, 27 May 2026 09:41:46 -0600 Subject: [PATCH 2/5] test fixes --- tests/lambda_endpoints/test_release_api.py | 200 +++++---------------- 1 file changed, 45 insertions(+), 155 deletions(-) diff --git a/tests/lambda_endpoints/test_release_api.py b/tests/lambda_endpoints/test_release_api.py index 2fd0e6fb6..558a5676f 100644 --- a/tests/lambda_endpoints/test_release_api.py +++ b/tests/lambda_endpoints/test_release_api.py @@ -3,14 +3,12 @@ These tests verify the six core release behaviors against a live in-memory DB: Test Case 1 release (no descriptor) — all matching instrument+date files released -Test Case 2 release (with descriptor) — only the matching descriptor subset released -Test Case 3 early-release (no descriptor) — same bulk outcome as Test Case 1 -Test Case 4 early-release (with descriptor) — same selective outcome as Test Case 2 -Test Case 5 unrelease (no descriptor) — all matching files unreleased -Test Case 6 unrelease (with descriptor) — only matching descriptor files unreleased +Test Case 2 early-release +Test Case 3 unrelease """ import datetime +from unittest.mock import patch from sds_data_manager.lambda_code.SDSCode.api_lambdas import release_api from sds_data_manager.lambda_code.SDSCode.database import models @@ -32,21 +30,6 @@ def _build_event(query_params, scope="full"): } -def _required( - instrument="hit", - start_date="20250101", - end_date="20250131", - release_type="release", -): - """Return a minimal valid query-param dict.""" - return { - "instrument": instrument, - "start_date": start_date, - "end_date": end_date, - "release_type": release_type, - } - - def _science( session, *, @@ -107,8 +90,15 @@ def test_release_all_files_in_date_range(session): start_date="20250201", ) # outside range + params = { + "instrument": "hit", + "start_date": "20250101", + "end_date": "20250131", + "release_type": "release", + "release_number": "1", + } result = release_api.lambda_handler( - event=_build_event(_required(start_date="20250101", end_date="20250131")), + event=_build_event(params), context={}, ) @@ -127,56 +117,22 @@ def test_release_all_files_in_date_range(session): # --------------------------------------------------------------------------- -# Test Case 2 release — only the matching descriptor +# Test Case 2 early-release # --------------------------------------------------------------------------- -def test_release_specific_descriptor(session): - """release_type=release with descriptor releases only that product. +@patch( + "sds_data_manager.lambda_code.SDSCode.api_lambdas.release_api.download_read_file" +) +def test_early_release(mock_download_read_file, session): + # Provide the manifest file with the two in-range files + science_files = [ + "imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + "imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + ] + ancillary_files = [] + mock_download_read_file.return_value = (science_files, ancillary_files) - Two files share the same instrument and date range but have different - descriptors; only the specified descriptor must be released. - """ - _science( - session, - file_path="imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts", - instrument="hit", - descriptor="hk", - start_date="20250115", - ) - _science( - session, - file_path="imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts", - instrument="hit", - descriptor="sci", - start_date="20250115", - ) - - params = _required() - params["descriptor"] = "hk" - result = release_api.lambda_handler(event=_build_event(params), context={}) - - assert result["statusCode"] == 200 - - rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} - assert rows["imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts"] is True, ( - "Specified descriptor must be released" - ) - assert rows["imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts"] is False, ( - "Other descriptor must remain unreleased" - ) - - -# --------------------------------------------------------------------------- -# Test Case 3 early-release — all files for instrument + date range -# --------------------------------------------------------------------------- - - -def test_early_release_all_files_in_date_range(session): - """release_type=early-release with no descriptor releases all matching files. - - Behaviour must be identical to Test Case 1 but without requiring release_number. - """ _science( session, file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", @@ -199,11 +155,13 @@ def test_early_release_all_files_in_date_range(session): start_date="20250201", ) + manifest_file = "s3://dummy-bucket/manifest.txt" result = release_api.lambda_handler( event=_build_event( - _required( - start_date="20250101", end_date="20250131", release_type="early-release" - ) + { + "release_type": "early-release", + "manifest_file": manifest_file, + } ), context={}, ) @@ -217,49 +175,22 @@ def test_early_release_all_files_in_date_range(session): # --------------------------------------------------------------------------- -# Test Case 4 early-release — only the matching descriptor -# --------------------------------------------------------------------------- - - -def test_early_release_specific_descriptor(session): - """release_type=early-release with descriptor releases only that product.""" - _science( - session, - file_path="imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts", - instrument="hit", - descriptor="hk", - start_date="20250115", - ) - _science( - session, - file_path="imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts", - instrument="hit", - descriptor="sci", - start_date="20250115", - ) - - params = _required(release_type="early-release") - params["descriptor"] = "hk" - result = release_api.lambda_handler(event=_build_event(params), context={}) - - assert result["statusCode"] == 200 - - rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} - assert rows["imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts"] is True - assert rows["imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts"] is False - - -# --------------------------------------------------------------------------- -# Test Case 5 unrelease — all files for instrument + date range +# Test Case 3 unrelease # --------------------------------------------------------------------------- -def test_unrelease_all_files_in_date_range(session): - """release_type=unrelease clears released flag on all matching files. +@patch( + "sds_data_manager.lambda_code.SDSCode.api_lambdas.release_api.download_read_file" +) +def test_unrelease_all_files_in_date_range(mock_download_read_file, session): + # Provide the manifest file with the two in-range files + science_files = [ + "imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + "imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + ] + ancillary_files = [] + mock_download_read_file.return_value = (science_files, ancillary_files) - Pre-populate files as released=True; after the call they must be False. - Files outside the range must remain released=True. - """ _science( session, file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", @@ -285,11 +216,13 @@ def test_unrelease_all_files_in_date_range(session): released=True, ) # outside range + manifest_file = "s3://dummy-bucket/manifest.txt" result = release_api.lambda_handler( event=_build_event( - _required( - start_date="20250101", end_date="20250131", release_type="unrelease" - ) + { + "release_type": "unrelease", + "manifest_file": manifest_file, + } ), context={}, ) @@ -306,46 +239,3 @@ def test_unrelease_all_files_in_date_range(session): assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is True, ( "Out-of-range file must remain released" ) - - -# --------------------------------------------------------------------------- -# Test Case 6 unrelease — only the matching descriptor -# --------------------------------------------------------------------------- - - -def test_unrelease_specific_descriptor(session): - """release_type=unrelease with descriptor only clears that descriptor. - - Both files start as released=True; only the specified descriptor should - be unreleased after the call. - """ - _science( - session, - file_path="imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts", - instrument="hit", - descriptor="hk", - start_date="20250115", - released=True, - ) - _science( - session, - file_path="imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts", - instrument="hit", - descriptor="sci", - start_date="20250115", - released=True, - ) - - params = _required(release_type="unrelease") - params["descriptor"] = "hk" - result = release_api.lambda_handler(event=_build_event(params), context={}) - - assert result["statusCode"] == 200 - - rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} - assert rows["imap/hit/l0/imap_hit_l0_hk_20250115_v001.pkts"] is False, ( - "Specified descriptor must be unreleased" - ) - assert rows["imap/hit/l0/imap_hit_l0_sci_20250115_v001.pkts"] is True, ( - "Other descriptor must remain released" - ) From 04b006e31de1f45c294e60f2835974030887db4d Mon Sep 17 00:00:00 2001 From: Tenzin Choedon Date: Wed, 27 May 2026 12:19:16 -0600 Subject: [PATCH 3/5] feedback changes --- .../SDSCode/api_lambdas/release_api.py | 227 +++++++++--------- tests/lambda_endpoints/test_release_api.py | 45 ++++ 2 files changed, 152 insertions(+), 120 deletions(-) diff --git a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py index 7ebfbe7f6..7ae37c200 100644 --- a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py +++ b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py @@ -76,9 +76,6 @@ def validate_query_params(event): ), } - # "unrelease" sets released=False; everything else sets released=True. - released_flag = release_type != "unrelease" - valid_parameters = [ "instrument", "start_date", @@ -103,82 +100,84 @@ def validate_query_params(event): "statusCode": 200, "data": { "release_type": release_type, - "released_flag": released_flag, "query_params": query_params, }, } -def query_withhold_files(session, instrument, start_date, end_date, release_number): - """Query for the latest-version withhold files matching given criteria.""" - descriptor = f"withhold-data-release-{int(release_number):03d}" - release_table = models.ReleaseFiles +def query_latest_science_files( + session, instrument, start_date, end_date, science_files_to_exclude=None +): + """Query for the latest-version science file paths matching given criteria.""" + science_table = models.ScienceFiles - # Query latest withhold file versions for given group - # ( instrument, descriptor, and date range). - max_ver_subq = ( - session.query( - release_table.instrument, - release_table.descriptor, - release_table.start_date, - release_table.end_date, - func.max(release_table.version).label("max_version"), - ) - .group_by( - release_table.instrument, - release_table.descriptor, - release_table.start_date, - release_table.end_date, - ) - .subquery() - ) - # Now query for specific withhold file matching input - withhold_files = ( - session.query(release_table) - .join( - max_ver_subq, - (release_table.instrument == max_ver_subq.c.instrument) - & (release_table.descriptor == max_ver_subq.c.descriptor) - & (release_table.start_date == max_ver_subq.c.start_date) - & (release_table.end_date == max_ver_subq.c.end_date) - & (release_table.version == max_ver_subq.c.max_version), - ) + # Check if any file in the range has a non-null repointing + has_repointing = ( + session.query(science_table) .filter( - release_table.instrument == instrument, - release_table.end_date >= start_date, - release_table.start_date <= end_date, - release_table.descriptor == descriptor, + science_table.instrument == instrument, + science_table.start_date >= start_date, + science_table.start_date <= end_date, + science_table.repointing.isnot(None), ) - .one_or_none() # TODO: update this logic if we want to - # support more than one withhold file per release batch + .first() + is not None ) - return withhold_files - -def query_latest_science_files(session, instrument, start_date, end_date): - """Query for the latest-version science file paths matching given criteria.""" - science_table = models.ScienceFiles - - max_ver_subq = ( - session.query( - science_table.instrument, - science_table.data_level, - science_table.descriptor, - science_table.start_date, - func.max(science_table.version).label("max_version"), + if has_repointing: + max_ver_subq = ( + session.query( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + science_table.repointing, + func.max(science_table.version).label("max_version"), + ) + .group_by( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + science_table.repointing, + ) + .subquery() + ) + latest_science_files = ( + session.query(science_table) + .join( + max_ver_subq, + (science_table.instrument == max_ver_subq.c.instrument) + & (science_table.descriptor == max_ver_subq.c.descriptor) + & (science_table.start_date == max_ver_subq.c.start_date) + & (science_table.repointing == max_ver_subq.c.repointing) + & (science_table.version == max_ver_subq.c.max_version), + ) + .filter( + science_table.instrument == instrument, + science_table.start_date >= start_date, + science_table.start_date <= end_date, + ) ) - .group_by( - science_table.instrument, - science_table.data_level, - science_table.descriptor, - science_table.start_date, + else: + max_ver_subq = ( + session.query( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + func.max(science_table.version).label("max_version"), + ) + .group_by( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + ) + .subquery() ) - .subquery() - ) - latest_science_files = [ - row.file_path - for row in ( - session.query(science_table.file_path) + latest_science_files = ( + session.query(science_table) .join( max_ver_subq, (science_table.instrument == max_ver_subq.c.instrument) @@ -191,13 +190,14 @@ def query_latest_science_files(session, instrument, start_date, end_date): science_table.start_date >= start_date, science_table.start_date <= end_date, ) - .all() ) - ] - logger.info( - f"Found {len(latest_science_files)} science file(s) for instrument={instrument}" - ) - return latest_science_files + if science_files_to_exclude: + latest_science_files = latest_science_files.filter( + ~science_table.file_path.in_(science_files_to_exclude) + ) + results = list(latest_science_files) + logger.info(f"Found {len(results)} science file(s) for instrument={instrument}") + return results def get_latest_ancillary_files( @@ -205,6 +205,7 @@ def get_latest_ancillary_files( instrument: str, start_date: datetime.datetime, end_date: datetime.datetime, + ancillary_files_to_exclude: list | None = None, ) -> list: """Get latest-version ancillary files for an instrument over a date range. @@ -228,6 +229,8 @@ def get_latest_ancillary_files( Start of query date range. end_date : datetime.datetime End of query date range. + ancillary_files_to_exclude : list, optional + List of ancillary file paths to exclude from results, by default None Returns ------- @@ -340,11 +343,24 @@ def get_latest_ancillary_files( select(no_end_date_query.subquery()), ).order_by("file_path") - file_paths = [row.file_path for row in session.execute(combined).fetchall()] - logger.info( - f"Found {len(file_paths)} ancillary file(s) for instrument={instrument}" + # Now exclude any files in the exclude list + if ancillary_files_to_exclude: + combined = combined.where(~combined.c.file_path.in_(ancillary_files_to_exclude)) + + ancillary_file_paths = [ + row.file_path for row in session.execute(combined).fetchall() + ] + if not ancillary_file_paths: + logger.info(f"Found 0 ancillary file(s) for instrument={instrument}") + return [] + + results = list( + session.query(models.AncillaryFiles).filter( + models.AncillaryFiles.file_path.in_(ancillary_file_paths) + ) ) - return file_paths + logger.info(f"Found {len(results)} ancillary file(s) for instrument={instrument}") + return results def download_read_file(exception_list_file_path): @@ -373,7 +389,7 @@ def download_read_file(exception_list_file_path): logger.debug(f"Downloading manifest file from S3 path: {s3_file_path}") download_path = download_from_s3(s3_file_path) - logger.debug(f"Download path after download: {download_path}") + logger.debug(f"Local path after download: {download_path}") lines = download_path.read_text(encoding="utf-8").splitlines() science_files = [] @@ -393,26 +409,20 @@ def download_read_file(exception_list_file_path): return science_files, ancillary_files -def release_type_handler(released_flag, query_params): +def release_type_handler(query_params): """Handle 'release' type requests.""" start_date = datetime.datetime.strptime(query_params["start_date"], "%Y%m%d") end_date = datetime.datetime.strptime(query_params["end_date"], "%Y%m%d") + exclude_file = query_params.get("exclude_file", None) with db.Session() as session: # Query for withhold files to exclude from release. science_files_to_exclude = [] ancillary_files_to_exclude = [] - withhold_files = query_withhold_files( - session, - query_params["instrument"], - start_date, - end_date, - query_params.get("release_number"), - ) - if withhold_files: + if exclude_file: science_files_to_exclude, ancillary_files_to_exclude = download_read_file( - withhold_files.file_path + exclude_file ) science_files_to_update = query_latest_science_files( @@ -420,41 +430,22 @@ def release_type_handler(released_flag, query_params): query_params["instrument"], start_date, end_date, + science_files_to_exclude=science_files_to_exclude, ) - if science_files_to_exclude != []: - science_files_to_update = [ - file_path - for file_path in science_files_to_update - if Path(file_path).name not in science_files_to_exclude - ] ancillary_files_to_update = get_latest_ancillary_files( session, query_params["instrument"], start_date, end_date, - ) - if ancillary_files_to_exclude != []: - ancillary_files_to_update = [ - file_path - for file_path in ancillary_files_to_update - if Path(file_path).name not in ancillary_files_to_exclude - ] - - # For all science and ancillary files, update released flag for all - # applicable files. - session.query(models.ScienceFiles).filter( - models.ScienceFiles.file_path.in_(science_files_to_update) - ).update( - {models.ScienceFiles.released: released_flag}, synchronize_session=False - ) - - session.query(models.AncillaryFiles).filter( - models.AncillaryFiles.file_path.in_(ancillary_files_to_update) - ).update( - {models.AncillaryFiles.released: released_flag}, synchronize_session=False + ancillary_files_to_exclude=ancillary_files_to_exclude, ) + # Directly update ORM objects + for obj in science_files_to_update: + obj.released = True + for obj in ancillary_files_to_update: + obj.released = True session.commit() @@ -576,12 +567,11 @@ def lambda_handler(event, context): if query_validation["statusCode"] != 200: return query_validation - released_flag = query_validation["data"]["released_flag"] release_type = query_validation["data"]["release_type"] query_params = query_validation["data"]["query_params"] if release_type == "release": - release_type_handler(released_flag, query_params) + release_type_handler(query_params) elif release_type == "early-release": early_release_type_handler(query_params) elif release_type == "unrelease": @@ -589,8 +579,5 @@ def lambda_handler(event, context): return { "statusCode": 200, - "body": json.dumps( - f"Successful {release_type} action - " - f"updated release status to '{released_flag}'" - ), + "body": json.dumps(f"Successful {release_type} "), } diff --git a/tests/lambda_endpoints/test_release_api.py b/tests/lambda_endpoints/test_release_api.py index 558a5676f..a140e4f9b 100644 --- a/tests/lambda_endpoints/test_release_api.py +++ b/tests/lambda_endpoints/test_release_api.py @@ -239,3 +239,48 @@ def test_unrelease_all_files_in_date_range(mock_download_read_file, session): assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is True, ( "Out-of-range file must remain released" ) + + +# --------------------------------------------------------------------------- +# Test Case 4 repoint files for a single day +# --------------------------------------------------------------------------- + + +def test_release_repoint_files_date_range(session): + """Test multiple repoint files for a single day.""" + # April 7th, Hi has three repoint files with different repointing and version + files = [ + ("imap_hi_l1a_45sensor-hk_20260407-repoint00209_v001.cdf", 209, "v001"), + ("imap_hi_l1a_45sensor-hk_20260407-repoint00210_v002.cdf", 210, "v002"), + ("imap_hi_l1a_45sensor-hk_20260407-repoint00211_v003.cdf", 211, "v003"), + ] + for file_path, repointing, version in files: + session.add( + models.ScienceFiles( + file_path=file_path, + instrument="hi", + data_level="l1a", + descriptor="45sensor-hk", + start_date=datetime.datetime.strptime("20260407", "%Y%m%d"), + repointing=repointing, + version=version, + extension="cdf", + released=False, + ingestion_date=datetime.datetime(2026, 4, 8, 0, 0, 0), + ) + ) + session.commit() + + # Query for all files on this date + results = release_api.query_latest_science_files( + session, + instrument="hi", + start_date=datetime.datetime.strptime("20260407", "%Y%m%d"), + end_date=datetime.datetime.strptime("20260407", "%Y%m%d"), + ) + file_paths = sorted([obj.file_path for obj in results]) + assert file_paths == [ + "imap_hi_l1a_45sensor-hk_20260407-repoint00209_v001.cdf", + "imap_hi_l1a_45sensor-hk_20260407-repoint00210_v002.cdf", + "imap_hi_l1a_45sensor-hk_20260407-repoint00211_v003.cdf", + ], f"Expected all repoint files, got: {file_paths}" From 570cf1a27fd6decb0c71e1a94304f35b45b39122 Mon Sep 17 00:00:00 2001 From: Tenzin Choedon Date: Wed, 27 May 2026 13:06:43 -0600 Subject: [PATCH 4/5] feedback --- sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py index 7ae37c200..ab205d209 100644 --- a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py +++ b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py @@ -148,6 +148,7 @@ def query_latest_science_files( .join( max_ver_subq, (science_table.instrument == max_ver_subq.c.instrument) + & (science_table.data_level == max_ver_subq.c.data_level) & (science_table.descriptor == max_ver_subq.c.descriptor) & (science_table.start_date == max_ver_subq.c.start_date) & (science_table.repointing == max_ver_subq.c.repointing) @@ -181,6 +182,7 @@ def query_latest_science_files( .join( max_ver_subq, (science_table.instrument == max_ver_subq.c.instrument) + & (science_table.data_level == max_ver_subq.c.data_level) & (science_table.descriptor == max_ver_subq.c.descriptor) & (science_table.start_date == max_ver_subq.c.start_date) & (science_table.version == max_ver_subq.c.max_version), From 11acea7141223605189b89c25cc09409a3a34d86 Mon Sep 17 00:00:00 2001 From: Tenzin Choedon Date: Wed, 27 May 2026 14:36:48 -0600 Subject: [PATCH 5/5] ancillary query simplification --- .../SDSCode/api_lambdas/release_api.py | 123 +++++++----------- tests/lambda_endpoints/test_release_api.py | 67 ++++++++++ 2 files changed, 111 insertions(+), 79 deletions(-) diff --git a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py index ab205d209..19114174b 100644 --- a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py +++ b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py @@ -11,7 +11,7 @@ ScienceFilePath, generate_imap_file_path, ) -from sqlalchemy import func, or_, select, union_all +from sqlalchemy import func, or_, text, union_all from ..database import database as db from ..database import models @@ -241,15 +241,12 @@ def get_latest_ancillary_files( """ ancillary_table = models.AncillaryFiles - # ======================================== - # Step 1: Keep only latest version per - # (instrument, descriptor, start_date, end_date). - # ======================================== + # Step 1: Get latest version per (descriptor, start_date, end_date) + # Filter by instrument early to reduce data processed row_num_col = ( func.row_number() .over( partition_by=[ - ancillary_table.instrument, ancillary_table.descriptor, ancillary_table.start_date, ancillary_table.end_date, @@ -258,100 +255,68 @@ def get_latest_ancillary_files( ) .label("row_num") ) - ranked = session.query( - ancillary_table.file_path, - ancillary_table.instrument, - ancillary_table.descriptor, - ancillary_table.start_date, - ancillary_table.end_date, - ancillary_table.version, - row_num_col, - ).subquery() - - latest_versions = session.query(ranked).filter(ranked.c.row_num == 1).subquery() - - # ======================================== - # Step 2: Files with end_date - # ======================================== - with_end_date_query = session.query( - latest_versions.c.file_path, - latest_versions.c.instrument, - latest_versions.c.descriptor, - latest_versions.c.start_date, - latest_versions.c.end_date, - latest_versions.c.version, - ).filter( - latest_versions.c.instrument == instrument, - latest_versions.c.end_date.isnot(None), - latest_versions.c.start_date <= end_date, - latest_versions.c.end_date >= start_date, - ) - # ======================================== - # Step 3: Files without end_date - # ======================================== - next_start_date_col = ( - func.lead(latest_versions.c.start_date) - .over( - partition_by=[ - latest_versions.c.instrument, - latest_versions.c.descriptor, - ], - order_by=latest_versions.c.start_date, + latest_versions = ( + session.query( + ancillary_table.file_path, + ancillary_table.descriptor, + ancillary_table.start_date, + ancillary_table.end_date, ) - .label("next_start_date") + .filter(ancillary_table.instrument == instrument) + .add_columns(row_num_col) + .subquery() ) - no_end_date_coverage = ( + + latest = ( session.query( latest_versions.c.file_path, - latest_versions.c.instrument, latest_versions.c.descriptor, latest_versions.c.start_date, latest_versions.c.end_date, - latest_versions.c.version, - next_start_date_col, - ) - .filter( - latest_versions.c.end_date.is_(None), ) + .filter(latest_versions.c.row_num == 1) .subquery() ) - # ======================================================== - # Step 4: Look for files where start_date <= query_end AND - # (next_file_start >= query_start) - # ======================================================== - no_end_date_query = session.query( - no_end_date_coverage.c.file_path, - no_end_date_coverage.c.instrument, - no_end_date_coverage.c.descriptor, - no_end_date_coverage.c.start_date, - no_end_date_coverage.c.end_date, - no_end_date_coverage.c.version, - ).filter( - no_end_date_coverage.c.instrument == instrument, - no_end_date_coverage.c.start_date <= end_date, + # Step 2: Files WITH end_date - simple overlap check + with_end_date_query = session.query(latest.c.file_path).filter( + latest.c.end_date.isnot(None), + latest.c.start_date <= end_date, + latest.c.end_date >= start_date, + ) + + # Step 3: Files WITHOUT end_date - use LEAD() to find coverage end + next_start_col = ( + func.lead(latest.c.start_date) + .over(partition_by=latest.c.descriptor, order_by=latest.c.start_date) + .label("next_start_date") + ) + + no_end_with_next = ( + session.query(latest.c.file_path, latest.c.start_date, next_start_col) + .filter(latest.c.end_date.is_(None)) + .subquery() + ) + + no_end_date_query = session.query(no_end_with_next.c.file_path).filter( + no_end_with_next.c.start_date <= end_date, or_( - no_end_date_coverage.c.next_start_date.is_(None), - no_end_date_coverage.c.next_start_date > start_date, + no_end_with_next.c.next_start_date.is_(None), + no_end_with_next.c.next_start_date > start_date, ), ) - # ======================================== - # Final: UNION ALL and order by file_path. - # ======================================== - combined = union_all( - select(with_end_date_query.subquery()), - select(no_end_date_query.subquery()), - ).order_by("file_path") + # Combine + combined = union_all(with_end_date_query, no_end_date_query).order_by( + text("file_path") + ) # Now exclude any files in the exclude list if ancillary_files_to_exclude: combined = combined.where(~combined.c.file_path.in_(ancillary_files_to_exclude)) - ancillary_file_paths = [ - row.file_path for row in session.execute(combined).fetchall() - ] + ancillary_file_paths = [row[0] for row in session.execute(combined).fetchall()] if not ancillary_file_paths: logger.info(f"Found 0 ancillary file(s) for instrument={instrument}") return [] diff --git a/tests/lambda_endpoints/test_release_api.py b/tests/lambda_endpoints/test_release_api.py index a140e4f9b..fe701d8f4 100644 --- a/tests/lambda_endpoints/test_release_api.py +++ b/tests/lambda_endpoints/test_release_api.py @@ -5,6 +5,8 @@ Test Case 1 release (no descriptor) — all matching instrument+date files released Test Case 2 early-release Test Case 3 unrelease +Test Case 4 repoint files for a single day +Test Case 5 ancillary files query """ import datetime @@ -284,3 +286,68 @@ def test_release_repoint_files_date_range(session): "imap_hi_l1a_45sensor-hk_20260407-repoint00210_v002.cdf", "imap_hi_l1a_45sensor-hk_20260407-repoint00211_v003.cdf", ], f"Expected all repoint files, got: {file_paths}" + + +# --------------------------------------------------------------------------- +# Test Case 5 ancillary files released from manifest +# --------------------------------------------------------------------------- +def test_release_ancillary_files_in_date_range(session): + """Ancillary files in manifest are released properly.""" + # Add all as unreleased + session.add( + models.AncillaryFiles( + file_path="imap/ancillary/codice/imap_codice_l1a-sci-lut_20260129_v002.json", + instrument="codice", + descriptor="l1a-sci-lut", + start_date=datetime.datetime.strptime("20260129", "%Y%m%d"), + end_date=None, + version="v002", + extension="json", + released=False, + ingestion_date=datetime.datetime(2026, 1, 30, 0, 0, 0), + ) + ) + session.add( + models.AncillaryFiles( + file_path="imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_20260403_v001.json", + instrument="codice", + descriptor="l1a-sci-lut", + start_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + end_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + version="v001", + extension="json", + released=False, + ingestion_date=datetime.datetime(2026, 4, 4, 0, 0, 0), + ) + ) + # Add an out-of-range ancillary file (should not be released) + session.add( + models.AncillaryFiles( + file_path="imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_v002.json", + instrument="codice", + descriptor="l1a-sci-lut", + start_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + end_date=None, + version="v002", + extension="json", + released=False, + ingestion_date=datetime.datetime(2026, 1, 2, 0, 0, 0), + ) + ) + session.commit() + + result = release_api.get_latest_ancillary_files( + session, + instrument="codice", + start_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + end_date=datetime.datetime.strptime("20260430", "%Y%m%d"), + ) + expected_ancillary_files = [ + "imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_20260403_v001.json", + "imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_v002.json", + ] + assert len(result) == 2, f"Expected 2 ancillary files, got {len(result)}" + assert sorted([f.file_path for f in result]) == expected_ancillary_files, ( + f"Expected ancillary files {expected_ancillary_files}, " + f"got {[f.file_path for f in result]}" + )