diff --git a/lambda_layer/database/requirements.txt b/lambda_layer/database/requirements.txt index 77bd1083d..6336d6267 100644 --- a/lambda_layer/database/requirements.txt +++ b/lambda_layer/database/requirements.txt @@ -200,9 +200,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 jmespath==1.1.0 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d \ --hash=sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64 diff --git a/lambda_layer/spice/requirements.txt b/lambda_layer/spice/requirements.txt index e6dae531c..48c5f4d16 100644 --- a/lambda_layer/spice/requirements.txt +++ b/lambda_layer/spice/requirements.txt @@ -200,9 +200,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 jmespath==1.1.0 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d \ --hash=sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64 diff --git a/poetry.lock b/poetry.lock index 6ad51ef29..97f53fd5b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.3.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.4.0 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -141,30 +141,17 @@ files = [ docs = ["pytest"] test = ["hypothesis", "pytest", "pytest-remotedata"] -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["main"] -markers = "extra == \"test\" and sys_platform == \"win32\"" -files = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] - [[package]] name = "attrs" version = "25.4.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.9" -groups = ["main", "cdk-install"] +groups = ["cdk-install"] files = [ {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, ] -markers = {main = "extra == \"test\""} [[package]] name = "aws-cdk-asset-awscli-v1" @@ -1035,14 +1022,14 @@ files = [ [[package]] name = "imap-data-access" -version = "0.40.1" +version = "0.41.0" description = "IMAP SDC Data Access" optional = false python-versions = "*" groups = ["main", "layer-idex-processing", "layer-processing"] files = [ - {file = "imap_data_access-0.40.1-py3-none-any.whl", hash = "sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832"}, - {file = "imap_data_access-0.40.1.tar.gz", hash = "sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab"}, + {file = "imap_data_access-0.41.0-py3-none-any.whl", hash = "sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557"}, + {file = "imap_data_access-0.41.0.tar.gz", hash = "sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169"}, ] [package.dependencies] @@ -1932,19 +1919,6 @@ files = [ {file = "publication-0.0.3.tar.gz", hash = "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4"}, ] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -groups = ["main"] -markers = "extra == \"test\"" -files = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] - [[package]] name = "pycparser" version = "3.0" @@ -2026,36 +2000,33 @@ files = [ {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"}, {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"}, ] -markers = {main = "extra == \"doc\""} +markers = {main = "extra == \"doc\" or extra == \"test\""} [package.extras] windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pytest" -version = "6.2.5" +version = "8.4.2" description = "pytest: simple powerful testing with Python" optional = true -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"test\"" files = [ - {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, - {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, ] [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=19.2.0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -py = ">=1.8.2" -toml = "*" +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-cov" @@ -2732,19 +2703,6 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3_binary"] -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -optional = true -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["main"] -markers = "extra == \"test\"" -files = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] - [[package]] name = "typeguard" version = "2.13.3" @@ -2893,4 +2851,4 @@ tool = ["alembic"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "2d5991a15ea1c5157131391c2b68967b0b9fa0a2b5a0302b789d3dbd0f2d3a2a" +content-hash = "8ca7a4a385cc6021beab9b2792e60c221393777c28b75610b0faa1b22078a53a" diff --git a/pyproject.toml b/pyproject.toml index f1e98e1ea..51a70604a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ # the correct version and hash are used: # poetry export -f requirements.txt -o lambda_layer/database/requirements.txt --with layer-database dependencies = [ - "imap-data-access>=0.40.1", + "imap-data-access>=0.41.0", "boto3>=1.26.78,<2", "SQLAlchemy<=3.0.0", "numpy (>=2.4.6,<3.0.0)" @@ -43,7 +43,7 @@ poetry-plugin-export = ">=1.8" [project.optional-dependencies] dev = ["pre-commit>=3.3.3,<4", "ruff==0.2.1"] doc = ["sphinx>=7.1.0,<8", "myst-parser>=2.0.0,<3", "pydata-sphinx-theme>=0.13.3"] -test = ["pytest==6.2.5", "pytest-cov>=4.0.0,<5", "moto>=4.1.3,<5"] +test = ["pytest>=7.4.0,<9", "pytest-cov>=4.0.0,<5", "moto>=4.1.3,<5"] tool = ["alembic (>=1.18.4,<2.0.0)"] [tool.poetry.group.cdk-install.dependencies] diff --git a/sds_data_manager/constructs/sds_api_manager_construct.py b/sds_data_manager/constructs/sds_api_manager_construct.py index f3a91bd30..f5b55a2b8 100644 --- a/sds_data_manager/constructs/sds_api_manager_construct.py +++ b/sds_data_manager/constructs/sds_api_manager_construct.py @@ -53,7 +53,7 @@ def add_stable_route(api, base_path, http_method, lambda_function, prefix_list): class SdsApiManager(Construct): """Construct for API Management.""" - def __init__( + def __init__( # noqa: PLR0915 self, scope: Construct, construct_id: str, @@ -163,8 +163,10 @@ def __init__( # account only can allow upload through API key. if account_name == "prod": upload_route_prefixes = ["/api-key"] + release_route_prefixes = ["/api-key"] else: upload_route_prefixes = auth_route_prefixes + release_route_prefixes = ["/api-key"] # {proxy+} is used to allow for any pathParams after /upload/ add_stable_route( @@ -375,16 +377,19 @@ def __init__( security_groups=[rds_security_group], environment={ "SECRET_NAME": db_secret_name, + "IMAP_DATA_DIR": "/tmp", # noqa: S108 + "S3_BUCKET": data_bucket.bucket_name, + "REGION": env.region, }, layers=layers, ) - for prefix in auth_route_prefixes: - # {proxy+} is used to allow for any pathParams after /processing-jobs/ + for prefix in release_route_prefixes: api.add_route( route=f"{prefix}/release", http_method="GET", lambda_function=release_api_lambda, ) + release_api_lambda.add_to_role_policy(s3_read_policy) rds_secret = secrets.Secret.from_secret_name_v2( self, "rds_secret", db_secret_name diff --git a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py index c55b8d916..19114174b 100644 --- a/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py +++ b/sds_data_manager/lambda_code/SDSCode/api_lambdas/release_api.py @@ -1,6 +1,550 @@ """Lambda function for release API endpoint.""" +import datetime +import json +import logging +from pathlib import Path + +import imap_data_access +from imap_data_access.file_validation import ( + AncillaryFilePath, + ScienceFilePath, + generate_imap_file_path, +) +from sqlalchemy import func, or_, text, union_all + +from ..database import database as db +from ..database import models +from ..spice_utilities import download_from_s3 + +# Logger setup +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def check_api_key(event): + """Check API key scope; only non-read keys may release files.""" + request_ctx = event.get("requestContext", {}) + auth = request_ctx.get("authorizer", {}) + auth_ctx = auth.get("lambda", {}) + scope = auth_ctx.get("scope", "") + api_key = auth_ctx.get("apiKey", "unknown") + + logger.info(f"Release request received with scope: {scope}, api_key: {api_key}") + + if scope == "read": + logger.warning("Release denied: read scope user attempted release operation") + return { + "statusCode": 403, + "body": json.dumps( + "Release operation denied. Your API key has read permissions." + ), + } + + return { + "statusCode": 200, + "body": json.dumps("API key validated successfully."), + } + + +def validate_query_params(event): + """Validate query parameters and return (is_valid, error_message).""" + query_params = event.get("queryStringParameters") or {} + + # Validate release_type and derive the released flag value. + release_type = query_params["release_type"] + valid_release_types = ["release", "unrelease", "early-release"] + if release_type not in valid_release_types: + return { + "statusCode": 400, + "body": json.dumps( + f"'{release_type}' is not a valid release_type. " + f"Valid options are: {valid_release_types}" + ), + } + + if release_type == "release" and "release_number" not in query_params: + return { + "statusCode": 400, + "body": json.dumps( + "'release_number' query parameter is required when " + "'release_type' is 'release'. Please provide a release_number " + "indicating which release batch to apply. For example, " + "withhold files with 'release_number=1' will be included in " + "the first release batch, 'release_number=2' in the second, " + "and so on." + ), + } + + valid_parameters = [ + "instrument", + "start_date", + "end_date", + "release_type", + "exclude_file", + "manifest_file", + "release_number", + ] + + for param in query_params: + if param not in valid_parameters: + return { + "statusCode": 400, + "body": json.dumps( + f"'{param}' is not a valid query parameter. " + f"Valid query parameters are: {valid_parameters}" + ), + } + + return { + "statusCode": 200, + "data": { + "release_type": release_type, + "query_params": query_params, + }, + } + + +def query_latest_science_files( + session, instrument, start_date, end_date, science_files_to_exclude=None +): + """Query for the latest-version science file paths matching given criteria.""" + science_table = models.ScienceFiles + + # Check if any file in the range has a non-null repointing + has_repointing = ( + session.query(science_table) + .filter( + science_table.instrument == instrument, + science_table.start_date >= start_date, + science_table.start_date <= end_date, + science_table.repointing.isnot(None), + ) + .first() + is not None + ) + + if has_repointing: + max_ver_subq = ( + session.query( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + science_table.repointing, + func.max(science_table.version).label("max_version"), + ) + .group_by( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + science_table.repointing, + ) + .subquery() + ) + latest_science_files = ( + session.query(science_table) + .join( + max_ver_subq, + (science_table.instrument == max_ver_subq.c.instrument) + & (science_table.data_level == max_ver_subq.c.data_level) + & (science_table.descriptor == max_ver_subq.c.descriptor) + & (science_table.start_date == max_ver_subq.c.start_date) + & (science_table.repointing == max_ver_subq.c.repointing) + & (science_table.version == max_ver_subq.c.max_version), + ) + .filter( + science_table.instrument == instrument, + science_table.start_date >= start_date, + science_table.start_date <= end_date, + ) + ) + else: + max_ver_subq = ( + session.query( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + func.max(science_table.version).label("max_version"), + ) + .group_by( + science_table.instrument, + science_table.data_level, + science_table.descriptor, + science_table.start_date, + ) + .subquery() + ) + latest_science_files = ( + session.query(science_table) + .join( + max_ver_subq, + (science_table.instrument == max_ver_subq.c.instrument) + & (science_table.data_level == max_ver_subq.c.data_level) + & (science_table.descriptor == max_ver_subq.c.descriptor) + & (science_table.start_date == max_ver_subq.c.start_date) + & (science_table.version == max_ver_subq.c.max_version), + ) + .filter( + science_table.instrument == instrument, + science_table.start_date >= start_date, + science_table.start_date <= end_date, + ) + ) + if science_files_to_exclude: + latest_science_files = latest_science_files.filter( + ~science_table.file_path.in_(science_files_to_exclude) + ) + results = list(latest_science_files) + logger.info(f"Found {len(results)} science file(s) for instrument={instrument}") + return results + + +def get_latest_ancillary_files( + session, + instrument: str, + start_date: datetime.datetime, + end_date: datetime.datetime, + ancillary_files_to_exclude: list | None = None, +) -> list: + """Get latest-version ancillary files for an instrument over a date range. + + The function retrieves files in two groups based on overlap with date range: + + Files with explicit end_date in their filename: + overlaps if start_date <= query_end AND end_date >= query_start + + Files without end_date in their filename and considered valid until + the next file with start_date after it appears: + overlaps if start_date <= query_end AND + (next_file_start >= query_start OR no next file) + + Parameters + ---------- + session : orm session + Database session. + instrument : str + Instrument name. + start_date : datetime.datetime + Start of query date range. + end_date : datetime.datetime + End of query date range. + ancillary_files_to_exclude : list, optional + List of ancillary file paths to exclude from results, by default None + + Returns + ------- + list + List of file paths ordered by file_path. + """ + ancillary_table = models.AncillaryFiles + + # Step 1: Get latest version per (descriptor, start_date, end_date) + # Filter by instrument early to reduce data processed + row_num_col = ( + func.row_number() + .over( + partition_by=[ + ancillary_table.descriptor, + ancillary_table.start_date, + ancillary_table.end_date, + ], + order_by=ancillary_table.version.desc(), + ) + .label("row_num") + ) + + latest_versions = ( + session.query( + ancillary_table.file_path, + ancillary_table.descriptor, + ancillary_table.start_date, + ancillary_table.end_date, + ) + .filter(ancillary_table.instrument == instrument) + .add_columns(row_num_col) + .subquery() + ) + + latest = ( + session.query( + latest_versions.c.file_path, + latest_versions.c.descriptor, + latest_versions.c.start_date, + latest_versions.c.end_date, + ) + .filter(latest_versions.c.row_num == 1) + .subquery() + ) + + # Step 2: Files WITH end_date - simple overlap check + with_end_date_query = session.query(latest.c.file_path).filter( + latest.c.end_date.isnot(None), + latest.c.start_date <= end_date, + latest.c.end_date >= start_date, + ) + + # Step 3: Files WITHOUT end_date - use LEAD() to find coverage end + next_start_col = ( + func.lead(latest.c.start_date) + .over(partition_by=latest.c.descriptor, order_by=latest.c.start_date) + .label("next_start_date") + ) + + no_end_with_next = ( + session.query(latest.c.file_path, latest.c.start_date, next_start_col) + .filter(latest.c.end_date.is_(None)) + .subquery() + ) + + no_end_date_query = session.query(no_end_with_next.c.file_path).filter( + no_end_with_next.c.start_date <= end_date, + or_( + no_end_with_next.c.next_start_date.is_(None), + no_end_with_next.c.next_start_date > start_date, + ), + ) + + # Combine + combined = union_all(with_end_date_query, no_end_date_query).order_by( + text("file_path") + ) + + # Now exclude any files in the exclude list + if ancillary_files_to_exclude: + combined = combined.where(~combined.c.file_path.in_(ancillary_files_to_exclude)) + + ancillary_file_paths = [row[0] for row in session.execute(combined).fetchall()] + if not ancillary_file_paths: + logger.info(f"Found 0 ancillary file(s) for instrument={instrument}") + return [] + + results = list( + session.query(models.AncillaryFiles).filter( + models.AncillaryFiles.file_path.in_(ancillary_file_paths) + ) + ) + logger.info(f"Found {len(results)} ancillary file(s) for instrument={instrument}") + return results + + +def download_read_file(exception_list_file_path): + """Download a manifest file from S3 and group its entries by file type. + + Parameters + ---------- + exception_list_file_path : str + S3 path to the manifest text file. Each line is an IMAP file path. + + Returns + ------- + tuple[list[str], list[str]] + A tuple of (science_files, ancillary_files) where each entry is the + file path string listed in the manifest. + """ + # Create the proper file path object based on the extension and filename + file_path = Path(exception_list_file_path) + path_obj = generate_imap_file_path(file_path.name) + + s3_file_path = ( + path_obj.construct_path() + .relative_to(imap_data_access.config["DATA_DIR"]) + .as_posix() + ) + + logger.debug(f"Downloading manifest file from S3 path: {s3_file_path}") + download_path = download_from_s3(s3_file_path) + logger.debug(f"Local path after download: {download_path}") + lines = download_path.read_text(encoding="utf-8").splitlines() + + science_files = [] + ancillary_files = [] + for line in lines: + filename = line.strip() + if not filename: + continue + file_obj = imap_data_access.file_validation.generate_imap_file_path(filename) + if isinstance(file_obj, ScienceFilePath): + science_files.append(filename) + elif isinstance(file_obj, AncillaryFilePath): + ancillary_files.append(filename) + else: + logger.warning(f"Unrecognized file type in manifest, skipping: {filename}") + + return science_files, ancillary_files + + +def release_type_handler(query_params): + """Handle 'release' type requests.""" + start_date = datetime.datetime.strptime(query_params["start_date"], "%Y%m%d") + end_date = datetime.datetime.strptime(query_params["end_date"], "%Y%m%d") + exclude_file = query_params.get("exclude_file", None) + + with db.Session() as session: + # Query for withhold files to exclude from release. + science_files_to_exclude = [] + ancillary_files_to_exclude = [] + + if exclude_file: + science_files_to_exclude, ancillary_files_to_exclude = download_read_file( + exclude_file + ) + + science_files_to_update = query_latest_science_files( + session, + query_params["instrument"], + start_date, + end_date, + science_files_to_exclude=science_files_to_exclude, + ) + + ancillary_files_to_update = get_latest_ancillary_files( + session, + query_params["instrument"], + start_date, + end_date, + ancillary_files_to_exclude=ancillary_files_to_exclude, + ) + + # Directly update ORM objects + for obj in science_files_to_update: + obj.released = True + for obj in ancillary_files_to_update: + obj.released = True + session.commit() + + +def early_release_type_handler(query_params): + """Handle early-release requests using manifest file.""" + manifest_file = query_params["manifest_file"] + + science_files, ancillary_files = download_read_file(manifest_file) + + with db.Session() as session: + if science_files: + session.query(models.ScienceFiles).filter( + models.ScienceFiles.file_path.in_(science_files) + ).update( + {models.ScienceFiles.released: True}, + synchronize_session=False, + ) + + if ancillary_files: + session.query(models.AncillaryFiles).filter( + models.AncillaryFiles.file_path.in_(ancillary_files) + ).update( + {models.AncillaryFiles.released: True}, + synchronize_session=False, + ) + + session.commit() + + logger.info( + f"Early released " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ) + + return { + "statusCode": 200, + "body": json.dumps( + f"Successfully early released " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ), + } + + +def unrelease_type_handler(query_params): + """Handle unrelease requests using manifest file.""" + manifest_file = query_params["manifest_file"] + + science_files, ancillary_files = download_read_file(manifest_file) + + with db.Session() as session: + if science_files: + session.query(models.ScienceFiles).filter( + models.ScienceFiles.file_path.in_(science_files) + ).update( + {models.ScienceFiles.released: False}, + synchronize_session=False, + ) + + if ancillary_files: + session.query(models.AncillaryFiles).filter( + models.AncillaryFiles.file_path.in_(ancillary_files) + ).update( + {models.AncillaryFiles.released: False}, + synchronize_session=False, + ) + + session.commit() + + logger.info( + f"Unreleased " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ) + + return { + "statusCode": 200, + "body": json.dumps( + f"Successfully unreleased " + f"{len(science_files)} science files and " + f"{len(ancillary_files)} ancillary files." + ), + } + def lambda_handler(event, context): - """Lambda handler for release API.""" - return {"statusCode": 200, "body": "Release API is working!"} + """Entry point for the release API lambda. + + Required query parameters for 'release' type: + instrument : instrument name (e.g. mag, swe, lo, codice) + start_date : inclusive lower bound on file start_date (YYYYMMDD) + end_date : inclusive upper bound on file start_date (YYYYMMDD) + + Optional parameters for 'release' type: + exclude_file : S3 path to manifest text file listing files to + exclude from release. Each line in the manifest + can contain science or ancillary filename. + + Required parameters for 'early' or 'unrelease' type: + manifest_file : S3 path to manifest text file listing files to + release/unrelease. + + Parameters + ---------- + event : dict + Input event containing ``queryStringParameters``. + context : LambdaContext + Lambda runtime context object. + """ + logger.info("Received release request with event: " + json.dumps(event, indent=2)) + + # Check API key and scope. Only API keys with non-read scopes may release files. + api_key_check = check_api_key(event) + if api_key_check["statusCode"] != 200: + return api_key_check + + # Validate query parameters. + query_validation = validate_query_params(event) + if query_validation["statusCode"] != 200: + return query_validation + + release_type = query_validation["data"]["release_type"] + query_params = query_validation["data"]["query_params"] + + if release_type == "release": + release_type_handler(query_params) + elif release_type == "early-release": + early_release_type_handler(query_params) + elif release_type == "unrelease": + unrelease_type_handler(query_params) + + return { + "statusCode": 200, + "body": json.dumps(f"Successful {release_type} "), + } diff --git a/sds_data_manager/lambda_code/SDSCode/spice_utilities.py b/sds_data_manager/lambda_code/SDSCode/spice_utilities.py index caa6337f1..9f0fa2a82 100644 --- a/sds_data_manager/lambda_code/SDSCode/spice_utilities.py +++ b/sds_data_manager/lambda_code/SDSCode/spice_utilities.py @@ -59,7 +59,7 @@ def download_from_s3(s3_key: str, bucket_name: str | None = None) -> Path: logger.info(f"Downloaded {s3_key} from bucket {bucket_name} to {local_path}") return local_path except Exception as e: - logger.error() + logger.error(e) raise FileNotFoundError( f"Failed to download {s3_key} from bucket {bucket_name}: {e}" ) from e diff --git a/sds_data_manager/lambda_code/idex_processing/requirements.txt b/sds_data_manager/lambda_code/idex_processing/requirements.txt index c2e90d4fa..f0e5fae26 100644 --- a/sds_data_manager/lambda_code/idex_processing/requirements.txt +++ b/sds_data_manager/lambda_code/idex_processing/requirements.txt @@ -238,9 +238,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 imap-processing==1.0.32 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:c6dc7e945d8118e2ad5b48cc2ba98aadfe80c4b26d641b8255172aaebf28b77b \ --hash=sha256:d53ca85c1ca111354074949e479fd082e518416194817bd094513ca048bdf17b diff --git a/sds_data_manager/lambda_code/processing/requirements.txt b/sds_data_manager/lambda_code/processing/requirements.txt index 72be208fe..88c466248 100644 --- a/sds_data_manager/lambda_code/processing/requirements.txt +++ b/sds_data_manager/lambda_code/processing/requirements.txt @@ -238,9 +238,9 @@ greenlet==3.5.0 ; python_version >= "3.11" and python_version < "3.13" and (plat idna==3.13 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \ --hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3 -imap-data-access==0.40.1 ; python_version >= "3.11" and python_version < "3.13" \ - --hash=sha256:184c4db2d9dd7a3dbf3dbc150405a61d12f603b866e166a4ae04bd1df8aac6ab \ - --hash=sha256:8142457ea38b577d05e48fdca31e6199222ab562229a6c0ecb1dbf2b92c84832 +imap-data-access==0.41.0 ; python_version >= "3.11" and python_version < "3.13" \ + --hash=sha256:88c550e41c8e9fcb881787e6911bd21a5b4253be3e5ba7176a51151324588169 \ + --hash=sha256:927966c3d0b16d9a1f8be23abdc34cff28afdec377a6e72ade7ae27496108557 imap-processing==1.0.32 ; python_version >= "3.11" and python_version < "3.13" \ --hash=sha256:c6dc7e945d8118e2ad5b48cc2ba98aadfe80c4b26d641b8255172aaebf28b77b \ --hash=sha256:d53ca85c1ca111354074949e479fd082e518416194817bd094513ca048bdf17b diff --git a/tests/lambda_endpoints/test_release_api.py b/tests/lambda_endpoints/test_release_api.py new file mode 100644 index 000000000..fe701d8f4 --- /dev/null +++ b/tests/lambda_endpoints/test_release_api.py @@ -0,0 +1,353 @@ +"""Integration tests for the Release API. + +These tests verify the six core release behaviors against a live in-memory DB: + +Test Case 1 release (no descriptor) — all matching instrument+date files released +Test Case 2 early-release +Test Case 3 unrelease +Test Case 4 repoint files for a single day +Test Case 5 ancillary files query +""" + +import datetime +from unittest.mock import patch + +from sds_data_manager.lambda_code.SDSCode.api_lambdas import release_api +from sds_data_manager.lambda_code.SDSCode.database import models + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _build_event(query_params, scope="full"): + """Build a minimal API Gateway event for the release API.""" + return { + "queryStringParameters": query_params, + "rawPath": "/api-key/release", + "body": "", + "requestContext": { + "authorizer": {"lambda": {"apiKey": "test-key", "scope": scope}} + }, + } + + +def _science( + session, + *, + file_path, + instrument="hit", + descriptor="hk", + start_date="20250115", + released=False, +): + session.add( + models.ScienceFiles( + file_path=file_path, + instrument=instrument, + data_level="l0", + descriptor=descriptor, + start_date=datetime.datetime.strptime(start_date, "%Y%m%d"), + version="v001", + extension="pkts", + released=released, + ingestion_date=datetime.datetime( + 2025, 1, 20, 0, 0, 0, tzinfo=datetime.timezone.utc + ), + ) + ) + session.commit() + + +# --------------------------------------------------------------------------- +# Test Case 1 release — all files for instrument + date range +# --------------------------------------------------------------------------- + + +def test_release_all_files_in_date_range(session): + """release_type=release with no descriptor releases every matching file. + + Two files for the target instrument within the date range and one file + outside the range must remain unreleased. + """ + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250110", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250120", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250201", + ) # outside range + + params = { + "instrument": "hit", + "start_date": "20250101", + "end_date": "20250131", + "release_type": "release", + "release_number": "1", + } + result = release_api.lambda_handler( + event=_build_event(params), + context={}, + ) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts"] is True, ( + "In-range file should be released" + ) + assert rows["imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts"] is True, ( + "In-range file should be released" + ) + assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is False, ( + "Out-of-range file must stay unreleased" + ) + + +# --------------------------------------------------------------------------- +# Test Case 2 early-release +# --------------------------------------------------------------------------- + + +@patch( + "sds_data_manager.lambda_code.SDSCode.api_lambdas.release_api.download_read_file" +) +def test_early_release(mock_download_read_file, session): + # Provide the manifest file with the two in-range files + science_files = [ + "imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + "imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + ] + ancillary_files = [] + mock_download_read_file.return_value = (science_files, ancillary_files) + + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250110", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250120", + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250201", + ) + + manifest_file = "s3://dummy-bucket/manifest.txt" + result = release_api.lambda_handler( + event=_build_event( + { + "release_type": "early-release", + "manifest_file": manifest_file, + } + ), + context={}, + ) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts"] is True + assert rows["imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts"] is True + assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is False + + +# --------------------------------------------------------------------------- +# Test Case 3 unrelease +# --------------------------------------------------------------------------- + + +@patch( + "sds_data_manager.lambda_code.SDSCode.api_lambdas.release_api.download_read_file" +) +def test_unrelease_all_files_in_date_range(mock_download_read_file, session): + # Provide the manifest file with the two in-range files + science_files = [ + "imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + "imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + ] + ancillary_files = [] + mock_download_read_file.return_value = (science_files, ancillary_files) + + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250110", + released=True, + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts", + instrument="hit", + descriptor="sci", + start_date="20250120", + released=True, + ) + _science( + session, + file_path="imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts", + instrument="hit", + descriptor="hk", + start_date="20250201", + released=True, + ) # outside range + + manifest_file = "s3://dummy-bucket/manifest.txt" + result = release_api.lambda_handler( + event=_build_event( + { + "release_type": "unrelease", + "manifest_file": manifest_file, + } + ), + context={}, + ) + + assert result["statusCode"] == 200 + + rows = {r.file_path: r.released for r in session.query(models.ScienceFiles).all()} + assert rows["imap/hit/l0/imap_hit_l0_hk_20250110_v001.pkts"] is False, ( + "In-range file must be unreleased" + ) + assert rows["imap/hit/l0/imap_hit_l0_sci_20250120_v001.pkts"] is False, ( + "In-range file must be unreleased" + ) + assert rows["imap/hit/l0/imap_hit_l0_hk_20250201_v001.pkts"] is True, ( + "Out-of-range file must remain released" + ) + + +# --------------------------------------------------------------------------- +# Test Case 4 repoint files for a single day +# --------------------------------------------------------------------------- + + +def test_release_repoint_files_date_range(session): + """Test multiple repoint files for a single day.""" + # April 7th, Hi has three repoint files with different repointing and version + files = [ + ("imap_hi_l1a_45sensor-hk_20260407-repoint00209_v001.cdf", 209, "v001"), + ("imap_hi_l1a_45sensor-hk_20260407-repoint00210_v002.cdf", 210, "v002"), + ("imap_hi_l1a_45sensor-hk_20260407-repoint00211_v003.cdf", 211, "v003"), + ] + for file_path, repointing, version in files: + session.add( + models.ScienceFiles( + file_path=file_path, + instrument="hi", + data_level="l1a", + descriptor="45sensor-hk", + start_date=datetime.datetime.strptime("20260407", "%Y%m%d"), + repointing=repointing, + version=version, + extension="cdf", + released=False, + ingestion_date=datetime.datetime(2026, 4, 8, 0, 0, 0), + ) + ) + session.commit() + + # Query for all files on this date + results = release_api.query_latest_science_files( + session, + instrument="hi", + start_date=datetime.datetime.strptime("20260407", "%Y%m%d"), + end_date=datetime.datetime.strptime("20260407", "%Y%m%d"), + ) + file_paths = sorted([obj.file_path for obj in results]) + assert file_paths == [ + "imap_hi_l1a_45sensor-hk_20260407-repoint00209_v001.cdf", + "imap_hi_l1a_45sensor-hk_20260407-repoint00210_v002.cdf", + "imap_hi_l1a_45sensor-hk_20260407-repoint00211_v003.cdf", + ], f"Expected all repoint files, got: {file_paths}" + + +# --------------------------------------------------------------------------- +# Test Case 5 ancillary files released from manifest +# --------------------------------------------------------------------------- +def test_release_ancillary_files_in_date_range(session): + """Ancillary files in manifest are released properly.""" + # Add all as unreleased + session.add( + models.AncillaryFiles( + file_path="imap/ancillary/codice/imap_codice_l1a-sci-lut_20260129_v002.json", + instrument="codice", + descriptor="l1a-sci-lut", + start_date=datetime.datetime.strptime("20260129", "%Y%m%d"), + end_date=None, + version="v002", + extension="json", + released=False, + ingestion_date=datetime.datetime(2026, 1, 30, 0, 0, 0), + ) + ) + session.add( + models.AncillaryFiles( + file_path="imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_20260403_v001.json", + instrument="codice", + descriptor="l1a-sci-lut", + start_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + end_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + version="v001", + extension="json", + released=False, + ingestion_date=datetime.datetime(2026, 4, 4, 0, 0, 0), + ) + ) + # Add an out-of-range ancillary file (should not be released) + session.add( + models.AncillaryFiles( + file_path="imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_v002.json", + instrument="codice", + descriptor="l1a-sci-lut", + start_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + end_date=None, + version="v002", + extension="json", + released=False, + ingestion_date=datetime.datetime(2026, 1, 2, 0, 0, 0), + ) + ) + session.commit() + + result = release_api.get_latest_ancillary_files( + session, + instrument="codice", + start_date=datetime.datetime.strptime("20260403", "%Y%m%d"), + end_date=datetime.datetime.strptime("20260430", "%Y%m%d"), + ) + expected_ancillary_files = [ + "imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_20260403_v001.json", + "imap/ancillary/codice/imap_codice_l1a-sci-lut_20260403_v002.json", + ] + assert len(result) == 2, f"Expected 2 ancillary files, got {len(result)}" + assert sorted([f.file_path for f in result]) == expected_ancillary_files, ( + f"Expected ancillary files {expected_ancillary_files}, " + f"got {[f.file_path for f in result]}" + )