From 78ca86deea5333a9fbe62eb2cce7e3b523d5e6d0 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:32:55 -0700 Subject: [PATCH 01/59] add upload script --- cellpack/bin/upload_to_client.py | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py new file mode 100644 index 00000000..93d94f3a --- /dev/null +++ b/cellpack/bin/upload_to_client.py @@ -0,0 +1,51 @@ +import json +import fire + +from cellpack.autopack.FirebaseHandler import FirebaseHandler +from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.loaders.config_loader import ConfigLoader +from cellpack.autopack.loaders.recipe_loader import RecipeLoader +from cellpack.bin.upload import get_recipe_metadata + +def upload_to_client( + recipe: str, + config: str, + fields: str, + name: str +): + """ + Uploads recipe, config, and editable fields, read from specified + JSON files, to the database for client access + """ + db_handler = FirebaseHandler() + recipe_id = "" + config_id = "" + editable_fields_ids = [] + if FirebaseHandler._initialized: + db_handler = DBUploader(db_handler) + if recipe: + recipe_loader = RecipeLoader(recipe) + recipe_full_data = recipe_loader._read(resolve_inheritance=False) + recipe_meta_data = get_recipe_metadata(recipe_loader) + recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) + if config: + config_data = ConfigLoader(config).config + config_id = db_handler.upload_config(config_data, config) + if fields: + editable_fields_data = json.load(open(fields, "r")) + for field in editable_fields_data.get("editable_fields", []): + id, _ = db_handler.upload_data("editable_fields", field) + editable_fields_ids.append(id) + recipe_metadata = { + "name": name, + "recipe": recipe_id, + "config": config_id, + "editable_fields": editable_fields_ids, + } + db_handler.upload_data("client_recipes", 
recipe_metadata) + +def main(): + fire.Fire(upload_to_client) + +if __name__ == "__main__": + main() From 78b9b0a756443d7aa07d359ba63225fdfdb999a6 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:40:00 -0700 Subject: [PATCH 02/59] add example data and more documentation --- cellpack/bin/upload_to_client.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 93d94f3a..9536defc 100644 --- a/cellpack/bin/upload_to_client.py +++ b/cellpack/bin/upload_to_client.py @@ -16,6 +16,15 @@ def upload_to_client( """ Uploads recipe, config, and editable fields, read from specified JSON files, to the database for client access + + :param recipe: string argument + path to local recipe file to upload to firebase + :param config: string argument + path to local config file to upload to firebase + :param fields: string argument + path to local editable fields file to upload to firebase + :param name: string argument + display name for recipe in client selection menu """ db_handler = FirebaseHandler() recipe_id = "" From a9b056bc0919bfacd7bfb3c3cb29f4c912fa996e Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:42:16 -0700 Subject: [PATCH 03/59] point to correct collection --- cellpack/bin/upload_to_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 9536defc..f778aa53 100644 --- a/cellpack/bin/upload_to_client.py +++ b/cellpack/bin/upload_to_client.py @@ -51,7 +51,9 @@ def upload_to_client( "config": config_id, "editable_fields": editable_fields_ids, } - db_handler.upload_data("client_recipes", recipe_metadata) + + # Upload the combined recipe metadata to example_packings collection for client + db_handler.upload_data("example_packings", recipe_metadata) def main(): fire.Fire(upload_to_client) From f5f7a6910a91aea4089ff861264dea6127c24c4c Mon Sep 17 00:00:00 2001 From: 
ascibisz Date: Wed, 22 Oct 2025 14:29:03 -0700 Subject: [PATCH 04/59] have server accept recipe as json object in body of request --- cellpack/autopack/__init__.py | 8 +++++++- cellpack/autopack/loaders/recipe_loader.py | 5 +++-- cellpack/bin/pack.py | 4 ++-- docker/server.py | 14 +++++++++----- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 2219525b..4bfd996e 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,8 +411,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False + filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None ): + if recipe_obj is not None: + composition = DBRecipeLoader.remove_empty( + recipe_obj.get("composition", {}) + ) + recipe_obj["composition"] = composition + return recipe_obj, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 84cd78ac..4b2930c8 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False): + def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, recipe_obj=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,6 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = 
save_converted_recipe + self.recipe_obj = recipe_obj # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -169,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker + self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 9db53937..fdbfede0 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker + recipe, packing_config_data["save_converted_recipe"], docker, recipe_str ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 581e0151..6e625314 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,11 +12,11 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id): + async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True) + pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) @@ -37,8 +37,12 @@ async def health_check(self, 
request: web.Request) -> web.Response: return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: - recipe = request.rel_url.query.get("recipe") - if recipe is None: + recipe = request.rel_url.query.get("recipe") or "" + if request.can_read_body: + body = await request.json() + else: + body = None + if not recipe and not body: raise web.HTTPBadRequest( "Pack requests must include recipe as a query param" ) @@ -46,7 +50,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id)) + packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion From f87915ac4c8ee4137333ee8ce1d721214446ad23 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 14:29:49 -0700 Subject: [PATCH 05/59] update documentation --- docker/Dockerfile.ecs | 3 ++- docs/DOCKER.md | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 7a303c02..2016c222 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,7 +4,8 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -r requirements/linux/requirements.txt --root-user-action=ignore +RUN python -m pip install . +RUN python -m pip install aiohttp mdutils EXPOSE 80 diff --git a/docs/DOCKER.md b/docs/DOCKER.md index a1214a33..48e4b6dd 100644 --- a/docs/DOCKER.md +++ b/docs/DOCKER.md @@ -13,6 +13,6 @@ ## AWS ECS Docker Image 1. Build image, running `docker build -f docker/Dockerfile.ecs -t [CONTAINER-NAME] .` 2. Run packings in the container, running: `docker run -v ~/.aws:/root/.aws -p 80:80 [CONTAINER-NAME]` -3. 
Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:8443/hello` in your browser. -4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/pack?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. +3. Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:80/hello` in your browser. +4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/start-packing?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. 5. Verify that the packing result path was uploaded to the firebase results table, with the job id specified in the response from the request in step 4.The result simularium file can be found at the s3 path specified there. \ No newline at end of file From 1f2d2e345523914d135379c826bdc00e51fbe6c0 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 16:41:35 -0700 Subject: [PATCH 06/59] remove accidental dockerfile changes --- docker/Dockerfile.ecs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 2016c222..5ed89b7b 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,8 +4,7 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -RUN python -m pip install aiohttp mdutils +RUN python -m pip install . 
--root-user-action=ignore EXPOSE 80 From bd8ec42d33b005ec06316de1a386478eb4cd24f7 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Thu, 23 Oct 2025 13:42:23 -0700 Subject: [PATCH 07/59] rename param json_recipe --- cellpack/autopack/__init__.py | 10 +++++----- cellpack/autopack/loaders/recipe_loader.py | 6 +++--- cellpack/bin/data-manifest.json | 10 ++++++++++ cellpack/bin/pack.py | 4 ++-- docker/server.py | 2 +- 5 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 4bfd996e..65d32078 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,14 +411,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None + filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None ): - if recipe_obj is not None: + if json_recipe is not None: composition = DBRecipeLoader.remove_empty( - recipe_obj.get("composition", {}) + json_recipe.get("composition", {}) ) - recipe_obj["composition"] = composition - return recipe_obj, None, False + json_recipe["composition"] = composition + return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b2930c8..23044443 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, recipe_obj=None): + def __init__(self, input_file_path, save_converted_recipe=False, 
use_docker=False, json_recipe=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,7 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = save_converted_recipe - self.recipe_obj = recipe_obj + self.json_recipe = json_recipe # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -170,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj + self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json new file mode 100644 index 00000000..0a5e1f2d --- /dev/null +++ b/cellpack/bin/data-manifest.json @@ -0,0 +1,10 @@ +{ + "assay-dev": { + "data_source": "https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", + "static_files": [] + }, + "test": { + "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", + "static_files": [] + } +} \ No newline at end of file diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index fdbfede0..dadf0f31 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( 
packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, recipe_str + recipe, packing_config_data["save_converted_recipe"], docker, json_recipe ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 6e625314..ce6da7ee 100644 --- a/docker/server.py +++ b/docker/server.py @@ -16,7 +16,7 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) + pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 358158eaa5aae8641d8c4c14574a03dbe1a3cb65 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:42:35 -0800 Subject: [PATCH 08/59] remove file that shouldn't be in this PR --- cellpack/bin/upload_to_client.py | 62 -------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py deleted file mode 100644 index f778aa53..00000000 --- a/cellpack/bin/upload_to_client.py +++ /dev/null @@ -1,62 +0,0 @@ -import json -import fire - -from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBUploader -from cellpack.autopack.loaders.config_loader import ConfigLoader -from cellpack.autopack.loaders.recipe_loader import RecipeLoader -from cellpack.bin.upload import get_recipe_metadata - -def upload_to_client( - recipe: str, - config: str, - fields: str, - name: str -): - """ - Uploads recipe, config, and editable fields, read from specified - JSON files, to the database for client access - - :param recipe: string argument - path to local recipe file to upload 
to firebase - :param config: string argument - path to local config file to upload to firebase - :param fields: string argument - path to local editable fields file to upload to firebase - :param name: string argument - display name for recipe in client selection menu - """ - db_handler = FirebaseHandler() - recipe_id = "" - config_id = "" - editable_fields_ids = [] - if FirebaseHandler._initialized: - db_handler = DBUploader(db_handler) - if recipe: - recipe_loader = RecipeLoader(recipe) - recipe_full_data = recipe_loader._read(resolve_inheritance=False) - recipe_meta_data = get_recipe_metadata(recipe_loader) - recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) - if config: - config_data = ConfigLoader(config).config - config_id = db_handler.upload_config(config_data, config) - if fields: - editable_fields_data = json.load(open(fields, "r")) - for field in editable_fields_data.get("editable_fields", []): - id, _ = db_handler.upload_data("editable_fields", field) - editable_fields_ids.append(id) - recipe_metadata = { - "name": name, - "recipe": recipe_id, - "config": config_id, - "editable_fields": editable_fields_ids, - } - - # Upload the combined recipe metadata to example_packings collection for client - db_handler.upload_data("example_packings", recipe_metadata) - -def main(): - fire.Fire(upload_to_client) - -if __name__ == "__main__": - main() From f0beaa1fc28cfb0242bfc428e8d7879716f3d7bd Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:43:04 -0800 Subject: [PATCH 09/59] remove accidental file --- cellpack/bin/data-manifest.json | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json deleted file mode 100644 index 0a5e1f2d..00000000 --- a/cellpack/bin/data-manifest.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "assay-dev": { - "data_source": 
"https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", - "static_files": [] - }, - "test": { - "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", - "static_files": [] - } -} \ No newline at end of file From a54ffa1d8393f89d2afb2926864bd1381e7870f2 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:52:19 -0800 Subject: [PATCH 10/59] lint fixes --- cellpack/autopack/__init__.py | 11 +++++++---- cellpack/autopack/loaders/recipe_loader.py | 13 +++++++++++-- cellpack/bin/pack.py | 7 ++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 65d32078..ce06494d 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,12 +411,15 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None + filename, + destination="", + cache="geometries", + force=None, + use_docker=False, + json_recipe=None, ): if json_recipe is not None: - composition = DBRecipeLoader.remove_empty( - json_recipe.get("composition", {}) - ) + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe, None, False if is_remote_path(filename): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 23044443..cf87a10c 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,13 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, json_recipe=None): + def __init__( + self, + input_file_path, + 
save_converted_recipe=False, + use_docker=False, + json_recipe=None, + ): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -170,7 +176,10 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe + self.file_path, + cache="recipes", + use_docker=use_docker, + json_recipe=self.json_recipe, ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index dadf0f31..99186c9d 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,12 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, + json_recipe=None, ): """ Initializes an autopack packing from the command line From 3d01db317181e78eb0b9c928b6959657d4b9b778 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:52:55 -0800 Subject: [PATCH 11/59] refactor to try to improve clarity of json recipe vs file path --- cellpack/autopack/__init__.py | 9 ++--- cellpack/autopack/loaders/recipe_loader.py | 17 +++++--- cellpack/bin/pack.py | 45 ++++++++++++++++++++-- docker/server.py | 7 +++- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index ce06494d..4520d388 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -409,6 +409,10 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data +def load_json_recipe(json_recipe): + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) + json_recipe["composition"] = composition + return 
json_recipe def load_file( filename, @@ -416,12 +420,7 @@ def load_file( cache="geometries", force=None, use_docker=False, - json_recipe=None, ): - if json_recipe is not None: - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index cf87a10c..df9e26ac 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -175,12 +175,17 @@ def _migrate_version(self, old_recipe): ) def _read(self, resolve_inheritance=True, use_docker=False): - new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - cache="recipes", - use_docker=use_docker, - json_recipe=self.json_recipe, - ) + database_name = None + is_unnested_firebase = False + if self.json_recipe is not None: + new_values = autopack.load_json_recipe(self.json_recipe) + else: + new_values, database_name, is_unnested_firebase = autopack.load_file( + self.file_path, + cache="recipes", + use_docker=use_docker, + ) + if database_name == "firebase": if is_unnested_firebase: objects = new_values.get("objects", {}) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 99186c9d..39003eda 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -30,7 +30,6 @@ def pack( analysis_config_path=None, docker=False, validate=True, - json_recipe=None, ): """ Initializes an autopack packing from the command line @@ -42,12 +41,52 @@ def pack( :return: void """ - packing_config_data = ConfigLoader(config_path, docker).config + config_loader = ConfigLoader(config_path, docker) recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, json_recipe + recipe, 
config_loader.config["save_converted_recipe"], docker ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def pack_from_json( + json_recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, +): + """ + Initializes an autopack packing from the command line + :param json: JSON object representing the recipe + :param config_path: string argument, path to packing config file + :param analysis_config_path: string argument, path to analysis config file + :param docker: boolean argument, are we using docker + :param validate: boolean argument, validate recipe before packing + + :return: void + """ + config_loader = ConfigLoader(config_path, docker) + + recipe_loader = RecipeLoader( + "", config_loader.config["save_converted_recipe"], docker, json_recipe + ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data + packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index ce6da7ee..6bbf338c 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack +from cellpack.bin.pack import pack, pack_from_json SERVER_PORT = 80 @@ -16,7 +16,10 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) + if body is None: + pack(recipe=recipe, config_path=config, docker=True) + else: + 
pack_from_json(json_recipe=body, config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 529e15b40e93ba0c60c526acf16646ed2eb04d61 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:56:38 -0800 Subject: [PATCH 12/59] lint fixes --- cellpack/autopack/__init__.py | 2 ++ cellpack/bin/pack.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 4520d388..f04bcb43 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -409,11 +409,13 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data + def load_json_recipe(json_recipe): composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe + def load_file( filename, destination="", diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 39003eda..80ef6784 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -54,6 +54,7 @@ def pack( validate ) + def pack_from_json( json_recipe, config_path=None, @@ -84,6 +85,7 @@ def pack_from_json( validate ) + def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data packing_config_data = config_loader.config From 63514c90cd54610a6ec759423c062cc52fd2ab37 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:57:23 -0800 Subject: [PATCH 13/59] lint fix --- cellpack/bin/pack.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 80ef6784..b8446f33 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -47,11 +47,7 @@ def pack( recipe, config_loader.config["save_converted_recipe"], docker ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, 
config_loader, analysis_config_path, docker, validate ) @@ -78,11 +74,7 @@ def pack_from_json( "", config_loader.config["save_converted_recipe"], docker, json_recipe ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, config_loader, analysis_config_path, docker, validate ) From b2440cd3f905caf29704a68db5edb2571f8e871d Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:00:38 -0800 Subject: [PATCH 14/59] minimize changeset --- cellpack/autopack/__init__.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index f04bcb43..e6701b52 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -416,13 +416,7 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file( - filename, - destination="", - cache="geometries", - force=None, - use_docker=False, -): +def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: From 470e3a18ae7e4860eac53f0ff6637a920ad2b16b Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:02:35 -0800 Subject: [PATCH 15/59] minimize changeset --- cellpack/autopack/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index e6701b52..dbc90c58 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -416,7 +416,9 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): +def load_file( + filename, destination="", cache="geometries", force=None, use_docker=False +): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == 
DATABASE_IDS.GITHUB: From 8a3489895a71298652056ab27dd73c391aaa0198 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:56:18 -0800 Subject: [PATCH 16/59] simplify changeset --- cellpack/autopack/__init__.py | 6 --- cellpack/autopack/loaders/recipe_loader.py | 15 +++++-- cellpack/bin/pack.py | 47 ++++------------------ docker/server.py | 8 ++-- 4 files changed, 23 insertions(+), 53 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index dbc90c58..2219525b 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -410,12 +410,6 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) return sphere_data -def load_json_recipe(json_recipe): - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe - - def load_file( filename, destination="", cache="geometries", force=None, use_docker=False ): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index df9e26ac..4b89c816 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -56,6 +56,15 @@ def __init__( self.recipe_data = self._read(use_docker=use_docker) + @classmethod + def from_json(cls, json_recipe, save_converted_recipe=False, use_docker=False): + return cls( + input_file_path="", + save_converted_recipe=save_converted_recipe, + use_docker=use_docker, + json_recipe=json_recipe, + ) + @staticmethod def _resolve_object(key, objects): current_object = objects[key] @@ -177,9 +186,9 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): database_name = None is_unnested_firebase = False - if self.json_recipe is not None: - new_values = autopack.load_json_recipe(self.json_recipe) - else: + new_values = self.json_recipe + if new_values is None: + # Read recipe from filepath new_values, 
database_name, is_unnested_firebase = autopack.load_file( self.file_path, cache="recipes", diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index b8446f33..6c603cbb 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -33,7 +33,7 @@ def pack( ): """ Initializes an autopack packing from the command line - :param recipe: string argument, path to recipe + :param recipe: string argument, path to recipe file, or a dictionary representing a recipe :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker @@ -41,45 +41,14 @@ def pack( :return: void """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - recipe, config_loader.config["save_converted_recipe"], docker - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def pack_from_json( - json_recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, -): - """ - Initializes an autopack packing from the command line - :param json: JSON object representing the recipe - :param config_path: string argument, path to packing config file - :param analysis_config_path: string argument, path to analysis config file - :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing - - :return: void - """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - "", config_loader.config["save_converted_recipe"], docker, json_recipe - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): + if isinstance(recipe, dict): + # Load recipe from JSON dictionary + recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) + else: + # Load recipe 
from file path + recipe_loader = RecipeLoader(recipe, use_docker=docker) recipe_data = recipe_loader.recipe_data + config_loader = ConfigLoader(config_path, docker) packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: diff --git a/docker/server.py b/docker/server.py index 6bbf338c..257e66ec 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack, pack_from_json +from cellpack.bin.pack import pack SERVER_PORT = 80 @@ -16,10 +16,8 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - if body is None: - pack(recipe=recipe, config_path=config, docker=True) - else: - pack_from_json(json_recipe=body, config_path=config, docker=True) + # Pack JSON recipe in body if provided, otherwise use recipe path + pack(recipe=(body if body else recipe), config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 45d438ae8788b6d999ae546fe4556742f30cc05f Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:03:12 -0800 Subject: [PATCH 17/59] code cleanup --- cellpack/autopack/loaders/recipe_loader.py | 4 +--- cellpack/bin/pack.py | 10 +++------- docker/server.py | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b89c816..e40b469e 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,9 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - 
cache="recipes", - use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker, ) if database_name == "firebase": diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 6c603cbb..23a69a57 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,11 +25,7 @@ def pack( - recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True ): """ Initializes an autopack packing from the command line @@ -41,6 +37,8 @@ def pack( :return: void """ + packing_config_data = ConfigLoader(config_path, docker).config + if isinstance(recipe, dict): # Load recipe from JSON dictionary recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) @@ -48,8 +46,6 @@ def pack( # Load recipe from file path recipe_loader = RecipeLoader(recipe, use_docker=docker) recipe_data = recipe_loader.recipe_data - config_loader = ConfigLoader(config_path, docker) - packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index 257e66ec..9b1ce105 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,7 +12,7 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id, body=None): + async def run_packing(self, job_id, recipe=None, config=None, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: @@ -51,7 +51,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) + packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) # Keep track of task references to prevent them from 
being garbage # collected, then discard after task completion From c8fe120ef4b9ecd392a8f8030307d07fee176f1f Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:05:28 -0800 Subject: [PATCH 18/59] minimize changeset --- cellpack/bin/pack.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 23a69a57..83b22264 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -44,7 +44,9 @@ def pack( recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) else: # Load recipe from file path - recipe_loader = RecipeLoader(recipe, use_docker=docker) + recipe_loader = RecipeLoader( + recipe, packing_config_data["save_converted_recipe"], docker + ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} if analysis_config_path is not None: From ecc645d4ca013c56e0ca6e8f82fbb6c4540f5c37 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:07:30 -0800 Subject: [PATCH 19/59] remove trailing comma --- cellpack/autopack/loaders/recipe_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index e40b469e..bbdb662a 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,7 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker ) if database_name == "firebase": From 17ba17c284349e249c1c7892d6b6405880cbd8fe Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:11:58 -0800 Subject: [PATCH 20/59] Feature/firebase lookup (#445) * remove os fetch for job_id * use dedup_hash instead of job id * proposal: get hash 
from recipe loader * renaming and add TODOs * format * rename param to hash * remove unused validate param and doc strings in pack * simplify get_ dedup_hash * refactor job_status update * cleanup * fix upload_job_status to handle awshandler * pass dedup_pash to env for fetching across files * add tests * format1 * format test --- cellpack/autopack/DBRecipeHandler.py | 87 +++++++++---------- .../upy/simularium/simularium_helper.py | 12 ++- cellpack/autopack/writers/__init__.py | 5 +- cellpack/bin/pack.py | 44 +++++----- cellpack/tests/test_db_uploader.py | 53 +++++++++++ docker/server.py | 55 +++++++----- 6 files changed, 160 insertions(+), 96 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 8b4e8578..00fc8bb7 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, job_id=None): + def upload_result_metadata(self, file_name, url, dedup_hash=None): """ Upload the metadata of the result file to the database. 
""" @@ -543,28 +543,40 @@ def upload_result_metadata(self, file_name, url, job_id=None): "user": username, "timestamp": timestamp, "url": url, - "batch_job_id": job_id, + "dedup_hash": dedup_hash, }, ) - if job_id: - self.upload_job_status(job_id, "DONE", result_path=url) + if dedup_hash: + self.upload_job_status(dedup_hash, "DONE", result_path=url) - def upload_job_status(self, job_id, status, result_path=None, error_message=None): + def upload_job_status( + self, + dedup_hash, + status, + result_path=None, + error_message=None, + outputs_directory=None, + ): """ - Update status for a given job ID + Update status for a given dedup_hash """ if self.db: - timestamp = self.db.create_timestamp() - self.db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "status": str(status), - "result_path": result_path, - "error_message": error_message, - }, - ) + db_handler = self.db + # If db is AWSHandler, switch to firebase handler for job status updates + if hasattr(self.db, "s3_client"): + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE) + db_handler = handler(default_db="staging") + timestamp = db_handler.create_timestamp() + data = { + "timestamp": timestamp, + "status": str(status), + "error_message": error_message, + } + if result_path: + data["result_path"] = result_path + if outputs_directory: + data["outputs_directory"] = outputs_directory + db_handler.update_or_create("job_status", dedup_hash, data) def save_recipe_and_config_to_output(self, output_folder, config_data, recipe_data): output_path = Path(output_folder) @@ -583,7 +595,7 @@ def upload_packing_results_workflow( self, source_folder, recipe_name, - job_id, + dedup_hash, config_data, recipe_data, ): @@ -591,7 +603,7 @@ def upload_packing_results_workflow( Complete packing results upload workflow including folder preparation and s3 upload """ try: - if job_id: + if dedup_hash: source_path = Path(source_folder) if not source_path.exists(): @@ -601,7 +613,7 @@ def 
upload_packing_results_workflow( # prepare unique S3 upload folder parent_folder = source_path.parent - unique_folder_name = f"{source_path.name}_run_{job_id}" + unique_folder_name = f"{source_path.name}_run_{dedup_hash}" s3_upload_folder = parent_folder / unique_folder_name logging.debug(f"outputs will be copied to: {s3_upload_folder}") @@ -618,7 +630,7 @@ def upload_packing_results_workflow( upload_result = self.upload_outputs_to_s3( output_folder=s3_upload_folder, recipe_name=recipe_name, - job_id=job_id, + dedup_hash=dedup_hash, ) # clean up temporary folder after upload @@ -628,9 +640,11 @@ def upload_packing_results_workflow( f"Cleaned up temporary upload folder: {s3_upload_folder}" ) - # update outputs directory in firebase - self.update_outputs_directory( - job_id, upload_result.get("outputs_directory") + # update outputs directory in job status + self.upload_job_status( + dedup_hash, + "DONE", + outputs_directory=upload_result.get("outputs_directory"), ) return upload_result @@ -639,7 +653,7 @@ def upload_packing_results_workflow( logging.error(e) return {"success": False, "error": e} - def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): + def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): """ Upload packing outputs to S3 bucket """ @@ -647,7 +661,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): bucket_name = self.db.bucket_name region_name = self.db.region_name sub_folder_name = self.db.sub_folder_name - s3_prefix = f"{sub_folder_name}/{recipe_name}/{job_id}" + s3_prefix = f"{sub_folder_name}/{recipe_name}/{dedup_hash}" try: upload_result = self.db.upload_directory( @@ -671,7 +685,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): return { "success": True, - "run_id": job_id, + "dedup_hash": dedup_hash, "s3_bucket": bucket_name, "s3_prefix": s3_prefix, "public_url_base": f"{base_url}/{s3_prefix}/", @@ -685,25 +699,6 @@ def upload_outputs_to_s3(self, output_folder, 
recipe_name, job_id): logging.error(e) return {"success": False, "error": e} - def update_outputs_directory(self, job_id, outputs_directory): - if not self.db or self.db.s3_client: - # switch to firebase handler to update job status - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler(default_db="staging") - if job_id: - timestamp = initialized_db.create_timestamp() - initialized_db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "outputs_directory": outputs_directory, - }, - ) - logging.debug( - f"Updated outputs s3 location {outputs_directory} for job ID: {job_id}" - ) - class DBRecipeLoader(object): """ diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 86af0874..4f934e0e 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1385,16 +1385,14 @@ def raycast(self, **kw): def raycast_test(self, obj, start, end, length, **kw): return - def post_and_open_file(self, file_name, open_results_in_browser): + def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - url = None - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=job_id + simularium_file, storage="aws", batch_job_id=dedup_hash ) if file_name and url: simulariumHelper.store_metadata( - file_name, url, db="firebase", job_id=job_id + file_name, url, db="firebase", dedup_hash=dedup_hash ) if open_results_in_browser: simulariumHelper.open_in_simularium(url) @@ -1428,7 +1426,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, url, db=None, job_id=None): + def store_metadata(file_name, url, db=None, dedup_hash=None): if db == "firebase": handler = DATABASE_IDS.handlers().get(db) initialized_db 
= handler( @@ -1436,7 +1434,7 @@ def store_metadata(file_name, url, db=None, job_id=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, job_id) + db_uploader.upload_result_metadata(file_name, url, dedup_hash) else: db_maintainer = DBMaintenance(initialized_db) logging.warning( diff --git a/cellpack/autopack/writers/__init__.py b/cellpack/autopack/writers/__init__.py index 6ca931af..0b09e03a 100644 --- a/cellpack/autopack/writers/__init__.py +++ b/cellpack/autopack/writers/__init__.py @@ -197,8 +197,11 @@ def save_as_simularium(self, env, seed_to_results_map): number_of_packings = env.config_data.get("number_of_packings", 1) open_results_in_browser = env.config_data.get("open_results_in_browser", False) upload_results = env.config_data.get("upload_results", False) + dedup_hash = getattr(env, "dedup_hash", None) if (number_of_packings == 1 or is_aggregate) and upload_results: - autopack.helper.post_and_open_file(file_name, open_results_in_browser) + autopack.helper.post_and_open_file( + file_name, open_results_in_browser, dedup_hash + ) def save_Mixed_asJson( self, diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 83b22264..27c4d018 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -1,6 +1,5 @@ import logging import logging.config -import os import time from pathlib import Path @@ -25,7 +24,11 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + hash=None, ): """ Initializes an autopack packing from the command line @@ -33,7 +36,7 @@ def pack( :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing 
+ :param hash: string argument, dedup hash identifier for tracking/caching results :return: void """ @@ -57,6 +60,7 @@ def pack( autopack.helper = helper env = Environment(config=packing_config_data, recipe=recipe_data) env.helper = helper + env.dedup_hash = hash log.info("Packing recipe: %s", recipe_data["name"]) log.info("Outputs will be saved to %s", env.out_folder) @@ -83,24 +87,22 @@ def pack( env.buildGrid(rebuild=True) env.pack_grid(verbose=0, usePP=False) - if docker: - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) - if job_id: - handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) - # temporarily using demo bucket before permissions are granted - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name="runs", - region_name="us-west-2", - ) - uploader = DBUploader(db_handler=initialized_handler) - uploader.upload_packing_results_workflow( - source_folder=env.out_folder, - recipe_name=recipe_data["name"], - job_id=job_id, - config_data=packing_config_data, - recipe_data=recipe_loader.serializable_recipe_data, - ) + if docker and hash: + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) + # temporarily using demo bucket before permissions are granted + initialized_handler = handler( + bucket_name="cellpack-demo", + sub_folder_name="runs", + region_name="us-west-2", + ) + uploader = DBUploader(db_handler=initialized_handler) + uploader.upload_packing_results_workflow( + source_folder=env.out_folder, + recipe_name=recipe_data["name"], + dedup_hash=hash, + config_data=packing_config_data, + recipe_data=recipe_loader.serializable_recipe_data, + ) def main(): diff --git a/cellpack/tests/test_db_uploader.py b/cellpack/tests/test_db_uploader.py index 0c91cbd5..414f6b9c 100644 --- a/cellpack/tests/test_db_uploader.py +++ b/cellpack/tests/test_db_uploader.py @@ -175,3 +175,56 @@ def test_upload_recipe(): "A": "firebase:composition/test_id", } assert recipe_doc.objects_to_path_map == {"sphere_25": "firebase:objects/test_id"} + + 
+def test_upload_job_status_with_firebase_handler(): + mock_firebase_db = MagicMock() + mock_firebase_db.create_timestamp.return_value = "test_timestamp" + # firebaseHandler does not have s3_client attribute + del mock_firebase_db.s3_client + + uploader = DBUploader(mock_firebase_db) + uploader.upload_job_status("test_hash", "RUNNING") + + mock_firebase_db.create_timestamp.assert_called_once() + mock_firebase_db.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "test_timestamp", + "status": "RUNNING", + "error_message": None, + }, + ) + + +def test_upload_job_status_with_aws_handler(): + mock_aws_db = MagicMock() + mock_aws_db.s3_client = MagicMock() # AWSHandler has s3_client + + mock_firebase_handler = MagicMock() + mock_firebase_handler.create_timestamp.return_value = "firebase_timestamp" + + with patch( + "cellpack.autopack.DBRecipeHandler.DATABASE_IDS.handlers" + ) as mock_handlers: + mock_handlers.return_value.get.return_value = ( + lambda default_db: mock_firebase_handler + ) + + uploader = DBUploader(mock_aws_db) + uploader.upload_job_status("test_hash", "DONE", result_path="test_path") + + mock_firebase_handler.create_timestamp.assert_called_once() + mock_firebase_handler.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "firebase_timestamp", + "status": "DONE", + "error_message": None, + "result_path": "test_path", + }, + ) + # AWS handler should not be called for timestamp + mock_aws_db.create_timestamp.assert_not_called() diff --git a/docker/server.py b/docker/server.py index 9b1ce105..74bb20f3 100644 --- a/docker/server.py +++ b/docker/server.py @@ -1,8 +1,6 @@ import asyncio from aiohttp import web -import os -import uuid -from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.DBRecipeHandler import DataDoc, DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.bin.pack import pack @@ -12,29 +10,40 
@@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, job_id, recipe=None, config=None, body=None): - os.environ["AWS_BATCH_JOB_ID"] = job_id - self.update_job_status(job_id, "RUNNING") + def _get_firebase_handler(self, database_name="firebase"): + handler = DATABASE_IDS.handlers().get(database_name) + initialized_db = handler(default_db="staging") + if initialized_db._initialized: + return initialized_db + return None + + def job_exists(self, dedup_hash): + db = self._get_firebase_handler() + if not db: + return False + + job_status, _ = db.get_doc_by_id("job_status", dedup_hash) + return job_status is not None + + async def run_packing(self, dedup_hash, recipe=None, config=None, body=None): + self.update_job_status(dedup_hash, "RUNNING") try: # Pack JSON recipe in body if provided, otherwise use recipe path - pack(recipe=(body if body else recipe), config_path=config, docker=True) + pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=dedup_hash) except Exception as e: - self.update_job_status(job_id, "FAILED", error_message=str(e)) + self.update_job_status(dedup_hash, "FAILED", error_message=str(e)) - def update_job_status(self, job_id, status, result_path=None, error_message=None): - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler( - default_db="staging" - ) - if initialized_db._initialized: - db_uploader = DBUploader(initialized_db) - db_uploader.upload_job_status(job_id, status, result_path, error_message) + def update_job_status(self, dedup_hash, status, result_path=None, error_message=None): + db = self._get_firebase_handler() + if db: + db_uploader = DBUploader(db) + db_uploader.upload_job_status(dedup_hash, status, result_path, error_message) async def hello_world(self, request: web.Request) -> web.Response: return web.Response(text="Hello from the cellPACK server") async def health_check(self, request: web.Request) -> web.Response: - # healthcheck 
endpoint needed for AWS load balancer + # health check endpoint needed for AWS load balancer return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: @@ -48,10 +57,14 @@ async def pack_handler(self, request: web.Request) -> web.Response: "Pack requests must include recipe as a query param" ) config = request.rel_url.query.get("config") - job_id = str(uuid.uuid4()) + + dedup_hash = DataDoc.generate_hash(body) + + if self.job_exists(dedup_hash): + return web.json_response({"jobId": dedup_hash}) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) + packing_task = asyncio.create_task(self.run_packing(dedup_hash, recipe, config, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion @@ -60,7 +73,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: # return job id immediately, rather than wait for task to complete, # to avoid timeout issues with API gateway - return web.json_response({"jobId": job_id}) + return web.json_response({"jobId": dedup_hash}) async def init_app() -> web.Application: @@ -75,4 +88,4 @@ async def init_app() -> web.Application: ) return app -web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) \ No newline at end of file +web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) From 79e77e83c3749d3d8f51640a3497dac2e39e634b Mon Sep 17 00:00:00 2001 From: Alli <111383930+ascibisz@users.noreply.github.com> Date: Wed, 4 Feb 2026 08:47:47 -0800 Subject: [PATCH 21/59] Only upload simularium file once (#446) * proposal: get hash from recipe loader * simplify get_ dedup_hash * only post simularium results file once for server job runs * update code for rebase * code cleanup --------- Co-authored-by: Ruge Li --- cellpack/autopack/DBRecipeHandler.py | 12 +++--- .../upy/simularium/simularium_helper.py | 42 ++++++++----------- 2 files changed, 24 insertions(+), 
30 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 00fc8bb7..3eb691d5 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, dedup_hash=None): + def upload_result_metadata(self, file_name, url): """ Upload the metadata of the result file to the database. """ @@ -543,11 +543,8 @@ def upload_result_metadata(self, file_name, url, dedup_hash=None): "user": username, "timestamp": timestamp, "url": url, - "dedup_hash": dedup_hash, }, ) - if dedup_hash: - self.upload_job_status(dedup_hash, "DONE", result_path=url) def upload_job_status( self, @@ -644,6 +641,7 @@ def upload_packing_results_workflow( self.upload_job_status( dedup_hash, "DONE", + result_path=upload_result.get("simularium_url"), outputs_directory=upload_result.get("outputs_directory"), ) @@ -675,8 +673,11 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): f"{base_url}/{file_info['s3_key']}" for file_info in upload_result["uploaded_files"] ] + simularium_url = None + for url in public_urls: + if url.endswith(".simularium"): + simularium_url = url outputs_directory = f"https://us-west-2.console.aws.amazon.com/s3/buckets/{bucket_name}/{s3_prefix}/" - logging.info( f"Successfully uploaded {upload_result['total_files']} files to {outputs_directory}" ) @@ -694,6 +695,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): "total_size": upload_result["total_size"], "urls": public_urls, "outputs_directory": outputs_directory, + "simularium_url": simularium_url, } except Exception as e: logging.error(e) diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 4f934e0e..08179d85 100644 --- 
a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1387,36 +1387,28 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=dedup_hash - ) - if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase", dedup_hash=dedup_hash + if dedup_hash is None: + file_name, url = simulariumHelper.store_result_file( + simularium_file, storage="aws" ) - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + if file_name and url: + simulariumHelper.store_metadata( + file_name, url, db="firebase" + ) + if open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod def store_result_file( - file_path, storage=None, batch_job_id=None, sub_folder="simularium" + file_path, storage=None, sub_folder="simularium" ): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) - # if batch_job_id is not None, then we are in a batch job and should use the temp bucket - # TODO: use cellpack-results bucket for batch jobs once we have the correct permissions - if batch_job_id: - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) - else: - initialized_handler = handler( - bucket_name="cellpack-results", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) + initialized_handler = handler( + bucket_name="cellpack-results", + sub_folder_name=sub_folder, + region_name="us-west-2", + ) file_name, url = initialized_handler.save_file_and_get_url(file_path) if not file_name or not url: db_maintainer = DBMaintenance(initialized_handler) @@ -1426,7 +1418,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, 
url, db=None, dedup_hash=None): + def store_metadata(file_name, url, db=None): if db == "firebase": handler = DATABASE_IDS.handlers().get(db) initialized_db = handler( @@ -1434,7 +1426,7 @@ def store_metadata(file_name, url, db=None, dedup_hash=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, dedup_hash) + db_uploader.upload_result_metadata(file_name, url) else: db_maintainer = DBMaintenance(initialized_db) logging.warning( From 653285e946842613d2976c3a4956536a1b04eb67 Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 9 Feb 2026 10:43:42 -0800 Subject: [PATCH 22/59] Maint/firebase collection cleanup (#448) * remove local metadata writes for auto-pop feature * remove cleanup firebase workflow * remove cleanup firebase code * 1. make doc url a constant 2.remove unused param --- .github/workflows/cleanup-firebase.yml | 22 ------ cellpack/autopack/DBRecipeHandler.py | 70 +------------------ .../interface_objects/default_values.py | 1 - .../upy/simularium/simularium_helper.py | 45 +++--------- cellpack/bin/cleanup_tasks.py | 20 ------ cellpack/bin/upload.py | 7 +- 6 files changed, 14 insertions(+), 151 deletions(-) delete mode 100644 .github/workflows/cleanup-firebase.yml delete mode 100644 cellpack/bin/cleanup_tasks.py diff --git a/.github/workflows/cleanup-firebase.yml b/.github/workflows/cleanup-firebase.yml deleted file mode 100644 index ec939d5b..00000000 --- a/.github/workflows/cleanup-firebase.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Cleanup Firebase Metadata - -on: - schedule: - - cron: "24 18 * * 1" # Runs at 18:24 UTC every Monday - -jobs: - cleanup: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.11] - os: [ubuntu-latest, windows-latest, macOS-latest] - steps: - - uses: actions/checkout@v4.2.2 - - uses: ./.github/actions/dependencies - - name: Cleanup Firebase 
Metadata - env: - FIREBASE_TOKEN: ${{ secrets.FIREBASE_TOKEN }} - FIREBASE_EMAIL: ${{ secrets.FIREBASE_EMAIL }} - run: | - uv run python cellpack/bin/cleanup_tasks.py diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 3eb691d5..8e3aec7f 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -1,7 +1,6 @@ import copy import logging import shutil -from datetime import datetime, timezone from enum import Enum from pathlib import Path @@ -10,7 +9,6 @@ import hashlib import json -import requests from cellpack.autopack.utils import deep_merge @@ -321,36 +319,6 @@ def __init__(self, settings): self.settings = settings -class ResultDoc: - def __init__(self, db): - self.db = db - - def handle_expired_results(self): - """ - Check if the results in the database are expired and delete them if the linked object expired. - """ - current_utc = datetime.now(timezone.utc) - results = self.db.get_all_docs("results") - if results: - for result in results: - result_data = self.db.doc_to_dict(result) - result_age = current_utc - result_data["timestamp"] - if result_age.days > 180 and not self.validate_existence( - result_data["url"] - ): - self.db.delete_doc("results", self.db.doc_id(result)) - logging.info("Results cleanup complete.") - else: - logging.info("No results found in the database.") - - def validate_existence(self, url): - """ - Validate the existence of an S3 object by checking if the URL is accessible. - Returns True if the URL is accessible. - """ - return requests.head(url).status_code == requests.codes.ok - - class DBUploader(object): """ Handles the uploading of data to the database. @@ -529,23 +497,6 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url): - """ - Upload the metadata of the result file to the database. 
- """ - if self.db: - username = self.db.get_username() - timestamp = self.db.create_timestamp() - self.db.update_or_create( - "results", - file_name, - { - "user": username, - "timestamp": timestamp, - "url": url, - }, - ) - def upload_job_status( self, dedup_hash, @@ -887,23 +838,4 @@ def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict): return recipe_data -class DBMaintenance(object): - """ - Handles the maintenance of the database. - """ - - def __init__(self, db_handler): - self.db = db_handler - self.result_doc = ResultDoc(self.db) - - def cleanup_results(self): - """ - Check if the results in the database are expired and delete them if the linked object expired. - """ - self.result_doc.handle_expired_results() - - def readme_url(self): - """ - Return the URL to the README file for the database setup section. - """ - return "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" +DB_SETUP_README_URL = "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" diff --git a/cellpack/autopack/interface_objects/default_values.py b/cellpack/autopack/interface_objects/default_values.py index bbdbc452..969eaa6a 100644 --- a/cellpack/autopack/interface_objects/default_values.py +++ b/cellpack/autopack/interface_objects/default_values.py @@ -9,7 +9,6 @@ "objects", "gradients", "recipes", - "results", "configs", "recipes_edited", ] diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 08179d85..87c616e1 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -22,7 +22,7 @@ from simulariumio.cellpack import HAND_TYPE, CellpackConverter from simulariumio.constants import DISPLAY_TYPE, VIZ_TYPE -from cellpack.autopack.DBRecipeHandler import DBMaintenance, DBUploader +from cellpack.autopack.DBRecipeHandler import DB_SETUP_README_URL from 
cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.upy import hostHelper from cellpack.autopack.upy.simularium.plots import PlotData @@ -1388,20 +1388,12 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") if dedup_hash is None: - file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws" - ) - if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase" - ) - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + url = simulariumHelper.store_result_file(simularium_file, storage="aws") + if url and open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod - def store_result_file( - file_path, storage=None, sub_folder="simularium" - ): + def store_result_file(file_path, storage=None, sub_folder="simularium"): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) initialized_handler = handler( @@ -1409,30 +1401,13 @@ def store_result_file( sub_folder_name=sub_folder, region_name="us-west-2", ) - file_name, url = initialized_handler.save_file_and_get_url(file_path) - if not file_name or not url: - db_maintainer = DBMaintenance(initialized_handler) - logging.warning( - f"Skipping browser opening, upload credentials not configured. 
For setup instructions see: {db_maintainer.readme_url()}" - ) - return file_name, url - - @staticmethod - def store_metadata(file_name, url, db=None): - if db == "firebase": - handler = DATABASE_IDS.handlers().get(db) - initialized_db = handler( - default_db="staging" - ) # default to staging for metadata uploads - if initialized_db._initialized: - db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url) - else: - db_maintainer = DBMaintenance(initialized_db) + _, url = initialized_handler.save_file_and_get_url(file_path) + if not url: logging.warning( - f"Firebase credentials not found. For setup instructions see: {db_maintainer.readme_url()}. Or try cellPACK web interface: https://cellpack.allencell.org (no setup required)" + f"Skipping browser opening, upload credentials not configured. For setup instructions see: {DB_SETUP_README_URL}" ) - return + return url + return None @staticmethod def open_in_simularium(aws_url): diff --git a/cellpack/bin/cleanup_tasks.py b/cellpack/bin/cleanup_tasks.py deleted file mode 100644 index 08217aa0..00000000 --- a/cellpack/bin/cleanup_tasks.py +++ /dev/null @@ -1,20 +0,0 @@ -from cellpack.autopack.DBRecipeHandler import DBMaintenance -from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS - - -def run_cleanup(db_id=DATABASE_IDS.FIREBASE): - """ - Performs cleanup operations on expired database entries. 
- This function is executed as part of a scheduled task defined in .github/workflows/cleanup-firebase.yml - - Args: - db_id(str): The database id to use - """ - handler = DATABASE_IDS.handlers().get(db_id) - initialized_db = handler(default_db="staging") - db_maintainer = DBMaintenance(initialized_db) - db_maintainer.cleanup_results() - - -if __name__ == "__main__": - run_cleanup() diff --git a/cellpack/bin/upload.py b/cellpack/bin/upload.py index b038b4e4..59fc2104 100644 --- a/cellpack/bin/upload.py +++ b/cellpack/bin/upload.py @@ -3,7 +3,7 @@ import json from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBUploader, DBMaintenance +from cellpack.autopack.DBRecipeHandler import DBUploader, DB_SETUP_README_URL from cellpack.autopack.upy.simularium.simularium_helper import simulariumHelper from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.loaders.config_loader import ConfigLoader @@ -90,7 +90,7 @@ def upload( id, _ = db_handler.upload_data("editable_fields", field) editable_fields_ids.append(id) if output_file: - _, result_url = simulariumHelper.store_result_file( + result_url = simulariumHelper.store_result_file( output_file, storage="aws", sub_folder="client" ) if studio: @@ -105,9 +105,8 @@ def upload( db_handler.upload_data("example_packings", recipe_metadata) else: - db_maintainer = DBMaintenance(db_handler) sys.exit( - f"The selected database is not initialized. Please set up Firebase credentials to upload recipes. Refer to the instructions at {db_maintainer.readme_url()} " + f"The selected database is not initialized. Please set up Firebase credentials to upload recipes. 
Refer to the instructions at {DB_SETUP_README_URL} " ) From 84d13c41abf5b2e506f24a8c937f709f48f7aa9f Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 23 Feb 2026 10:31:13 -0800 Subject: [PATCH 23/59] handle both recipe_path and json body requests (#449) --- docker/server.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docker/server.py b/docker/server.py index 74bb20f3..7b0a209d 100644 --- a/docker/server.py +++ b/docker/server.py @@ -1,4 +1,5 @@ import asyncio +import uuid from aiohttp import web from cellpack.autopack.DBRecipeHandler import DataDoc, DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS @@ -25,19 +26,19 @@ def job_exists(self, dedup_hash): job_status, _ = db.get_doc_by_id("job_status", dedup_hash) return job_status is not None - async def run_packing(self, dedup_hash, recipe=None, config=None, body=None): - self.update_job_status(dedup_hash, "RUNNING") + async def run_packing(self, job_id, recipe=None, config=None, body=None): + self.update_job_status(job_id, "RUNNING") try: # Pack JSON recipe in body if provided, otherwise use recipe path - pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=dedup_hash) + pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=job_id) except Exception as e: - self.update_job_status(dedup_hash, "FAILED", error_message=str(e)) + self.update_job_status(job_id, "FAILED", error_message=str(e)) - def update_job_status(self, dedup_hash, status, result_path=None, error_message=None): + def update_job_status(self, job_id, status, result_path=None, error_message=None): db = self._get_firebase_handler() if db: db_uploader = DBUploader(db) - db_uploader.upload_job_status(dedup_hash, status, result_path, error_message) + db_uploader.upload_job_status(job_id, status, result_path, error_message) async def hello_world(self, request: web.Request) -> 
web.Response: return web.Response(text="Hello from the cellPACK server") @@ -58,13 +59,18 @@ async def pack_handler(self, request: web.Request) -> web.Response: ) config = request.rel_url.query.get("config") - dedup_hash = DataDoc.generate_hash(body) - - if self.job_exists(dedup_hash): - return web.json_response({"jobId": dedup_hash}) + if body: + dedup_hash = DataDoc.generate_hash(body) + if self.job_exists(dedup_hash): + return web.json_response({"jobId": dedup_hash}) + job_id = dedup_hash + else: + job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(dedup_hash, recipe, config, body)) + packing_task = asyncio.create_task( + self.run_packing(job_id, recipe, config, body) + ) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion @@ -73,7 +79,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: # return job id immediately, rather than wait for task to complete, # to avoid timeout issues with API gateway - return web.json_response({"jobId": dedup_hash}) + return web.json_response({"jobId": job_id}) async def init_app() -> web.Application: From 5770826b04f81c4ee7e890233657e87229edb415 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 11 Mar 2026 14:14:29 -0700 Subject: [PATCH 24/59] change error message body --- docker/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server.py b/docker/server.py index 74bb20f3..51b135ca 100644 --- a/docker/server.py +++ b/docker/server.py @@ -54,7 +54,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: body = None if not recipe and not body: raise web.HTTPBadRequest( - "Pack requests must include recipe as a query param" + "Pack requests must include a recipe, either as a query param or in the request body" ) config = request.rel_url.query.get("config") From 162ef12e9b881a2d1eb117a49daee6c6651567c8 Mon Sep 17 00:00:00 2001 
From: ascibisz Date: Wed, 11 Mar 2026 14:25:17 -0700 Subject: [PATCH 25/59] lint fixes --- cellpack/autopack/upy/simularium/simularium_helper.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 08179d85..f2b3a94c 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1392,16 +1392,12 @@ def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None simularium_file, storage="aws" ) if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase" - ) + simulariumHelper.store_metadata(file_name, url, db="firebase") if open_results_in_browser: simulariumHelper.open_in_simularium(url) @staticmethod - def store_result_file( - file_path, storage=None, sub_folder="simularium" - ): + def store_result_file(file_path, storage=None, sub_folder="simularium"): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) initialized_handler = handler( From 64c60c89e5104729c8e7c547724fc9fa5d574ccc Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 11 Mar 2026 14:25:51 -0700 Subject: [PATCH 26/59] add more checks when attempting to read json body --- docker/server.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/docker/server.py b/docker/server.py index 51b135ca..8b2642e4 100644 --- a/docker/server.py +++ b/docker/server.py @@ -48,10 +48,17 @@ async def health_check(self, request: web.Request) -> web.Response: async def pack_handler(self, request: web.Request) -> web.Response: recipe = request.rel_url.query.get("recipe") or "" - if request.can_read_body: - body = await request.json() - else: - body = None + body = None + + # If request has a body, attempt to parse it as JSON and use as recipe + # otherwise rely on recipe query param + if (request.can_read_body and 
request.content_length + and request.content_length > 0): + try: + body = await request.json() + except Exception: + body = None + if not recipe and not body: raise web.HTTPBadRequest( "Pack requests must include a recipe, either as a query param or in the request body" From 3baff4989d669fe0938c930b76c6684a17f53079 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:32:55 -0700 Subject: [PATCH 27/59] add upload script --- cellpack/bin/upload_to_client.py | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py new file mode 100644 index 00000000..93d94f3a --- /dev/null +++ b/cellpack/bin/upload_to_client.py @@ -0,0 +1,51 @@ +import json +import fire + +from cellpack.autopack.FirebaseHandler import FirebaseHandler +from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.loaders.config_loader import ConfigLoader +from cellpack.autopack.loaders.recipe_loader import RecipeLoader +from cellpack.bin.upload import get_recipe_metadata + +def upload_to_client( + recipe: str, + config: str, + fields: str, + name: str +): + """ + Uploads recipe, config, and editable fields, read from specified + JSON files, to the database for client access + """ + db_handler = FirebaseHandler() + recipe_id = "" + config_id = "" + editable_fields_ids = [] + if FirebaseHandler._initialized: + db_handler = DBUploader(db_handler) + if recipe: + recipe_loader = RecipeLoader(recipe) + recipe_full_data = recipe_loader._read(resolve_inheritance=False) + recipe_meta_data = get_recipe_metadata(recipe_loader) + recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) + if config: + config_data = ConfigLoader(config).config + config_id = db_handler.upload_config(config_data, config) + if fields: + editable_fields_data = json.load(open(fields, "r")) + for field in 
editable_fields_data.get("editable_fields", []): + id, _ = db_handler.upload_data("editable_fields", field) + editable_fields_ids.append(id) + recipe_metadata = { + "name": name, + "recipe": recipe_id, + "config": config_id, + "editable_fields": editable_fields_ids, + } + db_handler.upload_data("client_recipes", recipe_metadata) + +def main(): + fire.Fire(upload_to_client) + +if __name__ == "__main__": + main() From 558b753647aecf97c973ffc83045aec08eb0a512 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:40:00 -0700 Subject: [PATCH 28/59] add example data and more documentation --- cellpack/bin/upload_to_client.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 93d94f3a..9536defc 100644 --- a/cellpack/bin/upload_to_client.py +++ b/cellpack/bin/upload_to_client.py @@ -16,6 +16,15 @@ def upload_to_client( """ Uploads recipe, config, and editable fields, read from specified JSON files, to the database for client access + + :param recipe: string argument + path to local recipe file to upload to firebase + :param config: string argument + path to local config file to upload to firebase + :param fields: string argument + path to local editable fields file to upload to firebase + :param name: string argument + display name for recipe in client selection menu """ db_handler = FirebaseHandler() recipe_id = "" From 57468b04c40ab1dcb5e4547ab3788eed802147b3 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:42:16 -0700 Subject: [PATCH 29/59] point to correct collection --- cellpack/bin/upload_to_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 9536defc..f778aa53 100644 --- a/cellpack/bin/upload_to_client.py +++ b/cellpack/bin/upload_to_client.py @@ -51,7 +51,9 @@ def upload_to_client( "config": config_id, "editable_fields": editable_fields_ids, } - 
db_handler.upload_data("client_recipes", recipe_metadata) + + # Upload the combined recipe metadata to example_packings collection for client + db_handler.upload_data("example_packings", recipe_metadata) def main(): fire.Fire(upload_to_client) From 4addc650f7ad23419f0d21ce396b7d6682007fdc Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 14:29:03 -0700 Subject: [PATCH 30/59] have server accept recipe as json object in body of request --- cellpack/autopack/__init__.py | 8 +++++++- cellpack/autopack/loaders/recipe_loader.py | 5 +++-- cellpack/bin/pack.py | 4 ++-- docker/server.py | 14 +++++++++----- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index e9317240..248db9fe 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -412,8 +412,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False + filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None ): + if recipe_obj is not None: + composition = DBRecipeLoader.remove_empty( + recipe_obj.get("composition", {}) + ) + recipe_obj["composition"] = composition + return recipe_obj, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 84cd78ac..4b2930c8 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False): + def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, 
recipe_obj=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,6 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = save_converted_recipe + self.recipe_obj = recipe_obj # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -169,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker + self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 9db53937..fdbfede0 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker + recipe, packing_config_data["save_converted_recipe"], docker, recipe_str ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 581e0151..6e625314 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,11 +12,11 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id): + async def run_packing(self, recipe, config, job_id, body=None): 
os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True) + pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) @@ -37,8 +37,12 @@ async def health_check(self, request: web.Request) -> web.Response: return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: - recipe = request.rel_url.query.get("recipe") - if recipe is None: + recipe = request.rel_url.query.get("recipe") or "" + if request.can_read_body: + body = await request.json() + else: + body = None + if not recipe and not body: raise web.HTTPBadRequest( "Pack requests must include recipe as a query param" ) @@ -46,7 +50,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id)) + packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion From 8ccacc33e033888a49cb975e4c860505474be1f3 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 14:29:49 -0700 Subject: [PATCH 31/59] update documentation --- docker/Dockerfile.ecs | 3 ++- docs/DOCKER.md | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 7a303c02..2016c222 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,7 +4,8 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -r requirements/linux/requirements.txt --root-user-action=ignore +RUN python -m pip install . 
+RUN python -m pip install aiohttp mdutils EXPOSE 80 diff --git a/docs/DOCKER.md b/docs/DOCKER.md index a1214a33..48e4b6dd 100644 --- a/docs/DOCKER.md +++ b/docs/DOCKER.md @@ -13,6 +13,6 @@ ## AWS ECS Docker Image 1. Build image, running `docker build -f docker/Dockerfile.ecs -t [CONTAINER-NAME] .` 2. Run packings in the container, running: `docker run -v ~/.aws:/root/.aws -p 80:80 [CONTAINER-NAME]` -3. Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:8443/hello` in your browser. -4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/pack?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. +3. Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:80/hello` in your browser. +4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/start-packing?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. 5. Verify that the packing result path was uploaded to the firebase results table, with the job id specified in the response from the request in step 4.The result simularium file can be found at the s3 path specified there. \ No newline at end of file From 497747f86c248e547e50482011073879369096f1 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 16:41:35 -0700 Subject: [PATCH 32/59] remove accidential dockerfile changes --- docker/Dockerfile.ecs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 2016c222..5ed89b7b 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,8 +4,7 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -RUN python -m pip install aiohttp mdutils +RUN python -m pip install . 
--root-user-action=ignore EXPOSE 80 From a136b57b9f1eaa33c507c1ed8407961c7ec4f212 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Thu, 23 Oct 2025 13:42:23 -0700 Subject: [PATCH 33/59] rename param json_recipe --- cellpack/autopack/__init__.py | 10 +++++----- cellpack/autopack/loaders/recipe_loader.py | 6 +++--- cellpack/bin/data-manifest.json | 10 ++++++++++ cellpack/bin/pack.py | 4 ++-- docker/server.py | 2 +- 5 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 248db9fe..c22f684b 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -412,14 +412,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None + filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None ): - if recipe_obj is not None: + if json_recipe is not None: composition = DBRecipeLoader.remove_empty( - recipe_obj.get("composition", {}) + json_recipe.get("composition", {}) ) - recipe_obj["composition"] = composition - return recipe_obj, None, False + json_recipe["composition"] = composition + return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b2930c8..23044443 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, recipe_obj=None): + def __init__(self, input_file_path, save_converted_recipe=False, 
use_docker=False, json_recipe=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,7 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = save_converted_recipe - self.recipe_obj = recipe_obj + self.json_recipe = json_recipe # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -170,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj + self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json new file mode 100644 index 00000000..0a5e1f2d --- /dev/null +++ b/cellpack/bin/data-manifest.json @@ -0,0 +1,10 @@ +{ + "assay-dev": { + "data_source": "https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", + "static_files": [] + }, + "test": { + "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", + "static_files": [] + } +} \ No newline at end of file diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index fdbfede0..dadf0f31 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( 
packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, recipe_str + recipe, packing_config_data["save_converted_recipe"], docker, json_recipe ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 6e625314..ce6da7ee 100644 --- a/docker/server.py +++ b/docker/server.py @@ -16,7 +16,7 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) + pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From fdea7f1ea4f0aa0a6bda43a53d11047a5835b918 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:42:35 -0800 Subject: [PATCH 34/59] remove file that shouldn't be in this PR --- cellpack/bin/upload_to_client.py | 62 -------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py deleted file mode 100644 index f778aa53..00000000 --- a/cellpack/bin/upload_to_client.py +++ /dev/null @@ -1,62 +0,0 @@ -import json -import fire - -from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBUploader -from cellpack.autopack.loaders.config_loader import ConfigLoader -from cellpack.autopack.loaders.recipe_loader import RecipeLoader -from cellpack.bin.upload import get_recipe_metadata - -def upload_to_client( - recipe: str, - config: str, - fields: str, - name: str -): - """ - Uploads recipe, config, and editable fields, read from specified - JSON files, to the database for client access - - :param recipe: string argument - path to local recipe file to upload 
to firebase - :param config: string argument - path to local config file to upload to firebase - :param fields: string argument - path to local editable fields file to upload to firebase - :param name: string argument - display name for recipe in client selection menu - """ - db_handler = FirebaseHandler() - recipe_id = "" - config_id = "" - editable_fields_ids = [] - if FirebaseHandler._initialized: - db_handler = DBUploader(db_handler) - if recipe: - recipe_loader = RecipeLoader(recipe) - recipe_full_data = recipe_loader._read(resolve_inheritance=False) - recipe_meta_data = get_recipe_metadata(recipe_loader) - recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) - if config: - config_data = ConfigLoader(config).config - config_id = db_handler.upload_config(config_data, config) - if fields: - editable_fields_data = json.load(open(fields, "r")) - for field in editable_fields_data.get("editable_fields", []): - id, _ = db_handler.upload_data("editable_fields", field) - editable_fields_ids.append(id) - recipe_metadata = { - "name": name, - "recipe": recipe_id, - "config": config_id, - "editable_fields": editable_fields_ids, - } - - # Upload the combined recipe metadata to example_packings collection for client - db_handler.upload_data("example_packings", recipe_metadata) - -def main(): - fire.Fire(upload_to_client) - -if __name__ == "__main__": - main() From d1686ee828a8823081ca5e00fe8472164dc5a0c8 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:43:04 -0800 Subject: [PATCH 35/59] remove accidential file --- cellpack/bin/data-manifest.json | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json deleted file mode 100644 index 0a5e1f2d..00000000 --- a/cellpack/bin/data-manifest.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "assay-dev": { - "data_source": 
"https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", - "static_files": [] - }, - "test": { - "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", - "static_files": [] - } -} \ No newline at end of file From 3934ebe95d070af27022575f7571886d1b9f96fb Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:52:19 -0800 Subject: [PATCH 36/59] lint fixes --- cellpack/autopack/__init__.py | 11 +++++++---- cellpack/autopack/loaders/recipe_loader.py | 13 +++++++++++-- cellpack/bin/pack.py | 7 ++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index c22f684b..45e37b0c 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -412,12 +412,15 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None + filename, + destination="", + cache="geometries", + force=None, + use_docker=False, + json_recipe=None, ): if json_recipe is not None: - composition = DBRecipeLoader.remove_empty( - json_recipe.get("composition", {}) - ) + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe, None, False if is_remote_path(filename): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 23044443..cf87a10c 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,13 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, json_recipe=None): + def __init__( + self, + input_file_path, + 
save_converted_recipe=False, + use_docker=False, + json_recipe=None, + ): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -170,7 +176,10 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe + self.file_path, + cache="recipes", + use_docker=use_docker, + json_recipe=self.json_recipe, ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index dadf0f31..99186c9d 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,12 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, + json_recipe=None, ): """ Initializes an autopack packing from the command line From 5fba3b19713421dda6ce6f96525185e1412da0ca Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:52:55 -0800 Subject: [PATCH 37/59] refactor to try to improve clarity of json recipe vs file path --- cellpack/autopack/__init__.py | 9 ++--- cellpack/autopack/loaders/recipe_loader.py | 17 +++++--- cellpack/bin/pack.py | 45 ++++++++++++++++++++-- docker/server.py | 7 +++- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 45e37b0c..9b1b0084 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -410,6 +410,10 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data +def load_json_recipe(json_recipe): + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) + json_recipe["composition"] = composition + return 
json_recipe def load_file( filename, @@ -417,12 +421,7 @@ def load_file( cache="geometries", force=None, use_docker=False, - json_recipe=None, ): - if json_recipe is not None: - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index cf87a10c..df9e26ac 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -175,12 +175,17 @@ def _migrate_version(self, old_recipe): ) def _read(self, resolve_inheritance=True, use_docker=False): - new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - cache="recipes", - use_docker=use_docker, - json_recipe=self.json_recipe, - ) + database_name = None + is_unnested_firebase = False + if self.json_recipe is not None: + new_values = autopack.load_json_recipe(self.json_recipe) + else: + new_values, database_name, is_unnested_firebase = autopack.load_file( + self.file_path, + cache="recipes", + use_docker=use_docker, + ) + if database_name == "firebase": if is_unnested_firebase: objects = new_values.get("objects", {}) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 99186c9d..39003eda 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -30,7 +30,6 @@ def pack( analysis_config_path=None, docker=False, validate=True, - json_recipe=None, ): """ Initializes an autopack packing from the command line @@ -42,12 +41,52 @@ def pack( :return: void """ - packing_config_data = ConfigLoader(config_path, docker).config + config_loader = ConfigLoader(config_path, docker) recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, json_recipe + recipe, 
config_loader.config["save_converted_recipe"], docker ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def pack_from_json( + json_recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, +): + """ + Initializes an autopack packing from the command line + :param json: JSON object representing the recipe + :param config_path: string argument, path to packing config file + :param analysis_config_path: string argument, path to analysis config file + :param docker: boolean argument, are we using docker + :param validate: boolean argument, validate recipe before packing + + :return: void + """ + config_loader = ConfigLoader(config_path, docker) + + recipe_loader = RecipeLoader( + "", config_loader.config["save_converted_recipe"], docker, json_recipe + ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data + packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index ce6da7ee..6bbf338c 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack +from cellpack.bin.pack import pack, pack_from_json SERVER_PORT = 80 @@ -16,7 +16,10 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) + if body is None: + pack(recipe=recipe, config_path=config, docker=True) + else: + 
pack_from_json(json_recipe=body, config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 8f49b0e73e2c04b5393afb9901d9ed1b05a4b2f9 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:56:38 -0800 Subject: [PATCH 38/59] lint fixes --- cellpack/autopack/__init__.py | 2 ++ cellpack/bin/pack.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 9b1b0084..251dc253 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -410,11 +410,13 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data + def load_json_recipe(json_recipe): composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe + def load_file( filename, destination="", diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 39003eda..80ef6784 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -54,6 +54,7 @@ def pack( validate ) + def pack_from_json( json_recipe, config_path=None, @@ -84,6 +85,7 @@ def pack_from_json( validate ) + def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data packing_config_data = config_loader.config From 728c19d81f469837bab04dbfdebe6d8e281711ca Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:57:23 -0800 Subject: [PATCH 39/59] lint fix --- cellpack/bin/pack.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 80ef6784..b8446f33 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -47,11 +47,7 @@ def pack( recipe, config_loader.config["save_converted_recipe"], docker ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, 
config_loader, analysis_config_path, docker, validate ) @@ -78,11 +74,7 @@ def pack_from_json( "", config_loader.config["save_converted_recipe"], docker, json_recipe ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, config_loader, analysis_config_path, docker, validate ) From 8f0c468e2ca417387b91b5664b4392a4662268f6 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:00:38 -0800 Subject: [PATCH 40/59] minimize changeset --- cellpack/autopack/__init__.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 251dc253..359bca0f 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -417,13 +417,7 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file( - filename, - destination="", - cache="geometries", - force=None, - use_docker=False, -): +def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: From 3fd95a4a06f0018a223282450596ded9e3288f93 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:02:35 -0800 Subject: [PATCH 41/59] minimize changeset --- cellpack/autopack/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 359bca0f..fb357e6c 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -417,7 +417,9 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): +def load_file( + filename, destination="", cache="geometries", force=None, use_docker=False +): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == 
DATABASE_IDS.GITHUB: From 27848251eeccf4efc0f42dac4c9e6da913947796 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:56:18 -0800 Subject: [PATCH 42/59] simplify changeset --- cellpack/autopack/__init__.py | 6 --- cellpack/autopack/loaders/recipe_loader.py | 15 +++++-- cellpack/bin/pack.py | 47 ++++------------------ docker/server.py | 8 ++-- 4 files changed, 23 insertions(+), 53 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index fb357e6c..e9317240 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,12 +411,6 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) return sphere_data -def load_json_recipe(json_recipe): - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe - - def load_file( filename, destination="", cache="geometries", force=None, use_docker=False ): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index df9e26ac..4b89c816 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -56,6 +56,15 @@ def __init__( self.recipe_data = self._read(use_docker=use_docker) + @classmethod + def from_json(cls, json_recipe, save_converted_recipe=False, use_docker=False): + return cls( + input_file_path="", + save_converted_recipe=save_converted_recipe, + use_docker=use_docker, + json_recipe=json_recipe, + ) + @staticmethod def _resolve_object(key, objects): current_object = objects[key] @@ -177,9 +186,9 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): database_name = None is_unnested_firebase = False - if self.json_recipe is not None: - new_values = autopack.load_json_recipe(self.json_recipe) - else: + new_values = self.json_recipe + if new_values is None: + # Read recipe from filepath new_values, 
database_name, is_unnested_firebase = autopack.load_file( self.file_path, cache="recipes", diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index b8446f33..6c603cbb 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -33,7 +33,7 @@ def pack( ): """ Initializes an autopack packing from the command line - :param recipe: string argument, path to recipe + :param recipe: string argument, path to recipe file, or a dictionary representing a recipe :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker @@ -41,45 +41,14 @@ def pack( :return: void """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - recipe, config_loader.config["save_converted_recipe"], docker - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def pack_from_json( - json_recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, -): - """ - Initializes an autopack packing from the command line - :param json: JSON object representing the recipe - :param config_path: string argument, path to packing config file - :param analysis_config_path: string argument, path to analysis config file - :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing - - :return: void - """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - "", config_loader.config["save_converted_recipe"], docker, json_recipe - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): + if isinstance(recipe, dict): + # Load recipe from JSON dictionary + recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) + else: + # Load recipe 
from file path + recipe_loader = RecipeLoader(recipe, use_docker=docker) recipe_data = recipe_loader.recipe_data + config_loader = ConfigLoader(config_path, docker) packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: diff --git a/docker/server.py b/docker/server.py index 6bbf338c..257e66ec 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack, pack_from_json +from cellpack.bin.pack import pack SERVER_PORT = 80 @@ -16,10 +16,8 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - if body is None: - pack(recipe=recipe, config_path=config, docker=True) - else: - pack_from_json(json_recipe=body, config_path=config, docker=True) + # Pack JSON recipe in body if provided, otherwise use recipe path + pack(recipe=(body if body else recipe), config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From e140122a58cef455da611cf24dde85383b6e5164 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:03:12 -0800 Subject: [PATCH 43/59] code cleanup --- cellpack/autopack/loaders/recipe_loader.py | 4 +--- cellpack/bin/pack.py | 10 +++------- docker/server.py | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b89c816..e40b469e 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,9 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - 
cache="recipes", - use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker, ) if database_name == "firebase": diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 6c603cbb..23a69a57 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,11 +25,7 @@ def pack( - recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True ): """ Initializes an autopack packing from the command line @@ -41,6 +37,8 @@ def pack( :return: void """ + packing_config_data = ConfigLoader(config_path, docker).config + if isinstance(recipe, dict): # Load recipe from JSON dictionary recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) @@ -48,8 +46,6 @@ def pack( # Load recipe from file path recipe_loader = RecipeLoader(recipe, use_docker=docker) recipe_data = recipe_loader.recipe_data - config_loader = ConfigLoader(config_path, docker) - packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index 257e66ec..9b1ce105 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,7 +12,7 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id, body=None): + async def run_packing(self, job_id, recipe=None, config=None, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: @@ -51,7 +51,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) + packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) # Keep track of task references to prevent them from 
being garbage # collected, then discard after task completion From cf3b9eddae61a742eaa14854094af7e00ec1271c Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:05:28 -0800 Subject: [PATCH 44/59] minimize changeset --- cellpack/bin/pack.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 23a69a57..83b22264 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -44,7 +44,9 @@ def pack( recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) else: # Load recipe from file path - recipe_loader = RecipeLoader(recipe, use_docker=docker) + recipe_loader = RecipeLoader( + recipe, packing_config_data["save_converted_recipe"], docker + ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} if analysis_config_path is not None: From 071aaf9c408b597cff5c252ff16804b69c8f8dc9 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:07:30 -0800 Subject: [PATCH 45/59] remove trailing comma --- cellpack/autopack/loaders/recipe_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index e40b469e..bbdb662a 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,7 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker ) if database_name == "firebase": From fa148ccbedaa6639c95489244fa5de4a1d01747b Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:11:58 -0800 Subject: [PATCH 46/59] Feature/firebase lookup (#445) * remove os fetch for job_id * use dedup_hash instead of job id * proposal: get hash 
from recipe loader * renaming and add TODOs * format * rename param to hash * remove unused validate param and doc strings in pack * simplify get_ dedup_hash * refactor job_status update * cleanup * fix upload_job_status to handle awshandler * pass dedup_pash to env for fetching across files * add tests * format1 * format test --- cellpack/autopack/DBRecipeHandler.py | 87 +++++++++---------- .../upy/simularium/simularium_helper.py | 12 ++- cellpack/autopack/writers/__init__.py | 5 +- cellpack/bin/pack.py | 44 +++++----- cellpack/tests/test_db_uploader.py | 53 +++++++++++ docker/server.py | 55 +++++++----- 6 files changed, 160 insertions(+), 96 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 8b4e8578..00fc8bb7 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, job_id=None): + def upload_result_metadata(self, file_name, url, dedup_hash=None): """ Upload the metadata of the result file to the database. 
""" @@ -543,28 +543,40 @@ def upload_result_metadata(self, file_name, url, job_id=None): "user": username, "timestamp": timestamp, "url": url, - "batch_job_id": job_id, + "dedup_hash": dedup_hash, }, ) - if job_id: - self.upload_job_status(job_id, "DONE", result_path=url) + if dedup_hash: + self.upload_job_status(dedup_hash, "DONE", result_path=url) - def upload_job_status(self, job_id, status, result_path=None, error_message=None): + def upload_job_status( + self, + dedup_hash, + status, + result_path=None, + error_message=None, + outputs_directory=None, + ): """ - Update status for a given job ID + Update status for a given dedup_hash """ if self.db: - timestamp = self.db.create_timestamp() - self.db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "status": str(status), - "result_path": result_path, - "error_message": error_message, - }, - ) + db_handler = self.db + # If db is AWSHandler, switch to firebase handler for job status updates + if hasattr(self.db, "s3_client"): + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE) + db_handler = handler(default_db="staging") + timestamp = db_handler.create_timestamp() + data = { + "timestamp": timestamp, + "status": str(status), + "error_message": error_message, + } + if result_path: + data["result_path"] = result_path + if outputs_directory: + data["outputs_directory"] = outputs_directory + db_handler.update_or_create("job_status", dedup_hash, data) def save_recipe_and_config_to_output(self, output_folder, config_data, recipe_data): output_path = Path(output_folder) @@ -583,7 +595,7 @@ def upload_packing_results_workflow( self, source_folder, recipe_name, - job_id, + dedup_hash, config_data, recipe_data, ): @@ -591,7 +603,7 @@ def upload_packing_results_workflow( Complete packing results upload workflow including folder preparation and s3 upload """ try: - if job_id: + if dedup_hash: source_path = Path(source_folder) if not source_path.exists(): @@ -601,7 +613,7 @@ def 
upload_packing_results_workflow( # prepare unique S3 upload folder parent_folder = source_path.parent - unique_folder_name = f"{source_path.name}_run_{job_id}" + unique_folder_name = f"{source_path.name}_run_{dedup_hash}" s3_upload_folder = parent_folder / unique_folder_name logging.debug(f"outputs will be copied to: {s3_upload_folder}") @@ -618,7 +630,7 @@ def upload_packing_results_workflow( upload_result = self.upload_outputs_to_s3( output_folder=s3_upload_folder, recipe_name=recipe_name, - job_id=job_id, + dedup_hash=dedup_hash, ) # clean up temporary folder after upload @@ -628,9 +640,11 @@ def upload_packing_results_workflow( f"Cleaned up temporary upload folder: {s3_upload_folder}" ) - # update outputs directory in firebase - self.update_outputs_directory( - job_id, upload_result.get("outputs_directory") + # update outputs directory in job status + self.upload_job_status( + dedup_hash, + "DONE", + outputs_directory=upload_result.get("outputs_directory"), ) return upload_result @@ -639,7 +653,7 @@ def upload_packing_results_workflow( logging.error(e) return {"success": False, "error": e} - def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): + def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): """ Upload packing outputs to S3 bucket """ @@ -647,7 +661,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): bucket_name = self.db.bucket_name region_name = self.db.region_name sub_folder_name = self.db.sub_folder_name - s3_prefix = f"{sub_folder_name}/{recipe_name}/{job_id}" + s3_prefix = f"{sub_folder_name}/{recipe_name}/{dedup_hash}" try: upload_result = self.db.upload_directory( @@ -671,7 +685,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): return { "success": True, - "run_id": job_id, + "dedup_hash": dedup_hash, "s3_bucket": bucket_name, "s3_prefix": s3_prefix, "public_url_base": f"{base_url}/{s3_prefix}/", @@ -685,25 +699,6 @@ def upload_outputs_to_s3(self, output_folder, 
recipe_name, job_id): logging.error(e) return {"success": False, "error": e} - def update_outputs_directory(self, job_id, outputs_directory): - if not self.db or self.db.s3_client: - # switch to firebase handler to update job status - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler(default_db="staging") - if job_id: - timestamp = initialized_db.create_timestamp() - initialized_db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "outputs_directory": outputs_directory, - }, - ) - logging.debug( - f"Updated outputs s3 location {outputs_directory} for job ID: {job_id}" - ) - class DBRecipeLoader(object): """ diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 86af0874..4f934e0e 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1385,16 +1385,14 @@ def raycast(self, **kw): def raycast_test(self, obj, start, end, length, **kw): return - def post_and_open_file(self, file_name, open_results_in_browser): + def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - url = None - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=job_id + simularium_file, storage="aws", batch_job_id=dedup_hash ) if file_name and url: simulariumHelper.store_metadata( - file_name, url, db="firebase", job_id=job_id + file_name, url, db="firebase", dedup_hash=dedup_hash ) if open_results_in_browser: simulariumHelper.open_in_simularium(url) @@ -1428,7 +1426,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, url, db=None, job_id=None): + def store_metadata(file_name, url, db=None, dedup_hash=None): if db == "firebase": handler = DATABASE_IDS.handlers().get(db) initialized_db 
= handler( @@ -1436,7 +1434,7 @@ def store_metadata(file_name, url, db=None, job_id=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, job_id) + db_uploader.upload_result_metadata(file_name, url, dedup_hash) else: db_maintainer = DBMaintenance(initialized_db) logging.warning( diff --git a/cellpack/autopack/writers/__init__.py b/cellpack/autopack/writers/__init__.py index de5f7216..1484d20f 100644 --- a/cellpack/autopack/writers/__init__.py +++ b/cellpack/autopack/writers/__init__.py @@ -196,8 +196,11 @@ def save_as_simularium(self, env, seed_to_results_map): number_of_packings = env.config_data.get("number_of_packings", 1) open_results_in_browser = env.config_data.get("open_results_in_browser", False) upload_results = env.config_data.get("upload_results", False) + dedup_hash = getattr(env, "dedup_hash", None) if (number_of_packings == 1 or is_aggregate) and upload_results: - autopack.helper.post_and_open_file(file_name, open_results_in_browser) + autopack.helper.post_and_open_file( + file_name, open_results_in_browser, dedup_hash + ) def save_Mixed_asJson( self, diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 83b22264..27c4d018 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -1,6 +1,5 @@ import logging import logging.config -import os import time from pathlib import Path @@ -25,7 +24,11 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + hash=None, ): """ Initializes an autopack packing from the command line @@ -33,7 +36,7 @@ def pack( :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing 
+ :param hash: string argument, dedup hash identifier for tracking/caching results :return: void """ @@ -57,6 +60,7 @@ def pack( autopack.helper = helper env = Environment(config=packing_config_data, recipe=recipe_data) env.helper = helper + env.dedup_hash = hash log.info("Packing recipe: %s", recipe_data["name"]) log.info("Outputs will be saved to %s", env.out_folder) @@ -83,24 +87,22 @@ def pack( env.buildGrid(rebuild=True) env.pack_grid(verbose=0, usePP=False) - if docker: - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) - if job_id: - handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) - # temporarily using demo bucket before permissions are granted - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name="runs", - region_name="us-west-2", - ) - uploader = DBUploader(db_handler=initialized_handler) - uploader.upload_packing_results_workflow( - source_folder=env.out_folder, - recipe_name=recipe_data["name"], - job_id=job_id, - config_data=packing_config_data, - recipe_data=recipe_loader.serializable_recipe_data, - ) + if docker and hash: + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) + # temporarily using demo bucket before permissions are granted + initialized_handler = handler( + bucket_name="cellpack-demo", + sub_folder_name="runs", + region_name="us-west-2", + ) + uploader = DBUploader(db_handler=initialized_handler) + uploader.upload_packing_results_workflow( + source_folder=env.out_folder, + recipe_name=recipe_data["name"], + dedup_hash=hash, + config_data=packing_config_data, + recipe_data=recipe_loader.serializable_recipe_data, + ) def main(): diff --git a/cellpack/tests/test_db_uploader.py b/cellpack/tests/test_db_uploader.py index 0c91cbd5..414f6b9c 100644 --- a/cellpack/tests/test_db_uploader.py +++ b/cellpack/tests/test_db_uploader.py @@ -175,3 +175,56 @@ def test_upload_recipe(): "A": "firebase:composition/test_id", } assert recipe_doc.objects_to_path_map == {"sphere_25": "firebase:objects/test_id"} + + 
+def test_upload_job_status_with_firebase_handler(): + mock_firebase_db = MagicMock() + mock_firebase_db.create_timestamp.return_value = "test_timestamp" + # firebaseHandler does not have s3_client attribute + del mock_firebase_db.s3_client + + uploader = DBUploader(mock_firebase_db) + uploader.upload_job_status("test_hash", "RUNNING") + + mock_firebase_db.create_timestamp.assert_called_once() + mock_firebase_db.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "test_timestamp", + "status": "RUNNING", + "error_message": None, + }, + ) + + +def test_upload_job_status_with_aws_handler(): + mock_aws_db = MagicMock() + mock_aws_db.s3_client = MagicMock() # AWSHandler has s3_client + + mock_firebase_handler = MagicMock() + mock_firebase_handler.create_timestamp.return_value = "firebase_timestamp" + + with patch( + "cellpack.autopack.DBRecipeHandler.DATABASE_IDS.handlers" + ) as mock_handlers: + mock_handlers.return_value.get.return_value = ( + lambda default_db: mock_firebase_handler + ) + + uploader = DBUploader(mock_aws_db) + uploader.upload_job_status("test_hash", "DONE", result_path="test_path") + + mock_firebase_handler.create_timestamp.assert_called_once() + mock_firebase_handler.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "firebase_timestamp", + "status": "DONE", + "error_message": None, + "result_path": "test_path", + }, + ) + # AWS handler should not be called for timestamp + mock_aws_db.create_timestamp.assert_not_called() diff --git a/docker/server.py b/docker/server.py index 9b1ce105..74bb20f3 100644 --- a/docker/server.py +++ b/docker/server.py @@ -1,8 +1,6 @@ import asyncio from aiohttp import web -import os -import uuid -from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.DBRecipeHandler import DataDoc, DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.bin.pack import pack @@ -12,29 +10,40 
@@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, job_id, recipe=None, config=None, body=None): - os.environ["AWS_BATCH_JOB_ID"] = job_id - self.update_job_status(job_id, "RUNNING") + def _get_firebase_handler(self, database_name="firebase"): + handler = DATABASE_IDS.handlers().get(database_name) + initialized_db = handler(default_db="staging") + if initialized_db._initialized: + return initialized_db + return None + + def job_exists(self, dedup_hash): + db = self._get_firebase_handler() + if not db: + return False + + job_status, _ = db.get_doc_by_id("job_status", dedup_hash) + return job_status is not None + + async def run_packing(self, dedup_hash, recipe=None, config=None, body=None): + self.update_job_status(dedup_hash, "RUNNING") try: # Pack JSON recipe in body if provided, otherwise use recipe path - pack(recipe=(body if body else recipe), config_path=config, docker=True) + pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=dedup_hash) except Exception as e: - self.update_job_status(job_id, "FAILED", error_message=str(e)) + self.update_job_status(dedup_hash, "FAILED", error_message=str(e)) - def update_job_status(self, job_id, status, result_path=None, error_message=None): - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler( - default_db="staging" - ) - if initialized_db._initialized: - db_uploader = DBUploader(initialized_db) - db_uploader.upload_job_status(job_id, status, result_path, error_message) + def update_job_status(self, dedup_hash, status, result_path=None, error_message=None): + db = self._get_firebase_handler() + if db: + db_uploader = DBUploader(db) + db_uploader.upload_job_status(dedup_hash, status, result_path, error_message) async def hello_world(self, request: web.Request) -> web.Response: return web.Response(text="Hello from the cellPACK server") async def health_check(self, request: web.Request) -> web.Response: - # healthcheck 
endpoint needed for AWS load balancer + # health check endpoint needed for AWS load balancer return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: @@ -48,10 +57,14 @@ async def pack_handler(self, request: web.Request) -> web.Response: "Pack requests must include recipe as a query param" ) config = request.rel_url.query.get("config") - job_id = str(uuid.uuid4()) + + dedup_hash = DataDoc.generate_hash(body) + + if self.job_exists(dedup_hash): + return web.json_response({"jobId": dedup_hash}) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) + packing_task = asyncio.create_task(self.run_packing(dedup_hash, recipe, config, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion @@ -60,7 +73,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: # return job id immediately, rather than wait for task to complete, # to avoid timeout issues with API gateway - return web.json_response({"jobId": job_id}) + return web.json_response({"jobId": dedup_hash}) async def init_app() -> web.Application: @@ -75,4 +88,4 @@ async def init_app() -> web.Application: ) return app -web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) \ No newline at end of file +web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) From ac34219775d66c22a1445e0adb10b8753a927652 Mon Sep 17 00:00:00 2001 From: Alli <111383930+ascibisz@users.noreply.github.com> Date: Wed, 4 Feb 2026 08:47:47 -0800 Subject: [PATCH 47/59] Only upload simularium file once (#446) * proposal: get hash from recipe loader * simplify get_ dedup_hash * only post simularium results file once for server job runs * update code for rebase * code cleanup --------- Co-authored-by: Ruge Li --- cellpack/autopack/DBRecipeHandler.py | 12 +++--- .../upy/simularium/simularium_helper.py | 42 ++++++++----------- 2 files changed, 24 insertions(+), 
30 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 00fc8bb7..3eb691d5 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, dedup_hash=None): + def upload_result_metadata(self, file_name, url): """ Upload the metadata of the result file to the database. """ @@ -543,11 +543,8 @@ def upload_result_metadata(self, file_name, url, dedup_hash=None): "user": username, "timestamp": timestamp, "url": url, - "dedup_hash": dedup_hash, }, ) - if dedup_hash: - self.upload_job_status(dedup_hash, "DONE", result_path=url) def upload_job_status( self, @@ -644,6 +641,7 @@ def upload_packing_results_workflow( self.upload_job_status( dedup_hash, "DONE", + result_path=upload_result.get("simularium_url"), outputs_directory=upload_result.get("outputs_directory"), ) @@ -675,8 +673,11 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): f"{base_url}/{file_info['s3_key']}" for file_info in upload_result["uploaded_files"] ] + simularium_url = None + for url in public_urls: + if url.endswith(".simularium"): + simularium_url = url outputs_directory = f"https://us-west-2.console.aws.amazon.com/s3/buckets/{bucket_name}/{s3_prefix}/" - logging.info( f"Successfully uploaded {upload_result['total_files']} files to {outputs_directory}" ) @@ -694,6 +695,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): "total_size": upload_result["total_size"], "urls": public_urls, "outputs_directory": outputs_directory, + "simularium_url": simularium_url, } except Exception as e: logging.error(e) diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 4f934e0e..08179d85 100644 --- 
a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1387,36 +1387,28 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=dedup_hash - ) - if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase", dedup_hash=dedup_hash + if dedup_hash is None: + file_name, url = simulariumHelper.store_result_file( + simularium_file, storage="aws" ) - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + if file_name and url: + simulariumHelper.store_metadata( + file_name, url, db="firebase" + ) + if open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod def store_result_file( - file_path, storage=None, batch_job_id=None, sub_folder="simularium" + file_path, storage=None, sub_folder="simularium" ): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) - # if batch_job_id is not None, then we are in a batch job and should use the temp bucket - # TODO: use cellpack-results bucket for batch jobs once we have the correct permissions - if batch_job_id: - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) - else: - initialized_handler = handler( - bucket_name="cellpack-results", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) + initialized_handler = handler( + bucket_name="cellpack-results", + sub_folder_name=sub_folder, + region_name="us-west-2", + ) file_name, url = initialized_handler.save_file_and_get_url(file_path) if not file_name or not url: db_maintainer = DBMaintenance(initialized_handler) @@ -1426,7 +1418,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, 
url, db=None, dedup_hash=None): + def store_metadata(file_name, url, db=None): if db == "firebase": handler = DATABASE_IDS.handlers().get(db) initialized_db = handler( @@ -1434,7 +1426,7 @@ def store_metadata(file_name, url, db=None, dedup_hash=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, dedup_hash) + db_uploader.upload_result_metadata(file_name, url) else: db_maintainer = DBMaintenance(initialized_db) logging.warning( From 86b31043af6d528447e84c59a33baf35da7dd551 Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 9 Feb 2026 10:43:42 -0800 Subject: [PATCH 48/59] Maint/firebase collection cleanup (#448) * remove local metadata writes for auto-pop feature * remove cleanup firebase workflow * remove cleanup firebase code * 1. make doc url a constant 2.remove unused param --- .github/workflows/cleanup-firebase.yml | 22 ------ cellpack/autopack/DBRecipeHandler.py | 70 +------------------ .../interface_objects/default_values.py | 1 - .../upy/simularium/simularium_helper.py | 45 +++--------- cellpack/bin/cleanup_tasks.py | 20 ------ cellpack/bin/upload.py | 7 +- 6 files changed, 14 insertions(+), 151 deletions(-) delete mode 100644 .github/workflows/cleanup-firebase.yml delete mode 100644 cellpack/bin/cleanup_tasks.py diff --git a/.github/workflows/cleanup-firebase.yml b/.github/workflows/cleanup-firebase.yml deleted file mode 100644 index 89ccd179..00000000 --- a/.github/workflows/cleanup-firebase.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Cleanup Firebase Metadata - -on: - schedule: - - cron: "24 18 * * 1" # Runs at 18:24 UTC every Monday - -jobs: - cleanup: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.11] - os: [ubuntu-latest, windows-latest, macOS-latest] - steps: - - uses: actions/checkout@v6 - - uses: ./.github/actions/dependencies - - name: Cleanup Firebase Metadata - 
env: - FIREBASE_TOKEN: ${{ secrets.FIREBASE_TOKEN }} - FIREBASE_EMAIL: ${{ secrets.FIREBASE_EMAIL }} - run: | - uv run python cellpack/bin/cleanup_tasks.py diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 3eb691d5..8e3aec7f 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -1,7 +1,6 @@ import copy import logging import shutil -from datetime import datetime, timezone from enum import Enum from pathlib import Path @@ -10,7 +9,6 @@ import hashlib import json -import requests from cellpack.autopack.utils import deep_merge @@ -321,36 +319,6 @@ def __init__(self, settings): self.settings = settings -class ResultDoc: - def __init__(self, db): - self.db = db - - def handle_expired_results(self): - """ - Check if the results in the database are expired and delete them if the linked object expired. - """ - current_utc = datetime.now(timezone.utc) - results = self.db.get_all_docs("results") - if results: - for result in results: - result_data = self.db.doc_to_dict(result) - result_age = current_utc - result_data["timestamp"] - if result_age.days > 180 and not self.validate_existence( - result_data["url"] - ): - self.db.delete_doc("results", self.db.doc_id(result)) - logging.info("Results cleanup complete.") - else: - logging.info("No results found in the database.") - - def validate_existence(self, url): - """ - Validate the existence of an S3 object by checking if the URL is accessible. - Returns True if the URL is accessible. - """ - return requests.head(url).status_code == requests.codes.ok - - class DBUploader(object): """ Handles the uploading of data to the database. @@ -529,23 +497,6 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url): - """ - Upload the metadata of the result file to the database. 
- """ - if self.db: - username = self.db.get_username() - timestamp = self.db.create_timestamp() - self.db.update_or_create( - "results", - file_name, - { - "user": username, - "timestamp": timestamp, - "url": url, - }, - ) - def upload_job_status( self, dedup_hash, @@ -887,23 +838,4 @@ def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict): return recipe_data -class DBMaintenance(object): - """ - Handles the maintenance of the database. - """ - - def __init__(self, db_handler): - self.db = db_handler - self.result_doc = ResultDoc(self.db) - - def cleanup_results(self): - """ - Check if the results in the database are expired and delete them if the linked object expired. - """ - self.result_doc.handle_expired_results() - - def readme_url(self): - """ - Return the URL to the README file for the database setup section. - """ - return "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" +DB_SETUP_README_URL = "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" diff --git a/cellpack/autopack/interface_objects/default_values.py b/cellpack/autopack/interface_objects/default_values.py index bbdbc452..969eaa6a 100644 --- a/cellpack/autopack/interface_objects/default_values.py +++ b/cellpack/autopack/interface_objects/default_values.py @@ -9,7 +9,6 @@ "objects", "gradients", "recipes", - "results", "configs", "recipes_edited", ] diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 08179d85..87c616e1 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -22,7 +22,7 @@ from simulariumio.cellpack import HAND_TYPE, CellpackConverter from simulariumio.constants import DISPLAY_TYPE, VIZ_TYPE -from cellpack.autopack.DBRecipeHandler import DBMaintenance, DBUploader +from cellpack.autopack.DBRecipeHandler import DB_SETUP_README_URL from 
cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.upy import hostHelper from cellpack.autopack.upy.simularium.plots import PlotData @@ -1388,20 +1388,12 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") if dedup_hash is None: - file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws" - ) - if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase" - ) - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + url = simulariumHelper.store_result_file(simularium_file, storage="aws") + if url and open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod - def store_result_file( - file_path, storage=None, sub_folder="simularium" - ): + def store_result_file(file_path, storage=None, sub_folder="simularium"): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) initialized_handler = handler( @@ -1409,30 +1401,13 @@ def store_result_file( sub_folder_name=sub_folder, region_name="us-west-2", ) - file_name, url = initialized_handler.save_file_and_get_url(file_path) - if not file_name or not url: - db_maintainer = DBMaintenance(initialized_handler) - logging.warning( - f"Skipping browser opening, upload credentials not configured. 
For setup instructions see: {db_maintainer.readme_url()}" - ) - return file_name, url - - @staticmethod - def store_metadata(file_name, url, db=None): - if db == "firebase": - handler = DATABASE_IDS.handlers().get(db) - initialized_db = handler( - default_db="staging" - ) # default to staging for metadata uploads - if initialized_db._initialized: - db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url) - else: - db_maintainer = DBMaintenance(initialized_db) + _, url = initialized_handler.save_file_and_get_url(file_path) + if not url: logging.warning( - f"Firebase credentials not found. For setup instructions see: {db_maintainer.readme_url()}. Or try cellPACK web interface: https://cellpack.allencell.org (no setup required)" + f"Skipping browser opening, upload credentials not configured. For setup instructions see: {DB_SETUP_README_URL}" ) - return + return url + return None @staticmethod def open_in_simularium(aws_url): diff --git a/cellpack/bin/cleanup_tasks.py b/cellpack/bin/cleanup_tasks.py deleted file mode 100644 index 08217aa0..00000000 --- a/cellpack/bin/cleanup_tasks.py +++ /dev/null @@ -1,20 +0,0 @@ -from cellpack.autopack.DBRecipeHandler import DBMaintenance -from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS - - -def run_cleanup(db_id=DATABASE_IDS.FIREBASE): - """ - Performs cleanup operations on expired database entries. 
- This function is executed as part of a scheduled task defined in .github/workflows/cleanup-firebase.yml - - Args: - db_id(str): The database id to use - """ - handler = DATABASE_IDS.handlers().get(db_id) - initialized_db = handler(default_db="staging") - db_maintainer = DBMaintenance(initialized_db) - db_maintainer.cleanup_results() - - -if __name__ == "__main__": - run_cleanup() diff --git a/cellpack/bin/upload.py b/cellpack/bin/upload.py index b038b4e4..59fc2104 100644 --- a/cellpack/bin/upload.py +++ b/cellpack/bin/upload.py @@ -3,7 +3,7 @@ import json from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBUploader, DBMaintenance +from cellpack.autopack.DBRecipeHandler import DBUploader, DB_SETUP_README_URL from cellpack.autopack.upy.simularium.simularium_helper import simulariumHelper from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.loaders.config_loader import ConfigLoader @@ -90,7 +90,7 @@ def upload( id, _ = db_handler.upload_data("editable_fields", field) editable_fields_ids.append(id) if output_file: - _, result_url = simulariumHelper.store_result_file( + result_url = simulariumHelper.store_result_file( output_file, storage="aws", sub_folder="client" ) if studio: @@ -105,9 +105,8 @@ def upload( db_handler.upload_data("example_packings", recipe_metadata) else: - db_maintainer = DBMaintenance(db_handler) sys.exit( - f"The selected database is not initialized. Please set up Firebase credentials to upload recipes. Refer to the instructions at {db_maintainer.readme_url()} " + f"The selected database is not initialized. Please set up Firebase credentials to upload recipes. 
Refer to the instructions at {DB_SETUP_README_URL} " ) From 45a10ab8c4bb5ec146906f755ce9addd15493367 Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 23 Feb 2026 10:31:13 -0800 Subject: [PATCH 49/59] handle both recipe_path and json body requests (#449) --- docker/server.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docker/server.py b/docker/server.py index 74bb20f3..7b0a209d 100644 --- a/docker/server.py +++ b/docker/server.py @@ -1,4 +1,5 @@ import asyncio +import uuid from aiohttp import web from cellpack.autopack.DBRecipeHandler import DataDoc, DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS @@ -25,19 +26,19 @@ def job_exists(self, dedup_hash): job_status, _ = db.get_doc_by_id("job_status", dedup_hash) return job_status is not None - async def run_packing(self, dedup_hash, recipe=None, config=None, body=None): - self.update_job_status(dedup_hash, "RUNNING") + async def run_packing(self, job_id, recipe=None, config=None, body=None): + self.update_job_status(job_id, "RUNNING") try: # Pack JSON recipe in body if provided, otherwise use recipe path - pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=dedup_hash) + pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=job_id) except Exception as e: - self.update_job_status(dedup_hash, "FAILED", error_message=str(e)) + self.update_job_status(job_id, "FAILED", error_message=str(e)) - def update_job_status(self, dedup_hash, status, result_path=None, error_message=None): + def update_job_status(self, job_id, status, result_path=None, error_message=None): db = self._get_firebase_handler() if db: db_uploader = DBUploader(db) - db_uploader.upload_job_status(dedup_hash, status, result_path, error_message) + db_uploader.upload_job_status(job_id, status, result_path, error_message) async def hello_world(self, request: web.Request) -> 
web.Response: return web.Response(text="Hello from the cellPACK server") @@ -58,13 +59,18 @@ async def pack_handler(self, request: web.Request) -> web.Response: ) config = request.rel_url.query.get("config") - dedup_hash = DataDoc.generate_hash(body) - - if self.job_exists(dedup_hash): - return web.json_response({"jobId": dedup_hash}) + if body: + dedup_hash = DataDoc.generate_hash(body) + if self.job_exists(dedup_hash): + return web.json_response({"jobId": dedup_hash}) + job_id = dedup_hash + else: + job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(dedup_hash, recipe, config, body)) + packing_task = asyncio.create_task( + self.run_packing(job_id, recipe, config, body) + ) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion @@ -73,7 +79,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: # return job id immediately, rather than wait for task to complete, # to avoid timeout issues with API gateway - return web.json_response({"jobId": dedup_hash}) + return web.json_response({"jobId": job_id}) async def init_app() -> web.Application: From 801b86ff9c3816ba8fb4ade187d86360eedc78cd Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 11 Mar 2026 14:14:29 -0700 Subject: [PATCH 50/59] change error message body --- docker/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server.py b/docker/server.py index 7b0a209d..08b3cb5c 100644 --- a/docker/server.py +++ b/docker/server.py @@ -55,7 +55,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: body = None if not recipe and not body: raise web.HTTPBadRequest( - "Pack requests must include recipe as a query param" + "Pack requests must include a recipe, either as a query param or in the request body" ) config = request.rel_url.query.get("config") From 7a42705e4253e93fe1ced479f4e36b43c1d67533 Mon Sep 17 00:00:00 2001 
From: ascibisz Date: Wed, 11 Mar 2026 14:25:17 -0700 Subject: [PATCH 51/59] lint fixes --- cellpack/autopack/upy/simularium/simularium_helper.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 87c616e1..99f54cf7 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1388,9 +1388,13 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") if dedup_hash is None: - url = simulariumHelper.store_result_file(simularium_file, storage="aws") - if url and open_results_in_browser: - simulariumHelper.open_in_simularium(url) + file_name, url = simulariumHelper.store_result_file( + simularium_file, storage="aws" + ) + if file_name and url: + simulariumHelper.store_metadata(file_name, url, db="firebase") + if open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod def store_result_file(file_path, storage=None, sub_folder="simularium"): From c2259af87aa2614619c6a47c37adb1ca6e554157 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 11 Mar 2026 14:25:51 -0700 Subject: [PATCH 52/59] add more checks when attempting to read json body --- docker/server.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/docker/server.py b/docker/server.py index 08b3cb5c..00523eb5 100644 --- a/docker/server.py +++ b/docker/server.py @@ -49,10 +49,17 @@ async def health_check(self, request: web.Request) -> web.Response: async def pack_handler(self, request: web.Request) -> web.Response: recipe = request.rel_url.query.get("recipe") or "" - if request.can_read_body: - body = await request.json() - else: - body = None + body = None + + # If request has a body, attempt to parse it as JSON and use as 
recipe + # otherwise rely on recipe query param + if (request.can_read_body and request.content_length + and request.content_length > 0): + try: + body = await request.json() + except Exception: + body = None + if not recipe and not body: raise web.HTTPBadRequest( "Pack requests must include a recipe, either as a query param or in the request body" From e86069ee02b92d52ab4eac15106a0770394bd91a Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Wed, 25 Mar 2026 13:52:28 -0700 Subject: [PATCH 53/59] let recipe loader check the input and key stripping --- cellpack/autopack/loaders/recipe_loader.py | 44 ++++++++----------- .../upy/simularium/simularium_helper.py | 10 ++--- cellpack/bin/pack.py | 11 ++--- .../tests/test_recipe_version_migration.py | 6 +-- 4 files changed, 27 insertions(+), 44 deletions(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index bbdb662a..ce220c0f 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -32,39 +32,30 @@ class RecipeLoader(object): def __init__( self, - input_file_path, + input_data, save_converted_recipe=False, use_docker=False, - json_recipe=None, ): - _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION - self.file_path = input_file_path - self.file_extension = file_extension self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = save_converted_recipe - self.json_recipe = json_recipe - # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes - if autopack.is_remote_path(self.file_path): - autopack.CURRENT_RECIPE_PATH = os.path.join( - os.getcwd(), "examples", "recipes", "v2" - ) + if isinstance(input_data, dict): + self._json_recipe = input_data + self.file_path = None else: - autopack.CURRENT_RECIPE_PATH = os.path.dirname(self.file_path) + self.file_path = input_data + self._json_recipe = None + if autopack.is_remote_path(self.file_path): 
+ autopack.CURRENT_RECIPE_PATH = os.path.join( + os.getcwd(), "examples", "recipes", "v2" + ) + else: + autopack.CURRENT_RECIPE_PATH = os.path.dirname(self.file_path) self.recipe_data = self._read(use_docker=use_docker) - @classmethod - def from_json(cls, json_recipe, save_converted_recipe=False, use_docker=False): - return cls( - input_file_path="", - save_converted_recipe=save_converted_recipe, - use_docker=use_docker, - json_recipe=json_recipe, - ) - @staticmethod def _resolve_object(key, objects): current_object = objects[key] @@ -186,20 +177,23 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): database_name = None is_unnested_firebase = False - new_values = self.json_recipe + new_values = self._json_recipe if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( self.file_path, cache="recipes", use_docker=use_docker ) + if "composition" in new_values: + new_values["composition"] = DBRecipeLoader.remove_empty( + new_values["composition"] + ) + if database_name == "firebase": if is_unnested_firebase: objects = new_values.get("objects", {}) gradients = new_values.get("gradients", {}) - composition = DBRecipeLoader.remove_empty( - new_values.get("composition", {}) - ) + composition = new_values.get("composition", {}) else: objects, gradients, composition = DBRecipeLoader.collect_and_sort_data( new_values["composition"] diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 99f54cf7..87c616e1 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1388,13 +1388,9 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") if dedup_hash is None: - file_name, url = 
simulariumHelper.store_result_file( - simularium_file, storage="aws" - ) - if file_name and url: - simulariumHelper.store_metadata(file_name, url, db="firebase") - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + url = simulariumHelper.store_result_file(simularium_file, storage="aws") + if url and open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod def store_result_file(file_path, storage=None, sub_folder="simularium"): diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 27c4d018..d20f2fd7 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -42,14 +42,9 @@ def pack( """ packing_config_data = ConfigLoader(config_path, docker).config - if isinstance(recipe, dict): - # Load recipe from JSON dictionary - recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) - else: - # Load recipe from file path - recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker - ) + recipe_loader = RecipeLoader( + recipe, packing_config_data["save_converted_recipe"], docker + ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} if analysis_config_path is not None: diff --git a/cellpack/tests/test_recipe_version_migration.py b/cellpack/tests/test_recipe_version_migration.py index a4a77044..37801c1f 100644 --- a/cellpack/tests/test_recipe_version_migration.py +++ b/cellpack/tests/test_recipe_version_migration.py @@ -317,7 +317,7 @@ def test_convert_v1_to_v2( [ ( RecipeLoader( - input_file_path="cellpack/tests/recipes/v1/test_single_spheres.json" + "cellpack/tests/recipes/v1/test_single_spheres.json" ).recipe_data, { "version": "1.0", @@ -386,9 +386,7 @@ def test_migrate_version_error(): "converted_compartment_data, expected_compartment_data", [ ( - RecipeLoader( - input_file_path="cellpack/tests/recipes/v1/test_compartment.json" - ).recipe_data, + RecipeLoader("cellpack/tests/recipes/v1/test_compartment.json").recipe_data, { "version": "1.0", 
"format_version": "2.0", From bcdc065abdeb8f99e169e098dc493a400243ad9e Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:36:34 -0700 Subject: [PATCH 54/59] Update cellpack/autopack/writers/__init__.py Co-authored-by: Saurabh Mogre --- cellpack/autopack/writers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellpack/autopack/writers/__init__.py b/cellpack/autopack/writers/__init__.py index 1484d20f..434fd008 100644 --- a/cellpack/autopack/writers/__init__.py +++ b/cellpack/autopack/writers/__init__.py @@ -196,7 +196,7 @@ def save_as_simularium(self, env, seed_to_results_map): number_of_packings = env.config_data.get("number_of_packings", 1) open_results_in_browser = env.config_data.get("open_results_in_browser", False) upload_results = env.config_data.get("upload_results", False) - dedup_hash = getattr(env, "dedup_hash", None) + dedup_hash = getattr(env, "dedup_hash") if (number_of_packings == 1 or is_aggregate) and upload_results: autopack.helper.post_and_open_file( file_name, open_results_in_browser, dedup_hash From e667517178567a791ffbeea851ba9aa87622fa91 Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Mon, 30 Mar 2026 13:01:33 -0700 Subject: [PATCH 55/59] use `isinstance` for AWSHandler, and misc --- cellpack/autopack/DBRecipeHandler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 8e3aec7f..03558df4 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -5,6 +5,7 @@ from pathlib import Path from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS +from cellpack.autopack.AWSHandler import AWSHandler import hashlib @@ -12,6 +13,8 @@ from cellpack.autopack.utils import deep_merge +DB_SETUP_README_URL = "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" + class 
DataDoc(object): def __init__( @@ -511,7 +514,7 @@ def upload_job_status( if self.db: db_handler = self.db # If db is AWSHandler, switch to firebase handler for job status updates - if hasattr(self.db, "s3_client"): + if isinstance(self.db, AWSHandler): handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE) db_handler = handler(default_db="staging") timestamp = db_handler.create_timestamp() @@ -836,6 +839,3 @@ def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict): {**v} for v in DBRecipeLoader.remove_dedup_hash(grad_dict).values() ] return recipe_data - - -DB_SETUP_README_URL = "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" From f9570ff7a393e2f5493a7cc80a2a074e2b07186e Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Mon, 30 Mar 2026 13:02:21 -0700 Subject: [PATCH 56/59] update aws tests --- cellpack/tests/test_db_uploader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cellpack/tests/test_db_uploader.py b/cellpack/tests/test_db_uploader.py index 414f6b9c..5d3ee4d7 100644 --- a/cellpack/tests/test_db_uploader.py +++ b/cellpack/tests/test_db_uploader.py @@ -1,3 +1,4 @@ +from cellpack.autopack.AWSHandler import AWSHandler from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.tests.mocks.mock_db import MockDB from unittest.mock import MagicMock, patch @@ -180,8 +181,6 @@ def test_upload_recipe(): def test_upload_job_status_with_firebase_handler(): mock_firebase_db = MagicMock() mock_firebase_db.create_timestamp.return_value = "test_timestamp" - # firebaseHandler does not have s3_client attribute - del mock_firebase_db.s3_client uploader = DBUploader(mock_firebase_db) uploader.upload_job_status("test_hash", "RUNNING") @@ -199,8 +198,9 @@ def test_upload_job_status_with_firebase_handler(): def test_upload_job_status_with_aws_handler(): - mock_aws_db = MagicMock() - mock_aws_db.s3_client = MagicMock() # AWSHandler has s3_client + mock_aws_db = 
MagicMock(spec=AWSHandler, wraps=None) + # Allow create_timestamp to be checked even though it's not on AWSHandler + mock_aws_db.create_timestamp = MagicMock() mock_firebase_handler = MagicMock() mock_firebase_handler.create_timestamp.return_value = "firebase_timestamp" From b2db8ecb574574f9a92bd8b3757205f6b3251481 Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Mon, 30 Mar 2026 13:25:21 -0700 Subject: [PATCH 57/59] initialize dedup_hash --- cellpack/autopack/Environment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cellpack/autopack/Environment.py b/cellpack/autopack/Environment.py index 101dce8f..a9805dae 100644 --- a/cellpack/autopack/Environment.py +++ b/cellpack/autopack/Environment.py @@ -124,6 +124,7 @@ def __init__(self, config=None, recipe=None): self.config_data = config self.recipe_data = recipe + self.dedup_hash = None name = recipe["name"] self.log = logging.getLogger("env") self.log.propagate = False From a8ab9ad895b691ff164119af0c5c2296d2fb983e Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Mon, 30 Mar 2026 13:35:56 -0700 Subject: [PATCH 58/59] add in-line comment --- cellpack/bin/pack.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index d20f2fd7..75ee170b 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -82,6 +82,7 @@ def pack( env.buildGrid(rebuild=True) env.pack_grid(verbose=0, usePP=False) + # Upload results to S3 for server-initiated packings (docker and hash are both provided) if docker and hash: handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) # temporarily using demo bucket before permissions are granted From fd126196df5976e5ec45558a719c0c5867657393 Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Mon, 30 Mar 2026 13:58:01 -0700 Subject: [PATCH 59/59] temp solution: use requirement.txt --- docker/Dockerfile.ecs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 5ed89b7b..7a303c02 100644 --- 
a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,7 +4,7 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . --root-user-action=ignore +RUN python -m pip install . -r requirements/linux/requirements.txt --root-user-action=ignore EXPOSE 80