From 35fe80ec878bbaa31fefb3edfca4cce34e7e8620 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 28 Apr 2026 15:57:59 +0200 Subject: [PATCH 01/25] added endpoint for data retrieval --- src/simdb/remote/apis/v1_2/__init__.py | 3 +- src/simdb/remote/apis/v1_2/simulation_data.py | 233 ++++++++++++++++++ 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 src/simdb/remote/apis/v1_2/simulation_data.py diff --git a/src/simdb/remote/apis/v1_2/__init__.py b/src/simdb/remote/apis/v1_2/__init__.py index 920f303..6b8bc01 100644 --- a/src/simdb/remote/apis/v1_2/__init__.py +++ b/src/simdb/remote/apis/v1_2/__init__.py @@ -10,6 +10,7 @@ from simdb.remote.core.typing import current_app from simdb.remote.models import StagingDirectoryResponse +from .simulation_data import api as data_ns from .simulations import api as sim_ns api = Api( @@ -31,7 +32,7 @@ ) api.add_namespace(sim_ns) -namespaces = [metadata_ns, watcher_ns, file_ns, sim_ns] +namespaces = [metadata_ns, watcher_ns, file_ns, sim_ns, data_ns] @api.route("/staging_dir", defaults={"sim_hex": None}) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py new file mode 100644 index 0000000..482f17a --- /dev/null +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -0,0 +1,233 @@ +"""Simulation IMAS data endpoint: /data. + +TODO: Temporal solution to retrive data (Use IBEX backend) +""" + +import re +import uuid as _uuid +from typing import Any + +import numpy as np +from flask import request +from flask_restx import Namespace, Resource +from imas.ids_primitive import IDSPrimitive + +from simdb.cli.manifest import DataObject +from simdb.database import DatabaseError +from simdb.imas.utils import FLOAT_MISSING_VALUE, INT_MISSING_VALUE, ImasError, open_imas +from simdb.remote.core.auth import User, requires_auth +from simdb.remote.core.cache import cache +from simdb.remote.core.typing import current_app +from simdb.uri import URI + +api = Namespace("data", path="/") + + +# Helpers + + +def _to_python(value: Any) -> Any: + """Convert a value returned by IDSPrimitive.value to a JSON-serialisable + Python object.""" + if isinstance(value, np.ndarray): + flat = value.tolist() + + def _clean(v): + if isinstance(v, float) and ( + v != v or v == float("inf") or v == float("-inf") or v == FLOAT_MISSING_VALUE + ): + return None + if isinstance(v, list): + return [_clean(x) for x in v] + return v + + return _clean(flat) + if isinstance(value, np.integer): + v = int(value) + return None if v == INT_MISSING_VALUE else v + if isinstance(value, np.floating): + v = float(value) + return None if (np.isnan(v) or np.isinf(v) or v == FLOAT_MISSING_VALUE) else v + if isinstance(value, np.complexfloating): + return {"real": float(value.real), "imag": float(value.imag)} + if isinstance(value, np.bool_): + return bool(value) + return value + +# TODO Replace this logic with slicing when supported by imas-python. +# TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. +def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): + """Walk *field_segments* inside *ids_name* and return (value, shape, coordinate_path). + + Each segment is either: + - a non-negative integer string → array-of-structures index + - a plain name → attribute access (IDSStructure child node) + """ + ids_obj = entry.get( + ids_name, occurrence, + lazy=True, autoconvert=False, ignore_unknown_dd_version=True, + ) + node = ids_obj + for segment in field_segments: + if segment.isdigit(): + node = node[int(segment)] + else: + try: + node = getattr(node, segment) + except AttributeError: + raise ValueError(f"segment '{segment}' not found in IDS path") + if not isinstance(node, IDSPrimitive): + raise ValueError( + f"path does not point to a scalar/array leaf " + f"(reached {type(node).__name__}); add more path segments" + ) + if not node.has_value: + raise ValueError(f"field is not populated (no data written)") + + node_shape = list(node.shape) if node.metadata.ndim > 0 else None + + coordinate_path = None + try: + def _replace_placeholder(m, _segs=field_segments): + idx = next((s for s in _segs if s.isdigit()), "0") + return "/" + idx + "/" + + for coord in node.metadata.coordinates: + clean = re.sub(r"\([^)]+\)/", _replace_placeholder, str(coord)) + coordinate_path = ids_name + "/" + clean + break + except Exception: + pass + + return _to_python(node.value), node_shape, coordinate_path + + +def _fetch_field(uri_str: str, ids_name: str, field_segments: tuple, occurrence: int) -> tuple: + """Open the IMAS entry, traverse the path, and return (value, shape, coordinate_path). + + Scalar results (``shape is None``) are written into the response cache so + that repeated requests skip the IMAS open. Array values are intentionally + *not* cached: caching large numpy-derived lists would create persistent + memory pressure and could fill the cache backend with multi-MB payloads. + """ + if ids_name and not field_segments: # bare IDS name only – no leaf, skip cache probe + pass + else: + cache_key = ( + f"simdb:field:{uri_str}:{ids_name}:" + f"{'/' .join(field_segments)}:{occurrence}" + ) + cached = cache.get(cache_key) + if cached is not None: + return cached + + entry = open_imas(URI(uri_str)) + with entry: + result = _traverse_path(entry, ids_name, list(field_segments), occurrence) + + _value, shape, _coord = result + if shape is None: # scalar leaf – safe to persist in cache + cache.set(cache_key, result) # type: ignore[possibly-undefined] + return result + + +def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): + try: + simulation = current_app.db.get_simulation(sim_id) + except DatabaseError as exc: + return None, None, ({"error": str(exc)}, 404) + + imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] + if not imas_outputs: + return None, None, ( + {"error": f"Simulation {sim_id} has no IMAS output files"}, 404 + ) + + if not file_uuid_str: + return simulation, imas_outputs[0], None + + try: + target_uuid = _uuid.UUID(file_uuid_str) + except ValueError: + return None, None, ({"error": f"Invalid file_uuid: {file_uuid_str!r}"}, 400) + + imas_file = next((f for f in imas_outputs if f.uuid == target_uuid), None) + if imas_file is None: + return None, None, ({"error": f"File {file_uuid_str} not found"}, 404) + + return simulation, imas_file, None + + +# Endpoints + +@api.route("/simulation//data") +class SimulationImasData(Resource): + @requires_auth() + def get(self, sim_id: str, user: User): + """Return the value at a given IDS path for a simulation's IMAS output. + + Query parameters + ---------------- + path (required) IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density`` + file_uuid (optional) UUID of an IMAS output file + occurrence (optional) IDS occurrence index (default 0) + """ + path = request.args.get("path", "").strip() + if not path: + return {"error": "Query parameter 'path' is required"}, 400 + + file_uuid_str = request.args.get("file_uuid", "").strip() or None + + try: + occurrence = int(request.args.get("occurrence", "0")) + except ValueError: + return {"error": "'occurrence' must be a non-negative integer"}, 400 + if occurrence < 0: + return {"error": "'occurrence' must be a non-negative integer"}, 400 + + simulation, imas_file, error = _get_simulation_and_imas_file( + sim_id, file_uuid_str + ) + if error: + payload, status = error + if file_uuid_str and status == 404 and "File " in payload["error"]: + return ( + { + "error": ( + f"File {file_uuid_str} not found or is not an IMAS " + "output for this simulation" + ) + }, + 404, + ) + return payload, status + + segments = [s for s in path.split("/") if s] + if not segments: + return {"error": "'path' must not be empty"}, 400 + + ids_name = segments[0] + field_segments = segments[1:] + + try: + value, shape, coordinate_path = _fetch_field( + str(imas_file.uri), ids_name, tuple(field_segments), occurrence + ) + except (ValueError, AttributeError, IndexError, KeyError) as exc: + return {"error": f"Invalid IDS path '{path}': {exc}"}, 400 + except (ImasError,) as exc: + return {"error": f"Failed to open IMAS data: {exc}"}, 500 + except Exception as exc: + msg = str(exc) + status = 404 if "is empty" in msg or "not found" in msg.lower() else 500 + return {"error": msg}, status + + return { + "simulation": str(simulation.uuid), + "file_uuid": str(imas_file.uuid), + "path": path, + "occurrence": occurrence, + "value": value, + "shape": shape, + "coordinate": coordinate_path, + } From 05b8818a65c67374363c2ed67c6dde3b6bd3227c Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 28 Apr 2026 16:20:49 +0200 Subject: [PATCH 02/25] fixed formatting --- src/simdb/remote/apis/v1_2/simulation_data.py | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 482f17a..41fe5ce 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -14,7 +14,12 @@ from simdb.cli.manifest import DataObject from simdb.database import DatabaseError -from simdb.imas.utils import FLOAT_MISSING_VALUE, INT_MISSING_VALUE, ImasError, open_imas +from simdb.imas.utils import ( + FLOAT_MISSING_VALUE, + INT_MISSING_VALUE, + ImasError, + open_imas, +) from simdb.remote.core.auth import User, requires_auth from simdb.remote.core.cache import cache from simdb.remote.core.typing import current_app @@ -34,7 +39,10 @@ def _to_python(value: Any) -> Any: def _clean(v): if isinstance(v, float) and ( - v != v or v == float("inf") or v == float("-inf") or v == FLOAT_MISSING_VALUE + v != v + or v == float("inf") + or v == float("-inf") + or v == FLOAT_MISSING_VALUE ): return None if isinstance(v, list): @@ -54,6 +62,7 @@ def _clean(v): return bool(value) return value + # TODO Replace this logic with slicing when supported by imas-python. # TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): @@ -64,8 +73,11 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): - a plain name → attribute access (IDSStructure child node) """ ids_obj = entry.get( - ids_name, occurrence, - lazy=True, autoconvert=False, ignore_unknown_dd_version=True, + ids_name, + occurrence, + lazy=True, + autoconvert=False, + ignore_unknown_dd_version=True, ) node = ids_obj for segment in field_segments: @@ -88,6 +100,7 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): coordinate_path = None try: + def _replace_placeholder(m, _segs=field_segments): idx = next((s for s in _segs if s.isdigit()), "0") return "/" + idx + "/" @@ -102,7 +115,9 @@ def _replace_placeholder(m, _segs=field_segments): return _to_python(node.value), node_shape, coordinate_path -def _fetch_field(uri_str: str, ids_name: str, field_segments: tuple, occurrence: int) -> tuple: +def _fetch_field( + uri_str: str, ids_name: str, field_segments: tuple, occurrence: int +) -> tuple: """Open the IMAS entry, traverse the path, and return (value, shape, coordinate_path). Scalar results (``shape is None``) are written into the response cache so @@ -110,12 +125,13 @@ def _fetch_field(uri_str: str, ids_name: str, field_segments: tuple, occurrence: *not* cached: caching large numpy-derived lists would create persistent memory pressure and could fill the cache backend with multi-MB payloads. """ - if ids_name and not field_segments: # bare IDS name only – no leaf, skip cache probe + if ( + ids_name and not field_segments + ): # bare IDS name only – no leaf, skip cache probe pass else: cache_key = ( - f"simdb:field:{uri_str}:{ids_name}:" - f"{'/' .join(field_segments)}:{occurrence}" + f"simdb:field:{uri_str}:{ids_name}:{'/'.join(field_segments)}:{occurrence}" ) cached = cache.get(cache_key) if cached is not None: @@ -139,8 +155,10 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] if not imas_outputs: - return None, None, ( - {"error": f"Simulation {sim_id} has no IMAS output files"}, 404 + return ( + None, + None, + ({"error": f"Simulation {sim_id} has no IMAS output files"}, 404), ) if not file_uuid_str: @@ -160,6 +178,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): # Endpoints + @api.route("/simulation//data") class SimulationImasData(Resource): @requires_auth() From 66a6ba7d1d1b22a12ec00ba572fdb2aae77a3648 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Wed, 29 Apr 2026 09:20:56 +0200 Subject: [PATCH 03/25] fixed linting and typing issues --- src/simdb/remote/apis/v1_2/simulation_data.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 41fe5ce..1b67fdd 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -5,7 +5,7 @@ import re import uuid as _uuid -from typing import Any +from typing import Any, Optional import numpy as np from flask import request @@ -66,11 +66,11 @@ def _clean(v): # TODO Replace this logic with slicing when supported by imas-python. # TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): - """Walk *field_segments* inside *ids_name* and return (value, shape, coordinate_path). + """Walk inside *ids_name* and return (value, shape, coordinate_path). Each segment is either: - - a non-negative integer string → array-of-structures index - - a plain name → attribute access (IDSStructure child node) + - a non-negative integer string: array-of-structures index + - a plain name: attribute access (IDSStructure child node) """ ids_obj = entry.get( ids_name, @@ -86,15 +86,15 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): else: try: node = getattr(node, segment) - except AttributeError: - raise ValueError(f"segment '{segment}' not found in IDS path") + except AttributeError as err: + raise ValueError(f"segment '{segment}' not found in IDS path") from err if not isinstance(node, IDSPrimitive): raise ValueError( f"path does not point to a scalar/array leaf " f"(reached {type(node).__name__}); add more path segments" ) if not node.has_value: - raise ValueError(f"field is not populated (no data written)") + raise ValueError("field is not populated (no data written)") node_shape = list(node.shape) if node.metadata.ndim > 0 else None @@ -118,7 +118,7 @@ def _replace_placeholder(m, _segs=field_segments): def _fetch_field( uri_str: str, ids_name: str, field_segments: tuple, occurrence: int ) -> tuple: - """Open the IMAS entry, traverse the path, and return (value, shape, coordinate_path). + """Open the IMAS entry and return (value, shape, coordinate_path). Scalar results (``shape is None``) are written into the response cache so that repeated requests skip the IMAS open. Array values are intentionally @@ -127,7 +127,7 @@ def _fetch_field( """ if ( ids_name and not field_segments - ): # bare IDS name only – no leaf, skip cache probe + ): # bare IDS name only - no leaf, skip cache probe pass else: cache_key = ( @@ -142,12 +142,12 @@ def _fetch_field( result = _traverse_path(entry, ids_name, list(field_segments), occurrence) _value, shape, _coord = result - if shape is None: # scalar leaf – safe to persist in cache + if shape is None: # scalar leaf - safe to persist in cache cache.set(cache_key, result) # type: ignore[possibly-undefined] return result -def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): +def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: Optional[str]): try: simulation = current_app.db.get_simulation(sim_id) except DatabaseError as exc: @@ -187,7 +187,8 @@ def get(self, sim_id: str, user: User): Query parameters ---------------- - path (required) IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density`` + path (required) IDS path, e.g. + ``core_profiles/profiles_1d/0/electrons/density`` file_uuid (optional) UUID of an IMAS output file occurrence (optional) IDS occurrence index (default 0) """ @@ -234,7 +235,7 @@ def get(self, sim_id: str, user: User): ) except (ValueError, AttributeError, IndexError, KeyError) as exc: return {"error": f"Invalid IDS path '{path}': {exc}"}, 400 - except (ImasError,) as exc: + except ImasError as exc: return {"error": f"Failed to open IMAS data: {exc}"}, 500 except Exception as exc: msg = str(exc) From 0aec0153771dea0c4f963c9a8e493a93b669ff44 Mon Sep 17 00:00:00 2001 From: Prasad Date: Mon, 4 May 2026 10:27:04 +0200 Subject: [PATCH 04/25] Apply suggestions from code review Co-authored-by: Simon Pinches --- src/simdb/remote/apis/v1_2/simulation_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 1b67fdd..afb1a33 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -1,6 +1,6 @@ -"""Simulation IMAS data endpoint: /data. +"""IMAS simulation data endpoint: /data. -TODO: Temporal solution to retrive data (Use IBEX backend) +TODO: Temporary solution to retrieve data (for IBEX backend) """ import re @@ -63,7 +63,7 @@ def _clean(v): return value -# TODO Replace this logic with slicing when supported by imas-python. +# TODO Replace this logic with slicing when supported by IMAS-Python. # TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): """Walk inside *ids_name* and return (value, shape, coordinate_path). From 8a9ee4af63a067d162c5a6af8da36743f580cd72 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 4 May 2026 13:45:44 +0200 Subject: [PATCH 05/25] use pydantic models for input and output --- src/simdb/remote/apis/v1_2/simulation_data.py | 124 +++++++----------- src/simdb/remote/models.py | 47 +++++++ 2 files changed, 95 insertions(+), 76 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index afb1a33..13334b2 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -4,11 +4,10 @@ """ import re -import uuid as _uuid -from typing import Any, Optional +from typing import Annotated, Any, Optional +from uuid import UUID import numpy as np -from flask import request from flask_restx import Namespace, Resource from imas.ids_primitive import IDSPrimitive @@ -22,7 +21,14 @@ ) from simdb.remote.core.auth import User, requires_auth from simdb.remote.core.cache import cache +from simdb.remote.core.pydantic_utils import ( + Query, + ResponseException, + ServerException, + pydantic_validate, +) from simdb.remote.core.typing import current_app +from simdb.remote.models import ImasDataQueryParams, ImasDataResponse from simdb.uri import URI api = Namespace("data", path="/") @@ -147,33 +153,29 @@ def _fetch_field( return result -def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: Optional[str]): +def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): try: simulation = current_app.db.get_simulation(sim_id) except DatabaseError as exc: - return None, None, ({"error": str(exc)}, 404) + raise ResponseException(str(exc), 404) from exc imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] if not imas_outputs: - return ( - None, - None, - ({"error": f"Simulation {sim_id} has no IMAS output files"}, 404), + raise ResponseException( + f"Simulation {sim_id} has no IMAS output files", 404 ) - if not file_uuid_str: - return simulation, imas_outputs[0], None - - try: - target_uuid = _uuid.UUID(file_uuid_str) - except ValueError: - return None, None, ({"error": f"Invalid file_uuid: {file_uuid_str!r}"}, 400) + if file_uuid is None: + return simulation, imas_outputs[0] - imas_file = next((f for f in imas_outputs if f.uuid == target_uuid), None) + imas_file = next((f for f in imas_outputs if f.uuid == file_uuid), None) if imas_file is None: - return None, None, ({"error": f"File {file_uuid_str} not found"}, 404) + raise ResponseException( + f"File {file_uuid} not found or is not an IMAS output for this simulation", + 404, + ) - return simulation, imas_file, None + return simulation, imas_file # Endpoints @@ -182,72 +184,42 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: Optional[str]): @api.route("/simulation//data") class SimulationImasData(Resource): @requires_auth() - def get(self, sim_id: str, user: User): - """Return the value at a given IDS path for a simulation's IMAS output. - - Query parameters - ---------------- - path (required) IDS path, e.g. - ``core_profiles/profiles_1d/0/electrons/density`` - file_uuid (optional) UUID of an IMAS output file - occurrence (optional) IDS occurrence index (default 0) - """ - path = request.args.get("path", "").strip() - if not path: - return {"error": "Query parameter 'path' is required"}, 400 - - file_uuid_str = request.args.get("file_uuid", "").strip() or None - - try: - occurrence = int(request.args.get("occurrence", "0")) - except ValueError: - return {"error": "'occurrence' must be a non-negative integer"}, 400 - if occurrence < 0: - return {"error": "'occurrence' must be a non-negative integer"}, 400 - - simulation, imas_file, error = _get_simulation_and_imas_file( - sim_id, file_uuid_str + @pydantic_validate(api) + def get( + self, + sim_id: str, + user: User, + params: Annotated[ImasDataQueryParams, Query()], + ) -> ImasDataResponse: + """Return the value at a given IDS path for a simulation's IMAS output.""" + simulation, imas_file = _get_simulation_and_imas_file( + sim_id, params.file_uuid ) - if error: - payload, status = error - if file_uuid_str and status == 404 and "File " in payload["error"]: - return ( - { - "error": ( - f"File {file_uuid_str} not found or is not an IMAS " - "output for this simulation" - ) - }, - 404, - ) - return payload, status - - segments = [s for s in path.split("/") if s] - if not segments: - return {"error": "'path' must not be empty"}, 400 + segments = [s for s in params.path.split("/") if s] ids_name = segments[0] field_segments = segments[1:] try: value, shape, coordinate_path = _fetch_field( - str(imas_file.uri), ids_name, tuple(field_segments), occurrence + str(imas_file.uri), ids_name, tuple(field_segments), params.occurrence ) except (ValueError, AttributeError, IndexError, KeyError) as exc: - return {"error": f"Invalid IDS path '{path}': {exc}"}, 400 + raise ResponseException(f"Invalid IDS path '{params.path}': {exc}") except ImasError as exc: - return {"error": f"Failed to open IMAS data: {exc}"}, 500 + raise ServerException(f"Failed to open IMAS data: {exc}") except Exception as exc: msg = str(exc) - status = 404 if "is empty" in msg or "not found" in msg.lower() else 500 - return {"error": msg}, status - - return { - "simulation": str(simulation.uuid), - "file_uuid": str(imas_file.uuid), - "path": path, - "occurrence": occurrence, - "value": value, - "shape": shape, - "coordinate": coordinate_path, - } + if "is empty" in msg or "not found" in msg.lower(): + raise ResponseException(msg, 404) + raise ServerException(msg) + + return ImasDataResponse( + simulation=str(simulation.uuid), + file_uuid=str(imas_file.uuid), + path=params.path, + occurrence=params.occurrence, + value=value, + shape=shape, + coordinate=coordinate_path, + ) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 62cee41..2088b70 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -28,6 +28,7 @@ Field, InstanceOf, PlainSerializer, + field_validator, model_validator, ) from pydantic import ( @@ -553,6 +554,52 @@ class StagingDirectoryResponse(BaseModel): """Path to the staging dir.""" +class ImasDataQueryParams(BaseModel): + """Query parameters for the IMAS field-data endpoint.""" + + path: str + """IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density``.""" + file_uuid: Optional[UUID] = None + """UUID of a specific IMAS output file (optional).""" + occurrence: int = Field(0, ge=0) + """IDS occurrence index (default 0).""" + + @field_validator("path", mode="before") + @classmethod + def _strip_path(cls, v: Any) -> str: + v = str(v).strip() + if not v: + raise ValueError("must not be empty") + return v + + @field_validator("file_uuid", mode="before") + @classmethod + def _strip_file_uuid(cls, v: Any) -> Any: + if v is None: + return None + stripped = str(v).strip() + return stripped if stripped else None + + +class ImasDataResponse(BaseModel): + """Response from the IMAS field-data endpoint.""" + + simulation: str + """UUID of the simulation.""" + file_uuid: str + """UUID of the IMAS output file.""" + path: str + """Requested IDS path.""" + occurrence: int + """IDS occurrence index.""" + value: Any + """Value at the requested IDS path.""" + shape: Optional[List[int]] = None + """Shape of the returned array, or ``None`` for scalars.""" + coordinate: Optional[str] = None + """Coordinate path for the first dimension, if available.""" + + class ErrorResponse(BaseModel): """Response model for server errors.""" From 2c01fb1fe0bd92c00b057942984a05e7078625df Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:15:31 +0200 Subject: [PATCH 06/25] resolved pull request comments from Maarten --- src/simdb/remote/apis/v1_2/simulation_data.py | 179 ++++++++---------- src/simdb/remote/models.py | 28 ++- 2 files changed, 98 insertions(+), 109 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 13334b2..30bcb9f 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -3,24 +3,21 @@ TODO: Temporary solution to retrieve data (for IBEX backend) """ -import re from typing import Annotated, Any, Optional from uuid import UUID import numpy as np from flask_restx import Namespace, Resource +from imas.ids_defs import EMPTY_COMPLEX, EMPTY_FLOAT, EMPTY_INT from imas.ids_primitive import IDSPrimitive from simdb.cli.manifest import DataObject from simdb.database import DatabaseError from simdb.imas.utils import ( - FLOAT_MISSING_VALUE, - INT_MISSING_VALUE, ImasError, open_imas, ) from simdb.remote.core.auth import User, requires_auth -from simdb.remote.core.cache import cache from simdb.remote.core.pydantic_utils import ( Query, ResponseException, @@ -28,7 +25,7 @@ pydantic_validate, ) from simdb.remote.core.typing import current_app -from simdb.remote.models import ImasDataQueryParams, ImasDataResponse +from simdb.remote.models import ImasDataQueryParams, ImasDataResponse, QuantityData from simdb.uri import URI api = Namespace("data", path="/") @@ -45,10 +42,7 @@ def _to_python(value: Any) -> Any: def _clean(v): if isinstance(v, float) and ( - v != v - or v == float("inf") - or v == float("-inf") - or v == FLOAT_MISSING_VALUE + v != v or v == float("inf") or v == float("-inf") or v == EMPTY_FLOAT ): return None if isinstance(v, list): @@ -58,26 +52,68 @@ def _clean(v): return _clean(flat) if isinstance(value, np.integer): v = int(value) - return None if v == INT_MISSING_VALUE else v + return None if v == EMPTY_INT else v if isinstance(value, np.floating): v = float(value) - return None if (np.isnan(v) or np.isinf(v) or v == FLOAT_MISSING_VALUE) else v + return None if (np.isnan(v) or np.isinf(v) or v == EMPTY_FLOAT) else v if isinstance(value, np.complexfloating): - return {"real": float(value.real), "imag": float(value.imag)} + r, i = float(value.real), float(value.imag) + if r == EMPTY_COMPLEX.real and i == EMPTY_COMPLEX.imag: + return None + return {"real": r, "imag": i} if isinstance(value, np.bool_): return bool(value) return value -# TODO Replace this logic with slicing when supported by IMAS-Python. -# TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. -def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): - """Walk inside *ids_name* and return (value, shape, coordinate_path). +def _parse_ids_path(path: str) -> tuple: + """Parse ``ids_name[:occurrence][/ids_path]`` into a 3-tuple""" + head, _, ids_path = path.partition("/") + if ":" in head: + ids_name, occ_str = head.split(":", 1) + try: + occurrence = int(occ_str) + except ValueError as exc: + raise ValueError( + f"Invalid occurrence in path '{path}': '{occ_str}'" + ) from exc + else: + ids_name, occurrence = head, 0 + return ids_name, occurrence, ids_path + + +def _get_coordinates(node: IDSPrimitive, ids_name: str) -> list: + """Return a :class:`QuantityData` for each coordinate dimension of *node*.""" + coords = [] + for i in range(node.metadata.ndim): + coord = node.coordinates[i] + if isinstance(coord, IDSPrimitive): + data = ( + _to_python(coord.value) + if coord.has_value + else list(range(node.shape[i])) + ) + coords.append( + QuantityData( + name=f"{ids_name}/{coord._path}", + units=coord.metadata.units or "", + data=data, + ) + ) + else: + # Index-based coordinate: coord is already a numpy arange + coords.append( + QuantityData( + name=f"dim_{i + 1}", + units="", + data=coord.tolist(), + ) + ) + return coords - Each segment is either: - - a non-negative integer string: array-of-structures index - - a plain name: attribute access (IDSStructure child node) - """ + +def _get_ids_node(entry, ids_name: str, occurrence: int, ids_path: str) -> IDSPrimitive: + """Return the :class:`IDSPrimitive` leaf node at *ids_path* inside *ids_name*.""" ids_obj = entry.get( ids_name, occurrence, @@ -85,15 +121,7 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): autoconvert=False, ignore_unknown_dd_version=True, ) - node = ids_obj - for segment in field_segments: - if segment.isdigit(): - node = node[int(segment)] - else: - try: - node = getattr(node, segment) - except AttributeError as err: - raise ValueError(f"segment '{segment}' not found in IDS path") from err + node = ids_obj[ids_path] if ids_path else ids_obj if not isinstance(node, IDSPrimitive): raise ValueError( f"path does not point to a scalar/array leaf " @@ -101,56 +129,7 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): ) if not node.has_value: raise ValueError("field is not populated (no data written)") - - node_shape = list(node.shape) if node.metadata.ndim > 0 else None - - coordinate_path = None - try: - - def _replace_placeholder(m, _segs=field_segments): - idx = next((s for s in _segs if s.isdigit()), "0") - return "/" + idx + "/" - - for coord in node.metadata.coordinates: - clean = re.sub(r"\([^)]+\)/", _replace_placeholder, str(coord)) - coordinate_path = ids_name + "/" + clean - break - except Exception: - pass - - return _to_python(node.value), node_shape, coordinate_path - - -def _fetch_field( - uri_str: str, ids_name: str, field_segments: tuple, occurrence: int -) -> tuple: - """Open the IMAS entry and return (value, shape, coordinate_path). - - Scalar results (``shape is None``) are written into the response cache so - that repeated requests skip the IMAS open. Array values are intentionally - *not* cached: caching large numpy-derived lists would create persistent - memory pressure and could fill the cache backend with multi-MB payloads. - """ - if ( - ids_name and not field_segments - ): # bare IDS name only - no leaf, skip cache probe - pass - else: - cache_key = ( - f"simdb:field:{uri_str}:{ids_name}:{'/'.join(field_segments)}:{occurrence}" - ) - cached = cache.get(cache_key) - if cached is not None: - return cached - - entry = open_imas(URI(uri_str)) - with entry: - result = _traverse_path(entry, ids_name, list(field_segments), occurrence) - - _value, shape, _coord = result - if shape is None: # scalar leaf - safe to persist in cache - cache.set(cache_key, result) # type: ignore[possibly-undefined] - return result + return node def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): @@ -161,9 +140,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] if not imas_outputs: - raise ResponseException( - f"Simulation {sim_id} has no IMAS output files", 404 - ) + raise ResponseException(f"Simulation {sim_id} has no IMAS output files", 404) if file_uuid is None: return simulation, imas_outputs[0] @@ -192,34 +169,38 @@ def get( params: Annotated[ImasDataQueryParams, Query()], ) -> ImasDataResponse: """Return the value at a given IDS path for a simulation's IMAS output.""" - simulation, imas_file = _get_simulation_and_imas_file( - sim_id, params.file_uuid - ) + simulation, imas_file = _get_simulation_and_imas_file(sim_id, params.file_uuid) - segments = [s for s in params.path.split("/") if s] - ids_name = segments[0] - field_segments = segments[1:] + try: + ids_name, occurrence, ids_path = _parse_ids_path(params.path) + except ValueError as exc: + raise ResponseException(str(exc)) from exc try: - value, shape, coordinate_path = _fetch_field( - str(imas_file.uri), ids_name, tuple(field_segments), params.occurrence - ) + entry = open_imas(URI(str(imas_file.uri))) + with entry: + node = _get_ids_node(entry, ids_name, occurrence, ids_path) + coordinates = _get_coordinates(node, ids_name) + field = QuantityData( + name=f"{ids_name}/{node._path}", + units=node.metadata.units or "", + data=_to_python(node.value), + ) except (ValueError, AttributeError, IndexError, KeyError) as exc: - raise ResponseException(f"Invalid IDS path '{params.path}': {exc}") + raise ResponseException(f"Invalid IDS path '{params.path}': {exc}") from exc except ImasError as exc: - raise ServerException(f"Failed to open IMAS data: {exc}") + raise ServerException(f"Failed to open IMAS data: {exc}") from exc except Exception as exc: msg = str(exc) if "is empty" in msg or "not found" in msg.lower(): - raise ResponseException(msg, 404) - raise ServerException(msg) + raise ResponseException(msg, 404) from exc + raise ServerException(msg) from exc return ImasDataResponse( simulation=str(simulation.uuid), file_uuid=str(imas_file.uuid), path=params.path, - occurrence=params.occurrence, - value=value, - shape=shape, - coordinate=coordinate_path, + occurrence=occurrence, + field=field, + coordinates=coordinates, ) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 2088b70..b8936cd 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -558,11 +558,9 @@ class ImasDataQueryParams(BaseModel): """Query parameters for the IMAS field-data endpoint.""" path: str - """IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density``.""" + """Full IDS path including IDS name and optional occurrence.""" file_uuid: Optional[UUID] = None """UUID of a specific IMAS output file (optional).""" - occurrence: int = Field(0, ge=0) - """IDS occurrence index (default 0).""" @field_validator("path", mode="before") @classmethod @@ -578,7 +576,19 @@ def _strip_file_uuid(cls, v: Any) -> Any: if v is None: return None stripped = str(v).strip() - return stripped if stripped else None + return stripped or None + + +class QuantityData(BaseModel): + """A named, unit-bearing data quantity (field value or coordinate).""" + + name: str + """IDS path of this quantity relative to the IDS root""" + units: str + """Physical units of the quantity""" + data: Any + """Data value: a Python scalar for 0-D quantities, or a nested list for + arrays. """ class ImasDataResponse(BaseModel): @@ -592,12 +602,10 @@ class ImasDataResponse(BaseModel): """Requested IDS path.""" occurrence: int """IDS occurrence index.""" - value: Any - """Value at the requested IDS path.""" - shape: Optional[List[int]] = None - """Shape of the returned array, or ``None`` for scalars.""" - coordinate: Optional[str] = None - """Coordinate path for the first dimension, if available.""" + field: QuantityData + """The requested quantity""" + coordinates: List[QuantityData] + """Coordinates for each dimension of *field*, in dimension order.""" class ErrorResponse(BaseModel): From 4de51d93d33f45011c5251100281fc4910a8dbf1 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:29:41 +0200 Subject: [PATCH 07/25] removed _bool check --- src/simdb/remote/apis/v1_2/simulation_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 30bcb9f..76faf86 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -61,8 +61,6 @@ def _clean(v): if r == EMPTY_COMPLEX.real and i == EMPTY_COMPLEX.imag: return None return {"real": r, "imag": i} - if isinstance(value, np.bool_): - return bool(value) return value From 678367628451cf5fa2583080827cd118db436ab6 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:39:52 +0200 Subject: [PATCH 08/25] use namedtuple when returning function values --- src/simdb/remote/apis/v1_2/simulation_data.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 76faf86..b3b141a 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -3,7 +3,7 @@ TODO: Temporary solution to retrieve data (for IBEX backend) """ -from typing import Annotated, Any, Optional +from typing import Annotated, Any, NamedTuple, Optional from uuid import UUID import numpy as np @@ -130,7 +130,14 @@ def _get_ids_node(entry, ids_name: str, occurrence: int, ids_path: str) -> IDSPr return node -def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): +class _SimulationImasFile(NamedTuple): + simulation: Any + imas_file: Any + + +def _get_simulation_and_imas_file( + sim_id: str, file_uuid: Optional[UUID] +) -> _SimulationImasFile: try: simulation = current_app.db.get_simulation(sim_id) except DatabaseError as exc: @@ -141,7 +148,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): raise ResponseException(f"Simulation {sim_id} has no IMAS output files", 404) if file_uuid is None: - return simulation, imas_outputs[0] + return _SimulationImasFile(simulation, imas_outputs[0]) imas_file = next((f for f in imas_outputs if f.uuid == file_uuid), None) if imas_file is None: @@ -150,7 +157,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): 404, ) - return simulation, imas_file + return _SimulationImasFile(simulation, imas_file) # Endpoints @@ -167,7 +174,7 @@ def get( params: Annotated[ImasDataQueryParams, Query()], ) -> ImasDataResponse: """Return the value at a given IDS path for a simulation's IMAS output.""" - simulation, imas_file = _get_simulation_and_imas_file(sim_id, params.file_uuid) + result = _get_simulation_and_imas_file(sim_id, params.file_uuid) try: ids_name, occurrence, ids_path = _parse_ids_path(params.path) @@ -175,7 +182,7 @@ def get( raise ResponseException(str(exc)) from exc try: - entry = open_imas(URI(str(imas_file.uri))) + entry = open_imas(URI(str(result.imas_file.uri))) with entry: node = _get_ids_node(entry, ids_name, occurrence, ids_path) coordinates = _get_coordinates(node, ids_name) @@ -195,8 +202,8 @@ def get( raise ServerException(msg) from exc return ImasDataResponse( - simulation=str(simulation.uuid), - file_uuid=str(imas_file.uuid), + simulation=str(result.simulation.uuid), + file_uuid=str(result.imas_file.uuid), path=params.path, occurrence=occurrence, field=field, From ca0b9b4efa7117f83d2a1ec3e62481176ce83c9f Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:55:31 +0200 Subject: [PATCH 09/25] used node.has_value instead of manual checking scalar types --- src/simdb/remote/apis/v1_2/simulation_data.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index b3b141a..aad05bb 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -8,7 +8,7 @@ import numpy as np from flask_restx import Namespace, Resource -from imas.ids_defs import EMPTY_COMPLEX, EMPTY_FLOAT, EMPTY_INT +from imas.ids_defs import EMPTY_FLOAT from imas.ids_primitive import IDSPrimitive from simdb.cli.manifest import DataObject @@ -37,6 +37,7 @@ def _to_python(value: Any) -> Any: """Convert a value returned by IDSPrimitive.value to a JSON-serialisable Python object.""" + print(type(value)) if isinstance(value, np.ndarray): flat = value.tolist() @@ -50,17 +51,6 @@ def _clean(v): return v return _clean(flat) - if isinstance(value, np.integer): - v = int(value) - return None if v == EMPTY_INT else v - if isinstance(value, np.floating): - v = float(value) - return None if (np.isnan(v) or np.isinf(v) or v == EMPTY_FLOAT) else v - if isinstance(value, np.complexfloating): - r, i = float(value.real), float(value.imag) - if r == EMPTY_COMPLEX.real and i == EMPTY_COMPLEX.imag: - return None - return {"real": r, "imag": i} return value From 9f2a7b00593f4c5b4110648f04ec70da3943bd46 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 16:19:32 +0200 Subject: [PATCH 10/25] remove leftover print statement --- src/simdb/remote/apis/v1_2/simulation_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index aad05bb..b64bacf 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -37,7 +37,6 @@ def _to_python(value: Any) -> Any: """Convert a value returned by IDSPrimitive.value to a JSON-serialisable Python object.""" - print(type(value)) if isinstance(value, np.ndarray): flat = value.tolist() From 3c5f45c23d0c60b6408049360b26fca31acbf02b Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 21 May 2026 20:40:55 +0200 Subject: [PATCH 11/25] removed file_uuid parameter as we will always use available imas uri --- src/simdb/remote/apis/v1_2/simulation_data.py | 22 ++++--------------- src/simdb/remote/models.py | 12 ---------- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index b64bacf..a35c87a 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -3,8 +3,7 @@ TODO: Temporary solution to retrieve data (for IBEX backend) """ -from typing import Annotated, Any, NamedTuple, Optional -from uuid import UUID +from typing import Annotated, Any, NamedTuple import numpy as np from flask_restx import Namespace, Resource @@ -124,9 +123,7 @@ class _SimulationImasFile(NamedTuple): imas_file: Any -def _get_simulation_and_imas_file( - sim_id: str, file_uuid: Optional[UUID] -) -> _SimulationImasFile: +def _get_simulation_and_imas_file(sim_id: str) -> _SimulationImasFile: try: simulation = current_app.db.get_simulation(sim_id) except DatabaseError as exc: @@ -136,17 +133,7 @@ def _get_simulation_and_imas_file( if not imas_outputs: raise ResponseException(f"Simulation {sim_id} has no IMAS output files", 404) - if file_uuid is None: - return _SimulationImasFile(simulation, imas_outputs[0]) - - imas_file = next((f for f in imas_outputs if f.uuid == file_uuid), None) - if imas_file is None: - raise ResponseException( - f"File {file_uuid} not found or is not an IMAS output for this simulation", - 404, - ) - - return _SimulationImasFile(simulation, imas_file) + return _SimulationImasFile(simulation, imas_outputs[0]) # Endpoints @@ -163,7 +150,7 @@ def get( params: Annotated[ImasDataQueryParams, Query()], ) -> ImasDataResponse: """Return the value at a given IDS path for a simulation's IMAS output.""" - result = _get_simulation_and_imas_file(sim_id, params.file_uuid) + result = _get_simulation_and_imas_file(sim_id) try: ids_name, occurrence, ids_path = _parse_ids_path(params.path) @@ -192,7 +179,6 @@ def get( return ImasDataResponse( simulation=str(result.simulation.uuid), - file_uuid=str(result.imas_file.uuid), path=params.path, occurrence=occurrence, field=field, diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index b8936cd..229edff 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -559,8 +559,6 @@ class ImasDataQueryParams(BaseModel): path: str """Full IDS path including IDS name and optional occurrence.""" - file_uuid: Optional[UUID] = None - """UUID of a specific IMAS output file (optional).""" @field_validator("path", mode="before") @classmethod @@ -570,14 +568,6 @@ def _strip_path(cls, v: Any) -> str: raise ValueError("must not be empty") return v - @field_validator("file_uuid", mode="before") - @classmethod - def _strip_file_uuid(cls, v: Any) -> Any: - if v is None: - return None - stripped = str(v).strip() - return stripped or None - class QuantityData(BaseModel): """A named, unit-bearing data quantity (field value or coordinate).""" @@ -596,8 +586,6 @@ class ImasDataResponse(BaseModel): simulation: str """UUID of the simulation.""" - file_uuid: str - """UUID of the IMAS output file.""" path: str """Requested IDS path.""" occurrence: int From 00c21e275c1498eece0d61f80907a4dc18087172 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 22 May 2026 18:34:14 +0200 Subject: [PATCH 12/25] fix shape issue and cache_mode=none --- src/simdb/remote/apis/v1_2/simulation_data.py | 5 ++++- src/simdb/remote/models.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index a35c87a..ff1c8b2 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -158,7 +158,10 @@ def get( raise ResponseException(str(exc)) from exc try: - entry = open_imas(URI(str(result.imas_file.uri))) + imas_uri = URI(str(result.imas_file.uri)) + if imas_uri.authority.host and "cache_mode" not in imas_uri.query: + imas_uri.query.set("cache_mode", "none") + entry = open_imas(imas_uri) with entry: node = _get_ids_node(entry, ids_name, occurrence, ids_path) coordinates = _get_coordinates(node, ids_name) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index f1b3535..041648b 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -134,7 +134,10 @@ def _deserialize_numpy(v: Any) -> Any: return v if isinstance(v, dict) and v.get("_type") == "numpy.ndarray": np_bytes = base64.b64decode(v["bytes"].encode()) - return np.frombuffer(np_bytes, dtype=v["dtype"]).reshape(v["shape"]) + arr = np.frombuffer(np_bytes, dtype=v["dtype"]) + if "shape" in v: + arr = arr.reshape(v["shape"]) + return arr raise ValueError(f"Cannot deserialize {v} to np.ndarray") From 8609fdbc481b1f083c9e8dcd0015e5b6d46430ce Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 08:30:17 +0200 Subject: [PATCH 13/25] fix import and added TypeAlias --- src/simdb/remote/models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 939a820..d6809ec 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -12,6 +12,7 @@ List, Literal, Optional, + TypeAlias, TypeVar, Union, ) @@ -26,6 +27,7 @@ BeforeValidator, ConfigDict, Field, + InstanceOf, PlainSerializer, field_validator, model_validator, @@ -109,7 +111,7 @@ class RangeValue(BaseModel): max: float -MetadataValue = Union[ +MetadataValue: TypeAlias = Union[ CustomUUID, str, int, @@ -212,7 +214,7 @@ def _serialize_numpy(o: np.ndarray) -> dict: ] -MetadataValue = Union[ +MetadataValue: TypeAlias = Union[ CustomUUID, str, int, From 98479c7d91e71aebebd7bbed5b3fe89e098d4fd2 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 09:06:26 +0200 Subject: [PATCH 14/25] remove typealias --- src/simdb/remote/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index d6809ec..693d629 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -12,7 +12,6 @@ List, Literal, Optional, - TypeAlias, TypeVar, Union, ) @@ -111,7 +110,7 @@ class RangeValue(BaseModel): max: float -MetadataValue: TypeAlias = Union[ +MetadataValue = Union[ CustomUUID, str, int, @@ -214,7 +213,7 @@ def _serialize_numpy(o: np.ndarray) -> dict: ] -MetadataValue: TypeAlias = Union[ +MetadataValue = Union[ CustomUUID, str, int, From dfda0b9c4576fc9da57feeedf6dbed013d1374bb Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 10:39:45 +0200 Subject: [PATCH 15/25] support backward compatibility for metadata --- src/simdb/database/models/simulation.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index 201e9bf..3c0dcf9 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -361,14 +361,17 @@ def from_data(cls, data: Dict[str, Union[str, Dict, List]]) -> "Simulation": outputs = checked_get(data, "outputs", list) simulation.outputs = [File.from_data(el) for el in outputs] if "metadata" in data: - metadata = checked_get(data, "metadata", list) - meta_dict = {} - for el in metadata: - if not isinstance(el, dict): - raise Exception("corrupted metadata element - expected dictionary") - if "element" in el and "value" in el: - meta_dict[el["element"]] = el["value"] - simulation._set_metadata_dict(meta_dict) + metadata = data.get("metadata") + if isinstance(metadata, list): + meta_dict = {} + for el in metadata: + if not isinstance(el, dict): + raise Exception("corrupted metadata element - expected dictionary") + if "element" in el and "value" in el: + meta_dict[el["element"]] = el["value"] + simulation._set_metadata_dict(meta_dict) + elif isinstance(metadata, dict): + simulation._set_metadata_dict(metadata) return simulation @classmethod From d08981380b7e5098a166567225de58e8bf32c41e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 11:30:31 +0200 Subject: [PATCH 16/25] fixed formatting --- src/simdb/database/models/simulation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index 3c0dcf9..6b8d825 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -366,7 +366,9 @@ def from_data(cls, data: Dict[str, Union[str, Dict, List]]) -> "Simulation": meta_dict = {} for el in metadata: if not isinstance(el, dict): - raise Exception("corrupted metadata element - expected dictionary") + raise Exception( + "corrupted metadata element - expected dictionary" + ) if "element" in el and "value" in el: meta_dict[el["element"]] = el["value"] simulation._set_metadata_dict(meta_dict) From c661a54e4bc706484804fc6750b979d65876ba58 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 13:30:08 +0200 Subject: [PATCH 17/25] added RageValue and added test for list in metadata --- src/simdb/remote/models.py | 6 ++++-- tests/remote/api/test_metadata.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 693d629..7d85d8e 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -219,13 +219,15 @@ def _serialize_numpy(o: np.ndarray) -> dict: int, float, bool, + RangeValue, list, dict, NumpyArray, None, ] -"""Supported types for simulation metadata values. Numpy arrays and scalars are -automatically converted to their plain Python equivalents before validation.""" +"""Supported types for simulation metadata values. RangeValue, numpy arrays and +scalars are automatically converted to their plain Python equivalents before +validation.""" class MetadataData(BaseModel): diff --git a/tests/remote/api/test_metadata.py b/tests/remote/api/test_metadata.py index dbd99ce..f3e5e32 100644 --- a/tests/remote/api/test_metadata.py +++ b/tests/remote/api/test_metadata.py @@ -51,6 +51,30 @@ def test_get_metadata_values(client): assert "machine-a" in rv.json or "machine-b" in rv.json +def test_get_metadata_list_value(client): + """Test that float lists are auto-converted to Range (new behavior).""" + list_data = [1.0, 2.5, 3.7] + simulation_data_1 = generate_simulation_data(metadata={"ip": list_data}) + rv_post_1 = post_simulation(client, simulation_data_1) + assert rv_post_1.status_code == 200 + + rv = client.get("/v1.2/metadata", headers=HEADERS) + assert rv.status_code == 200 + mkeys = MetadataKeyInfoList.model_validate_json(rv.data) + mkey = next((k for k in mkeys.root if k.name == "ip"), None) + assert mkey is not None, "ip key not found in metadata keys" + assert mkey.type == "Range" + + rv = client.get("/v1.2/metadata/ip", headers=HEADERS) + assert rv.status_code == 200 + mdata = MetadataValueList.model_validate_json(rv.data) + assert len(mdata.root) == 1 + a = mdata.root[0] + assert isinstance(a, RangeValue) + assert a.min == 1.0 + assert a.max == 3.7 + + def test_get_metadata_range_value(client): """Test metadata Range storage""" # Create a simulation with a range metadata value From c1797bf7e23fca05c3f974777550a992a274eff4 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 14:08:04 +0200 Subject: [PATCH 18/25] removed duplicate metedataValue --- src/simdb/remote/models.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 7d85d8e..01b8fe4 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -110,21 +110,6 @@ class RangeValue(BaseModel): max: float -MetadataValue = Union[ - CustomUUID, - str, - int, - float, - bool, - list, - RangeValue, - dict[str, Any], - None, -] -"""Supported types for simulation metadata values. Numpy arrays and regular arrays -containing numeric data are automatically converted to RangeValue.""" - - class StatusPatchData(BaseModel): """Post data for updating simulation status.""" From e62b61a6cd914ca4a3e3782f1ed4075242a2ccdb Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 15:46:22 +0200 Subject: [PATCH 19/25] make json encode backward compatible- numpy arrays, reshape with shape key because it is returning flat 1d array --- .gitignore | 7 ++++++- src/simdb/json.py | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9702592..74957e4 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ test_manifest.yml /test-reports/ /tests/cli/test.cfg /tests/cli/*.yaml +/tests/remote/api/test_data_files/ .eggs .coverage *.xml @@ -30,4 +31,8 @@ simdb-coverage-report src/simdb/_version.py *.egg-info *.egg -*.whl \ No newline at end of file +*.whl +myenv/* +qdrant/* +imaspulse/* +simdb-local/* diff --git a/src/simdb/json.py b/src/simdb/json.py index 89301c8..a50c7bd 100644 --- a/src/simdb/json.py +++ b/src/simdb/json.py @@ -22,7 +22,10 @@ def _custom_hook(obj: Dict[str, str]) -> Any: return uuid.UUID(obj["hex"]) elif obj["_type"] == "numpy.ndarray": np_bytes = base64.decodebytes(obj["bytes"].encode()) - return np.frombuffer(np_bytes, dtype=obj["dtype"]) + arr = np.frombuffer(np_bytes, dtype=obj["dtype"]) + if "shape" in obj: + arr = arr.reshape(obj["shape"]) + return arr else: obj_type = obj["_type"] raise ValueError(f"Unknown type to deserialise {obj_type}.") @@ -49,4 +52,16 @@ def default(self, o: Any) -> Any: return {"_type": "uuid.UUID", "hex": o.hex} elif isinstance(o, enum.Enum): return o.value + elif isinstance(o, np.ndarray): + encoded_bytes = base64.b64encode(o.data).decode() + return { + "_type": "numpy.ndarray", + "dtype": o.dtype.name, + "shape": o.shape, + "bytes": encoded_bytes, + } + elif isinstance(o, np.integer): + return int(o) + elif isinstance(o, np.floating): + return float(o) return super().default(o) From 419c0dd681873a2dc9c3b96141bf5a5cef801c1b Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 22:32:33 +0200 Subject: [PATCH 20/25] fixed logic of checking numpy arrays --- .gitignore | 6 +---- src/simdb/validation/validator.py | 38 ++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 74957e4..9267dcb 100644 --- a/.gitignore +++ b/.gitignore @@ -31,8 +31,4 @@ simdb-coverage-report src/simdb/_version.py *.egg-info *.egg -*.whl -myenv/* -qdrant/* -imaspulse/* -simdb-local/* +*.whl \ No newline at end of file diff --git a/src/simdb/validation/validator.py b/src/simdb/validation/validator.py index 2daa21c..834598d 100644 --- a/src/simdb/validation/validator.py +++ b/src/simdb/validation/validator.py @@ -28,6 +28,22 @@ class CustomValidator(ValidatorBase): types_mapping = cast(Any, cerberus.Validator).types_mapping.copy() types_mapping["numpy"] = cerberus.TypeDefinition("numpy", (np.ndarray,), ()) + def _numeric_array(self, field, value) -> Optional[np.ndarray]: + if not isinstance(value, np.ndarray): + self._error(field, "Value is not a numpy array") + return None + try: + value = value.astype(float, copy=False) + except (TypeError, ValueError): + self._error(field, "Values in numpy array must be numeric") + return None + + value = value[~np.isnan(value)] + if value.size == 0: + self._error(field, "Values in numpy array are NaN or empty") + return None + return value + def _validate_exists(self, check_exists, field, value): """The rule's arguments are validated against this schema: {'type': ['string'], @@ -40,11 +56,9 @@ def _validate_min_value(self, min_value, field, value): {'type': 'float'} """ - if not isinstance(value, np.ndarray): - value = value[~np.isnan(value)] - if value.size == 0: - self._error(field, "Values in numpy array are NaN or empty") - self._error(field, "Value is not a numpy array") + value = self._numeric_array(field, value) + if value is None: + return if min_value is not None and value.min() < min_value: self._error(field, f"Minimum {value.min()} less than {min_value}") @@ -53,11 +67,9 @@ def _validate_max_value(self, max_value, field, value): {'type': 'float'} """ - if not isinstance(value, np.ndarray): - value = value[~np.isnan(value)] - if value.size == 0: - self._error(field, "Values in numpy array are NaN or empty") - self._error(field, "Value is not a numpy array") + value = self._numeric_array(field, value) + if value is None: + return if max_value is not None and value.max() > max_value: self._error(field, f"Maximum {value.max()} greater than {max_value}") @@ -65,9 +77,9 @@ def _compare(self, comparison, field, value, comparator: str, message: str): if comparison is None: return if isinstance(value, np.ndarray): - value = value[~np.isnan(value)] - if value.size == 0: - self._error(field, "Values in numpy array are NaN or empty") + value = self._numeric_array(field, value) + if value is None: + return if not getattr(value, comparator)(comparison).all(): self._error(field, f"Values are not {message} {comparison}") elif isinstance(value, float): From 3e0669690277307685eb7876e11b0b7ead848ed8 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 23:22:42 +0200 Subject: [PATCH 21/25] check values when validation failed --- src/simdb/validation/validator.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/simdb/validation/validator.py b/src/simdb/validation/validator.py index 834598d..9181918 100644 --- a/src/simdb/validation/validator.py +++ b/src/simdb/validation/validator.py @@ -28,14 +28,28 @@ class CustomValidator(ValidatorBase): types_mapping = cast(Any, cerberus.Validator).types_mapping.copy() types_mapping["numpy"] = cerberus.TypeDefinition("numpy", (np.ndarray,), ()) + @staticmethod + def _value_preview(value, max_length: int = 200) -> str: + preview = repr(value) + if len(preview) > max_length: + preview = f"{preview[:max_length]}..." + return f"{type(value).__name__} {preview}" + def _numeric_array(self, field, value) -> Optional[np.ndarray]: if not isinstance(value, np.ndarray): - self._error(field, "Value is not a numpy array") + self._error( + field, + f"Value is not a numpy array: {self._value_preview(value)}", + ) return None try: value = value.astype(float, copy=False) except (TypeError, ValueError): - self._error(field, "Values in numpy array must be numeric") + self._error( + field, + "Values in numpy array must be numeric: " + f"{self._value_preview(value)}", + ) return None value = value[~np.isnan(value)] From dd526fec5240a509055c47d907523ec8936c0696 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 28 May 2026 23:46:19 +0200 Subject: [PATCH 22/25] fix validator to understand RangeValue --- src/simdb/validation/validator.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/simdb/validation/validator.py b/src/simdb/validation/validator.py index 9181918..ef28098 100644 --- a/src/simdb/validation/validator.py +++ b/src/simdb/validation/validator.py @@ -28,6 +28,14 @@ class CustomValidator(ValidatorBase): types_mapping = cast(Any, cerberus.Validator).types_mapping.copy() types_mapping["numpy"] = cerberus.TypeDefinition("numpy", (np.ndarray,), ()) + @staticmethod + def _range_value(value) -> Optional[np.ndarray]: + if isinstance(value, dict) and {"min", "max"} <= value.keys(): + return np.array([value["min"], value["max"]]) + if hasattr(value, "min") and hasattr(value, "max"): + return np.array([value.min, value.max]) + return None + @staticmethod def _value_preview(value, max_length: int = 200) -> str: preview = repr(value) @@ -138,6 +146,9 @@ def _normalize_coerce_float(cls, value): def _normalize_coerce_numpy(cls, value): if isinstance(value, np.ndarray): return value + range_value = cls._range_value(value) + if range_value is not None: + return range_value elif isinstance(value, str): return np.fromstring(value[1:-1], sep=" ") else: From 07d18c3e9aaf1e2317b061116a52c9e00c542586 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 29 May 2026 16:52:34 +0200 Subject: [PATCH 23/25] added cli for calling data endpoint --- .gitignore | 8 +- pyproject.toml | 1 + src/simdb/cli/commands/simulation.py | 70 ++++++- src/simdb/cli/commands/utils.py | 232 ++++++++++++++++++++++- src/simdb/cli/remote_api.py | 5 + src/simdb/validation/validator.py | 3 +- tests/cli/test_cli_simulation_command.py | 52 +++++ 7 files changed, 366 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 9267dcb..b275c5e 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,10 @@ simdb-coverage-report src/simdb/_version.py *.egg-info *.egg -*.whl \ No newline at end of file +*.whl +myenv/* +qdrant/* +imaspulse/* +simdb-local/* +imaspluse/* +ASTRA/* \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 2cc00bd..d3090db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "numpy>=1.14", "pydantic>=2.10.6", "python-dateutil>=2.6", + "plotext>=5.3.2", "pyyaml>=3.13", "requests>=2.27.0", "semantic-version>=2.8", diff --git a/src/simdb/cli/commands/simulation.py b/src/simdb/cli/commands/simulation.py index 7f442ce..d28955a 100644 --- a/src/simdb/cli/commands/simulation.py +++ b/src/simdb/cli/commands/simulation.py @@ -16,7 +16,12 @@ from simdb.validation import ValidationError, Validator from . import check_meta_args, pass_config -from .utils import print_simulations +from .utils import ( + is_numeric_1d, + print_quantity, + print_simulations, + show_quantity_textual_plot, +) from .validators import validate_non_negative @@ -353,6 +358,69 @@ def simulation_query( ) +@simulation.command("data", cls=n_required_args_adaptor(2)) +@pass_config +@click.argument("remote", required=False) +@click.argument("sim_id") +@click.argument("ids_path") +@click.option("--username", help="Username used to authenticate with the remote.") +@click.option("--password", help="Password used to authenticate with the remote.") +def simulation_data( + config: Config, + remote: Optional[str], + sim_id: str, + ids_path: str, + username: Optional[str], + password: Optional[str], +): + """Fetch IDS field data for simulation SIM_ID (UUID or alias) from REMOTE. + + \b + IDS_PATH format: + ids_name[:]/path/to/field + + \b + Examples: + simdb sim data iter 4dd781b... profiles_1d[0]/grid/rho_tor_norm + simdb sim data 4dd781b... equilibrium:0/time_slice[0]/profiles_1d/psi + """ + api = RemoteAPI(remote, username, password, config) + + try: + result = api.get_simulation_data(sim_id, ids_path) + except Exception as err: + raise click.ClickException(str(err)) from err + + click.echo(f"simulation : {result['simulation']}") + click.echo(f"path : {result['path']} (occurrence {result['occurrence']})") + + coordinates = result.get("coordinates") or [] + plot_coordinate = next( + ( + coord + for coord in coordinates + if isinstance(coord.get("data"), list) + and isinstance(result["field"].get("data"), list) + and len(coord["data"]) == len(result["field"]["data"]) + ), + None, + ) + field_is_1d = is_numeric_1d(result["field"].get("data")) + if field_is_1d: + show_quantity_textual_plot( + result["field"], label="field", x_quantity=plot_coordinate + ) + else: + print_quantity(result["field"], label="field") + + if config.verbose and coordinates: + for coord in coordinates: + if field_is_1d and is_numeric_1d(coord.get("data")): + continue + if isinstance(coord.get("data"), list): + print_quantity(coord, label=f"coord {coord['name']}", show_stats=False) + + @simulation.command("validate", cls=n_required_args_adaptor(1)) @pass_config @click.argument("remote", required=False) diff --git a/src/simdb/cli/commands/utils.py b/src/simdb/cli/commands/utils.py index ab2c919..abd352f 100644 --- a/src/simdb/cli/commands/utils.py +++ b/src/simdb/cli/commands/utils.py @@ -1,7 +1,12 @@ +import importlib from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeVar +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, TypeVar import click +from rich.console import Console, Group +from rich.panel import Panel +from rich.table import Table +from rich.text import Text if TYPE_CHECKING: # Only importing these for type checking and documentation generation in order to @@ -10,6 +15,231 @@ else: Config = TypeVar("Config") +_RICH_CONSOLE = Console() + + +def _get_shape(data: Any) -> Tuple[int, ...]: + """Recursively compute shape of a nested list""" + if not isinstance(data, list): + return () + if not data: + return (0,) + return (len(data), *_get_shape(data[0])) + + +def _fmt_val(v: Any) -> str: + if isinstance(v, float): + return f"{v:.6g}" + return str(v) + + +def _fmt_row(row: list) -> str: + """Format a 1-D list with numpy-style head/tail truncation.""" + if len(row) <= 8: + return " ".join(_fmt_val(v) for v in row) + head = " ".join(_fmt_val(v) for v in row[:3]) + tail = " ".join(_fmt_val(v) for v in row[-3:]) + return f"{head} ... {tail}" + + +def _is_numeric(v: Any) -> bool: + return isinstance(v, (int, float)) and not isinstance(v, bool) + + +def is_numeric_1d(data: Any) -> bool: + return isinstance(data, list) and bool(data) and all(_is_numeric(v) for v in data) + + +def _quantity_axis_label(q: dict, fallback: str = "") -> str: + name = q.get("name") or fallback + units = q.get("units") or "-" + label = str(name).rsplit("/", 1)[-1] or str(name) + return f"{label} [{units}]" + + +def _build_array_body(data: list, shape: Tuple[int, ...]) -> str: + """Build string for 1-D or 2-D arrays.""" + if len(shape) == 1: + return f"[{_fmt_row(data)}]" + + if len(shape) == 2: + rows = data if len(data) <= 8 else [*data[:3], ..., *data[-3:]] + lines = [] + for row in rows: + if row is ...: + lines.append(" ...") + else: + lines.append(f" [{_fmt_row(row)}]") + inner = "\n".join(lines) + return f"[\n{inner}\n]" + + return f"<{len(shape)}-D array, shape {shape}>" + + +def _iter_numeric(data: Any) -> Iterable[float]: + """Yield all numeric leaf values from a nested list, skipping None.""" + if isinstance(data, list): + for item in data: + yield from _iter_numeric(item) + elif isinstance(data, (int, float)) and data is not None: + yield float(data) + + +def _compute_stats(data: Any) -> Optional[Dict[str, float]]: + """Return basic statistics for numeric data, or None if not applicable.""" + values = list(_iter_numeric(data)) + if len(values) < 2: + return None + n = len(values) + vmin = min(values) + vmax = max(values) + mean = sum(values) / n + std = (sum((x - mean) ** 2 for x in values) / n) ** 0.5 + sorted_v = sorted(values) + mid = n // 2 + median = sorted_v[mid] if n % 2 else (sorted_v[mid - 1] + sorted_v[mid]) / 2 + return { + "n": n, + "min": vmin, + "max": vmax, + "mean": mean, + "std": std, + "median": median, + } + + +def _stats_table(stats: Dict[str, float]) -> Table: + table = Table(show_header=True, header_style="bold", box=None, padding=(0, 2)) + for key in ("n", "min", "max", "mean", "std", "median"): + table.add_column(key, justify="right") + table.add_row( + str(int(stats["n"])), + _fmt_val(stats["min"]), + _fmt_val(stats["max"]), + _fmt_val(stats["mean"]), + _fmt_val(stats["std"]), + _fmt_val(stats["median"]), + ) + return table + + +def _plot_stats_table(stats: Dict[str, float], shape: Tuple[int, ...]) -> Table: + table = Table(show_header=True, header_style="bold", box=None, padding=(0, 2)) + for key in ("n", "min", "max", "mean", "std", "median"): + table.add_column(key, justify="right") + table.add_row( + str(int(stats["n"])), + _fmt_val(stats["min"]), + _fmt_val(stats["max"]), + _fmt_val(stats["mean"]), + _fmt_val(stats["std"]), + _fmt_val(stats["median"]), + ) + return table + + +def _plot_panel( + *, + plot: Text, + title: str, + units: str, + stats: Optional[Dict[str, float]], + shape: Tuple[int, ...], +) -> None: + content = plot + if stats: + content = Group(plot, _plot_stats_table(stats, shape)) + + _RICH_CONSOLE.print( + Panel( + content, + title=f"[bold]{title}[/bold] [dim]\\[{units}][/dim]", + subtitle=f"shape {shape}", + ) + ) + + +def show_quantity_textual_plot( + q: dict, + label: str = "", + x_quantity: Optional[dict] = None, +) -> None: + """Print line plot for a 1-D numeric QuantityData dict.""" + name = q["name"] + units = q["units"] or "-" + data = q["data"] + if not is_numeric_1d(data): + print_quantity(q, label=label) + return + + try: + plotext = importlib.import_module("plotext") + except ImportError: + print_quantity(q, label=label) + return + + y_values = [float(value) for value in data] + shape = _get_shape(data) + x_values = None + xlabel = "index [-]" + if ( + x_quantity + and is_numeric_1d(x_quantity.get("data")) + and len(x_quantity["data"]) == len(y_values) + ): + x_values = [float(value) for value in x_quantity["data"]] + xlabel = _quantity_axis_label(x_quantity, fallback="x") + + title = label or name + if x_values is None: + x_values = [float(index) for index in range(len(y_values))] + + console_width = _RICH_CONSOLE.size.width + plot_width = max(48, min(70, console_width - 12)) + + plotext.clear_figure() + plotext.theme("clear") + plotext.plotsize(plot_width, 18) + plotext.xlabel(xlabel) + plotext.ylabel(_quantity_axis_label(q, fallback=label or "field")) + plotext.plot(x_values, y_values, marker="braille", color="cyan") + plot = Text.from_ansi(plotext.build()) + stats = _compute_stats(y_values) + _plot_panel( + plot=plot, + title=title, + units=units, + stats=stats, + shape=shape, + ) + + +def print_quantity(q: dict, label: str = "", show_stats: bool = True) -> None: + """Print a QuantityData dict with array display and stats.""" + name = q["name"] + units = q["units"] or "-" + data = q["data"] + title = f"[bold]{label or name}[/bold] [dim]\\[{units}][/dim]" + + if not isinstance(data, list): + _RICH_CONSOLE.print(Panel(f"{_fmt_val(data)}", title=title, subtitle="scalar")) + return + + shape = _get_shape(data) + stats = _compute_stats(data) + array_body = _build_array_body(data, shape) + subtitle = f"shape ({shape[0]},)" if len(shape) == 1 else f"shape {shape}" + if show_stats and stats: + _RICH_CONSOLE.print( + Panel( + Group(array_body, _stats_table(stats)), + title=title, + subtitle=subtitle, + ) + ) + else: + _RICH_CONSOLE.print(Panel(array_body, title=title, subtitle=subtitle)) + def _flatten_dict(values: Dict) -> List[Tuple[str, str]]: items = [] diff --git a/src/simdb/cli/remote_api.py b/src/simdb/cli/remote_api.py index a81d082..28520b2 100644 --- a/src/simdb/cli/remote_api.py +++ b/src/simdb/cli/remote_api.py @@ -663,6 +663,11 @@ def delete_metadata(self, sim_id: str, key: str) -> List[str]: res = self.delete("simulation/metadata/" + sim_id, {"key": key}) return [data["value"] for data in res.json()] + @try_request + def get_simulation_data(self, sim_id: str, path: str) -> Dict[str, Any]: + res = self.get(f"simulation/{sim_id}/data", params={"path": path}) + return res.json() + @try_request def get_directory(self) -> str: res = self.get("staging_dir") diff --git a/src/simdb/validation/validator.py b/src/simdb/validation/validator.py index ef28098..99f7edb 100644 --- a/src/simdb/validation/validator.py +++ b/src/simdb/validation/validator.py @@ -55,8 +55,7 @@ def _numeric_array(self, field, value) -> Optional[np.ndarray]: except (TypeError, ValueError): self._error( field, - "Values in numpy array must be numeric: " - f"{self._value_preview(value)}", + f"Values in numpy array must be numeric: {self._value_preview(value)}", ) return None diff --git a/tests/cli/test_cli_simulation_command.py b/tests/cli/test_cli_simulation_command.py index 0120fc0..5f61c60 100644 --- a/tests/cli/test_cli_simulation_command.py +++ b/tests/cli/test_cli_simulation_command.py @@ -85,3 +85,55 @@ def test_simulation_validate_command(remote_api, get_local_db): runner = CliRunner() result = runner.invoke(cli, [f"--config-file={config_file}", "simulation"]) assert result.exception is None + + +@mock.patch("simdb.cli.commands.simulation.show_quantity_textual_plot") +@mock.patch("simdb.cli.commands.simulation.RemoteAPI") +def test_simulation_data_command(mock_remote_api_cls, mock_textual_plot): + """``simdb simulation data`` prints field info.""" + mock_api = mock_remote_api_cls.return_value + mock_api.get_simulation_data.return_value = { + "simulation": "a304a6955b3f11f1809bd4f5ef75ec04", + "path": "core_profiles/profiles_1d[0]/electrons/temperature", + "occurrence": 0, + "field": { + "name": "core_profiles/profiles_1d[0]/electrons/temperature", + "units": "eV", + "data": [1000.0, 1200.0, 900.0], + }, + "coordinates": [ + { + "name": "core_profiles/profiles_1d[0]/grid/rho_tor_norm", + "units": "", + "data": [0.0, 0.5, 1.0], + } + ], + } + + config_file = config_test_file() + runner = CliRunner() + result = runner.invoke( + cli, + [ + f"--config-file={config_file}", + "simulation", + "data", + "test_sim", + "core_profiles/profiles_1d[0]/electrons/temperature", + ], + ) + + assert result.exception is None, result.output + mock_api.get_simulation_data.assert_called_once_with( + "test_sim", "core_profiles/profiles_1d[0]/electrons/temperature" + ) + result_data = mock_api.get_simulation_data.return_value + mock_textual_plot.assert_called_once_with( + result_data["field"], + label="field", + x_quantity=result_data["coordinates"][0], + ) + assert "simulation : a304a6955b3f11f1809bd4f5ef75ec04" in result.output + assert "shape (3,)" not in result.output + assert "1000" not in result.output + assert "1200" not in result.output From 96ee78ded57ca739f4d5e0a5b4b0cca10c412f5c Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 29 May 2026 16:53:19 +0200 Subject: [PATCH 24/25] reverted .gitignore --- .gitignore | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index b275c5e..b4c1227 100644 --- a/.gitignore +++ b/.gitignore @@ -21,9 +21,7 @@ test_manifest.yml /htmlcov /test-reports/ /tests/cli/test.cfg -/tests/cli/*.yaml -/tests/remote/api/test_data_files/ -.eggs +/tests/cli/*.yaml.eggs .coverage *.xml simdb-coverage-report @@ -31,10 +29,4 @@ simdb-coverage-report src/simdb/_version.py *.egg-info *.egg -*.whl -myenv/* -qdrant/* -imaspulse/* -simdb-local/* -imaspluse/* -ASTRA/* \ No newline at end of file +*.whl \ No newline at end of file From c86a515564d22d7dcad5b06689de4d8c0f714240 Mon Sep 17 00:00:00 2001 From: Yannick de Jong Date: Wed, 3 Jun 2026 15:21:28 +0200 Subject: [PATCH 25/25] Make metadata non-optional --- src/simdb/remote/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 0a8218c..d8d2d2d 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -352,7 +352,7 @@ class SimulationListItem(BaseModel): """Alias of the simulation.""" datetime: str """Creation timestamp.""" - metadata: Optional[MetadataDataList] = None + metadata: MetadataDataList = MetadataDataList() """Simulation metadata."""