diff --git a/isaaclab_arena/analysis/__init__.py b/isaaclab_arena/analysis/__init__.py new file mode 100644 index 000000000..fee3a6a9f --- /dev/null +++ b/isaaclab_arena/analysis/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/isaaclab_arena/analysis/sensitivity/__init__.py b/isaaclab_arena/analysis/sensitivity/__init__.py new file mode 100644 index 000000000..fee3a6a9f --- /dev/null +++ b/isaaclab_arena/analysis/sensitivity/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/isaaclab_arena/analysis/sensitivity/analyzer.py b/isaaclab_arena/analysis/sensitivity/analyzer.py new file mode 100644 index 000000000..04e729a3b --- /dev/null +++ b/isaaclab_arena/analysis/sensitivity/analyzer.py @@ -0,0 +1,363 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Inference-only analyzers for v0.3 sensitivity analysis. + +What this module does in plain English +-------------------------------------- +Given a dataset of (factor values, outcome values) pairs from a policy evaluation, the +analyzer learns the *conditional* distribution of factor values given a chosen outcome +value (e.g. "given the episode succeeded, which factor values were most consistent?"). +This is the **posterior** ``P(theta | outcome=success)``. Under v0.3's uniform prior, +this posterior's peak is also the operating point ``argmax P(success | theta)`` — so +plotting the marginal posterior over one factor identifies the values that maximize +success rate. 
+ +The three concrete analyzers cover the three relevant factor-mix cases: + + - ``NPEAnalyzer`` — **N**eural **P**osterior **E**stimation. Used when *all* + declared factors are continuous. Trains a normalizing-flow density estimator on + ``(theta, x)`` pairs and exposes ``posterior.sample`` / ``posterior.log_prob``. + Limitation: with a binary outcome and a 1D theta, sbi falls back to a Gaussian + density and the recovered peak reflects the *mean* of successful theta values + rather than the true *mode* — a known caveat we surface as a [WARN] at fit time. + - ``MNPEAnalyzer`` — **M**ixed **N**eural **P**osterior **E**stimation. Used when + the schema has *both* continuous and categorical factors. sbi's MixedDensityEstimator + routes continuous columns through the same kind of flow NPE uses while routing + discrete columns through a categorical mass estimator. + - ``EmpiricalAnalyzer`` — Pure-categorical schemas. Skip the neural fit entirely: under + a uniform prior the posterior ``P(category | success)`` is *exactly* the normalized + per-category empirical success rate. No smoothing improves on that, and sbi MNPE + in version 0.26 also refuses to train without at least one continuous theta column. + +``make_analyzer(dataset, outcome_name)`` is the factory: callers don't need to know about +the hierarchy, they just hand it a dataset and outcome name. + +How rendering fits in +--------------------- +This module is *inference-only*. The sibling ``plotting`` module reads the analyzer's +public queries (``continuous_marginal_density``, ``categorical_marginal_probs``) and +renders matplotlib figures. Decoupling the two means new plot types don't require +analyzer changes, and analyzer changes don't risk breaking the plot. 
+
+Public posterior-query surface used by ``plotting.py``:
+  - ``BaseAnalyzer.categorical_marginal_probs(factor_name, outcome_value, num_samples)``
+  - ``PosteriorAnalyzer.continuous_marginal_density(factor_name, outcome_value, num_grid_points)``
+    (NOT defined on ``EmpiricalAnalyzer`` — that analyzer rejects continuous factors at init time)
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import torch
+from abc import ABC, abstractmethod
+
+from isaaclab_arena.analysis.sensitivity.dataset import FactorSpec, SensitivityDataset
+
+
+class BaseAnalyzer(ABC):
+    """Abstract base — owns state validation and the abstract posterior-query surface.
+
+    Subclasses must implement:
+      - ``fit`` — train (or no-op) so queries can be called afterwards.
+      - ``categorical_marginal_probs`` — return ``P(category | outcome)`` for a categorical factor.
+    Continuous-factor queries (``continuous_marginal_density``) live on ``PosteriorAnalyzer``
+    only — the empirical analyzer never needs them by construction.
+    """
+
+    def __init__(self, dataset: SensitivityDataset, outcome_name: str):
+        self.dataset = dataset
+        self.outcome_name = outcome_name
+        assert (
+            outcome_name in dataset.outcome_columns
+        ), f"Outcome {outcome_name!r} not found in schema; available: {list(dataset.outcome_columns)}"
+        assert len(dataset.schema.factors) > 0, "Schema declares no factors"
+
+    @abstractmethod
+    def fit(self, training_batch_size: int = 50) -> None:
+        """Train the posterior (or no-op for empirical) so queries can be called afterwards.
+
+        For NPE/MNPE this trains a neural density estimator on ``(theta, x_selected)``,
+        where ``x_selected`` is the single outcome column named by ``outcome_name``. For
+        the empirical analyzer this is a no-op — the categorical posterior is computed
+        directly from the data at query time.
+        """
+
+    @abstractmethod
+    def categorical_marginal_probs(self, factor_name: str, outcome_value: float, num_samples: int) -> np.ndarray:
+        """Return ``P(category | outcome=outcome_value)`` for one categorical factor.
+
+        Output is a 1D numpy array of length ``len(factor.choices)`` whose entries sum to 1.
+        For posterior analyzers this is computed by sampling the trained posterior and
+        counting category frequencies; for the empirical analyzer it's the normalized
+        per-category empirical success rate.
+        """
+
+    def _factor_spec(self, factor_name: str) -> FactorSpec:
+        """Return the ``FactorSpec`` for ``factor_name``, asserting it exists in the schema."""
+        assert (
+            factor_name in self.dataset.factor_columns
+        ), f"Factor {factor_name!r} not in schema; available: {list(self.dataset.factor_columns)}"
+        return next(factor for factor in self.dataset.schema.factors if factor.name == factor_name)
+
+
+class PosteriorAnalyzer(BaseAnalyzer):
+    """Common base for the sbi-driven analyzers (NPE and MNPE).
+
+    NPE and MNPE differ only in *which* sbi inference class they instantiate; everything
+    else (training loop, posterior storage, density and sample queries) is identical.
+    Subclasses override ``_make_inference`` to choose the class, and the
+    binary-outcome WARN hook to surface any method-specific caveats.
+
+    After ``fit()`` returns, ``self.posterior`` is an sbi posterior object that supports
+    ``posterior.sample(shape, x=...)`` and (for NPE) ``posterior.log_prob(theta, x=...)``.
+    """
+
+    def __init__(self, dataset: SensitivityDataset, outcome_name: str):
+        super().__init__(dataset, outcome_name)
+        self.posterior = None
+
+    def _make_inference(self):
+        """Return the sbi inference object to train with.
+
+        Subclass-specific: ``NPEAnalyzer`` returns ``sbi.inference.NPE(...)``,
+        ``MNPEAnalyzer`` returns ``sbi.inference.MNPE(...)``. The lazy import of sbi
+        lives in the subclass so callers don't pay the (heavy) sbi import cost until
+        they actually fit.
+        """
+        raise NotImplementedError("PosteriorAnalyzer subclasses must implement _make_inference")
+
+    def fit(self, training_batch_size: int = 50) -> None:
+        """Train the chosen sbi estimator on ``(theta, x_selected)`` and stash the posterior.
+
+        Steps:
+          1. Slice ``self.dataset.x`` to the single outcome column named by ``outcome_name``.
+          2. Surface any method-specific caveats about the outcome (e.g. NPE's
+             1D-binary Gaussian fallback) via ``_maybe_warn_binary_outcome``.
+          3. Instantiate the sbi inference object (NPE or MNPE) via ``_make_inference``.
+          4. Append the simulations and train.
+          5. Build a posterior object from the trained estimator and store it on ``self``.
+        """
+        outcome_column_index = self.dataset.outcome_columns[self.outcome_name]
+        selected_outcome_column = self.dataset.x[:, outcome_column_index : outcome_column_index + 1]
+        self._maybe_warn_binary_outcome(selected_outcome_column)
+
+        print(
+            f"[INFO] {type(self).__name__}: fitting on {self.dataset.theta.shape[0]} samples"
+            f" (theta dim={self.dataset.theta.shape[1]},"
+            f" x dim={selected_outcome_column.shape[1]})."
+        )
+        inference = self._make_inference()
+        inference.append_simulations(self.dataset.theta, selected_outcome_column)
+        density_estimator = inference.train(training_batch_size=training_batch_size)
+        self.posterior = inference.build_posterior(density_estimator)
+
+    def _maybe_warn_binary_outcome(self, selected_outcome_column: torch.Tensor) -> None:
+        """Optional hook for subclass-specific caveats about binary outcomes. Default: no-op.
+
+        ``NPEAnalyzer`` overrides this to warn that with a single binary outcome column
+        sbi falls back to a Gaussian density, biasing the recovered peak toward the
+        mean of successful theta values rather than the true mode.
+        """
+
+    def continuous_marginal_density(
+        self, factor_name: str, outcome_value: float, num_grid_points: int
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Evaluate ``P(factor_value | outcome=outcome_value)`` over the factor's prior range.
+
+        Returns ``(grid, density)`` as numpy arrays of length ``num_grid_points``, suitable
+        for plotting as a smooth curve.
+
+        Two evaluation paths depending on whether other factors are present:
+          - **1D theta** (the only declared factor is this one): evaluate
+            ``posterior.log_prob`` directly on a regular grid — exact, no sampling.
+          - **Multi-dim theta**: draw 10,000 posterior samples at the given outcome value, extract
+            this factor's column, and histogram (40 bins) then interpolate onto the grid. This
+            marginalizes over the other factor dims implicitly.
+        """
+        assert self.posterior is not None, "Call fit() before querying the posterior"
+        factor_spec = self._factor_spec(factor_name)
+        assert (
+            factor_spec.type == "continuous"
+        ), f"continuous_marginal_density expects a continuous factor; {factor_name!r} is {factor_spec.type!r}"
+        assert (
+            factor_spec.range is not None and len(factor_spec.range) == 1
+        ), "Continuous-factor marginal expects a populated 1D range"
+
+        factor_column_slice = self.dataset.factor_columns[factor_name]
+        observed_outcome = torch.tensor([outcome_value], dtype=torch.float32)
+        range_low, range_high = factor_spec.range[0]
+
+        if self.dataset.theta.shape[1] == 1:
+            grid_tensor = torch.linspace(range_low, range_high, num_grid_points, dtype=torch.float32).unsqueeze(1)
+            with torch.no_grad():
+                log_probabilities = self.posterior.log_prob(grid_tensor, x=observed_outcome)
+            density_numpy = torch.exp(log_probabilities).cpu().numpy()
+            grid_numpy = grid_tensor.squeeze(-1).cpu().numpy()
+        else:
+            with torch.no_grad():
+                posterior_samples = self.posterior.sample((10_000,), x=observed_outcome)
+            factor_column_samples = posterior_samples[:, factor_column_slice].squeeze(-1).cpu().numpy()
+            grid_numpy = np.linspace(range_low, range_high, num_grid_points)
+            histogram_density, bin_edges = np.histogram(
+                factor_column_samples, bins=40, range=(range_low, range_high), density=True
+            )
+            density_numpy = np.interp(grid_numpy, 0.5 * (bin_edges[:-1] + bin_edges[1:]), histogram_density)
+
+        return grid_numpy, density_numpy
+
+    def categorical_marginal_probs(self, factor_name: str, outcome_value: float, num_samples: int) -> np.ndarray:
+        """Estimate ``P(category | outcome)`` by sampling the trained posterior.
+
+        Draws ``num_samples`` from ``posterior(theta | x=outcome_value)``, extracts the
+        factor's column (which sbi returns as floats over the BoxUniform support), rounds
+        to the nearest integer in ``[0, num_choices - 1]``, and tallies frequencies.
+        Result is a length-``num_choices`` numpy array that sums to 1.
+        """
+        assert self.posterior is not None, "Call fit() before querying the posterior"
+        factor_spec = self._factor_spec(factor_name)
+        assert factor_spec.type == "categorical"
+        assert factor_spec.choices is not None
+        factor_column_slice = self.dataset.factor_columns[factor_name]
+        num_choices = len(factor_spec.choices)
+
+        observed_outcome = torch.tensor([outcome_value], dtype=torch.float32)
+        with torch.no_grad():
+            posterior_samples = self.posterior.sample((num_samples,), x=observed_outcome)
+        factor_column_samples = posterior_samples[:, factor_column_slice].squeeze(-1).cpu().numpy()
+        clipped_codes = np.clip(np.round(factor_column_samples), 0, num_choices - 1).astype(int)
+        return np.bincount(clipped_codes, minlength=num_choices) / num_samples
+
+
+class NPEAnalyzer(PosteriorAnalyzer):
+    """Neural Posterior Estimation analyzer for continuous-only factor schemas.
+
+    Use this when every declared factor is continuous (no categoricals). Internally
+    trains ``sbi.inference.NPE``, which fits a normalizing-flow density over
+    ``(theta, x_selected)`` and exposes both ``sample`` and ``log_prob`` on the result.
+
+    **Caveat for binary outcomes with a 1D theta:** sbi's flow code falls back to a Gaussian
+    density when the output space is 1D, which biases the recovered posterior peak
+    toward the *mean* of successful theta values rather than the true *mode* of the
+    success curve. We surface a [WARN] at fit time so users see this in plain text
+    rather than buried in sbi's UserWarning stream.
+    """
+
+    def _make_inference(self):
+        """Construct ``sbi.inference.NPE`` configured with the dataset's uniform prior."""
+        from sbi.inference import NPE
+
+        return NPE(prior=self.dataset.prior)
+
+    def _maybe_warn_binary_outcome(self, selected_outcome_column: torch.Tensor) -> None:
+        """Warn only when theta is 1D and the selected outcome is binary — the Gaussian-fallback case."""
+        unique_values = set(selected_outcome_column.flatten().tolist())
+        if self.dataset.theta.shape[1] == 1 and unique_values.issubset({0.0, 1.0}):
+            print(
+                f"[WARN] Outcome {self.outcome_name!r} is binary (values in {{0, 1}}) and the"
+                " analyzer is using NPE (no categorical factors). sbi NPE falls back to a"
+                " Gaussian density in 1D output space, so the recovered posterior peak"
+                " reflects the *mean* of successful theta values rather than the true *mode*"
+                " of the success curve. Qualitative shape is still informative."
+            )
+
+
+class MNPEAnalyzer(PosteriorAnalyzer):
+    """Mixed Neural Posterior Estimation analyzer for schemas with at least one of each type.
+
+    Use this when the schema mixes continuous and categorical factors. Internally trains
+    ``sbi.inference.MNPE``, whose mixed density estimator routes continuous theta columns
+    through a normalizing flow while routing categorical columns through a categorical
+    mass estimator. The continuous-first / categorical-after column ordering in
+    ``factor_columns`` matches MNPE's expected layout exactly.
+
+    sbi MNPE 0.26 requires at least one continuous theta column. For pure-categorical
+    schemas use ``EmpiricalAnalyzer`` instead — ``make_analyzer`` dispatches correctly.
+    """
+
+    def _make_inference(self):
+        """Construct ``sbi.inference.MNPE`` configured with the dataset's uniform prior."""
+        from sbi.inference import MNPE
+
+        return MNPE(prior=self.dataset.prior)
+
+
+class EmpiricalAnalyzer(BaseAnalyzer):
+    """Frequency-table analyzer for pure-categorical factor schemas — no neural fit.
+
+    Use this when every declared factor is categorical. Under v0.3's uniform prior,
+    Bayes' rule simplifies ``P(category | success) ∝ P(success | category) · P(category)``
+    to ``P(category | success) ∝ P(success | category)`` — i.e. the posterior is *exactly*
+    the per-category empirical success rate, normalized to sum to 1. No neural network
+    can do better than this with a uniform prior; smoothing only hurts.
+
+    Also covers an sbi limitation: MNPE 0.26 refuses to train if theta has zero continuous
+    columns. The empirical path sidesteps that entirely.
+
+    Rejects continuous factors at construction time — ``make_analyzer`` shouldn't even
+    dispatch here for mixed schemas, but the explicit guard makes the constraint clear.
+    """
+
+    def __init__(self, dataset: SensitivityDataset, outcome_name: str):
+        super().__init__(dataset, outcome_name)
+        has_continuous_factor = any(factor.type == "continuous" for factor in dataset.schema.factors)
+        assert not has_continuous_factor, (
+            "EmpiricalAnalyzer is only valid for all-categorical schemas. For mixed"
+            " continuous + categorical factors, use MNPEAnalyzer."
+        )
+
+    def fit(self, training_batch_size: int = 50) -> None:
+        """No-op — the posterior is computed directly from the data at query time."""
+        print(f"[INFO] {type(self).__name__}: no neural fit needed for pure-categorical schema.")
+
+    def categorical_marginal_probs(self, factor_name: str, outcome_value: float, num_samples: int) -> np.ndarray:
+        """Return ``P(category | outcome) = per_category_success_rate / sum(per_category_success_rate)``.
+
+        For each category, computes the fraction of rows assigned to it whose outcome
+        column is ``>= 0.5`` (treating outcome as binary). Then normalizes across
+        categories so the result sums to 1. ``outcome_value`` and ``num_samples`` are
+        accepted for interface compatibility with ``PosteriorAnalyzer`` but not used —
+        empirical analysis treats outcome as binary (success vs not-success).
+        """
+        factor_spec = self._factor_spec(factor_name)
+        assert factor_spec.type == "categorical"
+        assert factor_spec.choices is not None
+        factor_column_slice = self.dataset.factor_columns[factor_name]
+        num_choices = len(factor_spec.choices)
+        outcome_column_index = self.dataset.outcome_columns[self.outcome_name]
+
+        empirical_theta_codes = self.dataset.theta[:, factor_column_slice].squeeze(-1).long().cpu().numpy()
+        empirical_outcomes = self.dataset.x[:, outcome_column_index].cpu().numpy()
+        empirical_rates = np.zeros(num_choices)
+        for code in range(num_choices):
+            category_mask = empirical_theta_codes == code
+            if category_mask.any():
+                empirical_rates[code] = float((empirical_outcomes[category_mask] >= 0.5).mean())
+        total_rate = float(empirical_rates.sum())
+        if total_rate > 0:
+            return empirical_rates / total_rate
+        return np.full(num_choices, 1.0 / num_choices)
+
+
+def make_analyzer(dataset: SensitivityDataset, outcome_name: str) -> BaseAnalyzer:
+    """Construct the right analyzer for the dataset's factor mix.
+
+    Dispatch table:
+      - any continuous + any categorical → :class:`MNPEAnalyzer`
+      - all categorical (zero continuous) → :class:`EmpiricalAnalyzer`
+      - all continuous (zero categorical) → :class:`NPEAnalyzer`
+
+    Callers should always go through this factory rather than instantiating a specific
+    subclass — the dispatch encodes invariants (e.g. sbi MNPE 0.26 not supporting
+    pure-categorical theta) that aren't enforced elsewhere.
+    """
+    num_continuous_factors = sum(1 for factor in dataset.schema.factors if factor.type == "continuous")
+    num_categorical_factors = sum(1 for factor in dataset.schema.factors if factor.type == "categorical")
+    assert num_continuous_factors + num_categorical_factors > 0, "Schema declares no factors"
+    if num_continuous_factors > 0 and num_categorical_factors > 0:
+        return MNPEAnalyzer(dataset, outcome_name)
+    if num_categorical_factors > 0:
+        return EmpiricalAnalyzer(dataset, outcome_name)
+    return NPEAnalyzer(dataset, outcome_name)
diff --git a/isaaclab_arena/analysis/sensitivity/dataset.py b/isaaclab_arena/analysis/sensitivity/dataset.py
new file mode 100644
index 000000000..1a176bc23
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/dataset.py
@@ -0,0 +1,370 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Schema parser and dataset loader for sensitivity analysis.
+
+Combines a hand-authored ``factors.yaml`` (the declared schema + priors) with an
+``episode_summary.jsonl`` (per-episode factor draws + outcome values, written by
+``episode_writer``) into the tensors that ``sbi`` consumes for posterior inference.
+
+Vocabulary refresher (for readers new to simulation-based inference / SBI):
+  - **theta** — the *factor* values per episode. The "inputs" we vary in the eval (e.g.
+    ``light_intensity``, ``pick_up_object``). Shape ``(num_episodes, total_factor_dim)``,
+    continuous factors come first then categoricals.
+  - **x** — the *outcome* values per episode. The "outputs" the policy produced (e.g.
+    ``success_rate``, ``object_moved_rate``). Shape ``(num_episodes, num_outcomes)``.
+  - **prior** — the assumed distribution over theta *before* seeing data. v0.3 ships
+    uniform priors only, encoded as ``sbi.utils.BoxUniform``.
+  - **factor_columns** — map from factor name to its column slice in theta, so
+    downstream code can extract a marginal by name without knowing the layout.
+
+MVP-2 supports continuous-1D and categorical factors. Vector continuous (``dim > 1``)
+factors still raise ``NotImplementedError`` so adding them later is a fill-in.
+"""
+
+from __future__ import annotations
+
+import json
+import torch
+import yaml
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+
+@dataclass
+class FactorSpec:
+    """One factor's schema as declared in ``factors.yaml``.
+
+    Continuous factors carry a ``range`` (one ``[low, high]`` pair per dim); categorical
+    factors carry ``choices`` (a list of string labels, integer-encoded by index in theta).
+    """
+
+    name: str
+    type: Literal["continuous", "categorical"]
+    dim: int = 1
+    range: list[list[float]] | None = None  # one [low, high] pair per dim, continuous only
+    choices: list[str] | None = None  # categorical only
+
+
+@dataclass
+class OutcomeSpec:
+    """One outcome's schema (just a name and a type hint; the loader treats all as float)."""
+
+    name: str
+    type: str  # "bool", "float", "int" — informational; loader treats all as float
+
+
+@dataclass
+class SliceSpec:
+    """The ``(policy, task, embodiment)`` tuple a dataset comes from.
+
+    MNPE/NPE assume a single data-generating source per analysis, so all rows in a dataset
+    must belong to the same slice. NOTE: ``SensitivityDataset`` does not check this per row.
+    """
+
+    policy: str
+    task: str
+    embodiment: str
+
+
+@dataclass
+class FactorSchema:
+    """Parsed ``factors.yaml`` — slice + factor list + outcome list."""
+
+    slice: SliceSpec
+    factors: list[FactorSpec]
+    outcomes: list[OutcomeSpec]
+
+    @classmethod
+    def from_yaml(cls, path: str | Path) -> FactorSchema:
+        """Load a ``factors.yaml`` from disk into a typed ``FactorSchema``.
+
+        The YAML must have three top-level blocks: ``slice`` (policy/task/embodiment), ``factors`` (one entry
+        per varied input), and ``outcomes`` (one entry per measured output; ``type`` defaults to ``float``, and
+        a bare ``name:`` entry is accepted). Each factor's ``type`` must be ``continuous`` or ``categorical``.
+        """
+        with open(path, encoding="utf-8") as yaml_file:
+            yaml_data = yaml.safe_load(yaml_file)
+        assert isinstance(yaml_data, dict), f"factors.yaml at {path} must be a mapping at top level"
+        for required_key in ("slice", "factors", "outcomes"):
+            assert required_key in yaml_data, f"factors.yaml at {path} is missing top-level `{required_key}:` block"
+
+        slice_block = yaml_data["slice"]
+        for required_key in ("policy", "task", "embodiment"):
+            assert (
+                required_key in slice_block
+            ), f"factors.yaml at {path} `slice:` block is missing `{required_key}` (need policy/task/embodiment)"
+        slice_spec = SliceSpec(
+            policy=slice_block["policy"],
+            task=slice_block["task"],
+            embodiment=slice_block["embodiment"],
+        )
+
+        factors: list[FactorSpec] = []
+        for factor_name, factor_block in yaml_data["factors"].items():
+            assert factor_block and "type" in factor_block, (
+                f"factors.yaml at {path} factor {factor_name!r} is missing required `type:` field"
+                " (expected 'continuous' or 'categorical')"
+            )
+            factor_type = factor_block["type"]
+            assert factor_type in ("continuous", "categorical"), (
+                f"factors.yaml at {path} factor {factor_name!r} has unknown type {factor_type!r};"
+                " expected 'continuous' or 'categorical'"
+            )
+            factors.append(
+                FactorSpec(
+                    name=factor_name,
+                    type=factor_type,
+                    dim=factor_block.get("dim", 1),
+                    range=factor_block.get("range"),
+                    choices=factor_block.get("choices"),
+                )
+            )
+
+        outcomes = [
+            OutcomeSpec(name=outcome_name, type=(outcome_block or {}).get("type", "float"))
+            for outcome_name, outcome_block in yaml_data["outcomes"].items()
+        ]
+
+        return cls(slice=slice_spec, factors=factors, outcomes=outcomes)
+
+    @property
+    def total_factor_dim(self) -> int:
+        """Total width of theta — sum of ``dim`` over continuous factors plus 1 per categorical."""
+        return sum(factor.dim if factor.type == "continuous" else 1 for factor in self.factors)
+
+    @property
+    def factor_columns(self) -> dict[str, slice]:
+        """Map factor name → column slice in theta.
+
+        Continuous factors occupy the leading columns (their ``dim`` columns each), then
+        each categorical factor occupies one trailing column. This continuous-first
+        ordering matches sbi's MNPE convention so the same theta layout works for both
+        NPE (all-continuous) and MNPE (mixed).
+        """
+        continuous_factors = [factor for factor in self.factors if factor.type == "continuous"]
+        categorical_factors = [factor for factor in self.factors if factor.type == "categorical"]
+        column_slices: dict[str, slice] = {}
+        column_index = 0
+        for factor in continuous_factors + categorical_factors:
+            column_width = factor.dim if factor.type == "continuous" else 1
+            column_slices[factor.name] = slice(column_index, column_index + column_width)
+            column_index += column_width
+        return column_slices
+
+
+class SensitivityDataset:
+    """Combines a ``factors.yaml`` schema with an ``episode_summary.jsonl`` data file.
+
+    On construction:
+      1. Parses the schema (factors + outcomes + slice metadata).
+      2. Loads the JSONL rows (one row per episode).
+      3. Validates that every row contains all declared factor and outcome keys.
+      4. Fills any missing continuous ranges by inferring from observed min/max so the
+         analyzer can always trust ``schema.factors[i].range`` to be populated.
+      5. Builds the ``theta`` and ``x`` tensors that sbi (or the empirical analyzer)
+         will consume.
+
+    The four public attributes used by the analyzer (``theta``, ``x``, ``prior``,
+    ``factor_columns``) are properties — recomputed lazily where appropriate.
+    """
+
+    def __init__(self, factors_yaml: str | Path, jsonl_path: str | Path):
+        self.schema = FactorSchema.from_yaml(factors_yaml)
+
+        jsonl_text = Path(jsonl_path).read_text(encoding="utf-8")
+        self.rows = [json.loads(line) for line in jsonl_text.splitlines() if line.strip()]
+        assert len(self.rows) > 0, f"Empty episode_summary.jsonl at {jsonl_path}"
+
+        self._validate_rows(jsonl_path)
+        self._infer_missing_factor_ranges()
+
+        self._theta = self._build_factor_tensor()
+        self._x = self._build_outcome_tensor()
+
+    def _validate_rows(self, jsonl_path: str | Path) -> None:
+        """Assert every JSONL row carries the keys declared in the schema.
+
+        The writer logs the *entire* arena_env_args dict per row, so the loader only
+        requires that the schema's declared factor names are a *subset* of what's in
+        ``row["arena_env_args"]`` — extra keys (other arena_env_args we don't analyze)
+        are fine and ignored. Same superset-not-equality check for outcomes.
+
+        Catches the most common authoring mistake: a factor declared in factors.yaml
+        that the eval didn't actually vary or log. Surfaces a clear error pointing at
+        the first offending row.
+        """
+        expected_factor_names = {factor.name for factor in self.schema.factors}
+        expected_outcome_names = {outcome.name for outcome in self.schema.outcomes}
+        for row_index, row in enumerate(self.rows):
+            assert (
+                "arena_env_args" in row and "outcomes" in row
+            ), f"Row {row_index} of {jsonl_path} missing arena_env_args/outcomes block"
+            missing_factor_names = expected_factor_names - set(row["arena_env_args"].keys())
+            assert not missing_factor_names, (
+                f"Row {row_index} of {jsonl_path} is missing factor(s) "
+                f"{sorted(missing_factor_names)} from its arena_env_args block; "
+                f"factors.yaml declares: {sorted(expected_factor_names)}"
+            )
+            missing_outcome_names = expected_outcome_names - set(row["outcomes"].keys())
+            assert (
+                not missing_outcome_names
+            ), f"Row {row_index} of {jsonl_path} missing outcomes {sorted(missing_outcome_names)}"
+
+    def _infer_missing_factor_ranges(self) -> None:
+        """For any continuous factor without a declared ``range``, fill it from observed data.
+
+        The prior bounds default to ``[min(values), max(values)]`` over the JSONL (a factor whose
+        logged value never varies therefore gets ``low == high``). Users who want a principled prior
+        (e.g. matching the variation system's declared ``Uniform(low, high)``) should hand-author
+        ``range`` in factors.yaml; that value takes precedence and this method skips them.
+        """
+        for factor in self.schema.factors:
+            if factor.type != "continuous" or factor.range is not None:
+                continue
+            if factor.dim != 1:
+                raise NotImplementedError(
+                    "Range inference for vector factors (dim > 1) is not implemented;"
+                    f" factor {factor.name!r} has dim={factor.dim}"
+                )
+            observed_values = [float(row["arena_env_args"][factor.name]) for row in self.rows]
+            factor.range = [[min(observed_values), max(observed_values)]]
+
+    def _build_factor_tensor(self) -> torch.Tensor:
+        """Assemble the per-episode factor matrix ``theta``.
+
+        Layout: continuous factors fill the leading columns (one column per dim), then
+        each categorical factor fills one trailing column. Categorical values are
+        encoded as ``float32`` integers ``0..num_choices-1`` per the index in
+        ``FactorSpec.choices`` — sbi's MNPE expects exactly this layout (continuous-first,
+        discrete columns as floats, the density estimator handles them as discrete).
+        """
+        continuous_factors = [factor for factor in self.schema.factors if factor.type == "continuous"]
+        categorical_factors = [factor for factor in self.schema.factors if factor.type == "categorical"]
+
+        factor_columns: list[torch.Tensor] = []
+
+        # Continuous columns come first (sbi MNPE convention).
+        for factor in continuous_factors:
+            if factor.dim != 1:
+                raise NotImplementedError(
+                    "Vector continuous factors (dim > 1) are not yet supported;"
+                    f" factor {factor.name!r} has dim={factor.dim}"
+                )
+            factor_column = torch.tensor(
+                [float(row["arena_env_args"][factor.name]) for row in self.rows], dtype=torch.float32
+            ).unsqueeze(1)
+            factor_columns.append(factor_column)
+
+        # Categorical columns: integer-code each string value as its index in FactorSpec.choices.
+        for factor in categorical_factors:
+            assert (
+                factor.choices is not None and len(factor.choices) > 0
+            ), f"Categorical factor {factor.name!r} has no `choices:` block in factors.yaml"
+            choice_to_code = {choice: code for code, choice in enumerate(factor.choices)}
+            category_codes: list[int] = []
+            for row_index, row in enumerate(self.rows):
+                value = row["arena_env_args"][factor.name]
+                assert value in choice_to_code, (
+                    f"Row {row_index} factor {factor.name!r} has value {value!r}"
+                    f" not in declared choices {factor.choices}"
+                )
+                category_codes.append(choice_to_code[value])
+            factor_column = torch.tensor(category_codes, dtype=torch.float32).unsqueeze(1)
+            factor_columns.append(factor_column)
+
+        if factor_columns:
+            return torch.cat(factor_columns, dim=1)
+        return torch.zeros((len(self.rows), 0), dtype=torch.float32)
+
+    def _build_outcome_tensor(self) -> torch.Tensor:
+        """Assemble the per-episode outcome matrix ``x`` (one column per declared outcome).
+
+        Each outcome value is cast to float; bool outcomes become 0.0/1.0. The analyzer
+        usually selects a single outcome column at fit time and conditions queries on it.
+        """
+        outcome_column_tensors = [
+            torch.tensor([float(row["outcomes"][outcome.name]) for row in self.rows], dtype=torch.float32).unsqueeze(1)
+            for outcome in self.schema.outcomes
+        ]
+        return torch.cat(outcome_column_tensors, dim=1)
+
+    @property
+    def theta(self) -> torch.Tensor:
+        """``(num_episodes, total_factor_dim)`` matrix of factor values, one row per episode.
+
+        This is the "input" sbi infers a posterior over. Column layout is given by
+        ``factor_columns`` — continuous factors first, then categoricals (integer-coded).
+        """
+        return self._theta
+
+    @property
+    def x(self) -> torch.Tensor:
+        """``(num_episodes, num_outcomes)`` matrix of outcome values, one row per episode.
+
+        This is what the analyzer conditions queries on. The analyzer typically selects a
+        single outcome column at fit time (e.g. ``success_rate``) and asks
+        "what theta values were consistent with observing this outcome?"
+        """
+        return self._x
+
+    @property
+    def factor_columns(self) -> dict[str, slice]:
+        """Map factor name → its column slice in theta. Same as ``schema.factor_columns``."""
+        return self.schema.factor_columns
+
+    @property
+    def outcome_columns(self) -> dict[str, int]:
+        """Map outcome name → its column index in x."""
+        return {outcome.name: index for index, outcome in enumerate(self.schema.outcomes)}
+
+    @property
+    def has_categorical_factors(self) -> bool:
+        """True iff the schema declares at least one categorical factor."""
+        return any(factor.type == "categorical" for factor in self.schema.factors)
+
+    @property
+    def prior(self):
+        """The uniform prior over all factor dims that the analyzer assumes.
+
+        Built as a single ``sbi.utils.BoxUniform`` over the concatenated bounds in
+        continuous-first / categorical-after order:
+          - Continuous factor → uses the declared (or inferred) ``[low, high]`` per dim.
+          - Categorical factor → uses ``[0, num_choices - 1]`` (the integer codes from
+            ``_build_factor_tensor``); sbi MNPE's mixed density estimator treats them as
+            discrete from there.
+
+        sbi is imported lazily so loading the dataset doesn't pay the sbi import cost
+        unless the analyzer actually runs.
+        """
+        from sbi.utils import BoxUniform
+
+        low_bounds: list[float] = []
+        high_bounds: list[float] = []
+
+        # Continuous factor bounds (one [low, high] pair per dim).
+        for factor in self.schema.factors:
+            if factor.type != "continuous":
+                continue
+            assert factor.range is not None, f"Factor {factor.name!r} has no range and was not inferred"
+            for dim_low, dim_high in factor.range:
+                low_bounds.append(float(dim_low))
+                high_bounds.append(float(dim_high))
+
+        # Categorical factor bounds: [0, num_choices - 1] per factor (one column).
+ for factor in self.schema.factors: + if factor.type != "categorical": + continue + assert ( + factor.choices is not None and len(factor.choices) > 0 + ), f"Categorical factor {factor.name!r} has no `choices:` block" + low_bounds.append(0.0) + high_bounds.append(float(len(factor.choices) - 1)) + + return BoxUniform( + low=torch.tensor(low_bounds, dtype=torch.float32), + high=torch.tensor(high_bounds, dtype=torch.float32), + ) diff --git a/isaaclab_arena/analysis/sensitivity/episode_writer.py b/isaaclab_arena/analysis/sensitivity/episode_writer.py new file mode 100644 index 000000000..54624456b --- /dev/null +++ b/isaaclab_arena/analysis/sensitivity/episode_writer.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Per-episode summary writer for sensitivity analysis. + +``write_episode_summaries`` appends one JSONL row per recorded demo for a just-completed +job. Each row carries: + + - ``job_name`` and ``episode_idx`` for traceability, + - ``arena_env_args`` — the *entire* job.arena_env_args_dict, i.e. every value that + parameterized the env for this episode, + - ``outcomes`` — per-episode outcome values from the task's registered metrics, extracted + from the recorded hdf5 demos via each metric's ``compute_metric_from_recording``. + +The eval-side writer is intentionally analysis-agnostic: it logs all env state, and the +analyzer's ``factors.yaml`` decides which subset of those keys to treat as factors. This +keeps the writer free of any "what counts as a factor?" knowledge. + +Import-order note: this module legitimately touches pxr at import time via +``isaaclab_arena.metrics.metrics`` (which imports ``isaaclab.envs.manager_based_rl_env``). 
def _demo_sort_key(demo_name: str) -> tuple[int, int, str]:
    """Sort key that orders ``<prefix>_<number>`` names by their numeric suffix.

    Names without a numeric suffix sort after all numbered names, alphabetically.
    """
    suffix = demo_name.rpartition("_")[2]
    if suffix.isdigit():
        return (0, int(suffix), demo_name)
    return (1, 0, demo_name)


def write_episode_summaries(env, job: Job, output_path: str | Path) -> int:
    """Append one JSONL row per recorded demo for the just-completed job.

    Each row has the keys ``job_name``, ``episode_idx``, ``arena_env_args`` (a copy of
    ``job.arena_env_args_dict``) and ``outcomes`` (one value per registered metric,
    computed by that metric's ``compute_metric_from_recording`` from the demo's recorded
    data and converted with ``metrics_to_plain_python_types``).

    Demos are visited in numeric order of their name suffix (``demo_2`` before
    ``demo_10``). h5py iterates group members alphanumerically by default, which would
    otherwise make ``episode_idx`` disagree with the recording order as soon as a job
    has more than ten demos.

    Args:
        env: The (possibly gym-wrapped) env that just finished its rollout. The hdf5 path
            and registered metrics are read through ``env.unwrapped``.
        job: The job that ran; its ``name`` and ``arena_env_args_dict`` are logged.
        output_path: JSONL file to append to. Created (with parent dirs) if absent.

    Returns:
        Number of rows written; ``0`` when the env cfg has no ``metrics`` attribute or
        it is ``None`` (in which case nothing is created on disk).
    """
    unwrapped_env = env.unwrapped
    registered_metrics = getattr(unwrapped_env.cfg, "metrics", None)
    if registered_metrics is None:
        return 0

    # Shallow snapshot: the same mapping is serialized into every row of this job.
    env_args_snapshot = dict(job.arena_env_args_dict)

    dataset_path = get_metric_recorder_dataset_path(unwrapped_env)
    target_path = Path(output_path)
    target_path.parent.mkdir(parents=True, exist_ok=True)

    rows_written = 0
    with h5py.File(dataset_path, "r") as hdf5_file, open(target_path, "a", encoding="utf-8") as jsonl_output:
        demos = hdf5_file["data"]
        for episode_index, demo_name in enumerate(sorted(demos, key=_demo_sort_key)):
            demo_group = demos[demo_name]
            raw_outcomes = {
                metric.name: metric.compute_metric_from_recording([demo_group[metric.recorder_term_name][:]])
                for metric in registered_metrics
            }
            row = {
                "job_name": job.name,
                "episode_idx": episode_index,
                "arena_env_args": env_args_snapshot,
                "outcomes": metrics_to_plain_python_types(raw_outcomes),
            }
            jsonl_output.write(json.dumps(row) + "\n")
            rows_written += 1

    return rows_written
def plot_marginal(
    analyzer: BaseAnalyzer,
    factor_name: str,
    output_path: str | Path,
    outcome_value: float = 1.0,
    num_samples: int = 10_000,
    num_grid_points: int = 200,
) -> None:
    """Render the marginal posterior for ``factor_name``, dispatching by factor type.

    Args:
        analyzer: Fitted analyzer; its ``_factor_spec(factor_name)`` decides the renderer.
        factor_name: Name of the factor to plot.
        output_path: Where the figure is saved.
        outcome_value: Outcome value the posterior is conditioned on.
        num_samples: Forwarded to the categorical renderer only.
        num_grid_points: Forwarded to the continuous renderer only.

    Raises:
        NotImplementedError: If the factor type is neither ``"continuous"`` nor
            ``"categorical"``, or if a continuous factor is requested from an analyzer
            that has no ``continuous_marginal_density`` attribute.
    """
    factor_spec = analyzer._factor_spec(factor_name)

    if factor_spec.type == "categorical":
        _plot_categorical_marginal(analyzer, factor_spec, output_path, outcome_value, num_samples)
        return

    if factor_spec.type != "continuous":
        raise NotImplementedError(f"Unsupported factor type {factor_spec.type!r}")

    if not hasattr(analyzer, "continuous_marginal_density"):
        raise NotImplementedError(
            f"{type(analyzer).__name__} cannot plot continuous factors; expected a PosteriorAnalyzer (NPE/MNPE)."
        )
    _plot_continuous_marginal(analyzer, factor_spec, output_path, outcome_value, num_grid_points)


def _plot_continuous_marginal(
    analyzer: BaseAnalyzer,
    factor_spec: FactorSpec,
    output_path: str | Path,
    outcome_value: float,
    num_grid_points: int,
) -> None:
    """Render a continuous factor's marginal posterior as a density curve with a rug.

    The curve is whatever ``analyzer.continuous_marginal_density`` returns for the
    factor. Below the x-axis two rows of tick marks show the recorded factor values:
    green for episodes whose outcome is ``>= 0.5`` and red for the rest, so the curve
    can be eyeballed against the raw data.
    """
    import matplotlib.pyplot as plt

    grid, density = analyzer.continuous_marginal_density(factor_spec.name, outcome_value, num_grid_points)

    dataset = analyzer.dataset
    outcome_name = analyzer.outcome_name
    theta_values = dataset.theta[:, dataset.factor_columns[factor_spec.name]].squeeze(-1).cpu().numpy()
    outcome_values = dataset.x[:, dataset.outcome_columns[outcome_name]].cpu().numpy()
    succeeded = outcome_values >= 0.5
    peak_density = density.max()

    figure, axes = plt.subplots(figsize=(8, 5))
    axes.plot(
        grid,
        density,
        color="steelblue",
        linewidth=2,
        label=f"P({factor_spec.name} | {outcome_name}={outcome_value:g})",
    )
    axes.fill_between(grid, 0, density, color="steelblue", alpha=0.2)

    # Rug rows sit at a fixed fraction of the peak below zero so they never overlap the curve.
    rug_rows = (
        (succeeded, -0.05, "seagreen", "≥"),
        (~succeeded, -0.1, "firebrick", "<"),
    )
    for mask, offset_fraction, color, comparison in rug_rows:
        count = mask.sum()
        axes.scatter(
            theta_values[mask],
            np.full(count, offset_fraction * peak_density),
            marker="|",
            color=color,
            s=80,
            label=f"{outcome_name} {comparison} 0.5 (n={count})",
        )

    axes.set_xlabel(factor_spec.name)
    axes.set_ylabel("posterior density")
    axes.set_title(_plot_title(analyzer, factor_spec.name))
    axes.legend(loc="best", fontsize=9)
    axes.grid(alpha=0.3)
    figure.tight_layout()
    _save_figure(figure, output_path)


def _plot_categorical_marginal(
    analyzer: BaseAnalyzer,
    factor_spec: FactorSpec,
    output_path: str | Path,
    outcome_value: float,
    num_samples: int,
) -> None:
    """Render a categorical factor's marginal as side-by-side bars per category.

    Left (blue) bars are ``analyzer.categorical_marginal_probs(...)``. Right (green)
    bars are computed here from the raw dataset: the fraction of episodes in each
    category whose outcome is ``>= 0.5`` (``0`` for categories with no episodes). Each
    green bar is annotated with that category's episode count ``n``.
    """
    import matplotlib.pyplot as plt

    assert factor_spec.choices is not None
    choices = factor_spec.choices
    num_choices = len(choices)
    dataset = analyzer.dataset
    outcome_name = analyzer.outcome_name
    factor_slice = dataset.factor_columns[factor_spec.name]
    outcome_index = dataset.outcome_columns[outcome_name]

    posterior_probabilities = analyzer.categorical_marginal_probs(factor_spec.name, outcome_value, num_samples)

    # theta stores categories as float codes; cast back to integers for comparison.
    category_codes = dataset.theta[:, factor_slice].squeeze(-1).long().cpu().numpy()
    achieved = dataset.x[:, outcome_index].cpu().numpy() >= 0.5
    empirical_rates = np.zeros(num_choices)
    empirical_counts = np.zeros(num_choices, dtype=int)
    for code in range(num_choices):
        in_category = category_codes == code
        empirical_counts[code] = int(in_category.sum())
        if in_category.any():
            empirical_rates[code] = float(achieved[in_category].mean())

    figure, axes = plt.subplots(figsize=(max(8, 1.0 * num_choices), 5))
    positions = np.arange(num_choices)
    bar_width = 0.4
    half_width = bar_width / 2
    axes.bar(
        positions - half_width,
        posterior_probabilities,
        bar_width,
        color="steelblue",
        alpha=0.8,
        label=f"P(category | {outcome_name}={outcome_value:g})",
    )
    axes.bar(
        positions + half_width,
        empirical_rates,
        bar_width,
        color="seagreen",
        alpha=0.7,
        label=f"empirical {outcome_name} rate per category",
    )
    for category_index, count in enumerate(empirical_counts):
        axes.text(
            category_index + half_width,
            empirical_rates[category_index] + 0.02,
            f"n={count}",
            ha="center",
            fontsize=8,
        )

    axes.set_xticks(positions)
    axes.set_xticklabels(choices, rotation=30, ha="right")
    axes.set_ylabel("probability")
    axes.set_ylim(0, 1.05)
    axes.set_title(_plot_title(analyzer, factor_spec.name))
    axes.legend(loc="best", fontsize=9)
    axes.grid(alpha=0.3, axis="y")
    figure.tight_layout()
    _save_figure(figure, output_path)


def _plot_title(analyzer: BaseAnalyzer, factor_name: str) -> str:
    """Return the two-line plot title: outcome/factor on line one, slice on line two."""
    slice_info = analyzer.dataset.schema.slice
    headline = f"Sensitivity of {analyzer.outcome_name} to {factor_name}"
    slice_line = f"slice: {slice_info.policy} / {slice_info.task} / {slice_info.embodiment}"
    return f"{headline}\n{slice_line}"


def _save_figure(figure, output_path: str | Path) -> None:
    """Save a matplotlib figure to disk (creating parent dirs) and always close it.

    The figure is closed even when directory creation or ``savefig`` raises, so a failed
    save does not leave the figure registered with pyplot.
    """
    import matplotlib.pyplot as plt

    target_path = Path(output_path)
    try:
        target_path.parent.mkdir(parents=True, exist_ok=True)
        figure.savefig(target_path, dpi=150)
    finally:
        plt.close(figure)
def _factors_yaml_text(choices: list[str]) -> str:
    """Build the factors.yaml content matching the synthetic data.

    Each choice is written as a JSON-quoted scalar inside the YAML flow sequence, so
    names containing ``,``, ``:``, ``#``, brackets, or YAML keywords such as ``yes`` /
    ``null`` are still read back as the original strings.

    Args:
        choices: Category names for the single ``pick_up_object`` factor.

    Returns:
        The full YAML document as text, ending with a newline.
    """
    quoted_choices = ", ".join(json.dumps(choice) for choice in choices)
    lines = [
        "# factors.yaml — synthetic categorical dataset for analyzer smoke-testing.",
        "# Auto-emitted by synthetic_data_categorical alongside the JSONL.",
        "",
        "slice:",
        "  policy: synthetic_categorical",
        "  task: synthetic_pick_and_place",
        "  embodiment: synthetic",
        "",
        "factors:",
        "  pick_up_object:",
        "    type: categorical",
        f"    choices: [{quoted_choices}]",
        "",
        "outcomes:",
        "  success_rate:",
        "    type: float",
        "  object_moved_rate:",
        "    type: float",
    ]
    return "\n".join(lines) + "\n"


def main():
    """Generate the synthetic categorical dataset and its matching factors.yaml.

    For every episode one category is drawn uniformly from ``DEFAULT_CHOICES`` and the
    episode succeeds with that category's entry in ``DEFAULT_SUCCESS_PROBABILITIES``.
    Rows are written to ``--output`` (overwriting it), the schema to
    ``--factors-yaml-out`` (default: ``factors.yaml`` next to the JSONL), and a
    per-category summary is printed.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "--output",
        type=str,
        default="/tmp/synthetic_categorical_episode_summary.jsonl",
        help="Output JSONL path.",
    )
    parser.add_argument(
        "--factors-yaml-out",
        type=str,
        default=None,
        help="Output factors.yaml path. Default: same directory as --output, named factors.yaml.",
    )
    parser.add_argument(
        "--num-episodes",
        type=int,
        default=200,
        help="Total episodes (uniform draws across all choices). Default 200 → ~40 per category for 5 choices.",
    )
    parser.add_argument("--seed", type=int, default=42, help="RNG seed for reproducibility.")
    args = parser.parse_args()

    rng = random.Random(args.seed)
    choices = DEFAULT_CHOICES
    success_probabilities = DEFAULT_SUCCESS_PROBABILITIES
    assert len(choices) == len(
        success_probabilities
    ), "DEFAULT_CHOICES and DEFAULT_SUCCESS_PROBABILITIES lengths must match"

    success_counts = dict.fromkeys(choices, 0)
    episode_counts = dict.fromkeys(choices, 0)
    summary_rows = []
    for episode_index in range(args.num_episodes):
        # Draw order (category first, then the success coin flip) fixes the RNG stream.
        category_index = rng.randrange(len(choices))
        category = choices[category_index]
        was_success = 1.0 if rng.random() < success_probabilities[category_index] else 0.0
        success_counts[category] += int(was_success)
        episode_counts[category] += 1
        summary_rows.append({
            "job_name": "synth_categorical",
            "episode_idx": episode_index,
            "arena_env_args": {"pick_up_object": category},
            "outcomes": {"success_rate": was_success, "object_moved_rate": was_success},
        })

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as jsonl_file:
        jsonl_file.writelines(json.dumps(row) + "\n" for row in summary_rows)

    if args.factors_yaml_out:
        factors_yaml_path = Path(args.factors_yaml_out)
    else:
        factors_yaml_path = output_path.parent / "factors.yaml"
    factors_yaml_path.parent.mkdir(parents=True, exist_ok=True)
    factors_yaml_path.write_text(_factors_yaml_text(choices), encoding="utf-8")

    print(f"[INFO] Wrote {len(summary_rows)} rows to {output_path}")
    print(f"[INFO] Wrote factors schema → {factors_yaml_path}")
    print("[INFO] Per-category success counts (analyzer should pull posterior mass toward easy cats):")
    for choice, target_probability in zip(choices, success_probabilities):
        successes = success_counts[choice]
        total = episode_counts[choice]
        empirical_percentage = 100 * successes / total if total else 0.0
        bar_string = "█" * int(round(empirical_percentage / 5))
        print(
            f" {choice:<35s} target={target_probability:>4.0%}"
            f" empirical={successes:>3d}/{total:<3d} ({empirical_percentage:>5.1f}%) {bar_string}"
        )
a linear-Gaussian likelihood is symmetric in linear theta-space, so its mode + equals its mean — and the NPE Gaussian fallback for 1D binary outcomes fits + the mean, recovering the true center. + +A more realistic competence band would be log-Gaussian (asymmetric: cameras blind fast +at low intensity, saturate gradually at high), but that introduces a peak-bias artifact +that masks pipeline-correctness signal. This smoke test deliberately matches the +structural assumptions the analyzer can recover exactly, so any mismatch in the output +points to a real bug rather than a known statistical limitation. + +Pair with the hand-authored ``light_intensity_sweep_factors.yaml`` so the analyzer +script can be smoke-tested end-to-end without running Isaac Sim: + + /isaac-sim/python.sh -m isaaclab_arena.analysis.sensitivity.synthetic_data_continuous \\ + --output /tmp/syn.jsonl + /isaac-sim/python.sh -m isaaclab_arena.scripts.analyze_sensitivity \\ + --factors_yaml isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml \\ + --episode_summary /tmp/syn.jsonl \\ + --figure_path /tmp/syn_plot.png + +Expected output: a posterior-density curve peaking at ``center`` (default 500), with +empirical rug markers showing successes clustered around the center and failures at +both extremes. +""" + +from __future__ import annotations + +import argparse +import json +import math +import random +from pathlib import Path + +INTENSITY_LOW = 10.0 +INTENSITY_HIGH = 5000.0 + +# A self-contained factors.yaml template for the synthetic dataset. Kept inline (rather +# than imported from episode_writer.py) so this module stays a pure-python dev tool — +# importing episode_writer would transitively load pxr via isaaclab_arena.metrics. +_SYNTHETIC_FACTORS_YAML = """\ +# factors.yaml — synthetic dataset for analyzer smoke-testing. +# Auto-emitted by isaaclab_arena.analysis.sensitivity.synthetic_data_continuous alongside the JSONL. 
def success_probability(intensity: float, center: float, sigma: float) -> float:
    """Linear-Gaussian competence band: peaks at `center`, falls off symmetrically in linear space.

    Computes ``exp(-0.5 * ((intensity - center) / sigma) ** 2)``.

    Args:
        intensity: The sampled factor value.
        center: Intensity at which the probability equals 1.
        sigma: Width of the band in intensity units. ``0`` is treated as the limiting
            case of an infinitely narrow band instead of raising ``ZeroDivisionError``.

    Returns:
        A probability in ``[0, 1]``.
    """
    if sigma == 0:
        # Limit of the Gaussian bump as sigma -> 0: success only exactly at the center.
        return 1.0 if intensity == center else 0.0
    z_score = (intensity - center) / sigma
    return math.exp(-0.5 * z_score * z_score)


def main():
    """Generate the synthetic continuous dataset, its factors.yaml, and a text histogram.

    Each episode draws an intensity uniformly from ``[INTENSITY_LOW, INTENSITY_HIGH]``
    and succeeds with ``success_probability(intensity, --center, --sigma)``. Rows are
    written to ``--output`` (overwriting it), ``_SYNTHETIC_FACTORS_YAML`` is written to
    ``--factors-yaml-out`` (default: ``factors.yaml`` next to the JSONL), and per-bin
    success rates over 10 equal-width bins are printed.

    NOTE(review): because sampling is bounded below by ``INTENSITY_LOW``, the successful
    intensities follow a Gaussian truncated on the left. For the defaults (center=500,
    sigma=400, lower bound 10) their mean works out to roughly 585 rather than 500, so
    an estimator that reports the mean of successful values should be expected to peak
    slightly above ``center`` — confirm against the analyzer output.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--output", type=str, default="/tmp/synthetic_episode_summary.jsonl", help="Output JSONL path.")
    parser.add_argument(
        "--factors-yaml-out",
        type=str,
        default=None,
        help="Output factors.yaml path. Default: same directory as --output, named factors.yaml.",
    )
    parser.add_argument(
        "--num-episodes",
        type=int,
        default=180,
        help="Total number of episodes to generate. Each draws an intensity from Uniform(10, 5000).",
    )
    parser.add_argument("--center", type=float, default=500.0, help="Intensity where success rate peaks. Default: 500.")
    parser.add_argument(
        "--sigma",
        type=float,
        default=400.0,
        help=(
            # At +/- 1 sigma the band evaluates to exp(-0.5) ~= 0.61, at 3 sigma to ~0.01.
            "Linear-space width of the competence band (1 sigma in intensity units). Default: 400,"
            " which gives at least ~60%% success in [100, 900] and near-zero success beyond ~1700."
        ),
    )
    parser.add_argument("--seed", type=int, default=42, help="RNG seed for reproducibility.")
    args = parser.parse_args()

    rng = random.Random(args.seed)

    summary_rows = []
    for episode_index in range(args.num_episodes):
        # Draw order (intensity first, then the success coin flip) fixes the RNG stream.
        intensity = rng.uniform(INTENSITY_LOW, INTENSITY_HIGH)
        probability_of_success = success_probability(intensity, args.center, args.sigma)
        was_success = 1.0 if rng.random() < probability_of_success else 0.0
        summary_rows.append({
            "job_name": "synth_linear_uniform",
            "episode_idx": episode_index,
            "arena_env_args": {"light_intensity": intensity},
            "outcomes": {"success_rate": was_success, "object_moved_rate": was_success},
        })

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as jsonl_file:
        jsonl_file.writelines(json.dumps(row) + "\n" for row in summary_rows)

    # Emit a matching factors.yaml so the analyzer can be pointed at this synthetic
    # dataset without any hand-authored schema.
    if args.factors_yaml_out:
        factors_yaml_path = Path(args.factors_yaml_out)
    else:
        factors_yaml_path = output_path.parent / "factors.yaml"
    factors_yaml_path.parent.mkdir(parents=True, exist_ok=True)
    factors_yaml_path.write_text(_SYNTHETIC_FACTORS_YAML, encoding="utf-8")

    print(f"[INFO] Wrote {len(summary_rows)} rows to {output_path}")
    print(f"[INFO] Wrote factors schema → {factors_yaml_path}")
    print(f"[INFO] Linear-Gaussian competence band: center={args.center:g}, sigma={args.sigma:g}")
    print("[INFO] Per-bin success rates (10 equal bins across the prior range):")

    num_bins = 10
    bin_width = (INTENSITY_HIGH - INTENSITY_LOW) / num_bins
    episodes_per_bin = [0] * num_bins
    successes_per_bin = [0] * num_bins
    for row in summary_rows:
        intensity = row["arena_env_args"]["light_intensity"]
        # Clamp so a draw that lands exactly on INTENSITY_HIGH is counted in the last
        # bin instead of falling outside every half-open interval.
        bin_index = min(int((intensity - INTENSITY_LOW) / bin_width), num_bins - 1)
        episodes_per_bin[bin_index] += 1
        successes_per_bin[bin_index] += int(row["outcomes"]["success_rate"])

    for bin_index, (successes_in_bin, episodes_in_bin) in enumerate(zip(successes_per_bin, episodes_per_bin)):
        if not episodes_in_bin:
            continue
        bin_low = INTENSITY_LOW + bin_index * bin_width
        bin_high = bin_low + bin_width
        percentage = 100 * successes_in_bin / episodes_in_bin
        bar_string = "█" * int(round(percentage / 5))
        print(
            f" [{bin_low:>5g}, {bin_high:>5g}): {successes_in_bin:>3d}/{episodes_in_bin:<3d}"
            f" ({percentage:>5.1f}%) {bar_string}"
        )
IsaacLabArenaManagerBasedRLEnvCfg(ManagerBasedRLEnvCfg): isaaclab_arena_env: IsaacLabArenaEnvironment | None = None # Overriding defaults from base class - sim: SimulationCfg = SimulationCfg(dt=1 / 200, render_interval=2) + # Override the RTX renderer's built-in scene ambient (carb /rtx/sceneDb/ambientLightIntensity, default 1.0 with + # color [0.1, 0.1, 0.1]) so that USD light prims fully control scene illumination. Without this, Arena scenes + # carry a ~10%-gray ambient floor independent of any UsdLuxLight, which silently confounds vision-policy evals. + sim: SimulationCfg = SimulationCfg( + dt=1 / 200, + render_interval=2, + render=RenderCfg( + carb_settings={ + "/rtx/sceneDb/ambientLightIntensity": 0.0, + "/rtx/sceneDb/ambientLightColor": [0.0, 0.0, 0.0], + }, + ), + ) decimation: int = 4 episode_length_s: float = 50.0 wait_for_textures: bool = False diff --git a/isaaclab_arena/evaluation/eval_runner.py b/isaaclab_arena/evaluation/eval_runner.py index cd6f845d4..d9902f638 100644 --- a/isaaclab_arena/evaluation/eval_runner.py +++ b/isaaclab_arena/evaluation/eval_runner.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser +from isaaclab_arena.evaluation.camera_video import CameraObsVideoRecorder from isaaclab_arena.evaluation.eval_runner_cli import add_eval_runner_arguments from isaaclab_arena.evaluation.job_manager import Job, JobManager, Status from isaaclab_arena.evaluation.policy_runner import get_policy_cls, rollout_policy @@ -144,13 +145,23 @@ def main(): # Check if any job requires cameras and enable them if needed before starting simulation enable_cameras_if_required(eval_jobs_config, args_cli) + # Per-episode summary recording is opt-in via --episode_summary. The writer logs the + # full arena_env_args dict per episode; the analyzer side decides which keys to treat + # as factors via factors.yaml. No eval-side knowledge of "factors" required. 
+ episode_summary_enabled = args_cli.episode_summary is not None + if episode_summary_enabled: + print( + "[INFO] Episode summary recording enabled. Per-episode arena_env_args + outcomes" + f" → {args_cli.episode_summary}" + ) + with SimulationAppContext(args_cli): job_manager = JobManager(eval_jobs_config["jobs"]) metrics_logger = MetricsLogger() job_manager.print_jobs_info() - if args_cli.video: + if args_cli.video or args_cli.camera_video: os.makedirs(args_cli.video_dir, exist_ok=True) print(f"[INFO] Video recording enabled. Videos will be saved to: {args_cli.video_dir}") @@ -172,20 +183,32 @@ def main(): else: job.num_steps = args_cli.num_steps - if args_cli.video: + if args_cli.video or args_cli.camera_video: if job.num_steps is not None: video_length = job.num_steps else: video_length = job.num_episodes * env.unwrapped.max_episode_length + job_video_folder = os.path.join(args_cli.video_dir, job.name) + + if args_cli.video: video_kwargs = { - "video_folder": os.path.join(args_cli.video_dir, job.name), + "video_folder": job_video_folder, "step_trigger": lambda step: step == 0, "video_length": video_length, "disable_logger": True, } - print(f"[INFO] Recording video for job '{job.name}' -> {video_kwargs['video_folder']}") + print(f"[INFO] Recording viewport video for job '{job.name}' -> {job_video_folder}") env = RecordVideo(env, **video_kwargs) + if args_cli.camera_video: + print(f"[INFO] Recording per-camera videos for job '{job.name}' -> {job_video_folder}") + env = CameraObsVideoRecorder( + env, + video_folder=job_video_folder, + step_trigger=lambda step: step == 0, + video_length=video_length, + ) + metrics = rollout_policy( env, policy, @@ -194,6 +217,15 @@ def main(): language_instruction=job.language_instruction, ) + if episode_summary_enabled: + # Deferred import — episode_writer transitively touches pxr via + # isaaclab_arena.metrics.metrics. Matches the policy_runner.py:107 + # pattern for compute_metrics. 
+ from isaaclab_arena.analysis.sensitivity.episode_writer import write_episode_summaries + + rows = write_episode_summaries(env, job, args_cli.episode_summary) + print(f"[INFO] Wrote {rows} episode summaries for job '{job.name}'") + job_manager.complete_job(job, metrics=metrics, status=Status.COMPLETED) # users may not specify metrics for a task, although it's not recommended diff --git a/isaaclab_arena/evaluation/eval_runner_cli.py b/isaaclab_arena/evaluation/eval_runner_cli.py index b39187b04..d776e180b 100644 --- a/isaaclab_arena/evaluation/eval_runner_cli.py +++ b/isaaclab_arena/evaluation/eval_runner_cli.py @@ -21,9 +21,32 @@ def add_eval_runner_arguments(parser: argparse.ArgumentParser) -> None: default="/eval/videos", help="Root directory for recorded videos. Each job gets a subdirectory.", ) + parser.add_argument( + "--camera_video", + "--camera-video", + action="store_true", + default=False, + help=( + "For each job, record one mp4 per camera in obs['camera_obs'] (what the policy actually sees)." + " Independent of --video; use either or both." + ), + ) parser.add_argument( "--continue_on_error", action="store_true", default=False, help="Continue evaluation with remaining jobs when a job fails instead of stopping immediately.", ) + parser.add_argument( + "--episode_summary", + type=str, + default=None, + help=( + "Output JSONL file for per-episode summaries. When set, eval_runner writes one" + " JSONL row per recorded demo containing the full arena_env_args dict (what" + " parameterized the env for that episode) and the task's registered outcomes." + " The analyzer side picks which arena_env_args keys to treat as factors via" + " factors.yaml — no eval-side flag needed. Absent here means no recording and" + " unchanged behavior for non-sensitivity workflows." 
+ ), + ) diff --git a/isaaclab_arena/evaluation/job_manager.py b/isaaclab_arena/evaluation/job_manager.py index 8c4d09c46..43bbe1ffb 100644 --- a/isaaclab_arena/evaluation/job_manager.py +++ b/isaaclab_arena/evaluation/job_manager.py @@ -28,6 +28,7 @@ def __init__( policy_config_dict: dict = None, status: Status = None, language_instruction: str = None, + arena_env_args_dict: dict | None = None, ): """Initialize a Job instance. @@ -42,9 +43,13 @@ def __init__( status: Job status (defaults to PENDING) language_instruction: Optional language instruction override for the policy. When set, takes precedence over the task's own description. + arena_env_args_dict: The original dict form of arena_env_args before conversion to + CLI args list. Preserves typed values (e.g. floats stay floats) for downstream + consumers that need to index by key. """ self.name = name self.arena_env_args = arena_env_args + self.arena_env_args_dict = arena_env_args_dict if arena_env_args_dict is not None else {} assert num_envs > 0, "num_envs must be greater than 0" assert not ( num_steps is not None and num_episodes is not None @@ -102,6 +107,7 @@ def from_dict(cls, data: dict) -> "Job": return cls( name=data["name"], arena_env_args=cls.convert_args_dict_to_cli_args_list(data["arena_env_args"]), + arena_env_args_dict=data["arena_env_args"], policy_type=data["policy_type"], num_envs=num_envs, num_steps=num_steps, diff --git a/isaaclab_arena/scripts/analyze_sensitivity.py b/isaaclab_arena/scripts/analyze_sensitivity.py new file mode 100644 index 000000000..052948b2f --- /dev/null +++ b/isaaclab_arena/scripts/analyze_sensitivity.py @@ -0,0 +1,106 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""CLI driver for 1D continuous sensitivity analysis (MVP-1). 
+ +Loads a SensitivityDataset from a paired (factors.yaml, episode_summary.jsonl), trains +NPE on the selected outcome column, and saves a posterior-marginal plot for the chosen +factor. + +Example: + python -m isaaclab_arena.scripts.analyze_sensitivity \\ + --factors_yaml isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml \\ + --episode_summary ./episode_summary.jsonl \\ + --figure_path ./light_intensity_sensitivity.png + +This script runs entirely offline — no Isaac Sim, no policy server. +""" + +from __future__ import annotations + +import argparse + +from isaaclab_arena.analysis.sensitivity.analyzer import make_analyzer +from isaaclab_arena.analysis.sensitivity.dataset import SensitivityDataset +from isaaclab_arena.analysis.sensitivity.plotting import plot_marginal + + +def main(): + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--factors_yaml", type=str, required=True, help="Path to factors.yaml.") + parser.add_argument( + "--episode_summary", type=str, required=True, help="Path to episode_summary.jsonl produced by eval_runner." + ) + parser.add_argument( + "--input_factor", + type=str, + default=None, + help="Name of the factor to plot. Defaults to the only factor declared in factors.yaml.", + ) + parser.add_argument( + "--output_metric", + type=str, + default=None, + help="Outcome name to condition on. Defaults to the first outcome listed in factors.yaml.", + ) + parser.add_argument( + "--outcome_value", + type=float, + default=1.0, + help="Outcome value to condition on (1.0 = success). Default: 1.0.", + ) + parser.add_argument( + "--figure_path", + type=str, + default="./sensitivity.png", + help="Output figure path. 
Default: ./sensitivity.png.", + ) + args = parser.parse_args() + + print(f"[INFO] Loading dataset: factors={args.factors_yaml} jsonl={args.episode_summary}") + dataset = SensitivityDataset(args.factors_yaml, args.episode_summary) + + available_factors = list(dataset.factor_columns) + available_outcomes = [outcome.name for outcome in dataset.schema.outcomes] + + if args.input_factor is None: + factor_name = available_factors[0] + else: + if args.input_factor not in available_factors: + parser.error( + f"--input_factor {args.input_factor!r} not found in factors.yaml. " + f"Available factors: {available_factors}" + ) + factor_name = args.input_factor + + if args.output_metric is None: + outcome_name = available_outcomes[0] + else: + if args.output_metric not in available_outcomes: + parser.error( + f"--output_metric {args.output_metric!r} not found in factors.yaml. " + f"Available outcomes: {available_outcomes}" + ) + outcome_name = args.output_metric + + print( + f"[INFO] Analyzing factor '{factor_name}' against outcome '{outcome_name}'" + f" (conditioning on outcome={args.outcome_value:g})" + ) + print( + f"[INFO] num_episodes={len(dataset.rows)}; theta shape={tuple(dataset.theta.shape)};" + f" x shape={tuple(dataset.x.shape)}" + ) + + analyzer = make_analyzer(dataset, outcome_name=outcome_name) + print(f"[INFO] Dispatched analyzer: {type(analyzer).__name__}") + analyzer.fit() + print(f"[INFO] Plotting marginal -> {args.figure_path}") + plot_marginal(analyzer, factor_name, output_path=args.figure_path, outcome_value=args.outcome_value) + print("[INFO] Done.") + + +if __name__ == "__main__": + main() diff --git a/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml new file mode 100644 index 000000000..4a4c82200 --- /dev/null +++ b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml @@ -0,0 +1,36 @@ +# Copyright (c) 2026, The 
Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# Sensitivity-analysis schema for the light_intensity sweep on droid + pi0. +# Paired with: light_intensity_sweep_jobs_config.json (and the minimal variant). +# Hand-authored — factor names must match the arena_env_args keys set in the paired jobs config. +# +# - slice identifies the (policy, task, embodiment) the dataset comes from; MNPE/NPE +# assumes a single data-generating source per analysis. +# - factors declares what the eval varies; eval_runner --episode_summary records the full +# arena_env_args dict per episode, and the names here select which keys are factors. +# - outcomes declares what the eval measures; the writer pulls these from the +# registered task metrics (compute_metric_from_recording on each demo). + +slice: + policy: pi0_remote + task: pick_and_place_maple_table + embodiment: droid_abs_joint_pos + +factors: + light_intensity: + type: continuous + dim: 1 + # Mirrors the robolab evaluated endpoints [10, 5000] for direct comparison; spans the + # dark / normal / bright regimes around the policy's trained operating point (~500). + range: [[10, 5000]] + +outcomes: + success_rate: + # Per-episode value of SuccessRateMetric. Returns 0.0 or 1.0 for a single demo. + type: float + object_moved_rate: + # Per-episode value of ObjectMovedRateMetric. Same shape as success_rate.
+ type: float diff --git a/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_jobs_config.json b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_jobs_config.json new file mode 100644 index 000000000..6da57f719 --- /dev/null +++ b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_jobs_config.json @@ -0,0 +1,184 @@ +{ + "jobs": [ + { + "name": "light_intensity_sweep_10", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 10 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_25", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 25 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_60", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 60 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": 
"isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_150", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 150 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_350", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 350 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_800", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 800 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": 
"127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_1800", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 1800 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_4000", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 4000 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_sweep_5000", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 5000 + }, + "num_episodes": 20, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + } + ] +} diff --git 
a/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_minimal_jobs_config.json b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_minimal_jobs_config.json new file mode 100644 index 000000000..0e6f1d3ce --- /dev/null +++ b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_minimal_jobs_config.json @@ -0,0 +1,64 @@ +{ + "jobs": [ + { + "name": "light_intensity_minimal_100", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 100 + }, + "num_episodes": 2, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_minimal_500", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 500 + }, + "num_episodes": 2, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "light_intensity_minimal_5000", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 5000 + }, + "num_episodes": 2, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type":
"isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + } + ] +} diff --git a/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_factors.yaml b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_factors.yaml new file mode 100644 index 000000000..ab5eb24cd --- /dev/null +++ b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_factors.yaml @@ -0,0 +1,26 @@ +# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# Sensitivity-analysis schema for the pick_up_object sweep on droid + pi0. +# Paired with: pick_up_object_sweep_minimal_jobs_config.json +# Hand-authored — factor names and choices must match the arena_env_args values in the paired jobs config. + +slice: + policy: pi0_remote + task: pick_and_place_maple_table + embodiment: droid_abs_joint_pos + +factors: + pick_up_object: + type: categorical + # Three objects with distinct visual / shape characteristics. List them in the order + # the analyzer should use as integer codes (0=rubiks_cube, 1=alphabet_soup_can, 2=sugar_box).
+ choices: [rubiks_cube_hot3d_robolab, alphabet_soup_can_hope_robolab, sugar_box_ycb_robolab] + +outcomes: + success_rate: + type: float + object_moved_rate: + type: float diff --git a/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_minimal_jobs_config.json b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_minimal_jobs_config.json new file mode 100644 index 000000000..fc2b3950c --- /dev/null +++ b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_minimal_jobs_config.json @@ -0,0 +1,70 @@ +{ + "jobs": [ + { + "name": "pick_up_object_minimal_rubiks_cube", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 500, + "pick_up_object": "rubiks_cube_hot3d_robolab", + "destination_location": "wooden_bowl_hot3d_robolab" + }, + "num_episodes": 2, + "language_instruction": "Pick up the Rubik's cube and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + }, + { + "name": "pick_up_object_minimal_alphabet_soup_can", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 500, + "pick_up_object": "alphabet_soup_can_hope_robolab", + "destination_location": "wooden_bowl_hot3d_robolab" + }, + "num_episodes": 2, + "language_instruction": "Pick up the soup can and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": 
"droid" + } + }, + { + "name": "pick_up_object_minimal_sugar_box", + "arena_env_args": { + "enable_cameras": true, + "environment": "pick_and_place_maple_table", + "embodiment": "droid_abs_joint_pos", + "hdr": "billiard_hall_robolab", + "light_intensity": 500, + "pick_up_object": "sugar_box_ycb_robolab", + "destination_location": "wooden_bowl_hot3d_robolab" + }, + "num_episodes": 2, + "language_instruction": "Pick up the sugar box and place it in the bowl.", + "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy", + "policy_config_dict": { + "policy_variant": "pi05", + "policy_device": "cuda:0", + "remote_host": "127.0.0.1", + "remote_port": 8000, + "openpi_embodiment_adapter": "droid" + } + } + ] +} diff --git a/setup.py b/setup.py index 82cd92b56..2c1c5944f 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ "jupyter", "debugpy", "tenacity", + "sbi", ] setup(