isaac-sim · cvolkcvolk · May 28, 2026 · May 27, 2026 · May 27, 2026 · May 28, 2026
@@ -0,0 +1,4 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,4 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,95 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Per-episode summary writer for sensitivity analysis.
+
+``write_episode_summaries`` appends one JSONL row per recorded demo for a just-completed
+job. Each row carries:
+
+  - ``job_name`` and ``episode_idx`` for traceability,
+  - ``arena_env_args`` — the *entire* job.arena_env_args_dict, i.e. every value that
+    parameterized the env for this episode,
+  - ``outcomes`` — per-episode outcome values from the task's registered metrics, extracted
+    from the recorded hdf5 demos via each metric's ``compute_metric_from_recording``.
+
+The eval-side writer is intentionally analysis-agnostic: it logs all env state, and the
+analyzer's ``factors.yaml`` decides which subset of those keys to treat as factors. This
+keeps the writer free of any "what counts as a factor?" knowledge.
+
+Import-order note: this module legitimately touches pxr at import time via
+``isaaclab_arena.metrics.metrics`` (which imports ``isaaclab.envs.manager_based_rl_env``).
+Like ``metrics`` itself, callers must defer importing this module until *after*
+``SimulationAppContext`` is active — see ``policy_runner.py`` (which uses the same pattern
+for ``compute_metrics``) and ``eval_runner.py``'s per-job try block for examples.
+"""
+
+from __future__ import annotations
+
+import h5py
+import json
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from isaaclab_arena.metrics.metrics import get_metric_recorder_dataset_path
+from isaaclab_arena.metrics.metrics_logger import metrics_to_plain_python_types
+
+if TYPE_CHECKING:
+    from isaaclab_arena.evaluation.job_manager import Job
+
+
+def write_episode_summaries(env, job: Job, output_path: str | Path) -> int:
+    """Append one JSONL row per recorded demo for the just-completed job.
+
+    Each row has shape::
+
+        {
+          "job_name": "<job.name>",
+          "episode_idx": <demo index in the hdf5>,
+          "arena_env_args": <full job.arena_env_args_dict>,
+          "outcomes": <per-metric value computed from the demo>
+        }
+
+    Args:
+        env: The (possibly gym-wrapped) Arena env that just finished its rollout. The hdf5
+            path and registered metrics are read from ``env.unwrapped.cfg``.
+        job: The Job that ran. Its ``arena_env_args_dict`` is logged verbatim under
+            ``arena_env_args``.
+        output_path: JSONL file to append to. Created (with parent dirs) if absent.
+
+    Returns:
+        Number of rows written.
+    """
+    unwrapped_env = env.unwrapped
+    if not hasattr(unwrapped_env.cfg, "metrics") or unwrapped_env.cfg.metrics is None:
+        return 0
+
+    arena_env_args_snapshot = dict(job.arena_env_args_dict)
+
+    hdf5_dataset_path = get_metric_recorder_dataset_path(unwrapped_env)
+    registered_metrics = unwrapped_env.cfg.metrics
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    rows_written = 0
+    with h5py.File(hdf5_dataset_path, "r") as hdf5_file:
+        recorded_demos = hdf5_file["data"]
+        with open(output_path, "a", encoding="utf-8") as jsonl_output:
+            for demo_index, demo_name in enumerate(recorded_demos):
+                demo_group = recorded_demos[demo_name]
+                raw_outcome_values = {}
+                for metric in registered_metrics:
+                    recorded_metric_data = demo_group[metric.recorder_term_name][:]
+                    raw_outcome_values[metric.name] = metric.compute_metric_from_recording([recorded_metric_data])
+                outcome_values = metrics_to_plain_python_types(raw_outcome_values)
+                summary_row = {
+                    "job_name": job.name,
+                    "episode_idx": demo_index,
+                    "arena_env_args": arena_env_args_snapshot,
+                    "outcomes": outcome_values,
+                }
+                jsonl_output.write(json.dumps(summary_row) + "\n")
+                rows_written += 1
+
+    return rows_written
@@ -0,0 +1,209 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Plot renderers for sensitivity analysis.
+
+Pure-visualization module. Calls into the analyzer's public posterior queries
+(``continuous_marginal_density`` and ``categorical_marginal_probs``) and renders matplotlib
+figures. Decoupled from the analyzer hierarchy so new plot types can be added without
+touching inference code, and so existing plot code can be tested with mock posteriors.
+
+The single entry point is ``plot_marginal(analyzer, factor_name, output_path, ...)``,
+which dispatches by factor type to the right renderer.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from isaaclab_arena.analysis.sensitivity.analyzer import BaseAnalyzer
+    from isaaclab_arena.analysis.sensitivity.dataset import FactorSpec
+
+
+def plot_marginal(
+    analyzer: BaseAnalyzer,
+    factor_name: str,
+    output_path: str | Path,
+    outcome_value: float = 1.0,
+    num_samples: int = 10_000,
+    num_grid_points: int = 200,
+) -> None:
+    """Render the marginal posterior for ``factor_name``, dispatching by factor type.
+
+    For continuous factors, the analyzer must expose ``continuous_marginal_density``
+    (only ``PosteriorAnalyzer`` does — ``EmpiricalAnalyzer`` rejects continuous factors at
+    construction time, so this branch isn't reachable through ``make_analyzer``).
+    """
+    factor_spec = analyzer._factor_spec(factor_name)
+    if factor_spec.type == "continuous":
+        if not hasattr(analyzer, "continuous_marginal_density"):
+            raise NotImplementedError(
+                f"{type(analyzer).__name__} cannot plot continuous factors; expected a PosteriorAnalyzer (NPE/MNPE)."
+            )
+        _plot_continuous_marginal(analyzer, factor_spec, output_path, outcome_value, num_grid_points)
+    elif factor_spec.type == "categorical":
+        _plot_categorical_marginal(analyzer, factor_spec, output_path, outcome_value, num_samples)
+    else:
+        raise NotImplementedError(f"Unsupported factor type {factor_spec.type!r}")
+
+
+def _plot_continuous_marginal(
+    analyzer: BaseAnalyzer,
+    factor_spec: FactorSpec,
+    output_path: str | Path,
+    outcome_value: float,
+    num_grid_points: int,
+) -> None:
+    """Render a continuous factor's marginal posterior as a density curve.
+
+    The blue curve shows ``P(factor_value | outcome=outcome_value)`` from the analyzer.
+    Below the x-axis is an empirical "rug" — small vertical ticks at the actual recorded
+    theta values, coloured green for episodes where the outcome was achieved (``≥ 0.5``)
+    and red for episodes where it was not. The rug lets a human eyeball whether the
+    smooth posterior actually agrees with where the successful episodes lived.
+    """
+    import matplotlib.pyplot as plt
+
+    grid, density = analyzer.continuous_marginal_density(factor_spec.name, outcome_value, num_grid_points)
+    # Empirical rug, coloured by outcome — gives the human a sanity-check on the curve.
+    factor_column_slice = analyzer.dataset.factor_columns[factor_spec.name]
+    outcome_column_index = analyzer.dataset.outcome_columns[analyzer.outcome_name]
+    empirical_theta_values = analyzer.dataset.theta[:, factor_column_slice].squeeze(-1).cpu().numpy()
+    empirical_outcomes = analyzer.dataset.x[:, outcome_column_index].cpu().numpy()
+    success_mask = empirical_outcomes >= 0.5
+
+    figure, axes = plt.subplots(figsize=(8, 5))
+    axes.plot(
+        grid,
+        density,
+        color="steelblue",
+        linewidth=2,
+        label=f"P({factor_spec.name} | {analyzer.outcome_name}={outcome_value:g})",
+    )
+    axes.fill_between(grid, 0, density, color="steelblue", alpha=0.2)
+    axes.scatter(
+        empirical_theta_values[success_mask],
+        np.full(success_mask.sum(), -0.05 * density.max()),
+        marker="|",
+        color="seagreen",
+        s=80,
+        label=f"{analyzer.outcome_name} ≥ 0.5  (n={success_mask.sum()})",
+    )
+    axes.scatter(
+        empirical_theta_values[~success_mask],
+        np.full((~success_mask).sum(), -0.1 * density.max()),
+        marker="|",
+        color="firebrick",
+        s=80,
+        label=f"{analyzer.outcome_name} < 0.5  (n={(~success_mask).sum()})",
+    )
+    axes.set_xlabel(factor_spec.name)
+    axes.set_ylabel("posterior density")
+    axes.set_title(_plot_title(analyzer, factor_spec.name))
+    axes.legend(loc="best", fontsize=9)
+    axes.grid(alpha=0.3)
+    figure.tight_layout()
+    _save_figure(figure, output_path)
+
+
+def _plot_categorical_marginal(
+    analyzer: BaseAnalyzer,
+    factor_spec: FactorSpec,
+    output_path: str | Path,
+    outcome_value: float,
+    num_samples: int,
+) -> None:
+    """Render a categorical factor's marginal as side-by-side bars per category.
+
+    The blue bar (left of each category) is the analyzer's ``P(category | outcome)``.
+    The green bar (right of each category) is the *empirical* per-category outcome rate
+    — independent of the analyzer's posterior, computed directly from the raw data.
+    For the ``EmpiricalAnalyzer`` the two will agree exactly (up to normalization); for
+    a posterior-based analyzer they may differ slightly if the model smooths.
+
+    Each green bar is annotated with the sample count ``n`` for that category, so the
+    user can see how trustworthy each bar is.
+    """
+    import matplotlib.pyplot as plt
+
+    assert factor_spec.choices is not None
+    choices = factor_spec.choices
+    num_choices = len(choices)
+    factor_column_slice = analyzer.dataset.factor_columns[factor_spec.name]
+    outcome_column_index = analyzer.dataset.outcome_columns[analyzer.outcome_name]
+
+    # Posterior probs come from the analyzer; empirical rate and counts are raw data,
+    # rendered alongside as a sanity reference.
+    posterior_probabilities = analyzer.categorical_marginal_probs(factor_spec.name, outcome_value, num_samples)
+
+    empirical_theta_codes = analyzer.dataset.theta[:, factor_column_slice].squeeze(-1).long().cpu().numpy()
+    empirical_outcomes = analyzer.dataset.x[:, outcome_column_index].cpu().numpy()
+    empirical_rates = np.zeros(num_choices)
+    empirical_counts = np.zeros(num_choices, dtype=int)
+    for code in range(num_choices):
+        category_mask = empirical_theta_codes == code
+        empirical_counts[code] = int(category_mask.sum())
+        if category_mask.any():
+            empirical_rates[code] = float((empirical_outcomes[category_mask] >= 0.5).mean())
+
+    figure, axes = plt.subplots(figsize=(max(8, 1.0 * num_choices), 5))
+    bar_x_positions = np.arange(num_choices)
+    bar_width = 0.4
+    axes.bar(
+        bar_x_positions - bar_width / 2,
+        posterior_probabilities,
+        bar_width,
+        color="steelblue",
+        alpha=0.8,
+        label=f"P(category | {analyzer.outcome_name}={outcome_value:g})",
+    )
+    axes.bar(
+        bar_x_positions + bar_width / 2,
+        empirical_rates,
+        bar_width,
+        color="seagreen",
+        alpha=0.7,
+        label=f"empirical {analyzer.outcome_name} rate per category",
+    )
+    for category_index, count in enumerate(empirical_counts):
+        axes.text(
+            category_index + bar_width / 2,
+            empirical_rates[category_index] + 0.02,
+            f"n={count}",
+            ha="center",
+            fontsize=8,
+        )
+
+    axes.set_xticks(bar_x_positions)
+    axes.set_xticklabels(choices, rotation=30, ha="right")
+    axes.set_ylabel("probability")
+    axes.set_ylim(0, 1.05)
+    axes.set_title(_plot_title(analyzer, factor_spec.name))
+    axes.legend(loc="best", fontsize=9)
+    axes.grid(alpha=0.3, axis="y")
+    figure.tight_layout()
+    _save_figure(figure, output_path)
+
+
+def _plot_title(analyzer: BaseAnalyzer, factor_name: str) -> str:
+    """Format the plot title as ``"Sensitivity of <outcome> to <factor>" / slice block``."""
+    return (
+        f"Sensitivity of {analyzer.outcome_name} to {factor_name}\n"
+        f"slice: {analyzer.dataset.schema.slice.policy} / "
+        f"{analyzer.dataset.schema.slice.task} / {analyzer.dataset.schema.slice.embodiment}"
+    )
+
+
+def _save_figure(figure, output_path: str | Path) -> None:
+    """Save a matplotlib figure to disk (creating parent dirs) and close it."""
+    import matplotlib.pyplot as plt
+
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    figure.savefig(output_path, dpi=150)
+    plt.close(figure)