Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions isaaclab_arena/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
4 changes: 4 additions & 0 deletions isaaclab_arena/analysis/sensitivity/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
363 changes: 363 additions & 0 deletions isaaclab_arena/analysis/sensitivity/analyzer.py

Large diffs are not rendered by default.

370 changes: 370 additions & 0 deletions isaaclab_arena/analysis/sensitivity/dataset.py

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions isaaclab_arena/analysis/sensitivity/episode_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

"""Per-episode summary writer for sensitivity analysis.

``write_episode_summaries`` appends one JSONL row per recorded demo for a just-completed
job. Each row carries:

- ``job_name`` and ``episode_idx`` for traceability,
- ``arena_env_args`` — the *entire* job.arena_env_args_dict, i.e. every value that
parameterized the env for this episode,
- ``outcomes`` — per-episode outcome values from the task's registered metrics, extracted
from the recorded hdf5 demos via each metric's ``compute_metric_from_recording``.

The eval-side writer is intentionally analysis-agnostic: it logs all env state, and the
analyzer's ``factors.yaml`` decides which subset of those keys to treat as factors. This
keeps the writer free of any "what counts as a factor?" knowledge.

Import-order note: this module legitimately touches pxr at import time via
``isaaclab_arena.metrics.metrics`` (which imports ``isaaclab.envs.manager_based_rl_env``).
Like ``metrics`` itself, callers must defer importing this module until *after*
``SimulationAppContext`` is active — see ``policy_runner.py`` (which uses the same pattern
for ``compute_metrics``) and ``eval_runner.py``'s per-job try block for examples.
"""

from __future__ import annotations

import h5py
import json
from pathlib import Path
from typing import TYPE_CHECKING

from isaaclab_arena.metrics.metrics import get_metric_recorder_dataset_path
from isaaclab_arena.metrics.metrics_logger import metrics_to_plain_python_types

if TYPE_CHECKING:
from isaaclab_arena.evaluation.job_manager import Job


def _demo_sort_key(demo_name: str) -> tuple[int, int | str]:
    """Return a sort key that orders demo group names by their trailing integer.

    Names ending in ``_<digits>`` (or consisting only of digits) sort numerically;
    any other name sorts after all numbered ones, alphabetically, so an unexpected
    group name never breaks the ordering.
    """
    suffix = demo_name.rpartition("_")[2]
    if suffix.isdecimal():
        return (0, int(suffix))
    return (1, demo_name)


def write_episode_summaries(env, job: Job, output_path: str | Path) -> int:
    """Append one JSONL row per recorded demo for the just-completed job.

    Each row has shape::

        {
            "job_name": "<job.name>",
            "episode_idx": <position of the demo in numeric demo order>,
            "arena_env_args": <full job.arena_env_args_dict>,
            "outcomes": <per-metric value computed from the demo>
        }

    Demos are visited in natural numeric order of their group names rather than in
    h5py's default alphanumeric order. Without this, a name like ``demo_10`` would be
    visited before ``demo_2`` and ``episode_idx`` would stop matching the demo index
    as soon as more than ten demos are recorded.
    NOTE(review): assumes the recorder names groups ``demo_<N>`` — confirm against the
    recorder; other names still work but are ordered alphabetically after numbered ones.

    Args:
        env: The (possibly gym-wrapped) Arena env that just finished its rollout. The hdf5
            path and registered metrics are read from ``env.unwrapped``.
        job: The Job that ran. A shallow copy of its ``arena_env_args_dict`` is logged
            under ``arena_env_args`` in every row.
        output_path: JSONL file to append to. Created (with parent dirs) if absent.

    Returns:
        Number of rows written. ``0`` when the env cfg has no ``metrics`` attribute or it
        is ``None``; in that case nothing is read or written.
    """
    unwrapped_env = env.unwrapped
    registered_metrics = getattr(unwrapped_env.cfg, "metrics", None)
    if registered_metrics is None:
        return 0

    # Shallow snapshot so every row of this job serializes the same top-level mapping.
    arena_env_args_snapshot = dict(job.arena_env_args_dict)

    hdf5_dataset_path = get_metric_recorder_dataset_path(unwrapped_env)
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    rows_written = 0
    with h5py.File(hdf5_dataset_path, "r") as hdf5_file:
        recorded_demos = hdf5_file["data"]
        # Iterating an h5py group yields member names alphanumerically by default;
        # sort them naturally so the enumerate index follows the numeric demo order.
        ordered_demo_names = sorted(recorded_demos, key=_demo_sort_key)
        with open(output_path, "a", encoding="utf-8") as jsonl_output:
            for demo_index, demo_name in enumerate(ordered_demo_names):
                demo_group = recorded_demos[demo_name]
                # Each metric gets a one-element list holding this demo's recorded data.
                raw_outcome_values = {
                    metric.name: metric.compute_metric_from_recording([demo_group[metric.recorder_term_name][:]])
                    for metric in registered_metrics
                }
                summary_row = {
                    "job_name": job.name,
                    "episode_idx": demo_index,
                    "arena_env_args": arena_env_args_snapshot,
                    "outcomes": metrics_to_plain_python_types(raw_outcome_values),
                }
                jsonl_output.write(json.dumps(summary_row) + "\n")
                rows_written += 1

    return rows_written
209 changes: 209 additions & 0 deletions isaaclab_arena/analysis/sensitivity/plotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

"""Plot renderers for sensitivity analysis.

Pure-visualization module. Calls into the analyzer's public posterior queries
(``continuous_marginal_density`` and ``categorical_marginal_probs``) and renders matplotlib
figures. Decoupled from the analyzer hierarchy so new plot types can be added without
touching inference code, and so existing plot code can be tested with mock posteriors.

The single entry point is ``plot_marginal(analyzer, factor_name, output_path, ...)``,
which dispatches by factor type to the right renderer.
"""

from __future__ import annotations

import numpy as np
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from isaaclab_arena.analysis.sensitivity.analyzer import BaseAnalyzer
from isaaclab_arena.analysis.sensitivity.dataset import FactorSpec


def plot_marginal(
    analyzer: BaseAnalyzer,
    factor_name: str,
    output_path: str | Path,
    outcome_value: float = 1.0,
    num_samples: int = 10_000,
    num_grid_points: int = 200,
) -> None:
    """Plot the marginal posterior of ``factor_name`` with the renderer for its type.

    Categorical factors go to the bar-chart renderer and use ``num_samples``;
    continuous factors go to the density-curve renderer and use ``num_grid_points``.

    Raises:
        NotImplementedError: If the factor type is neither ``"continuous"`` nor
            ``"categorical"``, or if the factor is continuous but the analyzer does not
            expose ``continuous_marginal_density``.
    """
    spec = analyzer._factor_spec(factor_name)
    factor_type = spec.type

    if factor_type == "categorical":
        _plot_categorical_marginal(analyzer, spec, output_path, outcome_value, num_samples)
        return

    if factor_type != "continuous":
        raise NotImplementedError(f"Unsupported factor type {factor_type!r}")

    # Continuous plots need a density query that not every analyzer provides.
    if not hasattr(analyzer, "continuous_marginal_density"):
        raise NotImplementedError(
            f"{type(analyzer).__name__} cannot plot continuous factors; expected a PosteriorAnalyzer (NPE/MNPE)."
        )
    _plot_continuous_marginal(analyzer, spec, output_path, outcome_value, num_grid_points)


def _plot_continuous_marginal(
    analyzer: BaseAnalyzer,
    factor_spec: FactorSpec,
    output_path: str | Path,
    outcome_value: float,
    num_grid_points: int,
) -> None:
    """Draw a continuous factor's marginal posterior as a filled density curve plus a rug.

    The steel-blue curve is whatever ``analyzer.continuous_marginal_density`` returns for
    the requested ``outcome_value``. Two rows of tick marks sit below ``y = 0``: green
    ticks at the recorded factor values of episodes whose outcome is ``>= 0.5`` and red
    ticks for the remaining episodes, so the curve can be eyeballed against the raw data.
    The finished figure is written to ``output_path``.
    """
    import matplotlib.pyplot as plt

    factor_name = factor_spec.name
    grid, density = analyzer.continuous_marginal_density(factor_name, outcome_value, num_grid_points)

    # Raw samples for the rug, pulled straight from the analyzer's dataset.
    dataset = analyzer.dataset
    theta_slice = dataset.factor_columns[factor_name]
    outcome_name = analyzer.outcome_name
    outcome_index = dataset.outcome_columns[outcome_name]
    theta_samples = dataset.theta[:, theta_slice].squeeze(-1).cpu().numpy()
    outcome_samples = dataset.x[:, outcome_index].cpu().numpy()
    succeeded = outcome_samples >= 0.5

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(
        grid,
        density,
        color="steelblue",
        linewidth=2,
        label=f"P({factor_name} | {outcome_name}={outcome_value:g})",
    )
    ax.fill_between(grid, 0, density, color="steelblue", alpha=0.2)

    # One rug row per outcome class: (mask, y offset as a fraction of peak density, colour, relation).
    rug_rows = (
        (succeeded, -0.05, "seagreen", "≥"),
        (~succeeded, -0.1, "firebrick", "<"),
    )
    for mask, height_fraction, colour, relation in rug_rows:
        ax.scatter(
            theta_samples[mask],
            np.full(mask.sum(), height_fraction * density.max()),
            marker="|",
            color=colour,
            s=80,
            label=f"{outcome_name} {relation} 0.5 (n={mask.sum()})",
        )

    ax.set_xlabel(factor_name)
    ax.set_ylabel("posterior density")
    ax.set_title(_plot_title(analyzer, factor_name))
    ax.legend(loc="best", fontsize=9)
    ax.grid(alpha=0.3)
    fig.tight_layout()
    _save_figure(fig, output_path)


def _plot_categorical_marginal(
    analyzer: BaseAnalyzer,
    factor_spec: FactorSpec,
    output_path: str | Path,
    outcome_value: float,
    num_samples: int,
) -> None:
    """Draw a categorical factor's marginal as paired bars, one pair per category.

    In each pair the left (steel-blue) bar is the analyzer's
    ``categorical_marginal_probs`` value for that category, and the right (green) bar is
    the fraction of recorded episodes in that category whose outcome is ``>= 0.5``,
    computed directly from the dataset. Categories with no recorded episodes get a rate
    of zero. Every green bar is labelled with its episode count ``n`` so the reader can
    judge how much data backs it. The finished figure is written to ``output_path``.
    """
    import matplotlib.pyplot as plt

    choices = factor_spec.choices
    assert choices is not None
    num_choices = len(choices)

    dataset = analyzer.dataset
    theta_slice = dataset.factor_columns[factor_spec.name]
    outcome_name = analyzer.outcome_name
    outcome_index = dataset.outcome_columns[outcome_name]

    # Model-side numbers first, then the raw-data reference shown beside them.
    posterior_probabilities = analyzer.categorical_marginal_probs(factor_spec.name, outcome_value, num_samples)

    category_codes = dataset.theta[:, theta_slice].squeeze(-1).long().cpu().numpy()
    success_flags = dataset.x[:, outcome_index].cpu().numpy() >= 0.5
    category_masks = [category_codes == code for code in range(num_choices)]
    empirical_counts = np.array([int(mask.sum()) for mask in category_masks], dtype=int)
    empirical_rates = np.array(
        [float(success_flags[mask].mean()) if mask.any() else 0.0 for mask in category_masks],
        dtype=float,
    )

    fig, ax = plt.subplots(figsize=(max(8, 1.0 * num_choices), 5))
    bar_width = 0.4
    half_width = bar_width / 2
    centers = np.arange(num_choices)
    ax.bar(
        centers - half_width,
        posterior_probabilities,
        bar_width,
        color="steelblue",
        alpha=0.8,
        label=f"P(category | {outcome_name}={outcome_value:g})",
    )
    ax.bar(
        centers + half_width,
        empirical_rates,
        bar_width,
        color="seagreen",
        alpha=0.7,
        label=f"empirical {outcome_name} rate per category",
    )
    # Sample-count annotation just above each empirical bar.
    for text_x, rate, count in zip(centers + half_width, empirical_rates, empirical_counts):
        ax.text(text_x, rate + 0.02, f"n={count}", ha="center", fontsize=8)

    ax.set_xticks(centers)
    ax.set_xticklabels(choices, rotation=30, ha="right")
    ax.set_ylabel("probability")
    ax.set_ylim(0, 1.05)
    ax.set_title(_plot_title(analyzer, factor_spec.name))
    ax.legend(loc="best", fontsize=9)
    ax.grid(alpha=0.3, axis="y")
    fig.tight_layout()
    _save_figure(fig, output_path)


def _plot_title(analyzer: BaseAnalyzer, factor_name: str) -> str:
"""Format the plot title as ``"Sensitivity of <outcome> to <factor>" / slice block``."""
return (
f"Sensitivity of {analyzer.outcome_name} to {factor_name}\n"
f"slice: {analyzer.dataset.schema.slice.policy} / "
f"{analyzer.dataset.schema.slice.task} / {analyzer.dataset.schema.slice.embodiment}"
)


def _save_figure(figure, output_path: str | Path) -> None:
    """Save a matplotlib figure to disk (creating parent dirs) and always close it.

    Args:
        figure: The matplotlib figure to write.
        output_path: Destination file; missing parent directories are created.

    Any error from creating the directory or saving propagates to the caller, but the
    figure is closed first so a failed save does not leave it registered with pyplot.
    """
    import matplotlib.pyplot as plt

    try:
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        figure.savefig(destination, dpi=150)
    finally:
        # Close on failure too: pyplot keeps figures alive until they are explicitly closed.
        plt.close(figure)
Loading
Loading