diff --git a/isaaclab_arena/analysis/__init__.py b/isaaclab_arena/analysis/__init__.py
new file mode 100644
index 000000000..fee3a6a9f
--- /dev/null
+++ b/isaaclab_arena/analysis/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
diff --git a/isaaclab_arena/analysis/sensitivity/__init__.py b/isaaclab_arena/analysis/sensitivity/__init__.py
new file mode 100644
index 000000000..fee3a6a9f
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
diff --git a/isaaclab_arena/analysis/sensitivity/analyzer.py b/isaaclab_arena/analysis/sensitivity/analyzer.py
new file mode 100644
index 000000000..04e729a3b
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/analyzer.py
@@ -0,0 +1,363 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Inference-only analyzers for v0.3 sensitivity analysis.
+
+What this module does in plain English
+--------------------------------------
+Given a dataset of (factor values, outcome values) pairs from a policy evaluation, the
+analyzer learns the *conditional* distribution of factor values given a chosen outcome
+value (e.g. "given the episode succeeded, which factor values were most consistent?").
+This is the **posterior** ``P(theta | outcome=success)``. Under v0.3's uniform prior,
+this posterior's peak is also the operating point ``argmax P(success | theta)`` — so
+plotting the marginal posterior over one factor identifies the values that maximize
+success rate.
+
+The three concrete analyzers cover the three relevant factor-mix cases:
+
+  - ``NPEAnalyzer``       — **N**eural **P**osterior **E**stimation. Used when *all*
+    declared factors are continuous. Trains a normalizing-flow density estimator on
+    ``(theta, x)`` pairs and exposes ``posterior.sample`` / ``posterior.log_prob``.
+    Limitation: with a binary outcome and a 1D theta, sbi falls back to a Gaussian
+    density and the recovered peak reflects the *mean* of successful theta values
+    rather than the true *mode* — a known caveat we surface as a [WARN] at fit time.
+  - ``MNPEAnalyzer``      — **M**ixed **N**eural **P**osterior **E**stimation. Used when
+    the schema has *both* continuous and categorical factors. sbi's MixedDensityEstimator
+    routes continuous columns through the same kind of flow NPE uses while routing
+    discrete columns through a categorical mass estimator.
+  - ``EmpiricalAnalyzer`` — Pure-categorical schemas. Skip the neural fit entirely: under
+    a uniform prior the posterior ``P(category | success)`` is *exactly* the normalized
+    per-category empirical success rate. No smoothing improves on that, and sbi MNPE
+    in version 0.26 also refuses to train without at least one continuous theta column.
+
+``make_analyzer(dataset, outcome_name)`` is the factory: callers don't need to know about
+the hierarchy, they just hand it a dataset and outcome name.
+
+How rendering fits in
+---------------------
+This module is *inference-only*. The sibling ``plotting`` module reads the analyzer's
+public queries (``continuous_marginal_density``, ``categorical_marginal_probs``) and
+renders matplotlib figures. Decoupling the two means new plot types don't require
+analyzer changes, and analyzer changes don't risk breaking the plot.
+
+Public posterior-query surface used by ``plotting.py``:
+  - ``BaseAnalyzer.categorical_marginal_probs(factor_name, outcome_value, num_samples)``
+  - ``PosteriorAnalyzer.continuous_marginal_density(factor_name, outcome_value, num_grid_points)``
+    (NOT defined on ``EmpiricalAnalyzer`` — that analyzer rejects continuous factors at init time)
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import torch
+from abc import ABC, abstractmethod
+
+from isaaclab_arena.analysis.sensitivity.dataset import FactorSpec, SensitivityDataset
+
+
+class BaseAnalyzer(ABC):
+    """Root of the analyzer hierarchy: constructor-time checks plus the shared query contract.
+
+    Concrete subclasses provide ``fit`` (train, or do nothing, before any query) and
+    ``categorical_marginal_probs`` (``P(category | outcome)`` for one categorical factor).
+    ``continuous_marginal_density`` is intentionally not declared here; it lives on
+    ``PosteriorAnalyzer`` because the empirical analyzer only handles categorical factors.
+    """
+
+    def __init__(self, dataset: SensitivityDataset, outcome_name: str):
+        self.dataset = dataset
+        self.outcome_name = outcome_name
+        assert outcome_name in dataset.outcome_columns, (
+            f"Outcome {outcome_name!r} not found in schema; available: {list(dataset.outcome_columns)}"
+        )
+        declared_factors = dataset.schema.factors
+        assert len(declared_factors) > 0, "Schema declares no factors"
+
+    @abstractmethod
+    def fit(self, training_batch_size: int = 50) -> None:
+        """Prepare the analyzer so that the query methods may be called.
+
+        NPE/MNPE analyzers train a neural density estimator on ``(theta, x_selected)``,
+        ``x_selected`` being the one outcome column picked by ``outcome_name``. The
+        empirical analyzer has nothing to train: it derives its categorical posterior
+        from the raw data whenever it is queried.
+        """
+
+    @abstractmethod
+    def categorical_marginal_probs(self, factor_name: str, outcome_value: float, num_samples: int) -> np.ndarray:
+        """Compute ``P(category | outcome=outcome_value)`` for a single categorical factor.
+
+        The result is a 1D numpy array with one entry per item in ``factor.choices``;
+        the entries sum to 1. Posterior analyzers obtain it by sampling the trained
+        posterior and counting categories, the empirical analyzer by normalizing the
+        per-category empirical success rate.
+        """
+
+    def _factor_spec(self, factor_name: str) -> FactorSpec:
+        """Fetch the ``FactorSpec`` named ``factor_name``; asserts the name is a known factor column."""
+        assert factor_name in self.dataset.factor_columns, (
+            f"Factor {factor_name!r} not in schema; available: {list(self.dataset.factor_columns)}"
+        )
+        return next(spec for spec in self.dataset.schema.factors if spec.name == factor_name)
+
+
+class PosteriorAnalyzer(BaseAnalyzer):
+    """Common base for the sbi-driven analyzers (NPE and MNPE).
+
+    NPE and MNPE differ only in *which* sbi inference class they instantiate; everything
+    else (training loop, posterior storage, density and sample queries) is identical.
+    Subclasses override ``_make_inference`` to choose the class, and may override
+    ``_maybe_warn_binary_outcome`` to surface any method-specific caveats.
+
+    After ``fit()`` returns, ``self.posterior`` is an sbi posterior object that supports
+    ``posterior.sample(shape, x=...)`` and (for NPE) ``posterior.log_prob(theta, x=...)``.
+    """
+
+    def __init__(self, dataset: SensitivityDataset, outcome_name: str):
+        super().__init__(dataset, outcome_name)
+        self.posterior = None  # populated by fit(); queries assert that it is set
+
+    def _make_inference(self):
+        """Return the sbi inference object to train with.
+
+        Subclass-specific: ``NPEAnalyzer`` returns ``sbi.inference.NPE(...)``,
+        ``MNPEAnalyzer`` returns ``sbi.inference.MNPE(...)``. The lazy import of sbi
+        lives in the subclass so callers don't pay the (heavy) sbi import cost until
+        they actually fit.
+        """
+        raise NotImplementedError("PosteriorAnalyzer subclasses must implement _make_inference")
+
+    def fit(self, training_batch_size: int = 50) -> None:
+        """Train the chosen sbi estimator on ``(theta, x_selected)`` and stash the posterior.
+
+        Steps:
+          1. Slice ``self.dataset.x`` to the single outcome column named by ``outcome_name``.
+          2. Surface any method-specific caveats about the outcome (e.g. NPE's
+             1D-binary Gaussian fallback) via ``_maybe_warn_binary_outcome``.
+          3. Instantiate the sbi inference object (NPE or MNPE) via ``_make_inference``.
+          4. Append the simulations and train.
+          5. Build a posterior object from the trained estimator and store it on ``self``.
+        """
+        outcome_column_index = self.dataset.outcome_columns[self.outcome_name]
+        selected_outcome_column = self.dataset.x[:, outcome_column_index : outcome_column_index + 1]
+        self._maybe_warn_binary_outcome(selected_outcome_column)
+
+        print(
+            f"[INFO] {type(self).__name__}: fitting on {self.dataset.theta.shape[0]} samples"
+            f" (theta dim={self.dataset.theta.shape[1]},"
+            f" x dim={selected_outcome_column.shape[1]})."
+        )
+        inference = self._make_inference()
+        inference.append_simulations(self.dataset.theta, selected_outcome_column)
+        density_estimator = inference.train(training_batch_size=training_batch_size)
+        self.posterior = inference.build_posterior(density_estimator)
+
+    def _maybe_warn_binary_outcome(self, selected_outcome_column: torch.Tensor) -> None:
+        """Optional hook for subclass-specific caveats about binary outcomes. Default: no-op.
+
+        ``NPEAnalyzer`` overrides this to warn that with a single binary outcome column
+        sbi falls back to a Gaussian density, biasing the recovered peak toward the
+        mean of successful theta values rather than the true mode.
+        """
+
+    def continuous_marginal_density(
+        self, factor_name: str, outcome_value: float, num_grid_points: int
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Evaluate ``P(factor_value | outcome=outcome_value)`` over the factor's prior range.
+
+        Returns ``(grid, density)`` as numpy arrays of length ``num_grid_points``, suitable
+        for plotting as a smooth curve. Asserts if called before ``fit()``.
+
+        Two evaluation paths depending on whether other factors are present:
+          - **1D theta** (the only declared factor is this one): evaluate
+            ``posterior.log_prob`` directly on a regular grid — no sampling involved.
+          - **Multi-dim theta**: draw 10,000 posterior samples at the given outcome value,
+            histogram this factor's column into 40 bins over the prior range, and linearly
+            interpolate the bin-centre densities onto the grid (marginalizing the other dims).
+        """
+        assert self.posterior is not None, "Call fit() before querying the posterior"
+        factor_spec = self._factor_spec(factor_name)
+        assert (
+            factor_spec.type == "continuous"
+        ), f"continuous_marginal_density expects a continuous factor; {factor_name!r} is {factor_spec.type!r}"
+        assert (
+            factor_spec.range is not None and len(factor_spec.range) == 1
+        ), "Continuous-factor marginal expects a populated 1D range"
+
+        factor_column_slice = self.dataset.factor_columns[factor_name]
+        observed_outcome = torch.tensor([outcome_value], dtype=torch.float32)
+        range_low, range_high = factor_spec.range[0]
+
+        if self.dataset.theta.shape[1] == 1:
+            grid_tensor = torch.linspace(range_low, range_high, num_grid_points, dtype=torch.float32).unsqueeze(1)
+            with torch.no_grad():
+                log_probabilities = self.posterior.log_prob(grid_tensor, x=observed_outcome)
+            density_numpy = torch.exp(log_probabilities).cpu().numpy()
+            grid_numpy = grid_tensor.squeeze(-1).cpu().numpy()
+        else:
+            with torch.no_grad():
+                posterior_samples = self.posterior.sample((10_000,), x=observed_outcome)
+            factor_column_samples = posterior_samples[:, factor_column_slice].squeeze(-1).cpu().numpy()
+            grid_numpy = np.linspace(range_low, range_high, num_grid_points)
+            histogram_density, bin_edges = np.histogram(
+                factor_column_samples, bins=40, range=(range_low, range_high), density=True
+            )
+            density_numpy = np.interp(grid_numpy, 0.5 * (bin_edges[:-1] + bin_edges[1:]), histogram_density)
+
+        return grid_numpy, density_numpy
+
+    def categorical_marginal_probs(self, factor_name: str, outcome_value: float, num_samples: int) -> np.ndarray:
+        """Estimate ``P(category | outcome)`` by sampling the trained posterior.
+
+        Draws ``num_samples`` from the posterior at ``x=outcome_value``, rounds this factor's
+        column to the nearest code, clips it to ``[0, num_choices - 1]`` and tallies frequencies.
+        Returns a length-``num_choices`` array that sums to 1. Asserts if called before ``fit()``.
+        """
+        assert self.posterior is not None, "Call fit() before querying the posterior"
+        factor_spec = self._factor_spec(factor_name)
+        assert (
+            factor_spec.type == "categorical"
+        ), f"categorical_marginal_probs expects a categorical factor; {factor_name!r} is {factor_spec.type!r}"
+        assert factor_spec.choices is not None, f"Categorical factor {factor_name!r} declares no `choices`"
+        factor_column_slice = self.dataset.factor_columns[factor_name]
+        num_choices = len(factor_spec.choices)
+        observed_outcome = torch.tensor([outcome_value], dtype=torch.float32)
+        with torch.no_grad():
+            posterior_samples = self.posterior.sample((num_samples,), x=observed_outcome)
+        factor_column_samples = posterior_samples[:, factor_column_slice].squeeze(-1).cpu().numpy()
+        clipped_codes = np.clip(np.round(factor_column_samples), 0, num_choices - 1).astype(int)
+        return np.bincount(clipped_codes, minlength=num_choices) / num_samples
+
+
+class NPEAnalyzer(PosteriorAnalyzer):
+    """Analyzer for all-continuous schemas, backed by ``sbi.inference.NPE``.
+
+    Pick this one when no declared factor is categorical. ``_make_inference`` builds an
+    ``sbi.inference.NPE`` object seeded with ``dataset.prior``; training and querying are
+    inherited from ``PosteriorAnalyzer``.
+
+    **Binary-outcome caveat:** when the selected outcome column holds only 0/1 values,
+    ``fit()`` prints a [WARN] saying the recovered peak tracks the *mean* of successful
+    theta values rather than the true *mode*, which the authors attribute to sbi's
+    Gaussian fallback in 1D. NOTE(review): confirm whether "1D" refers to theta or to x.
+    """
+
+    def _make_inference(self):
+        """Build the ``sbi.inference.NPE`` trainer using the dataset's prior."""
+        from sbi.inference import NPE  # imported here so sbi is only loaded when fitting
+        dataset_prior = self.dataset.prior
+        return NPE(prior=dataset_prior)
+
+    def _maybe_warn_binary_outcome(self, selected_outcome_column: torch.Tensor) -> None:
+        """Print the binary-outcome [WARN] when every outcome value is 0.0 or 1.0."""
+        observed_values = set(selected_outcome_column.flatten().tolist())
+        if not observed_values <= {0.0, 1.0}:
+            return
+        print(
+            f"[WARN] Outcome {self.outcome_name!r} is binary (values in {{0, 1}}) and the"
+            " analyzer is using NPE (no categorical factors). sbi NPE falls back to a"
+            " Gaussian density in 1D output space, so the recovered posterior peak"
+            " reflects the *mean* of successful theta values rather than the true *mode*"
+            " of the success curve. Qualitative shape is still informative."
+        )
+
+
+class MNPEAnalyzer(PosteriorAnalyzer):
+    """Analyzer for mixed schemas, backed by ``sbi.inference.MNPE``.
+
+    Pick this one when the schema declares both continuous and categorical factors.
+    ``_make_inference`` builds ``sbi.inference.MNPE`` with ``dataset.prior``; the rest is
+    inherited from ``PosteriorAnalyzer``. Per the authors, MNPE models continuous theta
+    columns with a flow and categorical ones with a categorical mass estimator, and the
+    continuous-first column order of ``factor_columns`` is the layout MNPE expects.
+
+    sbi MNPE 0.26 needs a continuous theta column; ``make_analyzer`` sends pure-categorical
+    schemas to ``EmpiricalAnalyzer`` instead.
+    """
+
+    def _make_inference(self):
+        """Build the ``sbi.inference.MNPE`` trainer using the dataset's prior."""
+        from sbi.inference import MNPE  # imported here so sbi is only loaded when fitting
+        dataset_prior = self.dataset.prior
+        return MNPE(prior=dataset_prior)
+
+
+class EmpiricalAnalyzer(BaseAnalyzer):
+    """Frequency-table analyzer for all-categorical schemas; no neural estimator is trained.
+
+    With a uniform prior, ``P(category | success) ∝ P(success | category) · P(category)``
+    reduces to ``P(category | success) ∝ P(success | category)``. The posterior is therefore
+    taken to be the per-category empirical success rate, rescaled to sum to 1.
+
+    This path also sidesteps an sbi limitation reported by the authors: MNPE 0.26 will
+    not train when theta has no continuous column.
+
+    Construction asserts that the schema contains no continuous factor. ``make_analyzer``
+    never routes a schema with continuous factors here; the guard protects callers that
+    instantiate the class directly.
+    """
+
+    def __init__(self, dataset: SensitivityDataset, outcome_name: str):
+        super().__init__(dataset, outcome_name)
+        continuous_factor_names = [factor.name for factor in dataset.schema.factors if factor.type == "continuous"]
+        assert not continuous_factor_names, (
+            "EmpiricalAnalyzer is only valid for all-categorical schemas. For mixed"
+            " continuous + categorical factors, use MNPEAnalyzer."
+        )
+
+    def fit(self, training_batch_size: int = 50) -> None:
+        """Print an [INFO] line and return; nothing is trained for this analyzer."""
+        print(f"[INFO] {type(self).__name__}: no neural fit needed for pure-categorical schema.")
+
+    def categorical_marginal_probs(self, factor_name: str, outcome_value: float, num_samples: int) -> np.ndarray:
+        """Return the per-category success rates, normalized to sum to 1.
+
+        A row counts as a success when its selected outcome is ``>= 0.5``. Each category's
+        rate is the success fraction among its rows (0 when it has no rows). When every rate
+        is 0 the result is uniform, ``1 / num_choices`` per category. ``outcome_value`` and
+        ``num_samples`` exist only for signature compatibility with ``PosteriorAnalyzer``
+        and are ignored: the outcome is always treated as success vs. not-success.
+        """
+        factor_spec = self._factor_spec(factor_name)
+        assert factor_spec.type == "categorical"
+        assert factor_spec.choices is not None
+        num_choices = len(factor_spec.choices)
+        theta_column = self.dataset.theta[:, self.dataset.factor_columns[factor_name]]
+        outcome_column = self.dataset.x[:, self.dataset.outcome_columns[self.outcome_name]]
+
+        category_codes = theta_column.squeeze(-1).long().cpu().numpy()
+        success_flags = outcome_column.cpu().numpy() >= 0.5
+        success_rates = np.zeros(num_choices)
+        for code in range(num_choices):
+            flags_in_category = success_flags[category_codes == code]
+            if flags_in_category.size > 0:
+                success_rates[code] = float(flags_in_category.mean())
+        rate_sum = float(success_rates.sum())
+        if not rate_sum > 0:
+            # No category ever succeeded: fall back to a uniform distribution.
+            return np.full(num_choices, 1.0 / num_choices)
+        return success_rates / rate_sum
+
+
+def make_analyzer(dataset: SensitivityDataset, outcome_name: str) -> BaseAnalyzer:
+    """Pick and build the analyzer class that matches the schema's factor types.
+
+    Selection rules:
+      - all continuous (zero categorical) → :class:`NPEAnalyzer`
+      - all categorical (zero continuous) → :class:`EmpiricalAnalyzer`
+      - at least one of each → :class:`MNPEAnalyzer`
+
+    Prefer this factory over constructing a subclass by hand: the selection keeps
+    pure-categorical schemas away from MNPE. Asserts if the schema declares no factors.
+    """
+    declared_types = [factor.type for factor in dataset.schema.factors]
+    num_continuous_factors = declared_types.count("continuous")
+    num_categorical_factors = declared_types.count("categorical")
+    assert num_continuous_factors + num_categorical_factors > 0, "Schema declares no factors"
+    if num_categorical_factors == 0:
+        return NPEAnalyzer(dataset, outcome_name)
+    if num_continuous_factors == 0:
+        return EmpiricalAnalyzer(dataset, outcome_name)
+    return MNPEAnalyzer(dataset, outcome_name)
diff --git a/isaaclab_arena/analysis/sensitivity/dataset.py b/isaaclab_arena/analysis/sensitivity/dataset.py
new file mode 100644
index 000000000..1a176bc23
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/dataset.py
@@ -0,0 +1,370 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Schema parser and dataset loader for sensitivity analysis.
+
+Combines a hand-authored ``factors.yaml`` (the declared schema + priors) with an
+``episode_summary.jsonl`` (per-episode factor draws + outcome values, written by
+``episode_writer``) into the tensors that ``sbi`` consumes for posterior inference.
+
+Vocabulary refresher (for readers new to simulation-based inference / SBI):
+  - **theta** — the *factor* values per episode. The "inputs" we vary in the eval (e.g.
+    ``light_intensity``, ``pick_up_object``). Shape ``(num_episodes, total_factor_dim)``,
+    continuous factors come first then categoricals.
+  - **x** — the *outcome* values per episode. The "outputs" the policy produced (e.g.
+    ``success_rate``, ``object_moved_rate``). Shape ``(num_episodes, num_outcomes)``.
+  - **prior** — the assumed distribution over theta *before* seeing data. v0.3 ships
+    uniform priors only, encoded as ``sbi.utils.BoxUniform``.
+  - **factor_columns** — map from factor name to its column slice in theta, so
+    downstream code can extract a marginal by name without knowing the layout.
+
+MVP-2 supports continuous-1D and categorical factors. Vector continuous (``dim > 1``)
+factors still raise ``NotImplementedError`` so adding them later is a fill-in.
+"""
+
+from __future__ import annotations
+
+import json
+import torch
+import yaml
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+
+@dataclass
+class FactorSpec:
+    """Schema entry for a single factor declared in ``factors.yaml``.
+
+    A continuous factor is described by ``range`` (one ``[low, high]`` pair per dim); a
+    categorical factor by ``choices`` (string labels, stored in theta as their list index).
+    """
+
+    name: str  # factor key, as written under ``factors:`` in the YAML
+    type: Literal["continuous", "categorical"]
+    dim: int = 1  # number of theta columns taken by a continuous factor
+    range: list[list[float]] | None = None  # one [low, high] pair per dim, continuous only
+    choices: list[str] | None = None  # categorical only
+
+
+@dataclass
+class OutcomeSpec:
+    """Schema entry for one outcome: its name plus an informational type hint."""
+
+    name: str  # outcome key, as written under ``outcomes:`` in factors.yaml
+    type: str  # "bool", "float", "int" — informational; loader treats all as float
+
+
+@dataclass
+class SliceSpec:
+    """The ``(policy, task, embodiment)`` tuple a dataset comes from.
+
+    MNPE/NPE assume a single data-generating source per analysis, so all rows in a dataset
+    must belong to the same slice. NOTE(review): the check is said to live in the loader — confirm.
+    """
+
+    policy: str  # read from ``slice.policy`` in factors.yaml
+    task: str  # read from ``slice.task`` in factors.yaml
+    embodiment: str  # read from ``slice.embodiment`` in factors.yaml
+
+
+@dataclass
+class FactorSchema:
+    """Parsed ``factors.yaml`` — slice + factor list + outcome list."""
+
+    slice: SliceSpec
+    factors: list[FactorSpec]
+    outcomes: list[OutcomeSpec]
+
+    @classmethod
+    def from_yaml(cls, path: str | Path) -> FactorSchema:
+        """Load a ``factors.yaml`` from disk into a typed ``FactorSchema``.
+
+        The YAML must have three top-level mapping blocks: ``slice`` (policy/task/embodiment),
+        ``factors`` (one entry per varied input), and ``outcomes`` (one entry per measured
+        output). Each factor's ``type`` must be ``continuous`` or ``categorical``. An outcome
+        written as a bare key (``success_rate:``) is accepted and gets the default type ``float``.
+        """
+        with open(path, encoding="utf-8") as yaml_file:
+            yaml_data = yaml.safe_load(yaml_file)
+        assert isinstance(yaml_data, dict), f"factors.yaml at {path} must be a mapping at top level"
+        for required_key in ("slice", "factors", "outcomes"):
+            assert required_key in yaml_data, f"factors.yaml at {path} is missing top-level `{required_key}:` block"
+            block_is_mapping = isinstance(yaml_data[required_key], dict)
+            assert block_is_mapping, f"factors.yaml at {path} top-level `{required_key}:` block must be a mapping"
+
+        slice_block = yaml_data["slice"]
+        for required_key in ("policy", "task", "embodiment"):
+            assert (
+                required_key in slice_block
+            ), f"factors.yaml at {path} `slice:` block is missing `{required_key}` (need policy/task/embodiment)"
+        slice_spec = SliceSpec(
+            policy=slice_block["policy"], task=slice_block["task"], embodiment=slice_block["embodiment"]
+        )
+
+        factors: list[FactorSpec] = []
+        for factor_name, factor_block in yaml_data["factors"].items():
+            assert isinstance(factor_block, dict) and "type" in factor_block, (
+                f"factors.yaml at {path} factor {factor_name!r} is missing required `type:` field"
+                " (expected 'continuous' or 'categorical')"
+            )
+            factor_type = factor_block["type"]
+            assert factor_type in ("continuous", "categorical"), (
+                f"factors.yaml at {path} factor {factor_name!r} has unknown type {factor_type!r};"
+                " expected 'continuous' or 'categorical'"
+            )
+            factors.append(
+                FactorSpec(
+                    name=factor_name,
+                    type=factor_type,
+                    dim=factor_block.get("dim", 1),
+                    range=factor_block.get("range"),
+                    choices=factor_block.get("choices"),
+                )
+            )
+
+        outcomes = [
+            OutcomeSpec(name=outcome_name, type=(outcome_block or {}).get("type", "float"))
+            for outcome_name, outcome_block in yaml_data["outcomes"].items()
+        ]
+
+        return cls(slice=slice_spec, factors=factors, outcomes=outcomes)
+
+    @property
+    def total_factor_dim(self) -> int:
+        """Total width of theta — sum of ``dim`` over continuous factors plus 1 per categorical."""
+        return sum(factor.dim if factor.type == "continuous" else 1 for factor in self.factors)
+
+    @property
+    def factor_columns(self) -> dict[str, slice]:
+        """Map factor name → column slice in theta.
+
+        Continuous factors occupy the leading columns (``dim`` columns each), then each
+        categorical factor occupies one trailing column. The authors chose this
+        continuous-first order so one theta layout serves both NPE and MNPE.
+        """
+        continuous_factors = [factor for factor in self.factors if factor.type == "continuous"]
+        categorical_factors = [factor for factor in self.factors if factor.type == "categorical"]
+        column_slices: dict[str, slice] = {}
+        column_index = 0
+        for factor in continuous_factors + categorical_factors:
+            column_width = factor.dim if factor.type == "continuous" else 1
+            column_slices[factor.name] = slice(column_index, column_index + column_width)
+            column_index += column_width
+        return column_slices
+
+
+class SensitivityDataset:
+    """Combines a ``factors.yaml`` schema with an ``episode_summary.jsonl`` data file.
+
+    On construction:
+      1. Parses the schema (factors + outcomes + slice metadata).
+      2. Loads the JSONL rows (one row per episode).
+      3. Validates that every row contains all declared factor and outcome keys.
+      4. Fills any missing continuous ranges by inferring from observed min/max so the
+         analyzer can always trust ``schema.factors[i].range`` to be populated.
+      5. Builds the ``theta`` and ``x`` tensors that sbi (or the empirical analyzer)
+         will consume.
+
+    The four public attributes used by the analyzer (``theta``, ``x``, ``prior``,
+    ``factor_columns``) are properties — recomputed lazily where appropriate.
+    """
+
+    def __init__(self, factors_yaml: str | Path, jsonl_path: str | Path):
+        """Parse the schema, read the JSONL rows, validate them, then build ``theta`` and ``x``."""
+        self.schema = FactorSchema.from_yaml(factors_yaml)
+        # Whitespace-only lines are dropped; every remaining line goes through ``json.loads``.
+        raw_lines = Path(jsonl_path).read_text(encoding="utf-8").splitlines()
+        self.rows = [json.loads(raw_line) for raw_line in raw_lines if raw_line.strip()]
+        assert len(self.rows) > 0, f"Empty episode_summary.jsonl at {jsonl_path}"
+
+        self._validate_rows(jsonl_path)
+        self._infer_missing_factor_ranges()
+        self._theta = self._build_factor_tensor()
+        self._x = self._build_outcome_tensor()
+
+    def _validate_rows(self, jsonl_path: str | Path) -> None:
+        """Check that each JSONL row provides every factor and outcome named in the schema.
+
+        A row must have an ``arena_env_args`` block and an ``outcomes`` block. The schema's
+        factor names only need to be a *subset* of the ``arena_env_args`` keys, and its
+        outcome names a subset of the ``outcomes`` keys; additional keys in a row are ignored.
+
+        The first row that violates a rule fails an assertion whose message names the row
+        index, the file, and the missing keys — e.g. a factor declared in factors.yaml that
+        the evaluation never logged.
+        """
+        declared_factor_names = {factor.name for factor in self.schema.factors}
+        declared_outcome_names = {outcome.name for outcome in self.schema.outcomes}
+        required_blocks = ("arena_env_args", "outcomes")
+        for row_index, row in enumerate(self.rows):
+            assert all(
+                block_name in row for block_name in required_blocks
+            ), f"Row {row_index} of {jsonl_path} missing arena_env_args/outcomes block"
+            absent_factors = sorted(declared_factor_names.difference(row["arena_env_args"].keys()))
+            assert not absent_factors, (
+                f"Row {row_index} of {jsonl_path} is missing factor(s) "
+                f"{absent_factors} from its arena_env_args block; "
+                f"factors.yaml declares: {sorted(declared_factor_names)}"
+            )
+            absent_outcomes = sorted(declared_outcome_names.difference(row["outcomes"].keys()))
+            assert (
+                not absent_outcomes
+            ), f"Row {row_index} of {jsonl_path} missing outcomes {absent_outcomes}"
+
+    def _infer_missing_factor_ranges(self) -> None:
+        """Give every continuous factor that lacks a ``range`` one derived from the rows.
+
+        The derived range is ``[[min, max]]`` of the factor's values across ``self.rows``.
+        A ``range`` declared in factors.yaml is left untouched, so authors who want
+        principled prior bounds should declare them. Categorical factors are skipped.
+        Raises ``NotImplementedError`` for a range-less continuous factor with ``dim != 1``.
+        """
+        pending = [spec for spec in self.schema.factors if spec.type == "continuous" and spec.range is None]
+        for factor in pending:
+            if factor.dim != 1:
+                raise NotImplementedError(
+                    "Range inference for vector factors (dim > 1) is not implemented;"
+                    f" factor {factor.name!r} has dim={factor.dim}"
+                )
+            observed_values = [float(row["arena_env_args"][factor.name]) for row in self.rows]
+            lowest, highest = min(observed_values), max(observed_values)
+            factor.range = [[lowest, highest]]
+
+    def _build_factor_tensor(self) -> torch.Tensor:
+        """Build ``theta``: one row per episode, one column per factor dimension.
+
+        Column order is continuous factors first, then categorical factors, each kept in
+        schema order. A categorical value is stored as the ``float32`` position of that
+        value in ``FactorSpec.choices`` (``0..num_choices-1``); this continuous-first /
+        discrete-as-float layout is the one this module uses for sbi's MNPE.
+
+        Raises ``NotImplementedError`` for a continuous factor with ``dim != 1``.
+        """
+        columns: list[torch.Tensor] = []
+
+        # Pass 1: continuous factors, cast to float straight from each row.
+        for spec in self.schema.factors:
+            if spec.type != "continuous":
+                continue
+            if spec.dim != 1:
+                raise NotImplementedError(
+                    "Vector continuous factors (dim > 1) are not yet supported;"
+                    f" factor {spec.name!r} has dim={spec.dim}"
+                )
+            values = [float(episode["arena_env_args"][spec.name]) for episode in self.rows]
+            columns.append(torch.tensor(values, dtype=torch.float32).unsqueeze(1))
+
+        # Pass 2: categorical factors, integer-coded by position in ``choices``.
+        for spec in self.schema.factors:
+            if spec.type != "categorical":
+                continue
+            assert (
+                spec.choices is not None and len(spec.choices) > 0
+            ), f"Categorical factor {spec.name!r} has no `choices:` block in factors.yaml"
+            code_of = {choice: code for code, choice in enumerate(spec.choices)}
+            codes: list[int] = []
+            for position, episode in enumerate(self.rows):
+                raw = episode["arena_env_args"][spec.name]
+                assert raw in code_of, (
+                    f"Row {position} factor {spec.name!r} has value {raw!r}"
+                    f" not in declared choices {spec.choices}"
+                )
+                codes.append(code_of[raw])
+            columns.append(torch.tensor(codes, dtype=torch.float32).unsqueeze(1))
+
+        # No factor columns at all: still return a 2-D matrix with one row per episode.
+        if not columns:
+            return torch.zeros((len(self.rows), 0), dtype=torch.float32)
+        return torch.cat(columns, dim=1)
+
+    def _build_outcome_tensor(self) -> torch.Tensor:
+        """Build ``x``: one row per episode, one ``float32`` column per declared outcome.
+
+        Every value goes through ``float(...)``, so bool outcomes become 0.0/1.0.
+        Columns follow the order of ``self.schema.outcomes``.
+        """
+        per_outcome_values = [
+            [float(episode["outcomes"][spec.name]) for episode in self.rows] for spec in self.schema.outcomes
+        ]
+        columns = [torch.tensor(values, dtype=torch.float32).unsqueeze(1) for values in per_outcome_values]
+        return torch.cat(columns, dim=1)
+
+    @property
+    def theta(self) -> torch.Tensor:
+        """``(num_episodes, total_factor_dim)`` matrix of factor values, one row per episode.
+
+        Read-only view of the cached ``self._theta``. Per-factor column slices are given by
+        ``factor_columns`` — continuous factors first, then categoricals (integer-coded).
+        """
+        return self._theta
+
+    @property
+    def x(self) -> torch.Tensor:
+        """``(num_episodes, num_outcomes)`` matrix of outcome values, one row per episode.
+
+        Read-only view of the cached ``self._x``; ``outcome_columns`` maps an outcome name
+        to its column here. The analyzer typically selects a single outcome column at fit
+        time (e.g. ``success_rate``) and conditions its posterior queries on that outcome.
+        """
+        return self._x
+
+    @property
+    def factor_columns(self) -> dict[str, slice]:
+        """Map factor name → its column slice in theta; delegates to ``schema.factor_columns``."""
+        return self.schema.factor_columns
+
+    @property
+    def outcome_columns(self) -> dict[str, int]:
+        """Outcome name → column index in ``x``, numbered in ``schema.outcomes`` order."""
+        return {spec.name: position for position, spec in enumerate(self.schema.outcomes)}
+
+    @property
+    def has_categorical_factors(self) -> bool:
+        """Whether any factor in the schema has ``type == "categorical"``."""
+        return any(spec.type == "categorical" for spec in self.schema.factors)
+
+    @property
+    def prior(self):
+        """Uniform box prior over every factor column, in ``theta`` column order.
+
+        Returns one ``sbi.utils.BoxUniform`` whose bounds are concatenated
+        continuous-first, categorical-after:
+          - continuous factor → one ``[low, high]`` pair per entry of ``factor.range``
+            (declared in factors.yaml or inferred from the data);
+          - categorical factor → a single ``[0, num_choices - 1]`` pair spanning the
+            integer codes assigned in ``_build_factor_tensor``.
+
+        Asserts that every continuous factor has a ``range`` and every categorical
+        factor has a non-empty ``choices`` list.
+
+        The sbi import is deferred to call time so that merely loading a dataset does
+        not import sbi.
+        """
+        from sbi.utils import BoxUniform
+
+        factors = self.schema.factors
+        bounds: list[tuple[float, float]] = []
+
+        # Continuous bounds first, matching the leading columns of theta.
+        for spec in factors:
+            if spec.type == "continuous":
+                assert spec.range is not None, f"Factor {spec.name!r} has no range and was not inferred"
+                bounds.extend((float(low), float(high)) for low, high in spec.range)
+
+        # Then one bound pair per categorical factor, covering its integer codes.
+        for spec in factors:
+            if spec.type == "categorical":
+                assert (
+                    spec.choices is not None and len(spec.choices) > 0
+                ), f"Categorical factor {spec.name!r} has no `choices:` block"
+                bounds.append((0.0, float(len(spec.choices) - 1)))
+
+        low_bounds = [low for low, _ in bounds]
+        high_bounds = [high for _, high in bounds]
+        return BoxUniform(
+            low=torch.tensor(low_bounds, dtype=torch.float32),
+            high=torch.tensor(high_bounds, dtype=torch.float32),
+        )
diff --git a/isaaclab_arena/analysis/sensitivity/episode_writer.py b/isaaclab_arena/analysis/sensitivity/episode_writer.py
new file mode 100644
index 000000000..54624456b
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/episode_writer.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Per-episode summary writer for sensitivity analysis.
+
+``write_episode_summaries`` appends one JSONL row per recorded demo for a just-completed
+job. Each row carries:
+
+  - ``job_name`` and ``episode_idx`` for traceability,
+  - ``arena_env_args`` — the *entire* job.arena_env_args_dict, i.e. every value that
+    parameterized the env for this episode,
+  - ``outcomes`` — per-episode outcome values from the task's registered metrics, extracted
+    from the recorded hdf5 demos via each metric's ``compute_metric_from_recording``.
+
+The eval-side writer is intentionally analysis-agnostic: it logs all env state, and the
+analyzer's ``factors.yaml`` decides which subset of those keys to treat as factors. This
+keeps the writer free of any "what counts as a factor?" knowledge.
+
+Import-order note: this module legitimately touches pxr at import time via
+``isaaclab_arena.metrics.metrics`` (which imports ``isaaclab.envs.manager_based_rl_env``).
+Like ``metrics`` itself, callers must defer importing this module until *after*
+``SimulationAppContext`` is active — see ``policy_runner.py`` (which uses the same pattern
+for ``compute_metrics``) and ``eval_runner.py``'s per-job try block for examples.
+"""
+
+from __future__ import annotations
+
+import h5py
+import json
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from isaaclab_arena.metrics.metrics import get_metric_recorder_dataset_path
+from isaaclab_arena.metrics.metrics_logger import metrics_to_plain_python_types
+
+if TYPE_CHECKING:
+    from isaaclab_arena.evaluation.job_manager import Job
+
+
+def write_episode_summaries(env, job: Job, output_path: str | Path) -> int:
+    """Append one JSONL row per recorded demo for the just-completed job.
+
+    Each row has shape::
+
+        {
+          "job_name": "<job.name>",
+          "episode_idx": <0-based position of the demo in numeric demo-name order>,
+          "arena_env_args": <full job.arena_env_args_dict>,
+          "outcomes": <per-metric value computed from the demo>
+        }
+
+    Args:
+        env: The (possibly gym-wrapped) Arena env that just finished its rollout. The hdf5
+            path and registered metrics are read from ``env.unwrapped``.
+        job: The Job that ran. Its ``arena_env_args_dict`` is logged under ``arena_env_args``.
+        output_path: JSONL file to append to. Created (with parent dirs) if absent.
+
+    Returns:
+        Number of rows written (0 when the env cfg has no ``metrics``).
+    """
+    unwrapped_env = env.unwrapped
+    registered_metrics = getattr(unwrapped_env.cfg, "metrics", None)
+    if registered_metrics is None:
+        return 0
+
+    def _numeric_demo_order(demo_name: str) -> tuple[int, int, str]:
+        # h5py iterates member names alphanumerically ("demo_10" before "demo_2"); order by numeric suffix.
+        suffix = demo_name.rsplit("_", 1)[-1]
+        return (0, int(suffix), demo_name) if suffix.isdecimal() else (1, 0, demo_name)
+
+    arena_env_args_snapshot = dict(job.arena_env_args_dict)
+    hdf5_dataset_path = get_metric_recorder_dataset_path(unwrapped_env)
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    rows_written = 0
+    with h5py.File(hdf5_dataset_path, "r") as hdf5_file:
+        recorded_demos = hdf5_file["data"]
+        with open(output_path, "a", encoding="utf-8") as jsonl_output:
+            for demo_index, demo_name in enumerate(sorted(recorded_demos, key=_numeric_demo_order)):
+                demo_group = recorded_demos[demo_name]
+                raw_outcome_values = {
+                    metric.name: metric.compute_metric_from_recording([demo_group[metric.recorder_term_name][:]])
+                    for metric in registered_metrics
+                }
+                summary_row = {
+                    "job_name": job.name,
+                    "episode_idx": demo_index,
+                    "arena_env_args": arena_env_args_snapshot,
+                    "outcomes": metrics_to_plain_python_types(raw_outcome_values),
+                }
+                jsonl_output.write(json.dumps(summary_row) + "\n")
+                rows_written += 1
+    return rows_written
diff --git a/isaaclab_arena/analysis/sensitivity/plotting.py b/isaaclab_arena/analysis/sensitivity/plotting.py
new file mode 100644
index 000000000..2d2394da5
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/plotting.py
@@ -0,0 +1,209 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Plot renderers for sensitivity analysis.
+
+Pure-visualization module. Calls into the analyzer's public posterior queries
+(``continuous_marginal_density`` and ``categorical_marginal_probs``) and renders matplotlib
+figures. Decoupled from the analyzer hierarchy so new plot types can be added without
+touching inference code, and so existing plot code can be tested with mock posteriors.
+
+The single entry point is ``plot_marginal(analyzer, factor_name, output_path, ...)``,
+which dispatches by factor type to the right renderer.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from isaaclab_arena.analysis.sensitivity.analyzer import BaseAnalyzer
+    from isaaclab_arena.analysis.sensitivity.dataset import FactorSpec
+
+
+def plot_marginal(
+    analyzer: BaseAnalyzer,
+    factor_name: str,
+    output_path: str | Path,
+    outcome_value: float = 1.0,
+    num_samples: int = 10_000,
+    num_grid_points: int = 200,
+) -> None:
+    """Plot the marginal posterior of ``factor_name``, choosing the renderer by factor type.
+
+    Categorical factors are drawn with ``num_samples`` passed to the analyzer; continuous
+    factors are drawn on ``num_grid_points`` grid points and require the analyzer to expose
+    ``continuous_marginal_density``, otherwise ``NotImplementedError`` is raised.
+    """
+    spec = analyzer._factor_spec(factor_name)
+    if spec.type == "categorical":
+        _plot_categorical_marginal(analyzer, spec, output_path, outcome_value, num_samples)
+        return
+    if spec.type != "continuous":
+        raise NotImplementedError(f"Unsupported factor type {spec.type!r}")
+    if not hasattr(analyzer, "continuous_marginal_density"):
+        raise NotImplementedError(
+            f"{type(analyzer).__name__} cannot plot continuous factors; expected a PosteriorAnalyzer (NPE/MNPE)."
+        )
+    _plot_continuous_marginal(analyzer, spec, output_path, outcome_value, num_grid_points)
+
+
+def _plot_continuous_marginal(
+    analyzer: BaseAnalyzer,
+    factor_spec: FactorSpec,
+    output_path: str | Path,
+    outcome_value: float,
+    num_grid_points: int,
+) -> None:
+    """Draw a continuous factor's marginal posterior as a filled density curve plus a rug.
+
+    The curve is whatever ``analyzer.continuous_marginal_density`` returns for
+    ``outcome_value``. Under the axis, every recorded theta value is marked with a tick:
+    green (at ``-0.05 * max density``) when that episode's outcome is ``>= 0.5`` and red
+    (at ``-0.1 * max density``) otherwise, so the curve can be checked against raw data.
+    """
+    import matplotlib.pyplot as plt
+
+    grid, density = analyzer.continuous_marginal_density(factor_spec.name, outcome_value, num_grid_points)
+    dataset = analyzer.dataset
+    outcome_name = analyzer.outcome_name
+
+    # Raw per-episode values for the rug; outcomes are thresholded at 0.5.
+    theta_values = dataset.theta[:, dataset.factor_columns[factor_spec.name]].squeeze(-1).cpu().numpy()
+    outcomes = dataset.x[:, dataset.outcome_columns[outcome_name]].cpu().numpy()
+    achieved = outcomes >= 0.5
+
+    figure, axes = plt.subplots(figsize=(8, 5))
+    axes.plot(
+        grid,
+        density,
+        color="steelblue",
+        linewidth=2,
+        label=f"P({factor_spec.name} | {outcome_name}={outcome_value:g})",
+    )
+    axes.fill_between(grid, 0, density, color="steelblue", alpha=0.2)
+
+    # One scatter call per outcome group: (mask, rug height as a fraction of the peak, colour, relation).
+    rug_groups = (
+        (achieved, -0.05, "seagreen", "≥"),
+        (~achieved, -0.1, "firebrick", "<"),
+    )
+    for mask, height_fraction, colour, relation in rug_groups:
+        axes.scatter(
+            theta_values[mask],
+            np.full(mask.sum(), height_fraction * density.max()),
+            marker="|",
+            color=colour,
+            s=80,
+            label=f"{outcome_name} {relation} 0.5  (n={mask.sum()})",
+        )
+
+    axes.set_xlabel(factor_spec.name)
+    axes.set_ylabel("posterior density")
+    axes.set_title(_plot_title(analyzer, factor_spec.name))
+    axes.legend(loc="best", fontsize=9)
+    axes.grid(alpha=0.3)
+    figure.tight_layout()
+    _save_figure(figure, output_path)
+
+
+def _plot_categorical_marginal(
+    analyzer: BaseAnalyzer,
+    factor_spec: FactorSpec,
+    output_path: str | Path,
+    outcome_value: float,
+    num_samples: int,
+) -> None:
+    """Draw a categorical factor's marginal as paired bars, one pair per category.
+
+    Left (blue) bar: the analyzer's ``P(category | outcome)`` from
+    ``categorical_marginal_probs``. Right (green) bar: the fraction of that category's
+    recorded episodes whose outcome is ``>= 0.5``, computed from the raw dataset and
+    therefore independent of the analyzer. Categories with no episodes get a rate of 0.
+
+    The ``n=...`` label above each green bar is the number of episodes recorded for
+    that category, which indicates how much weight the empirical bar deserves.
+    """
+    import matplotlib.pyplot as plt
+
+    assert factor_spec.choices is not None
+    labels = factor_spec.choices
+    num_labels = len(labels)
+    dataset = analyzer.dataset
+    outcome_name = analyzer.outcome_name
+    theta_slice = dataset.factor_columns[factor_spec.name]
+    outcome_index = dataset.outcome_columns[outcome_name]
+
+    # Model side: posterior mass per category.
+    posterior = analyzer.categorical_marginal_probs(factor_spec.name, outcome_value, num_samples)
+
+    # Data side: per-category episode count and thresholded outcome rate.
+    codes = dataset.theta[:, theta_slice].squeeze(-1).long().cpu().numpy()
+    outcomes = dataset.x[:, outcome_index].cpu().numpy()
+    masks = [codes == code for code in range(num_labels)]
+    counts = np.array([int(mask.sum()) for mask in masks], dtype=int)
+    rates = np.array([float((outcomes[mask] >= 0.5).mean()) if mask.any() else 0.0 for mask in masks])
+
+    figure, axes = plt.subplots(figsize=(max(8, 1.0 * num_labels), 5))
+    positions = np.arange(num_labels)
+    width = 0.4
+    half_width = width / 2
+    axes.bar(
+        positions - half_width,
+        posterior,
+        width,
+        color="steelblue",
+        alpha=0.8,
+        label=f"P(category | {outcome_name}={outcome_value:g})",
+    )
+    axes.bar(
+        positions + half_width,
+        rates,
+        width,
+        color="seagreen",
+        alpha=0.7,
+        label=f"empirical {outcome_name} rate per category",
+    )
+
+    # Annotate each empirical bar with its sample count, just above the bar top.
+    for position, (count, rate) in enumerate(zip(counts, rates)):
+        axes.text(
+            position + half_width,
+            rate + 0.02,
+            f"n={count}",
+            ha="center",
+            fontsize=8,
+        )
+
+    axes.set_xticks(positions)
+    axes.set_xticklabels(labels, rotation=30, ha="right")
+    axes.set_ylabel("probability")
+    axes.set_ylim(0, 1.05)
+    axes.set_title(_plot_title(analyzer, factor_spec.name))
+    axes.legend(loc="best", fontsize=9)
+    axes.grid(alpha=0.3, axis="y")
+    figure.tight_layout()
+    _save_figure(figure, output_path)
+
+
+def _plot_title(analyzer: BaseAnalyzer, factor_name: str) -> str:
+    """Return a two-line title: the outcome/factor pair, then ``policy / task / embodiment``."""
+    slice_block = analyzer.dataset.schema.slice
+    return "\n".join((
+        f"Sensitivity of {analyzer.outcome_name} to {factor_name}",
+        f"slice: {slice_block.policy} / {slice_block.task} / {slice_block.embodiment}",
+    ))
+
+
+def _save_figure(figure, output_path: str | Path) -> None:
+    """Save ``figure`` at 150 dpi (creating parent dirs); the figure is closed even if saving fails."""
+    import matplotlib.pyplot as plt
+    try:
+        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+        figure.savefig(Path(output_path), dpi=150)
+    finally:
+        plt.close(figure)  # pyplot keeps the figure alive until it is closed explicitly
diff --git a/isaaclab_arena/analysis/sensitivity/synthetic_data_categorical.py b/isaaclab_arena/analysis/sensitivity/synthetic_data_categorical.py
new file mode 100644
index 000000000..550048b03
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/synthetic_data_categorical.py
@@ -0,0 +1,146 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Synthetic JSONL generator for the MVP-2 categorical-factor analyzer smoke test.
+
+Generates a fake ``episode_summary.jsonl`` where a single categorical factor
+``pick_up_object`` drives the success probability. By default three choices are "easy"
+(high success rate) and the other two are "hard" (low success rate). With enough samples
+the analyzer's recovered ``P(category | success=1)`` should concentrate on the easy
+choices, and the empirical per-category bar should match the configured rates within
+binomial noise.
+
+Sampling is **uniform over the categorical choices** (matches the semantics of
+``Choose(...)`` in Alex's variation system and the uniform prior the analyzer assumes).
+
+Pair with the auto-emitted factors.yaml. End-to-end smoke test:
+
+    /isaac-sim/python.sh -m isaaclab_arena.analysis.sensitivity.synthetic_data_categorical \\
+        --output /tmp/syn_cat.jsonl
+    /isaac-sim/python.sh -m isaaclab_arena.scripts.analyze_sensitivity \\
+        --factors_yaml /tmp/factors.yaml \\
+        --episode_summary /tmp/syn_cat.jsonl \\
+        --figure_path /tmp/syn_cat_plot.png
+
+Expected output: a bar chart where the "easy" choices have roughly 3-6x the posterior mass
+and empirical success rate of the "hard" choices.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import random
+from pathlib import Path
+
+# Five distinct objects, like the maple-table droid sweep. The first three are "easy"
+# (high success), the last two are "hard" (low success) — a known signal the analyzer
+# should recover.
+DEFAULT_CHOICES = [
+    "rubiks_cube_hot3d_robolab",
+    "wooden_bowl_hot3d_robolab",
+    "alphabet_soup_can_hope_robolab",
+    "mug_ycb_robolab",
+    "sugar_box_ycb_robolab",
+]
+DEFAULT_SUCCESS_PROBABILITIES = [0.90, 0.85, 0.75, 0.25, 0.15]  # P(success) per choice, same order as DEFAULT_CHOICES
+
+
+def _factors_yaml_text(choices: list[str]) -> str:
+    """Build the synthetic factors.yaml; choices are JSON-quoted so any name stays a valid YAML string."""
+    choices_string = ", ".join(json.dumps(choice) for choice in choices)
+    return (
+        "# factors.yaml — synthetic categorical dataset for analyzer smoke-testing.\n"
+        "# Auto-emitted by synthetic_data_categorical alongside the JSONL.\n"
+        "\n"
+        "slice:\n"
+        "  policy: synthetic_categorical\n"
+        "  task: synthetic_pick_and_place\n"
+        "  embodiment: synthetic\n"
+        "\n"
+        "factors:\n"
+        "  pick_up_object:\n"
+        "    type: categorical\n"
+        f"    choices: [{choices_string}]\n"
+        "\n"
+        "outcomes:\n"
+        "  success_rate:\n"
+        "    type: float\n"
+        "  object_moved_rate:\n"
+        "    type: float\n"
+    )
+
+
+def main():
+    """Generate the synthetic categorical JSONL and its factors.yaml, then print a summary."""
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="/tmp/synthetic_categorical_episode_summary.jsonl",
+        help="Output JSONL path.",
+    )
+    parser.add_argument(
+        "--factors-yaml-out",
+        type=str,
+        default=None,
+        help="Output factors.yaml path. Default: same directory as --output, named factors.yaml.",
+    )
+    parser.add_argument(
+        "--num-episodes",
+        type=int,
+        default=200,
+        help="Total episodes (uniform draws across all choices). Default 200 → ~40 per category for 5 choices.",
+    )
+    parser.add_argument("--seed", type=int, default=42, help="RNG seed for reproducibility.")
+    args = parser.parse_args()
+
+    rng = random.Random(args.seed)
+    choices = DEFAULT_CHOICES
+    success_probabilities = DEFAULT_SUCCESS_PROBABILITIES
+    assert len(choices) == len(
+        success_probabilities
+    ), "DEFAULT_CHOICES and DEFAULT_SUCCESS_PROBABILITIES lengths must match"
+    # Per episode the category is drawn first, then the success coin flip, so a seed is reproducible.
+    summary_rows = []
+    success_counts = dict.fromkeys(choices, 0)
+    episode_counts = dict.fromkeys(choices, 0)
+    for episode_index in range(args.num_episodes):
+        category_index = rng.randrange(len(choices))
+        category = choices[category_index]
+        was_success = 1.0 if rng.random() < success_probabilities[category_index] else 0.0
+        success_counts[category] += int(was_success)
+        episode_counts[category] += 1
+        summary_rows.append({
+            "job_name": "synth_categorical",
+            "episode_idx": episode_index,
+            "arena_env_args": {"pick_up_object": category},
+            "outcomes": {"success_rate": was_success, "object_moved_rate": was_success},
+        })
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as jsonl_file:
+        jsonl_file.writelines(json.dumps(row) + "\n" for row in summary_rows)
+
+    factors_yaml_path = Path(args.factors_yaml_out) if args.factors_yaml_out else output_path.parent / "factors.yaml"
+    factors_yaml_path.parent.mkdir(parents=True, exist_ok=True)
+    factors_yaml_path.write_text(_factors_yaml_text(choices), encoding="utf-8")
+
+    print(f"[INFO] Wrote {len(summary_rows)} rows to {output_path}")
+    print(f"[INFO] Wrote factors schema → {factors_yaml_path}")
+    print("[INFO] Per-category success counts (analyzer should pull posterior mass toward easy cats):")
+    for choice, target_probability in zip(choices, success_probabilities):
+        successes, total = success_counts[choice], episode_counts[choice]
+        empirical_percentage = 100 * successes / total if total else 0.0
+        bar_string = "█" * int(round(empirical_percentage / 5))
+        print(
+            f"       {choice:<35s} target={target_probability:>4.0%}"
+            f"  empirical={successes:>3d}/{total:<3d} ({empirical_percentage:>5.1f}%) {bar_string}"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/isaaclab_arena/analysis/sensitivity/synthetic_data_continuous.py b/isaaclab_arena/analysis/sensitivity/synthetic_data_continuous.py
new file mode 100644
index 000000000..24b1fba87
--- /dev/null
+++ b/isaaclab_arena/analysis/sensitivity/synthetic_data_continuous.py
@@ -0,0 +1,163 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Synthetic JSONL generator for smoke-testing the sensitivity analysis pipeline.
+
+Produces a fake ``episode_summary.jsonl`` with a known linear-Gaussian competence band:
+
+    P(success | intensity) = exp(-(intensity - center)^2 / (2 * sigma^2))
+
+i.e. a Gaussian directly in linear intensity space centered on a trained operating point.
+
+Sampling is **linear-uniform** over ``[10, 5000]`` (one intensity drawn independently per
+episode). This matches the semantics of ``Uniform(10, 5000)`` in Alex's variation system
+and matches the uniform prior declared in factors.yaml. With these choices the smoke
+test should recover the posterior peak exactly at ``center``, because:
+
+    1. linear uniform sampling matches the declared uniform prior (no sampling bias),
+    2. a linear-Gaussian likelihood is symmetric in linear theta-space, so its mode
+       equals its mean — and the NPE Gaussian fallback for 1D binary outcomes fits
+       the mean, recovering the true center.
+
+A more realistic competence band would be log-Gaussian (asymmetric: cameras blind fast
+at low intensity, saturate gradually at high), but that introduces a peak-bias artifact
+that masks pipeline-correctness signal. This smoke test deliberately matches the
+structural assumptions the analyzer can recover exactly, so any mismatch in the output
+points to a real bug rather than a known statistical limitation.
+
+Pair with the hand-authored ``light_intensity_sweep_factors.yaml`` so the analyzer
+script can be smoke-tested end-to-end without running Isaac Sim:
+
+    /isaac-sim/python.sh -m isaaclab_arena.analysis.sensitivity.synthetic_data_continuous \\
+        --output /tmp/syn.jsonl
+    /isaac-sim/python.sh -m isaaclab_arena.scripts.analyze_sensitivity \\
+        --factors_yaml isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml \\
+        --episode_summary /tmp/syn.jsonl \\
+        --figure_path /tmp/syn_plot.png
+
+Expected output: a posterior-density curve peaking at ``center`` (default 500), with
+empirical rug markers showing successes clustered around the center and failures at
+both extremes.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import random
+from pathlib import Path
+
+INTENSITY_LOW = 10.0  # lower bound of the uniform intensity sampling range
+INTENSITY_HIGH = 5000.0  # upper bound of the uniform intensity sampling range
+
+# A self-contained factors.yaml template for the synthetic dataset. Kept inline so this
+# module stays a pure-python dev tool: it must not import pxr-dependent modules such as
+# episode_writer.py, which loads isaaclab_arena.metrics at import time.
+_SYNTHETIC_FACTORS_YAML = """\
+# factors.yaml — synthetic dataset for analyzer smoke-testing.
+# Auto-emitted by isaaclab_arena.analysis.sensitivity.synthetic_data_continuous alongside the JSONL.
+
+slice:
+  policy: synthetic_linear_uniform
+  task: synthetic_pick_and_place
+  embodiment: synthetic
+
+factors:
+  light_intensity:
+    type: continuous
+    dim: 1
+
+outcomes:
+  success_rate:
+    type: float
+  object_moved_rate:
+    type: float
+"""
+
+
+def success_probability(intensity: float, center: float, sigma: float) -> float:
+    """Gaussian competence band ``exp(-z^2 / 2)`` with ``z = (intensity - center) / sigma``; 1.0 at ``center``."""
+    scaled_offset = (intensity - center) / sigma
+    return math.exp(-0.5 * scaled_offset * scaled_offset)
+
+
+def main():
+    """Generate the synthetic continuous-factor JSONL plus its factors.yaml and print per-bin success rates."""
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--output", type=str, default="/tmp/synthetic_episode_summary.jsonl", help="Output JSONL path.")
+    parser.add_argument(
+        "--factors-yaml-out",
+        type=str,
+        default=None,
+        help="Output factors.yaml path. Default: same directory as --output, named factors.yaml.",
+    )
+    parser.add_argument(
+        "--num-episodes",
+        type=int,
+        default=180,
+        help="Total number of episodes to generate. Each draws an intensity from Uniform(10, 5000).",
+    )
+    parser.add_argument("--center", type=float, default=500.0, help="Intensity where success rate peaks. Default: 500.")
+    parser.add_argument(
+        "--sigma",
+        type=float,
+        default=400.0,
+        help=(
+            "Linear-space width of the competence band (1 sigma in intensity units). Default: 400,"
+            " which gives ~61%% success at 100 and 900 (1 sigma) and near-zero success beyond ~1700."
+        ),
+    )
+    parser.add_argument("--seed", type=int, default=42, help="RNG seed for reproducibility.")
+    args = parser.parse_args()
+    if args.sigma == 0:  # would otherwise surface as a ZeroDivisionError inside success_probability
+        parser.error("--sigma must be non-zero")
+
+    random_generator = random.Random(args.seed)
+    summary_rows = []
+    for episode_index in range(args.num_episodes):
+        intensity = random_generator.uniform(INTENSITY_LOW, INTENSITY_HIGH)
+        probability_of_success = success_probability(intensity, args.center, args.sigma)
+        was_success = 1.0 if random_generator.random() < probability_of_success else 0.0
+        summary_rows.append({
+            "job_name": "synth_linear_uniform",
+            "episode_idx": episode_index,
+            "arena_env_args": {"light_intensity": intensity},
+            "outcomes": {"success_rate": was_success, "object_moved_rate": was_success},
+        })
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as jsonl_file:
+        jsonl_file.writelines(json.dumps(summary_row) + "\n" for summary_row in summary_rows)
+
+    # Emit a matching factors.yaml (inline template) so no hand-authored schema is needed.
+    factors_yaml_path = Path(args.factors_yaml_out) if args.factors_yaml_out else output_path.parent / "factors.yaml"
+    factors_yaml_path.parent.mkdir(parents=True, exist_ok=True)
+    factors_yaml_path.write_text(_SYNTHETIC_FACTORS_YAML, encoding="utf-8")
+
+    print(f"[INFO] Wrote {len(summary_rows)} rows to {output_path}")
+    print(f"[INFO] Wrote factors schema → {factors_yaml_path}")
+    print(f"[INFO] Linear-Gaussian competence band: center={args.center:g}, sigma={args.sigma:g}")
+    print("[INFO] Per-bin success rates (10 equal bins across the prior range):")
+    num_bins = 10
+    bin_width = (INTENSITY_HIGH - INTENSITY_LOW) / num_bins
+    for bin_index in range(num_bins):
+        bin_low = INTENSITY_LOW + bin_index * bin_width
+        bin_high = bin_low + bin_width
+        rows_in_bin = [row for row in summary_rows if bin_low <= row["arena_env_args"]["light_intensity"] < bin_high]
+        if not rows_in_bin:
+            continue
+        successes_in_bin = sum(int(row["outcomes"]["success_rate"]) for row in rows_in_bin)
+        percentage = 100 * successes_in_bin / len(rows_in_bin)
+        bar_string = "█" * int(round(percentage / 5))
+        print(
+            f"       [{bin_low:>5g}, {bin_high:>5g}): {successes_in_bin:>3d}/{len(rows_in_bin):<3d}"
+            f" ({percentage:>5.1f}%) {bar_string}"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/isaaclab_arena/environments/isaaclab_arena_manager_based_env.py b/isaaclab_arena/environments/isaaclab_arena_manager_based_env.py
index f6058015c..98cd752fd 100644
--- a/isaaclab_arena/environments/isaaclab_arena_manager_based_env.py
+++ b/isaaclab_arena/environments/isaaclab_arena_manager_based_env.py
@@ -7,7 +7,7 @@
 
 from isaaclab.envs import ManagerBasedRLEnvCfg
 from isaaclab.envs.mimic_env_cfg import MimicEnvCfg
-from isaaclab.sim import SimulationCfg
+from isaaclab.sim import RenderCfg, SimulationCfg
 from isaaclab.utils import configclass
 from isaaclab_newton.physics.newton_manager_cfg import MJWarpSolverCfg, NewtonCfg
 from isaaclab_physx.physics import PhysxCfg
@@ -72,7 +72,19 @@ class IsaacLabArenaManagerBasedRLEnvCfg(ManagerBasedRLEnvCfg):
     isaaclab_arena_env: IsaacLabArenaEnvironment | None = None
 
     # Overriding defaults from base class
-    sim: SimulationCfg = SimulationCfg(dt=1 / 200, render_interval=2)
+    # Override the RTX renderer's built-in scene ambient (carb /rtx/sceneDb/ambientLightIntensity, default 1.0 with
+    # color [0.1, 0.1, 0.1]) so that USD light prims fully control scene illumination. Without this, Arena scenes
+    # carry a ~10%-gray ambient floor independent of any UsdLuxLight, which silently confounds vision-policy evals.
+    sim: SimulationCfg = SimulationCfg(
+        dt=1 / 200,
+        render_interval=2,
+        render=RenderCfg(
+            carb_settings={
+                "/rtx/sceneDb/ambientLightIntensity": 0.0,
+                "/rtx/sceneDb/ambientLightColor": [0.0, 0.0, 0.0],
+            },
+        ),
+    )
     decimation: int = 4
     episode_length_s: float = 50.0
     wait_for_textures: bool = False
diff --git a/isaaclab_arena/evaluation/eval_runner.py b/isaaclab_arena/evaluation/eval_runner.py
index cd6f845d4..d9902f638 100644
--- a/isaaclab_arena/evaluation/eval_runner.py
+++ b/isaaclab_arena/evaluation/eval_runner.py
@@ -14,6 +14,7 @@
 from typing import TYPE_CHECKING
 
 from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser
+from isaaclab_arena.evaluation.camera_video import CameraObsVideoRecorder
 from isaaclab_arena.evaluation.eval_runner_cli import add_eval_runner_arguments
 from isaaclab_arena.evaluation.job_manager import Job, JobManager, Status
 from isaaclab_arena.evaluation.policy_runner import get_policy_cls, rollout_policy
@@ -144,13 +145,23 @@ def main():
     # Check if any job requires cameras and enable them if needed before starting simulation
     enable_cameras_if_required(eval_jobs_config, args_cli)
 
+    # Per-episode summary recording is opt-in via --episode_summary. The writer logs the
+    # full arena_env_args dict per episode; the analyzer side decides which keys to treat
+    # as factors via factors.yaml. No eval-side knowledge of "factors" required.
+    episode_summary_enabled = args_cli.episode_summary is not None
+    if episode_summary_enabled:
+        print(
+            "[INFO] Episode summary recording enabled. Per-episode arena_env_args + outcomes"
+            f" → {args_cli.episode_summary}"
+        )
+
     with SimulationAppContext(args_cli):
         job_manager = JobManager(eval_jobs_config["jobs"])
         metrics_logger = MetricsLogger()
 
         job_manager.print_jobs_info()
 
-        if args_cli.video:
+        if args_cli.video or args_cli.camera_video:
             os.makedirs(args_cli.video_dir, exist_ok=True)
             print(f"[INFO] Video recording enabled. Videos will be saved to: {args_cli.video_dir}")
 
@@ -172,20 +183,32 @@ def main():
                         else:
                             job.num_steps = args_cli.num_steps
 
-                    if args_cli.video:
+                    if args_cli.video or args_cli.camera_video:
                         if job.num_steps is not None:
                             video_length = job.num_steps
                         else:
                             video_length = job.num_episodes * env.unwrapped.max_episode_length
+                        job_video_folder = os.path.join(args_cli.video_dir, job.name)
+
+                    if args_cli.video:
                         video_kwargs = {
-                            "video_folder": os.path.join(args_cli.video_dir, job.name),
+                            "video_folder": job_video_folder,
                             "step_trigger": lambda step: step == 0,
                             "video_length": video_length,
                             "disable_logger": True,
                         }
-                        print(f"[INFO] Recording video for job '{job.name}' -> {video_kwargs['video_folder']}")
+                        print(f"[INFO] Recording viewport video for job '{job.name}' -> {job_video_folder}")
                         env = RecordVideo(env, **video_kwargs)
 
+                    if args_cli.camera_video:
+                        print(f"[INFO] Recording per-camera videos for job '{job.name}' -> {job_video_folder}")
+                        env = CameraObsVideoRecorder(
+                            env,
+                            video_folder=job_video_folder,
+                            step_trigger=lambda step: step == 0,
+                            video_length=video_length,
+                        )
+
                     metrics = rollout_policy(
                         env,
                         policy,
@@ -194,6 +217,15 @@ def main():
                         language_instruction=job.language_instruction,
                     )
 
+                    if episode_summary_enabled:
+                        # Deferred import — episode_writer transitively touches pxr via
+                        # isaaclab_arena.metrics.metrics. Matches the policy_runner.py:107
+                        # pattern for compute_metrics.
+                        from isaaclab_arena.analysis.sensitivity.episode_writer import write_episode_summaries
+
+                        rows = write_episode_summaries(env, job, args_cli.episode_summary)
+                        print(f"[INFO] Wrote {rows} episode summaries for job '{job.name}'")
+
                     job_manager.complete_job(job, metrics=metrics, status=Status.COMPLETED)
 
                     # users may not specify metrics for a task, although it's not recommended
diff --git a/isaaclab_arena/evaluation/eval_runner_cli.py b/isaaclab_arena/evaluation/eval_runner_cli.py
index b39187b04..d776e180b 100644
--- a/isaaclab_arena/evaluation/eval_runner_cli.py
+++ b/isaaclab_arena/evaluation/eval_runner_cli.py
@@ -21,9 +21,32 @@ def add_eval_runner_arguments(parser: argparse.ArgumentParser) -> None:
         default="/eval/videos",
         help="Root directory for recorded videos. Each job gets a subdirectory.",
     )
+    parser.add_argument(
+        "--camera_video",
+        "--camera-video",
+        action="store_true",
+        default=False,
+        help=(
+            "For each job, record one mp4 per camera in obs['camera_obs'] (what the policy actually sees)."
+            " Independent of --video; use either or both."
+        ),
+    )
     parser.add_argument(
         "--continue_on_error",
         action="store_true",
         default=False,
         help="Continue evaluation with remaining jobs when a job fails instead of stopping immediately.",
     )
+    parser.add_argument(
+        "--episode_summary",
+        type=str,
+        default=None,
+        help=(
+            "Output JSONL file for per-episode summaries. When set, eval_runner writes one"
+            " JSONL row per recorded demo containing the full arena_env_args dict (what"
+            " parameterized the env for that episode) and the task's registered outcomes."
+            " The analyzer side picks which arena_env_args keys to treat as factors via"
+            " factors.yaml — no eval-side flag needed. Absent here means no recording and"
+            " unchanged behavior for non-sensitivity workflows."
+        ),
+    )
diff --git a/isaaclab_arena/evaluation/job_manager.py b/isaaclab_arena/evaluation/job_manager.py
index 8c4d09c46..43bbe1ffb 100644
--- a/isaaclab_arena/evaluation/job_manager.py
+++ b/isaaclab_arena/evaluation/job_manager.py
@@ -28,6 +28,7 @@ def __init__(
         policy_config_dict: dict = None,
         status: Status = None,
         language_instruction: str = None,
+        arena_env_args_dict: dict | None = None,
     ):
         """Initialize a Job instance.
 
@@ -42,9 +43,13 @@ def __init__(
             status: Job status (defaults to PENDING)
             language_instruction: Optional language instruction override for the policy. When set,
                 takes precedence over the task's own description.
+            arena_env_args_dict: The original dict form of arena_env_args before conversion to
+                CLI args list. Preserves typed values (e.g. floats stay floats) for downstream
+                consumers that need to index by key.
         """
         self.name = name
         self.arena_env_args = arena_env_args
+        self.arena_env_args_dict = arena_env_args_dict if arena_env_args_dict is not None else {}
         assert num_envs > 0, "num_envs must be greater than 0"
         assert not (
             num_steps is not None and num_episodes is not None
@@ -102,6 +107,7 @@ def from_dict(cls, data: dict) -> "Job":
         return cls(
             name=data["name"],
             arena_env_args=cls.convert_args_dict_to_cli_args_list(data["arena_env_args"]),
+            arena_env_args_dict=data["arena_env_args"],
             policy_type=data["policy_type"],
             num_envs=num_envs,
             num_steps=num_steps,
diff --git a/isaaclab_arena/scripts/analyze_sensitivity.py b/isaaclab_arena/scripts/analyze_sensitivity.py
new file mode 100644
index 000000000..052948b2f
--- /dev/null
+++ b/isaaclab_arena/scripts/analyze_sensitivity.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""CLI driver for 1D continuous sensitivity analysis (MVP-1).
+
+Loads a SensitivityDataset from a paired (factors.yaml, episode_summary.jsonl), trains
+NPE on the selected outcome column, and saves a posterior-marginal plot for the chosen
+factor.
+
+Example:
+    python -m isaaclab_arena.scripts.analyze_sensitivity \\
+        --factors_yaml isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml \\
+        --episode_summary ./episode_summary.jsonl \\
+        --figure_path ./light_intensity_sensitivity.png
+
+This script runs entirely offline — no Isaac Sim, no policy server.
+"""
+
+from __future__ import annotations
+
+import argparse
+
+from isaaclab_arena.analysis.sensitivity.analyzer import make_analyzer
+from isaaclab_arena.analysis.sensitivity.dataset import SensitivityDataset
+from isaaclab_arena.analysis.sensitivity.plotting import plot_marginal
+
+
+def main():
+    """Load the dataset, fit the dispatched analyzer, and save one factor's marginal plot."""
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--factors_yaml", type=str, required=True, help="Path to factors.yaml.")
+    parser.add_argument(
+        "--episode_summary", type=str, required=True, help="Path to episode_summary.jsonl produced by eval_runner."
+    )
+    parser.add_argument(
+        "--input_factor",
+        type=str,
+        default=None,
+        help="Name of the factor to plot. Defaults to the first factor declared in factors.yaml.",
+    )
+    parser.add_argument(
+        "--output_metric",
+        type=str,
+        default=None,
+        help="Outcome name to condition on. Defaults to the first outcome listed in factors.yaml.",
+    )
+    parser.add_argument(
+        "--outcome_value",
+        type=float,
+        default=1.0,
+        help="Outcome value to condition on (1.0 = success). Default: 1.0.",
+    )
+    parser.add_argument(
+        "--figure_path",
+        type=str,
+        default="./sensitivity.png",
+        help="Output figure path. Default: ./sensitivity.png.",
+    )
+    args = parser.parse_args()
+
+    print(f"[INFO] Loading dataset: factors={args.factors_yaml}  jsonl={args.episode_summary}")
+    dataset = SensitivityDataset(args.factors_yaml, args.episode_summary)
+
+    available_factors = list(dataset.factor_columns)
+    available_outcomes = [outcome.name for outcome in dataset.schema.outcomes]
+
+    # An empty schema would otherwise surface as a bare IndexError on the [0] defaults below.
+    if not available_factors or not available_outcomes:
+        parser.error("factors.yaml must declare at least one factor and one outcome.")
+
+    factor_name = available_factors[0] if args.input_factor is None else args.input_factor
+    if factor_name not in available_factors:
+        parser.error(
+            f"--input_factor {args.input_factor!r} not found in factors.yaml. "
+            f"Available factors: {available_factors}"
+        )
+
+    outcome_name = available_outcomes[0] if args.output_metric is None else args.output_metric
+    if outcome_name not in available_outcomes:
+        parser.error(
+            f"--output_metric {args.output_metric!r} not found in factors.yaml. "
+            f"Available outcomes: {available_outcomes}"
+        )
+
+    print(
+        f"[INFO] Analyzing factor '{factor_name}' against outcome '{outcome_name}'"
+        f" (conditioning on outcome={args.outcome_value:g})"
+    )
+    print(
+        f"[INFO] num_episodes={len(dataset.rows)};  theta shape={tuple(dataset.theta.shape)};"
+        f"  x shape={tuple(dataset.x.shape)}"
+    )
+
+    # make_analyzer chooses the analyzer class for this dataset; log which one was picked.
+    analyzer = make_analyzer(dataset, outcome_name=outcome_name)
+    print(f"[INFO] Dispatched analyzer: {type(analyzer).__name__}")
+    analyzer.fit()
+    print(f"[INFO] Plotting marginal -> {args.figure_path}")
+    plot_marginal(analyzer, factor_name, output_path=args.figure_path, outcome_value=args.outcome_value)
+    print("[INFO] Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml
new file mode 100644
index 000000000..4a4c82200
--- /dev/null
+++ b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_factors.yaml
@@ -0,0 +1,36 @@
+# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Sensitivity-analysis schema for the light_intensity sweep on droid + pi0.
+# Paired with: light_intensity_sweep_jobs_config.json (and the minimal variant).
+# Hand-authored — factor names must match the arena_env_args keys set in the paired jobs config.
+#
+# - slice identifies the (policy, task, embodiment) the dataset comes from; MNPE/NPE
+#   assumes a single data-generating source per analysis.
+# - factors declares what the eval varies; eval_runner --episode_summary logs the full
+#   arena_env_args dict per episode, so factor names here must match those dict keys.
+# - outcomes declares what the eval measures; the writer pulls these from the
+#   registered task metrics (compute_metric_from_recording on each demo).
+
+slice:
+  policy: pi0_remote
+  task: pick_and_place_maple_table
+  embodiment: droid_abs_joint_pos
+
+factors:
+  light_intensity:
+    type: continuous
+    dim: 1
+    # Mirrors the robolab evaluated endpoints [10, 5000] for direct comparison; spans the
+    # dark / normal / bright regimes around the policy's trained operating point (~500).
+    range: [[10, 5000]]
+
+outcomes:
+  success_rate:
+    # Per-episode value of SuccessRateMetric. Returns 0.0 or 1.0 for a single demo.
+    type: float
+  object_moved_rate:
+    # Per-episode value of ObjectMovedRateMetric. Same shape as success_rate.
+    type: float
diff --git a/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_jobs_config.json b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_jobs_config.json
new file mode 100644
index 000000000..6da57f719
--- /dev/null
+++ b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_jobs_config.json
@@ -0,0 +1,184 @@
+{
+    "jobs": [
+        {
+            "name": "light_intensity_sweep_10",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 10
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_25",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 25
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_60",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 60
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_150",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 150
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_350",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 350
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_800",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 800
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_1800",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 1800
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_4000",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 4000
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_sweep_5000",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 5000
+            },
+            "num_episodes": 20,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        }
+    ]
+}
diff --git a/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_minimal_jobs_config.json b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_minimal_jobs_config.json
new file mode 100644
index 000000000..0e6f1d3ce
--- /dev/null
+++ b/isaaclab_arena_environments/eval_jobs_configs/light_intensity_sweep_minimal_jobs_config.json
@@ -0,0 +1,64 @@
+{
+    "jobs": [
+        {
+            "name": "light_intensity_minimal_100",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 100
+            },
+            "num_episodes": 2,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_minimal_500",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 500
+            },
+            "num_episodes": 2,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "light_intensity_minimal_5000",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 5000
+            },
+            "num_episodes": 2,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        }
+    ]
+}
diff --git a/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_factors.yaml b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_factors.yaml
new file mode 100644
index 000000000..ab5eb24cd
--- /dev/null
+++ b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_factors.yaml
@@ -0,0 +1,26 @@
+# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Sensitivity-analysis schema for the pick_up_object sweep on droid + pi0.
+# Paired with: pick_up_object_sweep_minimal_jobs_config.json
+# Hand-authored — factor names must match the arena_env_args keys set in the paired jobs config.
+
+slice:
+  policy: pi0_remote
+  task: pick_and_place_maple_table
+  embodiment: droid_abs_joint_pos
+
+factors:
+  pick_up_object:
+    type: categorical
+    # Three objects with distinct visual / shape characteristics. List them in the order
+    # the analyzer should use as integer codes (0=rubiks_cube, 1=alphabet_soup_can, 2=sugar_box).
+    choices: [rubiks_cube_hot3d_robolab, alphabet_soup_can_hope_robolab, sugar_box_ycb_robolab]
+
+outcomes:
+  success_rate:
+    type: float
+  object_moved_rate:
+    type: float
diff --git a/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_minimal_jobs_config.json b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_minimal_jobs_config.json
new file mode 100644
index 000000000..fc2b3950c
--- /dev/null
+++ b/isaaclab_arena_environments/eval_jobs_configs/pick_up_object_sweep_minimal_jobs_config.json
@@ -0,0 +1,70 @@
+{
+    "jobs": [
+        {
+            "name": "pick_up_object_minimal_rubiks_cube",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 500,
+                "pick_up_object": "rubiks_cube_hot3d_robolab",
+                "destination_location": "wooden_bowl_hot3d_robolab"
+            },
+            "num_episodes": 2,
+            "language_instruction": "Pick up the Rubik's cube and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "pick_up_object_minimal_alphabet_soup_can",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 500,
+                "pick_up_object": "alphabet_soup_can_hope_robolab",
+                "destination_location": "wooden_bowl_hot3d_robolab"
+            },
+            "num_episodes": 2,
+            "language_instruction": "Pick up the soup can and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        },
+        {
+            "name": "pick_up_object_minimal_sugar_box",
+            "arena_env_args": {
+                "enable_cameras": true,
+                "environment": "pick_and_place_maple_table",
+                "embodiment": "droid_abs_joint_pos",
+                "hdr": "billiard_hall_robolab",
+                "light_intensity": 500,
+                "pick_up_object": "sugar_box_ycb_robolab",
+                "destination_location": "wooden_bowl_hot3d_robolab"
+            },
+            "num_episodes": 2,
+            "language_instruction": "Pick up the sugar box and place it in the bowl.",
+            "policy_type": "isaaclab_arena_openpi.policy.pi0_remote_policy.Pi0RemotePolicy",
+            "policy_config_dict": {
+                "policy_variant": "pi05",
+                "policy_device": "cuda:0",
+                "remote_host": "127.0.0.1",
+                "remote_port": 8000,
+                "openpi_embodiment_adapter": "droid"
+            }
+        }
+    ]
+}
diff --git a/setup.py b/setup.py
index 82cd92b56..2c1c5944f 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@
     "jupyter",
     "debugpy",
     "tenacity",
+    "sbi",
 ]
 
 setup(