From 012b8e3715849a86201e3e01e18ca98eac9e85c2 Mon Sep 17 00:00:00 2001 From: odoherty Date: Tue, 7 Apr 2026 13:03:30 -0700 Subject: [PATCH 01/12] Add NIST gear insertion task with RL-Games training pipeline NIST peg-insert gear assembly environment: custom OSC action term, keypoint squashing rewards, IK gripper reset, and domain randomization. Includes RL-Games PPO config, train/play scripts, and policy runner wrapper. --- .gitattributes | 2 + README.md | 136 +++++ isaaclab_arena/assets/object_library.py | 94 +++- isaaclab_arena/policy/__init__.py | 1 + .../policy/rl_games_action_policy.py | 212 +++++++ .../nist_gear_insertion_osc_rl_games.yaml | 124 +++++ .../reinforcement_learning/play_rl_games.py | 133 +++++ .../reinforcement_learning/train_rl_games.py | 160 ++++++ .../tasks/nist_gear_insertion_task.py | 523 ++++++++++++++++++ .../gear_insertion_observations.py | 94 ++++ .../tasks/rewards/gear_insertion_rewards.py | 301 ++++++++++ isaaclab_arena/tasks/terminations.py | 90 ++- isaaclab_arena/terms/events.py | 166 +++++- isaaclab_arena_environments/cli.py | 2 + isaaclab_arena_environments/mdp/__init__.py | 2 + .../mdp/nist_gear_insertion_osc_action.py | 252 +++++++++ .../mdp/observations.py | 210 +++++++ .../mdp/robot_configs.py | 94 ++++ ...nist_assembled_gearmesh_osc_environment.py | 259 +++++++++ 19 files changed, 2852 insertions(+), 3 deletions(-) create mode 100644 isaaclab_arena/policy/rl_games_action_policy.py create mode 100644 isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml create mode 100644 isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py create mode 100644 isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py create mode 100644 isaaclab_arena/tasks/nist_gear_insertion_task.py create mode 100644 isaaclab_arena/tasks/observations/gear_insertion_observations.py create mode 100644 isaaclab_arena/tasks/rewards/gear_insertion_rewards.py create mode 100644 isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py create mode 100644 isaaclab_arena_environments/mdp/observations.py create mode 100644 isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py diff --git a/.gitattributes b/.gitattributes index 0d4fdcf71..4e641c670 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,3 +5,5 @@ *.gif filter=lfs diff=lfs merge=lfs -text *.mp4 filter=lfs diff=lfs merge=lfs -text docs/images/ filter=lfs diff=lfs merge=lfs -text +assets/*.usd filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 3ee626e3b..5e3f733a3 100644 --- a/README.md +++ b/README.md @@ -255,3 +255,139 @@ Isaac Lab-Arena builds on [NVIDIA Isaac Lab](https://github.com/isaac-sim/IsaacL Made with ❤️ by the NVIDIA Robotics Team + +--- + +## NIST Gear Insertion Task + +Reinforcement learning for NIST gear-mesh insertion using operational-space torque +control (OSC). A Franka Panda robot inserts a medium NIST gear onto a peg on the +gear base, trained with PPO (RL Games) and an LSTM recurrent policy. + +### Repository Layout (NIST task) + +``` +assets/ # USD scene assets (Git LFS) + gear_medium.usd # Held gear + gearbase_and_gears_gearbase_root.usd # Gear base + flanking gears + nist_assembled.usd # Assembled board (visual background) + +isaaclab_arena_environments/ + nist_assembled_gearmesh_osc_environment.py # Environment definition + mdp/ + nist_gear_insertion_osc_action.py # OSC action term (asset-relative, EMA, dead-zones) + observations.py # 24-D policy observation builder (NIST gear insertion) + +isaaclab_arena/ + policy/ + rl_games_action_policy.py # PolicyBase wrapper for RL Games inference + rl_policy/ + nist_gear_insertion_osc_rl_games.yaml # RL Games PPO hyperparameters + nist_gear_insertion_osc_rsl_rl.json # RSL-RL-style policy JSON (optional) + scripts/reinforcement_learning/ + train_rl_games.py # Training script + play_rl_games.py # Visual playback script + tasks/ + nist_gear_insertion_task.py # Task: rewards, terminations, events, observations + rewards/ + gear_insertion_rewards.py # Keypoints, bonuses, insertion regularizers + terms/ + events.py # Event terms (place gear in gripper, etc.) + assets/ + object_library.py # Asset registry (USD paths, physics props) +``` + +### Environment + +**Name:** `nist_assembled_gear_mesh_osc` + +| Property | Value | +|----------|-------| +| Robot | Franka Panda (`franka_mimic.usd`) | +| Controller | OSC — stiffness [565, 565, 565, 28, 28, 28] | +| Action space | 7-D (3 pos + 3 rot + 1 auxiliary scalar) | +| Observation space | 24-D policy obs + task state (gear pos, peg pos, board quat, etc.) | +| Episode length | 15 s (configurable in environment) | +| Success criteria | Gear Z within 5% of peg height, XY within 2.5 mm | +| Terminations | Success (early reset) + time-out | + +**Domain randomization:** physics materials (friction), held object mass (±5 g), +fixed-asset yaw (0–15°). + +**RL Games experiment name** (log folder under `logs/rl_games/`): `NistGearInsertionOscRlg`. + +### Reward Architecture (Current) + +- Reward configuration is defined in + `isaaclab_arena/tasks/nist_gear_insertion_task.py` and + `isaaclab_arena/tasks/rewards/gear_insertion_rewards.py`. +- Base rewards include: + - 3 keypoint squashing terms (`kp_baseline`, `kp_coarse`, `kp_fine`) + - engagement bonus + - success bonus +- When `include_insertion_regularizers=True`, optional terms include (implementation + classes in `gear_insertion_rewards.py`): + - `osc_action_magnitude_penalty` + - `osc_action_delta_penalty` + - `wrist_contact_force_penalty` + - `success_prediction_error` + +### Commands + +All commands run **inside the Docker container** (`isaaclab_arena-latest`). + +**Training:** + +```bash +python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ + --num_envs 128 \ + --max_iterations 300 \ + --headless \ + nist_assembled_gear_mesh_osc +``` + +**Resume from checkpoint:** + +```bash +python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ + --num_envs 128 \ + --max_iterations 300 \ + --headless \ + --checkpoint \ + nist_assembled_gear_mesh_osc +``` + +**Visual playback:** + +```bash +python isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py \ + --num_envs 1 \ + --sigma 0 \ + --checkpoint \ + nist_assembled_gear_mesh_osc +``` + +**Policy evaluation (metrics):** + +```bash +python isaaclab_arena/evaluation/policy_runner.py \ + --policy_type rl_games \ + --checkpoint_path \ + --num_envs 1 \ + --num_steps 2000 \ + nist_assembled_gear_mesh_osc +``` + +**Copy checkpoint to local machine:** + +```bash +docker cp isaaclab_arena-latest:/checkpoint.pth ./local_path/ +``` + +### Assets + +USD assets in `assets/` are tracked with **Git LFS**. After cloning, run: + +```bash +git lfs pull +``` diff --git a/isaaclab_arena/assets/object_library.py b/isaaclab_arena/assets/object_library.py index 72c6b6b4f..70e350c83 100644 --- a/isaaclab_arena/assets/object_library.py +++ b/isaaclab_arena/assets/object_library.py @@ -3,11 +3,13 @@ # # SPDX-License-Identifier: Apache-2.0 - +from pathlib import Path from typing import TYPE_CHECKING, Any import isaaclab.sim as sim_utils +_REPO_ROOT = Path(__file__).resolve().parent.parent.parent + if TYPE_CHECKING: from isaaclab_arena.assets.hdr_image import HDRImage from isaaclab.assets import RigidObjectCfg @@ -92,6 +94,96 @@ def __init__( super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose, scale=scale) +@register_asset +class GearsAndBase(LibraryObject): + """NIST gear base with SDF collision (local USD, gear_base as root prim).""" + + name = "gears_and_base" + tags = ["object"] + usd_path = str(_REPO_ROOT / "assets" / "gearbase_and_gears_gearbase_root.usd") + + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + kinematic_enabled=True, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__(self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class MediumNistGear(LibraryObject): + """NIST medium gear (held asset for gear insertion).""" + + name = "medium_nist_gear" + tags = ["object"] + usd_path = str(_REPO_ROOT / "assets" / "gear_medium.usd") + + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__(self, prim_path: str | None = None, initial_pose: Pose | None = None): + super().__init__(prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class NistAssembledBoard(LibraryObject): + """NIST assembled board (kinematic background for gear insertion scene).""" + + name = "nist_assembled_board" + tags = ["object"] + usd_path = str(_REPO_ROOT / "assets" / "nist_assembled.usd") + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=None), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + @register_asset class MustardBottle(LibraryObject): """ diff --git a/isaaclab_arena/policy/__init__.py b/isaaclab_arena/policy/__init__.py index 66caa51a1..9c3dbfb18 100644 --- a/isaaclab_arena/policy/__init__.py +++ b/isaaclab_arena/policy/__init__.py @@ -6,5 +6,6 @@ from .action_chunking import ActionChunkingState from .action_chunking_client import * from .replay_action_policy import * +from .rl_games_action_policy import * from .rsl_rl_action_policy import * from .zero_action_policy import * diff --git a/isaaclab_arena/policy/rl_games_action_policy.py b/isaaclab_arena/policy/rl_games_action_policy.py new file mode 100644 index 000000000..66efd5b98 --- /dev/null +++ b/isaaclab_arena/policy/rl_games_action_policy.py @@ -0,0 +1,212 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import argparse +import math +import gymnasium as gym +import torch +import yaml +from dataclasses import dataclass +from gymnasium.spaces.dict import Dict as GymSpacesDict +from pathlib import Path + +from isaaclab.utils.assets import retrieve_file_path +from rl_games.common import env_configurations, vecenv +from rl_games.common.player import BasePlayer +from rl_games.torch_runner import Runner + +from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper + +from isaaclab_arena.assets.register import register_policy +from isaaclab_arena.policy.policy_base import PolicyBase + + +@dataclass +class RlGamesActionPolicyConfig: + """Configuration for RL-Games action policy. + + Supports both dict-based (from JSON eval runner) and CLI-based configuration. + """ + + checkpoint_path: str + """Path to the RL-Games .pth checkpoint file.""" + + agent_cfg_path: Path = Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml") + """Path to the RL-Games agent YAML configuration file.""" + + device: str = "cuda:0" + """Device to run the policy on.""" + + deterministic: bool = True + """Use mean actions (no exploration noise) during evaluation.""" + + @classmethod + def from_cli_args(cls, args: argparse.Namespace) -> RlGamesActionPolicyConfig: + return cls( + checkpoint_path=args.checkpoint_path, + agent_cfg_path=args.agent_cfg_path, + device=args.device if hasattr(args, "device") else "cuda:0", + deterministic=getattr(args, "deterministic", True), + ) + + +@register_policy +class RlGamesActionPolicy(PolicyBase): + """Policy that uses a trained RL-Games model for inference. + + Wraps the RL-Games player for use with the Arena policy runner and eval runner. + Handles observation processing (concatenation, clipping) and LSTM state management. + + Example JSON configuration for eval runner:: + + { + "jobs": [{ + "name": "eval_gear_insertion", + "policy_type": "rl_games", + "policy_config_dict": { + "checkpoint_path": "logs/rl_games/.../nn/model.pth", + "agent_cfg_path": "isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml", + "device": "cuda:0", + "deterministic": true + }, + "arena_env_args": {"environment": "nist_assembled_gear_mesh_osc"}, + "num_episodes": 100 + }] + } + """ + + name = "rl_games" + config_class = RlGamesActionPolicyConfig + + def __init__(self, config: RlGamesActionPolicyConfig, args_cli: argparse.Namespace | None = None): + super().__init__(config) + self.config: RlGamesActionPolicyConfig = config + self._player: BasePlayer | None = None + self._wrapper: RlGamesVecEnvWrapper | None = None + self._rnn_initialized = False + self.args_cli = args_cli + + def _load_policy(self, env: gym.Env) -> None: + """Set up the RL-Games infrastructure and load the checkpoint. + + Creates the RlGamesVecEnvWrapper (for observation processing only), + registers the env with the RL-Games runner, creates the player, + and restores the checkpoint weights. + """ + with open(self.config.agent_cfg_path) as f: + agent_cfg = yaml.safe_load(f) + + device = self.config.device + agent_cfg["params"]["config"]["device"] = device + agent_cfg["params"]["config"]["device_name"] = device + + resume_path = retrieve_file_path(self.config.checkpoint_path) + agent_cfg["params"]["load_checkpoint"] = True + agent_cfg["params"]["load_path"] = resume_path + + clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf) + clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf) + obs_groups = agent_cfg["params"]["env"].get("obs_groups") + concate_obs = agent_cfg["params"]["env"].get("concate_obs_groups", True) + + self._wrapper = RlGamesVecEnvWrapper( + env, device, clip_obs, clip_actions, obs_groups, concate_obs, + ) + + vecenv.register( + "IsaacRlgWrapper", + lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs), + ) + env_configurations.register( + "rlgpu", + {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: self._wrapper}, + ) + + agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs + + runner = Runner() + runner.load(agent_cfg) + self._player = runner.create_player() + self._player.restore(resume_path) + self._player.reset() + + print(f"[INFO] Loaded RL-Games checkpoint from: {resume_path}") + + def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor: + if self._player is None: + self._load_policy(env) + + assert self._player is not None + assert self._wrapper is not None + + obs_copy = dict(observation) + obs_rlg = self._wrapper._process_obs(obs_copy) + obs_tensor = obs_rlg["obs"] + + if not self._rnn_initialized: + _ = self._player.get_batch_size(obs_tensor, 1) + if self._player.is_rnn: + self._player.init_rnn() + self._rnn_initialized = True + + obs_tensor = self._player.obs_to_torch(obs_tensor) + + with torch.inference_mode(): + action = self._player.get_action(obs_tensor, is_deterministic=self.config.deterministic) + + return action + + def reset(self, env_ids: torch.Tensor | None = None) -> None: + if self._player is None or not self._player.is_rnn: + return + if not hasattr(self._player, "states") or self._player.states is None: + return + if env_ids is None: + for s in self._player.states: + s.zero_() + else: + for s in self._player.states: + s[:, env_ids, :] = 0.0 + + @classmethod + def from_dict(cls, config_dict: dict) -> RlGamesActionPolicy: + config = RlGamesActionPolicyConfig(**config_dict) + return cls(config, args_cli=None) + + @staticmethod + def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: + group = parser.add_argument_group("RL-Games Action Policy", "Arguments for RL-Games action policy") + group.add_argument( + "--checkpoint_path", + type=str, + required=True, + help="Path to the .pth checkpoint file containing the RL-Games policy", + ) + group.add_argument( + "--agent_cfg_path", + type=Path, + default=Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml"), + help="Path to the RL-Games agent YAML configuration file.", + ) + group.add_argument( + "--deterministic", + action="store_true", + default=True, + help="Use mean actions without exploration noise (default: True).", + ) + group.add_argument( + "--stochastic", + dest="deterministic", + action="store_false", + help="Use stochastic actions with exploration noise.", + ) + return parser + + @staticmethod + def from_args(args: argparse.Namespace) -> RlGamesActionPolicy: + config = RlGamesActionPolicyConfig.from_cli_args(args) + return RlGamesActionPolicy(config, args_cli=args) diff --git a/isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml b/isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml new file mode 100644 index 000000000..adb1630cc --- /dev/null +++ b/isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml @@ -0,0 +1,124 @@ +# RL Games PPO config for NIST gear insertion (operational-space torque control). +# Network and hyperparameters align with common assembly peg-insert RL benchmarks. + +params: + seed: 0 + algo: + name: a2c_continuous + + env: + clip_actions: 1.0 + obs_groups: + obs: [policy, task_obs] + states: [policy, task_obs] + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + + space: + continuous: + mu_activation: None + sigma_activation: None + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: False + mlp: + units: [512, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + rnn: + name: lstm + units: 1024 + layers: 2 + before_mlp: True + concat_input: True + layer_norm: True + + load_checkpoint: False + load_path: "" + + config: + name: NistGearInsertionOscRlg + device: cuda:0 + full_experiment_name: test + env_name: rlgpu + multi_gpu: False + ppo: True + mixed_precision: True + normalize_input: True + normalize_value: True + value_bootstrap: True + num_actors: 128 + reward_shaper: + scale_value: 1.0 + normalize_advantage: True + gamma: 0.995 + tau: 0.95 + learning_rate: 1.0e-4 + lr_schedule: adaptive + schedule_type: standard + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 4000 + save_best_after: 10 + save_frequency: 10 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 128 + minibatch_size: 512 + mini_epochs: 4 + critic_coef: 2 + clip_value: True + seq_length: 128 + bounds_loss_coef: 0.0001 + + central_value_config: + minibatch_size: 512 + mini_epochs: 4 + learning_rate: 1e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + clip_value: True + normalize_input: True + truncate_grads: True + + network: + name: actor_critic + central_value: True + + mlp: + units: [512, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + rnn: + name: lstm + units: 1024 + layers: 2 + before_mlp: True + concat_input: True + layer_norm: True + + player: + deterministic: False diff --git a/isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py new file mode 100644 index 000000000..86f111cbe --- /dev/null +++ b/isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py @@ -0,0 +1,133 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# SPDX-License-Identifier: Apache-2.0 + +"""Play/evaluate an RL-Games checkpoint using the Arena environment builder. + +Launch Isaac Sim Simulator first. +""" + +import math +from pathlib import Path + +from isaaclab.app import AppLauncher + +from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser +from isaaclab_arena_environments.cli import add_example_environments_cli_args + +parser = get_isaaclab_arena_cli_parser() +parser.add_argument("--video", action="store_true", default=False, help="Record videos during playback.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument( + "--agent_cfg_path", + type=Path, + default=Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml"), + help="Path to the RL-Games agent YAML configuration file.", +) +parser.add_argument("--checkpoint", type=str, required=True, help="Path to model checkpoint.") +parser.add_argument("--sigma", type=str, default=None, help="Override the policy's standard deviation.") +add_example_environments_cli_args(parser) +args_cli = parser.parse_args() + +if args_cli.video: + args_cli.enable_cameras = True + +if getattr(args_cli, "enable_pinocchio", False): + import pinocchio # noqa: F401 + +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import yaml + +import omni.log +from rl_games.common import env_configurations, vecenv +from rl_games.common.algo_observer import IsaacAlgoObserver +from rl_games.torch_runner import Runner + +from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict + +from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper + +from isaaclab_arena_environments.cli import get_arena_builder_from_cli + + +def main(): + """Play with RL-Games agent.""" + try: + arena_builder = get_arena_builder_from_cli(args_cli) + env_name, env_cfg = arena_builder.build_registered() + except Exception as e: + omni.log.error(f"Failed to parse environment configuration: {e}") + exit(1) + + with open(args_cli.agent_cfg_path) as f: + agent_cfg = yaml.safe_load(f) + + if args_cli.num_envs is not None: + env_cfg.scene.num_envs = args_cli.num_envs + if args_cli.device is not None: + env_cfg.sim.device = args_cli.device + agent_cfg["params"]["config"]["device"] = args_cli.device + agent_cfg["params"]["config"]["device_name"] = args_cli.device + + resume_path = retrieve_file_path(args_cli.checkpoint) + agent_cfg["params"]["load_checkpoint"] = True + agent_cfg["params"]["load_path"] = resume_path + print(f"[INFO]: Loading model checkpoint from: {resume_path}") + + env_cfg.seed = agent_cfg["params"]["seed"] + + log_dir = os.path.dirname(resume_path) + env_cfg.log_dir = log_dir + + env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "play"), + "step_trigger": lambda step: step == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during playback.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + rl_device = agent_cfg["params"]["config"]["device"] + clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf) + clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf) + obs_groups = agent_cfg["params"]["env"].get("obs_groups") + concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) + + env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups) + + vecenv.register( + "IsaacRlgWrapper", + lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs), + ) + env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env}) + + agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs + + runner = Runner(IsaacAlgoObserver()) + runner.load(agent_cfg) + runner.reset() + + play_sigma = float(args_cli.sigma) if args_cli.sigma is not None else None + runner.run({"train": False, "play": True, "sigma": play_sigma, "checkpoint": resume_path}) + + env.close() + + +if __name__ == "__main__": + main() + simulation_app.close() diff --git a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py new file mode 100644 index 000000000..d06f031e0 --- /dev/null +++ b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py @@ -0,0 +1,160 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# SPDX-License-Identifier: Apache-2.0 + +"""Train an RL agent with RL-Games using the Arena environment builder. + +Launch Isaac Sim Simulator first. +""" + +import math +from pathlib import Path + +from isaaclab.app import AppLauncher + +from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser +from isaaclab_arena_environments.cli import add_example_environments_cli_args + +parser = get_isaaclab_arena_cli_parser() +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") +parser.add_argument( + "--agent_cfg_path", + type=Path, + default=Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml"), + help="Path to the RL-Games agent YAML configuration file.", +) +parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint to resume from.") +parser.add_argument("--sigma", type=str, default=None, help="The policy's initial standard deviation.") +parser.add_argument("--max_iterations", type=int, default=None, help="Override max training epochs.") +parser.add_argument("--experiment_name", type=str, default=None, help="Override experiment name in config.") +add_example_environments_cli_args(parser) +args_cli = parser.parse_args() + +if args_cli.video: + args_cli.enable_cameras = True + +if getattr(args_cli, "enable_pinocchio", False): + import pinocchio # noqa: F401 + +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import yaml +from datetime import datetime + +import omni.log +from rl_games.common import env_configurations, vecenv +from rl_games.common.algo_observer import IsaacAlgoObserver +from rl_games.torch_runner import Runner + +from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict +from isaaclab.utils.io import dump_yaml + +from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper + +from isaaclab_arena_environments.cli import get_arena_builder_from_cli + + +def main(): + """Train with RL-Games agent.""" + try: + arena_builder = get_arena_builder_from_cli(args_cli) + env_name, env_cfg = arena_builder.build_registered() + except Exception as e: + omni.log.error(f"Failed to parse environment configuration: {e}") + exit(1) + + with open(args_cli.agent_cfg_path) as f: + agent_cfg = yaml.safe_load(f) + + if args_cli.num_envs is not None: + env_cfg.scene.num_envs = args_cli.num_envs + if args_cli.device is not None: + env_cfg.sim.device = args_cli.device + agent_cfg["params"]["config"]["device"] = args_cli.device + agent_cfg["params"]["config"]["device_name"] = args_cli.device + + if args_cli.max_iterations is not None: + agent_cfg["params"]["config"]["max_epochs"] = args_cli.max_iterations + + if args_cli.experiment_name is not None: + agent_cfg["params"]["config"]["name"] = args_cli.experiment_name + + if args_cli.checkpoint is not None: + resume_path = retrieve_file_path(args_cli.checkpoint) + agent_cfg["params"]["load_checkpoint"] = True + agent_cfg["params"]["load_path"] = resume_path + print(f"[INFO]: Loading model checkpoint from: {resume_path}") + + train_sigma = float(args_cli.sigma) if args_cli.sigma is not None else None + + env_cfg.seed = agent_cfg["params"]["seed"] + + config_name = agent_cfg["params"]["config"]["name"] + log_root_path = os.path.join("logs", "rl_games", config_name) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Logging experiment in directory: {log_root_path}") + + log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + agent_cfg["params"]["config"]["train_dir"] = log_root_path + agent_cfg["params"]["config"]["full_experiment_name"] = log_dir + + dump_yaml(os.path.join(log_root_path, log_dir, "params", "env.yaml"), env_cfg) + dump_yaml(os.path.join(log_root_path, log_dir, "params", "agent.yaml"), agent_cfg) + + env_cfg.log_dir = os.path.join(log_root_path, log_dir) + + env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_root_path, log_dir, "videos", "train"), + "step_trigger": lambda step: step % args_cli.video_interval == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + rl_device = agent_cfg["params"]["config"]["device"] + clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf) + clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf) + obs_groups = agent_cfg["params"]["env"].get("obs_groups") + concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) + + env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups) + + vecenv.register( + "IsaacRlgWrapper", + lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs), + ) + env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env}) + + agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs + + runner = Runner(IsaacAlgoObserver()) + runner.load(agent_cfg) + runner.reset() + + if args_cli.checkpoint is not None: + runner.run({"train": True, "play": False, "sigma": train_sigma, "checkpoint": resume_path}) + else: + runner.run({"train": True, "play": False, "sigma": train_sigma}) + + env.close() + + +if __name__ == "__main__": + main() + simulation_app.close() diff --git a/isaaclab_arena/tasks/nist_gear_insertion_task.py b/isaaclab_arena/tasks/nist_gear_insertion_task.py new file mode 100644 index 000000000..77df82f5f --- /dev/null +++ b/isaaclab_arena/tasks/nist_gear_insertion_task.py @@ -0,0 +1,523 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Simple gear insertion task for the assembled NIST board. + +The medium gear is a separate object that the robot must pick up and insert +onto the peg. The peg can be on the assembled board or on the gearbase +asset (gears_and_base; USD gearbase_and_gears.usd). Use :attr:`gear_base_asset` to specify the +asset whose pose + offset defines the peg (fixed asset + local offset); if omitted, the +assembled board is used. +""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import MISSING +from typing import TYPE_CHECKING + +import numpy as np + +import isaaclab.envs.mdp as mdp_isaac_lab +from isaaclab.envs.common import ViewerCfg +from isaaclab.managers import EventTermCfg, ObservationGroupCfg as ObsGroup, ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg, SceneEntityCfg, TerminationTermCfg +from isaaclab.utils import configclass + +from isaaclab_arena.assets.asset import Asset +from isaaclab_arena.metrics.metric_base import MetricBase +from isaaclab_arena.metrics.success_rate import SuccessRateMetric +from isaaclab_arena.tasks.observations import gear_insertion_observations +from isaaclab_arena_environments.mdp.observations import body_pos_in_env_frame, body_quat_canonical +from isaaclab_arena.tasks.rewards import gear_insertion_rewards +from isaaclab_arena.tasks.task_base import TaskBase +from isaaclab_arena.tasks.terminations import ( + gear_dropped_from_gripper, + gear_mesh_insertion_success, + gear_orientation_exceeded, +) +from isaaclab_arena.terms.events import place_gear_in_gripper +from isaaclab_arena.utils.cameras import get_viewer_cfg_look_at_object + +if TYPE_CHECKING: + from collections.abc import Sequence + + import torch + + +class NistGearInsertionTask(TaskBase): + """Gear insertion task: insert the medium gear onto the peg. + + Peg position is computed from a fixed asset (board or separate gear-base USD) + plus an offset in that asset's local frame (assembly peg-insert convention). Use + :attr:`gear_base_asset` when the peg is on the gearbase (gears_and_base / gearbase_and_gears.usd). + """ + + def __init__( + self, + assembled_board: Asset, + held_gear: Asset, + background_scene: Asset, + peg_offset_from_board: list[float] | None = None, + held_gear_base_offset: list[float] | None = None, + gear_peg_height: float = 0.02, + success_z_fraction: float = 0.30, + xy_threshold: float = 0.0025, + episode_length_s: float | None = None, + task_description: str | None = None, + start_in_gripper: bool = False, + num_arm_joints: int = 7, + hand_grasp_width: float = 0.03, + hand_close_width: float = 0.0, + gripper_joint_setter_func: Callable[ + [torch.Tensor, Sequence[int], Sequence[int], float], None + ] | None = None, + end_effector_body_name: str = "panda_hand", + grasp_rot_offset: list[float] | None = None, + grasp_offset: list[float] | None = None, + include_success_bonus: bool = True, + enable_randomization: bool = False, + arm_joint_names: str = "panda_joint.*", + finger_body_names: str = ".*finger", + peg_offset_xy_noise: float = 0.0, + gear_base_asset: Asset | None = None, + peg_offset_for_obs: list[float] | None = None, + disable_drop_terminations: bool = False, + engagement_xy_threshold: float | None = None, + success_bonus_weight: float | None = None, + include_insertion_regularizers: bool = False, + pos_action_threshold: float = 0.02, + rot_action_threshold: float = 0.097, + action_penalty_weight: float = -0.0005, + action_grad_penalty_weight: float = -0.01, + contact_penalty_weight: float = -0.001, + success_pred_error_weight: float = -1.0, + success_pred_error_delay: float = 0.25, + extra_event_terms: dict[str, EventTermCfg] | None = None, + ): + super().__init__(episode_length_s=episode_length_s) + self.assembled_board = assembled_board + self.held_gear = held_gear + self.background_scene = background_scene + self._gear_base_asset = gear_base_asset if gear_base_asset is not None else assembled_board + self.peg_offset_from_board = peg_offset_from_board or [2.025e-2, 0.0, 0.0] + self.peg_offset_for_obs = peg_offset_for_obs + self.held_gear_base_offset = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + self.peg_offset_xy_noise = peg_offset_xy_noise + self.gear_peg_height = gear_peg_height + self.success_z_fraction = success_z_fraction + self.xy_threshold = xy_threshold + self.start_in_gripper = start_in_gripper + self.num_arm_joints = num_arm_joints + self.hand_grasp_width = hand_grasp_width + self.hand_close_width = hand_close_width + self.gripper_joint_setter_func = gripper_joint_setter_func + self.end_effector_body_name = end_effector_body_name + self.grasp_rot_offset = grasp_rot_offset or [1.0, 0.0, 0.0, 0.0] + self.grasp_offset = grasp_offset or [0.0, 0.0, 0.0] + self.include_success_bonus = include_success_bonus + self.enable_randomization = enable_randomization + self.arm_joint_names = arm_joint_names + self.finger_body_names = finger_body_names + self.disable_drop_terminations = disable_drop_terminations + self.engagement_xy_threshold = engagement_xy_threshold + self.success_bonus_weight = success_bonus_weight + self.include_insertion_regularizers = include_insertion_regularizers + self.pos_action_threshold = pos_action_threshold + self.rot_action_threshold = rot_action_threshold + self.action_penalty_weight = action_penalty_weight + self.action_grad_penalty_weight = action_grad_penalty_weight + self.contact_penalty_weight = contact_penalty_weight + self.success_pred_error_weight = success_pred_error_weight + self.success_pred_error_delay = success_pred_error_delay + self.extra_event_terms = extra_event_terms or {} + self.task_description = ( + f"Insert the {held_gear.name} onto the gear base on the {assembled_board.name}" + if task_description is None + else task_description + ) + + def get_scene_cfg(self): + return None + + def get_observation_cfg(self): + peg_obs = self.peg_offset_for_obs if self.peg_offset_for_obs is not None else self.peg_offset_from_board + return _GearInsertionObservationsCfg( + gear_name=self.held_gear.name, + board_name=self._gear_base_asset.name, + peg_offset=peg_obs, + held_gear_base_offset=self.held_gear_base_offset, + ) + + def get_rewards_cfg(self): + return _GearInsertionRewardsCfg( + gear_name=self.held_gear.name, + board_name=self._gear_base_asset.name, + peg_offset=self.peg_offset_from_board, + held_gear_base_offset=self.held_gear_base_offset, + include_success_bonus=self.include_success_bonus, + peg_offset_xy_noise=self.peg_offset_xy_noise, + gear_peg_height=self.gear_peg_height, + success_z_fraction=self.success_z_fraction, + xy_threshold=self.xy_threshold, + engagement_xy_threshold=self.engagement_xy_threshold, + success_bonus_weight=self.success_bonus_weight, + include_insertion_regularizers=self.include_insertion_regularizers, + pos_action_threshold=self.pos_action_threshold, + rot_action_threshold=self.rot_action_threshold, + action_penalty_weight=self.action_penalty_weight, + action_grad_penalty_weight=self.action_grad_penalty_weight, + contact_penalty_weight=self.contact_penalty_weight, + success_pred_error_weight=self.success_pred_error_weight, + success_pred_error_delay=self.success_pred_error_delay, + ) + + def get_termination_cfg(self): + success = TerminationTermCfg( + func=gear_mesh_insertion_success, + params={ + "held_object_cfg": SceneEntityCfg(self.held_gear.name), + "fixed_object_cfg": SceneEntityCfg(self._gear_base_asset.name), + "gear_base_offset": self.peg_offset_from_board, + "held_gear_base_offset": self.held_gear_base_offset, + "gear_peg_height": self.gear_peg_height, + "success_z_fraction": self.success_z_fraction, + "xy_threshold": self.xy_threshold, + }, + ) + object_dropped = TerminationTermCfg( + func=mdp_isaac_lab.root_height_below_minimum, + params={ + "minimum_height": self.background_scene.object_min_z, + "asset_cfg": SceneEntityCfg(self.held_gear.name), + }, + ) + cfg = _TerminationsCfg(success=success, object_dropped=object_dropped) + if self.start_in_gripper: + cfg.gear_dropped_from_gripper = TerminationTermCfg( + func=gear_dropped_from_gripper, + params={ + "gear_cfg": SceneEntityCfg(self.held_gear.name), + "robot_cfg": SceneEntityCfg("robot"), + "ee_body_name": self.end_effector_body_name, + "distance_threshold": 0.15, + }, + ) + if self.disable_drop_terminations: + cfg.object_dropped = None + cfg.gear_dropped_from_gripper = None + return cfg + + def get_events_cfg(self): + cfg = _EventsCfg() + if self.start_in_gripper and self.gripper_joint_setter_func is not None: + cfg.place_gear = EventTermCfg( + func=place_gear_in_gripper, + mode="reset", + params={ + "gear_cfg": SceneEntityCfg(self.held_gear.name), + "num_arm_joints": self.num_arm_joints, + "hand_grasp_width": self.hand_grasp_width, + "hand_close_width": self.hand_close_width, + "gripper_joint_setter_func": self.gripper_joint_setter_func, + "end_effector_body_name": self.end_effector_body_name, + "grasp_rot_offset": self.grasp_rot_offset, + "grasp_offset": self.grasp_offset, + }, + ) + if self.enable_randomization: + cfg.robot_actuator_gains = EventTermCfg( + func=mdp_isaac_lab.randomize_actuator_gains, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=[self.arm_joint_names]), + "stiffness_distribution_params": (0.75, 1.5), + "damping_distribution_params": (0.3, 3.0), + "operation": "scale", + "distribution": "log_uniform", + }, + ) + cfg.robot_joint_friction = EventTermCfg( + func=mdp_isaac_lab.randomize_joint_parameters, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=[self.arm_joint_names]), + "friction_distribution_params": (0.3, 0.7), + "operation": "add", + "distribution": "uniform", + }, + ) + cfg.gear_physics_material = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg(self.held_gear.name, body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + cfg.board_physics_material = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg(self._gear_base_asset.name, body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + cfg.robot_finger_physics_material = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=self.finger_body_names), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + for name, term_cfg in self.extra_event_terms.items(): + setattr(cfg, name, term_cfg) + return cfg + + def get_prompt(self): + raise NotImplementedError("Function not implemented yet.") + + def get_mimic_env_cfg(self, embodiment_name: str): + raise NotImplementedError("Function not implemented yet.") + + def get_metrics(self) -> list[MetricBase]: + return [SuccessRateMetric()] + + def get_viewer_cfg(self) -> ViewerCfg: + return get_viewer_cfg_look_at_object( + lookat_object=self.held_gear, + offset=np.array([1.5, -0.5, 1.0]), + ) + + +@configclass +class _TerminationsCfg: + """Termination terms for the gear insertion task.""" + + time_out: TerminationTermCfg = TerminationTermCfg(func=mdp_isaac_lab.time_out) + success: TerminationTermCfg = MISSING + object_dropped: TerminationTermCfg | None = MISSING + gear_dropped_from_gripper: TerminationTermCfg | None = None + gear_orientation_exceeded: TerminationTermCfg | None = None + + +@configclass +class _EventsCfg: + """Events: reset to default poses plus optional deploy-style randomization.""" + + reset_all: EventTermCfg = EventTermCfg( + func=mdp_isaac_lab.reset_scene_to_default, + mode="reset", + params={"reset_joint_targets": True}, + ) + fixed_asset_pose: EventTermCfg | None = None + place_gear: EventTermCfg | None = None + gripper_init_yaw_noise: EventTermCfg | None = None + robot_actuator_gains: EventTermCfg | None = None + robot_joint_friction: EventTermCfg | None = None + gear_physics_material: EventTermCfg | None = None + board_physics_material: EventTermCfg | None = None + robot_finger_physics_material: EventTermCfg | None = None + held_asset_pos_noise: EventTermCfg | None = None + + +@configclass +class _GearInsertionObservationsCfg: + """Task-specific observations for the gear insertion task.""" + + task_obs: ObsGroup = MISSING + + def __init__( + self, + gear_name: str, + board_name: str, + peg_offset: list[float], + held_gear_base_offset: list[float] | None = None, + ): + hgo = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + @configclass + class _TaskObsCfg(ObsGroup): + gear_pos = ObsTerm( + func=gear_insertion_observations.gear_pos_in_env_frame, + params={"gear_cfg": SceneEntityCfg(gear_name)}, + ) + gear_quat = ObsTerm( + func=gear_insertion_observations.gear_quat_canonical, + params={"gear_cfg": SceneEntityCfg(gear_name)}, + ) + peg_pos = ObsTerm( + func=gear_insertion_observations.peg_pos_in_env_frame, + params={"board_cfg": SceneEntityCfg(board_name), "peg_offset": peg_offset}, + ) + board_quat = ObsTerm( + func=gear_insertion_observations.board_quat_canonical, + params={"board_cfg": SceneEntityCfg(board_name)}, + ) + peg_delta = ObsTerm( + func=gear_insertion_observations.peg_delta_from_held_gear_base, + params={ + "gear_cfg": SceneEntityCfg(gear_name), + "board_cfg": SceneEntityCfg(board_name), + "peg_offset": peg_offset, + "held_gear_base_offset": hgo, + }, + ) + joint_pos = ObsTerm( + func=mdp_isaac_lab.joint_pos_rel, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + joint_vel = ObsTerm( + func=mdp_isaac_lab.joint_vel_rel, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + ee_pos_noiseless = ObsTerm( + func=body_pos_in_env_frame, + params={"body_name": "panda_fingertip_centered"}, + ) + ee_quat_noiseless = ObsTerm( + func=body_quat_canonical, + params={"body_name": "panda_fingertip_centered"}, + ) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + self.task_obs = _TaskObsCfg() + + +@configclass +class _GearInsertionRewardsCfg: + """Keypoint squashing, bonuses, and optional insertion regularizers for gear insertion.""" + + kp_baseline: RewardTermCfg = MISSING + kp_coarse: RewardTermCfg = MISSING + kp_fine: RewardTermCfg = MISSING + engagement_bonus: RewardTermCfg | None = None + success_bonus: RewardTermCfg | None = None + + action_penalty_asset: RewardTermCfg | None = None + action_grad_penalty: RewardTermCfg | None = None + contact_penalty: RewardTermCfg | None = None + success_pred_error: RewardTermCfg | None = None + + def __init__( + self, + gear_name: str, + board_name: str, + peg_offset: list[float], + held_gear_base_offset: list[float] | None = None, + include_success_bonus: bool = True, + peg_offset_xy_noise: float = 0.0, + gear_peg_height: float = 0.02, + success_z_fraction: float = 0.05, + xy_threshold: float = 0.0025, + engagement_xy_threshold: float | None = None, + success_bonus_weight: float | None = None, + include_insertion_regularizers: bool = False, + pos_action_threshold: float = 0.02, + rot_action_threshold: float = 0.097, + action_penalty_weight: float = -0.0005, + action_grad_penalty_weight: float = -0.01, + contact_penalty_weight: float = -0.001, + success_pred_error_weight: float = -1.0, + success_pred_error_delay: float = 0.25, + ): + self.action_penalty_asset = None + self.action_grad_penalty = None + self.contact_penalty = None + self.success_pred_error = None + hgo = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + common_params = { + "gear_cfg": SceneEntityCfg(gear_name), + "board_cfg": SceneEntityCfg(board_name), + "peg_offset": peg_offset, + "held_gear_base_offset": hgo, + "keypoint_scale": 0.15, + "num_keypoints": 4, + "peg_offset_xy_noise": peg_offset_xy_noise, + } + self.kp_baseline = RewardTermCfg( + func=gear_insertion_rewards.gear_peg_keypoint_squashing, + weight=1.0, + params={**common_params, "squash_a": 5.0, "squash_b": 4.0}, + ) + self.kp_coarse = RewardTermCfg( + func=gear_insertion_rewards.gear_peg_keypoint_squashing, + weight=1.0, + params={**common_params, "squash_a": 50.0, "squash_b": 2.0}, + ) + self.kp_fine = RewardTermCfg( + func=gear_insertion_rewards.gear_peg_keypoint_squashing, + weight=1.0, + params={**common_params, "squash_a": 100.0, "squash_b": 0.0}, + ) + bonus_params = { + "gear_cfg": SceneEntityCfg(gear_name), + "board_cfg": SceneEntityCfg(board_name), + "peg_offset": peg_offset, + "held_gear_base_offset": hgo, + } + if include_success_bonus: + self.engagement_bonus = RewardTermCfg( + func=gear_insertion_rewards.gear_insertion_engagement_bonus, + weight=1.0, + params={ + **bonus_params, + "engage_z_fraction": 0.90, + "xy_threshold": 0.015 if engagement_xy_threshold is None else engagement_xy_threshold, + }, + ) + self.success_bonus = RewardTermCfg( + func=gear_insertion_rewards.gear_insertion_success_bonus, + weight=1.0 if success_bonus_weight is None else success_bonus_weight, + params={**bonus_params, "success_z_fraction": success_z_fraction}, + ) + else: + self.engagement_bonus = None + self.success_bonus = None + if include_insertion_regularizers: + self.action_penalty_asset = RewardTermCfg( + func=gear_insertion_rewards.osc_action_magnitude_penalty, + weight=action_penalty_weight, + params={ + "pos_action_threshold": pos_action_threshold, + "rot_action_threshold": rot_action_threshold, + }, + ) + self.action_grad_penalty = RewardTermCfg( + func=gear_insertion_rewards.osc_action_delta_penalty, + weight=action_grad_penalty_weight, + params={}, + ) + self.contact_penalty = RewardTermCfg( + func=gear_insertion_rewards.wrist_contact_force_penalty, + weight=contact_penalty_weight, + params={}, + ) + self.success_pred_error = RewardTermCfg( + func=gear_insertion_rewards.success_prediction_error, + weight=success_pred_error_weight, + params={ + "gear_cfg": SceneEntityCfg(gear_name), + "board_cfg": SceneEntityCfg(board_name), + "peg_offset": peg_offset, + "held_gear_base_offset": hgo, + "gear_peg_height": gear_peg_height, + "success_z_fraction": success_z_fraction, + "xy_threshold": xy_threshold, + "delay_until_ratio": success_pred_error_delay, + }, + ) diff --git a/isaaclab_arena/tasks/observations/gear_insertion_observations.py b/isaaclab_arena/tasks/observations/gear_insertion_observations.py new file mode 100644 index 000000000..2a4961432 --- /dev/null +++ b/isaaclab_arena/tasks/observations/gear_insertion_observations.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Observation terms for the NIST gear insertion task.""" + +import torch + +import isaaclab.utils.math as math_utils +from isaaclab.assets import RigidObject +from isaaclab.envs import ManagerBasedRLEnv +from isaaclab.managers import SceneEntityCfg + + +def gear_pos_in_env_frame( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), +) -> torch.Tensor: + """Position of the held gear relative to the environment origin.""" + gear: RigidObject = env.scene[gear_cfg.name] + return gear.data.root_pos_w - env.scene.env_origins + + +def gear_quat_canonical( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), +) -> torch.Tensor: + """Orientation of the held gear, canonicalized so w >= 0.""" + gear: RigidObject = env.scene[gear_cfg.name] + quat = gear.data.root_quat_w + sign = torch.where(quat[:, 0:1] < 0, -1.0, 1.0) + return quat * sign + + +def board_pos_in_env_frame( + env: ManagerBasedRLEnv, + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), +) -> torch.Tensor: + """Position of the board/base relative to the environment origin.""" + board: RigidObject = env.scene[board_cfg.name] + return board.data.root_pos_w - env.scene.env_origins + + +def peg_pos_in_env_frame( + env: ManagerBasedRLEnv, + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + peg_offset: list[float] = [0.0, 0.0, 0.0], +) -> torch.Tensor: + """Target peg position: fixed asset pose + offset in its local frame.""" + board: RigidObject = env.scene[board_cfg.name] + pos = board.data.root_pos_w - env.scene.env_origins + quat = board.data.root_quat_w + offset = torch.tensor(peg_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + return pos + math_utils.quat_apply(quat, offset) + + +def board_quat_canonical( + env: ManagerBasedRLEnv, + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), +) -> torch.Tensor: + """Orientation of the assembled board / peg, canonicalized so w >= 0.""" + board: RigidObject = env.scene[board_cfg.name] + quat = board.data.root_quat_w + sign = torch.where(quat[:, 0:1] < 0, -1.0, 1.0) + return quat * sign + + +def held_gear_base_pos_in_env_frame( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], +) -> torch.Tensor: + """Position of the held gear's insertion point (root + offset in gear frame) in env frame.""" + gear: RigidObject = env.scene[gear_cfg.name] + gear_pos = gear.data.root_pos_w - env.scene.env_origins + gear_quat = gear.data.root_quat_w + held_off = torch.tensor( + held_gear_base_offset, device=env.device, dtype=torch.float32 + ).unsqueeze(0).expand(env.num_envs, 3) + return gear_pos + math_utils.quat_apply(gear_quat, held_off) + + +def peg_delta_from_held_gear_base( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + peg_offset: list[float] = [0.0, 0.0, 0.0], + held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], +) -> torch.Tensor: + """Vector from held gear insertion point to peg (peg_pos - held_gear_base_pos). Positive = peg is ahead in that axis.""" + held_base = held_gear_base_pos_in_env_frame(env, gear_cfg, held_gear_base_offset) + peg_pos = peg_pos_in_env_frame(env, board_cfg, peg_offset) + return peg_pos - held_base diff --git a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py new file mode 100644 index 000000000..b33cf702e --- /dev/null +++ b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py @@ -0,0 +1,301 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Reward terms for the NIST gear insertion task. + +Includes squashing-function keypoint rewards (baseline, coarse, fine), engagement +and success bonuses, and optional OSC / contact regularizers. Design follows +common assembly peg-insert RL practice; see e.g. Appendix B of +https://arxiv.org/pdf/2408.04587 for related shaping. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch + +from isaaclab.assets import RigidObject +from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg +from isaaclab.utils.math import combine_frame_transforms, quat_apply + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def _get_keypoint_offsets(num_keypoints: int = 4, device: torch.device | None = None) -> torch.Tensor: + """Uniformly-spaced keypoints along the Z-axis, centered at 0.""" + offsets = torch.zeros((num_keypoints, 3), device=device, dtype=torch.float32) + offsets[:, 2] = torch.linspace(0.0, 1.0, num_keypoints, device=device) - 0.5 + return offsets + + +class _KeypointDistanceComputer: + """Pre-cached keypoint distance calculator matching Factory's pattern.""" + + def __init__(self, num_envs: int, device: torch.device, num_keypoints: int = 4): + self.offsets_base = _get_keypoint_offsets(num_keypoints=num_keypoints, device=device) + self.n_kp = self.offsets_base.shape[0] + self.identity_quat = ( + torch.tensor([[1.0, 0.0, 0.0, 0.0]], device=device, dtype=torch.float32) + .repeat(num_envs * self.n_kp, 1) + .contiguous() + ) + self.offsets_buf = torch.zeros(num_envs, self.n_kp, 3, device=device, dtype=torch.float32) + + def compute( + self, + pos_a: torch.Tensor, + quat_a: torch.Tensor, + pos_b: torch.Tensor, + quat_b: torch.Tensor, + scale: float = 1.0, + ) -> torch.Tensor: + """Returns mean keypoint L2 distance, shape (num_envs,).""" + n = pos_a.shape[0] + offsets = self.offsets_base * scale + self.offsets_buf[:n] = offsets.unsqueeze(0) + off_flat = self.offsets_buf[:n].reshape(-1, 3) + iq = self.identity_quat[: n * self.n_kp] + + def _expand(t: torch.Tensor) -> torch.Tensor: + return t.unsqueeze(1).expand(-1, self.n_kp, -1).reshape(-1, t.shape[-1]) + + kp_a, _ = combine_frame_transforms(_expand(pos_a), _expand(quat_a), off_flat, iq) + kp_b, _ = combine_frame_transforms(_expand(pos_b), _expand(quat_b), off_flat, iq) + per_kp_dist = torch.norm(kp_b.reshape(n, self.n_kp, 3) - kp_a.reshape(n, self.n_kp, 3), p=2, dim=-1) + return per_kp_dist.mean(-1) + + +def _squashing_fn(x: torch.Tensor, a: float, b: float) -> torch.Tensor: + """Squashing function r(x) = 1 / (exp(a*x) + b + exp(-a*x)).""" + return 1.0 / (torch.exp(a * x) + b + torch.exp(-a * x)) + + +class gear_peg_keypoint_squashing(ManagerTermBase): + """Squashing-function keypoint reward for gear vs peg alignment. + + Instantiate three times with different [a, b] for baseline/coarse/fine. + Supports optional per-episode XY noise on the peg offset. + """ + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self.gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] + self.board_cfg: SceneEntityCfg = cfg.params["board_cfg"] + self.peg_offset = torch.tensor(cfg.params.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device, dtype=torch.float32) + self.held_gear_base_offset = torch.tensor( + cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0]), device=env.device, dtype=torch.float32 + ) + self._xy_noise_range = cfg.params.get("peg_offset_xy_noise", 0.0) + num_keypoints = cfg.params.get("num_keypoints", 4) + self.kp = _KeypointDistanceComputer(env.num_envs, env.device, num_keypoints=num_keypoints) + self._offset_noise = torch.zeros(env.num_envs, 3, device=env.device, dtype=torch.float32) + + def reset(self, env_ids: torch.Tensor) -> None: + if self._xy_noise_range > 0.0: + n = len(env_ids) + self._offset_noise[env_ids, 0] = (torch.rand(n, device=self._offset_noise.device) * 2 - 1) * self._xy_noise_range + self._offset_noise[env_ids, 1] = (torch.rand(n, device=self._offset_noise.device) * 2 - 1) * self._xy_noise_range + + def __call__( + self, + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + peg_offset: list[float] = [0.0, 0.0, 0.0], + held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + keypoint_scale: float = 0.15, + num_keypoints: int = 4, + squash_a: float = 50.0, + squash_b: float = 2.0, + peg_offset_xy_noise: float = 0.0, + ) -> torch.Tensor: + gear: RigidObject = env.scene[self.gear_cfg.name] + gear_pos = gear.data.root_pos_w - env.scene.env_origins + gear_quat = gear.data.root_quat_w + n = gear_pos.shape[0] + held_offset = self.held_gear_base_offset.unsqueeze(0).expand(n, 3) + held_base_pos = gear_pos + quat_apply(gear_quat, held_offset) + + board: RigidObject = env.scene[self.board_cfg.name] + pos = board.data.root_pos_w[:n] - env.scene.env_origins[:n] + quat = board.data.root_quat_w[:n] + offset = self.peg_offset.unsqueeze(0).expand(n, 3) + target_pos = pos + quat_apply(quat, offset) + self._offset_noise[:n] + target_quat = quat + kp_dist = self.kp.compute(target_pos, target_quat, held_base_pos, gear_quat, scale=keypoint_scale) + return _squashing_fn(kp_dist, squash_a, squash_b) + + +def _check_gear_position( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + board_cfg: SceneEntityCfg, + peg_offset: list[float], + held_gear_base_offset: list[float], + gear_peg_height: float, + z_fraction: float, + xy_threshold: float, +) -> torch.Tensor: + """Return bool tensor indicating whether gear meets XY centering and Z depth criteria. + + Compares the held gear's insertion base (root + offset in gear frame) against + the peg position (fixed asset + offset in fixed asset frame). + """ + gear: RigidObject = env.scene[gear_cfg.name] + gear_pos = gear.data.root_pos_w - env.scene.env_origins + gear_quat = gear.data.root_quat_w + held_off = torch.tensor(held_gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + held_base_pos = gear_pos + quat_apply(gear_quat, held_off) + + board: RigidObject = env.scene[board_cfg.name] + pos = board.data.root_pos_w - env.scene.env_origins + quat = board.data.root_quat_w + offset = torch.tensor(peg_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + peg_pos = pos + quat_apply(quat, offset) + + xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) + z_diff = held_base_pos[:, 2] - peg_pos[:, 2] + height_threshold = gear_peg_height * z_fraction + + is_centered = xy_dist < xy_threshold + is_inserted = z_diff < height_threshold + return is_centered & is_inserted + + +def gear_insertion_engagement_bonus( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + peg_offset: list[float] = [0.0, 0.0, 0.0], + held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + gear_peg_height: float = 0.02, + engage_z_fraction: float = 0.90, + xy_threshold: float = 0.015, +) -> torch.Tensor: + """Bonus when the gear is partially engaged on the peg.""" + return _check_gear_position( + env, gear_cfg, board_cfg, peg_offset, held_gear_base_offset, gear_peg_height, engage_z_fraction, xy_threshold, + ).float() + + +def gear_insertion_success_bonus( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + peg_offset: list[float] = [0.0, 0.0, 0.0], + held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + gear_peg_height: float = 0.02, + success_z_fraction: float = 0.05, + xy_threshold: float = 0.0025, +) -> torch.Tensor: + """Bonus when the gear is fully inserted (binary success geometry).""" + return _check_gear_position( + env, gear_cfg, board_cfg, peg_offset, held_gear_base_offset, gear_peg_height, success_z_fraction, xy_threshold, + ).float() + + +class osc_action_magnitude_penalty(ManagerTermBase): + """Penalize large asset-relative position/rotation commands.""" + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self._pos_thresh: float = cfg.params.get("pos_action_threshold", 0.02) + self._rot_thresh: float = cfg.params.get("rot_action_threshold", 0.097) + + def __call__( + self, + env: ManagerBasedRLEnv, + pos_action_threshold: float = 0.02, + rot_action_threshold: float = 0.097, + ) -> torch.Tensor: + action_term = env.action_manager._terms["arm_action"] + pos_error = torch.norm(action_term.delta_pos, p=2, dim=-1) / self._pos_thresh + rot_error = torch.abs(action_term.delta_yaw) / self._rot_thresh + return pos_error + rot_error + + +class osc_action_delta_penalty(ManagerTermBase): + """Penalize jerky actions using smoothed action deltas.""" + + def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: + action_term = env.action_manager._terms["arm_action"] + return torch.norm( + action_term._smoothed_actions - action_term._prev_smoothed_actions, + p=2, + dim=-1, + ) + + +class wrist_contact_force_penalty(ManagerTermBase): + """Penalize wrist/contact force magnitude above per-episode threshold.""" + + def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: + action_term = env.action_manager._terms["arm_action"] + force_mag = torch.norm(action_term.force_smooth, p=2, dim=-1) + return torch.nn.functional.relu(force_mag - action_term.contact_thresholds) + + +class success_prediction_error(ManagerTermBase): + """Penalize incorrect success predictions from the 7th action dimension. + + ``true_success`` uses held insertion base vs target peg (same geometry as + ``gear_insertion_success_bonus`` / ``gear_mesh_insertion_success``), not the gear + rigid-body root, consistent with common assembly peg-insert benchmarks. + """ + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self._pred_scale = 0.0 + self._delay_until_ratio: float = cfg.params.get("delay_until_ratio", 0.25) + self._gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] + self._board_cfg: SceneEntityCfg = cfg.params["board_cfg"] + hgo = cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0]) + self._held_gear_base_offset = torch.tensor(hgo, device=env.device, dtype=torch.float32) + self._peg_offset = torch.tensor(cfg.params.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device) + self._gear_peg_height: float = cfg.params.get("gear_peg_height", 0.02) + self._success_z_fraction: float = cfg.params.get("success_z_fraction", 0.05) + self._xy_threshold: float = cfg.params.get("xy_threshold", 0.0025) + + def __call__( + self, + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + peg_offset: list[float] | None = None, + held_gear_base_offset: list[float] | None = None, + gear_peg_height: float = 0.02, + success_z_fraction: float = 0.05, + xy_threshold: float = 0.0025, + delay_until_ratio: float = 0.25, + ) -> torch.Tensor: + gear: RigidObject = env.scene[self._gear_cfg.name] + gear_pos = gear.data.root_pos_w - env.scene.env_origins + gear_quat = gear.data.root_quat_w + n = gear_pos.shape[0] + held_off = self._held_gear_base_offset.unsqueeze(0).expand(n, 3) + held_base_pos = gear_pos + quat_apply(gear_quat, held_off) + + board: RigidObject = env.scene[self._board_cfg.name] + board_pos = board.data.root_pos_w - env.scene.env_origins + board_quat = board.data.root_quat_w + peg_off = self._peg_offset.unsqueeze(0).expand(n, 3) + peg_pos = board_pos + quat_apply(board_quat, peg_off) + + xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) + z_diff = held_base_pos[:, 2] - peg_pos[:, 2] + height_threshold = self._gear_peg_height * self._success_z_fraction + true_success = (xy_dist < self._xy_threshold) & (z_diff < height_threshold) + + if true_success.float().mean() >= self._delay_until_ratio: + self._pred_scale = 1.0 + + arm_osc_action = env.action_manager._terms["arm_action"] + pred = (arm_osc_action.success_pred + 1.0) / 2.0 + error = torch.abs(true_success.float() - pred) + return error * self._pred_scale + + diff --git a/isaaclab_arena/tasks/terminations.py b/isaaclab_arena/tasks/terminations.py index d8f18d851..1e1d94d5a 100644 --- a/isaaclab_arena/tasks/terminations.py +++ b/isaaclab_arena/tasks/terminations.py @@ -7,7 +7,9 @@ import torch import warp as wp -from isaaclab.assets import RigidObject + +import isaaclab.utils.math as math_utils +from isaaclab.assets import Articulation, RigidObject from isaaclab.envs import ManagerBasedRLEnv from isaaclab.envs.mdp.terminations import root_height_below_minimum from isaaclab.managers import SceneEntityCfg @@ -255,3 +257,89 @@ def root_height_below_minimum_multi_objects( outs_tensor = torch.stack(outs, dim=0) # [X, N] terminated = outs_tensor.any(dim=0) # [N], bool return terminated + + +def gear_mesh_insertion_success( + env: ManagerBasedRLEnv, + held_object_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + fixed_object_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + held_gear_base_offset: list[float] | None = None, + gear_peg_height: float = 0.02, + success_z_fraction: float = 0.80, + xy_threshold: float = 0.0025, +) -> torch.Tensor: + """Terminate when the gear is inserted onto the peg to the required depth. + + Checks that the held gear's base (root + held_gear_base_offset in gear frame) + is centered on the peg (XY) and lowered past a fraction of the peg height (Z). + Peg position is fixed_asset pose + gear_base_offset in the fixed asset's local frame. + """ + held_object: RigidObject = env.scene[held_object_cfg.name] + fixed_object: RigidObject = env.scene[fixed_object_cfg.name] + + held_root = held_object.data.root_pos_w - env.scene.env_origins + held_quat = held_object.data.root_quat_w + h_offset = held_gear_base_offset if held_gear_base_offset is not None else gear_base_offset + held_off = torch.tensor(h_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + held_base_pos = held_root + math_utils.quat_apply(held_quat, held_off) + + fixed_pos = fixed_object.data.root_pos_w - env.scene.env_origins + fixed_quat = fixed_object.data.root_quat_w + offset = torch.tensor(gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + peg_pos = fixed_pos + math_utils.quat_apply(fixed_quat, offset) + + xy_dist = torch.linalg.vector_norm(peg_pos[:, 0:2] - held_base_pos[:, 0:2], dim=1) + is_centered = xy_dist < xy_threshold + + z_disp = held_base_pos[:, 2] - peg_pos[:, 2] + height_threshold = gear_peg_height * success_z_fraction + is_inserted = z_disp < height_threshold + + return torch.logical_and(is_centered, is_inserted) + + +def gear_dropped_from_gripper( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ee_body_name: str = "panda_hand", + distance_threshold: float = 0.15, +) -> torch.Tensor: + """Reset when the gear has fallen too far from the end-effector.""" + gear: RigidObject = env.scene[gear_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + eef_indices, _ = robot.find_bodies([ee_body_name]) + ee_pos = robot.data.body_pos_w[:, eef_indices[0]] + gear_pos = gear.data.root_pos_w + distance = torch.norm(gear_pos - ee_pos, dim=-1) + return distance > distance_threshold + + +def gear_orientation_exceeded( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ee_body_name: str = "panda_hand", + roll_threshold_deg: float = 15.0, + pitch_threshold_deg: float = 15.0, +) -> torch.Tensor: + """Reset when the gear has tilted too far relative to the end-effector.""" + if not hasattr(env, "_initial_gear_ee_relative_quat"): + return torch.zeros(env.num_envs, dtype=torch.bool, device=env.device) + + gear: RigidObject = env.scene[gear_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + eef_indices, _ = robot.find_bodies([ee_body_name]) + eef_quat = robot.data.body_quat_w[:, eef_indices[0]] + current_relative = math_utils.quat_mul(gear.data.root_quat_w, math_utils.quat_conjugate(eef_quat)) + + initial_relative = env._initial_gear_ee_relative_quat + deviation = math_utils.quat_mul(current_relative, math_utils.quat_conjugate(initial_relative)) + + roll, pitch, _yaw = math_utils.euler_xyz_from_quat(deviation) + + roll_limit = torch.deg2rad(torch.tensor(roll_threshold_deg, device=env.device)) + pitch_limit = torch.deg2rad(torch.tensor(pitch_threshold_deg, device=env.device)) + return (torch.abs(roll) > roll_limit) | (torch.abs(pitch) > pitch_limit) diff --git a/isaaclab_arena/terms/events.py b/isaaclab_arena/terms/events.py index e0e859667..4ed0b0cd8 100644 --- a/isaaclab_arena/terms/events.py +++ b/isaaclab_arena/terms/events.py @@ -3,11 +3,20 @@ # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from collections.abc import Callable, Sequence +from typing import Any, cast + import torch import warp as wp + +import isaaclab.utils.math as math_utils +from isaaclab.assets import Articulation from isaaclab.envs import ManagerBasedEnv -from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg +from isaaclab_tasks.direct.automate import factory_control as fc from isaaclab_arena.utils.pose import Pose from isaaclab_arena.utils.velocity import Velocity @@ -77,3 +86,158 @@ def reset_all_articulation_joints(env: ManagerBasedEnv, env_ids: torch.Tensor): default_joint_vel = wp.to_torch(articulation_asset.data.default_joint_vel)[env_ids].clone() # set into the physics simulation articulation_asset.write_joint_state_to_sim(default_joint_pos, default_joint_vel, env_ids=env_ids) + + +class place_gear_in_gripper(ManagerTermBase): + """Solve IK to position the gripper at the gear, then close fingers. + + Uses iterative DLS IK to move the end-effector to the gear's pose + (with configurable grasp offset/orientation), then sets gripper joints. + """ + + def __init__(self, cfg: EventTermCfg, env: ManagerBasedEnv): + super().__init__(cfg, env) + + robot_cfg: SceneEntityCfg = cfg.params.get("robot_cfg", SceneEntityCfg("robot")) + self.robot: Articulation = env.scene[robot_cfg.name] + + gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] + self.gear = env.scene[gear_cfg.name] + + self.num_arm_joints: int = cast(int, cfg.params["num_arm_joints"]) + self.hand_grasp_width: float = cast(float, cfg.params["hand_grasp_width"]) + self.hand_close_width: float = cast(float, cfg.params["hand_close_width"]) + self.gripper_joint_setter_func: Callable[..., Any] = cast( + Callable[..., Any], cfg.params["gripper_joint_setter_func"], + ) + + grasp_rot_offset = cfg.params["grasp_rot_offset"] + self.grasp_rot_offset_tensor = ( + torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32) + .unsqueeze(0) + .repeat(env.num_envs, 1) + ) + + grasp_offset = cfg.params["grasp_offset"] + self.grasp_offset_tensor = torch.tensor( + grasp_offset, device=env.device, dtype=torch.float32 + ) + + ee_name: str = cast(str, cfg.params.get("end_effector_body_name", "panda_hand")) + eef_indices, _ = self.robot.find_bodies([ee_name]) + if not eef_indices: + raise ValueError(f"End-effector body '{ee_name}' not found in robot") + self.eef_idx: int = eef_indices[0] + self.jacobi_body_idx: int = self.eef_idx - 1 + + all_joints, _ = self.robot.find_joints([".*"]) + self.all_joints = all_joints + self.finger_joints = all_joints[self.num_arm_joints:] + + def __call__( + self, + env: ManagerBasedEnv, + env_ids: torch.Tensor, + robot_cfg: SceneEntityCfg | None = None, + gear_cfg: SceneEntityCfg | None = None, + num_arm_joints: int | None = None, + hand_grasp_width: float | None = None, + hand_close_width: float | None = None, + gripper_joint_setter_func: Callable | None = None, + end_effector_body_name: str | None = None, + grasp_rot_offset: list | None = None, + grasp_offset: list | None = None, + max_ik_iterations: int = 10, + pos_threshold: float = 1e-6, + rot_threshold: float = 1e-6, + ) -> None: + n = len(env_ids) + device = env.device + + grasp_rot_offset_tensor = self.grasp_rot_offset_tensor[env_ids] + hand_init_orn_noise_yaw = 0.0 + random_yaw = (2.0 * torch.rand(n, device=device) - 1.0) * hand_init_orn_noise_yaw + yaw_quat = math_utils.quat_from_euler_xyz( + torch.zeros(n, device=device), + torch.zeros(n, device=device), + random_yaw, + ) + grasp_rot_offset_tensor = math_utils.quat_mul(grasp_rot_offset_tensor, yaw_quat) + grasp_offset_batch = self.grasp_offset_tensor.unsqueeze(0).expand(n, -1) + + for _ in range(max_ik_iterations): + joint_pos = self.robot.data.joint_pos[env_ids].clone() + joint_vel = self.robot.data.joint_vel[env_ids].clone() + + gear_pos_w = self.gear.data.root_link_pos_w[env_ids].clone() + gear_quat_w = self.gear.data.root_link_quat_w[env_ids].clone() + + target_quat = math_utils.quat_mul(gear_quat_w, grasp_rot_offset_tensor) + target_pos = gear_pos_w + math_utils.quat_apply( + target_quat, grasp_offset_batch + ) + + eef_pos = self.robot.data.body_pos_w[env_ids, self.eef_idx] + eef_quat = self.robot.data.body_quat_w[env_ids, self.eef_idx] + + pos_error, aa_error = fc.get_pose_error( + fingertip_midpoint_pos=eef_pos, + fingertip_midpoint_quat=eef_quat, + ctrl_target_fingertip_midpoint_pos=target_pos, + ctrl_target_fingertip_midpoint_quat=target_quat, + jacobian_type="geometric", + rot_error_type="axis_angle", + ) + delta_hand_pose = torch.cat((pos_error, aa_error), dim=-1) + + if (torch.norm(pos_error, dim=-1).max() < pos_threshold + and torch.norm(aa_error, dim=-1).max() < rot_threshold): + break + + jacobians = self.robot.root_physx_view.get_jacobians().clone() + jacobian = jacobians[env_ids, self.jacobi_body_idx, :, :] + + delta_dof_pos = fc._get_delta_dof_pos( + delta_pose=delta_hand_pose, + ik_method="dls", + jacobian=jacobian, + device=device, + ) + + joint_pos = joint_pos + delta_dof_pos + joint_vel = torch.zeros_like(joint_pos) + + self.robot.set_joint_position_target(joint_pos, env_ids=env_ids) + self.robot.set_joint_velocity_target(joint_vel, env_ids=env_ids) + self.robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + + joint_pos = self.robot.data.joint_pos[env_ids].clone() + + for row_idx in range(n): + self.gripper_joint_setter_func( + joint_pos, [row_idx], self.finger_joints, self.hand_grasp_width, + ) + + self.robot.set_joint_position_target( + joint_pos, joint_ids=self.all_joints, env_ids=env_ids, + ) + self.robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + + for row_idx in range(n): + self.gripper_joint_setter_func( + joint_pos, [row_idx], self.finger_joints, self.hand_close_width, + ) + + self.robot.set_joint_position_target( + joint_pos, joint_ids=self.all_joints, env_ids=env_ids, + ) + + gear_quat = self.gear.data.root_quat_w[env_ids] + eef_quat = self.robot.data.body_quat_w[env_ids, self.eef_idx] + rel_quat = math_utils.quat_mul(gear_quat, math_utils.quat_conjugate(eef_quat)) + if not hasattr(env, "_initial_gear_ee_relative_quat"): + env._initial_gear_ee_relative_quat = torch.zeros( + env.num_envs, 4, device=env.device, dtype=torch.float32 + ) + env._initial_gear_ee_relative_quat[:, 0] = 1.0 + env._initial_gear_ee_relative_quat[env_ids] = rel_quat diff --git a/isaaclab_arena_environments/cli.py b/isaaclab_arena_environments/cli.py index 7c7cd0725..73db46043 100644 --- a/isaaclab_arena_environments/cli.py +++ b/isaaclab_arena_environments/cli.py @@ -23,6 +23,7 @@ from isaaclab_arena_environments.gr1_turn_stand_mixer_knob_environment import Gr1TurnStandMixerKnobEnvironment from isaaclab_arena_environments.kitchen_pick_and_place_environment import KitchenPickAndPlaceEnvironment from isaaclab_arena_environments.lift_object_environment import LiftObjectEnvironment +from isaaclab_arena_environments.nist_assembled_gearmesh_osc_environment import NISTAssembledGearMeshOSCEnvironment from isaaclab_arena_environments.pick_and_place_maple_table_environment import PickAndPlaceMapleTableEnvironment from isaaclab_arena_environments.press_button_environment import PressButtonEnvironment from isaaclab_arena_environments.tabletop_place_upright_environment import TableTopPlaceUprightEnvironment @@ -50,6 +51,7 @@ TableTopPlaceUprightEnvironment.name: TableTopPlaceUprightEnvironment, Gr1TurnStandMixerKnobEnvironment.name: Gr1TurnStandMixerKnobEnvironment, GR1TableMultiObjectNoCollisionEnvironment.name: GR1TableMultiObjectNoCollisionEnvironment, + NISTAssembledGearMeshOSCEnvironment.name: NISTAssembledGearMeshOSCEnvironment, } diff --git a/isaaclab_arena_environments/mdp/__init__.py b/isaaclab_arena_environments/mdp/__init__.py index c00282bc9..e422b79a9 100644 --- a/isaaclab_arena_environments/mdp/__init__.py +++ b/isaaclab_arena_environments/mdp/__init__.py @@ -11,4 +11,6 @@ from isaaclab_tasks.manager_based.manipulation.stack.mdp import * # noqa: F401, F403 from .env_callbacks import * # noqa: F401, F403 +from .nist_gear_insertion_osc_action import * # noqa: F401, F403 +from .observations import * # noqa: F401, F403 from .robot_configs import * # noqa: F401, F403 diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py new file mode 100644 index 000000000..5f2e08057 --- /dev/null +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py @@ -0,0 +1,252 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""OSC torque action term for NIST gear insertion: asset-relative commands, EMA smoothing, +roll/pitch lock, target clipping, and success prediction (7-D policy output). +""" + +from __future__ import annotations + +import math +import torch +from collections.abc import Sequence +from typing import TYPE_CHECKING + +import isaacsim.core.utils.torch as torch_utils +import isaaclab.utils.math as math_utils + +from isaaclab.envs.mdp.actions.actions_cfg import OperationalSpaceControllerActionCfg +from isaaclab.envs.mdp.actions.task_space_actions import OperationalSpaceControllerAction +from isaaclab.managers.action_manager import ActionTerm +from isaaclab.utils import configclass + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv + + +def _wrap_yaw(angle: torch.Tensor) -> torch.Tensor: + """Map yaw angles so the Franka joint-limit discontinuity (~-135 deg) is avoided.""" + return torch.where(angle > math.radians(235), angle - 2 * math.pi, angle) + + +def _randomize_gains( + default_values: torch.Tensor, + noise_levels: tuple[float, ...], + num_envs: int, + device: torch.device, +) -> torch.Tensor: + """Multiplicative gain randomization for position/rotation clip thresholds.""" + ndim = default_values.shape[-1] + noise = torch.rand(num_envs, ndim, device=device) * torch.tensor(noise_levels, device=device) + multiplier = 1.0 + noise + decrease = torch.rand(num_envs, ndim, device=device) > 0.5 + return default_values * torch.where(decrease, 1.0 / multiplier, multiplier) + + +class NistGearInsertionOscAction(OperationalSpaceControllerAction): + """Operational-space torque control for peg-style insertion with a 7-D policy. + + 3 position + 3 rotation + 1 success prediction. Asset-relative position, roll/pitch + locked, EMA smoothing, target clipping. Layout follows common assembly RL benchmarks. + """ + + cfg: NistGearInsertionOscActionCfg + + def __init__(self, cfg: NistGearInsertionOscActionCfg, env: ManagerBasedEnv): + super().__init__(cfg, env) + + self._smoothed_actions = torch.zeros(self.num_envs, 7, device=self.device) + self.ema_factor = torch.full((self.num_envs, 1), 0.05, device=self.device) + self._pos_bounds = torch.tensor(cfg.pos_action_bounds, device=self.device) + + _pt = torch.tensor(cfg.pos_action_threshold, device=self.device) + _rt = torch.tensor(cfg.rot_action_threshold, device=self.device) + self._default_pos_thresh = _pt.unsqueeze(0).expand(self.num_envs, -1).clone() + self._default_rot_thresh = _rt.unsqueeze(0).expand(self.num_envs, -1).clone() + self._pos_thresh = self._default_pos_thresh.clone() + self._rot_thresh = self._default_rot_thresh.clone() + + self._peg_offset = torch.tensor(cfg.peg_offset, device=self.device) + self._fixed_pos_noise_levels = torch.tensor(cfg.fixed_pos_noise_levels, device=self.device) + self.fixed_pos_noise = torch.zeros(self.num_envs, 3, device=self.device) + self.contact_thresholds = torch.full((self.num_envs,), 7.5, device=self.device) + self.force_smooth = torch.zeros(self.num_envs, 3, device=self.device) + self._prev_smoothed_actions = torch.zeros(self.num_envs, 7, device=self.device) + + self.delta_pos = torch.zeros(self.num_envs, 3, device=self.device) + self.delta_yaw = torch.zeros(self.num_envs, device=self.device) + self.success_pred = torch.full((self.num_envs,), -1.0, device=self.device) + self._pos_dead_zone = torch.tensor(cfg.pos_dead_zone, device=self.device).unsqueeze(0) + self._rot_dead_zone = cfg.rot_dead_zone + + def _get_bolt_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: + """Peg tip in env frame from fixed asset pose + local peg offset.""" + origins = self._env.scene.env_origins + board = self._env.scene[self.cfg.fixed_asset_name] + pos = board.data.root_pos_w - origins + quat = board.data.root_quat_w + offset = self._peg_offset.unsqueeze(0).expand(pos.shape[0], 3) + peg_pos = pos + math_utils.quat_apply(quat, offset) + if env_ids is not None: + return peg_pos[env_ids] + return peg_pos + + @property + def action_dim(self) -> int: + return 7 + + def process_actions(self, actions: torch.Tensor): + self._raw_actions[:] = actions + self._prev_smoothed_actions[:] = self._smoothed_actions + self._smoothed_actions[:] = ( + self.ema_factor * actions + (1.0 - self.ema_factor) * self._smoothed_actions + ) + self.success_pred[:] = self._smoothed_actions[:, 6] + + self._compute_ee_pose() + ee_pos_b = self._ee_pose_b[:, :3] + ee_quat_b = self._ee_pose_b[:, 3:7] + + bolt_pos_b = self._get_bolt_pos() + self.fixed_pos_noise + + pos_actions = self._smoothed_actions[:, :3] + target_pos = bolt_pos_b + pos_actions * self._pos_bounds + + self.delta_pos[:] = target_pos - ee_pos_b + clipped_delta = torch.clamp(self.delta_pos, -self._pos_thresh, self._pos_thresh) + clipped_delta = torch.where( + torch.abs(clipped_delta) > self._pos_dead_zone, + clipped_delta, + torch.zeros_like(clipped_delta), + ) + final_pos = ee_pos_b + clipped_delta + + rot_actions = self._smoothed_actions[:, 3:6].clone() + rot_actions[:, 0:2] = 0.0 + + yaw_rad = math.radians(-180.0) + math.radians(270.0) * (rot_actions[:, 2] + 1.0) / 2.0 + zero = torch.zeros_like(yaw_rad) + bolt_quat = torch_utils.quat_from_euler_xyz(zero, zero, yaw_rad) + pi_t = torch.full_like(yaw_rad, math.pi) + flip_quat = torch_utils.quat_from_euler_xyz(pi_t, zero, zero) + target_quat = torch_utils.quat_mul(flip_quat, bolt_quat) + + curr_roll, curr_pitch, curr_yaw = torch_utils.get_euler_xyz(ee_quat_b) + desired_roll, desired_pitch, desired_yaw = torch_utils.get_euler_xyz(target_quat) + desired_xyz = torch.stack([desired_roll, desired_pitch, desired_yaw], dim=1) + + curr_yaw = _wrap_yaw(curr_yaw) + desired_yaw = _wrap_yaw(desired_yaw) + + self.delta_yaw[:] = desired_yaw - curr_yaw + clipped_yaw = torch.clamp(self.delta_yaw, -self._rot_thresh[:, 2], self._rot_thresh[:, 2]) + clipped_yaw = torch.where( + torch.abs(clipped_yaw) > self._rot_dead_zone, + clipped_yaw, + torch.zeros_like(clipped_yaw), + ) + desired_xyz[:, 2] = curr_yaw + clipped_yaw + + desired_roll = torch.where(desired_roll < 0.0, desired_roll + 2 * math.pi, desired_roll) + delta_roll = desired_roll - curr_roll + clipped_roll = torch.clamp(delta_roll, -self._rot_thresh[:, 0], self._rot_thresh[:, 0]) + desired_xyz[:, 0] = curr_roll + clipped_roll + + curr_pitch_w = torch.where(curr_pitch > math.pi, curr_pitch - 2 * math.pi, curr_pitch) + desired_pitch = torch.where(desired_pitch < 0.0, desired_pitch + 2 * math.pi, desired_pitch) + desired_pitch_w = torch.where(desired_pitch > math.pi, desired_pitch - 2 * math.pi, desired_pitch) + delta_pitch = desired_pitch_w - curr_pitch_w + clipped_pitch = torch.clamp(delta_pitch, -self._rot_thresh[:, 1], self._rot_thresh[:, 1]) + desired_xyz[:, 1] = curr_pitch_w + clipped_pitch + + final_quat = torch_utils.quat_from_euler_xyz( + roll=desired_xyz[:, 0], pitch=desired_xyz[:, 1], yaw=desired_xyz[:, 2], + ) + + self._processed_actions[:, :3] = final_pos + self._processed_actions[:, 3:7] = final_quat + + self._compute_task_frame_pose() + self._osc.set_command( + command=self._processed_actions, + current_ee_pose_b=self._ee_pose_b, + current_task_frame_pose_b=self._task_frame_pose_b, + ) + + def reset(self, env_ids: Sequence[int] | None = None) -> None: + super().reset(env_ids) + if env_ids is None or (hasattr(env_ids, '__len__') and len(env_ids) == 0): + return + + n = len(env_ids) + + lo, hi = self.cfg.ema_factor_range + self.ema_factor[env_ids] = lo + torch.rand(n, 1, device=self.device) * (hi - lo) + + self._pos_thresh[env_ids] = _randomize_gains( + self._default_pos_thresh[env_ids], self.cfg.pos_threshold_noise_level, n, self.device, + ) + self._rot_thresh[env_ids] = _randomize_gains( + self._default_rot_thresh[env_ids], self.cfg.rot_threshold_noise_level, n, self.device, + ) + + self.fixed_pos_noise[env_ids] = ( + torch.randn(n, 3, device=self.device) * self._fixed_pos_noise_levels + ) + + ct_lo, ct_hi = self.cfg.contact_threshold_range + self.contact_thresholds[env_ids] = ct_lo + torch.rand(n, device=self.device) * (ct_hi - ct_lo) + + self.force_smooth[env_ids] = 0.0 + + self._compute_ee_pose() + ee_pos = self._ee_pose_b[env_ids, :3] + ee_quat = self._ee_pose_b[env_ids, 3:7] + + bolt_pos = self._get_bolt_pos(env_ids) + self.fixed_pos_noise[env_ids] + + pos_actions = (ee_pos - bolt_pos) / self._pos_bounds + self._smoothed_actions[env_ids, 0:3] = pos_actions + + unrot_pi = torch_utils.quat_from_euler_xyz( + torch.full((n,), -math.pi, device=self.device), + torch.zeros(n, device=self.device), + torch.zeros(n, device=self.device), + ) + quat_rel_bolt = torch_utils.quat_mul(unrot_pi, ee_quat) + yaw_bolt = torch_utils.get_euler_xyz(quat_rel_bolt)[2] + yaw_bolt = torch.where(yaw_bolt > math.pi / 2, yaw_bolt - 2 * math.pi, yaw_bolt) + yaw_bolt = torch.where(yaw_bolt < -math.pi, yaw_bolt + 2 * math.pi, yaw_bolt) + yaw_action = (yaw_bolt + math.radians(180.0)) / math.radians(270.0) * 2.0 - 1.0 + + self._smoothed_actions[env_ids, 3:5] = 0.0 + self._smoothed_actions[env_ids, 5] = yaw_action + self._smoothed_actions[env_ids, 6] = -1.0 + + self._prev_smoothed_actions[env_ids] = self._smoothed_actions[env_ids] + self.delta_pos[env_ids] = 0.0 + self.delta_yaw[env_ids] = 0.0 + self.success_pred[env_ids] = -1.0 + + +@configclass +class NistGearInsertionOscActionCfg(OperationalSpaceControllerActionCfg): + """Config for :class:`NistGearInsertionOscAction`.""" + + class_type: type[ActionTerm] = NistGearInsertionOscAction + + fixed_asset_name: str = "gears_and_base" + peg_offset: tuple[float, float, float] = (0.0, 0.0, 0.0) + fixed_pos_noise_levels: tuple[float, float, float] = (0.001, 0.001, 0.001) + pos_action_bounds: tuple[float, float, float] = (0.05, 0.05, 0.05) + pos_action_threshold: tuple[float, float, float] = (0.02, 0.02, 0.02) + rot_action_threshold: tuple[float, float, float] = (0.097, 0.097, 0.097) + pos_threshold_noise_level: tuple[float, float, float] = (0.25, 0.25, 0.25) + rot_threshold_noise_level: tuple[float, float, float] = (0.29, 0.29, 0.29) + ema_factor_range: tuple[float, float] = (0.05, 0.2) + contact_threshold_range: tuple[float, float] = (5.0, 10.0) + # Dead zone: zero out small commanded deltas on the task wrench. + pos_dead_zone: tuple[float, float, float] = (0.0005, 0.0005, 0.0005) # m, ~0.5 mm + rot_dead_zone: float = 0.001 # rad diff --git a/isaaclab_arena_environments/mdp/observations.py b/isaaclab_arena_environments/mdp/observations.py new file mode 100644 index 000000000..7210660ed --- /dev/null +++ b/isaaclab_arena_environments/mdp/observations.py @@ -0,0 +1,210 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Custom observation terms for assembly environments.""" + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch + +import isaacsim.core.utils.torch as torch_utils + +from isaaclab.assets import Articulation +from isaaclab.managers import ManagerTermBase, SceneEntityCfg +from isaaclab.utils import math as math_utils +from isaaclab.utils.math import axis_angle_from_quat + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + from isaaclab.managers import ObservationTermCfg + + +def body_pos_in_env_frame( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Noiseless body position in env frame (privileged state for critic).""" + robot: Articulation = env.scene[robot_cfg.name] + idx = robot.body_names.index(body_name) + pos_w = robot.data.body_pos_w[:, idx, :] + return pos_w - env.scene.env_origins + + +def body_quat_canonical( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Noiseless body quaternion, canonicalized w >= 0 (privileged state for critic).""" + robot: Articulation = env.scene[robot_cfg.name] + idx = robot.body_names.index(body_name) + quat = robot.data.body_quat_w[:, idx, :] + sign = torch.where(quat[:, 0:1] < 0, -1.0, 1.0) + return quat * sign + + +def force_torque_at_body( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "force_sensor", + return_torque: bool = False, +) -> torch.Tensor: + """Read joint reaction wrench at a specific body link.""" + robot: Articulation = env.scene[robot_cfg.name] + body_idx = robot.body_names.index(body_name) + wrench = robot.root_physx_view.get_link_incoming_joint_force()[:, body_idx] + if return_torque: + return wrench + return wrench[:, :3] + + +class NistGearInsertionPolicyObservations(ManagerTermBase): + """24-D policy observation stack for NIST gear insertion (OSC + wrist sensing). + + Output layout (per env):: + + fingertip_pos_rel_fixed (3) + fingertip_quat (4) + ee_linvel (3) + ee_angvel (3) + ft_force (3) + force_threshold (1) + prev_actions (7) + """ + + def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + + p = cfg.params + self._robot_name: str = p.get("robot_name", "robot") + self._board_name: str = p.get("board_name", "gears_and_base") + self._peg_offset = torch.tensor(p.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device) + self._fingertip_body: str = p.get("fingertip_body_name", "panda_fingertip_centered") + self._force_body: str = p.get("force_body_name", "force_sensor") + + self._pos_noise: float = p.get("pos_noise_level", 0.00025) + self._rot_noise_deg: float = p.get("rot_noise_level_deg", 0.1) + self._force_noise: float = p.get("force_noise_level", 1.0) + self._ft_alpha: float = p.get("ft_smoothing_factor", 0.25) + self._contact_thresh_range: tuple[float, float] = tuple( + p.get("contact_threshold_range", [5.0, 10.0]) + ) + + n = env.num_envs + dev = env.device + + self._fingertip_idx: int | None = None + self._force_idx: int | None = None + + self._flip_quats = torch.ones(n, device=dev) + self._prev_noisy_pos = torch.zeros(n, 3, device=dev) + self._prev_noisy_quat = torch.zeros(n, 4, device=dev) + self._prev_noisy_quat[:, 0] = 1.0 + + def _ensure_body_indices(self): + if self._fingertip_idx is not None: + return + robot: Articulation = self._env.scene[self._robot_name] + self._fingertip_idx = robot.body_names.index(self._fingertip_body) + self._force_idx = robot.body_names.index(self._force_body) + + def reset(self, env_ids: list[int] | None = None): + if env_ids is None or len(env_ids) == 0: + return + + n = len(env_ids) + dev = self._env.device + + flip = torch.ones(n, device=dev) + flip[torch.rand(n, device=dev) > 0.5] = -1.0 + self._flip_quats[env_ids] = flip + + self._ensure_body_indices() + robot: Articulation = self._env.scene[self._robot_name] + origins = self._env.scene.env_origins + self._prev_noisy_pos[env_ids] = ( + robot.data.body_pos_w[env_ids, self._fingertip_idx] - origins[env_ids] + ) + self._prev_noisy_quat[env_ids] = robot.data.body_quat_w[env_ids, self._fingertip_idx] + + def __call__( + self, + env: ManagerBasedRLEnv, + robot_name: str = "robot", + board_name: str = "gears_and_base", + peg_offset: list[float] | None = None, + fingertip_body_name: str = "panda_fingertip_centered", + force_body_name: str = "force_sensor", + pos_noise_level: float = 0.00025, + rot_noise_level_deg: float = 0.1, + force_noise_level: float = 1.0, + ) -> torch.Tensor: + self._ensure_body_indices() + + n = env.num_envs + dev = env.device + dt = env.step_dt + + robot: Articulation = env.scene[self._robot_name] + board = env.scene[self._board_name] + origins = env.scene.env_origins + + ft_pos = robot.data.body_pos_w[:, self._fingertip_idx] - origins + ft_quat = robot.data.body_quat_w[:, self._fingertip_idx] + + pos_noise = torch.randn(n, 3, device=dev) * self._pos_noise + noisy_pos = ft_pos + pos_noise + + rot_noise_axis = torch.randn(n, 3, device=dev) + rot_noise_axis = rot_noise_axis / (torch.linalg.norm(rot_noise_axis, dim=1, keepdim=True) + 1e-8) + rot_noise_angle = torch.randn(n, device=dev) * math.radians(self._rot_noise_deg) + noisy_quat = torch_utils.quat_mul( + ft_quat, torch_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), + ) + noisy_quat[:, [0, 3]] = 0.0 + noisy_quat = noisy_quat * self._flip_quats.unsqueeze(-1) + + arm_osc_action = env.action_manager._terms["arm_action"] + board_pos = board.data.root_pos_w - origins + board_quat = board.data.root_quat_w + peg_offset_exp = self._peg_offset.unsqueeze(0).expand(n, 3) + peg_pos = board_pos + math_utils.quat_apply(board_quat, peg_offset_exp) + noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise + + fingertip_pos_rel = noisy_pos - noisy_fixed_pos + + ee_linvel = (noisy_pos - self._prev_noisy_pos) / dt + self._prev_noisy_pos[:] = noisy_pos + + rot_diff = torch_utils.quat_mul(noisy_quat, torch_utils.quat_conjugate(self._prev_noisy_quat)) + rot_diff = rot_diff * torch.sign(rot_diff[:, 0]).unsqueeze(-1) + ee_angvel = axis_angle_from_quat(rot_diff) / dt + ee_angvel[:, 0:2] = 0.0 + self._prev_noisy_quat[:] = noisy_quat + + raw_force = robot.root_physx_view.get_link_incoming_joint_force()[:, self._force_idx, :3] + arm_osc_action.force_smooth[:] = ( + self._ft_alpha * raw_force + (1.0 - self._ft_alpha) * arm_osc_action.force_smooth + ) + noisy_force = arm_osc_action.force_smooth + torch.randn(n, 3, device=dev) * self._force_noise + + force_threshold = arm_osc_action.contact_thresholds.unsqueeze(-1) + + prev_actions = arm_osc_action._smoothed_actions.clone() + prev_actions[:, 3:5] = 0.0 + + return torch.cat([ + fingertip_pos_rel, + noisy_quat, + ee_linvel, + ee_angvel, + noisy_force, + force_threshold, + prev_actions, + ], dim=-1) diff --git a/isaaclab_arena_environments/mdp/robot_configs.py b/isaaclab_arena_environments/mdp/robot_configs.py index 182585253..3f4c13325 100644 --- a/isaaclab_arena_environments/mdp/robot_configs.py +++ b/isaaclab_arena_environments/mdp/robot_configs.py @@ -8,8 +8,30 @@ from __future__ import annotations +from collections.abc import Sequence + +import torch + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR from isaaclab_assets.robots.franka import FRANKA_PANDA_HIGH_PD_CFG + +def franka_gripper_joint_setter( + joint_pos: torch.Tensor, + row_indices: Sequence[int], + finger_joint_indices: Sequence[int], + width: float, +) -> None: + """Set Franka Panda finger joints to achieve a given total opening *width*. + + Each finger joint is set to ``width / 2`` (symmetric grasp). + """ + for jid in finger_joint_indices: + joint_pos[row_indices, jid] = width / 2.0 + # =========================================================================================== # Franka Panda robot configuration optimized for assembly tasks (peg insert, gear mesh, etc.). # =========================================================================================== @@ -32,3 +54,75 @@ # Set initial position for tabletop tasks FRANKA_PANDA_ASSEMBLY_HIGH_PD_CFG.init_state.pos = (0.0, 0.0, 0.0) + + +_FACTORY_ASSET_DIR = f"{ISAACLAB_NUCLEUS_DIR}/Factory" + +FRANKA_MIMIC_OSC_CFG = ArticulationCfg( + prim_path="{ENV_REGEX_NS}/Robot", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{_FACTORY_ASSET_DIR}/franka_mimic.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + ), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "panda_joint1": 0.0, + "panda_joint2": -0.569, + "panda_joint3": 0.0, + "panda_joint4": -2.810, + "panda_joint5": 0.0, + "panda_joint6": 3.037, + "panda_joint7": 0.741, + "panda_finger_joint2": 0.04, + }, + pos=(0.0, 0.0, 0.0), + rot=(1.0, 0.0, 0.0, 0.0), + ), + actuators={ + "panda_arm1": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[1-4]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit_sim=87, + velocity_limit_sim=124.6, + ), + "panda_arm2": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[5-7]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit_sim=12, + velocity_limit_sim=149.5, + ), + "panda_hand": ImplicitActuatorCfg( + joint_names_expr=["panda_finger_joint[1-2]"], + effort_limit_sim=40.0, + velocity_limit_sim=0.04, + stiffness=7500.0, + damping=173.0, + friction=0.1, + armature=0.0, + ), + }, +) diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py new file mode 100644 index 000000000..da0327673 --- /dev/null +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -0,0 +1,259 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""NIST assembled board gear-mesh environment with operational-space torque control.""" + +from __future__ import annotations + +import argparse + +from isaaclab_arena_environments.example_environment_base import ExampleEnvironmentBase + + +class NISTAssembledGearMeshOSCEnvironment(ExampleEnvironmentBase): + """NIST gear insertion using OSC torque control and assembly-style observations.""" + + name: str = "nist_assembled_gear_mesh_osc" + + def get_env(self, args_cli: argparse.Namespace): + import isaaclab.envs.mdp as mdp_isaac_lab + import isaaclab.sim as sim_utils + from isaaclab.controllers import OperationalSpaceControllerCfg + from isaaclab.envs.mdp.actions import BinaryJointPositionActionCfg + from isaaclab.managers import EventTermCfg, ObservationTermCfg as ObsTerm, SceneEntityCfg + from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg, OffsetCfg + + from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment + from isaaclab_arena.scene.scene import Scene + from isaaclab_arena.tasks.nist_gear_insertion_task import NistGearInsertionTask + from isaaclab_arena.utils.pose import Pose + import isaaclab_arena_environments.mdp as mdp + + peg_tip_offset = (0.02025, 0.0, 0.025) + peg_base_offset = (0.02025, 0.0, 0.0) + success_z_fraction = 0.05 + xy_threshold = 0.0025 + episode_length_s = 15.0 + pos_action_threshold = 0.02 + rot_action_threshold = 0.097 + + table = self.asset_registry.get_asset_by_name("table")() + assembled_board = self.asset_registry.get_asset_by_name("nist_assembled_board")() + gears_and_base = self.asset_registry.get_asset_by_name("gears_and_base")() + medium_gear = self.asset_registry.get_asset_by_name("medium_nist_gear")() + light_spawner_cfg = sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=1500.0) + light = self.asset_registry.get_asset_by_name("light")(spawner_cfg=light_spawner_cfg) + + embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)( + enable_cameras=args_cli.enable_cameras, + concatenate_observation_terms=True, + ) + embodiment.scene_config.robot = mdp.FRANKA_MIMIC_OSC_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + embodiment.scene_config.ee_frame = FrameTransformerCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_link0", + debug_vis=False, + target_frames=[ + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_fingertip_centered", + name="end_effector", + offset=OffsetCfg(pos=(0.0, 0.0, 0.0)), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_rightfinger", + name="tool_rightfinger", + offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_leftfinger", + name="tool_leftfinger", + offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), + ), + ], + ) + embodiment.set_initial_joint_pose( + [ + 0.561824, + 0.287201, + -0.543103, + -2.410188, + 0.507908, + 2.847644, + 0.454298, + 0.04, + 0.04, + ] + ) + embodiment.action_config.arm_action = mdp.NistGearInsertionOscActionCfg( + asset_name="robot", + joint_names=["panda_joint[1-7]"], + body_name="panda_fingertip_centered", + controller_cfg=OperationalSpaceControllerCfg( + target_types=["pose_abs"], + impedance_mode="fixed", + inertial_dynamics_decoupling=True, + partial_inertial_dynamics_decoupling=False, + gravity_compensation=False, + motion_stiffness_task=[565.0, 565.0, 565.0, 28.0, 28.0, 28.0], + motion_damping_ratio_task=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + nullspace_control="position", + nullspace_stiffness=10.0, + nullspace_damping_ratio=1.0, + ), + position_scale=1.0, + orientation_scale=1.0, + nullspace_joint_pos_target="default", + fixed_asset_name=gears_and_base.name, + peg_offset=peg_tip_offset, + ) + embodiment.action_config.gripper_action = BinaryJointPositionActionCfg( + asset_name="robot", + joint_names=["panda_finger_joint[1-2]"], + open_command_expr={"panda_finger_joint1": 0.0, "panda_finger_joint2": 0.0}, + close_command_expr={"panda_finger_joint1": 0.0, "panda_finger_joint2": 0.0}, + ) + + embodiment.observation_config.policy.actions = None + embodiment.observation_config.policy.gripper_pos = None + embodiment.observation_config.policy.eef_pos = None + embodiment.observation_config.policy.eef_quat = None + embodiment.observation_config.policy.joint_pos = None + embodiment.observation_config.policy.joint_vel = None + embodiment.observation_config.policy.nist_gear_policy_obs = ObsTerm( + func=mdp.NistGearInsertionPolicyObservations, + params={ + "robot_name": "robot", + "board_name": gears_and_base.name, + "peg_offset": list(peg_tip_offset), + "fingertip_body_name": "panda_fingertip_centered", + "force_body_name": "force_sensor", + "pos_noise_level": 0.0, + "rot_noise_level_deg": 0.0, + "force_noise_level": 0.0, + }, + ) + embodiment.reward_config.action_rate = None + embodiment.reward_config.joint_vel = None + + if args_cli.teleop_device is not None: + teleop_device = self.device_registry.get_device_by_name(args_cli.teleop_device)() + else: + teleop_device = None + + table.set_initial_pose(Pose(position_xyz=(0.55, 0.0, 0.0), rotation_xyzw=(0.0, 0.0, 0.707, 0.707))) + assembled_board.set_initial_pose(Pose(position_xyz=(0.71, -0.005, 0.0), rotation_xyzw=(0.0, 0.0, 0.0, 1.0))) + medium_gear.set_initial_pose(Pose(position_xyz=(0.5462, -0.02386, 0.12858), rotation_xyzw=(0.0, 0.0, 0.0, 1.0))) + gears_and_base.set_initial_pose( + Pose(position_xyz=(0.585, -0.074, 0.0), rotation_xyzw=(0.0, 0.0, 0.9239, 0.3827)) + ) + scene = Scene(assets=[table, assembled_board, medium_gear, gears_and_base, light]) + + nist_randomization_events = { + "held_physics_material": EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg(medium_gear.name), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 1, + }, + ), + "robot_physics_material": EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 1, + }, + ), + "fixed_physics_material": EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg(gears_and_base.name), + "static_friction_range": (0.25, 1.25), + "dynamic_friction_range": (0.25, 0.25), + "restitution_range": (0.0, 0.0), + "num_buckets": 128, + }, + ), + "held_object_mass": EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_mass, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg(medium_gear.name), + "mass_distribution_params": (-0.005, 0.005), + "operation": "add", + "distribution": "uniform", + }, + ), + "fixed_asset_pose": EventTermCfg( + func=mdp_isaac_lab.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg(gears_and_base.name), + "pose_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.2617993877991494), + }, + "velocity_range": {}, + }, + ), + } + + task = NistGearInsertionTask( + assembled_board=assembled_board, + held_gear=medium_gear, + background_scene=table, + peg_offset_from_board=list(peg_base_offset), + success_z_fraction=success_z_fraction, + xy_threshold=xy_threshold, + start_in_gripper=True, + num_arm_joints=7, + hand_grasp_width=0.03, + hand_close_width=0.0, + gripper_joint_setter_func=mdp.franka_gripper_joint_setter, + end_effector_body_name="panda_hand", + grasp_rot_offset=[0.0, 1.0, 0.0, 0.0], + grasp_offset=[0.02, 0.0, -0.128], + episode_length_s=episode_length_s, + enable_randomization=True, + arm_joint_names="panda_joint[1-7]", + finger_body_names=".*finger", + peg_offset_xy_noise=0.005, + gear_base_asset=gears_and_base, + peg_offset_for_obs=list(peg_tip_offset), + include_insertion_regularizers=True, + pos_action_threshold=pos_action_threshold, + rot_action_threshold=rot_action_threshold, + engagement_xy_threshold=xy_threshold, + success_bonus_weight=1.0, + disable_drop_terminations=True, + extra_event_terms=nist_randomization_events, + ) + + return IsaacLabArenaEnvironment( + name=self.name, + embodiment=embodiment, + scene=scene, + task=task, + teleop_device=teleop_device, + env_cfg_callback=mdp.assembly_env_cfg_callback, + ) + + @staticmethod + def add_cli_args(parser: argparse.ArgumentParser) -> None: + parser.add_argument("--embodiment", type=str, default="franka_ik", help="Robot embodiment") + parser.add_argument( + "--teleop_device", type=str, default=None, help="Teleoperation device (e.g., keyboard, spacemouse)" + ) From 21b1e7badd0fdcaedb3ca76ad4dd75502d6235ea Mon Sep 17 00:00:00 2001 From: odoherty Date: Tue, 7 Apr 2026 16:04:08 -0700 Subject: [PATCH 02/12] Migrate quaternion convention from wxyz to xyzw for Isaac Lab 3.0 Isaac Lab 3.0 changed from wxyz to xyzw quaternion ordering. This caused the robot to spawn upside down, leading to IK solver failure, NaN observations, and training crashes. Key fixes: - Robot init rotation: (1,0,0,0) -> (0,0,0,0,1) in robot_configs.py - Grasp rotation offset: wxyz -> xyzw in environment config - Quaternion canonicalization: check w at index 3 (not 0) everywhere - Replace torch_utils (wxyz) with math_utils (xyzw) in OSC action - Wrap all warp arrays with wp.to_torch() for PyTorch compatibility - Update deprecated IsaacLab API calls to _index variants - Increase gpu_collision_stack_size to 4 GB for contact-heavy scenes --- isaaclab_arena/assets/object_library.py | 2 +- .../gear_insertion_observations.py | 21 +++---- .../tasks/rewards/gear_insertion_rewards.py | 27 +++++---- isaaclab_arena/tasks/terminations.py | 16 ++--- isaaclab_arena/terms/events.py | 59 ++++++++++--------- .../mdp/env_callbacks.py | 1 + .../mdp/nist_gear_insertion_osc_action.py | 27 +++++---- .../mdp/observations.py | 42 ++++++------- .../mdp/robot_configs.py | 2 +- ...nist_assembled_gearmesh_osc_environment.py | 2 +- 10 files changed, 104 insertions(+), 95 deletions(-) diff --git a/isaaclab_arena/assets/object_library.py b/isaaclab_arena/assets/object_library.py index 70e350c83..e078ea7f9 100644 --- a/isaaclab_arena/assets/object_library.py +++ b/isaaclab_arena/assets/object_library.py @@ -155,7 +155,7 @@ def __init__(self, prim_path: str | None = None, initial_pose: Pose | None = Non @register_asset class NistAssembledBoard(LibraryObject): - """NIST assembled board (kinematic background for gear insertion scene).""" + """NIST assembled board (kinematic visual prop for gear insertion scene).""" name = "nist_assembled_board" tags = ["object"] diff --git a/isaaclab_arena/tasks/observations/gear_insertion_observations.py b/isaaclab_arena/tasks/observations/gear_insertion_observations.py index 2a4961432..705e5c102 100644 --- a/isaaclab_arena/tasks/observations/gear_insertion_observations.py +++ b/isaaclab_arena/tasks/observations/gear_insertion_observations.py @@ -6,6 +6,7 @@ """Observation terms for the NIST gear insertion task.""" import torch +import warp as wp import isaaclab.utils.math as math_utils from isaaclab.assets import RigidObject @@ -19,7 +20,7 @@ def gear_pos_in_env_frame( ) -> torch.Tensor: """Position of the held gear relative to the environment origin.""" gear: RigidObject = env.scene[gear_cfg.name] - return gear.data.root_pos_w - env.scene.env_origins + return wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins def gear_quat_canonical( @@ -28,8 +29,8 @@ def gear_quat_canonical( ) -> torch.Tensor: """Orientation of the held gear, canonicalized so w >= 0.""" gear: RigidObject = env.scene[gear_cfg.name] - quat = gear.data.root_quat_w - sign = torch.where(quat[:, 0:1] < 0, -1.0, 1.0) + quat = wp.to_torch(gear.data.root_quat_w) + sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) return quat * sign @@ -39,7 +40,7 @@ def board_pos_in_env_frame( ) -> torch.Tensor: """Position of the board/base relative to the environment origin.""" board: RigidObject = env.scene[board_cfg.name] - return board.data.root_pos_w - env.scene.env_origins + return wp.to_torch(board.data.root_pos_w) - env.scene.env_origins def peg_pos_in_env_frame( @@ -49,8 +50,8 @@ def peg_pos_in_env_frame( ) -> torch.Tensor: """Target peg position: fixed asset pose + offset in its local frame.""" board: RigidObject = env.scene[board_cfg.name] - pos = board.data.root_pos_w - env.scene.env_origins - quat = board.data.root_quat_w + pos = wp.to_torch(board.data.root_pos_w) - env.scene.env_origins + quat = wp.to_torch(board.data.root_quat_w) offset = torch.tensor(peg_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) return pos + math_utils.quat_apply(quat, offset) @@ -61,8 +62,8 @@ def board_quat_canonical( ) -> torch.Tensor: """Orientation of the assembled board / peg, canonicalized so w >= 0.""" board: RigidObject = env.scene[board_cfg.name] - quat = board.data.root_quat_w - sign = torch.where(quat[:, 0:1] < 0, -1.0, 1.0) + quat = wp.to_torch(board.data.root_quat_w) + sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) return quat * sign @@ -73,8 +74,8 @@ def held_gear_base_pos_in_env_frame( ) -> torch.Tensor: """Position of the held gear's insertion point (root + offset in gear frame) in env frame.""" gear: RigidObject = env.scene[gear_cfg.name] - gear_pos = gear.data.root_pos_w - env.scene.env_origins - gear_quat = gear.data.root_quat_w + gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins + gear_quat = wp.to_torch(gear.data.root_quat_w) held_off = torch.tensor( held_gear_base_offset, device=env.device, dtype=torch.float32 ).unsqueeze(0).expand(env.num_envs, 3) diff --git a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py index b33cf702e..49dc4dfac 100644 --- a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py +++ b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py @@ -16,6 +16,7 @@ from typing import TYPE_CHECKING import torch +import warp as wp from isaaclab.assets import RigidObject from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg @@ -39,7 +40,7 @@ def __init__(self, num_envs: int, device: torch.device, num_keypoints: int = 4): self.offsets_base = _get_keypoint_offsets(num_keypoints=num_keypoints, device=device) self.n_kp = self.offsets_base.shape[0] self.identity_quat = ( - torch.tensor([[1.0, 0.0, 0.0, 0.0]], device=device, dtype=torch.float32) + torch.tensor([[0.0, 0.0, 0.0, 1.0]], device=device, dtype=torch.float32) .repeat(num_envs * self.n_kp, 1) .contiguous() ) @@ -114,15 +115,15 @@ def __call__( peg_offset_xy_noise: float = 0.0, ) -> torch.Tensor: gear: RigidObject = env.scene[self.gear_cfg.name] - gear_pos = gear.data.root_pos_w - env.scene.env_origins - gear_quat = gear.data.root_quat_w + gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins + gear_quat = wp.to_torch(gear.data.root_quat_w) n = gear_pos.shape[0] held_offset = self.held_gear_base_offset.unsqueeze(0).expand(n, 3) held_base_pos = gear_pos + quat_apply(gear_quat, held_offset) board: RigidObject = env.scene[self.board_cfg.name] - pos = board.data.root_pos_w[:n] - env.scene.env_origins[:n] - quat = board.data.root_quat_w[:n] + pos = wp.to_torch(board.data.root_pos_w)[:n] - env.scene.env_origins[:n] + quat = wp.to_torch(board.data.root_quat_w)[:n] offset = self.peg_offset.unsqueeze(0).expand(n, 3) target_pos = pos + quat_apply(quat, offset) + self._offset_noise[:n] target_quat = quat @@ -146,14 +147,14 @@ def _check_gear_position( the peg position (fixed asset + offset in fixed asset frame). """ gear: RigidObject = env.scene[gear_cfg.name] - gear_pos = gear.data.root_pos_w - env.scene.env_origins - gear_quat = gear.data.root_quat_w + gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins + gear_quat = wp.to_torch(gear.data.root_quat_w) held_off = torch.tensor(held_gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) held_base_pos = gear_pos + quat_apply(gear_quat, held_off) board: RigidObject = env.scene[board_cfg.name] - pos = board.data.root_pos_w - env.scene.env_origins - quat = board.data.root_quat_w + pos = wp.to_torch(board.data.root_pos_w) - env.scene.env_origins + quat = wp.to_torch(board.data.root_quat_w) offset = torch.tensor(peg_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) peg_pos = pos + quat_apply(quat, offset) @@ -273,15 +274,15 @@ def __call__( delay_until_ratio: float = 0.25, ) -> torch.Tensor: gear: RigidObject = env.scene[self._gear_cfg.name] - gear_pos = gear.data.root_pos_w - env.scene.env_origins - gear_quat = gear.data.root_quat_w + gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins + gear_quat = wp.to_torch(gear.data.root_quat_w) n = gear_pos.shape[0] held_off = self._held_gear_base_offset.unsqueeze(0).expand(n, 3) held_base_pos = gear_pos + quat_apply(gear_quat, held_off) board: RigidObject = env.scene[self._board_cfg.name] - board_pos = board.data.root_pos_w - env.scene.env_origins - board_quat = board.data.root_quat_w + board_pos = wp.to_torch(board.data.root_pos_w) - env.scene.env_origins + board_quat = wp.to_torch(board.data.root_quat_w) peg_off = self._peg_offset.unsqueeze(0).expand(n, 3) peg_pos = board_pos + quat_apply(board_quat, peg_off) diff --git a/isaaclab_arena/tasks/terminations.py b/isaaclab_arena/tasks/terminations.py index 1e1d94d5a..8a0d3905a 100644 --- a/isaaclab_arena/tasks/terminations.py +++ b/isaaclab_arena/tasks/terminations.py @@ -278,14 +278,14 @@ def gear_mesh_insertion_success( held_object: RigidObject = env.scene[held_object_cfg.name] fixed_object: RigidObject = env.scene[fixed_object_cfg.name] - held_root = held_object.data.root_pos_w - env.scene.env_origins - held_quat = held_object.data.root_quat_w + held_root = wp.to_torch(held_object.data.root_pos_w) - env.scene.env_origins + held_quat = wp.to_torch(held_object.data.root_quat_w) h_offset = held_gear_base_offset if held_gear_base_offset is not None else gear_base_offset held_off = torch.tensor(h_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) held_base_pos = held_root + math_utils.quat_apply(held_quat, held_off) - fixed_pos = fixed_object.data.root_pos_w - env.scene.env_origins - fixed_quat = fixed_object.data.root_quat_w + fixed_pos = wp.to_torch(fixed_object.data.root_pos_w) - env.scene.env_origins + fixed_quat = wp.to_torch(fixed_object.data.root_quat_w) offset = torch.tensor(gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) peg_pos = fixed_pos + math_utils.quat_apply(fixed_quat, offset) @@ -310,8 +310,8 @@ def gear_dropped_from_gripper( gear: RigidObject = env.scene[gear_cfg.name] robot: Articulation = env.scene[robot_cfg.name] eef_indices, _ = robot.find_bodies([ee_body_name]) - ee_pos = robot.data.body_pos_w[:, eef_indices[0]] - gear_pos = gear.data.root_pos_w + ee_pos = wp.to_torch(robot.data.body_pos_w)[:, eef_indices[0]] + gear_pos = wp.to_torch(gear.data.root_pos_w) distance = torch.norm(gear_pos - ee_pos, dim=-1) return distance > distance_threshold @@ -332,8 +332,8 @@ def gear_orientation_exceeded( robot: Articulation = env.scene[robot_cfg.name] eef_indices, _ = robot.find_bodies([ee_body_name]) - eef_quat = robot.data.body_quat_w[:, eef_indices[0]] - current_relative = math_utils.quat_mul(gear.data.root_quat_w, math_utils.quat_conjugate(eef_quat)) + eef_quat = wp.to_torch(robot.data.body_quat_w)[:, eef_indices[0]] + current_relative = math_utils.quat_mul(wp.to_torch(gear.data.root_quat_w), math_utils.quat_conjugate(eef_quat)) initial_relative = env._initial_gear_ee_relative_quat deviation = math_utils.quat_mul(current_relative, math_utils.quat_conjugate(initial_relative)) diff --git a/isaaclab_arena/terms/events.py b/isaaclab_arena/terms/events.py index 4ed0b0cd8..a5768f759 100644 --- a/isaaclab_arena/terms/events.py +++ b/isaaclab_arena/terms/events.py @@ -38,12 +38,12 @@ def set_object_pose( pose_t_xyz_q_xyzw = pose.to_tensor(device=env.device).repeat(num_envs, 1) pose_t_xyz_q_xyzw[:, :3] += env.scene.env_origins[env_ids] # Set the pose and velocity - asset.write_root_pose_to_sim(pose_t_xyz_q_xyzw, env_ids=env_ids) + asset.write_root_pose_to_sim_index(root_pose=pose_t_xyz_q_xyzw, env_ids=env_ids) if velocity is not None: vel = velocity.to_tensor(device=env.device).unsqueeze(0).expand(num_envs, -1) - asset.write_root_velocity_to_sim(vel, env_ids=env_ids) + asset.write_root_velocity_to_sim_index(root_velocity=vel, env_ids=env_ids) else: - asset.write_root_velocity_to_sim(torch.zeros(num_envs, 6, device=env.device), env_ids=env_ids) + asset.write_root_velocity_to_sim_index(root_velocity=torch.zeros(num_envs, 6, device=env.device), env_ids=env_ids) def set_object_pose_per_env( @@ -66,9 +66,9 @@ def set_object_pose_per_env( pose_t_xyz_q_xyzw = pose.to_tensor(device=env.device).unsqueeze(0) pose_t_xyz_q_xyzw[0, :3] += env.scene.env_origins[cur_env, :] # Set the pose and velocity - asset.write_root_pose_to_sim(pose_t_xyz_q_xyzw, env_ids=torch.tensor([cur_env], device=env.device)) - asset.write_root_velocity_to_sim( - torch.zeros(1, 6, device=env.device), env_ids=torch.tensor([cur_env], device=env.device) + asset.write_root_pose_to_sim_index(root_pose=pose_t_xyz_q_xyzw, env_ids=torch.tensor([cur_env], device=env.device)) + asset.write_root_velocity_to_sim_index( + root_velocity=torch.zeros(1, 6, device=env.device), env_ids=torch.tensor([cur_env], device=env.device) ) @@ -79,13 +79,14 @@ def reset_all_articulation_joints(env: ManagerBasedEnv, env_ids: torch.Tensor): default_root_state = wp.to_torch(articulation_asset.data.default_root_state)[env_ids].clone() default_root_state[:, 0:3] += env.scene.env_origins[env_ids] # set into the physics simulation - articulation_asset.write_root_pose_to_sim(default_root_state[:, :7], env_ids=env_ids) - articulation_asset.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids=env_ids) + articulation_asset.write_root_pose_to_sim_index(root_pose=default_root_state[:, :7], env_ids=env_ids) + articulation_asset.write_root_velocity_to_sim_index(root_velocity=default_root_state[:, 7:], env_ids=env_ids) # obtain default joint positions default_joint_pos = wp.to_torch(articulation_asset.data.default_joint_pos)[env_ids].clone() default_joint_vel = wp.to_torch(articulation_asset.data.default_joint_vel)[env_ids].clone() # set into the physics simulation - articulation_asset.write_joint_state_to_sim(default_joint_pos, default_joint_vel, env_ids=env_ids) + articulation_asset.write_joint_position_to_sim_index(position=default_joint_pos, env_ids=env_ids) + articulation_asset.write_joint_velocity_to_sim_index(velocity=default_joint_vel, env_ids=env_ids) class place_gear_in_gripper(ManagerTermBase): @@ -166,19 +167,19 @@ def __call__( grasp_offset_batch = self.grasp_offset_tensor.unsqueeze(0).expand(n, -1) for _ in range(max_ik_iterations): - joint_pos = self.robot.data.joint_pos[env_ids].clone() - joint_vel = self.robot.data.joint_vel[env_ids].clone() + joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() + joint_vel = wp.to_torch(self.robot.data.joint_vel)[env_ids].clone() - gear_pos_w = self.gear.data.root_link_pos_w[env_ids].clone() - gear_quat_w = self.gear.data.root_link_quat_w[env_ids].clone() + gear_pos_w = wp.to_torch(self.gear.data.root_link_pos_w)[env_ids].clone() + gear_quat_w = wp.to_torch(self.gear.data.root_link_quat_w)[env_ids].clone() target_quat = math_utils.quat_mul(gear_quat_w, grasp_rot_offset_tensor) target_pos = gear_pos_w + math_utils.quat_apply( target_quat, grasp_offset_batch ) - eef_pos = self.robot.data.body_pos_w[env_ids, self.eef_idx] - eef_quat = self.robot.data.body_quat_w[env_ids, self.eef_idx] + eef_pos = wp.to_torch(self.robot.data.body_pos_w)[env_ids, self.eef_idx] + eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] pos_error, aa_error = fc.get_pose_error( fingertip_midpoint_pos=eef_pos, @@ -194,7 +195,7 @@ def __call__( and torch.norm(aa_error, dim=-1).max() < rot_threshold): break - jacobians = self.robot.root_physx_view.get_jacobians().clone() + jacobians = wp.to_torch(self.robot.root_view.get_jacobians()).clone() jacobian = jacobians[env_ids, self.jacobi_body_idx, :, :] delta_dof_pos = fc._get_delta_dof_pos( @@ -207,37 +208,39 @@ def __call__( joint_pos = joint_pos + delta_dof_pos joint_vel = torch.zeros_like(joint_pos) - self.robot.set_joint_position_target(joint_pos, env_ids=env_ids) - self.robot.set_joint_velocity_target(joint_vel, env_ids=env_ids) - self.robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + self.robot.set_joint_position_target_index(target=joint_pos, env_ids=env_ids) + self.robot.set_joint_velocity_target_index(target=joint_vel, env_ids=env_ids) + self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) + self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) - joint_pos = self.robot.data.joint_pos[env_ids].clone() + joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() for row_idx in range(n): self.gripper_joint_setter_func( joint_pos, [row_idx], self.finger_joints, self.hand_grasp_width, ) - self.robot.set_joint_position_target( - joint_pos, joint_ids=self.all_joints, env_ids=env_ids, + self.robot.set_joint_position_target_index( + target=joint_pos, joint_ids=self.all_joints, env_ids=env_ids, ) - self.robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) + self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) for row_idx in range(n): self.gripper_joint_setter_func( joint_pos, [row_idx], self.finger_joints, self.hand_close_width, ) - self.robot.set_joint_position_target( - joint_pos, joint_ids=self.all_joints, env_ids=env_ids, + self.robot.set_joint_position_target_index( + target=joint_pos, joint_ids=self.all_joints, env_ids=env_ids, ) - gear_quat = self.gear.data.root_quat_w[env_ids] - eef_quat = self.robot.data.body_quat_w[env_ids, self.eef_idx] + gear_quat = wp.to_torch(self.gear.data.root_quat_w)[env_ids] + eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] rel_quat = math_utils.quat_mul(gear_quat, math_utils.quat_conjugate(eef_quat)) if not hasattr(env, "_initial_gear_ee_relative_quat"): env._initial_gear_ee_relative_quat = torch.zeros( env.num_envs, 4, device=env.device, dtype=torch.float32 ) - env._initial_gear_ee_relative_quat[:, 0] = 1.0 + env._initial_gear_ee_relative_quat[:, 3] = 1.0 env._initial_gear_ee_relative_quat[env_ids] = rel_quat diff --git a/isaaclab_arena_environments/mdp/env_callbacks.py b/isaaclab_arena_environments/mdp/env_callbacks.py index c2ce506f3..4f15f1833 100644 --- a/isaaclab_arena_environments/mdp/env_callbacks.py +++ b/isaaclab_arena_environments/mdp/env_callbacks.py @@ -58,6 +58,7 @@ def assembly_env_cfg_callback(env_cfg: IsaacLabArenaManagerBasedRLEnvCfg) -> Isa friction_correlation_distance=0.00625, gpu_max_rigid_contact_count=2**23, gpu_max_rigid_patch_count=2**23, + gpu_collision_stack_size=2**32 - 1, gpu_max_num_partitions=1, # Important for stable simulation ), physics_material=RigidBodyMaterialCfg( diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py index 5f2e08057..bc188397d 100644 --- a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py @@ -11,10 +11,10 @@ import math import torch +import warp as wp from collections.abc import Sequence from typing import TYPE_CHECKING -import isaacsim.core.utils.torch as torch_utils import isaaclab.utils.math as math_utils from isaaclab.envs.mdp.actions.actions_cfg import OperationalSpaceControllerActionCfg @@ -85,8 +85,8 @@ def _get_bolt_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: """Peg tip in env frame from fixed asset pose + local peg offset.""" origins = self._env.scene.env_origins board = self._env.scene[self.cfg.fixed_asset_name] - pos = board.data.root_pos_w - origins - quat = board.data.root_quat_w + pos = wp.to_torch(board.data.root_pos_w) - origins + quat = wp.to_torch(board.data.root_quat_w) offset = self._peg_offset.unsqueeze(0).expand(pos.shape[0], 3) peg_pos = pos + math_utils.quat_apply(quat, offset) if env_ids is not None: @@ -97,6 +97,9 @@ def _get_bolt_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: def action_dim(self) -> int: return 7 + def apply_actions(self): + super().apply_actions() + def process_actions(self, actions: torch.Tensor): self._raw_actions[:] = actions self._prev_smoothed_actions[:] = self._smoothed_actions @@ -128,13 +131,13 @@ def process_actions(self, actions: torch.Tensor): yaw_rad = math.radians(-180.0) + math.radians(270.0) * (rot_actions[:, 2] + 1.0) / 2.0 zero = torch.zeros_like(yaw_rad) - bolt_quat = torch_utils.quat_from_euler_xyz(zero, zero, yaw_rad) + bolt_quat = math_utils.quat_from_euler_xyz(zero, zero, yaw_rad) pi_t = torch.full_like(yaw_rad, math.pi) - flip_quat = torch_utils.quat_from_euler_xyz(pi_t, zero, zero) - target_quat = torch_utils.quat_mul(flip_quat, bolt_quat) + flip_quat = math_utils.quat_from_euler_xyz(pi_t, zero, zero) + target_quat = math_utils.quat_mul(flip_quat, bolt_quat) - curr_roll, curr_pitch, curr_yaw = torch_utils.get_euler_xyz(ee_quat_b) - desired_roll, desired_pitch, desired_yaw = torch_utils.get_euler_xyz(target_quat) + curr_roll, curr_pitch, curr_yaw = math_utils.euler_xyz_from_quat(ee_quat_b, wrap_to_2pi=True) + desired_roll, desired_pitch, desired_yaw = math_utils.euler_xyz_from_quat(target_quat, wrap_to_2pi=True) desired_xyz = torch.stack([desired_roll, desired_pitch, desired_yaw], dim=1) curr_yaw = _wrap_yaw(curr_yaw) @@ -161,7 +164,7 @@ def process_actions(self, actions: torch.Tensor): clipped_pitch = torch.clamp(delta_pitch, -self._rot_thresh[:, 1], self._rot_thresh[:, 1]) desired_xyz[:, 1] = curr_pitch_w + clipped_pitch - final_quat = torch_utils.quat_from_euler_xyz( + final_quat = math_utils.quat_from_euler_xyz( roll=desired_xyz[:, 0], pitch=desired_xyz[:, 1], yaw=desired_xyz[:, 2], ) @@ -210,13 +213,13 @@ def reset(self, env_ids: Sequence[int] | None = None) -> None: pos_actions = (ee_pos - bolt_pos) / self._pos_bounds self._smoothed_actions[env_ids, 0:3] = pos_actions - unrot_pi = torch_utils.quat_from_euler_xyz( + unrot_pi = math_utils.quat_from_euler_xyz( torch.full((n,), -math.pi, device=self.device), torch.zeros(n, device=self.device), torch.zeros(n, device=self.device), ) - quat_rel_bolt = torch_utils.quat_mul(unrot_pi, ee_quat) - yaw_bolt = torch_utils.get_euler_xyz(quat_rel_bolt)[2] + quat_rel_bolt = math_utils.quat_mul(unrot_pi, ee_quat) + yaw_bolt = math_utils.euler_xyz_from_quat(quat_rel_bolt, wrap_to_2pi=True)[2] yaw_bolt = torch.where(yaw_bolt > math.pi / 2, yaw_bolt - 2 * math.pi, yaw_bolt) yaw_bolt = torch.where(yaw_bolt < -math.pi, yaw_bolt + 2 * math.pi, yaw_bolt) yaw_action = (yaw_bolt + math.radians(180.0)) / math.radians(270.0) * 2.0 - 1.0 diff --git a/isaaclab_arena_environments/mdp/observations.py b/isaaclab_arena_environments/mdp/observations.py index 7210660ed..6c5564d95 100644 --- a/isaaclab_arena_environments/mdp/observations.py +++ b/isaaclab_arena_environments/mdp/observations.py @@ -11,8 +11,7 @@ from typing import TYPE_CHECKING import torch - -import isaacsim.core.utils.torch as torch_utils +import warp as wp from isaaclab.assets import Articulation from isaaclab.managers import ManagerTermBase, SceneEntityCfg @@ -32,7 +31,7 @@ def body_pos_in_env_frame( """Noiseless body position in env frame (privileged state for critic).""" robot: Articulation = env.scene[robot_cfg.name] idx = robot.body_names.index(body_name) - pos_w = robot.data.body_pos_w[:, idx, :] + pos_w = wp.to_torch(robot.data.body_pos_w)[:, idx, :] return pos_w - env.scene.env_origins @@ -44,8 +43,8 @@ def body_quat_canonical( """Noiseless body quaternion, canonicalized w >= 0 (privileged state for critic).""" robot: Articulation = env.scene[robot_cfg.name] idx = robot.body_names.index(body_name) - quat = robot.data.body_quat_w[:, idx, :] - sign = torch.where(quat[:, 0:1] < 0, -1.0, 1.0) + quat = wp.to_torch(robot.data.body_quat_w)[:, idx, :] + sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) return quat * sign @@ -58,7 +57,7 @@ def force_torque_at_body( """Read joint reaction wrench at a specific body link.""" robot: Articulation = env.scene[robot_cfg.name] body_idx = robot.body_names.index(body_name) - wrench = robot.root_physx_view.get_link_incoming_joint_force()[:, body_idx] + wrench = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, body_idx] if return_torque: return wrench return wrench[:, :3] @@ -105,7 +104,7 @@ def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): self._flip_quats = torch.ones(n, device=dev) self._prev_noisy_pos = torch.zeros(n, 3, device=dev) self._prev_noisy_quat = torch.zeros(n, 4, device=dev) - self._prev_noisy_quat[:, 0] = 1.0 + self._prev_noisy_quat[:, 3] = 1.0 def _ensure_body_indices(self): if self._fingertip_idx is not None: @@ -129,9 +128,9 @@ def reset(self, env_ids: list[int] | None = None): robot: Articulation = self._env.scene[self._robot_name] origins = self._env.scene.env_origins self._prev_noisy_pos[env_ids] = ( - robot.data.body_pos_w[env_ids, self._fingertip_idx] - origins[env_ids] + wp.to_torch(robot.data.body_pos_w)[env_ids, self._fingertip_idx] - origins[env_ids] ) - self._prev_noisy_quat[env_ids] = robot.data.body_quat_w[env_ids, self._fingertip_idx] + self._prev_noisy_quat[env_ids] = wp.to_torch(robot.data.body_quat_w)[env_ids, self._fingertip_idx] def __call__( self, @@ -155,8 +154,8 @@ def __call__( board = env.scene[self._board_name] origins = env.scene.env_origins - ft_pos = robot.data.body_pos_w[:, self._fingertip_idx] - origins - ft_quat = robot.data.body_quat_w[:, self._fingertip_idx] + ft_pos = wp.to_torch(robot.data.body_pos_w)[:, self._fingertip_idx] - origins + ft_quat = wp.to_torch(robot.data.body_quat_w)[:, self._fingertip_idx] pos_noise = torch.randn(n, 3, device=dev) * self._pos_noise noisy_pos = ft_pos + pos_noise @@ -164,31 +163,32 @@ def __call__( rot_noise_axis = torch.randn(n, 3, device=dev) rot_noise_axis = rot_noise_axis / (torch.linalg.norm(rot_noise_axis, dim=1, keepdim=True) + 1e-8) rot_noise_angle = torch.randn(n, device=dev) * math.radians(self._rot_noise_deg) - noisy_quat = torch_utils.quat_mul( - ft_quat, torch_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), + noisy_quat = math_utils.quat_mul( + ft_quat, math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), ) - noisy_quat[:, [0, 3]] = 0.0 + noisy_quat[:, [2, 3]] = 0.0 noisy_quat = noisy_quat * self._flip_quats.unsqueeze(-1) arm_osc_action = env.action_manager._terms["arm_action"] - board_pos = board.data.root_pos_w - origins - board_quat = board.data.root_quat_w + board_pos = wp.to_torch(board.data.root_pos_w) - origins + board_quat = wp.to_torch(board.data.root_quat_w) peg_offset_exp = self._peg_offset.unsqueeze(0).expand(n, 3) peg_pos = board_pos + math_utils.quat_apply(board_quat, peg_offset_exp) noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise fingertip_pos_rel = noisy_pos - noisy_fixed_pos - ee_linvel = (noisy_pos - self._prev_noisy_pos) / dt + safe_dt = max(dt, 1e-6) + ee_linvel = (noisy_pos - self._prev_noisy_pos) / safe_dt self._prev_noisy_pos[:] = noisy_pos - rot_diff = torch_utils.quat_mul(noisy_quat, torch_utils.quat_conjugate(self._prev_noisy_quat)) - rot_diff = rot_diff * torch.sign(rot_diff[:, 0]).unsqueeze(-1) - ee_angvel = axis_angle_from_quat(rot_diff) / dt + rot_diff = math_utils.quat_mul(noisy_quat, math_utils.quat_conjugate(self._prev_noisy_quat)) + rot_diff = rot_diff * torch.sign(rot_diff[:, 3]).unsqueeze(-1) + ee_angvel = axis_angle_from_quat(rot_diff) / safe_dt ee_angvel[:, 0:2] = 0.0 self._prev_noisy_quat[:] = noisy_quat - raw_force = robot.root_physx_view.get_link_incoming_joint_force()[:, self._force_idx, :3] + raw_force = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, self._force_idx, :3] arm_osc_action.force_smooth[:] = ( self._ft_alpha * raw_force + (1.0 - self._ft_alpha) * arm_osc_action.force_smooth ) diff --git a/isaaclab_arena_environments/mdp/robot_configs.py b/isaaclab_arena_environments/mdp/robot_configs.py index 3f4c13325..342a93cfb 100644 --- a/isaaclab_arena_environments/mdp/robot_configs.py +++ b/isaaclab_arena_environments/mdp/robot_configs.py @@ -94,7 +94,7 @@ def franka_gripper_joint_setter( "panda_finger_joint2": 0.04, }, pos=(0.0, 0.0, 0.0), - rot=(1.0, 0.0, 0.0, 0.0), + rot=(0.0, 0.0, 0.0, 1.0), ), actuators={ "panda_arm1": ImplicitActuatorCfg( diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py index da0327673..6c5605e54 100644 --- a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -224,7 +224,7 @@ def get_env(self, args_cli: argparse.Namespace): hand_close_width=0.0, gripper_joint_setter_func=mdp.franka_gripper_joint_setter, end_effector_body_name="panda_hand", - grasp_rot_offset=[0.0, 1.0, 0.0, 0.0], + grasp_rot_offset=[1.0, 0.0, 0.0, 0.0], grasp_offset=[0.02, 0.0, -0.128], episode_length_s=episode_length_s, enable_randomization=True, From 4397017c40bb7ee599593e0f0816fe5375d7e560 Mon Sep 17 00:00:00 2001 From: odoherty Date: Wed, 8 Apr 2026 13:12:35 -0700 Subject: [PATCH 03/12] Refactor NIST task for upstream alignment and reduce code duplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Consolidate 3 observation files into single gear_insertion_observations.py - Replace 4 custom obs functions with Isaac Lab built-ins (root_pos_w, root_quat_w) - Slim NistGearInsertionTask constructor (40 → 17 params) via GraspConfig dataclass - Hardcode reward weights in configclass instead of passing through constructor - Delete bespoke play_rl_games.py; use generic policy_runner.py for evaluation - Genericise RlGamesActionPolicy (remove NIST-specific defaults) - Move RL-Games YAML config to isaaclab_arena_examples/policy/ - Clean up mdp/__init__.py re-exports - Add merge readiness report and NIST vs Lift comparison doc --- docs/nist_vs_lift_comparison.md | 219 +++++++++++ .../policy/rl_games_action_policy.py | 25 +- .../reinforcement_learning/play_rl_games.py | 133 ------- .../reinforcement_learning/train_rl_games.py | 2 +- .../tasks/nist_gear_insertion_task.py | 371 +++++++++--------- .../gear_insertion_observations.py | 262 +++++++++++-- isaaclab_arena_environments/mdp/__init__.py | 2 - .../mdp/observations.py | 210 ---------- ...nist_assembled_gearmesh_osc_environment.py | 126 ++---- .../nist_gear_insertion_osc_rl_games.yaml | 0 10 files changed, 673 insertions(+), 677 deletions(-) create mode 100644 docs/nist_vs_lift_comparison.md delete mode 100644 isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py delete mode 100644 isaaclab_arena_environments/mdp/observations.py rename {isaaclab_arena/policy/rl_policy => isaaclab_arena_examples/policy}/nist_gear_insertion_osc_rl_games.yaml (100%) diff --git a/docs/nist_vs_lift_comparison.md b/docs/nist_vs_lift_comparison.md new file mode 100644 index 000000000..ac2514c13 --- /dev/null +++ b/docs/nist_vs_lift_comparison.md @@ -0,0 +1,219 @@ +# NIST Gear Insertion vs Lift Object — Arena Integration Comparison + +This document shows the structural differences between the **Lift Object** task (the upstream reference) and the **NIST Gear Insertion** task, organized by integration layer. Use it to identify what needs to change to bring NIST into alignment. + +--- + +## 1. Environment File (`isaaclab_arena_environments/`) + +| Aspect | Lift (`lift_object_environment.py`) | NIST (`nist_assembled_gearmesh_osc_environment.py`) | Delta | +|--------|------|------|-------| +| **RL Framework** | `RLFramework.RSL_RL` | `RLFramework.RL_GAMES` | Different RL library | +| **Policy Config** | `f"{base_rsl_rl_policy.__name__}:RLPolicyCfg"` (Python class) | `"isaaclab_arena.policy.rl_policy:nist_gear_insertion_osc_rl_games.yaml"` (YAML string) | Lift uses a Python configclass; NIST uses a YAML file. The `rl_policy/` directory has no `__init__.py`, which breaks `importlib.import_module` in Lab's `load_cfg_from_registry`. | +| **Embodiment default** | `"franka_joint_pos"` | `"franka_ik"` | Different default robot mode | +| **Robot config override** | None (uses embodiment default) | Replaces `scene_config.robot` with `FRANKA_MIMIC_OSC_CFG`, overrides `ee_frame`, `initial_joint_pose`, `arm_action`, `gripper_action` | NIST deeply customizes the embodiment post-construction | +| **Observation overrides** | None (uses embodiment defaults) | Nulls out 6 default policy obs, adds custom `nist_gear_policy_obs` ObsTerm | NIST replaces the entire policy observation group | +| **Reward overrides** | None | Nulls out `action_rate` and `joint_vel` from embodiment | NIST replaces default regularizers with task-specific ones | +| **env_cfg_callback** | None | `mdp.assembly_env_cfg_callback` (sets PhysX `gpu_collision_stack_size`) | NIST needs custom sim config | +| **Extra imports** | Minimal (5 imports in `get_env`) | Heavy (10+ imports including OSC controller, frame transformer, custom action/obs) | NIST has significantly more complexity in the env file | + +### Lift environment file (89 lines) + +```python +# Key structure: +embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)(concatenate_observation_terms=True) +# No overrides — uses embodiment defaults for robot, actions, observations, rewards + +task = LiftObjectTaskRL(pick_up_object, background, embodiment, ...) + +return IsaacLabArenaEnvironment( + ..., + rl_framework=RLFramework.RSL_RL, + rl_policy_cfg=f"{base_rsl_rl_policy.__name__}:RLPolicyCfg", +) +``` + +### NIST environment file (216 lines) + +```python +# Key structure: +embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)(...) +embodiment.scene_config.robot = mdp.FRANKA_MIMIC_OSC_CFG.replace(...) # Override robot +embodiment.scene_config.ee_frame = FrameTransformerCfg(...) # Override EE frame +embodiment.set_initial_joint_pose([...]) # Override joint pose +embodiment.action_config.arm_action = NistGearInsertionOscActionCfg(...) # Override action +embodiment.action_config.gripper_action = BinaryJointPositionActionCfg(...) +embodiment.observation_config.policy.joint_pos = None # Null out defaults +# ... null out 5 more default obs ... +embodiment.observation_config.policy.nist_gear_policy_obs = ObsTerm(...) # Custom obs +embodiment.reward_config.action_rate = None # Null out defaults +embodiment.reward_config.joint_vel = None + +task = NistGearInsertionTask(assembled_board, held_gear, background, ...) + +return IsaacLabArenaEnvironment( + ..., + env_cfg_callback=mdp.assembly_env_cfg_callback, + rl_framework=RLFramework.RL_GAMES, + rl_policy_cfg="isaaclab_arena.policy.rl_policy:nist_gear_insertion_osc_rl_games.yaml", +) +``` + +--- + +## 2. Task Class (`isaaclab_arena/tasks/`) + +| Aspect | Lift (`lift_object_task.py`) | NIST (`nist_gear_insertion_task.py`) | Delta | +|--------|------|------|-------| +| **Base class** | `TaskBase` (IL) / `LiftObjectTaskRL(LiftObjectTask)` (RL) | `TaskBase` (single class) | Lift has IL/RL split; NIST is RL-only | +| **Constructor args** | 8 params (RL version) | 35+ params | NIST is far more parameterized | +| **Observations** | `LiftObjectObservationsCfg` — 2 terms (target_pos, object_pos) in `task_obs` group | `_GearInsertionObservationsCfg` — 9 terms (gear_pos/quat, peg_pos, board_quat, peg_delta, joint_pos/vel, ee_pos/quat) in `task_obs` group | NIST has richer privileged observations | +| **Rewards** | 4 terms: reaching, lifting, goal_tracking, goal_tracking_fine | 10 terms: 3x keypoint_squashing, engagement_bonus, success_bonus, action_penalty, action_grad, contact_penalty, success_pred_error | NIST has a much more complex reward structure | +| **Terminations** | `time_out`, `object_dropped`, `success` (conditional on `rl_training_mode`) | `time_out`, `success` (conditional), `object_dropped`, `gear_dropped_from_gripper`, `gear_orientation_exceeded` | Similar pattern, NIST has more termination conditions | +| **Events** | None (uses embodiment defaults) | 13+ events: `place_gear`, `fixed_asset_pose`, 6x physics materials, `held_object_mass`, `robot_actuator_gains`, `robot_joint_friction` | NIST has extensive domain randomization | +| **Commands** | `LiftObjectCommandsCfg` with `UniformPoseCommandCfg` | None | Lift uses a command manager for goal poses; NIST has no commands | +| **Curriculum** | None | None | Same | +| **`rl_training_mode`** | Passed to termination; disables success termination | Same pattern | Aligned | + +--- + +## 3. Training Scripts + +| Aspect | Lift | NIST | Delta | +|--------|------|------|-------| +| **Script** | Isaac Lab's `submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py` | Arena's custom `isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py` | **Completely different scripts** | +| **Environment registration** | `--external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback` | `get_arena_builder_from_cli(args_cli)` → `arena_builder.build_registered()` | Lift uses Lab's callback mechanism; NIST builds directly | +| **Agent config source** | Auto-resolved from gym registry via `@hydra_task_config` decorator → `rsl_rl_cfg_entry_point` | Loaded manually from `--agent_cfg_path` CLI arg (YAML file) | Lift is auto-discovered; NIST is manual | +| **Hydra support** | Yes (override hyperparams via CLI: `agent.policy.activation=relu`) | No | NIST cannot use Hydra overrides | +| **Distributed training** | Built-in `--distributed` flag with multi-GPU rank handling | Not supported | Missing from NIST | +| **W&B tracking** | Not in RSL-RL Arena flow (but available in Lab RL-Games script) | Not supported | Missing from NIST | + +### Lift training command + +```bash +python submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ + --rl_training_mode \ + --num_envs 4096 \ + --max_iterations 2000 +``` + +### NIST training command + +```bash +/isaac-sim/python.sh isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ + --environment nist_assembled_gear_mesh_osc \ + --rl_training_mode \ + --num_envs 4096 \ + --max_iterations 1000 \ + --agent_cfg_path isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml +``` + +--- + +## 4. Evaluation / Play Scripts + +| Aspect | Lift | NIST | Delta | +|--------|------|------|-------| +| **Script** | Arena's `isaaclab_arena/evaluation/policy_runner.py` (generic, framework-agnostic) | Arena's custom `isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py` | Lift uses the generic policy runner; NIST has its own | +| **Policy class** | Uses `PolicyBase` subclass (e.g., RSL-RL policy loaded by policy runner) | Directly creates RL-Games `Runner` and calls `runner.run(play=True)` | NIST bypasses the policy abstraction | +| **NIST also has** | — | `isaaclab_arena/policy/rl_games_action_policy.py` (`RlGamesActionPolicy`) which IS a `PolicyBase` subclass and CAN work with `policy_runner.py` | This exists but isn't the primary eval path | + +### Lift play command + +```bash +python isaaclab_arena/evaluation/policy_runner.py \ + --task lift_object \ + --num_envs 50 \ + --checkpoint \ + --policy_type rsl_rl +``` + +### NIST play command + +```bash +/isaac-sim/python.sh isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py \ + --environment nist_assembled_gear_mesh_osc \ + --num_envs 50 \ + --agent_cfg_path isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml \ + --checkpoint +``` + +### NIST alternative (via generic policy runner) + +```bash +python isaaclab_arena/evaluation/policy_runner.py \ + --task nist_assembled_gear_mesh_osc \ + --num_envs 50 \ + --policy_type rl_games \ + --checkpoint_path \ + --agent_cfg_path isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml +``` + +--- + +## 5. Policy Configuration + +| Aspect | Lift | NIST | Delta | +|--------|------|------|-------| +| **Format** | Python `@configclass` (`RslRlOnPolicyRunnerCfg` subclass) | YAML file | Different formats | +| **Location** | `isaaclab_arena_examples/policy/base_rsl_rl_policy.py` | `isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml` | Different packages | +| **Network** | MLP `[256, 128, 64]`, activation=elu | MLP `[512, 128, 64]` + LSTM (1024 units, 2 layers), activation=elu | NIST uses recurrent policy | +| **PPO params** | `gamma=0.98`, `lam=0.95`, `lr=1e-4`, `entropy_coef=0.006` | `gamma=0.995`, `tau=0.95`, `lr=1e-4`, `entropy_coef=0.0` | Different discount/entropy | +| **Horizon** | `num_steps_per_env=24` | `horizon_length=128`, `seq_length=128` | NIST uses longer rollouts (LSTM needs it) | +| **Obs groups** | `{"actor": ["policy", "task_obs"], "critic": ["policy", "task_obs"]}` | `{"obs": ["policy", "task_obs"], "states": ["policy", "task_obs"]}` + `central_value_config` | NIST uses asymmetric actor-critic with central value | +| **Resolvable by Lab** | Yes — Python class loaded via `load_cfg_from_registry` | Partially — YAML can be loaded but `isaaclab_arena.policy.rl_policy` has no `__init__.py` so `importlib.import_module` fails | **Blocker** for Lab standard path | + +--- + +## 6. Custom MDP Components (NIST-only) + +These files exist only for NIST and have no lift equivalent: + +| File | Purpose | +|------|---------| +| `isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py` | Custom OSC action with peg-relative targets, EMA smoothing, contact-aware dead zones | +| `isaaclab_arena_environments/mdp/nist_gear_insertion_observations.py` | 24-D policy observation (peg-relative EE pose, force feedback, prev actions) | +| `isaaclab_arena_environments/mdp/robot_configs.py` | `FRANKA_MIMIC_OSC_CFG` — custom Franka config for torque control | +| `isaaclab_arena_environments/mdp/env_callbacks.py` | `assembly_env_cfg_callback` — sets PhysX GPU collision stack size | +| `isaaclab_arena/tasks/rewards/gear_insertion_rewards.py` | 7 reward classes/functions (keypoint squashing, bonuses, penalties) | +| `isaaclab_arena/tasks/observations/gear_insertion_observations.py` | 5 observation functions (gear/peg pos, quat, delta) | +| `isaaclab_arena/tasks/terminations.py` | 3 termination functions (insertion success, gear dropped, orientation exceeded) | +| `isaaclab_arena/terms/events.py` | `place_gear_in_gripper` event | + +--- + +## 7. Key Blocker: Isaac Lab Standard Training Path + +The `--external_callback` mechanism exists in **RSL-RL's `train.py`** but **not in RL-Games' `train.py`**. + +``` +Isaac Lab rsl_rl/train.py: + --external_callback → environment_registration_callback() → gym.register() → gym.make() + ✅ Lift uses this path + +Isaac Lab rl_games/train.py: + No --external_callback → resolve_task_config(task, agent) → load_cfg_from_registry() + ❌ NIST cannot use this path (no callback + YAML module path has no __init__.py) +``` + +### Options to resolve + +| Option | What changes | Effort | +|--------|-------------|--------| +| **A. Switch NIST to RSL-RL** | Write `RslRlOnPolicyRunnerCfg` equivalent; retune hyperparams; lose LSTM (RSL-RL has GRU) | Medium | +| **B. Add `--external_callback` to RL-Games script** | ~8 lines added to `rl_games/train.py` (upstream PR to IsaacLab) + add `__init__.py` to `rl_policy/` | Small code, needs upstream approval | +| **C. Keep custom Arena scripts** | No changes needed, already working | Zero (but stays off standard path) | + +--- + +## 8. Summary: What to Align + +Sorted by priority for upstream integration: + +1. **Training script path** — The biggest structural difference. Choose option A, B, or C above. +2. **Policy config format** — If staying on RL-Games, add `__init__.py` to `isaaclab_arena/policy/rl_policy/` so the YAML path is resolvable by `importlib`. +3. **Environment file complexity** — NIST's deep embodiment customization (overriding robot, actions, observations, rewards) is inherently more complex than lift. This is unavoidable given OSC control, but the pattern of null-then-replace could be formalized. +4. **Play/eval path** — `RlGamesActionPolicy` is now generic (no NIST defaults). Use `policy_runner.py --policy_type rl_games --agent_cfg_path --checkpoint_path ` as the primary eval path. RESOLVED. +5. **Task constructor** — 35+ params could be simplified with a config dataclass pattern (like lift's cleaner constructor), but this is cosmetic. diff --git a/isaaclab_arena/policy/rl_games_action_policy.py b/isaaclab_arena/policy/rl_games_action_policy.py index 66efd5b98..b8cad65ba 100644 --- a/isaaclab_arena/policy/rl_games_action_policy.py +++ b/isaaclab_arena/policy/rl_games_action_policy.py @@ -35,8 +35,11 @@ class RlGamesActionPolicyConfig: checkpoint_path: str """Path to the RL-Games .pth checkpoint file.""" - agent_cfg_path: Path = Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml") - """Path to the RL-Games agent YAML configuration file.""" + agent_cfg_path: Path = None + """Path to the RL-Games agent YAML configuration file. + + When using the CLI (``policy_runner.py``), this is set via ``--agent_cfg_path``. + """ device: str = "cuda:0" """Device to run the policy on.""" @@ -61,22 +64,6 @@ class RlGamesActionPolicy(PolicyBase): Wraps the RL-Games player for use with the Arena policy runner and eval runner. Handles observation processing (concatenation, clipping) and LSTM state management. - Example JSON configuration for eval runner:: - - { - "jobs": [{ - "name": "eval_gear_insertion", - "policy_type": "rl_games", - "policy_config_dict": { - "checkpoint_path": "logs/rl_games/.../nn/model.pth", - "agent_cfg_path": "isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml", - "device": "cuda:0", - "deterministic": true - }, - "arena_env_args": {"environment": "nist_assembled_gear_mesh_osc"}, - "num_episodes": 100 - }] - } """ name = "rl_games" @@ -189,7 +176,7 @@ def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars group.add_argument( "--agent_cfg_path", type=Path, - default=Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml"), + required=True, help="Path to the RL-Games agent YAML configuration file.", ) group.add_argument( diff --git a/isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py deleted file mode 100644 index 86f111cbe..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. -# SPDX-License-Identifier: Apache-2.0 - -"""Play/evaluate an RL-Games checkpoint using the Arena environment builder. - -Launch Isaac Sim Simulator first. -""" - -import math -from pathlib import Path - -from isaaclab.app import AppLauncher - -from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser -from isaaclab_arena_environments.cli import add_example_environments_cli_args - -parser = get_isaaclab_arena_cli_parser() -parser.add_argument("--video", action="store_true", default=False, help="Record videos during playback.") -parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument( - "--agent_cfg_path", - type=Path, - default=Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml"), - help="Path to the RL-Games agent YAML configuration file.", -) -parser.add_argument("--checkpoint", type=str, required=True, help="Path to model checkpoint.") -parser.add_argument("--sigma", type=str, default=None, help="Override the policy's standard deviation.") -add_example_environments_cli_args(parser) -args_cli = parser.parse_args() - -if args_cli.video: - args_cli.enable_cameras = True - -if getattr(args_cli, "enable_pinocchio", False): - import pinocchio # noqa: F401 - -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app - -"""Rest everything follows.""" - -import gymnasium as gym -import os -import yaml - -import omni.log -from rl_games.common import env_configurations, vecenv -from rl_games.common.algo_observer import IsaacAlgoObserver -from rl_games.torch_runner import Runner - -from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent -from isaaclab.utils.assets import retrieve_file_path -from isaaclab.utils.dict import print_dict - -from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper - -from isaaclab_arena_environments.cli import get_arena_builder_from_cli - - -def main(): - """Play with RL-Games agent.""" - try: - arena_builder = get_arena_builder_from_cli(args_cli) - env_name, env_cfg = arena_builder.build_registered() - except Exception as e: - omni.log.error(f"Failed to parse environment configuration: {e}") - exit(1) - - with open(args_cli.agent_cfg_path) as f: - agent_cfg = yaml.safe_load(f) - - if args_cli.num_envs is not None: - env_cfg.scene.num_envs = args_cli.num_envs - if args_cli.device is not None: - env_cfg.sim.device = args_cli.device - agent_cfg["params"]["config"]["device"] = args_cli.device - agent_cfg["params"]["config"]["device_name"] = args_cli.device - - resume_path = retrieve_file_path(args_cli.checkpoint) - agent_cfg["params"]["load_checkpoint"] = True - agent_cfg["params"]["load_path"] = resume_path - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - - env_cfg.seed = agent_cfg["params"]["seed"] - - log_dir = os.path.dirname(resume_path) - env_cfg.log_dir = log_dir - - env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) - - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - - if args_cli.video: - video_kwargs = { - "video_folder": os.path.join(log_dir, "videos", "play"), - "step_trigger": lambda step: step == 0, - "video_length": args_cli.video_length, - "disable_logger": True, - } - print("[INFO] Recording videos during playback.") - print_dict(video_kwargs, nesting=4) - env = gym.wrappers.RecordVideo(env, **video_kwargs) - - rl_device = agent_cfg["params"]["config"]["device"] - clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf) - clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf) - obs_groups = agent_cfg["params"]["env"].get("obs_groups") - concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) - - env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups) - - vecenv.register( - "IsaacRlgWrapper", - lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs), - ) - env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env}) - - agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs - - runner = Runner(IsaacAlgoObserver()) - runner.load(agent_cfg) - runner.reset() - - play_sigma = float(args_cli.sigma) if args_cli.sigma is not None else None - runner.run({"train": False, "play": True, "sigma": play_sigma, "checkpoint": resume_path}) - - env.close() - - -if __name__ == "__main__": - main() - simulation_app.close() diff --git a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py index d06f031e0..1ed9a9fe5 100644 --- a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py +++ b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py @@ -21,7 +21,7 @@ parser.add_argument( "--agent_cfg_path", type=Path, - default=Path("isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml"), + required=True, help="Path to the RL-Games agent YAML configuration file.", ) parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint to resume from.") diff --git a/isaaclab_arena/tasks/nist_gear_insertion_task.py b/isaaclab_arena/tasks/nist_gear_insertion_task.py index 77df82f5f..5c000f0bb 100644 --- a/isaaclab_arena/tasks/nist_gear_insertion_task.py +++ b/isaaclab_arena/tasks/nist_gear_insertion_task.py @@ -5,17 +5,14 @@ """Simple gear insertion task for the assembled NIST board. -The medium gear is a separate object that the robot must pick up and insert -onto the peg. The peg can be on the assembled board or on the gearbase -asset (gears_and_base; USD gearbase_and_gears.usd). Use :attr:`gear_base_asset` to specify the -asset whose pose + offset defines the peg (fixed asset + local offset); if omitted, the -assembled board is used. +The medium gear is a separate object that the robot must insert +onto the peg. """ from __future__ import annotations from collections.abc import Callable -from dataclasses import MISSING +from dataclasses import MISSING, dataclass, field from typing import TYPE_CHECKING import numpy as np @@ -30,7 +27,7 @@ from isaaclab_arena.metrics.metric_base import MetricBase from isaaclab_arena.metrics.success_rate import SuccessRateMetric from isaaclab_arena.tasks.observations import gear_insertion_observations -from isaaclab_arena_environments.mdp.observations import body_pos_in_env_frame, body_quat_canonical +from isaaclab_arena.tasks.observations.gear_insertion_observations import body_pos_in_env_frame, body_quat_canonical from isaaclab_arena.tasks.rewards import gear_insertion_rewards from isaaclab_arena.tasks.task_base import TaskBase from isaaclab_arena.tasks.terminations import ( @@ -47,12 +44,30 @@ import torch +@dataclass +class GraspConfig: + """Configuration for placing the gear in the robot's gripper at reset. + + Groups all embodiment-specific grasp parameters so the task constructor + stays focused on task-level concerns (geometry, success criteria). + """ + + num_arm_joints: int = 7 + hand_grasp_width: float = 0.03 + hand_close_width: float = 0.0 + gripper_joint_setter_func: Callable | None = None + end_effector_body_name: str = "panda_hand" + grasp_rot_offset: list[float] = field(default_factory=lambda: [1.0, 0.0, 0.0, 0.0]) + grasp_offset: list[float] = field(default_factory=lambda: [0.0, 0.0, 0.0]) + arm_joint_names: str = "panda_joint.*" + finger_body_names: str = ".*finger" + + class NistGearInsertionTask(TaskBase): """Gear insertion task: insert the medium gear onto the peg. Peg position is computed from a fixed asset (board or separate gear-base USD) - plus an offset in that asset's local frame (assembly peg-insert convention). Use - :attr:`gear_base_asset` when the peg is on the gearbase (gears_and_base / gearbase_and_gears.usd). + plus an offset in that asset's local frame (assembly peg-insert convention). """ def __init__( @@ -61,41 +76,19 @@ def __init__( held_gear: Asset, background_scene: Asset, peg_offset_from_board: list[float] | None = None, + peg_offset_for_obs: list[float] | None = None, held_gear_base_offset: list[float] | None = None, + gear_base_asset: Asset | None = None, gear_peg_height: float = 0.02, success_z_fraction: float = 0.30, xy_threshold: float = 0.0025, episode_length_s: float | None = None, task_description: str | None = None, - start_in_gripper: bool = False, - num_arm_joints: int = 7, - hand_grasp_width: float = 0.03, - hand_close_width: float = 0.0, - gripper_joint_setter_func: Callable[ - [torch.Tensor, Sequence[int], Sequence[int], float], None - ] | None = None, - end_effector_body_name: str = "panda_hand", - grasp_rot_offset: list[float] | None = None, - grasp_offset: list[float] | None = None, - include_success_bonus: bool = True, + grasp_cfg: GraspConfig | None = None, enable_randomization: bool = False, - arm_joint_names: str = "panda_joint.*", - finger_body_names: str = ".*finger", - peg_offset_xy_noise: float = 0.0, - gear_base_asset: Asset | None = None, - peg_offset_for_obs: list[float] | None = None, - disable_drop_terminations: bool = False, - engagement_xy_threshold: float | None = None, - success_bonus_weight: float | None = None, - include_insertion_regularizers: bool = False, - pos_action_threshold: float = 0.02, - rot_action_threshold: float = 0.097, - action_penalty_weight: float = -0.0005, - action_grad_penalty_weight: float = -0.01, - contact_penalty_weight: float = -0.001, - success_pred_error_weight: float = -1.0, - success_pred_error_delay: float = 0.25, - extra_event_terms: dict[str, EventTermCfg] | None = None, + peg_offset_xy_noise: float = 0.005, + disable_drop_terminations: bool = True, + rl_training_mode: bool = False, ): super().__init__(episode_length_s=episode_length_s) self.assembled_board = assembled_board @@ -109,30 +102,10 @@ def __init__( self.gear_peg_height = gear_peg_height self.success_z_fraction = success_z_fraction self.xy_threshold = xy_threshold - self.start_in_gripper = start_in_gripper - self.num_arm_joints = num_arm_joints - self.hand_grasp_width = hand_grasp_width - self.hand_close_width = hand_close_width - self.gripper_joint_setter_func = gripper_joint_setter_func - self.end_effector_body_name = end_effector_body_name - self.grasp_rot_offset = grasp_rot_offset or [1.0, 0.0, 0.0, 0.0] - self.grasp_offset = grasp_offset or [0.0, 0.0, 0.0] - self.include_success_bonus = include_success_bonus + self.grasp_cfg = grasp_cfg self.enable_randomization = enable_randomization - self.arm_joint_names = arm_joint_names - self.finger_body_names = finger_body_names self.disable_drop_terminations = disable_drop_terminations - self.engagement_xy_threshold = engagement_xy_threshold - self.success_bonus_weight = success_bonus_weight - self.include_insertion_regularizers = include_insertion_regularizers - self.pos_action_threshold = pos_action_threshold - self.rot_action_threshold = rot_action_threshold - self.action_penalty_weight = action_penalty_weight - self.action_grad_penalty_weight = action_grad_penalty_weight - self.contact_penalty_weight = contact_penalty_weight - self.success_pred_error_weight = success_pred_error_weight - self.success_pred_error_delay = success_pred_error_delay - self.extra_event_terms = extra_event_terms or {} + self.rl_training_mode = rl_training_mode self.task_description = ( f"Insert the {held_gear.name} onto the gear base on the {assembled_board.name}" if task_description is None @@ -157,21 +130,10 @@ def get_rewards_cfg(self): board_name=self._gear_base_asset.name, peg_offset=self.peg_offset_from_board, held_gear_base_offset=self.held_gear_base_offset, - include_success_bonus=self.include_success_bonus, - peg_offset_xy_noise=self.peg_offset_xy_noise, gear_peg_height=self.gear_peg_height, success_z_fraction=self.success_z_fraction, xy_threshold=self.xy_threshold, - engagement_xy_threshold=self.engagement_xy_threshold, - success_bonus_weight=self.success_bonus_weight, - include_insertion_regularizers=self.include_insertion_regularizers, - pos_action_threshold=self.pos_action_threshold, - rot_action_threshold=self.rot_action_threshold, - action_penalty_weight=self.action_penalty_weight, - action_grad_penalty_weight=self.action_grad_penalty_weight, - contact_penalty_weight=self.contact_penalty_weight, - success_pred_error_weight=self.success_pred_error_weight, - success_pred_error_delay=self.success_pred_error_delay, + peg_offset_xy_noise=self.peg_offset_xy_noise, ) def get_termination_cfg(self): @@ -195,13 +157,15 @@ def get_termination_cfg(self): }, ) cfg = _TerminationsCfg(success=success, object_dropped=object_dropped) - if self.start_in_gripper: + if self.rl_training_mode: + cfg.success = None + if self.grasp_cfg is not None: cfg.gear_dropped_from_gripper = TerminationTermCfg( func=gear_dropped_from_gripper, params={ "gear_cfg": SceneEntityCfg(self.held_gear.name), "robot_cfg": SceneEntityCfg("robot"), - "ee_body_name": self.end_effector_body_name, + "ee_body_name": self.grasp_cfg.end_effector_body_name, "distance_threshold": 0.15, }, ) @@ -212,27 +176,89 @@ def get_termination_cfg(self): def get_events_cfg(self): cfg = _EventsCfg() - if self.start_in_gripper and self.gripper_joint_setter_func is not None: + gc = self.grasp_cfg + if gc is not None and gc.gripper_joint_setter_func is not None: cfg.place_gear = EventTermCfg( func=place_gear_in_gripper, mode="reset", params={ "gear_cfg": SceneEntityCfg(self.held_gear.name), - "num_arm_joints": self.num_arm_joints, - "hand_grasp_width": self.hand_grasp_width, - "hand_close_width": self.hand_close_width, - "gripper_joint_setter_func": self.gripper_joint_setter_func, - "end_effector_body_name": self.end_effector_body_name, - "grasp_rot_offset": self.grasp_rot_offset, - "grasp_offset": self.grasp_offset, + "num_arm_joints": gc.num_arm_joints, + "hand_grasp_width": gc.hand_grasp_width, + "hand_close_width": gc.hand_close_width, + "gripper_joint_setter_func": gc.gripper_joint_setter_func, + "end_effector_body_name": gc.end_effector_body_name, + "grasp_rot_offset": gc.grasp_rot_offset, + "grasp_offset": gc.grasp_offset, }, ) if self.enable_randomization: + arm_joints = gc.arm_joint_names if gc is not None else "panda_joint.*" + finger_bodies = gc.finger_body_names if gc is not None else ".*finger" + cfg.held_physics_material = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg(self.held_gear.name), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 1, + }, + ) + cfg.robot_physics_material = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 1, + }, + ) + cfg.fixed_physics_material = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg(self._gear_base_asset.name), + "static_friction_range": (0.25, 1.25), + "dynamic_friction_range": (0.25, 0.25), + "restitution_range": (0.0, 0.0), + "num_buckets": 128, + }, + ) + cfg.held_object_mass = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_mass, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg(self.held_gear.name), + "mass_distribution_params": (-0.005, 0.005), + "operation": "add", + "distribution": "uniform", + }, + ) + cfg.fixed_asset_pose = EventTermCfg( + func=mdp_isaac_lab.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg(self._gear_base_asset.name), + "pose_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.2617993877991494), + }, + "velocity_range": {}, + }, + ) cfg.robot_actuator_gains = EventTermCfg( func=mdp_isaac_lab.randomize_actuator_gains, mode="reset", params={ - "asset_cfg": SceneEntityCfg("robot", joint_names=[self.arm_joint_names]), + "asset_cfg": SceneEntityCfg("robot", joint_names=[arm_joints]), "stiffness_distribution_params": (0.75, 1.5), "damping_distribution_params": (0.3, 3.0), "operation": "scale", @@ -243,7 +269,7 @@ def get_events_cfg(self): func=mdp_isaac_lab.randomize_joint_parameters, mode="reset", params={ - "asset_cfg": SceneEntityCfg("robot", joint_names=[self.arm_joint_names]), + "asset_cfg": SceneEntityCfg("robot", joint_names=[arm_joints]), "friction_distribution_params": (0.3, 0.7), "operation": "add", "distribution": "uniform", @@ -275,15 +301,13 @@ def get_events_cfg(self): func=mdp_isaac_lab.randomize_rigid_body_material, mode="startup", params={ - "asset_cfg": SceneEntityCfg("robot", body_names=self.finger_body_names), + "asset_cfg": SceneEntityCfg("robot", body_names=finger_bodies), "static_friction_range": (0.75, 0.75), "dynamic_friction_range": (0.75, 0.75), "restitution_range": (0.0, 0.0), "num_buckets": 16, }, ) - for name, term_cfg in self.extra_event_terms.items(): - setattr(cfg, name, term_cfg) return cfg def get_prompt(self): @@ -307,7 +331,7 @@ class _TerminationsCfg: """Termination terms for the gear insertion task.""" time_out: TerminationTermCfg = TerminationTermCfg(func=mdp_isaac_lab.time_out) - success: TerminationTermCfg = MISSING + success: TerminationTermCfg | None = MISSING object_dropped: TerminationTermCfg | None = MISSING gear_dropped_from_gripper: TerminationTermCfg | None = None gear_orientation_exceeded: TerminationTermCfg | None = None @@ -322,8 +346,12 @@ class _EventsCfg: mode="reset", params={"reset_joint_targets": True}, ) - fixed_asset_pose: EventTermCfg | None = None place_gear: EventTermCfg | None = None + fixed_asset_pose: EventTermCfg | None = None + held_physics_material: EventTermCfg | None = None + robot_physics_material: EventTermCfg | None = None + fixed_physics_material: EventTermCfg | None = None + held_object_mass: EventTermCfg | None = None gripper_init_yaw_noise: EventTermCfg | None = None robot_actuator_gains: EventTermCfg | None = None robot_joint_friction: EventTermCfg | None = None @@ -350,20 +378,20 @@ def __init__( @configclass class _TaskObsCfg(ObsGroup): gear_pos = ObsTerm( - func=gear_insertion_observations.gear_pos_in_env_frame, - params={"gear_cfg": SceneEntityCfg(gear_name)}, + func=mdp_isaac_lab.root_pos_w, + params={"asset_cfg": SceneEntityCfg(gear_name)}, ) gear_quat = ObsTerm( - func=gear_insertion_observations.gear_quat_canonical, - params={"gear_cfg": SceneEntityCfg(gear_name)}, + func=mdp_isaac_lab.root_quat_w, + params={"make_quat_unique": True, "asset_cfg": SceneEntityCfg(gear_name)}, ) peg_pos = ObsTerm( func=gear_insertion_observations.peg_pos_in_env_frame, params={"board_cfg": SceneEntityCfg(board_name), "peg_offset": peg_offset}, ) board_quat = ObsTerm( - func=gear_insertion_observations.board_quat_canonical, - params={"board_cfg": SceneEntityCfg(board_name)}, + func=mdp_isaac_lab.root_quat_w, + params={"make_quat_unique": True, "asset_cfg": SceneEntityCfg(board_name)}, ) peg_delta = ObsTerm( func=gear_insertion_observations.peg_delta_from_held_gear_base, @@ -400,55 +428,52 @@ def __post_init__(self): @configclass class _GearInsertionRewardsCfg: - """Keypoint squashing, bonuses, and optional insertion regularizers for gear insertion.""" + """Keypoint squashing, bonuses, and insertion regularisers for gear insertion. + + Reward weights are hardcoded here (matching the lift-task pattern) rather + than being surfaced through the task constructor. + """ kp_baseline: RewardTermCfg = MISSING kp_coarse: RewardTermCfg = MISSING kp_fine: RewardTermCfg = MISSING - engagement_bonus: RewardTermCfg | None = None - success_bonus: RewardTermCfg | None = None - - action_penalty_asset: RewardTermCfg | None = None - action_grad_penalty: RewardTermCfg | None = None - contact_penalty: RewardTermCfg | None = None - success_pred_error: RewardTermCfg | None = None + engagement_bonus: RewardTermCfg = MISSING + success_bonus: RewardTermCfg = MISSING + action_penalty_asset: RewardTermCfg = MISSING + action_grad_penalty: RewardTermCfg = MISSING + contact_penalty: RewardTermCfg = MISSING + success_pred_error: RewardTermCfg = MISSING def __init__( self, gear_name: str, board_name: str, peg_offset: list[float], - held_gear_base_offset: list[float] | None = None, - include_success_bonus: bool = True, - peg_offset_xy_noise: float = 0.0, - gear_peg_height: float = 0.02, - success_z_fraction: float = 0.05, - xy_threshold: float = 0.0025, - engagement_xy_threshold: float | None = None, - success_bonus_weight: float | None = None, - include_insertion_regularizers: bool = False, - pos_action_threshold: float = 0.02, - rot_action_threshold: float = 0.097, - action_penalty_weight: float = -0.0005, - action_grad_penalty_weight: float = -0.01, - contact_penalty_weight: float = -0.001, - success_pred_error_weight: float = -1.0, - success_pred_error_delay: float = 0.25, + held_gear_base_offset: list[float], + gear_peg_height: float, + success_z_fraction: float, + xy_threshold: float, + peg_offset_xy_noise: float = 0.005, ): - self.action_penalty_asset = None - self.action_grad_penalty = None - self.contact_penalty = None - self.success_pred_error = None - hgo = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + hgo = held_gear_base_offset + gear_cfg = SceneEntityCfg(gear_name) + board_cfg = SceneEntityCfg(board_name) common_params = { - "gear_cfg": SceneEntityCfg(gear_name), - "board_cfg": SceneEntityCfg(board_name), + "gear_cfg": gear_cfg, + "board_cfg": board_cfg, "peg_offset": peg_offset, "held_gear_base_offset": hgo, "keypoint_scale": 0.15, "num_keypoints": 4, "peg_offset_xy_noise": peg_offset_xy_noise, } + bonus_params = { + "gear_cfg": gear_cfg, + "board_cfg": board_cfg, + "peg_offset": peg_offset, + "held_gear_base_offset": hgo, + } + self.kp_baseline = RewardTermCfg( func=gear_insertion_rewards.gear_peg_keypoint_squashing, weight=1.0, @@ -464,60 +489,42 @@ def __init__( weight=1.0, params={**common_params, "squash_a": 100.0, "squash_b": 0.0}, ) - bonus_params = { - "gear_cfg": SceneEntityCfg(gear_name), - "board_cfg": SceneEntityCfg(board_name), - "peg_offset": peg_offset, - "held_gear_base_offset": hgo, - } - if include_success_bonus: - self.engagement_bonus = RewardTermCfg( - func=gear_insertion_rewards.gear_insertion_engagement_bonus, - weight=1.0, - params={ - **bonus_params, - "engage_z_fraction": 0.90, - "xy_threshold": 0.015 if engagement_xy_threshold is None else engagement_xy_threshold, - }, - ) - self.success_bonus = RewardTermCfg( - func=gear_insertion_rewards.gear_insertion_success_bonus, - weight=1.0 if success_bonus_weight is None else success_bonus_weight, - params={**bonus_params, "success_z_fraction": success_z_fraction}, - ) - else: - self.engagement_bonus = None - self.success_bonus = None - if include_insertion_regularizers: - self.action_penalty_asset = RewardTermCfg( - func=gear_insertion_rewards.osc_action_magnitude_penalty, - weight=action_penalty_weight, - params={ - "pos_action_threshold": pos_action_threshold, - "rot_action_threshold": rot_action_threshold, - }, - ) - self.action_grad_penalty = RewardTermCfg( - func=gear_insertion_rewards.osc_action_delta_penalty, - weight=action_grad_penalty_weight, - params={}, - ) - self.contact_penalty = RewardTermCfg( - func=gear_insertion_rewards.wrist_contact_force_penalty, - weight=contact_penalty_weight, - params={}, - ) - self.success_pred_error = RewardTermCfg( - func=gear_insertion_rewards.success_prediction_error, - weight=success_pred_error_weight, - params={ - "gear_cfg": SceneEntityCfg(gear_name), - "board_cfg": SceneEntityCfg(board_name), - "peg_offset": peg_offset, - "held_gear_base_offset": hgo, - "gear_peg_height": gear_peg_height, - "success_z_fraction": success_z_fraction, - "xy_threshold": xy_threshold, - "delay_until_ratio": success_pred_error_delay, - }, - ) + self.engagement_bonus = RewardTermCfg( + func=gear_insertion_rewards.gear_insertion_engagement_bonus, + weight=1.0, + params={**bonus_params, "engage_z_fraction": 0.90, "xy_threshold": xy_threshold}, + ) + self.success_bonus = RewardTermCfg( + func=gear_insertion_rewards.gear_insertion_success_bonus, + weight=1.0, + params={**bonus_params, "success_z_fraction": success_z_fraction}, + ) + self.action_penalty_asset = RewardTermCfg( + func=gear_insertion_rewards.osc_action_magnitude_penalty, + weight=-0.0005, + params={"pos_action_threshold": 0.02, "rot_action_threshold": 0.097}, + ) + self.action_grad_penalty = RewardTermCfg( + func=gear_insertion_rewards.osc_action_delta_penalty, + weight=-0.01, + params={}, + ) + self.contact_penalty = RewardTermCfg( + func=gear_insertion_rewards.wrist_contact_force_penalty, + weight=-0.001, + params={}, + ) + self.success_pred_error = RewardTermCfg( + func=gear_insertion_rewards.success_prediction_error, + weight=-1.0, + params={ + "gear_cfg": gear_cfg, + "board_cfg": board_cfg, + "peg_offset": peg_offset, + "held_gear_base_offset": hgo, + "gear_peg_height": gear_peg_height, + "success_z_fraction": success_z_fraction, + "xy_threshold": xy_threshold, + "delay_until_ratio": 0.25, + }, + ) diff --git a/isaaclab_arena/tasks/observations/gear_insertion_observations.py b/isaaclab_arena/tasks/observations/gear_insertion_observations.py index 705e5c102..1eaf89f1a 100644 --- a/isaaclab_arena/tasks/observations/gear_insertion_observations.py +++ b/isaaclab_arena/tasks/observations/gear_insertion_observations.py @@ -3,44 +3,36 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Observation terms for the NIST gear insertion task.""" +"""All observation terms for the NIST gear insertion task. -import torch -import warp as wp +This module contains: -import isaaclab.utils.math as math_utils -from isaaclab.assets import RigidObject -from isaaclab.envs import ManagerBasedRLEnv -from isaaclab.managers import SceneEntityCfg +* Task-level observation primitives (peg position, gear-base offsets, deltas). +* Privileged critic helpers (body pose, force/torque at a body link). +* The 24-D policy observation class (``NistGearInsertionPolicyObservations``). +""" +from __future__ import annotations -def gear_pos_in_env_frame( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), -) -> torch.Tensor: - """Position of the held gear relative to the environment origin.""" - gear: RigidObject = env.scene[gear_cfg.name] - return wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins +import math +from typing import TYPE_CHECKING +import torch +import warp as wp -def gear_quat_canonical( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), -) -> torch.Tensor: - """Orientation of the held gear, canonicalized so w >= 0.""" - gear: RigidObject = env.scene[gear_cfg.name] - quat = wp.to_torch(gear.data.root_quat_w) - sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) - return quat * sign +import isaaclab.utils.math as math_utils +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import ManagerTermBase, SceneEntityCfg +from isaaclab.utils.math import axis_angle_from_quat +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + from isaaclab.managers import ObservationTermCfg -def board_pos_in_env_frame( - env: ManagerBasedRLEnv, - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), -) -> torch.Tensor: - """Position of the board/base relative to the environment origin.""" - board: RigidObject = env.scene[board_cfg.name] - return wp.to_torch(board.data.root_pos_w) - env.scene.env_origins + +# --------------------------------------------------------------------------- +# Task-level observation primitives +# --------------------------------------------------------------------------- def peg_pos_in_env_frame( @@ -56,17 +48,6 @@ def peg_pos_in_env_frame( return pos + math_utils.quat_apply(quat, offset) -def board_quat_canonical( - env: ManagerBasedRLEnv, - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), -) -> torch.Tensor: - """Orientation of the assembled board / peg, canonicalized so w >= 0.""" - board: RigidObject = env.scene[board_cfg.name] - quat = wp.to_torch(board.data.root_quat_w) - sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) - return quat * sign - - def held_gear_base_pos_in_env_frame( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), @@ -89,7 +70,204 @@ def peg_delta_from_held_gear_base( peg_offset: list[float] = [0.0, 0.0, 0.0], held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], ) -> torch.Tensor: - """Vector from held gear insertion point to peg (peg_pos - held_gear_base_pos). Positive = peg is ahead in that axis.""" + """Vector from held gear insertion point to peg. Positive = peg is ahead in that axis.""" held_base = held_gear_base_pos_in_env_frame(env, gear_cfg, held_gear_base_offset) peg_pos = peg_pos_in_env_frame(env, board_cfg, peg_offset) return peg_pos - held_base + + +# --------------------------------------------------------------------------- +# Privileged critic helpers (body-level pose & wrench) +# --------------------------------------------------------------------------- + + +def body_pos_in_env_frame( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Noiseless body position in env frame (privileged state for critic).""" + robot: Articulation = env.scene[robot_cfg.name] + idx = robot.body_names.index(body_name) + pos_w = wp.to_torch(robot.data.body_pos_w)[:, idx, :] + return pos_w - env.scene.env_origins + + +def body_quat_canonical( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Noiseless body quaternion, canonicalized w >= 0 (privileged state for critic).""" + robot: Articulation = env.scene[robot_cfg.name] + idx = robot.body_names.index(body_name) + quat = wp.to_torch(robot.data.body_quat_w)[:, idx, :] + sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) + return quat * sign + + +def force_torque_at_body( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "force_sensor", + return_torque: bool = False, +) -> torch.Tensor: + """Read joint reaction wrench at a specific body link.""" + robot: Articulation = env.scene[robot_cfg.name] + body_idx = robot.body_names.index(body_name) + wrench = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, body_idx] + if return_torque: + return wrench + return wrench[:, :3] + + +# --------------------------------------------------------------------------- +# 24-D policy observation (OSC + wrist sensing) +# --------------------------------------------------------------------------- + + +class NistGearInsertionPolicyObservations(ManagerTermBase): + """24-D policy observation stack for NIST gear insertion (OSC + wrist sensing). + + Output layout (per env):: + + fingertip_pos_rel_fixed (3) + fingertip_quat (4) + ee_linvel (3) + ee_angvel (3) + ft_force (3) + force_threshold (1) + prev_actions (7) + """ + + def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + + p = cfg.params + self._robot_name: str = p.get("robot_name", "robot") + self._board_name: str = p.get("board_name", "gears_and_base") + self._peg_offset = torch.tensor(p.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device) + self._fingertip_body: str = p.get("fingertip_body_name", "panda_fingertip_centered") + self._force_body: str = p.get("force_body_name", "force_sensor") + + self._pos_noise: float = p.get("pos_noise_level", 0.00025) + self._rot_noise_deg: float = p.get("rot_noise_level_deg", 0.1) + self._force_noise: float = p.get("force_noise_level", 1.0) + self._ft_alpha: float = p.get("ft_smoothing_factor", 0.25) + self._contact_thresh_range: tuple[float, float] = tuple( + p.get("contact_threshold_range", [5.0, 10.0]) + ) + + n = env.num_envs + dev = env.device + + self._fingertip_idx: int | None = None + self._force_idx: int | None = None + + self._flip_quats = torch.ones(n, device=dev) + self._prev_noisy_pos = torch.zeros(n, 3, device=dev) + self._prev_noisy_quat = torch.zeros(n, 4, device=dev) + self._prev_noisy_quat[:, 3] = 1.0 + + def _ensure_body_indices(self): + if self._fingertip_idx is not None: + return + robot: Articulation = self._env.scene[self._robot_name] + self._fingertip_idx = robot.body_names.index(self._fingertip_body) + self._force_idx = robot.body_names.index(self._force_body) + + def reset(self, env_ids: list[int] | None = None): + if env_ids is None or len(env_ids) == 0: + return + + n = len(env_ids) + dev = self._env.device + + flip = torch.ones(n, device=dev) + flip[torch.rand(n, device=dev) > 0.5] = -1.0 + self._flip_quats[env_ids] = flip + + self._ensure_body_indices() + robot: Articulation = self._env.scene[self._robot_name] + origins = self._env.scene.env_origins + self._prev_noisy_pos[env_ids] = ( + wp.to_torch(robot.data.body_pos_w)[env_ids, self._fingertip_idx] - origins[env_ids] + ) + self._prev_noisy_quat[env_ids] = wp.to_torch(robot.data.body_quat_w)[env_ids, self._fingertip_idx] + + def __call__( + self, + env: ManagerBasedRLEnv, + robot_name: str = "robot", + board_name: str = "gears_and_base", + peg_offset: list[float] | None = None, + fingertip_body_name: str = "panda_fingertip_centered", + force_body_name: str = "force_sensor", + pos_noise_level: float = 0.00025, + rot_noise_level_deg: float = 0.1, + force_noise_level: float = 1.0, + ) -> torch.Tensor: + self._ensure_body_indices() + + n = env.num_envs + dev = env.device + dt = env.step_dt + + robot: Articulation = env.scene[self._robot_name] + board = env.scene[self._board_name] + origins = env.scene.env_origins + + ft_pos = wp.to_torch(robot.data.body_pos_w)[:, self._fingertip_idx] - origins + ft_quat = wp.to_torch(robot.data.body_quat_w)[:, self._fingertip_idx] + + pos_noise = torch.randn(n, 3, device=dev) * self._pos_noise + noisy_pos = ft_pos + pos_noise + + rot_noise_axis = torch.randn(n, 3, device=dev) + rot_noise_axis = rot_noise_axis / (torch.linalg.norm(rot_noise_axis, dim=1, keepdim=True) + 1e-8) + rot_noise_angle = torch.randn(n, device=dev) * math.radians(self._rot_noise_deg) + noisy_quat = math_utils.quat_mul( + ft_quat, math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), + ) + noisy_quat[:, [2, 3]] = 0.0 + noisy_quat = noisy_quat * self._flip_quats.unsqueeze(-1) + + arm_osc_action = env.action_manager._terms["arm_action"] + board_pos = wp.to_torch(board.data.root_pos_w) - origins + board_quat = wp.to_torch(board.data.root_quat_w) + peg_offset_exp = self._peg_offset.unsqueeze(0).expand(n, 3) + peg_pos = board_pos + math_utils.quat_apply(board_quat, peg_offset_exp) + noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise + + fingertip_pos_rel = noisy_pos - noisy_fixed_pos + + safe_dt = max(dt, 1e-6) + ee_linvel = (noisy_pos - self._prev_noisy_pos) / safe_dt + self._prev_noisy_pos[:] = noisy_pos + + rot_diff = math_utils.quat_mul(noisy_quat, math_utils.quat_conjugate(self._prev_noisy_quat)) + rot_diff = rot_diff * torch.sign(rot_diff[:, 3]).unsqueeze(-1) + ee_angvel = axis_angle_from_quat(rot_diff) / safe_dt + ee_angvel[:, 0:2] = 0.0 + self._prev_noisy_quat[:] = noisy_quat + + raw_force = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, self._force_idx, :3] + arm_osc_action.force_smooth[:] = ( + self._ft_alpha * raw_force + (1.0 - self._ft_alpha) * arm_osc_action.force_smooth + ) + noisy_force = arm_osc_action.force_smooth + torch.randn(n, 3, device=dev) * self._force_noise + + force_threshold = arm_osc_action.contact_thresholds.unsqueeze(-1) + + prev_actions = arm_osc_action._smoothed_actions.clone() + prev_actions[:, 3:5] = 0.0 + + return torch.cat([ + fingertip_pos_rel, + noisy_quat, + ee_linvel, + ee_angvel, + noisy_force, + force_threshold, + prev_actions, + ], dim=-1) diff --git a/isaaclab_arena_environments/mdp/__init__.py b/isaaclab_arena_environments/mdp/__init__.py index e422b79a9..c00282bc9 100644 --- a/isaaclab_arena_environments/mdp/__init__.py +++ b/isaaclab_arena_environments/mdp/__init__.py @@ -11,6 +11,4 @@ from isaaclab_tasks.manager_based.manipulation.stack.mdp import * # noqa: F401, F403 from .env_callbacks import * # noqa: F401, F403 -from .nist_gear_insertion_osc_action import * # noqa: F401, F403 -from .observations import * # noqa: F401, F403 from .robot_configs import * # noqa: F401, F403 diff --git a/isaaclab_arena_environments/mdp/observations.py b/isaaclab_arena_environments/mdp/observations.py deleted file mode 100644 index 6c5564d95..000000000 --- a/isaaclab_arena_environments/mdp/observations.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Custom observation terms for assembly environments.""" - -from __future__ import annotations - -import math -from typing import TYPE_CHECKING - -import torch -import warp as wp - -from isaaclab.assets import Articulation -from isaaclab.managers import ManagerTermBase, SceneEntityCfg -from isaaclab.utils import math as math_utils -from isaaclab.utils.math import axis_angle_from_quat - -if TYPE_CHECKING: - from isaaclab.envs import ManagerBasedRLEnv - from isaaclab.managers import ObservationTermCfg - - -def body_pos_in_env_frame( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "panda_fingertip_centered", -) -> torch.Tensor: - """Noiseless body position in env frame (privileged state for critic).""" - robot: Articulation = env.scene[robot_cfg.name] - idx = robot.body_names.index(body_name) - pos_w = wp.to_torch(robot.data.body_pos_w)[:, idx, :] - return pos_w - env.scene.env_origins - - -def body_quat_canonical( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "panda_fingertip_centered", -) -> torch.Tensor: - """Noiseless body quaternion, canonicalized w >= 0 (privileged state for critic).""" - robot: Articulation = env.scene[robot_cfg.name] - idx = robot.body_names.index(body_name) - quat = wp.to_torch(robot.data.body_quat_w)[:, idx, :] - sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) - return quat * sign - - -def force_torque_at_body( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "force_sensor", - return_torque: bool = False, -) -> torch.Tensor: - """Read joint reaction wrench at a specific body link.""" - robot: Articulation = env.scene[robot_cfg.name] - body_idx = robot.body_names.index(body_name) - wrench = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, body_idx] - if return_torque: - return wrench - return wrench[:, :3] - - -class NistGearInsertionPolicyObservations(ManagerTermBase): - """24-D policy observation stack for NIST gear insertion (OSC + wrist sensing). - - Output layout (per env):: - - fingertip_pos_rel_fixed (3) - fingertip_quat (4) - ee_linvel (3) - ee_angvel (3) - ft_force (3) - force_threshold (1) - prev_actions (7) - """ - - def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): - super().__init__(cfg, env) - - p = cfg.params - self._robot_name: str = p.get("robot_name", "robot") - self._board_name: str = p.get("board_name", "gears_and_base") - self._peg_offset = torch.tensor(p.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device) - self._fingertip_body: str = p.get("fingertip_body_name", "panda_fingertip_centered") - self._force_body: str = p.get("force_body_name", "force_sensor") - - self._pos_noise: float = p.get("pos_noise_level", 0.00025) - self._rot_noise_deg: float = p.get("rot_noise_level_deg", 0.1) - self._force_noise: float = p.get("force_noise_level", 1.0) - self._ft_alpha: float = p.get("ft_smoothing_factor", 0.25) - self._contact_thresh_range: tuple[float, float] = tuple( - p.get("contact_threshold_range", [5.0, 10.0]) - ) - - n = env.num_envs - dev = env.device - - self._fingertip_idx: int | None = None - self._force_idx: int | None = None - - self._flip_quats = torch.ones(n, device=dev) - self._prev_noisy_pos = torch.zeros(n, 3, device=dev) - self._prev_noisy_quat = torch.zeros(n, 4, device=dev) - self._prev_noisy_quat[:, 3] = 1.0 - - def _ensure_body_indices(self): - if self._fingertip_idx is not None: - return - robot: Articulation = self._env.scene[self._robot_name] - self._fingertip_idx = robot.body_names.index(self._fingertip_body) - self._force_idx = robot.body_names.index(self._force_body) - - def reset(self, env_ids: list[int] | None = None): - if env_ids is None or len(env_ids) == 0: - return - - n = len(env_ids) - dev = self._env.device - - flip = torch.ones(n, device=dev) - flip[torch.rand(n, device=dev) > 0.5] = -1.0 - self._flip_quats[env_ids] = flip - - self._ensure_body_indices() - robot: Articulation = self._env.scene[self._robot_name] - origins = self._env.scene.env_origins - self._prev_noisy_pos[env_ids] = ( - wp.to_torch(robot.data.body_pos_w)[env_ids, self._fingertip_idx] - origins[env_ids] - ) - self._prev_noisy_quat[env_ids] = wp.to_torch(robot.data.body_quat_w)[env_ids, self._fingertip_idx] - - def __call__( - self, - env: ManagerBasedRLEnv, - robot_name: str = "robot", - board_name: str = "gears_and_base", - peg_offset: list[float] | None = None, - fingertip_body_name: str = "panda_fingertip_centered", - force_body_name: str = "force_sensor", - pos_noise_level: float = 0.00025, - rot_noise_level_deg: float = 0.1, - force_noise_level: float = 1.0, - ) -> torch.Tensor: - self._ensure_body_indices() - - n = env.num_envs - dev = env.device - dt = env.step_dt - - robot: Articulation = env.scene[self._robot_name] - board = env.scene[self._board_name] - origins = env.scene.env_origins - - ft_pos = wp.to_torch(robot.data.body_pos_w)[:, self._fingertip_idx] - origins - ft_quat = wp.to_torch(robot.data.body_quat_w)[:, self._fingertip_idx] - - pos_noise = torch.randn(n, 3, device=dev) * self._pos_noise - noisy_pos = ft_pos + pos_noise - - rot_noise_axis = torch.randn(n, 3, device=dev) - rot_noise_axis = rot_noise_axis / (torch.linalg.norm(rot_noise_axis, dim=1, keepdim=True) + 1e-8) - rot_noise_angle = torch.randn(n, device=dev) * math.radians(self._rot_noise_deg) - noisy_quat = math_utils.quat_mul( - ft_quat, math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), - ) - noisy_quat[:, [2, 3]] = 0.0 - noisy_quat = noisy_quat * self._flip_quats.unsqueeze(-1) - - arm_osc_action = env.action_manager._terms["arm_action"] - board_pos = wp.to_torch(board.data.root_pos_w) - origins - board_quat = wp.to_torch(board.data.root_quat_w) - peg_offset_exp = self._peg_offset.unsqueeze(0).expand(n, 3) - peg_pos = board_pos + math_utils.quat_apply(board_quat, peg_offset_exp) - noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise - - fingertip_pos_rel = noisy_pos - noisy_fixed_pos - - safe_dt = max(dt, 1e-6) - ee_linvel = (noisy_pos - self._prev_noisy_pos) / safe_dt - self._prev_noisy_pos[:] = noisy_pos - - rot_diff = math_utils.quat_mul(noisy_quat, math_utils.quat_conjugate(self._prev_noisy_quat)) - rot_diff = rot_diff * torch.sign(rot_diff[:, 3]).unsqueeze(-1) - ee_angvel = axis_angle_from_quat(rot_diff) / safe_dt - ee_angvel[:, 0:2] = 0.0 - self._prev_noisy_quat[:] = noisy_quat - - raw_force = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, self._force_idx, :3] - arm_osc_action.force_smooth[:] = ( - self._ft_alpha * raw_force + (1.0 - self._ft_alpha) * arm_osc_action.force_smooth - ) - noisy_force = arm_osc_action.force_smooth + torch.randn(n, 3, device=dev) * self._force_noise - - force_threshold = arm_osc_action.contact_thresholds.unsqueeze(-1) - - prev_actions = arm_osc_action._smoothed_actions.clone() - prev_actions[:, 3:5] = 0.0 - - return torch.cat([ - fingertip_pos_rel, - noisy_quat, - ee_linvel, - ee_angvel, - noisy_force, - force_threshold, - prev_actions, - ], dim=-1) diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py index 6c5605e54..5844b2faf 100644 --- a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -18,26 +18,30 @@ class NISTAssembledGearMeshOSCEnvironment(ExampleEnvironmentBase): name: str = "nist_assembled_gear_mesh_osc" def get_env(self, args_cli: argparse.Namespace): - import isaaclab.envs.mdp as mdp_isaac_lab import isaaclab.sim as sim_utils from isaaclab.controllers import OperationalSpaceControllerCfg from isaaclab.envs.mdp.actions import BinaryJointPositionActionCfg - from isaaclab.managers import EventTermCfg, ObservationTermCfg as ObsTerm, SceneEntityCfg + from isaaclab.managers import ObservationTermCfg as ObsTerm from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg, OffsetCfg from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment + from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene - from isaaclab_arena.tasks.nist_gear_insertion_task import NistGearInsertionTask + from isaaclab_arena.tasks.nist_gear_insertion_task import GraspConfig, NistGearInsertionTask from isaaclab_arena.utils.pose import Pose import isaaclab_arena_environments.mdp as mdp + from isaaclab_arena.tasks.observations.gear_insertion_observations import ( + NistGearInsertionPolicyObservations, + ) + from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import ( + NistGearInsertionOscActionCfg, + ) peg_tip_offset = (0.02025, 0.0, 0.025) peg_base_offset = (0.02025, 0.0, 0.0) success_z_fraction = 0.05 xy_threshold = 0.0025 episode_length_s = 15.0 - pos_action_threshold = 0.02 - rot_action_threshold = 0.097 table = self.asset_registry.get_asset_by_name("table")() assembled_board = self.asset_registry.get_asset_by_name("nist_assembled_board")() @@ -85,7 +89,7 @@ def get_env(self, args_cli: argparse.Namespace): 0.04, ] ) - embodiment.action_config.arm_action = mdp.NistGearInsertionOscActionCfg( + embodiment.action_config.arm_action = NistGearInsertionOscActionCfg( asset_name="robot", joint_names=["panda_joint[1-7]"], body_name="panda_fingertip_centered", @@ -114,6 +118,9 @@ def get_env(self, args_cli: argparse.Namespace): close_command_expr={"panda_finger_joint1": 0.0, "panda_finger_joint2": 0.0}, ) + # OSC insertion requires a specialized 24-D policy observation (peg-relative + # EE pose, force feedback, prev actions) instead of the default embodiment obs. + # The task_obs group adds privileged state for the critic separately. embodiment.observation_config.policy.actions = None embodiment.observation_config.policy.gripper_pos = None embodiment.observation_config.policy.eef_pos = None @@ -121,7 +128,7 @@ def get_env(self, args_cli: argparse.Namespace): embodiment.observation_config.policy.joint_pos = None embodiment.observation_config.policy.joint_vel = None embodiment.observation_config.policy.nist_gear_policy_obs = ObsTerm( - func=mdp.NistGearInsertionPolicyObservations, + func=NistGearInsertionPolicyObservations, params={ "robot_name": "robot", "board_name": gears_and_base.name, @@ -133,6 +140,8 @@ def get_env(self, args_cli: argparse.Namespace): "force_noise_level": 0.0, }, ) + # OSC torque control uses task-specific penalties (action magnitude, jerk, + # contact force) instead of the default joint-level regularizers. embodiment.reward_config.action_rate = None embodiment.reward_config.joint_vel = None @@ -149,76 +158,7 @@ def get_env(self, args_cli: argparse.Namespace): ) scene = Scene(assets=[table, assembled_board, medium_gear, gears_and_base, light]) - nist_randomization_events = { - "held_physics_material": EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg(medium_gear.name), - "static_friction_range": (0.75, 0.75), - "dynamic_friction_range": (0.75, 0.75), - "restitution_range": (0.0, 0.0), - "num_buckets": 1, - }, - ), - "robot_physics_material": EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg("robot", body_names=".*"), - "static_friction_range": (0.75, 0.75), - "dynamic_friction_range": (0.75, 0.75), - "restitution_range": (0.0, 0.0), - "num_buckets": 1, - }, - ), - "fixed_physics_material": EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg(gears_and_base.name), - "static_friction_range": (0.25, 1.25), - "dynamic_friction_range": (0.25, 0.25), - "restitution_range": (0.0, 0.0), - "num_buckets": 128, - }, - ), - "held_object_mass": EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_mass, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg(medium_gear.name), - "mass_distribution_params": (-0.005, 0.005), - "operation": "add", - "distribution": "uniform", - }, - ), - "fixed_asset_pose": EventTermCfg( - func=mdp_isaac_lab.reset_root_state_uniform, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg(gears_and_base.name), - "pose_range": { - "x": (0.0, 0.0), - "y": (0.0, 0.0), - "z": (0.0, 0.0), - "roll": (0.0, 0.0), - "pitch": (0.0, 0.0), - "yaw": (0.0, 0.2617993877991494), - }, - "velocity_range": {}, - }, - ), - } - - task = NistGearInsertionTask( - assembled_board=assembled_board, - held_gear=medium_gear, - background_scene=table, - peg_offset_from_board=list(peg_base_offset), - success_z_fraction=success_z_fraction, - xy_threshold=xy_threshold, - start_in_gripper=True, + grasp_cfg = GraspConfig( num_arm_joints=7, hand_grasp_width=0.03, hand_close_width=0.0, @@ -226,20 +166,23 @@ def get_env(self, args_cli: argparse.Namespace): end_effector_body_name="panda_hand", grasp_rot_offset=[1.0, 0.0, 0.0, 0.0], grasp_offset=[0.02, 0.0, -0.128], - episode_length_s=episode_length_s, - enable_randomization=True, arm_joint_names="panda_joint[1-7]", finger_body_names=".*finger", - peg_offset_xy_noise=0.005, - gear_base_asset=gears_and_base, + ) + + task = NistGearInsertionTask( + assembled_board=assembled_board, + held_gear=medium_gear, + background_scene=table, + peg_offset_from_board=list(peg_base_offset), peg_offset_for_obs=list(peg_tip_offset), - include_insertion_regularizers=True, - pos_action_threshold=pos_action_threshold, - rot_action_threshold=rot_action_threshold, - engagement_xy_threshold=xy_threshold, - success_bonus_weight=1.0, - disable_drop_terminations=True, - extra_event_terms=nist_randomization_events, + gear_base_asset=gears_and_base, + success_z_fraction=success_z_fraction, + xy_threshold=xy_threshold, + episode_length_s=episode_length_s, + grasp_cfg=grasp_cfg, + enable_randomization=True, + rl_training_mode=args_cli.rl_training_mode, ) return IsaacLabArenaEnvironment( @@ -249,6 +192,8 @@ def get_env(self, args_cli: argparse.Namespace): task=task, teleop_device=teleop_device, env_cfg_callback=mdp.assembly_env_cfg_callback, + rl_framework=RLFramework.RL_GAMES, + rl_policy_cfg="isaaclab_arena_examples.policy:nist_gear_insertion_osc_rl_games.yaml", ) @staticmethod @@ -257,3 +202,8 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--teleop_device", type=str, default=None, help="Teleoperation device (e.g., keyboard, spacemouse)" ) + parser.add_argument( + "--rl_training_mode", + action="store_true", + help="Disable success termination (use when training with RL-Games).", + ) diff --git a/isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml similarity index 100% rename from isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml rename to isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml From d93c4a5f7b02232240420023ed562c0901df5fac Mon Sep 17 00:00:00 2001 From: odoherty Date: Wed, 8 Apr 2026 20:39:14 -0700 Subject: [PATCH 04/12] Fix rl_training_mode success termination and loosen insertion threshold - Keep success term registered (returns all-False during training) so SuccessRateMetric can query it, matching Lift task pattern - Loosen success_z_fraction from 0.05 to 0.15 (3mm depth threshold) - Add new NIST asset definitions to object_library --- isaaclab_arena/assets/object_library.py | 153 ++++++++++++++++++ .../tasks/nist_gear_insertion_task.py | 5 +- isaaclab_arena/tasks/terminations.py | 6 + ...nist_assembled_gearmesh_osc_environment.py | 8 +- 4 files changed, 165 insertions(+), 7 deletions(-) diff --git a/isaaclab_arena/assets/object_library.py b/isaaclab_arena/assets/object_library.py index e078ea7f9..2ada1fd51 100644 --- a/isaaclab_arena/assets/object_library.py +++ b/isaaclab_arena/assets/object_library.py @@ -184,6 +184,159 @@ def __init__( super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) +@register_asset +class NistBoardAssembled(LibraryObject): + """NIST fully assembled taskboard.""" + + name = "nist_board_assembled" + tags = ["nist", "object"] + usd_path = str(_REPO_ROOT / "assets" / "nist_board_assembled.usd") + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=None), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class NistGearBase(LibraryObject): + """NIST gear base / receptacle (from Downloads/NIST).""" + + name = "nist_gear_base" + tags = ["nist", "object"] + usd_path = str(_REPO_ROOT / "assets" / "gear_base.usd") + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class NistGearSmall(LibraryObject): + + name = "nist_gear_small" + tags = ["nist", "object"] + usd_path = str(_REPO_ROOT / "assets" / "gear_small.usd") + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class NistGearMedium(LibraryObject): + + name = "nist_gear_medium" + tags = ["nist", "object"] + usd_path = str(_REPO_ROOT / "assets" / "nist_gear_medium.usd") + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class NistGearLarge(LibraryObject): + """NIST large gear.""" + + name = "nist_gear_large" + tags = ["nist", "object"] + usd_path = str(_REPO_ROOT / "assets" / "gear_large.usd") + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + @register_asset class MustardBottle(LibraryObject): """ diff --git a/isaaclab_arena/tasks/nist_gear_insertion_task.py b/isaaclab_arena/tasks/nist_gear_insertion_task.py index 5c000f0bb..f0ca51076 100644 --- a/isaaclab_arena/tasks/nist_gear_insertion_task.py +++ b/isaaclab_arena/tasks/nist_gear_insertion_task.py @@ -147,6 +147,7 @@ def get_termination_cfg(self): "gear_peg_height": self.gear_peg_height, "success_z_fraction": self.success_z_fraction, "xy_threshold": self.xy_threshold, + "rl_training": self.rl_training_mode, }, ) object_dropped = TerminationTermCfg( @@ -157,8 +158,6 @@ def get_termination_cfg(self): }, ) cfg = _TerminationsCfg(success=success, object_dropped=object_dropped) - if self.rl_training_mode: - cfg.success = None if self.grasp_cfg is not None: cfg.gear_dropped_from_gripper = TerminationTermCfg( func=gear_dropped_from_gripper, @@ -331,7 +330,7 @@ class _TerminationsCfg: """Termination terms for the gear insertion task.""" time_out: TerminationTermCfg = TerminationTermCfg(func=mdp_isaac_lab.time_out) - success: TerminationTermCfg | None = MISSING + success: TerminationTermCfg = MISSING object_dropped: TerminationTermCfg | None = MISSING gear_dropped_from_gripper: TerminationTermCfg | None = None gear_orientation_exceeded: TerminationTermCfg | None = None diff --git a/isaaclab_arena/tasks/terminations.py b/isaaclab_arena/tasks/terminations.py index 8a0d3905a..612c7cb6e 100644 --- a/isaaclab_arena/tasks/terminations.py +++ b/isaaclab_arena/tasks/terminations.py @@ -268,13 +268,19 @@ def gear_mesh_insertion_success( gear_peg_height: float = 0.02, success_z_fraction: float = 0.80, xy_threshold: float = 0.0025, + rl_training: bool = False, ) -> torch.Tensor: """Terminate when the gear is inserted onto the peg to the required depth. Checks that the held gear's base (root + held_gear_base_offset in gear frame) is centered on the peg (XY) and lowered past a fraction of the peg height (Z). Peg position is fixed_asset pose + gear_base_offset in the fixed asset's local frame. + + When ``rl_training`` is True, always returns False (no early termination) + but the term stays registered so that ``SuccessRateMetric`` can query it. """ + if rl_training: + return torch.zeros(env.num_envs, dtype=torch.bool, device=env.device) held_object: RigidObject = env.scene[held_object_cfg.name] fixed_object: RigidObject = env.scene[fixed_object_cfg.name] diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py index 5844b2faf..c7bc38c11 100644 --- a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -39,12 +39,12 @@ def get_env(self, args_cli: argparse.Namespace): peg_tip_offset = (0.02025, 0.0, 0.025) peg_base_offset = (0.02025, 0.0, 0.0) - success_z_fraction = 0.05 + success_z_fraction = 0.20 xy_threshold = 0.0025 episode_length_s = 15.0 table = self.asset_registry.get_asset_by_name("table")() - assembled_board = self.asset_registry.get_asset_by_name("nist_assembled_board")() + assembled_board = self.asset_registry.get_asset_by_name("nist_board_assembled")() gears_and_base = self.asset_registry.get_asset_by_name("gears_and_base")() medium_gear = self.asset_registry.get_asset_by_name("medium_nist_gear")() light_spawner_cfg = sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=1500.0) @@ -150,8 +150,8 @@ def get_env(self, args_cli: argparse.Namespace): else: teleop_device = None - table.set_initial_pose(Pose(position_xyz=(0.55, 0.0, 0.0), rotation_xyzw=(0.0, 0.0, 0.707, 0.707))) - assembled_board.set_initial_pose(Pose(position_xyz=(0.71, -0.005, 0.0), rotation_xyzw=(0.0, 0.0, 0.0, 1.0))) + table.set_initial_pose(Pose(position_xyz=(0.55, 0.0, -0.009), rotation_xyzw=(0.0, 0.0, 0.707, 0.707))) + assembled_board.set_initial_pose(Pose(position_xyz=(0.88, 0.15, -0.009), rotation_xyzw=(0.0, 0.0, -0.7071, 0.7071))) medium_gear.set_initial_pose(Pose(position_xyz=(0.5462, -0.02386, 0.12858), rotation_xyzw=(0.0, 0.0, 0.0, 1.0))) gears_and_base.set_initial_pose( Pose(position_xyz=(0.585, -0.074, 0.0), rotation_xyzw=(0.0, 0.0, 0.9239, 0.3827)) From 93e071ed914ac3a6e12f832e8b16592c84b57b2e Mon Sep 17 00:00:00 2001 From: odoherty Date: Thu, 9 Apr 2026 12:04:43 -0700 Subject: [PATCH 05/12] Add NIST gear insertion docs, assets, and observation hardening Add step-by-step documentation for the NIST gear insertion RL workflow (environment setup, policy training, evaluation) mirroring the existing Franka lift task pages. Include task overview GIFs, register the new workflow in the RL workflows index, and clamp NaN/inf values in force-torque observations to prevent training instabilities. --- .gitattributes | 1 + README.md | 136 ---------- docs/images/nist_gear_insertion_parallel.gif | 3 + docs/images/nist_gear_insertion_task.gif | 3 + docs/nist_vs_lift_comparison.md | 219 --------------- .../nist_gear_insertion/index.rst | 96 +++++++ .../step_1_environment_setup.rst | 252 ++++++++++++++++++ .../step_2_policy_training.rst | 140 ++++++++++ .../nist_gear_insertion/step_3_evaluation.rst | 187 +++++++++++++ .../index.rst | 2 + isaaclab_arena/assets/object_library.py | 36 +-- .../policy/rl_games_action_policy.py | 12 +- .../reinforcement_learning/train_rl_games.py | 21 +- .../tasks/nist_gear_insertion_task.py | 24 +- .../gear_insertion_observations.py | 49 ++-- .../tasks/rewards/gear_insertion_rewards.py | 36 ++- isaaclab_arena/tasks/terminations.py | 3 +- isaaclab_arena/terms/events.py | 61 +++-- .../mdp/nist_gear_insertion_osc_action.py | 27 +- .../mdp/robot_configs.py | 4 +- ...nist_assembled_gearmesh_osc_environment.py | 38 ++- .../nist_gear_insertion_osc_rl_games.yaml | 5 + 22 files changed, 846 insertions(+), 509 deletions(-) create mode 100644 docs/images/nist_gear_insertion_parallel.gif create mode 100644 docs/images/nist_gear_insertion_task.gif delete mode 100644 docs/nist_vs_lift_comparison.md create mode 100644 docs/pages/example_workflows/nist_gear_insertion/index.rst create mode 100644 docs/pages/example_workflows/nist_gear_insertion/step_1_environment_setup.rst create mode 100644 docs/pages/example_workflows/nist_gear_insertion/step_2_policy_training.rst create mode 100644 docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst diff --git a/.gitattributes b/.gitattributes index 4e641c670..0467a96a4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6,4 +6,5 @@ *.mp4 filter=lfs diff=lfs merge=lfs -text docs/images/ filter=lfs diff=lfs merge=lfs -text assets/*.usd filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text *.pth filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 5e3f733a3..3ee626e3b 100644 --- a/README.md +++ b/README.md @@ -255,139 +255,3 @@ Isaac Lab-Arena builds on [NVIDIA Isaac Lab](https://github.com/isaac-sim/IsaacL Made with ❤️ by the NVIDIA Robotics Team - ---- - -## NIST Gear Insertion Task - -Reinforcement learning for NIST gear-mesh insertion using operational-space torque -control (OSC). A Franka Panda robot inserts a medium NIST gear onto a peg on the -gear base, trained with PPO (RL Games) and an LSTM recurrent policy. - -### Repository Layout (NIST task) - -``` -assets/ # USD scene assets (Git LFS) - gear_medium.usd # Held gear - gearbase_and_gears_gearbase_root.usd # Gear base + flanking gears - nist_assembled.usd # Assembled board (visual background) - -isaaclab_arena_environments/ - nist_assembled_gearmesh_osc_environment.py # Environment definition - mdp/ - nist_gear_insertion_osc_action.py # OSC action term (asset-relative, EMA, dead-zones) - observations.py # 24-D policy observation builder (NIST gear insertion) - -isaaclab_arena/ - policy/ - rl_games_action_policy.py # PolicyBase wrapper for RL Games inference - rl_policy/ - nist_gear_insertion_osc_rl_games.yaml # RL Games PPO hyperparameters - nist_gear_insertion_osc_rsl_rl.json # RSL-RL-style policy JSON (optional) - scripts/reinforcement_learning/ - train_rl_games.py # Training script - play_rl_games.py # Visual playback script - tasks/ - nist_gear_insertion_task.py # Task: rewards, terminations, events, observations - rewards/ - gear_insertion_rewards.py # Keypoints, bonuses, insertion regularizers - terms/ - events.py # Event terms (place gear in gripper, etc.) - assets/ - object_library.py # Asset registry (USD paths, physics props) -``` - -### Environment - -**Name:** `nist_assembled_gear_mesh_osc` - -| Property | Value | -|----------|-------| -| Robot | Franka Panda (`franka_mimic.usd`) | -| Controller | OSC — stiffness [565, 565, 565, 28, 28, 28] | -| Action space | 7-D (3 pos + 3 rot + 1 auxiliary scalar) | -| Observation space | 24-D policy obs + task state (gear pos, peg pos, board quat, etc.) | -| Episode length | 15 s (configurable in environment) | -| Success criteria | Gear Z within 5% of peg height, XY within 2.5 mm | -| Terminations | Success (early reset) + time-out | - -**Domain randomization:** physics materials (friction), held object mass (±5 g), -fixed-asset yaw (0–15°). - -**RL Games experiment name** (log folder under `logs/rl_games/`): `NistGearInsertionOscRlg`. - -### Reward Architecture (Current) - -- Reward configuration is defined in - `isaaclab_arena/tasks/nist_gear_insertion_task.py` and - `isaaclab_arena/tasks/rewards/gear_insertion_rewards.py`. -- Base rewards include: - - 3 keypoint squashing terms (`kp_baseline`, `kp_coarse`, `kp_fine`) - - engagement bonus - - success bonus -- When `include_insertion_regularizers=True`, optional terms include (implementation - classes in `gear_insertion_rewards.py`): - - `osc_action_magnitude_penalty` - - `osc_action_delta_penalty` - - `wrist_contact_force_penalty` - - `success_prediction_error` - -### Commands - -All commands run **inside the Docker container** (`isaaclab_arena-latest`). - -**Training:** - -```bash -python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ - --num_envs 128 \ - --max_iterations 300 \ - --headless \ - nist_assembled_gear_mesh_osc -``` - -**Resume from checkpoint:** - -```bash -python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ - --num_envs 128 \ - --max_iterations 300 \ - --headless \ - --checkpoint \ - nist_assembled_gear_mesh_osc -``` - -**Visual playback:** - -```bash -python isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py \ - --num_envs 1 \ - --sigma 0 \ - --checkpoint \ - nist_assembled_gear_mesh_osc -``` - -**Policy evaluation (metrics):** - -```bash -python isaaclab_arena/evaluation/policy_runner.py \ - --policy_type rl_games \ - --checkpoint_path \ - --num_envs 1 \ - --num_steps 2000 \ - nist_assembled_gear_mesh_osc -``` - -**Copy checkpoint to local machine:** - -```bash -docker cp isaaclab_arena-latest:/checkpoint.pth ./local_path/ -``` - -### Assets - -USD assets in `assets/` are tracked with **Git LFS**. After cloning, run: - -```bash -git lfs pull -``` diff --git a/docs/images/nist_gear_insertion_parallel.gif b/docs/images/nist_gear_insertion_parallel.gif new file mode 100644 index 000000000..1518a17b4 --- /dev/null +++ b/docs/images/nist_gear_insertion_parallel.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48de559f1c004b8a4d962415374907a2fd608c2ca37f171bcf49445a7afe6e4e +size 4076148 diff --git a/docs/images/nist_gear_insertion_task.gif b/docs/images/nist_gear_insertion_task.gif new file mode 100644 index 000000000..6086ed777 --- /dev/null +++ b/docs/images/nist_gear_insertion_task.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b7210281b9425cec7a045929647b0a3c4ce60278f3298aa79fd3871b6c2609 +size 2059467 diff --git a/docs/nist_vs_lift_comparison.md b/docs/nist_vs_lift_comparison.md deleted file mode 100644 index ac2514c13..000000000 --- a/docs/nist_vs_lift_comparison.md +++ /dev/null @@ -1,219 +0,0 @@ -# NIST Gear Insertion vs Lift Object — Arena Integration Comparison - -This document shows the structural differences between the **Lift Object** task (the upstream reference) and the **NIST Gear Insertion** task, organized by integration layer. Use it to identify what needs to change to bring NIST into alignment. - ---- - -## 1. Environment File (`isaaclab_arena_environments/`) - -| Aspect | Lift (`lift_object_environment.py`) | NIST (`nist_assembled_gearmesh_osc_environment.py`) | Delta | -|--------|------|------|-------| -| **RL Framework** | `RLFramework.RSL_RL` | `RLFramework.RL_GAMES` | Different RL library | -| **Policy Config** | `f"{base_rsl_rl_policy.__name__}:RLPolicyCfg"` (Python class) | `"isaaclab_arena.policy.rl_policy:nist_gear_insertion_osc_rl_games.yaml"` (YAML string) | Lift uses a Python configclass; NIST uses a YAML file. The `rl_policy/` directory has no `__init__.py`, which breaks `importlib.import_module` in Lab's `load_cfg_from_registry`. | -| **Embodiment default** | `"franka_joint_pos"` | `"franka_ik"` | Different default robot mode | -| **Robot config override** | None (uses embodiment default) | Replaces `scene_config.robot` with `FRANKA_MIMIC_OSC_CFG`, overrides `ee_frame`, `initial_joint_pose`, `arm_action`, `gripper_action` | NIST deeply customizes the embodiment post-construction | -| **Observation overrides** | None (uses embodiment defaults) | Nulls out 6 default policy obs, adds custom `nist_gear_policy_obs` ObsTerm | NIST replaces the entire policy observation group | -| **Reward overrides** | None | Nulls out `action_rate` and `joint_vel` from embodiment | NIST replaces default regularizers with task-specific ones | -| **env_cfg_callback** | None | `mdp.assembly_env_cfg_callback` (sets PhysX `gpu_collision_stack_size`) | NIST needs custom sim config | -| **Extra imports** | Minimal (5 imports in `get_env`) | Heavy (10+ imports including OSC controller, frame transformer, custom action/obs) | NIST has significantly more complexity in the env file | - -### Lift environment file (89 lines) - -```python -# Key structure: -embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)(concatenate_observation_terms=True) -# No overrides — uses embodiment defaults for robot, actions, observations, rewards - -task = LiftObjectTaskRL(pick_up_object, background, embodiment, ...) - -return IsaacLabArenaEnvironment( - ..., - rl_framework=RLFramework.RSL_RL, - rl_policy_cfg=f"{base_rsl_rl_policy.__name__}:RLPolicyCfg", -) -``` - -### NIST environment file (216 lines) - -```python -# Key structure: -embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)(...) -embodiment.scene_config.robot = mdp.FRANKA_MIMIC_OSC_CFG.replace(...) # Override robot -embodiment.scene_config.ee_frame = FrameTransformerCfg(...) # Override EE frame -embodiment.set_initial_joint_pose([...]) # Override joint pose -embodiment.action_config.arm_action = NistGearInsertionOscActionCfg(...) # Override action -embodiment.action_config.gripper_action = BinaryJointPositionActionCfg(...) -embodiment.observation_config.policy.joint_pos = None # Null out defaults -# ... null out 5 more default obs ... -embodiment.observation_config.policy.nist_gear_policy_obs = ObsTerm(...) # Custom obs -embodiment.reward_config.action_rate = None # Null out defaults -embodiment.reward_config.joint_vel = None - -task = NistGearInsertionTask(assembled_board, held_gear, background, ...) - -return IsaacLabArenaEnvironment( - ..., - env_cfg_callback=mdp.assembly_env_cfg_callback, - rl_framework=RLFramework.RL_GAMES, - rl_policy_cfg="isaaclab_arena.policy.rl_policy:nist_gear_insertion_osc_rl_games.yaml", -) -``` - ---- - -## 2. Task Class (`isaaclab_arena/tasks/`) - -| Aspect | Lift (`lift_object_task.py`) | NIST (`nist_gear_insertion_task.py`) | Delta | -|--------|------|------|-------| -| **Base class** | `TaskBase` (IL) / `LiftObjectTaskRL(LiftObjectTask)` (RL) | `TaskBase` (single class) | Lift has IL/RL split; NIST is RL-only | -| **Constructor args** | 8 params (RL version) | 35+ params | NIST is far more parameterized | -| **Observations** | `LiftObjectObservationsCfg` — 2 terms (target_pos, object_pos) in `task_obs` group | `_GearInsertionObservationsCfg` — 9 terms (gear_pos/quat, peg_pos, board_quat, peg_delta, joint_pos/vel, ee_pos/quat) in `task_obs` group | NIST has richer privileged observations | -| **Rewards** | 4 terms: reaching, lifting, goal_tracking, goal_tracking_fine | 10 terms: 3x keypoint_squashing, engagement_bonus, success_bonus, action_penalty, action_grad, contact_penalty, success_pred_error | NIST has a much more complex reward structure | -| **Terminations** | `time_out`, `object_dropped`, `success` (conditional on `rl_training_mode`) | `time_out`, `success` (conditional), `object_dropped`, `gear_dropped_from_gripper`, `gear_orientation_exceeded` | Similar pattern, NIST has more termination conditions | -| **Events** | None (uses embodiment defaults) | 13+ events: `place_gear`, `fixed_asset_pose`, 6x physics materials, `held_object_mass`, `robot_actuator_gains`, `robot_joint_friction` | NIST has extensive domain randomization | -| **Commands** | `LiftObjectCommandsCfg` with `UniformPoseCommandCfg` | None | Lift uses a command manager for goal poses; NIST has no commands | -| **Curriculum** | None | None | Same | -| **`rl_training_mode`** | Passed to termination; disables success termination | Same pattern | Aligned | - ---- - -## 3. Training Scripts - -| Aspect | Lift | NIST | Delta | -|--------|------|------|-------| -| **Script** | Isaac Lab's `submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py` | Arena's custom `isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py` | **Completely different scripts** | -| **Environment registration** | `--external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback` | `get_arena_builder_from_cli(args_cli)` → `arena_builder.build_registered()` | Lift uses Lab's callback mechanism; NIST builds directly | -| **Agent config source** | Auto-resolved from gym registry via `@hydra_task_config` decorator → `rsl_rl_cfg_entry_point` | Loaded manually from `--agent_cfg_path` CLI arg (YAML file) | Lift is auto-discovered; NIST is manual | -| **Hydra support** | Yes (override hyperparams via CLI: `agent.policy.activation=relu`) | No | NIST cannot use Hydra overrides | -| **Distributed training** | Built-in `--distributed` flag with multi-GPU rank handling | Not supported | Missing from NIST | -| **W&B tracking** | Not in RSL-RL Arena flow (but available in Lab RL-Games script) | Not supported | Missing from NIST | - -### Lift training command - -```bash -python submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ - --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ - --task lift_object \ - --rl_training_mode \ - --num_envs 4096 \ - --max_iterations 2000 -``` - -### NIST training command - -```bash -/isaac-sim/python.sh isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ - --environment nist_assembled_gear_mesh_osc \ - --rl_training_mode \ - --num_envs 4096 \ - --max_iterations 1000 \ - --agent_cfg_path isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml -``` - ---- - -## 4. Evaluation / Play Scripts - -| Aspect | Lift | NIST | Delta | -|--------|------|------|-------| -| **Script** | Arena's `isaaclab_arena/evaluation/policy_runner.py` (generic, framework-agnostic) | Arena's custom `isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py` | Lift uses the generic policy runner; NIST has its own | -| **Policy class** | Uses `PolicyBase` subclass (e.g., RSL-RL policy loaded by policy runner) | Directly creates RL-Games `Runner` and calls `runner.run(play=True)` | NIST bypasses the policy abstraction | -| **NIST also has** | — | `isaaclab_arena/policy/rl_games_action_policy.py` (`RlGamesActionPolicy`) which IS a `PolicyBase` subclass and CAN work with `policy_runner.py` | This exists but isn't the primary eval path | - -### Lift play command - -```bash -python isaaclab_arena/evaluation/policy_runner.py \ - --task lift_object \ - --num_envs 50 \ - --checkpoint \ - --policy_type rsl_rl -``` - -### NIST play command - -```bash -/isaac-sim/python.sh isaaclab_arena/scripts/reinforcement_learning/play_rl_games.py \ - --environment nist_assembled_gear_mesh_osc \ - --num_envs 50 \ - --agent_cfg_path isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml \ - --checkpoint -``` - -### NIST alternative (via generic policy runner) - -```bash -python isaaclab_arena/evaluation/policy_runner.py \ - --task nist_assembled_gear_mesh_osc \ - --num_envs 50 \ - --policy_type rl_games \ - --checkpoint_path \ - --agent_cfg_path isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml -``` - ---- - -## 5. Policy Configuration - -| Aspect | Lift | NIST | Delta | -|--------|------|------|-------| -| **Format** | Python `@configclass` (`RslRlOnPolicyRunnerCfg` subclass) | YAML file | Different formats | -| **Location** | `isaaclab_arena_examples/policy/base_rsl_rl_policy.py` | `isaaclab_arena/policy/rl_policy/nist_gear_insertion_osc_rl_games.yaml` | Different packages | -| **Network** | MLP `[256, 128, 64]`, activation=elu | MLP `[512, 128, 64]` + LSTM (1024 units, 2 layers), activation=elu | NIST uses recurrent policy | -| **PPO params** | `gamma=0.98`, `lam=0.95`, `lr=1e-4`, `entropy_coef=0.006` | `gamma=0.995`, `tau=0.95`, `lr=1e-4`, `entropy_coef=0.0` | Different discount/entropy | -| **Horizon** | `num_steps_per_env=24` | `horizon_length=128`, `seq_length=128` | NIST uses longer rollouts (LSTM needs it) | -| **Obs groups** | `{"actor": ["policy", "task_obs"], "critic": ["policy", "task_obs"]}` | `{"obs": ["policy", "task_obs"], "states": ["policy", "task_obs"]}` + `central_value_config` | NIST uses asymmetric actor-critic with central value | -| **Resolvable by Lab** | Yes — Python class loaded via `load_cfg_from_registry` | Partially — YAML can be loaded but `isaaclab_arena.policy.rl_policy` has no `__init__.py` so `importlib.import_module` fails | **Blocker** for Lab standard path | - ---- - -## 6. Custom MDP Components (NIST-only) - -These files exist only for NIST and have no lift equivalent: - -| File | Purpose | -|------|---------| -| `isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py` | Custom OSC action with peg-relative targets, EMA smoothing, contact-aware dead zones | -| `isaaclab_arena_environments/mdp/nist_gear_insertion_observations.py` | 24-D policy observation (peg-relative EE pose, force feedback, prev actions) | -| `isaaclab_arena_environments/mdp/robot_configs.py` | `FRANKA_MIMIC_OSC_CFG` — custom Franka config for torque control | -| `isaaclab_arena_environments/mdp/env_callbacks.py` | `assembly_env_cfg_callback` — sets PhysX GPU collision stack size | -| `isaaclab_arena/tasks/rewards/gear_insertion_rewards.py` | 7 reward classes/functions (keypoint squashing, bonuses, penalties) | -| `isaaclab_arena/tasks/observations/gear_insertion_observations.py` | 5 observation functions (gear/peg pos, quat, delta) | -| `isaaclab_arena/tasks/terminations.py` | 3 termination functions (insertion success, gear dropped, orientation exceeded) | -| `isaaclab_arena/terms/events.py` | `place_gear_in_gripper` event | - ---- - -## 7. Key Blocker: Isaac Lab Standard Training Path - -The `--external_callback` mechanism exists in **RSL-RL's `train.py`** but **not in RL-Games' `train.py`**. - -``` -Isaac Lab rsl_rl/train.py: - --external_callback → environment_registration_callback() → gym.register() → gym.make() - ✅ Lift uses this path - -Isaac Lab rl_games/train.py: - No --external_callback → resolve_task_config(task, agent) → load_cfg_from_registry() - ❌ NIST cannot use this path (no callback + YAML module path has no __init__.py) -``` - -### Options to resolve - -| Option | What changes | Effort | -|--------|-------------|--------| -| **A. Switch NIST to RSL-RL** | Write `RslRlOnPolicyRunnerCfg` equivalent; retune hyperparams; lose LSTM (RSL-RL has GRU) | Medium | -| **B. Add `--external_callback` to RL-Games script** | ~8 lines added to `rl_games/train.py` (upstream PR to IsaacLab) + add `__init__.py` to `rl_policy/` | Small code, needs upstream approval | -| **C. Keep custom Arena scripts** | No changes needed, already working | Zero (but stays off standard path) | - ---- - -## 8. Summary: What to Align - -Sorted by priority for upstream integration: - -1. **Training script path** — The biggest structural difference. Choose option A, B, or C above. -2. **Policy config format** — If staying on RL-Games, add `__init__.py` to `isaaclab_arena/policy/rl_policy/` so the YAML path is resolvable by `importlib`. -3. **Environment file complexity** — NIST's deep embodiment customization (overriding robot, actions, observations, rewards) is inherently more complex than lift. This is unavoidable given OSC control, but the pattern of null-then-replace could be formalized. -4. **Play/eval path** — `RlGamesActionPolicy` is now generic (no NIST defaults). Use `policy_runner.py --policy_type rl_games --agent_cfg_path --checkpoint_path ` as the primary eval path. RESOLVED. -5. **Task constructor** — 35+ params could be simplified with a config dataclass pattern (like lift's cleaner constructor), but this is cosmetic. diff --git a/docs/pages/example_workflows/nist_gear_insertion/index.rst b/docs/pages/example_workflows/nist_gear_insertion/index.rst new file mode 100644 index 000000000..529c58e03 --- /dev/null +++ b/docs/pages/example_workflows/nist_gear_insertion/index.rst @@ -0,0 +1,96 @@ +NIST Gear Insertion Task +======================== + +This example demonstrates the complete workflow for **reinforcement learning-based gear insertion** +on the assembled NIST board using the Franka Panda robot, operational-space control, and RL Games, +covering environment setup, policy training, and closed-loop evaluation. + +.. image:: ../../../images/nist_gear_insertion_task.gif + :align: center + :height: 400px + +Task Overview +------------- + +**Task ID:** ``nist_assembled_gear_mesh_osc`` + +**Task Description:** The robot starts with the medium gear in its gripper and learns to align and +insert it onto the target peg on the NIST assembly board using task-space control and contact-rich +observations that include wrist-force feedback. + +**Key Specifications:** + +.. list-table:: + :widths: 30 70 + :header-rows: 1 + + * - Property + - Value + * - **Tags** + - Table-top assembly + * - **Skills** + - Align, insert, contact-aware manipulation + * - **Embodiment** + - Franka Panda (7 DOF arm + 2 DOF gripper) + * - **Controller** + - Operational-space control (7-D policy action) + * - **Scene** + - Table, assembled NIST board, target gear base, held medium gear, dome light + * - **Observations** + - 24-D policy observation plus task observations for critic/state + * - **Policy** + - RL Games PPO (learned from scratch) + * - **Training Method** + - Reinforcement Learning (on-policy PPO) + * - **Physics** + - PhysX + * - **Closed-loop** + - Yes + * - **Action Space** + - 7-D task-space action [3 position, 3 rotation, 1 auxiliary scalar] + * - **Metric** + - Success rate + * - **Episode Length** + - 15 seconds + + +Workflow +-------- + +This tutorial covers the pipeline for creating an RL environment, training a policy using RL Games, +and evaluating the trained policy in closed-loop. A user can follow the whole pipeline, or can start +at any intermediate step after preparing a checkpoint. + +Prerequisites +^^^^^^^^^^^^^ + +Start the Isaac Lab Arena docker container: + +:docker_run_default: + +You'll need to create folders for logs, checkpoints, and models: + +.. code-block:: bash + + export LOG_DIR=logs/rl_games + mkdir -p $LOG_DIR + export MODELS_DIR=models/isaaclab_arena/nist_gear_insertion + mkdir -p $MODELS_DIR + +Workflow Steps +^^^^^^^^^^^^^^ + +Follow the steps below to complete the workflow: + +- :doc:`step_1_environment_setup` +- :doc:`step_2_policy_training` +- :doc:`step_3_evaluation` + + +.. toctree:: + :maxdepth: 1 + :hidden: + + step_1_environment_setup + step_2_policy_training + step_3_evaluation diff --git a/docs/pages/example_workflows/nist_gear_insertion/step_1_environment_setup.rst b/docs/pages/example_workflows/nist_gear_insertion/step_1_environment_setup.rst new file mode 100644 index 000000000..59e216d90 --- /dev/null +++ b/docs/pages/example_workflows/nist_gear_insertion/step_1_environment_setup.rst @@ -0,0 +1,252 @@ +Environment Setup and Validation +-------------------------------- + +**Docker Container**: Base (see :doc:`../../quickstart/installation` for more details) + +On this page we briefly describe the RL environment used in this example workflow +and validate that we can load it in Isaac Lab. + +:docker_run_default: + + +Environment Description +^^^^^^^^^^^^^^^^^^^^^^^ + +.. dropdown:: The NIST Gear Insertion Environment (simplified) + :animate: fade-in + + The snippet below is a simplified view of the environment definition. For the full + implementation — including controller configuration, gripper action, grasp config, + and additional scene setup — see ``nist_assembled_gearmesh_osc_environment.py``. + + .. code-block:: python + + class NISTAssembledGearMeshOSCEnvironment(ExampleEnvironmentBase): + + name: str = "nist_assembled_gear_mesh_osc" + + def get_env(self, args_cli: argparse.Namespace): + table = self.asset_registry.get_asset_by_name("table")() + assembled_board = self.asset_registry.get_asset_by_name("nist_board_assembled")() + gears_and_base = self.asset_registry.get_asset_by_name("gears_and_base")() + medium_gear = self.asset_registry.get_asset_by_name("medium_nist_gear")() + light = self.asset_registry.get_asset_by_name("light")() + + embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)( + enable_cameras=args_cli.enable_cameras, + concatenate_observation_terms=True, + ) + + embodiment.action_config.arm_action = NistGearInsertionOscActionCfg( + asset_name="robot", + joint_names=["panda_joint[1-7]"], + body_name="panda_fingertip_centered", + fixed_asset_name=gears_and_base.name, + peg_offset=(0.02025, 0.0, 0.025), + ) + + embodiment.observation_config.policy.nist_gear_policy_obs = ObsTerm( + func=NistGearInsertionPolicyObservations, + params={ + "robot_name": "robot", + "board_name": gears_and_base.name, + "peg_offset": [0.02025, 0.0, 0.025], + }, + ) + + task = NistGearInsertionTask( + assembled_board=assembled_board, + held_gear=medium_gear, + background_scene=table, + peg_offset_from_board=[0.02025, 0.0, 0.0], + peg_offset_for_obs=[0.02025, 0.0, 0.025], + gear_base_asset=gears_and_base, + episode_length_s=15.0, + enable_randomization=True, + rl_training_mode=args_cli.rl_training_mode, + ) + + table.set_initial_pose(Pose(position_xyz=(0.55, 0.0, -0.009), rotation_xyzw=(0.0, 0.0, 0.707, 0.707))) + assembled_board.set_initial_pose( + Pose(position_xyz=(0.88, 0.15, -0.009), rotation_xyzw=(0.0, 0.0, -0.7071, 0.7071)) + ) + medium_gear.set_initial_pose(Pose(position_xyz=(0.5462, -0.02386, 0.12858), rotation_xyzw=(0.0, 0.0, 0.0, 1.0))) + gears_and_base.set_initial_pose( + Pose(position_xyz=(0.585, -0.074, 0.0), rotation_xyzw=(0.0, 0.0, 0.9239, 0.3827)) + ) + + scene = Scene(assets=[table, assembled_board, medium_gear, gears_and_base, light]) + + return IsaacLabArenaEnvironment( + name=self.name, + embodiment=embodiment, + scene=scene, + task=task, + rl_framework=RLFramework.RL_GAMES, + rl_policy_cfg="isaaclab_arena_examples.policy:nist_gear_insertion_osc_rl_games.yaml", + ) + + +Step-by-Step Breakdown +^^^^^^^^^^^^^^^^^^^^^^ + +**1. Interact with the Asset Registry** + +.. code-block:: python + + table = self.asset_registry.get_asset_by_name("table")() + assembled_board = self.asset_registry.get_asset_by_name("nist_board_assembled")() + gears_and_base = self.asset_registry.get_asset_by_name("gears_and_base")() + medium_gear = self.asset_registry.get_asset_by_name("medium_nist_gear")() + +Here, we're selecting the components needed for our RL task: a table as the support surface, +the assembled NIST board for context, the fixed insertion target (``gears_and_base``), and the +held medium gear that the robot must insert onto the peg. The Franka embodiment is configured +with ``concatenate_observation_terms=True`` so the observation groups can be consumed by the RL stack. + +**2. Position the Objects** + +.. code-block:: python + + table.set_initial_pose(Pose(position_xyz=(0.55, 0.0, -0.009), rotation_xyzw=(0.0, 0.0, 0.707, 0.707))) + assembled_board.set_initial_pose( + Pose(position_xyz=(0.88, 0.15, -0.009), rotation_xyzw=(0.0, 0.0, -0.7071, 0.7071)) + ) + medium_gear.set_initial_pose(Pose(position_xyz=(0.5462, -0.02386, 0.12858), rotation_xyzw=(0.0, 0.0, 0.0, 1.0))) + gears_and_base.set_initial_pose( + Pose(position_xyz=(0.585, -0.074, 0.0), rotation_xyzw=(0.0, 0.0, 0.9239, 0.3827)) + ) + +Before we create the scene, we place the assets in the assembled-board layout used for this task. +The table provides the workspace, the assembled board provides visual context, and the gear base +defines the target peg pose that the policy must reach relative to. + +**3. Configure the Franka Embodiment for Operational-Space Control** + +.. code-block:: python + + embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)( + enable_cameras=args_cli.enable_cameras, + concatenate_observation_terms=True, + ) + + embodiment.action_config.arm_action = NistGearInsertionOscActionCfg( + asset_name="robot", + joint_names=["panda_joint[1-7]"], + body_name="panda_fingertip_centered", + fixed_asset_name=gears_and_base.name, + peg_offset=(0.02025, 0.0, 0.025), + ) + +The arm is controlled in operational space rather than joint position space. +The policy emits a 7-D action: + +- 3 position commands +- 3 rotation commands +- 1 auxiliary success-prediction scalar used by the task-specific controller/reward stack + +The action term smooths commands, clips task-space deltas, locks roll and pitch to the +assembly convention, and defines targets relative to the peg position. This makes the +action space better aligned with insertion than a generic joint-space controller. + +**4. Configure the Policy Observation** + +.. code-block:: python + + embodiment.observation_config.policy.nist_gear_policy_obs = ObsTerm( + func=NistGearInsertionPolicyObservations, + params={ + "robot_name": "robot", + "board_name": gears_and_base.name, + "peg_offset": [0.02025, 0.0, 0.025], + }, + ) + +The environment swaps out the default embodiment policy observations for a specialized 24-D +observation stack designed for insertion. It includes: + +- fingertip pose relative to the fixed asset +- end-effector linear and angular velocity +- wrist-force feedback +- a sampled force threshold +- previous actions + +Task observations are still provided separately for critic/state use. + +**5. Create the Gear Insertion Task** + +.. code-block:: python + + task = NistGearInsertionTask( + assembled_board=assembled_board, + held_gear=medium_gear, + background_scene=table, + peg_offset_from_board=[0.02025, 0.0, 0.0], + peg_offset_for_obs=[0.02025, 0.0, 0.025], + gear_base_asset=gears_and_base, + success_z_fraction=0.20, + xy_threshold=0.0025, + episode_length_s=15.0, + enable_randomization=True, + rl_training_mode=args_cli.rl_training_mode, + ) + +The ``NistGearInsertionTask`` encapsulates the RL training objective: align the held gear with the +target peg and insert it successfully. The task includes: + +- **Reward Terms**: Dense shaping for alignment, engagement, insertion success, and action/contact regularization +- **Observation Space**: Task-specific policy observations, plus task observations for critic/state +- **Termination Conditions**: Timeout and insertion success, with success disabled during training by ``--rl_training_mode`` +- **Success Metric**: ``success_rate`` computed during evaluation + +When ``enable_randomization=True``, the task also configures environment-side randomization through +reset/startup events, including fixed-asset yaw variation, friction/material changes for the gear, +robot, and fixed asset, and held-object mass perturbations. + +See :doc:`../../concepts/concept_tasks_design` for task creation details. + +**6. Compose the Scene and Create the IsaacLab Arena Environment** + +.. code-block:: python + + scene = Scene(assets=[table, assembled_board, medium_gear, gears_and_base, light]) + + return IsaacLabArenaEnvironment( + name=self.name, + embodiment=embodiment, + scene=scene, + task=task, + rl_framework=RLFramework.RL_GAMES, + rl_policy_cfg="isaaclab_arena_examples.policy:nist_gear_insertion_osc_rl_games.yaml", + ) + +Finally, we assemble all the pieces into a complete, runnable RL environment. The +``IsaacLabArenaEnvironment`` connects the embodiment (the robot), the scene (the world), +and the task (the objective, rewards, and metrics), and declares that this workflow uses +the RL Games training stack. +See :doc:`../../concepts/concept_environment_design` for environment composition details. + + +Validation: Run One Training Iteration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To validate that the environment loads correctly, run one training iteration and check for errors: + +.. code-block:: bash + + python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ + --headless \ + --num_envs 128 \ + --max_iterations 1 \ + --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ + nist_assembled_gear_mesh_osc \ + --rl_training_mode + +If the environment is set up correctly, RL Games should initialize, create the environments, +run one optimization iteration, and then exit without environment-construction errors. + +You should see output indicating that training has started and that one iteration completed. + +.. image:: ../../../images/nist_gear_insertion_task.gif + :align: center + :height: 400px diff --git a/docs/pages/example_workflows/nist_gear_insertion/step_2_policy_training.rst b/docs/pages/example_workflows/nist_gear_insertion/step_2_policy_training.rst new file mode 100644 index 000000000..dc534c737 --- /dev/null +++ b/docs/pages/example_workflows/nist_gear_insertion/step_2_policy_training.rst @@ -0,0 +1,140 @@ +Policy Training +--------------- + +**Docker Container**: Base (see :doc:`../../quickstart/installation` for more details) + +:docker_run_default: + +Training Command +^^^^^^^^^^^^^^^^ + +Training uses Arena's RL Games entrypoint directly. The ``--agent_cfg_path`` argument +points to the task-specific RL Games PPO config, which defines the network architecture +and optimization hyperparameters used for this workflow. + +.. code-block:: bash + + python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ + --headless \ + --num_envs 4096 \ + --max_iterations 1000 \ + --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ + nist_assembled_gear_mesh_osc \ + --rl_training_mode + +The environment-specific flag ``--rl_training_mode`` comes after the environment name. +During training, it disables success termination so the policy can continue collecting +experience near successful insertions. + +Checkpoints are written to ``logs/rl_games/NistGearInsertionOscRlg//``. +The agent configuration is saved alongside as ``params/agent.yaml``, which the +evaluation scripts use together with the RL Games checkpoint. + +.. tip:: + + Remove ``--headless`` if you want to watch training in the viewer. + + +Policy Configuration +^^^^^^^^^^^^^^^^^^^^ + +The RL Games config defines both the policy network and the PPO hyperparameters, including: + +- a feed-forward MLP backbone +- an LSTM recurrent layer stack +- PPO learning rate, clipping, and rollout length +- the RL Games experiment name used for logs/checkpoints + +The configuration file is: + +.. code-block:: text + + isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml + +Key settings in this file include: + +- ``network.mlp.units`` for the actor-critic MLP sizes +- ``network.rnn`` for the recurrent policy/value architecture +- ``config.learning_rate`` for PPO optimization +- ``config.max_epochs`` for the training horizon +- ``config.name`` for the RL Games experiment name + +In this workflow, the experiment name is ``NistGearInsertionOscRlg``. + +Monitoring Training +^^^^^^^^^^^^^^^^^^^ + +During training, RL Games prints a summary to the console for each iteration, +including rollout statistics, losses, and timing information. Checkpoints are +saved periodically according to the settings in the YAML config. + +You should see output indicating that training has started: + +.. code-block:: text + + step: 0 + fps step: ... + fps total: ... + epoch: 1 + mean rewards: ... + mean episode lengths: ... + +If you are tuning the policy, the most useful signals to watch are: + +- the reward terms for alignment and insertion +- the episode length distribution +- the overall training stability across many iterations + + +Resume Training +^^^^^^^^^^^^^^^ + +To continue training from an existing checkpoint: + +.. code-block:: bash + + python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ + --headless \ + --num_envs 4096 \ + --max_iterations 1000 \ + --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ + --checkpoint \ + nist_assembled_gear_mesh_osc \ + --rl_training_mode + +Multi-GPU Training +^^^^^^^^^^^^^^^^^^ + +Add ``--distributed`` to spread environments across all available GPUs: + +.. code-block:: bash + + python -m torch.distributed.run --nnodes=1 --nproc_per_node=2 \ + isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ + --headless \ + --num_envs 4096 \ + --max_iterations 1000 \ + --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ + --distributed \ + nist_assembled_gear_mesh_osc \ + --rl_training_mode + + +Expected Results +^^^^^^^^^^^^^^^^ + +A successful training run should produce: + +- RL Games console output for rollout collection and optimization +- checkpoint files under the RL Games log directory +- progressively improving insertion performance during evaluation + +.. image:: ../../../images/nist_gear_insertion_task.gif + :align: center + :height: 400px + +.. note:: + + Training performance depends on hardware, environment configuration, and random seed. + Because this is a contact-rich insertion task with randomized environment parameters, + final performance will vary with the exact number of training iterations. diff --git a/docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst b/docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst new file mode 100644 index 000000000..920ec4406 --- /dev/null +++ b/docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst @@ -0,0 +1,187 @@ +Closed-Loop Policy Inference and Evaluation +------------------------------------------- + +**Docker Container**: Base (see :doc:`../../quickstart/installation` for more details) + +:docker_run_default: + +Once inside the container, set the models directory if you plan to organize checkpoints locally: + +.. code-block:: bash + + export MODELS_DIR=models/isaaclab_arena/nist_gear_insertion + mkdir -p $MODELS_DIR + +This tutorial assumes you've completed :doc:`step_2_policy_training` and have a trained checkpoint, +or you can place a checkpoint in ``$MODELS_DIR`` and use it in the commands below. + +.. dropdown:: Download Pre-trained Model (skip preceding steps) + :animate: fade-in + + .. code-block:: bash + + hf download \ + nvidia/Arena-NIST-Gear-Insertion-RL-Task \ + --local-dir $MODELS_DIR/nist_gear_insertion_checkpoint + + After downloading, the checkpoint is at: + + ``$MODELS_DIR/nist_gear_insertion_checkpoint/best_NistGearInsertionOscRlg.pth`` + + Replace checkpoint paths in the examples below with this path. + + +Evaluation Methods +^^^^^^^^^^^^^^^^^^ + +There are three ways to evaluate a trained policy: + +1. **Single environment** (``policy_runner.py``): detailed evaluation with metrics +2. **Parallel environments** (``policy_runner.py``): larger-scale statistical evaluation +3. **Batch evaluation** (``eval_runner.py``): automated evaluation across multiple checkpoints + + +Method 1: Single Environment Evaluation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: bash + + python isaaclab_arena/evaluation/policy_runner.py \ + --checkpoint_path $MODELS_DIR/.pth \ + --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ + --policy_type rl_games \ + --num_episodes 20 \ + --num_envs 1 \ + --visualizer kit \ + nist_assembled_gear_mesh_osc + +.. note:: + + If you trained the model yourself, replace the checkpoint path with the path to your own checkpoint. + +Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, ``--agent_cfg_path``, etc.) +must come **before** the environment name. Environment-specific arguments must come **after** it. + +At the end of the run, metrics are printed to the console: + +.. code-block:: text + + Metrics: {'success_rate': 0.99, 'num_episodes': 1024} + +.. image:: ../../../images/nist_gear_insertion_task.gif + :align: center + :height: 400px + + +Method 2: Parallel Environment Evaluation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For more statistically significant results, run across many environments in parallel. + +.. code-block:: bash + + python isaaclab_arena/evaluation/policy_runner.py \ + --checkpoint_path $MODELS_DIR/.pth \ + --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ + --policy_type rl_games \ + --num_episodes 1024 \ + --num_envs 64 \ + --env_spacing 2.5 \ + --visualizer kit \ + nist_assembled_gear_mesh_osc + +At the end of the run, metrics are printed to the console: + +.. code-block:: text + + Metrics: {'success_rate': 0.99, 'num_episodes': 1024} + +.. image:: ../../../images/nist_gear_insertion_parallel.gif + :align: center + :height: 400px + + +Method 3: Batch Evaluation +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To evaluate multiple checkpoints in sequence, use ``eval_runner.py`` with a JSON config. +Here we evaluate checkpoints from the same training run. +The checkpoint paths should be replaced with the timestamped run directory in +``logs/rl_games/NistGearInsertionOscRlg/``. + +**1. Create an evaluation config** + +Create a file ``eval_config.json``: + +.. code-block:: json + + { + "jobs": [ + { + "name": "nist_gear_model_0500", + "policy_type": "rl_games", + "num_episodes": 1024, + "arena_env_args": { + "environment": "nist_assembled_gear_mesh_osc", + "num_envs": 64 + }, + "policy_config_dict": { + "checkpoint_path": "models/isaaclab_arena/nist_gear_insertion/model_0500.pth", + "agent_cfg_path": "isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml" + } + }, + { + "name": "nist_gear_model_1000", + "policy_type": "rl_games", + "num_episodes": 1024, + "arena_env_args": { + "environment": "nist_assembled_gear_mesh_osc", + "num_envs": 64 + }, + "policy_config_dict": { + "checkpoint_path": "models/isaaclab_arena/nist_gear_insertion/model_1000.pth", + "agent_cfg_path": "isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml" + } + } + ] + } + +**2. Run** + +.. code-block:: bash + + python isaaclab_arena/evaluation/eval_runner.py \ + --eval_jobs_config eval_config.json + +.. code-block:: text + + ====================================================================== + METRICS SUMMARY + ====================================================================== + + nist_gear_model_0500: + num_episodes 1024 + success_rate 0.95 + + nist_gear_model_1000: + num_episodes 1024 + success_rate 0.9900 + ====================================================================== + + +Understanding the Metrics +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The NIST gear insertion task reports two metrics: + +- ``success_rate``: fraction of episodes where the gear is successfully inserted +- ``num_episodes``: number of completed episodes in the evaluation run + +A well-trained policy should show improving insertion performance as training progresses. +Results will vary with hardware, random seed, and randomization settings. + +.. note:: + + For evaluation, omit ``--rl_training_mode`` (default): success termination stays enabled so + metrics such as success rate are meaningful. For RL Games training, pass ``--rl_training_mode`` to + disable success termination. diff --git a/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst b/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst index 53904cf9c..42c63c7ad 100644 --- a/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst +++ b/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst @@ -7,10 +7,12 @@ covering environment setup, policy training, and closed-loop evaluation. Currently, the following reinforcement learning workflow examples are provided: * :doc:`Franka Lift Object Task <../reinforcement_learning/index>` — Franka Panda, PhysX, joint-position control +* :doc:`NIST Gear Insertion Task <../nist_gear_insertion/index>` — Franka Panda, operational-space control, contact-rich insertion * :doc:`Dexsuite Kuka Allegro Lift Task (Newton) <../dexsuite_lift/index>` — Kuka + Allegro hand, **Newton** physics, dexterous manipulation .. toctree:: :maxdepth: 1 ../reinforcement_learning/index + ../nist_gear_insertion/index ../dexsuite_lift/index diff --git a/isaaclab_arena/assets/object_library.py b/isaaclab_arena/assets/object_library.py index 2ada1fd51..d54fa8ce6 100644 --- a/isaaclab_arena/assets/object_library.py +++ b/isaaclab_arena/assets/object_library.py @@ -120,7 +120,9 @@ class GearsAndBase(LibraryObject): "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), } - def __init__(self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None): + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) @@ -155,7 +157,6 @@ def __init__(self, prim_path: str | None = None, initial_pose: Pose | None = Non @register_asset class NistAssembledBoard(LibraryObject): - """NIST assembled board (kinematic visual prop for gear insertion scene).""" name = "nist_assembled_board" tags = ["object"] @@ -217,7 +218,6 @@ def __init__( @register_asset class NistGearBase(LibraryObject): - """NIST gear base / receptacle (from Downloads/NIST).""" name = "nist_gear_base" tags = ["nist", "object"] @@ -276,36 +276,6 @@ def __init__( super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) -@register_asset -class NistGearMedium(LibraryObject): - - name = "nist_gear_medium" - tags = ["nist", "object"] - usd_path = str(_REPO_ROOT / "assets" / "nist_gear_medium.usd") - spawn_cfg_addon = { - "rigid_props": sim_utils.RigidBodyPropertiesCfg( - disable_gravity=False, - kinematic_enabled=False, - max_depenetration_velocity=5.0, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, - ), - "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), - "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), - } - - def __init__( - self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None - ): - super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) - - @register_asset class NistGearLarge(LibraryObject): """NIST large gear.""" diff --git a/isaaclab_arena/policy/rl_games_action_policy.py b/isaaclab_arena/policy/rl_games_action_policy.py index b8cad65ba..1971418ea 100644 --- a/isaaclab_arena/policy/rl_games_action_policy.py +++ b/isaaclab_arena/policy/rl_games_action_policy.py @@ -6,8 +6,8 @@ from __future__ import annotations import argparse -import math import gymnasium as gym +import math import torch import yaml from dataclasses import dataclass @@ -15,12 +15,11 @@ from pathlib import Path from isaaclab.utils.assets import retrieve_file_path +from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper from rl_games.common import env_configurations, vecenv from rl_games.common.player import BasePlayer from rl_games.torch_runner import Runner -from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper - from isaaclab_arena.assets.register import register_policy from isaaclab_arena.policy.policy_base import PolicyBase @@ -101,7 +100,12 @@ def _load_policy(self, env: gym.Env) -> None: concate_obs = agent_cfg["params"]["env"].get("concate_obs_groups", True) self._wrapper = RlGamesVecEnvWrapper( - env, device, clip_obs, clip_actions, obs_groups, concate_obs, + env, + device, + clip_obs, + clip_actions, + obs_groups, + concate_obs, ) vecenv.register( diff --git a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py index 1ed9a9fe5..b556d20ec 100644 --- a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py +++ b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py @@ -1,3 +1,8 @@ +# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + # Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. # SPDX-License-Identifier: Apache-2.0 @@ -48,16 +53,14 @@ from datetime import datetime import omni.log -from rl_games.common import env_configurations, vecenv -from rl_games.common.algo_observer import IsaacAlgoObserver -from rl_games.torch_runner import Runner - from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent from isaaclab.utils.assets import retrieve_file_path from isaaclab.utils.dict import print_dict from isaaclab.utils.io import dump_yaml - from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper +from rl_games.common import env_configurations, vecenv +from rl_games.common.algo_observer import IsaacAlgoObserver +from rl_games.torch_runner import Runner from isaaclab_arena_environments.cli import get_arena_builder_from_cli @@ -81,6 +84,14 @@ def main(): agent_cfg["params"]["config"]["device"] = args_cli.device agent_cfg["params"]["config"]["device_name"] = args_cli.device + if args_cli.distributed: + local_rank = int(os.getenv("LOCAL_RANK", "0")) + agent_cfg["params"]["seed"] += int(os.getenv("RANK", "0")) + agent_cfg["params"]["config"]["device"] = f"cuda:{local_rank}" + agent_cfg["params"]["config"]["device_name"] = f"cuda:{local_rank}" + agent_cfg["params"]["config"]["multi_gpu"] = True + env_cfg.sim.device = f"cuda:{local_rank}" + if args_cli.max_iterations is not None: agent_cfg["params"]["config"]["max_epochs"] = args_cli.max_iterations diff --git a/isaaclab_arena/tasks/nist_gear_insertion_task.py b/isaaclab_arena/tasks/nist_gear_insertion_task.py index f0ca51076..1a83ae0b8 100644 --- a/isaaclab_arena/tasks/nist_gear_insertion_task.py +++ b/isaaclab_arena/tasks/nist_gear_insertion_task.py @@ -11,15 +11,15 @@ from __future__ import annotations +import numpy as np from collections.abc import Callable from dataclasses import MISSING, dataclass, field -from typing import TYPE_CHECKING - -import numpy as np import isaaclab.envs.mdp as mdp_isaac_lab from isaaclab.envs.common import ViewerCfg -from isaaclab.managers import EventTermCfg, ObservationGroupCfg as ObsGroup, ObservationTermCfg as ObsTerm +from isaaclab.managers import EventTermCfg +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm from isaaclab.managers import RewardTermCfg, SceneEntityCfg, TerminationTermCfg from isaaclab.utils import configclass @@ -30,19 +30,10 @@ from isaaclab_arena.tasks.observations.gear_insertion_observations import body_pos_in_env_frame, body_quat_canonical from isaaclab_arena.tasks.rewards import gear_insertion_rewards from isaaclab_arena.tasks.task_base import TaskBase -from isaaclab_arena.tasks.terminations import ( - gear_dropped_from_gripper, - gear_mesh_insertion_success, - gear_orientation_exceeded, -) +from isaaclab_arena.tasks.terminations import gear_dropped_from_gripper, gear_mesh_insertion_success from isaaclab_arena.terms.events import place_gear_in_gripper from isaaclab_arena.utils.cameras import get_viewer_cfg_look_at_object -if TYPE_CHECKING: - from collections.abc import Sequence - - import torch - @dataclass class GraspConfig: @@ -97,7 +88,9 @@ def __init__( self._gear_base_asset = gear_base_asset if gear_base_asset is not None else assembled_board self.peg_offset_from_board = peg_offset_from_board or [2.025e-2, 0.0, 0.0] self.peg_offset_for_obs = peg_offset_for_obs - self.held_gear_base_offset = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + self.held_gear_base_offset = ( + held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + ) self.peg_offset_xy_noise = peg_offset_xy_noise self.gear_peg_height = gear_peg_height self.success_z_fraction = success_z_fraction @@ -374,6 +367,7 @@ def __init__( held_gear_base_offset: list[float] | None = None, ): hgo = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + @configclass class _TaskObsCfg(ObsGroup): gear_pos = ObsTerm( diff --git a/isaaclab_arena/tasks/observations/gear_insertion_observations.py b/isaaclab_arena/tasks/observations/gear_insertion_observations.py index 1eaf89f1a..0fe2a9955 100644 --- a/isaaclab_arena/tasks/observations/gear_insertion_observations.py +++ b/isaaclab_arena/tasks/observations/gear_insertion_observations.py @@ -15,12 +15,11 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING - import torch -import warp as wp +from typing import TYPE_CHECKING import isaaclab.utils.math as math_utils +import warp as wp from isaaclab.assets import Articulation, RigidObject from isaaclab.managers import ManagerTermBase, SceneEntityCfg from isaaclab.utils.math import axis_angle_from_quat @@ -38,7 +37,7 @@ def peg_pos_in_env_frame( env: ManagerBasedRLEnv, board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] = [0.0, 0.0, 0.0], + peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), ) -> torch.Tensor: """Target peg position: fixed asset pose + offset in its local frame.""" board: RigidObject = env.scene[board_cfg.name] @@ -51,15 +50,15 @@ def peg_pos_in_env_frame( def held_gear_base_pos_in_env_frame( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), ) -> torch.Tensor: """Position of the held gear's insertion point (root + offset in gear frame) in env frame.""" gear: RigidObject = env.scene[gear_cfg.name] gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins gear_quat = wp.to_torch(gear.data.root_quat_w) - held_off = torch.tensor( - held_gear_base_offset, device=env.device, dtype=torch.float32 - ).unsqueeze(0).expand(env.num_envs, 3) + held_off = ( + torch.tensor(held_gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + ) return gear_pos + math_utils.quat_apply(gear_quat, held_off) @@ -67,8 +66,8 @@ def peg_delta_from_held_gear_base( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] = [0.0, 0.0, 0.0], - held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), + held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), ) -> torch.Tensor: """Vector from held gear insertion point to peg. Positive = peg is ahead in that axis.""" held_base = held_gear_base_pos_in_env_frame(env, gear_cfg, held_gear_base_offset) @@ -116,6 +115,7 @@ def force_torque_at_body( robot: Articulation = env.scene[robot_cfg.name] body_idx = robot.body_names.index(body_name) wrench = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, body_idx] + wrench = torch.nan_to_num(wrench, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) if return_torque: return wrench return wrench[:, :3] @@ -154,9 +154,7 @@ def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): self._rot_noise_deg: float = p.get("rot_noise_level_deg", 0.1) self._force_noise: float = p.get("force_noise_level", 1.0) self._ft_alpha: float = p.get("ft_smoothing_factor", 0.25) - self._contact_thresh_range: tuple[float, float] = tuple( - p.get("contact_threshold_range", [5.0, 10.0]) - ) + self._contact_thresh_range: tuple[float, float] = tuple(p.get("contact_threshold_range", [5.0, 10.0])) n = env.num_envs dev = env.device @@ -227,7 +225,8 @@ def __call__( rot_noise_axis = rot_noise_axis / (torch.linalg.norm(rot_noise_axis, dim=1, keepdim=True) + 1e-8) rot_noise_angle = torch.randn(n, device=dev) * math.radians(self._rot_noise_deg) noisy_quat = math_utils.quat_mul( - ft_quat, math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), + ft_quat, + math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), ) noisy_quat[:, [2, 3]] = 0.0 noisy_quat = noisy_quat * self._flip_quats.unsqueeze(-1) @@ -252,6 +251,7 @@ def __call__( self._prev_noisy_quat[:] = noisy_quat raw_force = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, self._force_idx, :3] + raw_force = torch.nan_to_num(raw_force, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) arm_osc_action.force_smooth[:] = ( self._ft_alpha * raw_force + (1.0 - self._ft_alpha) * arm_osc_action.force_smooth ) @@ -262,12 +262,15 @@ def __call__( prev_actions = arm_osc_action._smoothed_actions.clone() prev_actions[:, 3:5] = 0.0 - return torch.cat([ - fingertip_pos_rel, - noisy_quat, - ee_linvel, - ee_angvel, - noisy_force, - force_threshold, - prev_actions, - ], dim=-1) + return torch.cat( + [ + fingertip_pos_rel, + noisy_quat, + ee_linvel, + ee_angvel, + noisy_force, + force_threshold, + prev_actions, + ], + dim=-1, + ) diff --git a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py index 49dc4dfac..00fd2caf1 100644 --- a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py +++ b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py @@ -13,11 +13,10 @@ from __future__ import annotations +import torch from typing import TYPE_CHECKING -import torch import warp as wp - from isaaclab.assets import RigidObject from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg from isaaclab.utils.math import combine_frame_transforms, quat_apply @@ -86,7 +85,9 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): super().__init__(cfg, env) self.gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] self.board_cfg: SceneEntityCfg = cfg.params["board_cfg"] - self.peg_offset = torch.tensor(cfg.params.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device, dtype=torch.float32) + self.peg_offset = torch.tensor( + cfg.params.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device, dtype=torch.float32 + ) self.held_gear_base_offset = torch.tensor( cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0]), device=env.device, dtype=torch.float32 ) @@ -98,8 +99,9 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): def reset(self, env_ids: torch.Tensor) -> None: if self._xy_noise_range > 0.0: n = len(env_ids) - self._offset_noise[env_ids, 0] = (torch.rand(n, device=self._offset_noise.device) * 2 - 1) * self._xy_noise_range - self._offset_noise[env_ids, 1] = (torch.rand(n, device=self._offset_noise.device) * 2 - 1) * self._xy_noise_range + noise_dev = self._offset_noise.device + self._offset_noise[env_ids, 0] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range + self._offset_noise[env_ids, 1] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range def __call__( self, @@ -149,7 +151,9 @@ def _check_gear_position( gear: RigidObject = env.scene[gear_cfg.name] gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins gear_quat = wp.to_torch(gear.data.root_quat_w) - held_off = torch.tensor(held_gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + held_off = ( + torch.tensor(held_gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + ) held_base_pos = gear_pos + quat_apply(gear_quat, held_off) board: RigidObject = env.scene[board_cfg.name] @@ -179,7 +183,14 @@ def gear_insertion_engagement_bonus( ) -> torch.Tensor: """Bonus when the gear is partially engaged on the peg.""" return _check_gear_position( - env, gear_cfg, board_cfg, peg_offset, held_gear_base_offset, gear_peg_height, engage_z_fraction, xy_threshold, + env, + gear_cfg, + board_cfg, + peg_offset, + held_gear_base_offset, + gear_peg_height, + engage_z_fraction, + xy_threshold, ).float() @@ -195,7 +206,14 @@ def gear_insertion_success_bonus( ) -> torch.Tensor: """Bonus when the gear is fully inserted (binary success geometry).""" return _check_gear_position( - env, gear_cfg, board_cfg, peg_offset, held_gear_base_offset, gear_peg_height, success_z_fraction, xy_threshold, + env, + gear_cfg, + board_cfg, + peg_offset, + held_gear_base_offset, + gear_peg_height, + success_z_fraction, + xy_threshold, ).float() @@ -298,5 +316,3 @@ def __call__( pred = (arm_osc_action.success_pred + 1.0) / 2.0 error = torch.abs(true_success.float() - pred) return error * self._pred_scale - - diff --git a/isaaclab_arena/tasks/terminations.py b/isaaclab_arena/tasks/terminations.py index 612c7cb6e..89adb59ec 100644 --- a/isaaclab_arena/tasks/terminations.py +++ b/isaaclab_arena/tasks/terminations.py @@ -6,9 +6,8 @@ import math import torch -import warp as wp - import isaaclab.utils.math as math_utils +import warp as wp from isaaclab.assets import Articulation, RigidObject from isaaclab.envs import ManagerBasedRLEnv from isaaclab.envs.mdp.terminations import root_height_below_minimum diff --git a/isaaclab_arena/terms/events.py b/isaaclab_arena/terms/events.py index a5768f759..1bf11e1dc 100644 --- a/isaaclab_arena/terms/events.py +++ b/isaaclab_arena/terms/events.py @@ -5,14 +5,12 @@ from __future__ import annotations -from collections.abc import Callable, Sequence -from typing import Any, cast - import torch - -import warp as wp +from collections.abc import Callable +from typing import Any, cast import isaaclab.utils.math as math_utils +import warp as wp from isaaclab.assets import Articulation from isaaclab.envs import ManagerBasedEnv from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg @@ -43,7 +41,9 @@ def set_object_pose( vel = velocity.to_tensor(device=env.device).unsqueeze(0).expand(num_envs, -1) asset.write_root_velocity_to_sim_index(root_velocity=vel, env_ids=env_ids) else: - asset.write_root_velocity_to_sim_index(root_velocity=torch.zeros(num_envs, 6, device=env.device), env_ids=env_ids) + asset.write_root_velocity_to_sim_index( + root_velocity=torch.zeros(num_envs, 6, device=env.device), env_ids=env_ids + ) def set_object_pose_per_env( @@ -66,7 +66,9 @@ def set_object_pose_per_env( pose_t_xyz_q_xyzw = pose.to_tensor(device=env.device).unsqueeze(0) pose_t_xyz_q_xyzw[0, :3] += env.scene.env_origins[cur_env, :] # Set the pose and velocity - asset.write_root_pose_to_sim_index(root_pose=pose_t_xyz_q_xyzw, env_ids=torch.tensor([cur_env], device=env.device)) + asset.write_root_pose_to_sim_index( + root_pose=pose_t_xyz_q_xyzw, env_ids=torch.tensor([cur_env], device=env.device) + ) asset.write_root_velocity_to_sim_index( root_velocity=torch.zeros(1, 6, device=env.device), env_ids=torch.tensor([cur_env], device=env.device) ) @@ -109,20 +111,17 @@ def __init__(self, cfg: EventTermCfg, env: ManagerBasedEnv): self.hand_grasp_width: float = cast(float, cfg.params["hand_grasp_width"]) self.hand_close_width: float = cast(float, cfg.params["hand_close_width"]) self.gripper_joint_setter_func: Callable[..., Any] = cast( - Callable[..., Any], cfg.params["gripper_joint_setter_func"], + Callable[..., Any], + cfg.params["gripper_joint_setter_func"], ) grasp_rot_offset = cfg.params["grasp_rot_offset"] self.grasp_rot_offset_tensor = ( - torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32) - .unsqueeze(0) - .repeat(env.num_envs, 1) + torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32).unsqueeze(0).repeat(env.num_envs, 1) ) grasp_offset = cfg.params["grasp_offset"] - self.grasp_offset_tensor = torch.tensor( - grasp_offset, device=env.device, dtype=torch.float32 - ) + self.grasp_offset_tensor = torch.tensor(grasp_offset, device=env.device, dtype=torch.float32) ee_name: str = cast(str, cfg.params.get("end_effector_body_name", "panda_hand")) eef_indices, _ = self.robot.find_bodies([ee_name]) @@ -133,7 +132,7 @@ def __init__(self, cfg: EventTermCfg, env: ManagerBasedEnv): all_joints, _ = self.robot.find_joints([".*"]) self.all_joints = all_joints - self.finger_joints = all_joints[self.num_arm_joints:] + self.finger_joints = all_joints[self.num_arm_joints :] def __call__( self, @@ -174,9 +173,7 @@ def __call__( gear_quat_w = wp.to_torch(self.gear.data.root_link_quat_w)[env_ids].clone() target_quat = math_utils.quat_mul(gear_quat_w, grasp_rot_offset_tensor) - target_pos = gear_pos_w + math_utils.quat_apply( - target_quat, grasp_offset_batch - ) + target_pos = gear_pos_w + math_utils.quat_apply(target_quat, grasp_offset_batch) eef_pos = wp.to_torch(self.robot.data.body_pos_w)[env_ids, self.eef_idx] eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] @@ -191,8 +188,10 @@ def __call__( ) delta_hand_pose = torch.cat((pos_error, aa_error), dim=-1) - if (torch.norm(pos_error, dim=-1).max() < pos_threshold - and torch.norm(aa_error, dim=-1).max() < rot_threshold): + if ( + torch.norm(pos_error, dim=-1).max() < pos_threshold + and torch.norm(aa_error, dim=-1).max() < rot_threshold + ): break jacobians = wp.to_torch(self.robot.root_view.get_jacobians()).clone() @@ -217,30 +216,38 @@ def __call__( for row_idx in range(n): self.gripper_joint_setter_func( - joint_pos, [row_idx], self.finger_joints, self.hand_grasp_width, + joint_pos, + [row_idx], + self.finger_joints, + self.hand_grasp_width, ) self.robot.set_joint_position_target_index( - target=joint_pos, joint_ids=self.all_joints, env_ids=env_ids, + target=joint_pos, + joint_ids=self.all_joints, + env_ids=env_ids, ) self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) for row_idx in range(n): self.gripper_joint_setter_func( - joint_pos, [row_idx], self.finger_joints, self.hand_close_width, + joint_pos, + [row_idx], + self.finger_joints, + self.hand_close_width, ) self.robot.set_joint_position_target_index( - target=joint_pos, joint_ids=self.all_joints, env_ids=env_ids, + target=joint_pos, + joint_ids=self.all_joints, + env_ids=env_ids, ) gear_quat = wp.to_torch(self.gear.data.root_quat_w)[env_ids] eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] rel_quat = math_utils.quat_mul(gear_quat, math_utils.quat_conjugate(eef_quat)) if not hasattr(env, "_initial_gear_ee_relative_quat"): - env._initial_gear_ee_relative_quat = torch.zeros( - env.num_envs, 4, device=env.device, dtype=torch.float32 - ) + env._initial_gear_ee_relative_quat = torch.zeros(env.num_envs, 4, device=env.device, dtype=torch.float32) env._initial_gear_ee_relative_quat[:, 3] = 1.0 env._initial_gear_ee_relative_quat[env_ids] = rel_quat diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py index bc188397d..9830edba4 100644 --- a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py @@ -11,12 +11,11 @@ import math import torch -import warp as wp from collections.abc import Sequence from typing import TYPE_CHECKING import isaaclab.utils.math as math_utils - +import warp as wp from isaaclab.envs.mdp.actions.actions_cfg import OperationalSpaceControllerActionCfg from isaaclab.envs.mdp.actions.task_space_actions import OperationalSpaceControllerAction from isaaclab.managers.action_manager import ActionTerm @@ -103,9 +102,7 @@ def apply_actions(self): def process_actions(self, actions: torch.Tensor): self._raw_actions[:] = actions self._prev_smoothed_actions[:] = self._smoothed_actions - self._smoothed_actions[:] = ( - self.ema_factor * actions + (1.0 - self.ema_factor) * self._smoothed_actions - ) + self._smoothed_actions[:] = self.ema_factor * actions + (1.0 - self.ema_factor) * self._smoothed_actions self.success_pred[:] = self._smoothed_actions[:, 6] self._compute_ee_pose() @@ -165,7 +162,9 @@ def process_actions(self, actions: torch.Tensor): desired_xyz[:, 1] = curr_pitch_w + clipped_pitch final_quat = math_utils.quat_from_euler_xyz( - roll=desired_xyz[:, 0], pitch=desired_xyz[:, 1], yaw=desired_xyz[:, 2], + roll=desired_xyz[:, 0], + pitch=desired_xyz[:, 1], + yaw=desired_xyz[:, 2], ) self._processed_actions[:, :3] = final_pos @@ -180,7 +179,7 @@ def process_actions(self, actions: torch.Tensor): def reset(self, env_ids: Sequence[int] | None = None) -> None: super().reset(env_ids) - if env_ids is None or (hasattr(env_ids, '__len__') and len(env_ids) == 0): + if env_ids is None or (hasattr(env_ids, "__len__") and len(env_ids) == 0): return n = len(env_ids) @@ -189,15 +188,19 @@ def reset(self, env_ids: Sequence[int] | None = None) -> None: self.ema_factor[env_ids] = lo + torch.rand(n, 1, device=self.device) * (hi - lo) self._pos_thresh[env_ids] = _randomize_gains( - self._default_pos_thresh[env_ids], self.cfg.pos_threshold_noise_level, n, self.device, + self._default_pos_thresh[env_ids], + self.cfg.pos_threshold_noise_level, + n, + self.device, ) self._rot_thresh[env_ids] = _randomize_gains( - self._default_rot_thresh[env_ids], self.cfg.rot_threshold_noise_level, n, self.device, + self._default_rot_thresh[env_ids], + self.cfg.rot_threshold_noise_level, + n, + self.device, ) - self.fixed_pos_noise[env_ids] = ( - torch.randn(n, 3, device=self.device) * self._fixed_pos_noise_levels - ) + self.fixed_pos_noise[env_ids] = torch.randn(n, 3, device=self.device) * self._fixed_pos_noise_levels ct_lo, ct_hi = self.cfg.contact_threshold_range self.contact_thresholds[env_ids] = ct_lo + torch.rand(n, device=self.device) * (ct_hi - ct_lo) diff --git a/isaaclab_arena_environments/mdp/robot_configs.py b/isaaclab_arena_environments/mdp/robot_configs.py index 342a93cfb..a6b63874d 100644 --- a/isaaclab_arena_environments/mdp/robot_configs.py +++ b/isaaclab_arena_environments/mdp/robot_configs.py @@ -8,9 +8,8 @@ from __future__ import annotations -from collections.abc import Sequence - import torch +from collections.abc import Sequence import isaaclab.sim as sim_utils from isaaclab.actuators import ImplicitActuatorCfg @@ -32,6 +31,7 @@ def franka_gripper_joint_setter( for jid in finger_joint_indices: joint_pos[row_indices, jid] = width / 2.0 + # =========================================================================================== # Franka Panda robot configuration optimized for assembly tasks (peg insert, gear mesh, etc.). # =========================================================================================== diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py index c7bc38c11..ba6491ac6 100644 --- a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -24,18 +24,14 @@ def get_env(self, args_cli: argparse.Namespace): from isaaclab.managers import ObservationTermCfg as ObsTerm from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg, OffsetCfg + import isaaclab_arena_environments.mdp as mdp from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.nist_gear_insertion_task import GraspConfig, NistGearInsertionTask + from isaaclab_arena.tasks.observations.gear_insertion_observations import NistGearInsertionPolicyObservations from isaaclab_arena.utils.pose import Pose - import isaaclab_arena_environments.mdp as mdp - from isaaclab_arena.tasks.observations.gear_insertion_observations import ( - NistGearInsertionPolicyObservations, - ) - from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import ( - NistGearInsertionOscActionCfg, - ) + from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg peg_tip_offset = (0.02025, 0.0, 0.025) peg_base_offset = (0.02025, 0.0, 0.0) @@ -76,19 +72,17 @@ def get_env(self, args_cli: argparse.Namespace): ), ], ) - embodiment.set_initial_joint_pose( - [ - 0.561824, - 0.287201, - -0.543103, - -2.410188, - 0.507908, - 2.847644, - 0.454298, - 0.04, - 0.04, - ] - ) + embodiment.set_initial_joint_pose([ + 0.561824, + 0.287201, + -0.543103, + -2.410188, + 0.507908, + 2.847644, + 0.454298, + 0.04, + 0.04, + ]) embodiment.action_config.arm_action = NistGearInsertionOscActionCfg( asset_name="robot", joint_names=["panda_joint[1-7]"], @@ -151,7 +145,9 @@ def get_env(self, args_cli: argparse.Namespace): teleop_device = None table.set_initial_pose(Pose(position_xyz=(0.55, 0.0, -0.009), rotation_xyzw=(0.0, 0.0, 0.707, 0.707))) - assembled_board.set_initial_pose(Pose(position_xyz=(0.88, 0.15, -0.009), rotation_xyzw=(0.0, 0.0, -0.7071, 0.7071))) + assembled_board.set_initial_pose( + Pose(position_xyz=(0.88, 0.15, -0.009), rotation_xyzw=(0.0, 0.0, -0.7071, 0.7071)) + ) medium_gear.set_initial_pose(Pose(position_xyz=(0.5462, -0.02386, 0.12858), rotation_xyzw=(0.0, 0.0, 0.0, 1.0))) gears_and_base.set_initial_pose( Pose(position_xyz=(0.585, -0.074, 0.0), rotation_xyzw=(0.0, 0.0, 0.9239, 0.3827)) diff --git a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml index adb1630cc..40294c50d 100644 --- a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml +++ b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml @@ -1,3 +1,8 @@ +# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + # RL Games PPO config for NIST gear insertion (operational-space torque control). # Network and hyperparameters align with common assembly peg-insert RL benchmarks. From b5d877ed633909abf87bd78b7035afe5078c9343 Mon Sep 17 00:00:00 2001 From: odoherty Date: Fri, 10 Apr 2026 10:07:44 -0700 Subject: [PATCH 06/12] Address PR review feedback: code quality, correctness, and architecture - Fix mutable default args (list -> tuple) in rewards/terminations - Remove duplicate copyright header in train_rl_games.py - Remove unused asset classes (NistAssembledBoard, NistGearSmall, NistGearLarge) - Add gpu_collision_stack_size comment explaining 4 GB requirement - Add docstrings for private API access (_terms, get_link_incoming_joint_force) - Extract shared check_gear_insertion_geometry utility - Fix GraspConfig.grasp_rot_offset default to xyzw identity [0,0,0,1] - Replace manual quat sign-flip with quat_unique - Fix observation reset() consistency (zero z/w, normalize, flip) - Vectorize place_gear_in_gripper gripper loop - Set player.deterministic: True in RL Games YAML - Add cached offset helper for reward tensor allocations - Migrate environment from RLFramework enum to string-based entry point - Add distributed training support to train_rl_games.py Signed-off-by: odoherty --- .gitattributes | 4 +- .../react-isaac-sample-controls-start.jpg | Bin 24133 -> 130 bytes isaaclab_arena/assets/object_library.py | 91 ------------------ .../reinforcement_learning/train_rl_games.py | 5 +- .../tasks/nist_gear_insertion_task.py | 10 +- .../gear_insertion_observations.py | 47 ++++++++- .../tasks/rewards/gear_insertion_rewards.py | 70 +++++++++----- isaaclab_arena/tasks/terminations.py | 42 +------- isaaclab_arena/terms/events.py | 20 ++-- .../mdp/env_callbacks.py | 1 - ...nist_assembled_gearmesh_osc_environment.py | 3 +- .../nist_gear_insertion_osc_rl_games.yaml | 2 +- 12 files changed, 107 insertions(+), 188 deletions(-) diff --git a/.gitattributes b/.gitattributes index 0467a96a4..aa94387cc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6,5 +6,5 @@ *.mp4 filter=lfs diff=lfs merge=lfs -text docs/images/ filter=lfs diff=lfs merge=lfs -text assets/*.usd filter=lfs diff=lfs merge=lfs -text -*.jpg filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text +assets/**/*.jpg filter=lfs diff=lfs merge=lfs -text +models/**/*.pth filter=lfs diff=lfs merge=lfs -text diff --git a/docs/images/react-isaac-sample-controls-start.jpg b/docs/images/react-isaac-sample-controls-start.jpg index 8140f221a8245c9ce12dab779a60f2f8d877c5ad..ca7e5b9cb87e3aa83cefce857fd2c63d00c5f873 100644 GIT binary patch literal 130 zcmWN?K@!3s3;@78uiyg~5<)}!8$!V{qtY?82Vbvy*{i;_kC)xg^U&3~dp~cFrqloZ zBU2eqr$;UB0yFw%b-RpKIoF)I&b7U^=j-*lj^wMP zN9spos`DiXGB<~|LJ;%=BqOyPS^!Eq;JhFu4@rZo6!?Rrl%Yl6%MkQLYR%uv&!jg0 zRR=t^K;a+tgPfs-f7MR`SBP}*%x`B-k$*jX`lRWBGp9{S$H=CK^tb=hp}Ax44qcrc zd-v|yp{KWF&-R^~^Upy?zzO>Q>nigQf@HwAh1+-T+z~GIXIW~2H1zCm_jhdH{!Hp0 z<9N2>Uw8;Lsd&1^X=wW4>~wGAMtf}IOTin^hIBn zi+k-=iAL)UsQOgbtF8{7=hstRuef>WQw>$WH?9xL^S8BC*MG0#>0+pQ*8KSTgA{j% z^}1TywYIB*r`_!@>7P7w_^)TdH$&CG2I=kXt>wK-i{kF64Hj*$_V%6HJ9lb=8k!zH zZl335B7wd-_+-yEz)FQZ?-zE}i$f;;Cx% zzoET>_WU@nID@9Ym;UTU``?V<`;Y%Ly}#85Yc^l}`{Em||JU)a75LW*{A&gNwF3WI zfq$*Q|NmCtAMnk=4PZ8J0L~yuuhdu5g9pjSPm&Is{$>nG%_H59^LTLKb_jBH^K?IH za$x;gE9><#-GAcX`QOf8^td{I{SOY%hM?s;W`!F6#IXNC#sA>;l6i~;e&d02gFEOP zjA~jLMG6G`T2wI;GiTx(4H9xl79G)dl#NUP;MXu z{mlNyy$#PGNbU{<<*@#7?;mS&^}PG}@6FA72n#L&|6?*4f|gl9kU|{|LuaL6L=hJBp;v^KS(Fc>Eon9L7LmdgGFEs&CyUa(Ml5g0G188GkU zEL^cj{+AuUE><{kUPkrW%AGgEpZ}nC;BATG$$H*~T^HPMEm^us>F3qT8#k$M-m-PK zj;`LGz4`|a86P$|VtVw{=`$9VfE*VAE_ZZtzU<-Yb={ll<9qu~;N75m!66ZmkD?w& zKY1FHkoY3$Wpc`^*Y7g3vU76Z=jE4{l~+_&eXRb}(Ad=6()zWH-P7CGKQK5nJo1e{ zIW;{am=%iVfYtv>!jcLt71YZ)|EsL?bk)Q0GYtb$G8%wqEkci^rdk$boqI=NuYo}#eq9M{!5{N zl_&Nu`v2f0Nb3GvUF^67x@BV`ffOnC?b~O!agk{~W?KmA4Q^YvOQ7<;idlHNw3f)9 zWqu8THUomppBJmv1`p_`s!1SO%P1<+O)Oy%QVB{_M^a%xfkDen`7;veW|jEUOvq;G zsj^A}&zx~fxi1S>$Zy(xn35kNft-TjZh6|21X8uEvC{;-T|mv=bgJK;DbO_5^M9pZ zRB%NCeT))28$-YP>nFRtU@Uf>hpgedxVim+5AJ$GiVv5DfO_p@ss@>zW_5!}TJyh#ia? z5=tv3@ku4DkggxX(+jP7IuDc)Dz?dG;$m?WcI=e$=6;fvv4u5+A5LL@)2?7Z_>WYM zH@X2|-Ix&A8>=gGo_j6%RWkEDdZ z8q=N#3-BdTeB#j>;X$i+&8LJGXZ)^9Aa?LvYfZd17J)dC7p;ms%}ej+|56fBOKW0? zq<2eAg(_-(XDZ6oyL%NTG_+c*LvAeHobW$BiIRrD2$l^9cHYWVsg(5-nJc=~d_5Iz z-E&F;-DfuHqvPO5N0ZF%O^NEWGf$D}-*o2G{zrnp)-r^)#-GS9^`ULAzmiRgzBKw| z#Z$Q3Yf9z;@pn>NWBJ%r+eoJKPK0lLbof3XW8|!tAZewPNT!c0+*mtytwyg5QFsw5 zfwpxw{Q`7Qc%lUA^O~gGhtD(FCMDwyMuAx3QoXsF)hDlD3cMahXZ4UV@B} zSvo1AsW!T038wP`fq2!8{=pn|lR#z-Ge@jhr?W%8sf$+I)7>qgYPknHBX(pf(Ne(Z4IozbVZB^va}SF+gQ7*5OUW)zk1Tj-28uu|=edx%ynH(*^O= z9zw}lFKqQU3B)rdf8AH=?83{fB&^ihX5e~FS!Cx>n|1871e*F%FM)2OVnCBu#sRTn zH(CWzUIJYpgm8E_s2%r5Wh;E9(~s)3DRvOWYE{6H$rtZ@wjgBl_j9qqwfx=gTi@w* zSy)dJR?%4SA_KPv@oK!-%Lw3?O;sh!0+dNhI?2PkxS}&;?h2Tb zNAv+qqtVY=4T!gsrZGI$K-IQ<)0~JaM%guxu~!8A5VQT;W@%A8KWHpYykEHKF#eTS zF&3`oI_0V+J^=UYi{=Jf{I;+gxIdmubailG*N|Hk-Bq>-y}OAj2Oi_Q0cRf~6$swT z+alvWkEGDV6Xd$&`|$5_ZLD%}(!0h>4I3TAEgr=cKp7IBk~M^7_!1Alfofve#7Z?i zH1qcH+m~Q&7L2;xAZ{3DX81dZ$?YwGwx=2I`uQek4* zE+3T*jc%T^1g*ASPr%v3(tlgBf4<5|n*QV%9LUQLD6`G7w2#syz6kgCH_vqX=ZO`q zsvN~y!lQd6(9oS)2?T3h@|_F&AL$dKFHowSWkjp$#@Xje>EtJb z98M^DqL0XyK)r-}sL8faAHqHDp2L?!31-gmy8#-?amwxlwAqVXt|5JuK!O+bMq5UY zauduEQu8WekzgOa7$;-90+r{x^6V;OvsQnPL&QKcv!Mr;_t;AX@j?e5(PYy&qdKXp1$JEG0G+xAh^Xm^YjVI1I zo+ai`KG46q2-XT&yi;W=OFnmLH+t1>9WA%iygDJ@)^4zGFkWuPX%b;cpmWXNn5a@> z8N5=9=t7qkM()4+&2Jx9mCZ4ZX!p}@SIhJE464D2ab#TdOzyKBu%vG+t0+NmwmEYw z)W>yE)xGR3D|T0VkL4Da8uh14!ZOuo@$J|xqKgDt^sQ@sL$)0BEtkxDkkfh8!(Jl%{s;63K7Kv?!b7qOW6U4StCt4JVx$HF(h`S8&n>Ce9#t%8Wd?_>*`=Ty9 zCaNePw7DFP=%8+$8<*9%zhVK!IYTqTB(KXTU|I@o!9Pl%ccgM+yHamJYbhg0Z&i+U zTnZk>TG>?+&mHXYTgPOG|bN>*rj-ZGiH+*v}lh zFF4F!9Lz)TrIGwtfg|tkZHL9tuoAkF>~aR1Rxn-mhx-9WhpCE%ukdy^xfE%W#+T}e z-Oz)LoDcBV7(058D8j}mX)^b6I7dXG(kT+i5QuSf0s?fgv)tER0+`WDyg^$?D_7YH zuc0E=d4=y<&Sae6a+7ktiY1UY;Ui;|1~bN6a=D9u`u+_|C|>HnuUL4yr_v- zXD%v?QKXe#w`sPN$ayALxm82pCkVjejFwXw_>vubo4(eN>7TUV zpE2ek-b3Yhe50+-&5mou9PS$?YK!3ljDw-fNCO)%#3Ekcr>x?I{EfAl66i-xJ0$*!AT0i!I}@vDRE6_==TAfFuP{VJ=C!E7YWf9b71 zfGIOOl!e6?DlJVpgMZ9{qwyG3L%($D#2BB@hA$9m62Hj+iod9~tN3qXlT-0~fs1f9 z3u4bhrx|T2)J0g$%UFE{UoFhE{?p~1qspC4 zxzW*@`KCkOkF5ye@yUQWEE#)>bmtOR7cW8gG!`rSoA9_nVVa(s8w_;^xt&@)NARI1 z5~z>^$N*ywG`0Hug!JN-z=B6#hJS*)pLP9=Wgt!;lg!l)a9kIKJ&C8ZrRNxxhpHUO zMJI}njvMHS&Ha^x;Xus8GqI$;=E!3_*DuUv4V=re0$5Wm^9T;_8uuR|-#>j^X6B}N z2kI?6kpfhz8@|CskU#K3prT%9RPn;LYO7As=lZ1EV@FRLU_@nsgOa9iNQmq265 z;)|#|k5O)#o}o=BOFVhYPw)ESyOV>?3KLZX)}-a6pQ~Xd<7)I{_n(sjb2YYn9kmFbw`4=_4a3W2#~KQM0b=EiC0y||M9$NzEOj5Z z!$S{zqi(6=d&$rBx-^^6jpALX7J@x7TI(c%R?y^0Ne2nRV^^PlJ`4(JaK39=VovBO`;oGIv+MnBy zevC8T-${vPDbo0cxd@)s4K)=0nnI%8ejA%wxR4mv$_~1Q(UG!e~3$dE-Wjx4lOE9 z?>(L0f-)Lziw^5WvLXga zk)KS7u*)tH!(*104*5sRl@f!al^AUT-*p|6Sj?KvH-{$k+f8Q)v+z{I`=ExIA9`GZ zH9JqNxv0wtCyS%zGP^0kF>pn#UWEXcYaGmPf^)ZxZ{w!T$c>HC$5Nf_WNqmve5q}g z$ez-O#-1B(c2QX)+?76p%Q{Poyk(ToY1YT zqFAm;ds89bBKBGP?r_qAfOw7AwsfS4I9JOT6=>Y!TaLlkl7%cA9&!h@O6sx}`VNVW z=H|HZm5NA&W9YW}e({9(u}A_n*U40_w9GUU8>05Pjb7$S2W9ssRZ8 zQ>`vA<6~b{z72gMN9?&M9-lxJPn{LQeMGsDocuv%Qin|&&|_6uV~o5=40uBp(^4Pn zPjy&zSmj3htPV!5Z{lZsqSP1o$c=w+<`#MCRQc}*?vM02044)i zc(5;op8%qNwrMU@eEN};q!pJM5mXO$Y;Z=+e@kb`wkL7!rETMk*LcX|i*7uV`_}jC z1CwWMCfG*>3A8`mm+44o8%xN=dlSz5BkJ~_I+1@x=G;v;PzEb2E;n{|R$qDSeuOKp zJ8<8-8WGPN3xNd4T?v$BQYm-mI^ioW-J-k(uZM#v_FpxWIO z;X!~tJ=?1rDu;f%e==Tk_L^0PS|u;B3MK5b{0!`0r?MsYe0PDLZj%n_2NC`YlKJLc zlMXoxX5RheWJ~xsiDvo8E1GTv=9lP9m+QD}rkcf*AT}AM0k@+7 z8>$gJf;(H1E7sAb%4t7>=xH}1f0op{yZB8ATf^mri{E?Ku`Xf`$%Tmen5$Hn5(oZ> zO`X~q0?jc5zw;Nr0_=IB0k0cz(Tz$o^N?qR#~bPnIRqr~m5$d>!%Ky^mCAQma7z2H zJ><8;-q-e7TiclkT;l;-DVr`Z8}NZAYh#cNV>*Th&0@LA=U59fG9J{gV{Bj)crwj0 zg+2U}U)m34MxKrFgO`1YR`ypw>niyeD}saoVQoAL&DjV-`HPbu`~(}jI>?`jZ_ne~B7?2vSnAcTA6?iX+6`{RU0hsG<@5@?x7**==w z_EBHz?oJo&uF@txB)m!eHrBLG$ zAUOOs*n2lYZ9dAylglF?x1lx1E3_CG`uOx3H*K3}SZ2}bnQ9pN63FA_iLbME7csFT zEE>ceA2HkucTOGW8Q{{NJbG`4K)ldcVCX|)4|EIji9#TP1x6~O!or1X99 zHlN3?mv{lWV=npu=~wVZ0V%7HFjdA9X_SqQ6NSbSs7Dryq0ErvI9A_cUKTB|`6gfD zP8Y1xW!rlOU6Me%{38Aw zCIN>s5#YAhFGfgziSK_U;_fCJ=zx8)$!1iB;G~e=1265OM);*`eH%TRW0rpKx12&# zFIh8ZUH_>P2_#L{!+l!6TwjF;pcedhjLd8)xG^x<*3yPiKA@m8kMDKCc~ep7Z$`og@p zjpLSOO6(iaSp!<8PZ*Ha!e#pjab0Aw;yPM1HK?0YCV`d+Bl?c>j2Q6q3fPHGX-n5O~?oeE< zOF6QMGuzjNOn$jxWSUT%EX)=I_*ckG;U*m z+c}d!*iixzgqbB<`a(5bEm|m9Su4!+H_{GNNI?L< z2;p!r7VyrBt#25Fg?LnL@Y@hYlR%v>>!gf53A|uoqF5T2r;oKW$BMcv*=VvDq#DM7 z4{mu5gI1m}{tr*ZehH+a)!&HAQeTf77DOJ+lR)*_c5m&}#YfHsFuK+64@C-hr!uew zH;7O40rn%#Cl^M)B91oHaeSZfK$o$FMe`VL0T67p-9Ecfv9`$&tqxyyA5Rc)=GH;* zG0*>``+pvqVh0EPO&I=~YN0L78D$4nhRV$qtk$~Z;w6Eu5u00=8AE={hPYDvTT>rg zcwJNHo@Utxt4`{^lt7#Gx`L6KDP8!j?ITXMV?}Undbzp7NsS2Hz51bju&u3sCoiZr*)*m&h+S{|CpK6`S>pooL}B%q9M-^f4F z>S=3Qcbl6Wr$wF~2`Zn=C#CLI%lvRuyrXu%vin zmkqnarl!yE0Q+05)u~z2wO_{R5-6DvY0%C zmAB}fkU+0+f(}8P&C3QRj`Iw}KFAMrw7G^v>jtmYgAE3HIaSAzn98n&?{X%!awX7f zj@K1lYB^)77V)-?E=S}|`sizQhVIVP_;umy@z?WC{A!KStq|kl#y-zn#yBOA_?3?PfaNu8?$uRM<1db;M$g&KR1^*u ztU8k9>5!5!Q*`lo=6w>9|JaOb?_;qk;3HF=1B;#C-7uQwv%-uvQQ_XApfW_NOgE{= zlQ-~(1=|wMKi5|MJKdn2*gV0>13o9~X!$cqm>R~n4V>M&V^gje&^LW+J7)le01F zr7s(e&QV?$7l&1Rj=uh}A$6qj;F_@`oFP-SC>JeCn_RiPi_+ukzRl5XC(^RH;XbbIAyZtSc9a4+%%$M4{Yi9zo!{+d3+H0C|dB)NbFD6UUK28*|_f zsqKY^VjK2h3B=zWbf)oS+ZChF`Da=q2Mwd{=xRj*K(8Lblt8}?OMe0tHPJ)O3P%{n zUCeejB?l@mjx%aqs0Y?$R|%eZX|BJ}kw@rNzr(R8#95C=&FUvMWkt+w#2S4l1Hcb< zFDk%S4V7x11b)8+QqZYvBKB;2gV*N9vo#rxZ4NtWVn0j*?OZ5kOf}BNJNRuNW;vFM zi83{moBIi$i$Ic` zFrRB19U*@m{9=Ak5X}oLlpZvzipH@6U8lL6%%wTdE;G6&|?&W3C|Y$VH1u8_MCzqSrnIxssmVx8W1c~u=DM(_Qk ze=q|J4W1pA9!HFh&@_959#ef2G)WpzTOvqfTS=J<=k zut9te0Y(LMd-7~8L~}VM#JRLgbTf}!8gQzw;%=a0kIf#XvM%-d>6u86T=vP?S;`Oq z=Wv`}>xypRVxZa$6I#i+ywRGJ(XJo)o3e8KI@7s@H7-K=mAA#=V`7j}Yq&w&wV7Ej zCr7L0EJn}wIpeF_vhi4-q|2+=2D6Qy=!&(qy#`l}SlY8&h5$Wsyk4Cb0|(iRzT713 za`dXVF0$txHZ_FRU$#^VmM+7YxK*VXU@5oaVeB^1ejkfn)|)Fh{JfY`Hg__qGD%+o zc?ee!N0X?Im2!7!ay%OSg)sAjZ&U>y{kq!68IP7fxb2jcJ_Y!zlN3^%9rBzdRILh} z#)tyv=E4(eT?FZ7R!=ul1e*1_@K}@7oGWj%n^B6~!Buj3$c^cJ`(*c=y3`!3mn)t? zy#SIVAoiqNaF+U=j6z~>?W!(~WO4`jSNVX^&rCEjJCQwwp-g^ zP=255d*>BpX(8j9UeOhySp`jrKXjMtf4)2Y0LP+XqO~@sAk*{l76tJzADe@46|mWZ zd6TA&{ef#A?@_AjD@c5OS!v5q z$3)kgO59r2hcapTiNHHYUotWWhoQ^*WU0-LO=%OF{gjD-xSlouqFrVt@E=5u8yRDS z9;MrPj=lg?baf?&t}K<*7;dH&(J5$ROm63Rpc#+$}uqzKQ{y!K0U3;w8dcJ=$7XrYj6)Y=c#g1nUzq-l#2M zY*ws>Ud58N26Ba#FGKK9`b<_=xb62gRn)R#ZT%npODc_xE_dcoGsoABFZ4o1_#pCFe;*B%!^;S zTq$^gvlz7wq*;xr^u7o^r~Z=<7(Q?Td3YN8?a25=SPNtmGyiX`#e67%+^SVL0vowE9otXiDFR^bitL ze%O}WPrY>WJjk@;Y;X*r~J`Vk>Z%OoVio339q!BaFcI z=q>kdQGOAZ69LZ#GrTR#1 z%-%poIcrhw#CehW&EkF@2%vzSSXYShxB|LeSl@G)N34`vK2p57KK@WtnwQNHH*Q;> znEsC1AC_&ftedz9)#61(-zZ);qU*an=p{4C@!~~!oAwW}vtx(ovrbVw?NUUzHF+$$ zuV}F>Yn2Ow7fQ~@C)r%W06g0l_MSuM@GRgqR*wX_t1f{8EmPpq6KVNlH-8Up*a%M) zRIh_*esGCv@p@<4dSO8iIcUy5q+wWhmzh!Htldl_@S}=H*gW4Y<{OeaIRwC zd}bg09X%=x?q%Iq+0#T@PyN!}d0GDveH-wA-&xK89JsImdr5RGA_%tgD7R)L(9gqc zyYwVu55?NJnQ7h0RO}^iGA)4wvh=JW>|)CXu~E3!p3qqPV`8PDOCk+kUpKKTRJ#wZ zBFh5@3T!Z3)hYdu*4B4>qN%HcnIPyFA5Lc*Yb#x=A%hO~jb`v}aVH%<4-n zF+5>VORQ90JAw@}xv?NCpE1cQk6(iXy6wgew0Oc#t+$+*JucIapbF7v#U{WPC-h_& zwD|AsnOJ7Uw)&R;<-r^+`6Ujxtk=|_!9;fqW3XI=L8KIUf_ zOmH=xbn4nxt*SX&uQY~Cvm85^AnDni*ZNqDtDN;Orp)=lxtX&%;t?Rqguv-n=&!x9 z_tM7FqiZ8f*Id|Y$p^ByND)if2GZUL-ThlU!Iix{nD8}AfKO6@XUR5%{8#+n>Vm9{ zb10bt>sEL)oq<0pBRg1@6M4r|-)j;txgKr9vD(f*%s|2txO~7^p>y>h1UjFT9i1WL zOI!))=`QW}I5!(?5bB$-?So5XVb}MZn(fX+;m=WkrN-g_n{k1b5pVvc+&NUSC#}xq z#DRwak6}IBaCQQ0SowB3^6Mv^XPxwc}9F&ycaH64Q8cGb(34^Pi;x6vmQ8+&^Xp-5_Jm8jU2ZGn6Ts* z*wGJO>+gpMZ}t`#)=`hPZM&s>G zd_&5I5x#~n&|x2Z-PW)dY&)E^oc4N)6r6JakuL+emo6))l;bVio*}W=nXJf!qa5B+ z2=@SS((WL5jT%W9Evz{dg^VPeOU4@`-ekbt_u#BMFn_3*vP@6h(4~&EEvE$xkI!NB zZN6aNS1X|6LJ~H=FP9$RQnnN9vS}UyTWl?sNg#9uL+~N9P2f{V^a7btCIf&!v~n*mnB3 z41Yo!m1hdNZ{rRtDYKL`2_(!fnQsJ$_o_%Acd80l`;1fVdN_;Rrn6EC)j^MAj{q2? z-x3hm40!`}IH?V0T{q{$oNKKtFANpNmf{3{{B5ruc%^mHKwv*iE+q5HdA6s$JYvk*k2|8CHf&4c@u#}QA?X8+lMaQmP#4n#e&1Ck_ z6@{!1F|S?X<(jmn@0Ww$8^YT&DsF_A3EfCDF#!lRcQg%`<@txBbLO#d(E^wc zl2iK{1Y9E<`|7DirHi<|KxoNh9S=;O0p|5xV!%K|gVOw1 zznr51BP})FSX0Qe3j!G$uPia3728t^RjVg=pqzKe80%`hZB-d(`A#_dr48t+GFo7O zMUE5u`g)MN2+zt@0#y!63w;uhY=%7T1N@c~vb&{W!boLg6^P&rB>h2fjDowbwN8$T zJ@6WL+9z0ffOjdLr46pPA#3jQZTrO!5E5O3@!w;@n z0TL|g^Se-Q1rMGG88X-uvN`<=el;OP!0^maQxDk81JHXtYorFis zY3$ZM0=j|?aPnc)#|IO~E^G%oX90`cXSRK+`<@64+4>t$5c>&_!J1IoiRHvMj;O*! zCCIR#KPKYxhHpk<_5nFe%`1GmehT5n>?5L!k(=~eU{Yp`_CA3Ki^wJlx9F8}R+;z4 zU0>!t*ERK9ym!!X%G(V)*W6S0UV(7L4D}A?H1MNtrQqd3_);113MwLSY0QwXu|PuB zq;K^x>g8Aw8H***oTFNDH2H%{csEI?6;wjL9fU7o0K`>7+f?UopktGlaj{`roKeSY ztlC4Pgxhj5)#&nDH;5Sop>CO=U6?Dj!Yk)kH}So;W+}qRo{Us1gpw?J8Q*?MIE(NL zVUGa+B7T|R9hSZg>|;CSPvzxu7sfBU-OafF@*U{sj*hI$k}FplH${ zjZwgveT@7@><$7i0)%?wOY+3)wQn1#b{Kp0kydp|AXk?z8_JQ5;qrU9Kzq_Z(H);k zAagwn#u$i|Eld=W68yb|nS74m8IOF|-zHrcTQwWMkyqQn+t`t#D}V6HLp4!28c7Le z07E#M1W}n|e85Yag?S*6kXwnY^gqtS^Lp>?4zMh>zI3}MeO2wo_gQ%@o>a&9brLB5 zYrH=~^C5bZt&TSZtcJ1;@p2A9Ei4 zSP|xu=>h%(@qV~OR|2`1d-N(*kngF;+NxXC7rOFg%=sErck{!8mFsMdzH5l68f(pL zuNpi!@FgCOo~lx>CTpYC*kkmBSb2j!^(4PZ?3MCBY4oPFsbg|d!GF3~$kx)WUPR(##j zCC^~(&t^8Ep^I4216WgJ%`>^i0+w(~@)+#c?LxjM@6u}SaxK6kIZHOV1Iu8$Y(D9O zH@ggo9=^D$Ul<0`>62|LNy7Vpkn6E1bTO_5-kD)><1Uu5?b)}j+A4Ovz-`~zVo%j4 z(cIXNUIC0zwb0bbva>)~9Pri%>M#DLJ$~!7kMO^|P3yADW($MPJf1b10()Cr-QaAM zy&LkdPGn*|70pvSn}{!+UPBRXybxi?2euH=gUxc1+zzzHfyrD|cGIp(_W3SoIUOzm`vyCW^*o4vT zQU%^P*GC&LIJ}lh=$5r8CwU^_4=tR0|g{g<^vpH$ApM~TV+9rgk#SoQ+`>Px^j3yVw9buwp0lhWtmWWa zWdU*XWB_(MUPV?%DUtZrCy99fl5?I3K92~+)XuXBA$6mF1-Y{Xzk_t&rN4zuoWIb3 zV^y-9X5a+ktoUY*P_BGRvxpJgvNGhb_`U}LSpGUTl8A^L$>r{*8w<7x#?C|zg14=h zg+@wniOKx7;=s8Lc7(8AmQ{A(bz>|^;nOF^MAe3qmis*zja)@^6hRdc!2+dc51Anxo_?+J?*R~n01Kr38(z-Y!G44Q2E=A2V0RwN@ zc@0bYogo-n=CpQby4o1tK?gM3g=K7zKu;=wuh#y6h`PNdqASY;?<4Vx*GF+~9O!dR zng}${NY$S*sh^R3qBSFXR{e2X?*4{^U59`#2fHIrm_i?QE=2+zTu2tm%%iMPqdt7; z4)Hm=O~jr}XOYgFK%yWHbUUMI3~P{iJQBNB0u5dPc@BWrE5F_09|&^-Tn9p5xiZ|d+(Ee1m4=K1EZgTfX0jJE zL63ml1;jv_Tp>R>R4bz8(-+OppE-Wm9cDkJ4=yjDKOiX1Ai+v5xnhfffiw6wxCVe? z#7l{e??O)tUycIOE7%n@hZ}wSA`XYSaXz7@{`KRxke5uMI-R?uhZRCtIE_em*s8kF z*4*6o;%4lS(?yJ}4}Z`QMWwsSkohu!*@`U&dOkXV4FPTIy-lot7@Q_%zt#}W#R?*^ zXIct1k(JiPthqT~;Cy76Q~_@I*aM2)_P|G3{20%cKx*RU$#iYp+LB+TSH%gPC5#%_ z&e!k+H7wvT!fJiwz77LjK9A`VFYIj=k;Y7Qvy8B5hg`?m|#ZXatyGw;~Oc$by8no zC)#>`P#~(FWR^jVJf54G9q4VkA?)6La!nAoj6cZ|+!1yXgLmt8z8HDCF^VBJU4yUy zmeS~771vMd+JTNdlG$^@Sdo#r3y3NGl9XK=Z|fdFZYE&TmjtmPHO?>h0~M$*3apLo zBw@>=ES);M2;;2e#w4TE=8*n9+vt+Ixuo$u;_OMsO4(3+J3#D9$ma7ecw8?&BTQF+ z+P4?}dOE|}BrY&4*Pu7;jPKU4hziSukjAzfpzCL=Hou;p*fnA$-o7c}`IONkw&9cc zTNb|q?E$d((yc{`-)|gW6atEM-#;0dP^zL`NS&<(pWZUoIy{nd~4HI9)2hl-Lj5q z)wT2V^Pf+Bqow!I7uQv_yt0wcct*Pp6rpy{Y0A`SOeFl_)V$~S8ZOz5%L4db*yIJk z*3D!fvoD%xyYN;Cw93Gvx05KKH2P~VzTKmFX4+J}RXtC9sC7r#kK)#1e)(Sbp)W_AuO&BUuTMN`v42Iz&DWYBsh~$Znzcfi|Sy%IMwb< zN-uHrNv?Ii=67DDd@Ai^-={EpQ7cBCO&v4T=yfc^>~LFU*10z>;yv=cgDmcLEMXyz zRwJK}GSYhe;saIIRIBq&22XO(eKi80c^GEEC0+vKdX|j>X)Ffx`^2^kZvWgo0W{b; zBiRwVb(ws-SG~2W%e3&JvB$0Mb_X6$0iAx006(5M*Y#F>oc1N2zz-ZMD3LTk*w2Cbh@?1$zK!)9C9o1dmNq{vVxr~#+~tW`%&K$1|s5lxQzyEp=uW}S-wDF z_kNqBmBa5)cuFG~UkJM3e+nBC3}Fws1`1w$mA>BP5A)aEeU3#Lau}1EgS9wGjFZ74 zp2K|yinVdzJpjQg=`U=Uk(0OKjOwx-1%D_9#ETr_3F@pvCSgQv|A+oK8J!Z1lG1t$ z!mN({7KJ@B>i0JhJK-N`%E0q52D`coDU>8puwrfIE+lF&po&?Wk4<7P3Vo8$->|zA z_y%XeWQj}qh(PI$v4LAeh2Z{^>l9O?K6FMXOD0A3s_R=kMa8|BNC#z~7$3JVCr^PX zR;)bZJ7f4%^!x%~{k4PfpWhmd z$-kmYC;MVgL>xXN0$a-3Mz^MR4y*)`?SR$A*3odbCDv&we%)`-bxKk{=SCRYN1u6c zff-)BIf;R8YGquEbbnc!4xHGdvsF`SxU%SWWrI)r)PG_CUXK>qT{>Bx$xQVkRMedTR zvBhK(&Rw(d+2`n*R|dJSr#U+_1;1M-ByzYWPagSg{}D8K%7<~3?hkh+&Yx{JF#gHY zG>UjPMUeI(g0A_^5N{?-xIB|J0z!E`vc{JwW)uDPuPS6F9TewBq}ItK`KxB;HW_IM z9Z8Yn=C9j8Xa(MM6k}t>xDx6vtQKz+*7R`3I@90_)EvB`p#jbtROh!-aJiktNEppz zeqaR^ug~Q%=(~vuUl6U>6dPMTEb}RV1oq5t_YXEtgN(HEs{FxF)fyKMc{2rV{zm`G z>v@-6%n=abzGIJ4ET8+CG9GZ=Qa%vF9$)wF+I+un(+f7~u1;*MIr$)7^5*T`@ySms zr)F7eCqItNo`UCCkBf_i>ykL%2s~0sR?%|3$CFcKG0_j-1`PTbH4cn{aO;SH2M~EK z#whgZtF@_$iEF(#)^5Y#dbjB64<qo($PG;#)b6uYYFn1s@A6QaRRs!skkooa6V?Y(%)7Xn?fa&coTka zde#&(7(g2PI7``B8=I6JIz|~PRC;r8tLZ|%=(+?_k0v)Nt^^y5uCW4tkwDt^@IE6V zMmN&+-~XPx1|PU&GJ+FS}WnFkDitL(=KE~V%w;#o- zV67D0S4jX>S5=go;!{ZkyI1WzP2S)#e+u8EH>iprR9CO0ucVgY3iLpCr;}PjYJ{nZ>{6)&J;F1GV zW5gV5wtQufzxP8dE^Rs}H{NK#oOLcfdE$!{!hVqgUg$GbY&S#DF#H(uM!n* zq|qe}f^Z*oH0%GV;7Y@qIJdA~uWc2VDk?2j(+V~U4QT^Z5lM<$M3&$JNDy+>A`psL zsXzi`YOSP*npy~}WMl~fL8uZ$ATU+7vWYAK*%EeS$wCrBAjxl z#;cSYd(kpNm$BkrO^ROBaI$DPUA+5>Yy#E*fk<;`GVj{O4Ou`PV)9iTODRRC{mHbl zuT_OTJ$hlG#hYN`x-X7$r${c`2!KUzLgt$Z@K7F@tGN?j?9FO(N;@0dl5UZt7n(p? zuJus@PSu5K^Yn*V0&+4JKDb59tkwg2#OJ8}x`cLGH}}8#dN7|Lp&j=zYc+_DY`{lt zZ~rasEk3Cos3H+Yuh?>Jg3~^=WikC?F6rLx`?R#Nzmv3S3|}}R0DAcYAU>^}Mdt0S zQsZ?Y8_*LUe4PFotH=B|M)+8`HCpNChef+f|E$OejHt88soH&4C$ObUBs7NRMu&{1 zKM?>GFT_8B<+Ux73X~WdEGA8SHTL8TO&`r%$aw0fs(ae-jbbPMPI0ZeqEeR*+N?TV zuVTCTfmEW2~c^O!Pb{+E=A*C0rjsHkimcM z(b%uhogAb8T`(j>u25kIEv86>{F4*VB0Y5W{4X^GsnK8L0`lRLW2$lEP-vNPpyOf7 zdBHD4Pj}3@B%^;ZNR~fIgr=`pme67(;DA#5#Vj7Xh=!yFnN%1)syfKprAsmfj`6EN zJD+#|x)FJ>AR?|=a0IVV(y?@pepD%~zYZ0jqcnb?pua?;)5DOD z^#b20o@KAi`&E5Ijx0OAby|xLXFsIh2{OsTG68q;tFGiQ+diz814=5!8HKihFiCpf zt<6-dn@b7Tzo2@dDuZ~TwsZ=j4>RsG>zh7E=CdYY;b%di$BAKsHxB9Dt&)22962>p zM47nb6!xQCc9%|{rP$5He3Ro0yzstHZO zUx)7hLJ|J4Vm0Pl{Q*dW=o;_rP6aS~Ca|Sl#3StkLI~J5tFlx|DzPio{^cQuF=HR2 z+bFv2U%Ye5^Psh)>V&R_8<|MSdCtfWC^OBhW^mxe%lZa|Eok!|}3i z)X4{3>-w2)C{C?H9 z;&9!jm19a24(e(!MOV9Ssx0sW_!G4u_h^)DbV$|ZE>+-;|2qX=$JcyLe4fx0-t7?g`^Z{%D9BTw|R#@Qn6!dPM^ z)v|D}YpqpLO1K3oi}G%-xMgr*fPSCW_JBUX-^^CDVI?YEQ<6p8rTfiJZNJ${49(|yX6TD4<>^`c;ZR2GBzb-ioWFOhdT0J4Az8uRzrlbsPM+80g74i zkx8d@V3A~VnQ`bIu1}BbrBcj8R+tQCKqjcBgi^=8RQ`y77INx8o|XLp(cSf?=)CJB zcF8N!dqY)zHpvj{COdbyKDX`EMq_%MjirG06SGyi9iOgw)Yh%1l^;_su4O~syHr+{+T19k|Zjf03nAx4I& z(}$0aBCBU%k|KkqzI1%w~c-k$1i|5RflH$K^SPsF2}tFk`JxJV!=et2!{vx* zD?f^-V|L`-@~kP#j4O_}l^t)Bw_AfX^|=h1nf6%&s_G?z47f;JA6-`ObKRIBpW##F zwVxP|$GCUIA0`!s@Zu&Px}lH4NcsCJA6v*{6HhE@N}B4KHkp^d$6X$9I`AE)u*CDk z?G8*7-wd11PFI~R?B0{-mmJ~cJZ^WvAwA4epd9QUf|}}iURSy!aWn@)&%5yoAQgpFJ=u4?5MxYHFLIHEi`m6_oOm$UxDp#@&`2xj&#_k$->o z@Jh~5kJ-W2YN{K8yoVFd{pU_l_o8r_dW-jWuas-REF84ftF<~>C!8o6cRNTv-zeoXa77& z`3yI&d!reQp=r~joV-N3!f_L!3LUn>E}fe%{aTIb&2N4*__ltg&;c8ZG`8%dl&2xSO5O>66oo9rahZCKh8?Z0k@Xm_p8i;Yb zqh9`HZeQw_e3Gei=j#Y%lLhi`d5`Zq=I=8Iy&jR!nM$=167qr*e!fE2jOXfFQ%p6-m135bB Z9K}X8(+^c&7_FIlR`95I?& torch.Tensor: + """Shared XY-centering + Z-depth insertion check used by rewards, terminations, and observations. + + Args: + held_base_pos: (N, 3) position of the held gear's insertion base in env frame. + peg_pos: (N, 3) position of the target peg in env frame. + gear_peg_height: Physical height of the peg. + z_fraction: Fraction of peg height that counts as inserted. + xy_threshold: Maximum XY distance for centering. + + Returns: + (N,) bool tensor — True where gear is centered and inserted. + """ + xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) + z_diff = held_base_pos[:, 2] - peg_pos[:, 2] + return (xy_dist < xy_threshold) & (z_diff < gear_peg_height * z_fraction) + + def peg_delta_from_held_gear_base( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), @@ -101,8 +125,7 @@ def body_quat_canonical( robot: Articulation = env.scene[robot_cfg.name] idx = robot.body_names.index(body_name) quat = wp.to_torch(robot.data.body_quat_w)[:, idx, :] - sign = torch.where(quat[:, 3:4] < 0, -1.0, 1.0) - return quat * sign + return quat_unique(quat) def force_torque_at_body( @@ -111,7 +134,12 @@ def force_torque_at_body( body_name: str = "force_sensor", return_torque: bool = False, ) -> torch.Tensor: - """Read joint reaction wrench at a specific body link.""" + """Read joint reaction wrench at a specific body link. + + Uses PhysX ``root_view.get_link_incoming_joint_force()`` (same pattern as + Isaac Lab's ``forge_env.py``). This is the standard way to read F/T sensor + data in Isaac Sim; no higher-level API is available. + """ robot: Articulation = env.scene[robot_cfg.name] body_idx = robot.body_names.index(body_name) wrench = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, body_idx] @@ -191,7 +219,11 @@ def reset(self, env_ids: list[int] | None = None): self._prev_noisy_pos[env_ids] = ( wp.to_torch(robot.data.body_pos_w)[env_ids, self._fingertip_idx] - origins[env_ids] ) - self._prev_noisy_quat[env_ids] = wp.to_torch(robot.data.body_quat_w)[env_ids, self._fingertip_idx] + reset_quat = wp.to_torch(robot.data.body_quat_w)[env_ids, self._fingertip_idx].clone() + reset_quat[:, [2, 3]] = 0.0 + reset_quat = torch.nn.functional.normalize(reset_quat, dim=-1) + reset_quat = reset_quat * flip.unsqueeze(-1) + self._prev_noisy_quat[env_ids] = reset_quat def __call__( self, @@ -229,8 +261,10 @@ def __call__( math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), ) noisy_quat[:, [2, 3]] = 0.0 + noisy_quat = torch.nn.functional.normalize(noisy_quat, dim=-1) noisy_quat = noisy_quat * self._flip_quats.unsqueeze(-1) + # No public API for action term lookup; _terms is the standard access pattern. arm_osc_action = env.action_manager._terms["arm_action"] board_pos = wp.to_torch(board.data.root_pos_w) - origins board_quat = wp.to_torch(board.data.root_quat_w) @@ -252,6 +286,9 @@ def __call__( raw_force = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, self._force_idx, :3] raw_force = torch.nan_to_num(raw_force, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) + # Force EMA is updated here (in the observation) rather than in process_actions + # because the smoothed force must be current *before* the policy reads it. + # Moving this to process_actions would delay the update by one step. arm_osc_action.force_smooth[:] = ( self._ft_alpha * raw_force + (1.0 - self._ft_alpha) * arm_osc_action.force_smooth ) diff --git a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py index 00fd2caf1..9921ce5bc 100644 --- a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py +++ b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py @@ -21,6 +21,8 @@ from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg from isaaclab.utils.math import combine_frame_transforms, quat_apply +from isaaclab_arena.tasks.observations.gear_insertion_observations import check_gear_insertion_geometry + if TYPE_CHECKING: from isaaclab.envs import ManagerBasedRLEnv @@ -108,8 +110,8 @@ def __call__( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] = [0.0, 0.0, 0.0], - held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), + held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), keypoint_scale: float = 0.15, num_keypoints: int = 4, squash_a: float = 50.0, @@ -133,50 +135,58 @@ def __call__( return _squashing_fn(kp_dist, squash_a, squash_b) +# Module-level cache for constant offset tensors used by _check_gear_position. +# In practice this holds only 2-3 entries (held_gear_base_offset and peg_offset +# per device). If per-episode offset randomization is added in the future, this +# cache should be replaced with instance-level pre-allocated tensors. +_CACHED_OFFSETS: dict[tuple, torch.Tensor] = {} + + +def _get_cached_offset(values: tuple[float, ...] | list[float], n: int, device: torch.device) -> torch.Tensor: + """Return a cached (n, 3) offset tensor, avoiding per-step allocation.""" + key = (tuple(values), device) + cached = _CACHED_OFFSETS.get(key) + if cached is None or cached.shape[0] < n: + cached = torch.tensor(values, device=device, dtype=torch.float32).unsqueeze(0).expand(max(n, 1), 3).contiguous() + _CACHED_OFFSETS[key] = cached + return cached[:n] + + def _check_gear_position( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg, board_cfg: SceneEntityCfg, - peg_offset: list[float], - held_gear_base_offset: list[float], + peg_offset: tuple[float, ...], + held_gear_base_offset: tuple[float, ...], gear_peg_height: float, z_fraction: float, xy_threshold: float, ) -> torch.Tensor: """Return bool tensor indicating whether gear meets XY centering and Z depth criteria. - Compares the held gear's insertion base (root + offset in gear frame) against - the peg position (fixed asset + offset in fixed asset frame). + Delegates to :func:`check_gear_insertion_geometry` for the shared XY+Z check. """ gear: RigidObject = env.scene[gear_cfg.name] gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins gear_quat = wp.to_torch(gear.data.root_quat_w) - held_off = ( - torch.tensor(held_gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) - ) + held_off = _get_cached_offset(held_gear_base_offset, env.num_envs, env.device) held_base_pos = gear_pos + quat_apply(gear_quat, held_off) board: RigidObject = env.scene[board_cfg.name] pos = wp.to_torch(board.data.root_pos_w) - env.scene.env_origins quat = wp.to_torch(board.data.root_quat_w) - offset = torch.tensor(peg_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) + offset = _get_cached_offset(peg_offset, env.num_envs, env.device) peg_pos = pos + quat_apply(quat, offset) - xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) - z_diff = held_base_pos[:, 2] - peg_pos[:, 2] - height_threshold = gear_peg_height * z_fraction - - is_centered = xy_dist < xy_threshold - is_inserted = z_diff < height_threshold - return is_centered & is_inserted + return check_gear_insertion_geometry(held_base_pos, peg_pos, gear_peg_height, z_fraction, xy_threshold) def gear_insertion_engagement_bonus( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] = [0.0, 0.0, 0.0], - held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), + held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), gear_peg_height: float = 0.02, engage_z_fraction: float = 0.90, xy_threshold: float = 0.015, @@ -198,8 +208,8 @@ def gear_insertion_success_bonus( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] = [0.0, 0.0, 0.0], - held_gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), + held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), gear_peg_height: float = 0.02, success_z_fraction: float = 0.05, xy_threshold: float = 0.0025, @@ -218,7 +228,10 @@ def gear_insertion_success_bonus( class osc_action_magnitude_penalty(ManagerTermBase): - """Penalize large asset-relative position/rotation commands.""" + """Penalize large asset-relative position/rotation commands. + + Note: accesses ``env.action_manager._terms`` directly — no public API exists. + """ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): super().__init__(cfg, env) @@ -264,6 +277,12 @@ class success_prediction_error(ManagerTermBase): ``true_success`` uses held insertion base vs target peg (same geometry as ``gear_insertion_success_bonus`` / ``gear_mesh_insertion_success``), not the gear rigid-body root, consistent with common assembly peg-insert benchmarks. + + ``_pred_scale`` acts as a **one-way curriculum gate**: it starts at 0 and + flips to 1 the first time mean success rate crosses ``delay_until_ratio``. + It intentionally never resets back to 0 — once the agent demonstrates + sufficient insertion ability, the prediction penalty remains active for the + rest of training even if performance temporarily dips. """ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): @@ -304,10 +323,9 @@ def __call__( peg_off = self._peg_offset.unsqueeze(0).expand(n, 3) peg_pos = board_pos + quat_apply(board_quat, peg_off) - xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) - z_diff = held_base_pos[:, 2] - peg_pos[:, 2] - height_threshold = self._gear_peg_height * self._success_z_fraction - true_success = (xy_dist < self._xy_threshold) & (z_diff < height_threshold) + true_success = check_gear_insertion_geometry( + held_base_pos, peg_pos, self._gear_peg_height, self._success_z_fraction, self._xy_threshold + ) if true_success.float().mean() >= self._delay_until_ratio: self._pred_scale = 1.0 diff --git a/isaaclab_arena/tasks/terminations.py b/isaaclab_arena/tasks/terminations.py index 89adb59ec..f60c87cb1 100644 --- a/isaaclab_arena/tasks/terminations.py +++ b/isaaclab_arena/tasks/terminations.py @@ -15,6 +15,8 @@ from isaaclab.sensors.contact_sensor.contact_sensor import ContactSensor from isaaclab.utils.math import combine_frame_transforms +from isaaclab_arena.tasks.observations.gear_insertion_observations import check_gear_insertion_geometry + # NOTE(alexmillane, 2025.09.15): The velocity threshold is set high because some stationary # seem to generate a "small" velocity. @@ -262,7 +264,7 @@ def gear_mesh_insertion_success( env: ManagerBasedRLEnv, held_object_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), fixed_object_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - gear_base_offset: list[float] = [2.025e-2, 0.0, 0.0], + gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), held_gear_base_offset: list[float] | None = None, gear_peg_height: float = 0.02, success_z_fraction: float = 0.80, @@ -294,14 +296,7 @@ def gear_mesh_insertion_success( offset = torch.tensor(gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) peg_pos = fixed_pos + math_utils.quat_apply(fixed_quat, offset) - xy_dist = torch.linalg.vector_norm(peg_pos[:, 0:2] - held_base_pos[:, 0:2], dim=1) - is_centered = xy_dist < xy_threshold - - z_disp = held_base_pos[:, 2] - peg_pos[:, 2] - height_threshold = gear_peg_height * success_z_fraction - is_inserted = z_disp < height_threshold - - return torch.logical_and(is_centered, is_inserted) + return check_gear_insertion_geometry(held_base_pos, peg_pos, gear_peg_height, success_z_fraction, xy_threshold) def gear_dropped_from_gripper( @@ -319,32 +314,3 @@ def gear_dropped_from_gripper( gear_pos = wp.to_torch(gear.data.root_pos_w) distance = torch.norm(gear_pos - ee_pos, dim=-1) return distance > distance_threshold - - -def gear_orientation_exceeded( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - ee_body_name: str = "panda_hand", - roll_threshold_deg: float = 15.0, - pitch_threshold_deg: float = 15.0, -) -> torch.Tensor: - """Reset when the gear has tilted too far relative to the end-effector.""" - if not hasattr(env, "_initial_gear_ee_relative_quat"): - return torch.zeros(env.num_envs, dtype=torch.bool, device=env.device) - - gear: RigidObject = env.scene[gear_cfg.name] - robot: Articulation = env.scene[robot_cfg.name] - - eef_indices, _ = robot.find_bodies([ee_body_name]) - eef_quat = wp.to_torch(robot.data.body_quat_w)[:, eef_indices[0]] - current_relative = math_utils.quat_mul(wp.to_torch(gear.data.root_quat_w), math_utils.quat_conjugate(eef_quat)) - - initial_relative = env._initial_gear_ee_relative_quat - deviation = math_utils.quat_mul(current_relative, math_utils.quat_conjugate(initial_relative)) - - roll, pitch, _yaw = math_utils.euler_xyz_from_quat(deviation) - - roll_limit = torch.deg2rad(torch.tensor(roll_threshold_deg, device=env.device)) - pitch_limit = torch.deg2rad(torch.tensor(pitch_threshold_deg, device=env.device)) - return (torch.abs(roll) > roll_limit) | (torch.abs(pitch) > pitch_limit) diff --git a/isaaclab_arena/terms/events.py b/isaaclab_arena/terms/events.py index 1bf11e1dc..240c66411 100644 --- a/isaaclab_arena/terms/events.py +++ b/isaaclab_arena/terms/events.py @@ -14,6 +14,8 @@ from isaaclab.assets import Articulation from isaaclab.envs import ManagerBasedEnv from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg +# Dependency: isaaclab_tasks.direct.automate (Factory task package) provides +# IK utilities (get_pose_error, _get_delta_dof_pos) used by place_gear_in_gripper. from isaaclab_tasks.direct.automate import factory_control as fc from isaaclab_arena.utils.pose import Pose @@ -214,13 +216,8 @@ def __call__( joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() - for row_idx in range(n): - self.gripper_joint_setter_func( - joint_pos, - [row_idx], - self.finger_joints, - self.hand_grasp_width, - ) + for jid in self.finger_joints: + joint_pos[:, jid] = self.hand_grasp_width / 2.0 self.robot.set_joint_position_target_index( target=joint_pos, @@ -230,13 +227,8 @@ def __call__( self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) - for row_idx in range(n): - self.gripper_joint_setter_func( - joint_pos, - [row_idx], - self.finger_joints, - self.hand_close_width, - ) + for jid in self.finger_joints: + joint_pos[:, jid] = self.hand_close_width / 2.0 self.robot.set_joint_position_target_index( target=joint_pos, diff --git a/isaaclab_arena_environments/mdp/env_callbacks.py b/isaaclab_arena_environments/mdp/env_callbacks.py index 4f15f1833..c2ce506f3 100644 --- a/isaaclab_arena_environments/mdp/env_callbacks.py +++ b/isaaclab_arena_environments/mdp/env_callbacks.py @@ -58,7 +58,6 @@ def assembly_env_cfg_callback(env_cfg: IsaacLabArenaManagerBasedRLEnvCfg) -> Isa friction_correlation_distance=0.00625, gpu_max_rigid_contact_count=2**23, gpu_max_rigid_patch_count=2**23, - gpu_collision_stack_size=2**32 - 1, gpu_max_num_partitions=1, # Important for stable simulation ), physics_material=RigidBodyMaterialCfg( diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py index ba6491ac6..8bafe6f80 100644 --- a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -26,7 +26,6 @@ def get_env(self, args_cli: argparse.Namespace): import isaaclab_arena_environments.mdp as mdp from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment - from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.nist_gear_insertion_task import GraspConfig, NistGearInsertionTask from isaaclab_arena.tasks.observations.gear_insertion_observations import NistGearInsertionPolicyObservations @@ -188,7 +187,7 @@ def get_env(self, args_cli: argparse.Namespace): task=task, teleop_device=teleop_device, env_cfg_callback=mdp.assembly_env_cfg_callback, - rl_framework=RLFramework.RL_GAMES, + rl_framework_entry_point="rl_games_cfg_entry_point", rl_policy_cfg="isaaclab_arena_examples.policy:nist_gear_insertion_osc_rl_games.yaml", ) diff --git a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml index 40294c50d..7f2a4ff88 100644 --- a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml +++ b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml @@ -126,4 +126,4 @@ params: layer_norm: True player: - deterministic: False + deterministic: True From 508fdb86a22a355f7defadf28fe67536afd323b0 Mon Sep 17 00:00:00 2001 From: odoherty Date: Wed, 6 May 2026 16:27:57 -0700 Subject: [PATCH 07/12] Add NIST gear insertion task Signed-off-by: odoherty --- isaaclab_arena/assets/object_library.py | 23 +- isaaclab_arena/embodiments/franka/franka.py | 278 +++++++++++++- .../reinforcement_learning/train_rl_games.py | 275 ++++++-------- isaaclab_arena/tasks/events.py | 256 ++++++++++++- .../tasks/nist_gear_insertion_task.py | 291 +++++---------- .../gear_insertion_observations.py | 349 ++++++++++-------- .../tasks/rewards/gear_insertion_rewards.py | 335 ++++++++--------- isaaclab_arena/terms/events.py | 161 +------- .../tests/test_nist_gear_insertion_task.py | 198 ++++++++++ isaaclab_arena/tests/test_train_rl_games.py | 73 ++++ .../mdp/nist_gear_insertion_osc_action.py | 316 +++++++++++----- .../mdp/robot_configs.py | 94 ----- ...nist_assembled_gearmesh_osc_environment.py | 121 +----- .../nist_gear_insertion_osc_rl_games.yaml | 2 +- 14 files changed, 1606 insertions(+), 1166 deletions(-) create mode 100644 isaaclab_arena/tests/test_nist_gear_insertion_task.py create mode 100644 isaaclab_arena/tests/test_train_rl_games.py diff --git a/isaaclab_arena/assets/object_library.py b/isaaclab_arena/assets/object_library.py index 16bbe7aee..b88f07537 100644 --- a/isaaclab_arena/assets/object_library.py +++ b/isaaclab_arena/assets/object_library.py @@ -3,15 +3,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any import isaaclab.sim as sim_utils - -_REPO_ROOT = Path(__file__).resolve().parent.parent.parent - -if TYPE_CHECKING: - from isaaclab_arena.assets.hdr_image import HDRImage from isaaclab.assets import RigidObjectCfg from isaaclab.sim.spawners.from_files.from_files_cfg import GroundPlaneCfg from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR @@ -96,11 +90,14 @@ def __init__( @register_asset class GearsAndBase(LibraryObject): - """NIST gear base with SDF collision (local USD, gear_base as root prim).""" + """NIST gear base with SDF collision (gear_base as root prim).""" name = "gears_and_base" tags = ["object"] - usd_path = str(_REPO_ROOT / "assets" / "gearbase_and_gears_gearbase_root.usd") + usd_path = ( + "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/" + "gearbase_and_gears_gearbase_root.usd" + ) spawn_cfg_addon = { "rigid_props": sim_utils.RigidBodyPropertiesCfg( @@ -132,7 +129,7 @@ class MediumNistGear(LibraryObject): name = "medium_nist_gear" tags = ["object"] - usd_path = str(_REPO_ROOT / "assets" / "gear_medium.usd") + usd_path = "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_medium.usd" spawn_cfg_addon = { "rigid_props": sim_utils.RigidBodyPropertiesCfg( @@ -161,7 +158,9 @@ class NistBoardAssembled(LibraryObject): name = "nist_board_assembled" tags = ["nist", "object"] - usd_path = str(_REPO_ROOT / "assets" / "nist_board_assembled.usd") + usd_path = ( + "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/nist_board_assembled.usd" + ) spawn_cfg_addon = { "rigid_props": sim_utils.RigidBodyPropertiesCfg( disable_gravity=False, @@ -191,7 +190,7 @@ class NistGearBase(LibraryObject): name = "nist_gear_base" tags = ["nist", "object"] - usd_path = str(_REPO_ROOT / "assets" / "gear_base.usd") + usd_path = "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_base.usd" spawn_cfg_addon = { "rigid_props": sim_utils.RigidBodyPropertiesCfg( disable_gravity=False, diff --git a/isaaclab_arena/embodiments/franka/franka.py b/isaaclab_arena/embodiments/franka/franka.py index 71d8f2e02..e9a893e53 100644 --- a/isaaclab_arena/embodiments/franka/franka.py +++ b/isaaclab_arena/embodiments/franka/franka.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 - import torch from collections.abc import Sequence from dataclasses import MISSING @@ -11,7 +10,9 @@ import isaaclab.envs.mdp as mdp_isaac_lab import isaaclab.sim as sim_utils import isaaclab.utils.math as PoseUtils +from isaaclab.actuators import ImplicitActuatorCfg from isaaclab.assets.articulation.articulation_cfg import ArticulationCfg +from isaaclab.controllers import OperationalSpaceControllerCfg from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg from isaaclab.envs import ManagerBasedRLMimicEnv from isaaclab.envs.mdp.actions.actions_cfg import ( @@ -28,6 +29,7 @@ from isaaclab.sensors import CameraCfg, TiledCameraCfg # noqa: F401 from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg, OffsetCfg from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR from isaaclab_assets.robots.franka import FRANKA_PANDA_CFG, FRANKA_PANDA_HIGH_PD_CFG from isaaclab_tasks.manager_based.manipulation.stack.mdp import franka_stack_events from isaaclab_tasks.manager_based.manipulation.stack.mdp.observations import ee_frame_pos, ee_frame_quat @@ -38,7 +40,9 @@ from isaaclab_arena.embodiments.common.mimic_utils import get_rigid_and_articulated_object_poses from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase from isaaclab_arena.embodiments.franka.observations import gripper_pos +from isaaclab_arena.tasks.observations.gear_insertion_observations import NistGearInsertionPolicyObservations from isaaclab_arena.utils.pose import Pose +from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg _DEFAULT_CAMERA_OFFSET = Pose(position_xyz=(0.11, -0.031, -0.074), rotation_xyzw=(0.0, 0.0, -0.66262, -0.74896)) @@ -204,6 +208,278 @@ class FrankaJointPosActionsCfg: ) +_FRANKA_MIMIC_OSC_USD_PATH = f"{ISAACLAB_NUCLEUS_DIR}/Factory/franka_mimic.usd" + +# Assembly tasks use stiffer contact settings than the default Franka assets. +_FRANKA_MIMIC_OSC_RIGID_PROPS = sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, +) + +_FRANKA_MIMIC_OSC_ARTICULATION_PROPS = sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, +) + +_FRANKA_MIMIC_OSC_COLLISION_PROPS = sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0) + +# Default pose from the Factory-style Franka mimic asset. +_FRANKA_MIMIC_OSC_JOINT_POS = { + "panda_joint1": 0.0, + "panda_joint2": -0.569, + "panda_joint3": 0.0, + "panda_joint4": -2.810, + "panda_joint5": 0.0, + "panda_joint6": 3.037, + "panda_joint7": 0.741, + "panda_finger_joint2": 0.04, +} + +# OSC writes joint torques directly for the arm; the hand remains position controlled. +_FRANKA_MIMIC_OSC_ACTUATORS = { + "panda_arm1": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[1-4]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit_sim=87, + velocity_limit_sim=124.6, + ), + "panda_arm2": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[5-7]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit_sim=12, + velocity_limit_sim=149.5, + ), + "panda_hand": ImplicitActuatorCfg( + joint_names_expr=["panda_finger_joint[1-2]"], + effort_limit_sim=40.0, + velocity_limit_sim=0.04, + stiffness=7500.0, + damping=173.0, + friction=0.1, + armature=0.0, + ), +} + +# Franka mimic USD with contact sensors enabled for the wrist force body. +_FRANKA_MIMIC_OSC_CFG = ArticulationCfg( + prim_path="{ENV_REGEX_NS}/Robot", + spawn=sim_utils.UsdFileCfg( + usd_path=_FRANKA_MIMIC_OSC_USD_PATH, + activate_contact_sensors=True, + rigid_props=_FRANKA_MIMIC_OSC_RIGID_PROPS, + articulation_props=_FRANKA_MIMIC_OSC_ARTICULATION_PROPS, + collision_props=_FRANKA_MIMIC_OSC_COLLISION_PROPS, + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos=_FRANKA_MIMIC_OSC_JOINT_POS, + pos=(0.0, 0.0, 0.0), + rot=(0.0, 0.0, 0.0, 1.0), + ), + actuators=_FRANKA_MIMIC_OSC_ACTUATORS, +) + +_NIST_GEAR_INSERTION_INITIAL_JOINT_POSE = [ + 0.561824, + 0.287201, + -0.543103, + -2.410188, + 0.507908, + 2.847644, + 0.454298, + 0.04, + 0.04, +] + + +def franka_gripper_joint_setter( + joint_pos: torch.Tensor, + row_indices: Sequence[int], + finger_joint_indices: Sequence[int], + width: float, +) -> None: + """Set Franka Panda finger joints to achieve a given total opening width.""" + for jid in finger_joint_indices: + joint_pos[row_indices, jid] = width / 2.0 + + +def _nist_gear_insertion_ee_frame_cfg() -> FrameTransformerCfg: + return FrameTransformerCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_link0", + debug_vis=False, + target_frames=[ + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_fingertip_centered", + name="end_effector", + offset=OffsetCfg(pos=(0.0, 0.0, 0.0)), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_rightfinger", + name="tool_rightfinger", + offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_leftfinger", + name="tool_leftfinger", + offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), + ), + ], + ) + + +@register_asset +class FrankaNistGearInsertionOscEmbodiment(FrankaEmbodimentBase): + """Franka embodiment for NIST gear insertion with OSC torque control.""" + + name = "franka_nist_gear_osc" + + def __init__( + self, + enable_cameras: bool = False, + initial_pose: Pose | None = None, + initial_joint_pose: list[float] | None = None, + concatenate_observation_terms: bool = False, + arm_mode: ArmMode | None = None, + camera_offset: Pose | None = _DEFAULT_CAMERA_OFFSET, + is_tiled_camera: bool = False, + fixed_asset_name: str = "gears_and_base", + peg_offset: tuple[float, float, float] = (0.02025, 0.0, 0.025), + ): + super().__init__( + enable_cameras=enable_cameras, + initial_pose=initial_pose, + initial_joint_pose=initial_joint_pose, + concatenate_observation_terms=concatenate_observation_terms, + arm_mode=arm_mode, + camera_offset=camera_offset, + is_tiled_camera=is_tiled_camera, + ) + self.scene_config.robot = _FRANKA_MIMIC_OSC_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene_config.ee_frame = _nist_gear_insertion_ee_frame_cfg() + self.set_initial_joint_pose(initial_joint_pose or _NIST_GEAR_INSERTION_INITIAL_JOINT_POSE) + self.action_config = FrankaNistGearInsertionOscActionsCfg( + fixed_asset_name=fixed_asset_name, + peg_offset=peg_offset, + ) + + self.observation_config = FrankaNistGearInsertionObservationsCfg( + fixed_asset_name=fixed_asset_name, + peg_offset=peg_offset, + fingertip_body_name=self.get_command_body_name(), + concatenate_observation_terms=self.concatenate_observation_terms, + ) + + self.reward_config.action_rate = None + self.reward_config.joint_vel = None + + def get_command_body_name(self) -> str: + return "panda_fingertip_centered" + + def get_gear_insertion_grasp_config(self) -> dict[str, object]: + return { + "hand_grasp_width": 0.03, + "hand_close_width": 0.03, + "gripper_joint_setter_func": franka_gripper_joint_setter, + "end_effector_body_name": "panda_hand", + "finger_joint_names": "panda_finger_joint[1-2]", + "grasp_rot_offset": [1.0, 0.0, 0.0, 0.0], + "grasp_offset": [0.02, 0.0, -0.128], + "arm_joint_names": "panda_joint[1-7]", + } + + +@configclass +class FrankaNistGearInsertionObservationsCfg: + """Observation specification for the NIST gear insertion OSC policy.""" + + @configclass + class PolicyCfg(ObsGroup): + """24-D NIST gear insertion policy observation.""" + + nist_gear_policy_obs: ObsTerm = MISSING + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = False + + policy: PolicyCfg = MISSING + + def __init__( + self, + fixed_asset_name: str = "gears_and_base", + peg_offset: tuple[float, float, float] = (0.02025, 0.0, 0.025), + fingertip_body_name: str = "panda_fingertip_centered", + concatenate_observation_terms: bool = False, + ): + self.policy = self.PolicyCfg() + self.policy.nist_gear_policy_obs = ObsTerm( + func=NistGearInsertionPolicyObservations, + params={ + "robot_name": "robot", + "board_name": fixed_asset_name, + "peg_offset": list(peg_offset), + "fingertip_body_name": fingertip_body_name, + "force_body_name": "force_sensor", + "pos_noise_level": 0.0, + "rot_noise_level_deg": 0.0, + "force_noise_level": 0.0, + }, + ) + self.policy.concatenate_terms = concatenate_observation_terms + + +@configclass +class FrankaNistGearInsertionOscActionsCfg: + """Action specification for the NIST gear insertion OSC Franka embodiment.""" + + arm_action: ActionTermCfg = MISSING + gripper_action: ActionTermCfg | None = None + + def __init__( + self, + fixed_asset_name: str = "gears_and_base", + peg_offset: tuple[float, float, float] = (0.02025, 0.0, 0.025), + ): + self.arm_action = NistGearInsertionOscActionCfg( + asset_name="robot", + joint_names=["panda_joint[1-7]"], + body_name="panda_fingertip_centered", + controller_cfg=OperationalSpaceControllerCfg( + target_types=["pose_abs"], + impedance_mode="fixed", + inertial_dynamics_decoupling=True, + partial_inertial_dynamics_decoupling=False, + gravity_compensation=False, + motion_stiffness_task=[565.0, 565.0, 565.0, 28.0, 28.0, 28.0], + motion_damping_ratio_task=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + nullspace_control="position", + nullspace_stiffness=10.0, + nullspace_damping_ratio=1.0, + ), + position_scale=1.0, + orientation_scale=1.0, + nullspace_joint_pos_target="default", + fixed_asset_name=fixed_asset_name, + peg_offset=peg_offset, + force_body_name="force_sensor", + ) + self.gripper_action = None + + @configclass class FrankaSceneCfg: """Additions to the scene configuration coming from the Franka embodiment.""" diff --git a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py index 55f7bc69d..516ffe8ff 100644 --- a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py +++ b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py @@ -3,166 +3,135 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Train an RL agent with RL-Games using the Arena environment builder. +"""Register an Arena environment, then delegate RL-Games training to Isaac Lab. -Launch Isaac Sim Simulator first. +This file is intentionally a thin compatibility wrapper. The training loop, +checkpoint handling, video recording, and RL-Games runner setup live in Isaac +Lab's ``scripts/reinforcement_learning/rl_games/train.py``. """ -import math -from pathlib import Path - -from isaaclab.app import AppLauncher +from __future__ import annotations -from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser -from isaaclab_arena_environments.cli import add_example_environments_cli_args +import argparse +import runpy +import sys +from pathlib import Path -parser = get_isaaclab_arena_cli_parser() -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") -parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") -parser.add_argument( +from isaaclab_arena.environments.isaaclab_interop import environment_registration_callback + +_FORWARDED_VALUE_OPTIONS = { + "--agent", + "--checkpoint", + "--device", + "--max_iterations", + "--num_envs", + "--ray-proc-id", + "--seed", + "--sigma", + "--task", + "--video_interval", + "--video_length", + "--wandb-entity", + "--wandb-name", + "--wandb-project-name", +} +_FORWARDED_FLAG_OPTIONS = { + "--distributed", + "--export_io_descriptors", + "--headless", + "--track", + "--video", +} +_DEPRECATED_VALUE_OPTIONS = { "--agent_cfg_path", - type=Path, - required=True, - help="Path to the RL-Games agent YAML configuration file.", -) -parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint to resume from.") -parser.add_argument("--sigma", type=str, default=None, help="The policy's initial standard deviation.") -parser.add_argument("--max_iterations", type=int, default=None, help="Override max training epochs.") -parser.add_argument("--experiment_name", type=str, default=None, help="Override experiment name in config.") -add_example_environments_cli_args(parser) -args_cli = parser.parse_args() - -if args_cli.video: - args_cli.enable_cameras = True - -if getattr(args_cli, "enable_pinocchio", False): - import pinocchio # noqa: F401 - -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app - -"""Rest everything follows.""" - -import gymnasium as gym -import os -import yaml -from datetime import datetime - -import omni.log -from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent -from isaaclab.utils.assets import retrieve_file_path -from isaaclab.utils.dict import print_dict -from isaaclab.utils.io import dump_yaml -from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper -from rl_games.common import env_configurations, vecenv -from rl_games.common.algo_observer import IsaacAlgoObserver -from rl_games.torch_runner import Runner - -from isaaclab_arena_environments.cli import get_arena_builder_from_cli - - -def main(): - """Train with RL-Games agent.""" - try: - arena_builder = get_arena_builder_from_cli(args_cli) - env_name, env_cfg = arena_builder.build_registered() - except Exception as e: - omni.log.error(f"Failed to parse environment configuration: {e}") - exit(1) - - with open(args_cli.agent_cfg_path) as f: - agent_cfg = yaml.safe_load(f) - - if args_cli.num_envs is not None: - env_cfg.scene.num_envs = args_cli.num_envs - if args_cli.device is not None: - env_cfg.sim.device = args_cli.device - agent_cfg["params"]["config"]["device"] = args_cli.device - agent_cfg["params"]["config"]["device_name"] = args_cli.device - - if args_cli.distributed: - local_rank = int(os.getenv("LOCAL_RANK", "0")) - agent_cfg["params"]["seed"] += int(os.getenv("RANK", "0")) - agent_cfg["params"]["config"]["device"] = f"cuda:{local_rank}" - agent_cfg["params"]["config"]["device_name"] = f"cuda:{local_rank}" - agent_cfg["params"]["config"]["multi_gpu"] = True - env_cfg.sim.device = f"cuda:{local_rank}" - - if args_cli.max_iterations is not None: - agent_cfg["params"]["config"]["max_epochs"] = args_cli.max_iterations - - if args_cli.experiment_name is not None: - agent_cfg["params"]["config"]["name"] = args_cli.experiment_name - - if args_cli.checkpoint is not None: - resume_path = retrieve_file_path(args_cli.checkpoint) - agent_cfg["params"]["load_checkpoint"] = True - agent_cfg["params"]["load_path"] = resume_path - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - - train_sigma = float(args_cli.sigma) if args_cli.sigma is not None else None - - env_cfg.seed = agent_cfg["params"]["seed"] - - config_name = agent_cfg["params"]["config"]["name"] - log_root_path = os.path.join("logs", "rl_games", config_name) - log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Logging experiment in directory: {log_root_path}") - - log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - agent_cfg["params"]["config"]["train_dir"] = log_root_path - agent_cfg["params"]["config"]["full_experiment_name"] = log_dir - - dump_yaml(os.path.join(log_root_path, log_dir, "params", "env.yaml"), env_cfg) - dump_yaml(os.path.join(log_root_path, log_dir, "params", "agent.yaml"), agent_cfg) - - env_cfg.log_dir = os.path.join(log_root_path, log_dir) - - env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) - - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - - if args_cli.video: - video_kwargs = { - "video_folder": os.path.join(log_root_path, log_dir, "videos", "train"), - "step_trigger": lambda step: step % args_cli.video_interval == 0, - "video_length": args_cli.video_length, - "disable_logger": True, - } - print("[INFO] Recording videos during training.") - print_dict(video_kwargs, nesting=4) - env = gym.wrappers.RecordVideo(env, **video_kwargs) - - rl_device = agent_cfg["params"]["config"]["device"] - clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf) - clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf) - obs_groups = agent_cfg["params"]["env"].get("obs_groups") - concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) - - env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups) - - vecenv.register( - "IsaacRlgWrapper", - lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs), - ) - env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env}) - - agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs - - runner = Runner(IsaacAlgoObserver()) - runner.load(agent_cfg) - runner.reset() - - if args_cli.checkpoint is not None: - runner.run({"train": True, "play": False, "sigma": train_sigma, "checkpoint": resume_path}) - else: - runner.run({"train": True, "play": False, "sigma": train_sigma}) - - env.close() + "--experiment_name", +} + + +def _normalize_legacy_positional_task(argv: list[str]) -> list[str]: + if "--task" in argv or any(arg.startswith("--task=") for arg in argv): + return argv + + from isaaclab_arena_environments.cli import ExampleEnvironments + + for idx, arg in enumerate(argv): + if not arg.startswith("-") and arg in ExampleEnvironments: + print("[WARN] Positional environment names are deprecated for RL-Games training; use --task instead.") + return [*argv[:idx], "--task", arg, *argv[idx + 1 :]] + return argv + + +def _isaac_lab_rl_games_train_script() -> Path: + repo_root = Path(__file__).resolve().parents[3] + return repo_root / "submodules" / "IsaacLab" / "scripts" / "reinforcement_learning" / "rl_games" / "train.py" + + +def _copy_forwarded_args(argv: list[str]) -> list[str]: + forwarded: list[str] = [] + idx = 0 + while idx < len(argv): + arg = argv[idx] + option = arg.split("=", maxsplit=1)[0] + if option in _FORWARDED_FLAG_OPTIONS: + forwarded.append(arg) + elif option in _FORWARDED_VALUE_OPTIONS: + forwarded.append(arg) + if "=" not in arg and idx + 1 < len(argv): + idx += 1 + forwarded.append(argv[idx]) + elif option in _DEPRECATED_VALUE_OPTIONS and "=" not in arg and idx + 1 < len(argv): + idx += 1 + idx += 1 + return forwarded + + +def _drop_forwarded_args(argv: list[str]) -> list[str]: + remaining: list[str] = [] + idx = 0 + while idx < len(argv): + arg = argv[idx] + option = arg.split("=", maxsplit=1)[0] + if option in _FORWARDED_FLAG_OPTIONS: + idx += 1 + continue + if option in _FORWARDED_VALUE_OPTIONS: + if "=" not in arg and idx + 1 < len(argv): + idx += 1 + idx += 1 + continue + remaining.append(arg) + idx += 1 + return remaining + + +def _remove_deprecated_args(argv: list[str]) -> tuple[list[str], str | None]: + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument("--agent_cfg_path", default=None) + parser.add_argument("--experiment_name", default=None) + deprecated, remaining = parser.parse_known_args(argv) + if deprecated.agent_cfg_path is not None: + print("[WARN] --agent_cfg_path is ignored; the registered Arena environment provides rl_games_cfg_entry_point.") + return remaining, deprecated.experiment_name + + +def main() -> None: + original_args = _normalize_legacy_positional_task(sys.argv[1:]) + sys.argv = [sys.argv[0], *original_args] + lab_script = _isaac_lab_rl_games_train_script() + if not lab_script.is_file(): + raise FileNotFoundError(f"Isaac Lab RL-Games trainer not found: {lab_script}") + + remaining_args = environment_registration_callback() + remaining_args, experiment_name = _remove_deprecated_args(remaining_args) + remaining_args = _drop_forwarded_args(remaining_args) + + forwarded_args = _copy_forwarded_args(original_args) + if experiment_name is not None: + forwarded_args.append(f"agent.params.config.name={experiment_name}") + + sys.argv = [str(lab_script), *forwarded_args, *remaining_args] + runpy.run_path(str(lab_script), run_name="__main__") if __name__ == "__main__": main() - simulation_app.close() diff --git a/isaaclab_arena/tasks/events.py b/isaaclab_arena/tasks/events.py index cb61b3108..066d51049 100644 --- a/isaaclab_arena/tasks/events.py +++ b/isaaclab_arena/tasks/events.py @@ -3,14 +3,19 @@ # # SPDX-License-Identifier: Apache-2.0 +"""Task-specific event terms.""" from __future__ import annotations import torch -from typing import TYPE_CHECKING, Literal +from collections.abc import Callable +from typing import TYPE_CHECKING, Literal, cast import isaaclab.utils.math as math_utils -from isaaclab.managers import SceneEntityCfg +import warp as wp +from isaaclab.assets import Articulation +from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg +from isaaclab_tasks.direct.automate import factory_control as fc from isaaclab_tasks.manager_based.manipulation.stack.mdp.franka_stack_events import sample_object_poses if TYPE_CHECKING: @@ -85,3 +90,250 @@ def randomize_poses_and_align_auxiliary_assets( rel_asset.write_root_velocity_to_sim( torch.zeros(1, 6, device=env.device), env_ids=torch.tensor([cur_env], device=env.device) ) + + +class place_gear_in_gripper(ManagerTermBase): + """Place the held gear in the gripper during reset.""" + + def __init__(self, cfg: EventTermCfg, env: ManagerBasedEnv): + super().__init__(cfg, env) + + robot_cfg: SceneEntityCfg = cfg.params.get("robot_cfg", SceneEntityCfg("robot")) + self._robot_name = robot_cfg.name + self.robot: Articulation = env.scene[robot_cfg.name] + + gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] + self._gear_name = gear_cfg.name + self.gear = env.scene[gear_cfg.name] + + self.hand_grasp_width: float = cast(float, cfg.params["hand_grasp_width"]) + self.hand_close_width: float = cast(float, cfg.params["hand_close_width"]) + self.gripper_joint_setter_func: Callable[..., None] = cast( + Callable[..., None], + cfg.params["gripper_joint_setter_func"], + ) + + grasp_rot_offset = cfg.params["grasp_rot_offset"] + self._grasp_rot_offset_values = tuple(grasp_rot_offset) + self.grasp_rot_offset_tensor = ( + torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32).unsqueeze(0).repeat(env.num_envs, 1) + ) + + grasp_offset = cfg.params["grasp_offset"] + self._grasp_offset_values = tuple(grasp_offset) + self.grasp_offset_tensor = torch.tensor(grasp_offset, device=env.device, dtype=torch.float32) + + end_effector_body_name: str = cast(str, cfg.params.get("end_effector_body_name", "panda_hand")) + finger_joint_names: str = cast(str, cfg.params.get("finger_joint_names", "panda_finger_joint[1-2]")) + self._eef_name = end_effector_body_name + self._finger_joint_names = finger_joint_names + self._resolve_robot_indices(end_effector_body_name, finger_joint_names) + + self._max_ik_iterations: int = cfg.params.get("max_ik_iterations", 10) + self._pos_threshold: float = cfg.params.get("pos_threshold", 1e-6) + self._rot_threshold: float = cfg.params.get("rot_threshold", 1e-6) + + def _resolve_robot_indices(self, end_effector_body_name: str, finger_joint_names: str) -> None: + eef_indices, _ = self.robot.find_bodies([end_effector_body_name]) + if not eef_indices: + raise ValueError(f"End-effector body '{end_effector_body_name}' not found in robot") + self.eef_idx: int = eef_indices[0] + self.jacobi_body_idx: int = self.eef_idx - 1 + + self.all_joints, _ = self.robot.find_joints([".*"]) + self.finger_joints, _ = self.robot.find_joints([finger_joint_names]) + if not self.finger_joints: + raise ValueError(f"Finger joints '{finger_joint_names}' not found in robot") + + def _set_gripper_width( + self, + joint_pos: torch.Tensor, + width: float, + gripper_joint_setter_func: Callable[..., None], + ) -> None: + row_indices = torch.arange(joint_pos.shape[0], device=joint_pos.device) + gripper_joint_setter_func(joint_pos, row_indices, self.finger_joints, width) + + def _sync_sim_state(self, env: ManagerBasedEnv) -> None: + env.scene.write_data_to_sim() + env.sim.forward() + env.scene.update(dt=0.0) + + def _update_cached_assets( + self, + env: ManagerBasedEnv, + robot_cfg: SceneEntityCfg, + gear_cfg: SceneEntityCfg | None, + end_effector_body_name: str, + finger_joint_names: str, + ) -> None: + """Refresh cached scene handles and indices when call-time cfgs change.""" + resolve_robot_indices = ( + end_effector_body_name != self._eef_name or finger_joint_names != self._finger_joint_names + ) + if robot_cfg.name != self._robot_name: + self._robot_name = robot_cfg.name + self.robot = env.scene[robot_cfg.name] + resolve_robot_indices = True + + if resolve_robot_indices: + self._resolve_robot_indices(end_effector_body_name, finger_joint_names) + self._eef_name = end_effector_body_name + self._finger_joint_names = finger_joint_names + + if gear_cfg is not None and gear_cfg.name != self._gear_name: + self._gear_name = gear_cfg.name + self.gear = env.scene[gear_cfg.name] + + def _get_grasp_offsets( + self, + env_ids: torch.Tensor, + num_envs: int, + device: torch.device, + grasp_rot_offset: list[float] | None, + grasp_offset: list[float] | None, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return batched grasp offsets for the selected environments.""" + if grasp_rot_offset is None or tuple(grasp_rot_offset) == self._grasp_rot_offset_values: + grasp_rot_offset_tensor = self.grasp_rot_offset_tensor[env_ids] + else: + grasp_rot_offset_tensor = torch.tensor(grasp_rot_offset, device=device, dtype=torch.float32).repeat( + num_envs, 1 + ) + + if grasp_offset is None or tuple(grasp_offset) == self._grasp_offset_values: + grasp_offset_batch = self.grasp_offset_tensor.unsqueeze(0).expand(num_envs, -1) + else: + grasp_offset_batch = ( + torch.tensor(grasp_offset, device=device, dtype=torch.float32).unsqueeze(0).expand(num_envs, -1) + ) + + return grasp_rot_offset_tensor, grasp_offset_batch + + def _run_grasp_ik( + self, + env: ManagerBasedEnv, + env_ids: torch.Tensor, + grasp_rot_offset: torch.Tensor, + grasp_offset: torch.Tensor, + max_ik_iterations: int, + pos_threshold: float, + rot_threshold: float, + ) -> torch.Tensor: + """Move the end-effector to the gear grasp pose using iterative DLS IK. + + Each iteration computes the world-frame grasp pose from the current gear pose, + applies one damped-least-squares joint update, and flushes the simulation state + so the next iteration sees updated body transforms. + """ + joint_vel = torch.zeros(len(env_ids), len(self.all_joints), device=env.device) + for _ in range(max_ik_iterations): + # Get the current robot state for this IK iteration. + joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() + joint_vel = wp.to_torch(self.robot.data.joint_vel)[env_ids].clone() + + # Build the target grasp pose from the current gear pose. + gear_pos_w = wp.to_torch(self.gear.data.root_link_pos_w)[env_ids].clone() + gear_quat_w = wp.to_torch(self.gear.data.root_link_quat_w)[env_ids].clone() + target_quat = math_utils.quat_mul(gear_quat_w, grasp_rot_offset) + target_pos = gear_pos_w + math_utils.quat_apply(target_quat, grasp_offset) + + # Compute the end-effector pose error. + eef_pos = wp.to_torch(self.robot.data.body_pos_w)[env_ids, self.eef_idx] + eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] + pos_error, aa_error = fc.get_pose_error( + fingertip_midpoint_pos=eef_pos, + fingertip_midpoint_quat=eef_quat, + ctrl_target_fingertip_midpoint_pos=target_pos, + ctrl_target_fingertip_midpoint_quat=target_quat, + jacobian_type="geometric", + rot_error_type="axis_angle", + ) + delta_hand_pose = torch.cat((pos_error, aa_error), dim=-1) + + if ( + torch.norm(pos_error, dim=-1).max() < pos_threshold + and torch.norm(aa_error, dim=-1).max() < rot_threshold + ): + break + + # Solve one DLS IK step. + jacobians = wp.to_torch(self.robot.root_view.get_jacobians()).clone() + jacobian = jacobians[env_ids, self.jacobi_body_idx, :, :] + delta_dof_pos = fc._get_delta_dof_pos( + delta_pose=delta_hand_pose, + ik_method="dls", + jacobian=jacobian, + device=env.device, + ) + + # Write the updated joint state and refresh sim tensors before the next iteration. + joint_pos = joint_pos + delta_dof_pos + joint_vel = torch.zeros_like(joint_pos) + self.robot.set_joint_position_target_index(target=joint_pos, env_ids=env_ids) + self.robot.set_joint_velocity_target_index(target=joint_vel, env_ids=env_ids) + self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) + self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) + self._sync_sim_state(env) + + return joint_vel + + def _write_gripper_width( + self, + env: ManagerBasedEnv, + env_ids: torch.Tensor, + joint_pos: torch.Tensor, + joint_vel: torch.Tensor, + width: float, + gripper_joint_setter_func: Callable[..., None], + ) -> None: + """Write one gripper width and flush the reset state to the simulator.""" + self._set_gripper_width(joint_pos, width, gripper_joint_setter_func) + self.robot.set_joint_position_target_index( + target=joint_pos, + joint_ids=self.all_joints, + env_ids=env_ids, + ) + self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) + self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) + self._sync_sim_state(env) + + def __call__( + self, + env: ManagerBasedEnv, + env_ids: torch.Tensor, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + gear_cfg: SceneEntityCfg | None = None, + hand_grasp_width: float | None = None, + hand_close_width: float | None = None, + gripper_joint_setter_func: Callable[..., None] | None = None, + end_effector_body_name: str = "panda_hand", + finger_joint_names: str = "panda_finger_joint[1-2]", + grasp_rot_offset: list[float] | None = None, + grasp_offset: list[float] | None = None, + max_ik_iterations: int | None = None, + pos_threshold: float | None = None, + rot_threshold: float | None = None, + ) -> None: + self._update_cached_assets(env, robot_cfg, gear_cfg, end_effector_body_name, finger_joint_names) + n = len(env_ids) + grasp_rot_offset_tensor, grasp_offset_batch = self._get_grasp_offsets( + env_ids, n, env.device, grasp_rot_offset, grasp_offset + ) + joint_vel = self._run_grasp_ik( + env=env, + env_ids=env_ids, + grasp_rot_offset=grasp_rot_offset_tensor, + grasp_offset=grasp_offset_batch, + max_ik_iterations=self._max_ik_iterations if max_ik_iterations is None else max_ik_iterations, + pos_threshold=self._pos_threshold if pos_threshold is None else pos_threshold, + rot_threshold=self._rot_threshold if rot_threshold is None else rot_threshold, + ) + + joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() + gripper_joint_setter = gripper_joint_setter_func or self.gripper_joint_setter_func + for width in ( + self.hand_grasp_width if hand_grasp_width is None else hand_grasp_width, + self.hand_close_width if hand_close_width is None else hand_close_width, + ): + self._write_gripper_width(env, env_ids, joint_pos, joint_vel, width, gripper_joint_setter) diff --git a/isaaclab_arena/tasks/nist_gear_insertion_task.py b/isaaclab_arena/tasks/nist_gear_insertion_task.py index d0a4f5041..6b7c14e4f 100644 --- a/isaaclab_arena/tasks/nist_gear_insertion_task.py +++ b/isaaclab_arena/tasks/nist_gear_insertion_task.py @@ -3,14 +3,11 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Simple gear insertion task for the assembled NIST board. - -The medium gear is a separate object that the robot must insert -onto the peg. -""" +"""NIST gear insertion task.""" from __future__ import annotations +import math import numpy as np from collections.abc import Callable from dataclasses import MISSING, dataclass, field @@ -24,137 +21,134 @@ from isaaclab.utils import configclass from isaaclab_arena.assets.asset import Asset +from isaaclab_arena.embodiments.common.arm_mode import ArmMode from isaaclab_arena.metrics.metric_base import MetricBase from isaaclab_arena.metrics.success_rate import SuccessRateMetric +from isaaclab_arena.tasks.events import place_gear_in_gripper from isaaclab_arena.tasks.observations import gear_insertion_observations -from isaaclab_arena.tasks.observations.gear_insertion_observations import body_pos_in_env_frame, body_quat_canonical from isaaclab_arena.tasks.rewards import gear_insertion_rewards from isaaclab_arena.tasks.task_base import TaskBase from isaaclab_arena.tasks.terminations import gear_dropped_from_gripper, gear_mesh_insertion_success -from isaaclab_arena.terms.events import place_gear_in_gripper from isaaclab_arena.utils.cameras import get_viewer_cfg_look_at_object +_DEFAULT_PEG_OFFSET = (2.025e-2, 0.0, 0.0) + @dataclass -class GraspConfig: - """Configuration for placing the gear in the robot's gripper at reset. +class GearInsertionGeometryCfg: + """Geometry parameters for the insertion target.""" + + peg_offset_from_board: list[float] = field(default_factory=lambda: list(_DEFAULT_PEG_OFFSET)) + peg_offset_for_obs: list[float] | None = None + held_gear_base_offset: list[float] = field(default_factory=lambda: list(_DEFAULT_PEG_OFFSET)) + gear_peg_height: float = 0.02 + success_z_fraction: float = 0.30 + xy_threshold: float = 0.0025 + peg_offset_xy_noise: float = 0.005 - Groups all embodiment-specific grasp parameters so the task constructor - stays focused on task-level concerns (geometry, success criteria). - """ - num_arm_joints: int = 7 +@dataclass +class GraspCfg: + """Embodiment-specific reset grasp parameters.""" + hand_grasp_width: float = 0.03 hand_close_width: float = 0.0 gripper_joint_setter_func: Callable | None = None end_effector_body_name: str = "panda_hand" - # xyzw identity; the environment overrides with task-specific orientation + finger_joint_names: str = "panda_finger_joint[1-2]" grasp_rot_offset: list[float] = field(default_factory=lambda: [0.0, 0.0, 0.0, 1.0]) grasp_offset: list[float] = field(default_factory=lambda: [0.0, 0.0, 0.0]) arm_joint_names: str = "panda_joint.*" - finger_body_names: str = ".*finger" class NistGearInsertionTask(TaskBase): - """Gear insertion task: insert the medium gear onto the peg. - - Peg position is computed from a fixed asset (board or separate gear-base USD) - plus an offset in that asset's local frame (assembly peg-insert convention). - """ + """Task for inserting the held gear onto a fixed NIST peg.""" def __init__( self, assembled_board: Asset, held_gear: Asset, background_scene: Asset, - peg_offset_from_board: list[float] | None = None, - peg_offset_for_obs: list[float] | None = None, - held_gear_base_offset: list[float] | None = None, gear_base_asset: Asset | None = None, - # Success geometry: Z threshold = gear_peg_height * success_z_fraction - # e.g. 0.02 * 0.20 = 4 mm. Tune these together. - gear_peg_height: float = 0.02, - success_z_fraction: float = 0.30, - xy_threshold: float = 0.0025, + geometry_cfg: GearInsertionGeometryCfg | None = None, episode_length_s: float | None = None, task_description: str | None = None, - grasp_cfg: GraspConfig | None = None, + grasp_cfg: GraspCfg | None = None, enable_randomization: bool = False, - peg_offset_xy_noise: float = 0.005, disable_drop_terminations: bool = True, rl_training_mode: bool = False, ): - super().__init__(episode_length_s=episode_length_s) + super().__init__(episode_length_s=episode_length_s, task_description=task_description) self.assembled_board = assembled_board self.held_gear = held_gear self.background_scene = background_scene + self.held_gear.disable_reset_pose() self._gear_base_asset = gear_base_asset if gear_base_asset is not None else assembled_board - self.peg_offset_from_board = peg_offset_from_board or [2.025e-2, 0.0, 0.0] - self.peg_offset_for_obs = peg_offset_for_obs - self.held_gear_base_offset = ( - held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] - ) - self.peg_offset_xy_noise = peg_offset_xy_noise - self.gear_peg_height = gear_peg_height - self.success_z_fraction = success_z_fraction - self.xy_threshold = xy_threshold + self.geometry_cfg = geometry_cfg if geometry_cfg is not None else GearInsertionGeometryCfg() self.grasp_cfg = grasp_cfg self.enable_randomization = enable_randomization self.disable_drop_terminations = disable_drop_terminations self.rl_training_mode = rl_training_mode - self.task_description = ( - f"Insert the {held_gear.name} onto the gear base on the {assembled_board.name}" - if task_description is None - else task_description - ) + if self.task_description is None: + self.task_description = f"Insert the {held_gear.name} onto the gear base on the {assembled_board.name}" def get_scene_cfg(self): return None def get_observation_cfg(self): - peg_obs = self.peg_offset_for_obs if self.peg_offset_for_obs is not None else self.peg_offset_from_board - return _GearInsertionObservationsCfg( + geometry_cfg = self.geometry_cfg + peg_obs = ( + geometry_cfg.peg_offset_for_obs + if geometry_cfg.peg_offset_for_obs is not None + else geometry_cfg.peg_offset_from_board + ) + return GearInsertionObservationsCfg( gear_name=self.held_gear.name, board_name=self._gear_base_asset.name, peg_offset=peg_obs, - held_gear_base_offset=self.held_gear_base_offset, + held_gear_base_offset=geometry_cfg.held_gear_base_offset, ) def get_rewards_cfg(self): - return _GearInsertionRewardsCfg( + geometry_cfg = self.geometry_cfg + return GearInsertionRewardsCfg( gear_name=self.held_gear.name, board_name=self._gear_base_asset.name, - peg_offset=self.peg_offset_from_board, - held_gear_base_offset=self.held_gear_base_offset, - gear_peg_height=self.gear_peg_height, - success_z_fraction=self.success_z_fraction, - xy_threshold=self.xy_threshold, - peg_offset_xy_noise=self.peg_offset_xy_noise, + peg_offset=geometry_cfg.peg_offset_from_board, + held_gear_base_offset=geometry_cfg.held_gear_base_offset, + gear_peg_height=geometry_cfg.gear_peg_height, + success_z_fraction=geometry_cfg.success_z_fraction, + xy_threshold=geometry_cfg.xy_threshold, + peg_offset_xy_noise=geometry_cfg.peg_offset_xy_noise, ) def get_termination_cfg(self): + geometry_cfg = self.geometry_cfg success = TerminationTermCfg( func=gear_mesh_insertion_success, params={ "held_object_cfg": SceneEntityCfg(self.held_gear.name), "fixed_object_cfg": SceneEntityCfg(self._gear_base_asset.name), - "gear_base_offset": self.peg_offset_from_board, - "held_gear_base_offset": self.held_gear_base_offset, - "gear_peg_height": self.gear_peg_height, - "success_z_fraction": self.success_z_fraction, - "xy_threshold": self.xy_threshold, + "gear_base_offset": geometry_cfg.peg_offset_from_board, + "held_gear_base_offset": geometry_cfg.held_gear_base_offset, + "gear_peg_height": geometry_cfg.gear_peg_height, + "success_z_fraction": geometry_cfg.success_z_fraction, + "xy_threshold": geometry_cfg.xy_threshold, "rl_training": self.rl_training_mode, }, ) - object_dropped = TerminationTermCfg( - func=mdp_isaac_lab.root_height_below_minimum, - params={ - "minimum_height": self.background_scene.object_min_z, - "asset_cfg": SceneEntityCfg(self.held_gear.name), - }, - ) - cfg = _TerminationsCfg(success=success, object_dropped=object_dropped) - if self.grasp_cfg is not None: + + cfg = GearInsertionTerminationsCfg(success=success, object_dropped=None) + # Drop checks are disabled during training to allow recovery. + if not self.disable_drop_terminations: + cfg.object_dropped = TerminationTermCfg( + func=mdp_isaac_lab.root_height_below_minimum, + params={ + "minimum_height": self.background_scene.object_min_z, + "asset_cfg": SceneEntityCfg(self.held_gear.name), + }, + ) + if not self.disable_drop_terminations and self.grasp_cfg is not None: cfg.gear_dropped_from_gripper = TerminationTermCfg( func=gear_dropped_from_gripper, params={ @@ -164,65 +158,30 @@ def get_termination_cfg(self): "distance_threshold": 0.15, }, ) - if self.disable_drop_terminations: - cfg.object_dropped = None - cfg.gear_dropped_from_gripper = None return cfg def get_events_cfg(self): - cfg = _EventsCfg() - gc = self.grasp_cfg - if gc is not None and gc.gripper_joint_setter_func is not None: + cfg = GearInsertionEventsCfg() + grasp_cfg = self.grasp_cfg + if grasp_cfg is not None and grasp_cfg.gripper_joint_setter_func is not None: cfg.place_gear = EventTermCfg( func=place_gear_in_gripper, mode="reset", params={ "gear_cfg": SceneEntityCfg(self.held_gear.name), - "num_arm_joints": gc.num_arm_joints, - "hand_grasp_width": gc.hand_grasp_width, - "hand_close_width": gc.hand_close_width, - "gripper_joint_setter_func": gc.gripper_joint_setter_func, - "end_effector_body_name": gc.end_effector_body_name, - "grasp_rot_offset": gc.grasp_rot_offset, - "grasp_offset": gc.grasp_offset, + "hand_grasp_width": grasp_cfg.hand_grasp_width, + "hand_close_width": grasp_cfg.hand_close_width, + "gripper_joint_setter_func": grasp_cfg.gripper_joint_setter_func, + "end_effector_body_name": grasp_cfg.end_effector_body_name, + "finger_joint_names": grasp_cfg.finger_joint_names, + "grasp_rot_offset": grasp_cfg.grasp_rot_offset, + "grasp_offset": grasp_cfg.grasp_offset, }, ) if self.enable_randomization: - arm_joints = gc.arm_joint_names if gc is not None else "panda_joint.*" - finger_bodies = gc.finger_body_names if gc is not None else ".*finger" - cfg.held_physics_material = EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg(self.held_gear.name), - "static_friction_range": (0.75, 0.75), - "dynamic_friction_range": (0.75, 0.75), - "restitution_range": (0.0, 0.0), - "num_buckets": 1, - }, - ) - cfg.robot_physics_material = EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg("robot", body_names=".*"), - "static_friction_range": (0.75, 0.75), - "dynamic_friction_range": (0.75, 0.75), - "restitution_range": (0.0, 0.0), - "num_buckets": 1, - }, - ) - cfg.fixed_physics_material = EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg(self._gear_base_asset.name), - "static_friction_range": (0.25, 1.25), - "dynamic_friction_range": (0.25, 0.25), - "restitution_range": (0.0, 0.0), - "num_buckets": 128, - }, - ) + if grasp_cfg is None: + raise ValueError("NIST gear insertion randomization requires an embodiment grasp configuration.") + arm_joints = grasp_cfg.arm_joint_names cfg.held_object_mass = EventTermCfg( func=mdp_isaac_lab.randomize_rigid_body_mass, mode="reset", @@ -244,7 +203,7 @@ def get_events_cfg(self): "z": (0.0, 0.0), "roll": (0.0, 0.0), "pitch": (0.0, 0.0), - "yaw": (0.0, 0.2617993877991494), + "yaw": (0.0, math.radians(15.0)), }, "velocity_range": {}, }, @@ -270,46 +229,11 @@ def get_events_cfg(self): "distribution": "uniform", }, ) - cfg.gear_physics_material = EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg(self.held_gear.name, body_names=".*"), - "static_friction_range": (0.75, 0.75), - "dynamic_friction_range": (0.75, 0.75), - "restitution_range": (0.0, 0.0), - "num_buckets": 16, - }, - ) - cfg.board_physics_material = EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg(self._gear_base_asset.name, body_names=".*"), - "static_friction_range": (0.75, 0.75), - "dynamic_friction_range": (0.75, 0.75), - "restitution_range": (0.0, 0.0), - "num_buckets": 16, - }, - ) - cfg.robot_finger_physics_material = EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_material, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg("robot", body_names=finger_bodies), - "static_friction_range": (0.75, 0.75), - "dynamic_friction_range": (0.75, 0.75), - "restitution_range": (0.0, 0.0), - "num_buckets": 16, - }, - ) return cfg - def get_prompt(self): - raise NotImplementedError("Function not implemented yet.") - - def get_mimic_env_cfg(self, embodiment_name: str): - raise NotImplementedError("Function not implemented yet.") + def get_mimic_env_cfg(self, arm_mode: ArmMode): + del arm_mode + raise NotImplementedError("NIST gear insertion does not define a Mimic configuration yet.") def get_metrics(self) -> list[MetricBase]: return [SuccessRateMetric()] @@ -322,7 +246,7 @@ def get_viewer_cfg(self) -> ViewerCfg: @configclass -class _TerminationsCfg: +class GearInsertionTerminationsCfg: """Termination terms for the gear insertion task.""" time_out: TerminationTermCfg = TerminationTermCfg(func=mdp_isaac_lab.time_out) @@ -332,8 +256,8 @@ class _TerminationsCfg: @configclass -class _EventsCfg: - """Events: reset to default poses plus optional deploy-style randomization.""" +class GearInsertionEventsCfg: + """Reset and randomization events for gear insertion.""" reset_all: EventTermCfg = EventTermCfg( func=mdp_isaac_lab.reset_scene_to_default, @@ -342,21 +266,13 @@ class _EventsCfg: ) place_gear: EventTermCfg | None = None fixed_asset_pose: EventTermCfg | None = None - held_physics_material: EventTermCfg | None = None - robot_physics_material: EventTermCfg | None = None - fixed_physics_material: EventTermCfg | None = None held_object_mass: EventTermCfg | None = None - gripper_init_yaw_noise: EventTermCfg | None = None robot_actuator_gains: EventTermCfg | None = None robot_joint_friction: EventTermCfg | None = None - gear_physics_material: EventTermCfg | None = None - board_physics_material: EventTermCfg | None = None - robot_finger_physics_material: EventTermCfg | None = None - held_asset_pos_noise: EventTermCfg | None = None @configclass -class _GearInsertionObservationsCfg: +class GearInsertionObservationsCfg: """Task-specific observations for the gear insertion task.""" task_obs: ObsGroup = MISSING @@ -368,10 +284,10 @@ def __init__( peg_offset: list[float], held_gear_base_offset: list[float] | None = None, ): - hgo = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + held_offset = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] @configclass - class _TaskObsCfg(ObsGroup): + class TaskObsCfg(ObsGroup): gear_pos = ObsTerm( func=mdp_isaac_lab.root_pos_w, params={"asset_cfg": SceneEntityCfg(gear_name)}, @@ -394,7 +310,7 @@ class _TaskObsCfg(ObsGroup): "gear_cfg": SceneEntityCfg(gear_name), "board_cfg": SceneEntityCfg(board_name), "peg_offset": peg_offset, - "held_gear_base_offset": hgo, + "held_gear_base_offset": held_offset, }, ) joint_pos = ObsTerm( @@ -406,11 +322,11 @@ class _TaskObsCfg(ObsGroup): params={"asset_cfg": SceneEntityCfg("robot")}, ) ee_pos_noiseless = ObsTerm( - func=body_pos_in_env_frame, + func=gear_insertion_observations.body_pos_in_env_frame, params={"body_name": "panda_fingertip_centered"}, ) ee_quat_noiseless = ObsTerm( - func=body_quat_canonical, + func=gear_insertion_observations.body_quat_canonical, params={"body_name": "panda_fingertip_centered"}, ) @@ -418,16 +334,12 @@ def __post_init__(self): self.enable_corruption = False self.concatenate_terms = True - self.task_obs = _TaskObsCfg() + self.task_obs = TaskObsCfg() @configclass -class _GearInsertionRewardsCfg: - """Keypoint squashing, bonuses, and insertion regularisers for gear insertion. - - Reward weights are hardcoded here (matching the lift-task pattern) rather - than being surfaced through the task constructor. - """ +class GearInsertionRewardsCfg: + """Reward terms for gear insertion.""" kp_baseline: RewardTermCfg = MISSING kp_coarse: RewardTermCfg = MISSING @@ -450,14 +362,13 @@ def __init__( xy_threshold: float, peg_offset_xy_noise: float = 0.005, ): - hgo = held_gear_base_offset gear_cfg = SceneEntityCfg(gear_name) board_cfg = SceneEntityCfg(board_name) common_params = { "gear_cfg": gear_cfg, "board_cfg": board_cfg, "peg_offset": peg_offset, - "held_gear_base_offset": hgo, + "held_gear_base_offset": held_gear_base_offset, "keypoint_scale": 0.15, "num_keypoints": 4, "peg_offset_xy_noise": peg_offset_xy_noise, @@ -466,7 +377,9 @@ def __init__( "gear_cfg": gear_cfg, "board_cfg": board_cfg, "peg_offset": peg_offset, - "held_gear_base_offset": hgo, + "held_gear_base_offset": held_gear_base_offset, + "gear_peg_height": gear_peg_height, + "xy_threshold": xy_threshold, } self.kp_baseline = RewardTermCfg( @@ -485,19 +398,19 @@ def __init__( params={**common_params, "squash_a": 100.0, "squash_b": 0.0}, ) self.engagement_bonus = RewardTermCfg( - func=gear_insertion_rewards.gear_insertion_engagement_bonus, + func=gear_insertion_rewards.gear_insertion_geometry_bonus, weight=1.0, - params={**bonus_params, "engage_z_fraction": 0.90, "xy_threshold": xy_threshold}, + params={**bonus_params, "z_fraction": 0.90}, ) self.success_bonus = RewardTermCfg( - func=gear_insertion_rewards.gear_insertion_success_bonus, + func=gear_insertion_rewards.gear_insertion_geometry_bonus, weight=1.0, - params={**bonus_params, "success_z_fraction": success_z_fraction}, + params={**bonus_params, "z_fraction": success_z_fraction}, ) self.action_penalty_asset = RewardTermCfg( func=gear_insertion_rewards.osc_action_magnitude_penalty, weight=-0.0005, - params={"pos_action_threshold": 0.02, "rot_action_threshold": 0.097}, + params={}, ) self.action_grad_penalty = RewardTermCfg( func=gear_insertion_rewards.osc_action_delta_penalty, @@ -516,7 +429,7 @@ def __init__( "gear_cfg": gear_cfg, "board_cfg": board_cfg, "peg_offset": peg_offset, - "held_gear_base_offset": hgo, + "held_gear_base_offset": held_gear_base_offset, "gear_peg_height": gear_peg_height, "success_z_fraction": success_z_fraction, "xy_threshold": xy_threshold, diff --git a/isaaclab_arena/tasks/observations/gear_insertion_observations.py b/isaaclab_arena/tasks/observations/gear_insertion_observations.py index 3cbee5e64..9f8c14831 100644 --- a/isaaclab_arena/tasks/observations/gear_insertion_observations.py +++ b/isaaclab_arena/tasks/observations/gear_insertion_observations.py @@ -3,19 +3,13 @@ # # SPDX-License-Identifier: Apache-2.0 -"""All observation terms for the NIST gear insertion task. - -This module contains: - -* Task-level observation primitives (peg position, gear-base offsets, deltas). -* Privileged critic helpers (body pose, force/torque at a body link). -* The 24-D policy observation class (``NistGearInsertionPolicyObservations``). -""" +"""Observation terms for the NIST gear insertion task.""" from __future__ import annotations import math import torch +import torch.nn.functional as F from typing import TYPE_CHECKING import isaaclab.utils.math as math_utils @@ -24,14 +18,27 @@ from isaaclab.managers import ManagerTermBase, SceneEntityCfg from isaaclab.utils.math import axis_angle_from_quat, quat_unique +from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import get_nist_gear_insertion_arm_action + if TYPE_CHECKING: from isaaclab.envs import ManagerBasedRLEnv from isaaclab.managers import ObservationTermCfg -# --------------------------------------------------------------------------- -# Task-level observation primitives -# --------------------------------------------------------------------------- +def _offset_pos_in_env_frame( + env: ManagerBasedRLEnv, + asset: RigidObject, + offset: tuple[float, ...] | torch.Tensor, +) -> torch.Tensor: + """Return an asset-local offset position in each environment frame.""" + root_pos = wp.to_torch(asset.data.root_pos_w) - env.scene.env_origins + root_quat = wp.to_torch(asset.data.root_quat_w) + if isinstance(offset, torch.Tensor): + offset_tensor = offset.to(device=env.device, dtype=torch.float32) + else: + offset_tensor = torch.tensor(offset, device=env.device, dtype=torch.float32) + offset_tensor = offset_tensor.unsqueeze(0).expand(env.num_envs, 3) + return root_pos + math_utils.quat_apply(root_quat, offset_tensor) def peg_pos_in_env_frame( @@ -39,12 +46,9 @@ def peg_pos_in_env_frame( board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), ) -> torch.Tensor: - """Target peg position: fixed asset pose + offset in its local frame.""" + """Return the target peg position in each environment frame.""" board: RigidObject = env.scene[board_cfg.name] - pos = wp.to_torch(board.data.root_pos_w) - env.scene.env_origins - quat = wp.to_torch(board.data.root_quat_w) - offset = torch.tensor(peg_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) - return pos + math_utils.quat_apply(quat, offset) + return _offset_pos_in_env_frame(env, board, peg_offset) def held_gear_base_pos_in_env_frame( @@ -52,14 +56,32 @@ def held_gear_base_pos_in_env_frame( gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), ) -> torch.Tensor: - """Position of the held gear's insertion point (root + offset in gear frame) in env frame.""" + """Return the held gear insertion point in each environment frame.""" gear: RigidObject = env.scene[gear_cfg.name] - gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins - gear_quat = wp.to_torch(gear.data.root_quat_w) - held_off = ( - torch.tensor(held_gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) - ) - return gear_pos + math_utils.quat_apply(gear_quat, held_off) + return _offset_pos_in_env_frame(env, gear, held_gear_base_offset) + + +def body_pos_in_env_frame( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Return a robot body position in each environment frame.""" + robot: Articulation = env.scene[robot_cfg.name] + body_id = robot.body_names.index(body_name) + return wp.to_torch(robot.data.body_pos_w)[:, body_id] - env.scene.env_origins + + +def body_quat_canonical( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Return a robot body quaternion with a unique sign convention.""" + robot: Articulation = env.scene[robot_cfg.name] + body_id = robot.body_names.index(body_name) + quat = wp.to_torch(robot.data.body_quat_w)[:, body_id] + return quat_unique(quat) def check_gear_insertion_geometry( @@ -69,17 +91,17 @@ def check_gear_insertion_geometry( z_fraction: float, xy_threshold: float, ) -> torch.Tensor: - """Shared XY-centering + Z-depth insertion check used by rewards, terminations, and observations. + """Return whether the gear is centered and inserted on the peg. Args: - held_base_pos: (N, 3) position of the held gear's insertion base in env frame. - peg_pos: (N, 3) position of the target peg in env frame. + held_base_pos: Position of the held gear insertion base. + peg_pos: Position of the target peg. gear_peg_height: Physical height of the peg. - z_fraction: Fraction of peg height that counts as inserted. - xy_threshold: Maximum XY distance for centering. + z_fraction: Fraction of peg height used for the insertion threshold. + xy_threshold: Maximum XY distance from the peg center. Returns: - (N,) bool tensor — True where gear is centered and inserted. + Boolean tensor indicating insertion success. """ xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) z_diff = held_base_pos[:, 2] - peg_pos[:, 2] @@ -93,69 +115,14 @@ def peg_delta_from_held_gear_base( peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), ) -> torch.Tensor: - """Vector from held gear insertion point to peg. Positive = peg is ahead in that axis.""" + """Return the vector from the held gear insertion point to the peg.""" held_base = held_gear_base_pos_in_env_frame(env, gear_cfg, held_gear_base_offset) peg_pos = peg_pos_in_env_frame(env, board_cfg, peg_offset) return peg_pos - held_base -# --------------------------------------------------------------------------- -# Privileged critic helpers (body-level pose & wrench) -# --------------------------------------------------------------------------- - - -def body_pos_in_env_frame( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "panda_fingertip_centered", -) -> torch.Tensor: - """Noiseless body position in env frame (privileged state for critic).""" - robot: Articulation = env.scene[robot_cfg.name] - idx = robot.body_names.index(body_name) - pos_w = wp.to_torch(robot.data.body_pos_w)[:, idx, :] - return pos_w - env.scene.env_origins - - -def body_quat_canonical( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "panda_fingertip_centered", -) -> torch.Tensor: - """Noiseless body quaternion, canonicalized w >= 0 (privileged state for critic).""" - robot: Articulation = env.scene[robot_cfg.name] - idx = robot.body_names.index(body_name) - quat = wp.to_torch(robot.data.body_quat_w)[:, idx, :] - return quat_unique(quat) - - -def force_torque_at_body( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "force_sensor", - return_torque: bool = False, -) -> torch.Tensor: - """Read joint reaction wrench at a specific body link. - - Uses PhysX ``root_view.get_link_incoming_joint_force()`` (same pattern as - Isaac Lab's ``forge_env.py``). This is the standard way to read F/T sensor - data in Isaac Sim; no higher-level API is available. - """ - robot: Articulation = env.scene[robot_cfg.name] - body_idx = robot.body_names.index(body_name) - wrench = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, body_idx] - wrench = torch.nan_to_num(wrench, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) - if return_torque: - return wrench - return wrench[:, :3] - - -# --------------------------------------------------------------------------- -# 24-D policy observation (OSC + wrist sensing) -# --------------------------------------------------------------------------- - - class NistGearInsertionPolicyObservations(ManagerTermBase): - """24-D policy observation stack for NIST gear insertion (OSC + wrist sensing). + """Policy observation term for OSC-based NIST gear insertion. Output layout (per env):: @@ -174,35 +141,120 @@ def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): p = cfg.params self._robot_name: str = p.get("robot_name", "robot") self._board_name: str = p.get("board_name", "gears_and_base") - self._peg_offset = torch.tensor(p.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device) + self._peg_offset_values = tuple(p.get("peg_offset", [0.0, 0.0, 0.0])) + self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device) self._fingertip_body: str = p.get("fingertip_body_name", "panda_fingertip_centered") self._force_body: str = p.get("force_body_name", "force_sensor") self._pos_noise: float = p.get("pos_noise_level", 0.00025) self._rot_noise_deg: float = p.get("rot_noise_level_deg", 0.1) self._force_noise: float = p.get("force_noise_level", 1.0) - self._ft_alpha: float = p.get("ft_smoothing_factor", 0.25) - self._contact_thresh_range: tuple[float, float] = tuple(p.get("contact_threshold_range", [5.0, 10.0])) n = env.num_envs dev = env.device self._fingertip_idx: int | None = None - self._force_idx: int | None = None + self._body_key = (self._robot_name, self._fingertip_body, self._force_body) self._flip_quats = torch.ones(n, device=dev) self._prev_noisy_pos = torch.zeros(n, 3, device=dev) self._prev_noisy_quat = torch.zeros(n, 4, device=dev) self._prev_noisy_quat[:, 3] = 1.0 - def _ensure_body_indices(self): - if self._fingertip_idx is not None: - return - robot: Articulation = self._env.scene[self._robot_name] - self._fingertip_idx = robot.body_names.index(self._fingertip_body) - self._force_idx = robot.body_names.index(self._force_body) + def _resolve_fingertip_idx( + self, + robot_name: str, + fingertip_body_name: str, + force_body_name: str, + ) -> int: + """Resolve the fingertip body index used by the observation term.""" + body_key = (robot_name, fingertip_body_name, force_body_name) + if self._fingertip_idx is not None and body_key == self._body_key: + return self._fingertip_idx + + robot: Articulation = self._env.scene[robot_name] + for body_name in (fingertip_body_name, force_body_name): + if body_name not in robot.body_names: + raise ValueError( + f"Body '{body_name}' is missing from robot '{robot_name}'. Use a USD that defines this " + "body or override the corresponding observation parameter." + ) + fingertip_idx = robot.body_names.index(fingertip_body_name) + if body_key == self._body_key: + self._fingertip_idx = fingertip_idx + return fingertip_idx + + def _resolve_peg_offset(self, peg_offset: list[float] | None, device: torch.device) -> torch.Tensor: + """Return the cached peg offset unless the manager supplies different values.""" + if peg_offset is None or tuple(peg_offset) == self._peg_offset_values: + return self._peg_offset + return torch.tensor(peg_offset, device=device, dtype=torch.float32) + + def _sample_noisy_pose( + self, + env: ManagerBasedRLEnv, + ft_pos: torch.Tensor, + ft_quat: torch.Tensor, + pos_noise_level: float, + rot_noise_level_deg: float, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return fingertip pose with configured sensor noise.""" + pos_noise = torch.randn(env.num_envs, 3, device=env.device) * pos_noise_level + noisy_pos = ft_pos + pos_noise + + rot_noise_axis = torch.randn(env.num_envs, 3, device=env.device) + rot_noise_axis = F.normalize(rot_noise_axis, dim=1, eps=1e-8) + rot_noise_angle = torch.randn(env.num_envs, device=env.device) * math.radians(rot_noise_level_deg) + noisy_quat = math_utils.quat_mul( + ft_quat, + math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), + ) + return noisy_pos, quat_unique(noisy_quat) + + def _compute_fingertip_target_delta( + self, + env: ManagerBasedRLEnv, + noisy_pos: torch.Tensor, + board_name: str, + peg_offset: torch.Tensor, + ) -> torch.Tensor: + """Return fingertip position relative to the noisy peg target.""" + board: RigidObject = env.scene[board_name] + arm_osc_action = get_nist_gear_insertion_arm_action(env) + peg_pos = _offset_pos_in_env_frame(env, board, peg_offset) + noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise + return noisy_pos - noisy_fixed_pos + + def _compute_velocities( + self, + noisy_pos: torch.Tensor, + noisy_quat: torch.Tensor, + dt: float, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Estimate fingertip linear and angular velocity from noisy poses.""" + safe_dt = max(dt, 1e-6) + ee_linvel = (noisy_pos - self._prev_noisy_pos) / safe_dt + + rot_diff = math_utils.quat_mul(noisy_quat, math_utils.quat_conjugate(self._prev_noisy_quat)) + rot_diff = quat_unique(rot_diff) + ee_angvel = axis_angle_from_quat(rot_diff) / safe_dt + ee_angvel[:, 0:2] = 0.0 + + self._prev_noisy_pos[:] = noisy_pos + self._prev_noisy_quat[:] = noisy_quat + return ee_linvel, ee_angvel + + def _read_smoothed_force( + self, + env: ManagerBasedRLEnv, + arm_osc_action, + force_noise_level: float, + ) -> torch.Tensor: + """Return the smoothed wrist force with configured observation noise.""" + return arm_osc_action.force_smooth + torch.randn(env.num_envs, 3, device=env.device) * force_noise_level def reset(self, env_ids: list[int] | None = None): + """Reset cached noisy pose state for the selected environments.""" if env_ids is None or len(env_ids) == 0: return @@ -213,96 +265,66 @@ def reset(self, env_ids: list[int] | None = None): flip[torch.rand(n, device=dev) > 0.5] = -1.0 self._flip_quats[env_ids] = flip - self._ensure_body_indices() + fingertip_idx = self._resolve_fingertip_idx(self._robot_name, self._fingertip_body, self._force_body) robot: Articulation = self._env.scene[self._robot_name] origins = self._env.scene.env_origins - self._prev_noisy_pos[env_ids] = ( - wp.to_torch(robot.data.body_pos_w)[env_ids, self._fingertip_idx] - origins[env_ids] - ) - reset_quat = wp.to_torch(robot.data.body_quat_w)[env_ids, self._fingertip_idx].clone() - reset_quat[:, [2, 3]] = 0.0 - reset_quat = torch.nn.functional.normalize(reset_quat, dim=-1) - reset_quat = reset_quat * flip.unsqueeze(-1) - self._prev_noisy_quat[env_ids] = reset_quat + self._prev_noisy_pos[env_ids] = wp.to_torch(robot.data.body_pos_w)[env_ids, fingertip_idx] - origins[env_ids] + reset_quat = wp.to_torch(robot.data.body_quat_w)[env_ids, fingertip_idx] + self._prev_noisy_quat[env_ids] = quat_unique(reset_quat) def __call__( self, env: ManagerBasedRLEnv, - robot_name: str = "robot", - board_name: str = "gears_and_base", + robot_name: str | None = None, + board_name: str | None = None, peg_offset: list[float] | None = None, - fingertip_body_name: str = "panda_fingertip_centered", - force_body_name: str = "force_sensor", - pos_noise_level: float = 0.00025, - rot_noise_level_deg: float = 0.1, - force_noise_level: float = 1.0, + fingertip_body_name: str | None = None, + force_body_name: str | None = None, + pos_noise_level: float | None = None, + rot_noise_level_deg: float | None = None, + force_noise_level: float | None = None, ) -> torch.Tensor: - self._ensure_body_indices() + """Return the 24-D policy observation tensor.""" + robot_name = self._robot_name if robot_name is None else robot_name + board_name = self._board_name if board_name is None else board_name + fingertip_body_name = self._fingertip_body if fingertip_body_name is None else fingertip_body_name + force_body_name = self._force_body if force_body_name is None else force_body_name + pos_noise_level = self._pos_noise if pos_noise_level is None else pos_noise_level + rot_noise_level_deg = self._rot_noise_deg if rot_noise_level_deg is None else rot_noise_level_deg + force_noise_level = self._force_noise if force_noise_level is None else force_noise_level + + peg_offset_tensor = self._resolve_peg_offset(peg_offset, env.device) + fingertip_idx = self._resolve_fingertip_idx(robot_name, fingertip_body_name, force_body_name) - n = env.num_envs - dev = env.device dt = env.step_dt - robot: Articulation = env.scene[self._robot_name] - board = env.scene[self._board_name] + robot: Articulation = env.scene[robot_name] origins = env.scene.env_origins - ft_pos = wp.to_torch(robot.data.body_pos_w)[:, self._fingertip_idx] - origins - ft_quat = wp.to_torch(robot.data.body_quat_w)[:, self._fingertip_idx] + ft_pos = wp.to_torch(robot.data.body_pos_w)[:, fingertip_idx] - origins + ft_quat = wp.to_torch(robot.data.body_quat_w)[:, fingertip_idx] - pos_noise = torch.randn(n, 3, device=dev) * self._pos_noise - noisy_pos = ft_pos + pos_noise + noisy_pos, noisy_quat_full = self._sample_noisy_pose(env, ft_pos, ft_quat, pos_noise_level, rot_noise_level_deg) + fingertip_pos_rel = self._compute_fingertip_target_delta(env, noisy_pos, board_name, peg_offset_tensor) + ee_linvel, ee_angvel = self._compute_velocities(noisy_pos, noisy_quat_full, dt) - rot_noise_axis = torch.randn(n, 3, device=dev) - rot_noise_axis = rot_noise_axis / (torch.linalg.norm(rot_noise_axis, dim=1, keepdim=True) + 1e-8) - rot_noise_angle = torch.randn(n, device=dev) * math.radians(self._rot_noise_deg) - noisy_quat = math_utils.quat_mul( - ft_quat, - math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), - ) - noisy_quat[:, [2, 3]] = 0.0 - noisy_quat = torch.nn.functional.normalize(noisy_quat, dim=-1) - noisy_quat = noisy_quat * self._flip_quats.unsqueeze(-1) - - # No public API for action term lookup; _terms is the standard access pattern. - arm_osc_action = env.action_manager._terms["arm_action"] - board_pos = wp.to_torch(board.data.root_pos_w) - origins - board_quat = wp.to_torch(board.data.root_quat_w) - peg_offset_exp = self._peg_offset.unsqueeze(0).expand(n, 3) - peg_pos = board_pos + math_utils.quat_apply(board_quat, peg_offset_exp) - noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise - - fingertip_pos_rel = noisy_pos - noisy_fixed_pos - - safe_dt = max(dt, 1e-6) - ee_linvel = (noisy_pos - self._prev_noisy_pos) / safe_dt - self._prev_noisy_pos[:] = noisy_pos + obs_quat = noisy_quat_full.clone() + # The gear is symmetric about the peg axis, so the policy observes the tilt quaternion up to sign. + obs_quat[:, [2, 3]] = 0.0 + obs_quat = obs_quat * self._flip_quats.unsqueeze(-1) - rot_diff = math_utils.quat_mul(noisy_quat, math_utils.quat_conjugate(self._prev_noisy_quat)) - rot_diff = rot_diff * torch.sign(rot_diff[:, 3]).unsqueeze(-1) - ee_angvel = axis_angle_from_quat(rot_diff) / safe_dt - ee_angvel[:, 0:2] = 0.0 - self._prev_noisy_quat[:] = noisy_quat - - raw_force = wp.to_torch(robot.root_view.get_link_incoming_joint_force())[:, self._force_idx, :3] - raw_force = torch.nan_to_num(raw_force, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) - # Force EMA is updated here (in the observation) rather than in process_actions - # because the smoothed force must be current *before* the policy reads it. - # Moving this to process_actions would delay the update by one step. - arm_osc_action.force_smooth[:] = ( - self._ft_alpha * raw_force + (1.0 - self._ft_alpha) * arm_osc_action.force_smooth - ) - noisy_force = arm_osc_action.force_smooth + torch.randn(n, 3, device=dev) * self._force_noise + arm_osc_action = get_nist_gear_insertion_arm_action(env) + noisy_force = self._read_smoothed_force(env, arm_osc_action, force_noise_level) force_threshold = arm_osc_action.contact_thresholds.unsqueeze(-1) - prev_actions = arm_osc_action._smoothed_actions.clone() + prev_actions = arm_osc_action.smoothed_actions.clone() prev_actions[:, 3:5] = 0.0 - return torch.cat( + obs = torch.cat( [ fingertip_pos_rel, - noisy_quat, + obs_quat, ee_linvel, ee_angvel, noisy_force, @@ -311,3 +333,4 @@ def __call__( ], dim=-1, ) + return torch.nan_to_num(obs, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) diff --git a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py index 9921ce5bc..312be010f 100644 --- a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py +++ b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py @@ -3,13 +3,7 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Reward terms for the NIST gear insertion task. - -Includes squashing-function keypoint rewards (baseline, coarse, fine), engagement -and success bonuses, and optional OSC / contact regularizers. Design follows -common assembly peg-insert RL practice; see e.g. Appendix B of -https://arxiv.org/pdf/2408.04587 for related shaping. -""" +"""Reward terms for the NIST gear insertion task.""" from __future__ import annotations @@ -20,25 +14,20 @@ from isaaclab.assets import RigidObject from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg from isaaclab.utils.math import combine_frame_transforms, quat_apply +from isaaclab_tasks.direct.factory.factory_utils import get_keypoint_offsets, squashing_fn from isaaclab_arena.tasks.observations.gear_insertion_observations import check_gear_insertion_geometry +from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import get_nist_gear_insertion_arm_action if TYPE_CHECKING: from isaaclab.envs import ManagerBasedRLEnv -def _get_keypoint_offsets(num_keypoints: int = 4, device: torch.device | None = None) -> torch.Tensor: - """Uniformly-spaced keypoints along the Z-axis, centered at 0.""" - offsets = torch.zeros((num_keypoints, 3), device=device, dtype=torch.float32) - offsets[:, 2] = torch.linspace(0.0, 1.0, num_keypoints, device=device) - 0.5 - return offsets - - class _KeypointDistanceComputer: - """Pre-cached keypoint distance calculator matching Factory's pattern.""" + """Keypoint distance calculator with reusable buffers.""" def __init__(self, num_envs: int, device: torch.device, num_keypoints: int = 4): - self.offsets_base = _get_keypoint_offsets(num_keypoints=num_keypoints, device=device) + self.offsets_base = get_keypoint_offsets(num_keypoints, device) self.n_kp = self.offsets_base.shape[0] self.identity_quat = ( torch.tensor([[0.0, 0.0, 0.0, 1.0]], device=device, dtype=torch.float32) @@ -55,7 +44,7 @@ def compute( quat_b: torch.Tensor, scale: float = 1.0, ) -> torch.Tensor: - """Returns mean keypoint L2 distance, shape (num_envs,).""" + """Return mean keypoint L2 distance.""" n = pos_a.shape[0] offsets = self.offsets_base * scale self.offsets_buf[:n] = offsets.unsqueeze(0) @@ -71,182 +60,168 @@ def _expand(t: torch.Tensor) -> torch.Tensor: return per_kp_dist.mean(-1) -def _squashing_fn(x: torch.Tensor, a: float, b: float) -> torch.Tensor: - """Squashing function r(x) = 1 / (exp(a*x) + b + exp(-a*x)).""" - return 1.0 / (torch.exp(a * x) + b + torch.exp(-a * x)) +def _resolve_offset_tensor( + values: list[float] | None, + cached_values: tuple[float, ...], + cached_tensor: torch.Tensor, + device: torch.device, +) -> torch.Tensor: + """Return the cached offset tensor unless the manager supplies different values.""" + if values is None or tuple(values) == cached_values: + return cached_tensor + return torch.tensor(values, device=device, dtype=torch.float32) -class gear_peg_keypoint_squashing(ManagerTermBase): - """Squashing-function keypoint reward for gear vs peg alignment. +def _offset_pose_in_env_frame( + env: ManagerBasedRLEnv, + asset: RigidObject, + offset: torch.Tensor, + num_envs: int, +) -> tuple[torch.Tensor, torch.Tensor]: + """Return an asset-local offset pose in each environment frame.""" + root_pos = wp.to_torch(asset.data.root_pos_w)[:num_envs] - env.scene.env_origins[:num_envs] + root_quat = wp.to_torch(asset.data.root_quat_w)[:num_envs] + offset = offset.unsqueeze(0).expand(num_envs, 3) + return root_pos + quat_apply(root_quat, offset), root_quat - Instantiate three times with different [a, b] for baseline/coarse/fine. - Supports optional per-episode XY noise on the peg offset. - """ + +class gear_peg_keypoint_squashing(ManagerTermBase): + """Squashing-function keypoint reward for gear-to-peg alignment.""" def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): super().__init__(cfg, env) - self.gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] - self.board_cfg: SceneEntityCfg = cfg.params["board_cfg"] - self.peg_offset = torch.tensor( - cfg.params.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device, dtype=torch.float32 - ) + self._peg_offset_values = tuple(cfg.params.get("peg_offset", [0.0, 0.0, 0.0])) + self.peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) + self._held_gear_base_offset_values = tuple(cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0])) self.held_gear_base_offset = torch.tensor( - cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0]), device=env.device, dtype=torch.float32 + self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 ) self._xy_noise_range = cfg.params.get("peg_offset_xy_noise", 0.0) - num_keypoints = cfg.params.get("num_keypoints", 4) - self.kp = _KeypointDistanceComputer(env.num_envs, env.device, num_keypoints=num_keypoints) + self._num_keypoints: int = cfg.params.get("num_keypoints", 4) + self.kp = _KeypointDistanceComputer(env.num_envs, env.device, num_keypoints=self._num_keypoints) self._offset_noise = torch.zeros(env.num_envs, 3, device=env.device, dtype=torch.float32) - def reset(self, env_ids: torch.Tensor) -> None: - if self._xy_noise_range > 0.0: - n = len(env_ids) - noise_dev = self._offset_noise.device - self._offset_noise[env_ids, 0] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range - self._offset_noise[env_ids, 1] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range + def reset(self, env_ids: torch.Tensor | None = None) -> None: + if self._xy_noise_range <= 0.0: + return + if env_ids is None: + env_ids = torch.arange(self._offset_noise.shape[0], device=self._offset_noise.device) + elif isinstance(env_ids, slice): + env_ids = torch.arange(self._offset_noise.shape[0], device=self._offset_noise.device)[env_ids] + if len(env_ids) == 0: + return + + n = len(env_ids) + noise_dev = self._offset_noise.device + self._offset_noise[env_ids, 0] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range + self._offset_noise[env_ids, 1] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range def __call__( self, env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), - held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), + peg_offset: list[float] | None = None, + held_gear_base_offset: list[float] | None = None, keypoint_scale: float = 0.15, num_keypoints: int = 4, + peg_offset_xy_noise: float = 0.0, squash_a: float = 50.0, squash_b: float = 2.0, - peg_offset_xy_noise: float = 0.0, ) -> torch.Tensor: - gear: RigidObject = env.scene[self.gear_cfg.name] - gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins - gear_quat = wp.to_torch(gear.data.root_quat_w) - n = gear_pos.shape[0] - held_offset = self.held_gear_base_offset.unsqueeze(0).expand(n, 3) - held_base_pos = gear_pos + quat_apply(gear_quat, held_offset) - - board: RigidObject = env.scene[self.board_cfg.name] - pos = wp.to_torch(board.data.root_pos_w)[:n] - env.scene.env_origins[:n] - quat = wp.to_torch(board.data.root_quat_w)[:n] - offset = self.peg_offset.unsqueeze(0).expand(n, 3) - target_pos = pos + quat_apply(quat, offset) + self._offset_noise[:n] - target_quat = quat - kp_dist = self.kp.compute(target_pos, target_quat, held_base_pos, gear_quat, scale=keypoint_scale) - return _squashing_fn(kp_dist, squash_a, squash_b) - - -# Module-level cache for constant offset tensors used by _check_gear_position. -# In practice this holds only 2-3 entries (held_gear_base_offset and peg_offset -# per device). If per-episode offset randomization is added in the future, this -# cache should be replaced with instance-level pre-allocated tensors. -_CACHED_OFFSETS: dict[tuple, torch.Tensor] = {} - + if num_keypoints != self._num_keypoints: + raise ValueError( + f"num_keypoints is fixed at term initialization. Expected {self._num_keypoints}, got {num_keypoints}." + ) + + n = env.num_envs + gear: RigidObject = env.scene[gear_cfg.name] + held_gear_base_offset_tensor = _resolve_offset_tensor( + held_gear_base_offset, + self._held_gear_base_offset_values, + self.held_gear_base_offset, + env.device, + ) + held_base_pos, gear_quat = _offset_pose_in_env_frame(env, gear, held_gear_base_offset_tensor, n) -def _get_cached_offset(values: tuple[float, ...] | list[float], n: int, device: torch.device) -> torch.Tensor: - """Return a cached (n, 3) offset tensor, avoiding per-step allocation.""" - key = (tuple(values), device) - cached = _CACHED_OFFSETS.get(key) - if cached is None or cached.shape[0] < n: - cached = torch.tensor(values, device=device, dtype=torch.float32).unsqueeze(0).expand(max(n, 1), 3).contiguous() - _CACHED_OFFSETS[key] = cached - return cached[:n] + board: RigidObject = env.scene[board_cfg.name] + peg_offset_tensor = _resolve_offset_tensor(peg_offset, self._peg_offset_values, self.peg_offset, env.device) + target_pos, target_quat = _offset_pose_in_env_frame(env, board, peg_offset_tensor, n) + offset_noise = self._offset_noise[:n] if peg_offset_xy_noise > 0.0 else 0.0 + target_pos = target_pos + offset_noise + kp_dist = self.kp.compute(target_pos, target_quat, held_base_pos, gear_quat, scale=keypoint_scale) + return squashing_fn(kp_dist, squash_a, squash_b) -def _check_gear_position( +def _compute_gear_position_success( env: ManagerBasedRLEnv, gear_cfg: SceneEntityCfg, board_cfg: SceneEntityCfg, - peg_offset: tuple[float, ...], - held_gear_base_offset: tuple[float, ...], + peg_offset: torch.Tensor, + held_gear_base_offset: torch.Tensor, gear_peg_height: float, z_fraction: float, xy_threshold: float, ) -> torch.Tensor: - """Return bool tensor indicating whether gear meets XY centering and Z depth criteria. - - Delegates to :func:`check_gear_insertion_geometry` for the shared XY+Z check. - """ + """Return whether the gear meets the XY-centering and Z-depth thresholds.""" gear: RigidObject = env.scene[gear_cfg.name] - gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins - gear_quat = wp.to_torch(gear.data.root_quat_w) - held_off = _get_cached_offset(held_gear_base_offset, env.num_envs, env.device) - held_base_pos = gear_pos + quat_apply(gear_quat, held_off) + held_base_pos, _ = _offset_pose_in_env_frame(env, gear, held_gear_base_offset, env.num_envs) board: RigidObject = env.scene[board_cfg.name] - pos = wp.to_torch(board.data.root_pos_w) - env.scene.env_origins - quat = wp.to_torch(board.data.root_quat_w) - offset = _get_cached_offset(peg_offset, env.num_envs, env.device) - peg_pos = pos + quat_apply(quat, offset) + peg_pos, _ = _offset_pose_in_env_frame(env, board, peg_offset, env.num_envs) return check_gear_insertion_geometry(held_base_pos, peg_pos, gear_peg_height, z_fraction, xy_threshold) -def gear_insertion_engagement_bonus( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), - held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), - gear_peg_height: float = 0.02, - engage_z_fraction: float = 0.90, - xy_threshold: float = 0.015, -) -> torch.Tensor: - """Bonus when the gear is partially engaged on the peg.""" - return _check_gear_position( - env, - gear_cfg, - board_cfg, - peg_offset, - held_gear_base_offset, - gear_peg_height, - engage_z_fraction, - xy_threshold, - ).float() - - -def gear_insertion_success_bonus( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), - held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), - gear_peg_height: float = 0.02, - success_z_fraction: float = 0.05, - xy_threshold: float = 0.0025, -) -> torch.Tensor: - """Bonus when the gear is fully inserted (binary success geometry).""" - return _check_gear_position( - env, - gear_cfg, - board_cfg, - peg_offset, - held_gear_base_offset, - gear_peg_height, - success_z_fraction, - xy_threshold, - ).float() - - -class osc_action_magnitude_penalty(ManagerTermBase): - """Penalize large asset-relative position/rotation commands. - - Note: accesses ``env.action_manager._terms`` directly — no public API exists. - """ +class gear_insertion_geometry_bonus(ManagerTermBase): + """Bonus when the gear satisfies the configured insertion geometry.""" def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): super().__init__(cfg, env) - self._pos_thresh: float = cfg.params.get("pos_action_threshold", 0.02) - self._rot_thresh: float = cfg.params.get("rot_action_threshold", 0.097) + self._peg_offset_values = tuple(cfg.params.get("peg_offset", [0.0, 0.0, 0.0])) + self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) + self._held_gear_base_offset_values = tuple(cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0])) + self._held_gear_base_offset = torch.tensor( + self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 + ) def __call__( self, env: ManagerBasedRLEnv, - pos_action_threshold: float = 0.02, - rot_action_threshold: float = 0.097, + gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), + board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), + peg_offset: list[float] | None = None, + held_gear_base_offset: list[float] | None = None, + gear_peg_height: float = 0.02, + z_fraction: float = 0.05, + xy_threshold: float = 0.015, ) -> torch.Tensor: - action_term = env.action_manager._terms["arm_action"] - pos_error = torch.norm(action_term.delta_pos, p=2, dim=-1) / self._pos_thresh - rot_error = torch.abs(action_term.delta_yaw) / self._rot_thresh + return _compute_gear_position_success( + env, + gear_cfg, + board_cfg, + _resolve_offset_tensor(peg_offset, self._peg_offset_values, self._peg_offset, env.device), + _resolve_offset_tensor( + held_gear_base_offset, + self._held_gear_base_offset_values, + self._held_gear_base_offset, + env.device, + ), + gear_peg_height, + z_fraction, + xy_threshold, + ).float() + + +class osc_action_magnitude_penalty(ManagerTermBase): + """Penalize large asset-relative position and yaw commands.""" + + def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: + action_term = get_nist_gear_insertion_arm_action(env) + pos_scale = action_term.position_thresholds.clamp_min(1.0e-6) + rot_scale = action_term.rotation_thresholds[:, 2].clamp_min(1.0e-6) + pos_error = torch.norm(action_term.delta_pos / pos_scale, p=2, dim=-1) + rot_error = torch.abs(action_term.delta_yaw) / rot_scale return pos_error + rot_error @@ -254,9 +229,9 @@ class osc_action_delta_penalty(ManagerTermBase): """Penalize jerky actions using smoothed action deltas.""" def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: - action_term = env.action_manager._terms["arm_action"] + action_term = get_nist_gear_insertion_arm_action(env) return torch.norm( - action_term._smoothed_actions - action_term._prev_smoothed_actions, + action_term.smoothed_actions - action_term.previous_smoothed_actions, p=2, dim=-1, ) @@ -266,37 +241,23 @@ class wrist_contact_force_penalty(ManagerTermBase): """Penalize wrist/contact force magnitude above per-episode threshold.""" def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: - action_term = env.action_manager._terms["arm_action"] + action_term = get_nist_gear_insertion_arm_action(env) force_mag = torch.norm(action_term.force_smooth, p=2, dim=-1) return torch.nn.functional.relu(force_mag - action_term.contact_thresholds) class success_prediction_error(ManagerTermBase): - """Penalize incorrect success predictions from the 7th action dimension. - - ``true_success`` uses held insertion base vs target peg (same geometry as - ``gear_insertion_success_bonus`` / ``gear_mesh_insertion_success``), not the gear - rigid-body root, consistent with common assembly peg-insert benchmarks. - - ``_pred_scale`` acts as a **one-way curriculum gate**: it starts at 0 and - flips to 1 the first time mean success rate crosses ``delay_until_ratio``. - It intentionally never resets back to 0 — once the agent demonstrates - sufficient insertion ability, the prediction penalty remains active for the - rest of training even if performance temporarily dips. - """ + """Penalize incorrect success predictions from the seventh action dimension.""" def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): super().__init__(cfg, env) self._pred_scale = 0.0 - self._delay_until_ratio: float = cfg.params.get("delay_until_ratio", 0.25) - self._gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] - self._board_cfg: SceneEntityCfg = cfg.params["board_cfg"] - hgo = cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0]) - self._held_gear_base_offset = torch.tensor(hgo, device=env.device, dtype=torch.float32) - self._peg_offset = torch.tensor(cfg.params.get("peg_offset", [0.0, 0.0, 0.0]), device=env.device) - self._gear_peg_height: float = cfg.params.get("gear_peg_height", 0.02) - self._success_z_fraction: float = cfg.params.get("success_z_fraction", 0.05) - self._xy_threshold: float = cfg.params.get("xy_threshold", 0.0025) + self._held_gear_base_offset_values = tuple(cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0])) + self._held_gear_base_offset = torch.tensor( + self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 + ) + self._peg_offset_values = tuple(cfg.params.get("peg_offset", [0.0, 0.0, 0.0])) + self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) def __call__( self, @@ -310,27 +271,27 @@ def __call__( xy_threshold: float = 0.0025, delay_until_ratio: float = 0.25, ) -> torch.Tensor: - gear: RigidObject = env.scene[self._gear_cfg.name] - gear_pos = wp.to_torch(gear.data.root_pos_w) - env.scene.env_origins - gear_quat = wp.to_torch(gear.data.root_quat_w) - n = gear_pos.shape[0] - held_off = self._held_gear_base_offset.unsqueeze(0).expand(n, 3) - held_base_pos = gear_pos + quat_apply(gear_quat, held_off) - - board: RigidObject = env.scene[self._board_cfg.name] - board_pos = wp.to_torch(board.data.root_pos_w) - env.scene.env_origins - board_quat = wp.to_torch(board.data.root_quat_w) - peg_off = self._peg_offset.unsqueeze(0).expand(n, 3) - peg_pos = board_pos + quat_apply(board_quat, peg_off) - - true_success = check_gear_insertion_geometry( - held_base_pos, peg_pos, self._gear_peg_height, self._success_z_fraction, self._xy_threshold + true_success = _compute_gear_position_success( + env, + gear_cfg, + board_cfg, + _resolve_offset_tensor(peg_offset, self._peg_offset_values, self._peg_offset, env.device), + _resolve_offset_tensor( + held_gear_base_offset, + self._held_gear_base_offset_values, + self._held_gear_base_offset, + env.device, + ), + gear_peg_height, + success_z_fraction, + xy_threshold, ) - if true_success.float().mean() >= self._delay_until_ratio: + # Once enough environments have reached the success geometry, keep the auxiliary loss enabled. + if true_success.float().mean() >= delay_until_ratio: self._pred_scale = 1.0 - arm_osc_action = env.action_manager._terms["arm_action"] + arm_osc_action = get_nist_gear_insertion_arm_action(env) pred = (arm_osc_action.success_pred + 1.0) / 2.0 error = torch.abs(true_success.float() - pred) return error * self._pred_scale diff --git a/isaaclab_arena/terms/events.py b/isaaclab_arena/terms/events.py index 240c66411..422361527 100644 --- a/isaaclab_arena/terms/events.py +++ b/isaaclab_arena/terms/events.py @@ -6,17 +6,10 @@ from __future__ import annotations import torch -from collections.abc import Callable -from typing import Any, cast -import isaaclab.utils.math as math_utils import warp as wp -from isaaclab.assets import Articulation from isaaclab.envs import ManagerBasedEnv -from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg -# Dependency: isaaclab_tasks.direct.automate (Factory task package) provides -# IK utilities (get_pose_error, _get_delta_dof_pos) used by place_gear_in_gripper. -from isaaclab_tasks.direct.automate import factory_control as fc +from isaaclab.managers import SceneEntityCfg from isaaclab_arena.utils.pose import Pose from isaaclab_arena.utils.velocity import Velocity @@ -91,155 +84,3 @@ def reset_all_articulation_joints(env: ManagerBasedEnv, env_ids: torch.Tensor): # set into the physics simulation articulation_asset.write_joint_position_to_sim_index(position=default_joint_pos, env_ids=env_ids) articulation_asset.write_joint_velocity_to_sim_index(velocity=default_joint_vel, env_ids=env_ids) - - -class place_gear_in_gripper(ManagerTermBase): - """Solve IK to position the gripper at the gear, then close fingers. - - Uses iterative DLS IK to move the end-effector to the gear's pose - (with configurable grasp offset/orientation), then sets gripper joints. - """ - - def __init__(self, cfg: EventTermCfg, env: ManagerBasedEnv): - super().__init__(cfg, env) - - robot_cfg: SceneEntityCfg = cfg.params.get("robot_cfg", SceneEntityCfg("robot")) - self.robot: Articulation = env.scene[robot_cfg.name] - - gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] - self.gear = env.scene[gear_cfg.name] - - self.num_arm_joints: int = cast(int, cfg.params["num_arm_joints"]) - self.hand_grasp_width: float = cast(float, cfg.params["hand_grasp_width"]) - self.hand_close_width: float = cast(float, cfg.params["hand_close_width"]) - self.gripper_joint_setter_func: Callable[..., Any] = cast( - Callable[..., Any], - cfg.params["gripper_joint_setter_func"], - ) - - grasp_rot_offset = cfg.params["grasp_rot_offset"] - self.grasp_rot_offset_tensor = ( - torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32).unsqueeze(0).repeat(env.num_envs, 1) - ) - - grasp_offset = cfg.params["grasp_offset"] - self.grasp_offset_tensor = torch.tensor(grasp_offset, device=env.device, dtype=torch.float32) - - ee_name: str = cast(str, cfg.params.get("end_effector_body_name", "panda_hand")) - eef_indices, _ = self.robot.find_bodies([ee_name]) - if not eef_indices: - raise ValueError(f"End-effector body '{ee_name}' not found in robot") - self.eef_idx: int = eef_indices[0] - self.jacobi_body_idx: int = self.eef_idx - 1 - - all_joints, _ = self.robot.find_joints([".*"]) - self.all_joints = all_joints - self.finger_joints = all_joints[self.num_arm_joints :] - - def __call__( - self, - env: ManagerBasedEnv, - env_ids: torch.Tensor, - robot_cfg: SceneEntityCfg | None = None, - gear_cfg: SceneEntityCfg | None = None, - num_arm_joints: int | None = None, - hand_grasp_width: float | None = None, - hand_close_width: float | None = None, - gripper_joint_setter_func: Callable | None = None, - end_effector_body_name: str | None = None, - grasp_rot_offset: list | None = None, - grasp_offset: list | None = None, - max_ik_iterations: int = 10, - pos_threshold: float = 1e-6, - rot_threshold: float = 1e-6, - ) -> None: - n = len(env_ids) - device = env.device - - grasp_rot_offset_tensor = self.grasp_rot_offset_tensor[env_ids] - hand_init_orn_noise_yaw = 0.0 - random_yaw = (2.0 * torch.rand(n, device=device) - 1.0) * hand_init_orn_noise_yaw - yaw_quat = math_utils.quat_from_euler_xyz( - torch.zeros(n, device=device), - torch.zeros(n, device=device), - random_yaw, - ) - grasp_rot_offset_tensor = math_utils.quat_mul(grasp_rot_offset_tensor, yaw_quat) - grasp_offset_batch = self.grasp_offset_tensor.unsqueeze(0).expand(n, -1) - - for _ in range(max_ik_iterations): - joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() - joint_vel = wp.to_torch(self.robot.data.joint_vel)[env_ids].clone() - - gear_pos_w = wp.to_torch(self.gear.data.root_link_pos_w)[env_ids].clone() - gear_quat_w = wp.to_torch(self.gear.data.root_link_quat_w)[env_ids].clone() - - target_quat = math_utils.quat_mul(gear_quat_w, grasp_rot_offset_tensor) - target_pos = gear_pos_w + math_utils.quat_apply(target_quat, grasp_offset_batch) - - eef_pos = wp.to_torch(self.robot.data.body_pos_w)[env_ids, self.eef_idx] - eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] - - pos_error, aa_error = fc.get_pose_error( - fingertip_midpoint_pos=eef_pos, - fingertip_midpoint_quat=eef_quat, - ctrl_target_fingertip_midpoint_pos=target_pos, - ctrl_target_fingertip_midpoint_quat=target_quat, - jacobian_type="geometric", - rot_error_type="axis_angle", - ) - delta_hand_pose = torch.cat((pos_error, aa_error), dim=-1) - - if ( - torch.norm(pos_error, dim=-1).max() < pos_threshold - and torch.norm(aa_error, dim=-1).max() < rot_threshold - ): - break - - jacobians = wp.to_torch(self.robot.root_view.get_jacobians()).clone() - jacobian = jacobians[env_ids, self.jacobi_body_idx, :, :] - - delta_dof_pos = fc._get_delta_dof_pos( - delta_pose=delta_hand_pose, - ik_method="dls", - jacobian=jacobian, - device=device, - ) - - joint_pos = joint_pos + delta_dof_pos - joint_vel = torch.zeros_like(joint_pos) - - self.robot.set_joint_position_target_index(target=joint_pos, env_ids=env_ids) - self.robot.set_joint_velocity_target_index(target=joint_vel, env_ids=env_ids) - self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) - self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) - - joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() - - for jid in self.finger_joints: - joint_pos[:, jid] = self.hand_grasp_width / 2.0 - - self.robot.set_joint_position_target_index( - target=joint_pos, - joint_ids=self.all_joints, - env_ids=env_ids, - ) - self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) - self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) - - for jid in self.finger_joints: - joint_pos[:, jid] = self.hand_close_width / 2.0 - - self.robot.set_joint_position_target_index( - target=joint_pos, - joint_ids=self.all_joints, - env_ids=env_ids, - ) - - gear_quat = wp.to_torch(self.gear.data.root_quat_w)[env_ids] - eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] - rel_quat = math_utils.quat_mul(gear_quat, math_utils.quat_conjugate(eef_quat)) - if not hasattr(env, "_initial_gear_ee_relative_quat"): - env._initial_gear_ee_relative_quat = torch.zeros(env.num_envs, 4, device=env.device, dtype=torch.float32) - env._initial_gear_ee_relative_quat[:, 3] = 1.0 - env._initial_gear_ee_relative_quat[env_ids] = rel_quat diff --git a/isaaclab_arena/tests/test_nist_gear_insertion_task.py b/isaaclab_arena/tests/test_nist_gear_insertion_task.py new file mode 100644 index 000000000..5af7981c7 --- /dev/null +++ b/isaaclab_arena/tests/test_nist_gear_insertion_task.py @@ -0,0 +1,198 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from isaaclab.managers import ObservationGroupCfg as ObsGroup + +from isaaclab_arena.assets.asset_registry import AssetRegistry +from isaaclab_arena.embodiments.franka.franka import ( + FrankaNistGearInsertionObservationsCfg, + FrankaNistGearInsertionOscEmbodiment, + franka_gripper_joint_setter, +) +from isaaclab_arena.tasks.events import place_gear_in_gripper +from isaaclab_arena.tasks.nist_gear_insertion_task import GearInsertionGeometryCfg, GraspCfg, NistGearInsertionTask +from isaaclab_arena.tasks.observations.gear_insertion_observations import ( + NistGearInsertionPolicyObservations, + peg_delta_from_held_gear_base, + peg_pos_in_env_frame, +) +from isaaclab_arena.tasks.rewards import gear_insertion_rewards +from isaaclab_arena.tasks.terminations import gear_dropped_from_gripper, gear_mesh_insertion_success +from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg +from isaaclab_arena_environments.nist_assembled_gearmesh_osc_environment import NISTAssembledGearMeshOSCEnvironment + + +class _TestAsset: + def __init__(self, name: str, object_min_z: float | None = None): + self.name = name + self.object_min_z = object_min_z + self.reset_pose = True + + def disable_reset_pose(self) -> None: + self.reset_pose = False + + +def _asset(name: str, object_min_z: float | None = None) -> _TestAsset: + return _TestAsset(name=name, object_min_z=object_min_z) + + +def _nist_task(**kwargs) -> NistGearInsertionTask: + return NistGearInsertionTask( + assembled_board=_asset("nist_board_assembled"), + held_gear=_asset("medium_nist_gear"), + background_scene=_asset("table", object_min_z=0.0), + gear_base_asset=_asset("gears_and_base"), + geometry_cfg=GearInsertionGeometryCfg( + peg_offset_from_board=[0.02025, 0.0, 0.0], + peg_offset_for_obs=[0.02025, 0.0, 0.025], + held_gear_base_offset=[0.02025, 0.0, 0.0], + success_z_fraction=0.20, + xy_threshold=0.0025, + ), + **kwargs, + ) + + +def test_franka_nist_gear_osc_embodiment_registers_task_terms(): + asset_registry = AssetRegistry() + embodiment_cls = asset_registry.get_asset_by_name("franka_nist_gear_osc") + embodiment = embodiment_cls(fixed_asset_name="gears_and_base", peg_offset=(0.02025, 0.0, 0.025)) + + assert embodiment_cls is FrankaNistGearInsertionOscEmbodiment + assert isinstance(embodiment.action_config.arm_action, NistGearInsertionOscActionCfg) + assert embodiment.action_config.arm_action.fixed_asset_name == "gears_and_base" + assert embodiment.action_config.arm_action.peg_offset == (0.02025, 0.0, 0.025) + assert embodiment.action_config.arm_action.body_name == "panda_fingertip_centered" + + assert isinstance(embodiment.observation_config, FrankaNistGearInsertionObservationsCfg) + policy_obs = embodiment.observation_config.policy + assert policy_obs.nist_gear_policy_obs.func is NistGearInsertionPolicyObservations + assert policy_obs.nist_gear_policy_obs.params["board_name"] == "gears_and_base" + assert policy_obs.nist_gear_policy_obs.params["peg_offset"] == [0.02025, 0.0, 0.025] + assert policy_obs.nist_gear_policy_obs.params["fingertip_body_name"] == "panda_fingertip_centered" + + +def test_nist_object_library_uses_shared_nucleus_assets(): + asset_registry = AssetRegistry() + expected_paths = { + "gears_and_base": ( + "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/" + "gearbase_and_gears_gearbase_root.usd" + ), + "medium_nist_gear": ( + "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_medium.usd" + ), + "nist_board_assembled": ( + "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/" + "nist_board_assembled.usd" + ), + "nist_gear_base": ( + "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_base.usd" + ), + } + + for asset_name, expected_path in expected_paths.items(): + asset_cls = asset_registry.get_asset_by_name(asset_name) + assert asset_cls.usd_path == expected_path + + +def test_nist_task_observation_terms_use_task_geometry(): + obs_cfg = _nist_task().get_observation_cfg() + + assert isinstance(obs_cfg.task_obs, ObsGroup) + assert obs_cfg.task_obs.peg_pos.func is peg_pos_in_env_frame + assert obs_cfg.task_obs.peg_pos.params["board_cfg"].name == "gears_and_base" + assert obs_cfg.task_obs.peg_pos.params["peg_offset"] == [0.02025, 0.0, 0.025] + + assert obs_cfg.task_obs.peg_delta.func is peg_delta_from_held_gear_base + assert obs_cfg.task_obs.peg_delta.params["gear_cfg"].name == "medium_nist_gear" + assert obs_cfg.task_obs.peg_delta.params["board_cfg"].name == "gears_and_base" + assert obs_cfg.task_obs.peg_delta.params["held_gear_base_offset"] == [0.02025, 0.0, 0.0] + + assert obs_cfg.task_obs.ee_pos_noiseless.func.__name__ == "body_pos_in_env_frame" + assert obs_cfg.task_obs.ee_pos_noiseless.params["body_name"] == "panda_fingertip_centered" + assert obs_cfg.task_obs.ee_quat_noiseless.func.__name__ == "body_quat_canonical" + assert obs_cfg.task_obs.ee_quat_noiseless.params["body_name"] == "panda_fingertip_centered" + assert obs_cfg.task_obs.concatenate_terms + + +def test_nist_task_reward_terms_share_insertion_geometry(): + rewards_cfg = _nist_task().get_rewards_cfg() + + keypoint_terms = [rewards_cfg.kp_baseline, rewards_cfg.kp_coarse, rewards_cfg.kp_fine] + for term in keypoint_terms: + assert term.func is gear_insertion_rewards.gear_peg_keypoint_squashing + assert term.params["gear_cfg"].name == "medium_nist_gear" + assert term.params["board_cfg"].name == "gears_and_base" + assert term.params["peg_offset"] == [0.02025, 0.0, 0.0] + assert term.params["held_gear_base_offset"] == [0.02025, 0.0, 0.0] + + assert rewards_cfg.engagement_bonus.func is gear_insertion_rewards.gear_insertion_geometry_bonus + assert rewards_cfg.success_bonus.func is gear_insertion_rewards.gear_insertion_geometry_bonus + assert rewards_cfg.engagement_bonus.params["z_fraction"] == 0.90 + assert rewards_cfg.success_bonus.params["z_fraction"] == 0.20 + assert rewards_cfg.success_bonus.params["xy_threshold"] == 0.0025 + + assert rewards_cfg.action_penalty_asset.func is gear_insertion_rewards.osc_action_magnitude_penalty + assert rewards_cfg.action_grad_penalty.func is gear_insertion_rewards.osc_action_delta_penalty + assert rewards_cfg.contact_penalty.func is gear_insertion_rewards.wrist_contact_force_penalty + assert rewards_cfg.success_pred_error.func is gear_insertion_rewards.success_prediction_error + + +def test_nist_task_events_use_embodiment_grasp_and_randomization_cfg(): + embodiment = AssetRegistry().get_asset_by_name("franka_nist_gear_osc")() + task = _nist_task( + grasp_cfg=GraspCfg(**embodiment.get_gear_insertion_grasp_config()), + enable_randomization=True, + ) + + events_cfg = task.get_events_cfg() + + assert events_cfg.place_gear.func is place_gear_in_gripper + assert events_cfg.place_gear.params["gear_cfg"].name == "medium_nist_gear" + assert events_cfg.place_gear.params["hand_grasp_width"] == 0.03 + assert events_cfg.place_gear.params["hand_close_width"] == 0.03 + assert events_cfg.place_gear.params["gripper_joint_setter_func"] is franka_gripper_joint_setter + assert events_cfg.place_gear.params["end_effector_body_name"] == "panda_hand" + assert events_cfg.place_gear.params["finger_joint_names"] == "panda_finger_joint[1-2]" + + assert events_cfg.fixed_asset_pose.params["asset_cfg"].name == "gears_and_base" + assert events_cfg.held_object_mass.params["asset_cfg"].name == "medium_nist_gear" + assert events_cfg.robot_actuator_gains.params["asset_cfg"].joint_names == "panda_joint[1-7]" + assert events_cfg.robot_joint_friction.params["asset_cfg"].joint_names == "panda_joint[1-7]" + + +def test_nist_task_termination_terms_include_success_and_optional_drop_checks(): + terminations_cfg = _nist_task(rl_training_mode=True).get_termination_cfg() + + assert terminations_cfg.success.func is gear_mesh_insertion_success + assert terminations_cfg.success.params["held_object_cfg"].name == "medium_nist_gear" + assert terminations_cfg.success.params["fixed_object_cfg"].name == "gears_and_base" + assert terminations_cfg.success.params["gear_base_offset"] == [0.02025, 0.0, 0.0] + assert terminations_cfg.success.params["held_gear_base_offset"] == [0.02025, 0.0, 0.0] + assert terminations_cfg.success.params["rl_training"] + assert terminations_cfg.object_dropped is None + assert terminations_cfg.gear_dropped_from_gripper is None + + drop_cfg = _nist_task( + grasp_cfg=GraspCfg(gripper_joint_setter_func=franka_gripper_joint_setter), + disable_drop_terminations=False, + ).get_termination_cfg() + + assert drop_cfg.object_dropped is not None + assert drop_cfg.object_dropped.params["minimum_height"] == 0.0 + assert drop_cfg.gear_dropped_from_gripper.func is gear_dropped_from_gripper + assert drop_cfg.gear_dropped_from_gripper.params["gear_cfg"].name == "medium_nist_gear" + assert drop_cfg.gear_dropped_from_gripper.params["ee_body_name"] == "panda_hand" + + +def test_nist_environment_defaults_to_nist_franka_embodiment(): + import argparse + + parser = argparse.ArgumentParser() + NISTAssembledGearMeshOSCEnvironment.add_cli_args(parser) + args_cli = parser.parse_args([]) + + assert args_cli.embodiment == "franka_nist_gear_osc" diff --git a/isaaclab_arena/tests/test_train_rl_games.py b/isaaclab_arena/tests/test_train_rl_games.py new file mode 100644 index 000000000..ada00f5fc --- /dev/null +++ b/isaaclab_arena/tests/test_train_rl_games.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from isaaclab_arena.scripts.reinforcement_learning import train_rl_games + + +def test_copy_forwarded_args_preserves_isaac_lab_options(): + argv = [ + "--task", + "nist_assembled_gear_mesh_osc", + "--num_envs=64", + "--headless", + "--agent_cfg_path", + "legacy.yaml", + "--embodiment", + "franka_nist_gear_osc", + ] + + assert train_rl_games._copy_forwarded_args(argv) == [ + "--task", + "nist_assembled_gear_mesh_osc", + "--num_envs=64", + "--headless", + ] + + +def test_drop_forwarded_args_keeps_environment_specific_options(): + argv = [ + "--task", + "nist_assembled_gear_mesh_osc", + "--num_envs", + "64", + "--headless", + "--embodiment", + "franka_nist_gear_osc", + "--rl_training_mode", + ] + + assert train_rl_games._drop_forwarded_args(argv) == [ + "--embodiment", + "franka_nist_gear_osc", + "--rl_training_mode", + ] + + +def test_remove_deprecated_args_returns_experiment_name(): + remaining, experiment_name = train_rl_games._remove_deprecated_args([ + "--agent_cfg_path", + "legacy.yaml", + "--experiment_name", + "test_run", + "--rl_training_mode", + ]) + + assert remaining == ["--rl_training_mode"] + assert experiment_name == "test_run" + + +def test_normalize_legacy_positional_task(): + assert train_rl_games._normalize_legacy_positional_task(["nist_assembled_gear_mesh_osc", "--num_envs", "64"]) == [ + "--task", + "nist_assembled_gear_mesh_osc", + "--num_envs", + "64", + ] + + +def test_normalize_legacy_positional_task_keeps_explicit_task(): + argv = ["--task", "nist_assembled_gear_mesh_osc", "--num_envs", "64"] + + assert train_rl_games._normalize_legacy_positional_task(argv) == argv diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py index 9830edba4..f5203af83 100644 --- a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py @@ -3,8 +3,10 @@ # # SPDX-License-Identifier: Apache-2.0 -"""OSC torque action term for NIST gear insertion: asset-relative commands, EMA smoothing, -roll/pitch lock, target clipping, and success prediction (7-D policy output). +"""Operational-space action term for NIST gear insertion with a Franka arm. + +This module converts normalized policy commands into end-effector pose targets +for the OSC controller and applies task-specific filtering around peg contact. """ from __future__ import annotations @@ -20,36 +22,57 @@ from isaaclab.envs.mdp.actions.task_space_actions import OperationalSpaceControllerAction from isaaclab.managers.action_manager import ActionTerm from isaaclab.utils import configclass +from isaaclab_tasks.direct.factory.factory_utils import wrap_yaw +from isaaclab_tasks.direct.forge.forge_utils import get_random_prop_gains if TYPE_CHECKING: from isaaclab.envs import ManagerBasedEnv -def _wrap_yaw(angle: torch.Tensor) -> torch.Tensor: - """Map yaw angles so the Franka joint-limit discontinuity (~-135 deg) is avoided.""" - return torch.where(angle > math.radians(235), angle - 2 * math.pi, angle) +def _action_to_target_yaw(action: torch.Tensor) -> torch.Tensor: + """Map normalized policy action to the commanded yaw interval.""" + return -math.pi + math.radians(270.0) * (action + 1.0) / 2.0 -def _randomize_gains( - default_values: torch.Tensor, - noise_levels: tuple[float, ...], - num_envs: int, - device: torch.device, -) -> torch.Tensor: - """Multiplicative gain randomization for position/rotation clip thresholds.""" - ndim = default_values.shape[-1] - noise = torch.rand(num_envs, ndim, device=device) * torch.tensor(noise_levels, device=device) - multiplier = 1.0 + noise - decrease = torch.rand(num_envs, ndim, device=device) > 0.5 - return default_values * torch.where(decrease, 1.0 / multiplier, multiplier) +def _target_yaw_to_action(yaw: torch.Tensor) -> torch.Tensor: + """Map commanded yaw back to the normalized policy interval.""" + return (yaw + math.pi) / math.radians(270.0) * 2.0 - 1.0 -class NistGearInsertionOscAction(OperationalSpaceControllerAction): - """Operational-space torque control for peg-style insertion with a 7-D policy. +def _wrap_yaw_to_action_range(yaw: torch.Tensor) -> torch.Tensor: + """Represent wrapped yaw in the policy command interval.""" + yaw = torch.where(yaw > math.pi / 2, yaw - 2 * math.pi, yaw) + return torch.where(yaw < -math.pi, yaw + 2 * math.pi, yaw) + + +def _gripper_down_to_yaw_frame_quat(num_envs: int, device: torch.device) -> torch.Tensor: + """Return the fixed rotation from the gripper-down frame to the yaw frame.""" + return math_utils.quat_from_euler_xyz( + torch.full((num_envs,), -math.pi, device=device), + torch.zeros(num_envs, device=device), + torch.zeros(num_envs, device=device), + ) + - 3 position + 3 rotation + 1 success prediction. Asset-relative position, roll/pitch - locked, EMA smoothing, target clipping. Layout follows common assembly RL benchmarks. - """ +def get_nist_gear_insertion_arm_action( + env: ManagerBasedEnv, + term_name: str = "arm_action", +) -> NistGearInsertionOscAction: + """Return the NIST gear insertion OSC action term from an environment.""" + try: + action_term = env.action_manager.get_term(term_name) + except KeyError as exc: + raise KeyError(f"Action term '{term_name}' is required for NIST gear insertion.") from exc + if not isinstance(action_term, NistGearInsertionOscAction): + raise TypeError( + f"Action term '{term_name}' must be {NistGearInsertionOscAction.__name__}; " + f"got {type(action_term).__name__}." + ) + return action_term + + +class NistGearInsertionOscAction(OperationalSpaceControllerAction): + """Operational-space control action term for NIST gear insertion.""" cfg: NistGearInsertionOscActionCfg @@ -60,10 +83,10 @@ def __init__(self, cfg: NistGearInsertionOscActionCfg, env: ManagerBasedEnv): self.ema_factor = torch.full((self.num_envs, 1), 0.05, device=self.device) self._pos_bounds = torch.tensor(cfg.pos_action_bounds, device=self.device) - _pt = torch.tensor(cfg.pos_action_threshold, device=self.device) - _rt = torch.tensor(cfg.rot_action_threshold, device=self.device) - self._default_pos_thresh = _pt.unsqueeze(0).expand(self.num_envs, -1).clone() - self._default_rot_thresh = _rt.unsqueeze(0).expand(self.num_envs, -1).clone() + pos_threshold = torch.tensor(cfg.pos_action_threshold, device=self.device) + rot_threshold = torch.tensor(cfg.rot_action_threshold, device=self.device) + self._default_pos_thresh = pos_threshold.unsqueeze(0).expand(self.num_envs, -1).clone() + self._default_rot_thresh = rot_threshold.unsqueeze(0).expand(self.num_envs, -1).clone() self._pos_thresh = self._default_pos_thresh.clone() self._rot_thresh = self._default_rot_thresh.clone() @@ -79,9 +102,11 @@ def __init__(self, cfg: NistGearInsertionOscActionCfg, env: ManagerBasedEnv): self.success_pred = torch.full((self.num_envs,), -1.0, device=self.device) self._pos_dead_zone = torch.tensor(cfg.pos_dead_zone, device=self.device).unsqueeze(0) self._rot_dead_zone = cfg.rot_dead_zone + self._force_body_idx: int | None = None + self._force_smoothing_factor = cfg.force_smoothing_factor - def _get_bolt_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: - """Peg tip in env frame from fixed asset pose + local peg offset.""" + def _get_peg_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: + """Return the peg target position in each environment frame.""" origins = self._env.scene.env_origins board = self._env.scene[self.cfg.fixed_asset_name] pos = wp.to_torch(board.data.root_pos_w) - origins @@ -94,106 +119,186 @@ def _get_bolt_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: @property def action_dim(self) -> int: + """Number of policy actions consumed by the OSC term.""" return 7 - def apply_actions(self): - super().apply_actions() + @property + def smoothed_actions(self) -> torch.Tensor: + """EMA-filtered policy actions used by observations and penalties.""" + return self._smoothed_actions + + @property + def previous_smoothed_actions(self) -> torch.Tensor: + """EMA-filtered policy actions from the previous environment step.""" + return self._prev_smoothed_actions + + @property + def position_thresholds(self) -> torch.Tensor: + """Per-environment position command limits after reset randomization.""" + return self._pos_thresh + + @property + def rotation_thresholds(self) -> torch.Tensor: + """Per-environment orientation command limits after reset randomization.""" + return self._rot_thresh def process_actions(self, actions: torch.Tensor): + self._update_smoothed_force() + self._update_smoothed_actions(actions) + self._compute_ee_pose() + + # Convert normalized policy output into a bounded OSC pose command. + ee_pos_b = self._ee_pose_b[:, :3] + ee_quat_b = self._ee_pose_b[:, 3:7] + self._processed_actions[:, :3] = self._compute_target_position(ee_pos_b) + self._processed_actions[:, 3:7] = self._compute_target_orientation(ee_quat_b) + + self._compute_task_frame_pose() + self._osc.set_command( + command=self._processed_actions, + current_ee_pose_b=self._ee_pose_b, + current_task_frame_pose_b=self._task_frame_pose_b, + ) + + def _update_smoothed_actions(self, actions: torch.Tensor) -> None: + actions = torch.nan_to_num(actions, nan=0.0, posinf=1.0, neginf=-1.0).clamp(-1.0, 1.0) self._raw_actions[:] = actions self._prev_smoothed_actions[:] = self._smoothed_actions self._smoothed_actions[:] = self.ema_factor * actions + (1.0 - self.ema_factor) * self._smoothed_actions self.success_pred[:] = self._smoothed_actions[:, 6] - self._compute_ee_pose() - ee_pos_b = self._ee_pose_b[:, :3] - ee_quat_b = self._ee_pose_b[:, 3:7] + def _ensure_force_body_idx(self) -> None: + """Resolve the wrist force-sensor body index.""" + if self._force_body_idx is not None: + return + body_ids, body_names = self._asset.find_bodies(self.cfg.force_body_name) + if len(body_ids) != 1: + raise ValueError( + f"Body '{self.cfg.force_body_name}' is required by {self.__class__.__name__} on asset " + f"'{self.cfg.asset_name}'. Use a USD with a wrist force-sensor body or override " + f"'force_body_name'. Found {len(body_ids)} matches: {body_names}." + ) + self._force_body_idx = body_ids[0] + + def _update_smoothed_force(self) -> None: + """Update the wrist-force EMA used by observations and rewards.""" + self._ensure_force_body_idx() + raw_force = wp.to_torch(self._asset.root_view.get_link_incoming_joint_force())[:, self._force_body_idx, :3] + raw_force = torch.nan_to_num(raw_force, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) + self.force_smooth[:] = ( + self._force_smoothing_factor * raw_force + (1.0 - self._force_smoothing_factor) * self.force_smooth + ) - bolt_pos_b = self._get_bolt_pos() + self.fixed_pos_noise + def _compute_target_position(self, ee_pos_b: torch.Tensor) -> torch.Tensor: + """Return the next end-effector position command in the robot base frame.""" + peg_pos_b = self._get_peg_pos() + self.fixed_pos_noise pos_actions = self._smoothed_actions[:, :3] - target_pos = bolt_pos_b + pos_actions * self._pos_bounds + target_pos = peg_pos_b + pos_actions * self._pos_bounds self.delta_pos[:] = target_pos - ee_pos_b - clipped_delta = torch.clamp(self.delta_pos, -self._pos_thresh, self._pos_thresh) - clipped_delta = torch.where( - torch.abs(clipped_delta) > self._pos_dead_zone, - clipped_delta, - torch.zeros_like(clipped_delta), + clipped_delta = self._clip_delta_with_dead_zone(self.delta_pos, self._pos_thresh, self._pos_dead_zone) + return ee_pos_b + clipped_delta + + def _compute_target_orientation(self, ee_quat_b: torch.Tensor) -> torch.Tensor: + """Return the next end-effector orientation command in the robot base frame.""" + target_yaw = _action_to_target_yaw(self._smoothed_actions[:, 5]) + target_quat = self._target_quat_from_yaw(target_yaw) + desired_xyz = self._clip_orientation_delta(ee_quat_b, target_quat) + return math_utils.quat_from_euler_xyz( + roll=desired_xyz[:, 0], + pitch=desired_xyz[:, 1], + yaw=desired_xyz[:, 2], ) - final_pos = ee_pos_b + clipped_delta - rot_actions = self._smoothed_actions[:, 3:6].clone() - rot_actions[:, 0:2] = 0.0 - - yaw_rad = math.radians(-180.0) + math.radians(270.0) * (rot_actions[:, 2] + 1.0) / 2.0 - zero = torch.zeros_like(yaw_rad) - bolt_quat = math_utils.quat_from_euler_xyz(zero, zero, yaw_rad) - pi_t = torch.full_like(yaw_rad, math.pi) - flip_quat = math_utils.quat_from_euler_xyz(pi_t, zero, zero) - target_quat = math_utils.quat_mul(flip_quat, bolt_quat) + def _target_quat_from_yaw(self, target_yaw: torch.Tensor) -> torch.Tensor: + """Return the gripper-down target orientation for the commanded yaw.""" + zero = torch.zeros_like(target_yaw) + target_yaw_quat = math_utils.quat_from_euler_xyz(zero, zero, target_yaw) + gripper_down_quat = math_utils.quat_from_euler_xyz(torch.full_like(target_yaw, math.pi), zero, zero) + return math_utils.quat_mul(gripper_down_quat, target_yaw_quat) + def _clip_orientation_delta(self, ee_quat_b: torch.Tensor, target_quat: torch.Tensor) -> torch.Tensor: + """Clip roll, pitch, and yaw deltas before sending the OSC command.""" curr_roll, curr_pitch, curr_yaw = math_utils.euler_xyz_from_quat(ee_quat_b, wrap_to_2pi=True) desired_roll, desired_pitch, desired_yaw = math_utils.euler_xyz_from_quat(target_quat, wrap_to_2pi=True) desired_xyz = torch.stack([desired_roll, desired_pitch, desired_yaw], dim=1) - curr_yaw = _wrap_yaw(curr_yaw) - desired_yaw = _wrap_yaw(desired_yaw) - - self.delta_yaw[:] = desired_yaw - curr_yaw - clipped_yaw = torch.clamp(self.delta_yaw, -self._rot_thresh[:, 2], self._rot_thresh[:, 2]) - clipped_yaw = torch.where( - torch.abs(clipped_yaw) > self._rot_dead_zone, - clipped_yaw, - torch.zeros_like(clipped_yaw), - ) - desired_xyz[:, 2] = curr_yaw + clipped_yaw + desired_xyz[:, 0] = self._clip_roll_delta(curr_roll, desired_roll) + desired_xyz[:, 1] = self._clip_pitch_delta(curr_pitch, desired_pitch) + desired_xyz[:, 2] = self._clip_yaw_delta(curr_yaw, desired_yaw) + return desired_xyz + def _clip_roll_delta(self, curr_roll: torch.Tensor, desired_roll: torch.Tensor) -> torch.Tensor: + """Return roll target after applying the per-step rotation limit.""" desired_roll = torch.where(desired_roll < 0.0, desired_roll + 2 * math.pi, desired_roll) delta_roll = desired_roll - curr_roll clipped_roll = torch.clamp(delta_roll, -self._rot_thresh[:, 0], self._rot_thresh[:, 0]) - desired_xyz[:, 0] = curr_roll + clipped_roll + return curr_roll + clipped_roll + def _clip_pitch_delta(self, curr_pitch: torch.Tensor, desired_pitch: torch.Tensor) -> torch.Tensor: + """Return pitch target after wrapping into the signed interval and clipping.""" curr_pitch_w = torch.where(curr_pitch > math.pi, curr_pitch - 2 * math.pi, curr_pitch) desired_pitch = torch.where(desired_pitch < 0.0, desired_pitch + 2 * math.pi, desired_pitch) desired_pitch_w = torch.where(desired_pitch > math.pi, desired_pitch - 2 * math.pi, desired_pitch) delta_pitch = desired_pitch_w - curr_pitch_w clipped_pitch = torch.clamp(delta_pitch, -self._rot_thresh[:, 1], self._rot_thresh[:, 1]) - desired_xyz[:, 1] = curr_pitch_w + clipped_pitch + return curr_pitch_w + clipped_pitch - final_quat = math_utils.quat_from_euler_xyz( - roll=desired_xyz[:, 0], - pitch=desired_xyz[:, 1], - yaw=desired_xyz[:, 2], - ) + def _clip_yaw_delta(self, curr_yaw: torch.Tensor, desired_yaw: torch.Tensor) -> torch.Tensor: + """Return yaw target after wrapping, clipping, and applying the dead zone.""" + curr_yaw = wrap_yaw(curr_yaw) + desired_yaw = wrap_yaw(desired_yaw) - self._processed_actions[:, :3] = final_pos - self._processed_actions[:, 3:7] = final_quat - - self._compute_task_frame_pose() - self._osc.set_command( - command=self._processed_actions, - current_ee_pose_b=self._ee_pose_b, - current_task_frame_pose_b=self._task_frame_pose_b, + self.delta_yaw[:] = desired_yaw - curr_yaw + clipped_yaw = self._clip_delta_with_dead_zone( + self.delta_yaw, + self._rot_thresh[:, 2], + self._rot_dead_zone, ) + return curr_yaw + clipped_yaw + + def _clip_delta_with_dead_zone( + self, + delta: torch.Tensor, + limits: torch.Tensor | float, + dead_zone: torch.Tensor | float, + ) -> torch.Tensor: + """Clamp a requested delta and zero small values.""" + clipped = torch.clamp(delta, -limits, limits) + return torch.where(torch.abs(clipped) > dead_zone, clipped, torch.zeros_like(clipped)) + + def _ee_quat_to_yaw_action(self, ee_quat: torch.Tensor) -> torch.Tensor: + """Convert the current EE orientation to the normalized policy yaw.""" + n = ee_quat.shape[0] + gripper_down_to_yaw_frame = _gripper_down_to_yaw_frame_quat(n, self.device) + yaw_frame_quat = math_utils.quat_mul(gripper_down_to_yaw_frame, ee_quat) + target_yaw = math_utils.euler_xyz_from_quat(yaw_frame_quat, wrap_to_2pi=True)[2] + target_yaw = _wrap_yaw_to_action_range(target_yaw) + return _target_yaw_to_action(target_yaw) def reset(self, env_ids: Sequence[int] | None = None) -> None: super().reset(env_ids) - if env_ids is None or (hasattr(env_ids, "__len__") and len(env_ids) == 0): + if env_ids is None: + env_ids = slice(None) + n = self.num_envs + elif isinstance(env_ids, slice): + n = len(range(*env_ids.indices(self.num_envs))) + elif len(env_ids) == 0: return - - n = len(env_ids) + else: + n = len(env_ids) lo, hi = self.cfg.ema_factor_range self.ema_factor[env_ids] = lo + torch.rand(n, 1, device=self.device) * (hi - lo) - self._pos_thresh[env_ids] = _randomize_gains( + self._pos_thresh[env_ids] = get_random_prop_gains( self._default_pos_thresh[env_ids], self.cfg.pos_threshold_noise_level, n, self.device, ) - self._rot_thresh[env_ids] = _randomize_gains( + self._rot_thresh[env_ids] = get_random_prop_gains( self._default_rot_thresh[env_ids], self.cfg.rot_threshold_noise_level, n, @@ -211,21 +316,12 @@ def reset(self, env_ids: Sequence[int] | None = None) -> None: ee_pos = self._ee_pose_b[env_ids, :3] ee_quat = self._ee_pose_b[env_ids, 3:7] - bolt_pos = self._get_bolt_pos(env_ids) + self.fixed_pos_noise[env_ids] + peg_pos = self._get_peg_pos(env_ids) + self.fixed_pos_noise[env_ids] - pos_actions = (ee_pos - bolt_pos) / self._pos_bounds + pos_actions = (ee_pos - peg_pos) / self._pos_bounds self._smoothed_actions[env_ids, 0:3] = pos_actions - unrot_pi = math_utils.quat_from_euler_xyz( - torch.full((n,), -math.pi, device=self.device), - torch.zeros(n, device=self.device), - torch.zeros(n, device=self.device), - ) - quat_rel_bolt = math_utils.quat_mul(unrot_pi, ee_quat) - yaw_bolt = math_utils.euler_xyz_from_quat(quat_rel_bolt, wrap_to_2pi=True)[2] - yaw_bolt = torch.where(yaw_bolt > math.pi / 2, yaw_bolt - 2 * math.pi, yaw_bolt) - yaw_bolt = torch.where(yaw_bolt < -math.pi, yaw_bolt + 2 * math.pi, yaw_bolt) - yaw_action = (yaw_bolt + math.radians(180.0)) / math.radians(270.0) * 2.0 - 1.0 + yaw_action = self._ee_quat_to_yaw_action(ee_quat) self._smoothed_actions[env_ids, 3:5] = 0.0 self._smoothed_actions[env_ids, 5] = yaw_action @@ -244,15 +340,43 @@ class NistGearInsertionOscActionCfg(OperationalSpaceControllerActionCfg): class_type: type[ActionTerm] = NistGearInsertionOscAction fixed_asset_name: str = "gears_and_base" + """Name of the fixed asset that contains the insertion peg.""" + peg_offset: tuple[float, float, float] = (0.0, 0.0, 0.0) + """Local-frame peg offset on :attr:`fixed_asset_name`.""" + fixed_pos_noise_levels: tuple[float, float, float] = (0.001, 0.001, 0.001) + """Per-axis standard deviation for reset-time target noise.""" + pos_action_bounds: tuple[float, float, float] = (0.05, 0.05, 0.05) + """Position scale applied to normalized policy actions.""" + pos_action_threshold: tuple[float, float, float] = (0.02, 0.02, 0.02) + """Maximum per-step position delta sent to the OSC controller.""" + rot_action_threshold: tuple[float, float, float] = (0.097, 0.097, 0.097) + """Maximum per-step orientation delta sent to the OSC controller.""" + pos_threshold_noise_level: tuple[float, float, float] = (0.25, 0.25, 0.25) + """Reset-time multiplicative noise for position thresholds.""" + rot_threshold_noise_level: tuple[float, float, float] = (0.29, 0.29, 0.29) + """Reset-time multiplicative noise for orientation thresholds.""" + ema_factor_range: tuple[float, float] = (0.05, 0.2) + """Reset-time range for the action EMA factor.""" + contact_threshold_range: tuple[float, float] = (5.0, 10.0) - # Dead zone: zero out small commanded deltas on the task wrench. - pos_dead_zone: tuple[float, float, float] = (0.0005, 0.0005, 0.0005) # m, ~0.5 mm - rot_dead_zone: float = 0.001 # rad + """Reset-time wrist-force threshold range.""" + + pos_dead_zone: tuple[float, float, float] = (0.0005, 0.0005, 0.0005) + """Position command dead zone.""" + + rot_dead_zone: float = 0.001 + """Orientation command dead zone.""" + + force_body_name: str = "force_sensor" + """Body that exposes the wrist force-sensor joint wrench.""" + + force_smoothing_factor: float = 0.25 + """EMA factor used to smooth wrist-force readings.""" diff --git a/isaaclab_arena_environments/mdp/robot_configs.py b/isaaclab_arena_environments/mdp/robot_configs.py index a6b63874d..182585253 100644 --- a/isaaclab_arena_environments/mdp/robot_configs.py +++ b/isaaclab_arena_environments/mdp/robot_configs.py @@ -8,30 +8,8 @@ from __future__ import annotations -import torch -from collections.abc import Sequence - -import isaaclab.sim as sim_utils -from isaaclab.actuators import ImplicitActuatorCfg -from isaaclab.assets import ArticulationCfg -from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR from isaaclab_assets.robots.franka import FRANKA_PANDA_HIGH_PD_CFG - -def franka_gripper_joint_setter( - joint_pos: torch.Tensor, - row_indices: Sequence[int], - finger_joint_indices: Sequence[int], - width: float, -) -> None: - """Set Franka Panda finger joints to achieve a given total opening *width*. - - Each finger joint is set to ``width / 2`` (symmetric grasp). - """ - for jid in finger_joint_indices: - joint_pos[row_indices, jid] = width / 2.0 - - # =========================================================================================== # Franka Panda robot configuration optimized for assembly tasks (peg insert, gear mesh, etc.). # =========================================================================================== @@ -54,75 +32,3 @@ def franka_gripper_joint_setter( # Set initial position for tabletop tasks FRANKA_PANDA_ASSEMBLY_HIGH_PD_CFG.init_state.pos = (0.0, 0.0, 0.0) - - -_FACTORY_ASSET_DIR = f"{ISAACLAB_NUCLEUS_DIR}/Factory" - -FRANKA_MIMIC_OSC_CFG = ArticulationCfg( - prim_path="{ENV_REGEX_NS}/Robot", - spawn=sim_utils.UsdFileCfg( - usd_path=f"{_FACTORY_ASSET_DIR}/franka_mimic.usd", - activate_contact_sensors=True, - rigid_props=sim_utils.RigidBodyPropertiesCfg( - disable_gravity=True, - max_depenetration_velocity=5.0, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, - ), - articulation_props=sim_utils.ArticulationRootPropertiesCfg( - enabled_self_collisions=False, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - ), - collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), - ), - init_state=ArticulationCfg.InitialStateCfg( - joint_pos={ - "panda_joint1": 0.0, - "panda_joint2": -0.569, - "panda_joint3": 0.0, - "panda_joint4": -2.810, - "panda_joint5": 0.0, - "panda_joint6": 3.037, - "panda_joint7": 0.741, - "panda_finger_joint2": 0.04, - }, - pos=(0.0, 0.0, 0.0), - rot=(0.0, 0.0, 0.0, 1.0), - ), - actuators={ - "panda_arm1": ImplicitActuatorCfg( - joint_names_expr=["panda_joint[1-4]"], - stiffness=0.0, - damping=0.0, - friction=0.0, - armature=0.0, - effort_limit_sim=87, - velocity_limit_sim=124.6, - ), - "panda_arm2": ImplicitActuatorCfg( - joint_names_expr=["panda_joint[5-7]"], - stiffness=0.0, - damping=0.0, - friction=0.0, - armature=0.0, - effort_limit_sim=12, - velocity_limit_sim=149.5, - ), - "panda_hand": ImplicitActuatorCfg( - joint_names_expr=["panda_finger_joint[1-2]"], - effort_limit_sim=40.0, - velocity_limit_sim=0.04, - stiffness=7500.0, - damping=173.0, - friction=0.1, - armature=0.0, - ), - }, -) diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py index 8bafe6f80..91fc6306a 100644 --- a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -19,18 +19,16 @@ class NISTAssembledGearMeshOSCEnvironment(ExampleEnvironmentBase): def get_env(self, args_cli: argparse.Namespace): import isaaclab.sim as sim_utils - from isaaclab.controllers import OperationalSpaceControllerCfg - from isaaclab.envs.mdp.actions import BinaryJointPositionActionCfg - from isaaclab.managers import ObservationTermCfg as ObsTerm - from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg, OffsetCfg import isaaclab_arena_environments.mdp as mdp from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment from isaaclab_arena.scene.scene import Scene - from isaaclab_arena.tasks.nist_gear_insertion_task import GraspConfig, NistGearInsertionTask - from isaaclab_arena.tasks.observations.gear_insertion_observations import NistGearInsertionPolicyObservations + from isaaclab_arena.tasks.nist_gear_insertion_task import ( + GearInsertionGeometryCfg, + GraspCfg, + NistGearInsertionTask, + ) from isaaclab_arena.utils.pose import Pose - from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg peg_tip_offset = (0.02025, 0.0, 0.025) peg_base_offset = (0.02025, 0.0, 0.0) @@ -48,95 +46,9 @@ def get_env(self, args_cli: argparse.Namespace): embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)( enable_cameras=args_cli.enable_cameras, concatenate_observation_terms=True, - ) - embodiment.scene_config.robot = mdp.FRANKA_MIMIC_OSC_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") - embodiment.scene_config.ee_frame = FrameTransformerCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_link0", - debug_vis=False, - target_frames=[ - FrameTransformerCfg.FrameCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_fingertip_centered", - name="end_effector", - offset=OffsetCfg(pos=(0.0, 0.0, 0.0)), - ), - FrameTransformerCfg.FrameCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_rightfinger", - name="tool_rightfinger", - offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), - ), - FrameTransformerCfg.FrameCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_leftfinger", - name="tool_leftfinger", - offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), - ), - ], - ) - embodiment.set_initial_joint_pose([ - 0.561824, - 0.287201, - -0.543103, - -2.410188, - 0.507908, - 2.847644, - 0.454298, - 0.04, - 0.04, - ]) - embodiment.action_config.arm_action = NistGearInsertionOscActionCfg( - asset_name="robot", - joint_names=["panda_joint[1-7]"], - body_name="panda_fingertip_centered", - controller_cfg=OperationalSpaceControllerCfg( - target_types=["pose_abs"], - impedance_mode="fixed", - inertial_dynamics_decoupling=True, - partial_inertial_dynamics_decoupling=False, - gravity_compensation=False, - motion_stiffness_task=[565.0, 565.0, 565.0, 28.0, 28.0, 28.0], - motion_damping_ratio_task=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - nullspace_control="position", - nullspace_stiffness=10.0, - nullspace_damping_ratio=1.0, - ), - position_scale=1.0, - orientation_scale=1.0, - nullspace_joint_pos_target="default", fixed_asset_name=gears_and_base.name, peg_offset=peg_tip_offset, ) - embodiment.action_config.gripper_action = BinaryJointPositionActionCfg( - asset_name="robot", - joint_names=["panda_finger_joint[1-2]"], - open_command_expr={"panda_finger_joint1": 0.0, "panda_finger_joint2": 0.0}, - close_command_expr={"panda_finger_joint1": 0.0, "panda_finger_joint2": 0.0}, - ) - - # OSC insertion requires a specialized 24-D policy observation (peg-relative - # EE pose, force feedback, prev actions) instead of the default embodiment obs. - # The task_obs group adds privileged state for the critic separately. - embodiment.observation_config.policy.actions = None - embodiment.observation_config.policy.gripper_pos = None - embodiment.observation_config.policy.eef_pos = None - embodiment.observation_config.policy.eef_quat = None - embodiment.observation_config.policy.joint_pos = None - embodiment.observation_config.policy.joint_vel = None - embodiment.observation_config.policy.nist_gear_policy_obs = ObsTerm( - func=NistGearInsertionPolicyObservations, - params={ - "robot_name": "robot", - "board_name": gears_and_base.name, - "peg_offset": list(peg_tip_offset), - "fingertip_body_name": "panda_fingertip_centered", - "force_body_name": "force_sensor", - "pos_noise_level": 0.0, - "rot_noise_level_deg": 0.0, - "force_noise_level": 0.0, - }, - ) - # OSC torque control uses task-specific penalties (action magnitude, jerk, - # contact force) instead of the default joint-level regularizers. - embodiment.reward_config.action_rate = None - embodiment.reward_config.joint_vel = None if args_cli.teleop_device is not None: teleop_device = self.device_registry.get_device_by_name(args_cli.teleop_device)() @@ -153,27 +65,20 @@ def get_env(self, args_cli: argparse.Namespace): ) scene = Scene(assets=[table, assembled_board, medium_gear, gears_and_base, light]) - grasp_cfg = GraspConfig( - num_arm_joints=7, - hand_grasp_width=0.03, - hand_close_width=0.0, - gripper_joint_setter_func=mdp.franka_gripper_joint_setter, - end_effector_body_name="panda_hand", - grasp_rot_offset=[1.0, 0.0, 0.0, 0.0], - grasp_offset=[0.02, 0.0, -0.128], - arm_joint_names="panda_joint[1-7]", - finger_body_names=".*finger", + grasp_cfg = GraspCfg(**embodiment.get_gear_insertion_grasp_config()) + geometry_cfg = GearInsertionGeometryCfg( + peg_offset_from_board=list(peg_base_offset), + peg_offset_for_obs=list(peg_tip_offset), + success_z_fraction=success_z_fraction, + xy_threshold=xy_threshold, ) task = NistGearInsertionTask( assembled_board=assembled_board, held_gear=medium_gear, background_scene=table, - peg_offset_from_board=list(peg_base_offset), - peg_offset_for_obs=list(peg_tip_offset), gear_base_asset=gears_and_base, - success_z_fraction=success_z_fraction, - xy_threshold=xy_threshold, + geometry_cfg=geometry_cfg, episode_length_s=episode_length_s, grasp_cfg=grasp_cfg, enable_randomization=True, @@ -193,7 +98,7 @@ def get_env(self, args_cli: argparse.Namespace): @staticmethod def add_cli_args(parser: argparse.ArgumentParser) -> None: - parser.add_argument("--embodiment", type=str, default="franka_ik", help="Robot embodiment") + parser.add_argument("--embodiment", type=str, default="franka_nist_gear_osc", help="Robot embodiment") parser.add_argument( "--teleop_device", type=str, default=None, help="Teleoperation device (e.g., keyboard, spacemouse)" ) diff --git a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml index 7f2a4ff88..2e7a55eba 100644 --- a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml +++ b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml @@ -27,7 +27,7 @@ params: space: continuous: mu_activation: None - sigma_activation: None + sigma_activation: tanh mu_init: name: default sigma_init: From 72ed2ca3f5d9dd387c806103748dd86b1dc37785 Mon Sep 17 00:00:00 2001 From: odoherty Date: Wed, 6 May 2026 16:44:33 -0700 Subject: [PATCH 08/12] Remove docs from NIST gear code branch Signed-off-by: odoherty --- .gitattributes | 3 - docs/images/nist_gear_insertion_parallel.gif | 3 - docs/images/nist_gear_insertion_task.gif | 3 - .../react-isaac-sample-controls-start.jpg | Bin 130 -> 24133 bytes .../nist_gear_insertion/index.rst | 96 ------- .../step_1_environment_setup.rst | 252 ------------------ .../step_2_policy_training.rst | 140 ---------- .../nist_gear_insertion/step_3_evaluation.rst | 187 ------------- .../index.rst | 2 - 9 files changed, 686 deletions(-) delete mode 100644 docs/images/nist_gear_insertion_parallel.gif delete mode 100644 docs/images/nist_gear_insertion_task.gif delete mode 100644 docs/pages/example_workflows/nist_gear_insertion/index.rst delete mode 100644 docs/pages/example_workflows/nist_gear_insertion/step_1_environment_setup.rst delete mode 100644 docs/pages/example_workflows/nist_gear_insertion/step_2_policy_training.rst delete mode 100644 docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst diff --git a/.gitattributes b/.gitattributes index aa94387cc..0d4fdcf71 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,6 +5,3 @@ *.gif filter=lfs diff=lfs merge=lfs -text *.mp4 filter=lfs diff=lfs merge=lfs -text docs/images/ filter=lfs diff=lfs merge=lfs -text -assets/*.usd filter=lfs diff=lfs merge=lfs -text -assets/**/*.jpg filter=lfs diff=lfs merge=lfs -text -models/**/*.pth filter=lfs diff=lfs merge=lfs -text diff --git a/docs/images/nist_gear_insertion_parallel.gif b/docs/images/nist_gear_insertion_parallel.gif deleted file mode 100644 index 1518a17b4..000000000 --- a/docs/images/nist_gear_insertion_parallel.gif +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48de559f1c004b8a4d962415374907a2fd608c2ca37f171bcf49445a7afe6e4e -size 4076148 diff --git a/docs/images/nist_gear_insertion_task.gif b/docs/images/nist_gear_insertion_task.gif deleted file mode 100644 index 6086ed777..000000000 --- a/docs/images/nist_gear_insertion_task.gif +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:58b7210281b9425cec7a045929647b0a3c4ce60278f3298aa79fd3871b6c2609 -size 2059467 diff --git a/docs/images/react-isaac-sample-controls-start.jpg b/docs/images/react-isaac-sample-controls-start.jpg index ca7e5b9cb87e3aa83cefce857fd2c63d00c5f873..8140f221a8245c9ce12dab779a60f2f8d877c5ad 100644 GIT binary patch literal 24133 zcmeFYc|26@|37}Da+kZ@k=&_FH-RpKIoF)I&b7U^=j-*lj^wMP zN9spos`DiXGB<~|LJ;%=BqOyPS^!Eq;JhFu4@rZo6!?Rrl%Yl6%MkQLYR%uv&!jg0 zRR=t^K;a+tgPfs-f7MR`SBP}*%x`B-k$*jX`lRWBGp9{S$H=CK^tb=hp}Ax44qcrc zd-v|yp{KWF&-R^~^Upy?zzO>Q>nigQf@HwAh1+-T+z~GIXIW~2H1zCm_jhdH{!Hp0 z<9N2>Uw8;Lsd&1^X=wW4>~wGAMtf}IOTin^hIBn zi+k-=iAL)UsQOgbtF8{7=hstRuef>WQw>$WH?9xL^S8BC*MG0#>0+pQ*8KSTgA{j% z^}1TywYIB*r`_!@>7P7w_^)TdH$&CG2I=kXt>wK-i{kF64Hj*$_V%6HJ9lb=8k!zH zZl335B7wd-_+-yEz)FQZ?-zE}i$f;;Cx% zzoET>_WU@nID@9Ym;UTU``?V<`;Y%Ly}#85Yc^l}`{Em||JU)a75LW*{A&gNwF3WI zfq$*Q|NmCtAMnk=4PZ8J0L~yuuhdu5g9pjSPm&Is{$>nG%_H59^LTLKb_jBH^K?IH za$x;gE9><#-GAcX`QOf8^td{I{SOY%hM?s;W`!F6#IXNC#sA>;l6i~;e&d02gFEOP zjA~jLMG6G`T2wI;GiTx(4H9xl79G)dl#NUP;MXu z{mlNyy$#PGNbU{<<*@#7?;mS&^}PG}@6FA72n#L&|6?*4f|gl9kU|{|LuaL6L=hJBp;v^KS(Fc>Eon9L7LmdgGFEs&CyUa(Ml5g0G188GkU zEL^cj{+AuUE><{kUPkrW%AGgEpZ}nC;BATG$$H*~T^HPMEm^us>F3qT8#k$M-m-PK zj;`LGz4`|a86P$|VtVw{=`$9VfE*VAE_ZZtzU<-Yb={ll<9qu~;N75m!66ZmkD?w& zKY1FHkoY3$Wpc`^*Y7g3vU76Z=jE4{l~+_&eXRb}(Ad=6()zWH-P7CGKQK5nJo1e{ zIW;{am=%iVfYtv>!jcLt71YZ)|EsL?bk)Q0GYtb$G8%wqEkci^rdk$boqI=NuYo}#eq9M{!5{N zl_&Nu`v2f0Nb3GvUF^67x@BV`ffOnC?b~O!agk{~W?KmA4Q^YvOQ7<;idlHNw3f)9 zWqu8THUomppBJmv1`p_`s!1SO%P1<+O)Oy%QVB{_M^a%xfkDen`7;veW|jEUOvq;G zsj^A}&zx~fxi1S>$Zy(xn35kNft-TjZh6|21X8uEvC{;-T|mv=bgJK;DbO_5^M9pZ zRB%NCeT))28$-YP>nFRtU@Uf>hpgedxVim+5AJ$GiVv5DfO_p@ss@>zW_5!}TJyh#ia? z5=tv3@ku4DkggxX(+jP7IuDc)Dz?dG;$m?WcI=e$=6;fvv4u5+A5LL@)2?7Z_>WYM zH@X2|-Ix&A8>=gGo_j6%RWkEDdZ z8q=N#3-BdTeB#j>;X$i+&8LJGXZ)^9Aa?LvYfZd17J)dC7p;ms%}ej+|56fBOKW0? zq<2eAg(_-(XDZ6oyL%NTG_+c*LvAeHobW$BiIRrD2$l^9cHYWVsg(5-nJc=~d_5Iz z-E&F;-DfuHqvPO5N0ZF%O^NEWGf$D}-*o2G{zrnp)-r^)#-GS9^`ULAzmiRgzBKw| z#Z$Q3Yf9z;@pn>NWBJ%r+eoJKPK0lLbof3XW8|!tAZewPNT!c0+*mtytwyg5QFsw5 zfwpxw{Q`7Qc%lUA^O~gGhtD(FCMDwyMuAx3QoXsF)hDlD3cMahXZ4UV@B} zSvo1AsW!T038wP`fq2!8{=pn|lR#z-Ge@jhr?W%8sf$+I)7>qgYPknHBX(pf(Ne(Z4IozbVZB^va}SF+gQ7*5OUW)zk1Tj-28uu|=edx%ynH(*^O= z9zw}lFKqQU3B)rdf8AH=?83{fB&^ihX5e~FS!Cx>n|1871e*F%FM)2OVnCBu#sRTn zH(CWzUIJYpgm8E_s2%r5Wh;E9(~s)3DRvOWYE{6H$rtZ@wjgBl_j9qqwfx=gTi@w* zSy)dJR?%4SA_KPv@oK!-%Lw3?O;sh!0+dNhI?2PkxS}&;?h2Tb zNAv+qqtVY=4T!gsrZGI$K-IQ<)0~JaM%guxu~!8A5VQT;W@%A8KWHpYykEHKF#eTS zF&3`oI_0V+J^=UYi{=Jf{I;+gxIdmubailG*N|Hk-Bq>-y}OAj2Oi_Q0cRf~6$swT z+alvWkEGDV6Xd$&`|$5_ZLD%}(!0h>4I3TAEgr=cKp7IBk~M^7_!1Alfofve#7Z?i zH1qcH+m~Q&7L2;xAZ{3DX81dZ$?YwGwx=2I`uQek4* zE+3T*jc%T^1g*ASPr%v3(tlgBf4<5|n*QV%9LUQLD6`G7w2#syz6kgCH_vqX=ZO`q zsvN~y!lQd6(9oS)2?T3h@|_F&AL$dKFHowSWkjp$#@Xje>EtJb z98M^DqL0XyK)r-}sL8faAHqHDp2L?!31-gmy8#-?amwxlwAqVXt|5JuK!O+bMq5UY zauduEQu8WekzgOa7$;-90+r{x^6V;OvsQnPL&QKcv!Mr;_t;AX@j?e5(PYy&qdKXp1$JEG0G+xAh^Xm^YjVI1I zo+ai`KG46q2-XT&yi;W=OFnmLH+t1>9WA%iygDJ@)^4zGFkWuPX%b;cpmWXNn5a@> z8N5=9=t7qkM()4+&2Jx9mCZ4ZX!p}@SIhJE464D2ab#TdOzyKBu%vG+t0+NmwmEYw z)W>yE)xGR3D|T0VkL4Da8uh14!ZOuo@$J|xqKgDt^sQ@sL$)0BEtkxDkkfh8!(Jl%{s;63K7Kv?!b7qOW6U4StCt4JVx$HF(h`S8&n>Ce9#t%8Wd?_>*`=Ty9 zCaNePw7DFP=%8+$8<*9%zhVK!IYTqTB(KXTU|I@o!9Pl%ccgM+yHamJYbhg0Z&i+U zTnZk>TG>?+&mHXYTgPOG|bN>*rj-ZGiH+*v}lh zFF4F!9Lz)TrIGwtfg|tkZHL9tuoAkF>~aR1Rxn-mhx-9WhpCE%ukdy^xfE%W#+T}e z-Oz)LoDcBV7(058D8j}mX)^b6I7dXG(kT+i5QuSf0s?fgv)tER0+`WDyg^$?D_7YH zuc0E=d4=y<&Sae6a+7ktiY1UY;Ui;|1~bN6a=D9u`u+_|C|>HnuUL4yr_v- zXD%v?QKXe#w`sPN$ayALxm82pCkVjejFwXw_>vubo4(eN>7TUV zpE2ek-b3Yhe50+-&5mou9PS$?YK!3ljDw-fNCO)%#3Ekcr>x?I{EfAl66i-xJ0$*!AT0i!I}@vDRE6_==TAfFuP{VJ=C!E7YWf9b71 zfGIOOl!e6?DlJVpgMZ9{qwyG3L%($D#2BB@hA$9m62Hj+iod9~tN3qXlT-0~fs1f9 z3u4bhrx|T2)J0g$%UFE{UoFhE{?p~1qspC4 zxzW*@`KCkOkF5ye@yUQWEE#)>bmtOR7cW8gG!`rSoA9_nVVa(s8w_;^xt&@)NARI1 z5~z>^$N*ywG`0Hug!JN-z=B6#hJS*)pLP9=Wgt!;lg!l)a9kIKJ&C8ZrRNxxhpHUO zMJI}njvMHS&Ha^x;Xus8GqI$;=E!3_*DuUv4V=re0$5Wm^9T;_8uuR|-#>j^X6B}N z2kI?6kpfhz8@|CskU#K3prT%9RPn;LYO7As=lZ1EV@FRLU_@nsgOa9iNQmq265 z;)|#|k5O)#o}o=BOFVhYPw)ESyOV>?3KLZX)}-a6pQ~Xd<7)I{_n(sjb2YYn9kmFbw`4=_4a3W2#~KQM0b=EiC0y||M9$NzEOj5Z z!$S{zqi(6=d&$rBx-^^6jpALX7J@x7TI(c%R?y^0Ne2nRV^^PlJ`4(JaK39=VovBO`;oGIv+MnBy zevC8T-${vPDbo0cxd@)s4K)=0nnI%8ejA%wxR4mv$_~1Q(UG!e~3$dE-Wjx4lOE9 z?>(L0f-)Lziw^5WvLXga zk)KS7u*)tH!(*104*5sRl@f!al^AUT-*p|6Sj?KvH-{$k+f8Q)v+z{I`=ExIA9`GZ zH9JqNxv0wtCyS%zGP^0kF>pn#UWEXcYaGmPf^)ZxZ{w!T$c>HC$5Nf_WNqmve5q}g z$ez-O#-1B(c2QX)+?76p%Q{Poyk(ToY1YT zqFAm;ds89bBKBGP?r_qAfOw7AwsfS4I9JOT6=>Y!TaLlkl7%cA9&!h@O6sx}`VNVW z=H|HZm5NA&W9YW}e({9(u}A_n*U40_w9GUU8>05Pjb7$S2W9ssRZ8 zQ>`vA<6~b{z72gMN9?&M9-lxJPn{LQeMGsDocuv%Qin|&&|_6uV~o5=40uBp(^4Pn zPjy&zSmj3htPV!5Z{lZsqSP1o$c=w+<`#MCRQc}*?vM02044)i zc(5;op8%qNwrMU@eEN};q!pJM5mXO$Y;Z=+e@kb`wkL7!rETMk*LcX|i*7uV`_}jC z1CwWMCfG*>3A8`mm+44o8%xN=dlSz5BkJ~_I+1@x=G;v;PzEb2E;n{|R$qDSeuOKp zJ8<8-8WGPN3xNd4T?v$BQYm-mI^ioW-J-k(uZM#v_FpxWIO z;X!~tJ=?1rDu;f%e==Tk_L^0PS|u;B3MK5b{0!`0r?MsYe0PDLZj%n_2NC`YlKJLc zlMXoxX5RheWJ~xsiDvo8E1GTv=9lP9m+QD}rkcf*AT}AM0k@+7 z8>$gJf;(H1E7sAb%4t7>=xH}1f0op{yZB8ATf^mri{E?Ku`Xf`$%Tmen5$Hn5(oZ> zO`X~q0?jc5zw;Nr0_=IB0k0cz(Tz$o^N?qR#~bPnIRqr~m5$d>!%Ky^mCAQma7z2H zJ><8;-q-e7TiclkT;l;-DVr`Z8}NZAYh#cNV>*Th&0@LA=U59fG9J{gV{Bj)crwj0 zg+2U}U)m34MxKrFgO`1YR`ypw>niyeD}saoVQoAL&DjV-`HPbu`~(}jI>?`jZ_ne~B7?2vSnAcTA6?iX+6`{RU0hsG<@5@?x7**==w z_EBHz?oJo&uF@txB)m!eHrBLG$ zAUOOs*n2lYZ9dAylglF?x1lx1E3_CG`uOx3H*K3}SZ2}bnQ9pN63FA_iLbME7csFT zEE>ceA2HkucTOGW8Q{{NJbG`4K)ldcVCX|)4|EIji9#TP1x6~O!or1X99 zHlN3?mv{lWV=npu=~wVZ0V%7HFjdA9X_SqQ6NSbSs7Dryq0ErvI9A_cUKTB|`6gfD zP8Y1xW!rlOU6Me%{38Aw zCIN>s5#YAhFGfgziSK_U;_fCJ=zx8)$!1iB;G~e=1265OM);*`eH%TRW0rpKx12&# zFIh8ZUH_>P2_#L{!+l!6TwjF;pcedhjLd8)xG^x<*3yPiKA@m8kMDKCc~ep7Z$`og@p zjpLSOO6(iaSp!<8PZ*Ha!e#pjab0Aw;yPM1HK?0YCV`d+Bl?c>j2Q6q3fPHGX-n5O~?oeE< zOF6QMGuzjNOn$jxWSUT%EX)=I_*ckG;U*m z+c}d!*iixzgqbB<`a(5bEm|m9Su4!+H_{GNNI?L< z2;p!r7VyrBt#25Fg?LnL@Y@hYlR%v>>!gf53A|uoqF5T2r;oKW$BMcv*=VvDq#DM7 z4{mu5gI1m}{tr*ZehH+a)!&HAQeTf77DOJ+lR)*_c5m&}#YfHsFuK+64@C-hr!uew zH;7O40rn%#Cl^M)B91oHaeSZfK$o$FMe`VL0T67p-9Ecfv9`$&tqxyyA5Rc)=GH;* zG0*>``+pvqVh0EPO&I=~YN0L78D$4nhRV$qtk$~Z;w6Eu5u00=8AE={hPYDvTT>rg zcwJNHo@Utxt4`{^lt7#Gx`L6KDP8!j?ITXMV?}Undbzp7NsS2Hz51bju&u3sCoiZr*)*m&h+S{|CpK6`S>pooL}B%q9M-^f4F z>S=3Qcbl6Wr$wF~2`Zn=C#CLI%lvRuyrXu%vin zmkqnarl!yE0Q+05)u~z2wO_{R5-6DvY0%C zmAB}fkU+0+f(}8P&C3QRj`Iw}KFAMrw7G^v>jtmYgAE3HIaSAzn98n&?{X%!awX7f zj@K1lYB^)77V)-?E=S}|`sizQhVIVP_;umy@z?WC{A!KStq|kl#y-zn#yBOA_?3?PfaNu8?$uRM<1db;M$g&KR1^*u ztU8k9>5!5!Q*`lo=6w>9|JaOb?_;qk;3HF=1B;#C-7uQwv%-uvQQ_XApfW_NOgE{= zlQ-~(1=|wMKi5|MJKdn2*gV0>13o9~X!$cqm>R~n4V>M&V^gje&^LW+J7)le01F zr7s(e&QV?$7l&1Rj=uh}A$6qj;F_@`oFP-SC>JeCn_RiPi_+ukzRl5XC(^RH;XbbIAyZtSc9a4+%%$M4{Yi9zo!{+d3+H0C|dB)NbFD6UUK28*|_f zsqKY^VjK2h3B=zWbf)oS+ZChF`Da=q2Mwd{=xRj*K(8Lblt8}?OMe0tHPJ)O3P%{n zUCeejB?l@mjx%aqs0Y?$R|%eZX|BJ}kw@rNzr(R8#95C=&FUvMWkt+w#2S4l1Hcb< zFDk%S4V7x11b)8+QqZYvBKB;2gV*N9vo#rxZ4NtWVn0j*?OZ5kOf}BNJNRuNW;vFM zi83{moBIi$i$Ic` zFrRB19U*@m{9=Ak5X}oLlpZvzipH@6U8lL6%%wTdE;G6&|?&W3C|Y$VH1u8_MCzqSrnIxssmVx8W1c~u=DM(_Qk ze=q|J4W1pA9!HFh&@_959#ef2G)WpzTOvqfTS=J<=k zut9te0Y(LMd-7~8L~}VM#JRLgbTf}!8gQzw;%=a0kIf#XvM%-d>6u86T=vP?S;`Oq z=Wv`}>xypRVxZa$6I#i+ywRGJ(XJo)o3e8KI@7s@H7-K=mAA#=V`7j}Yq&w&wV7Ej zCr7L0EJn}wIpeF_vhi4-q|2+=2D6Qy=!&(qy#`l}SlY8&h5$Wsyk4Cb0|(iRzT713 za`dXVF0$txHZ_FRU$#^VmM+7YxK*VXU@5oaVeB^1ejkfn)|)Fh{JfY`Hg__qGD%+o zc?ee!N0X?Im2!7!ay%OSg)sAjZ&U>y{kq!68IP7fxb2jcJ_Y!zlN3^%9rBzdRILh} z#)tyv=E4(eT?FZ7R!=ul1e*1_@K}@7oGWj%n^B6~!Buj3$c^cJ`(*c=y3`!3mn)t? zy#SIVAoiqNaF+U=j6z~>?W!(~WO4`jSNVX^&rCEjJCQwwp-g^ zP=255d*>BpX(8j9UeOhySp`jrKXjMtf4)2Y0LP+XqO~@sAk*{l76tJzADe@46|mWZ zd6TA&{ef#A?@_AjD@c5OS!v5q z$3)kgO59r2hcapTiNHHYUotWWhoQ^*WU0-LO=%OF{gjD-xSlouqFrVt@E=5u8yRDS z9;MrPj=lg?baf?&t}K<*7;dH&(J5$ROm63Rpc#+$}uqzKQ{y!K0U3;w8dcJ=$7XrYj6)Y=c#g1nUzq-l#2M zY*ws>Ud58N26Ba#FGKK9`b<_=xb62gRn)R#ZT%npODc_xE_dcoGsoABFZ4o1_#pCFe;*B%!^;S zTq$^gvlz7wq*;xr^u7o^r~Z=<7(Q?Td3YN8?a25=SPNtmGyiX`#e67%+^SVL0vowE9otXiDFR^bitL ze%O}WPrY>WJjk@;Y;X*r~J`Vk>Z%OoVio339q!BaFcI z=q>kdQGOAZ69LZ#GrTR#1 z%-%poIcrhw#CehW&EkF@2%vzSSXYShxB|LeSl@G)N34`vK2p57KK@WtnwQNHH*Q;> znEsC1AC_&ftedz9)#61(-zZ);qU*an=p{4C@!~~!oAwW}vtx(ovrbVw?NUUzHF+$$ zuV}F>Yn2Ow7fQ~@C)r%W06g0l_MSuM@GRgqR*wX_t1f{8EmPpq6KVNlH-8Up*a%M) zRIh_*esGCv@p@<4dSO8iIcUy5q+wWhmzh!Htldl_@S}=H*gW4Y<{OeaIRwC zd}bg09X%=x?q%Iq+0#T@PyN!}d0GDveH-wA-&xK89JsImdr5RGA_%tgD7R)L(9gqc zyYwVu55?NJnQ7h0RO}^iGA)4wvh=JW>|)CXu~E3!p3qqPV`8PDOCk+kUpKKTRJ#wZ zBFh5@3T!Z3)hYdu*4B4>qN%HcnIPyFA5Lc*Yb#x=A%hO~jb`v}aVH%<4-n zF+5>VORQ90JAw@}xv?NCpE1cQk6(iXy6wgew0Oc#t+$+*JucIapbF7v#U{WPC-h_& zwD|AsnOJ7Uw)&R;<-r^+`6Ujxtk=|_!9;fqW3XI=L8KIUf_ zOmH=xbn4nxt*SX&uQY~Cvm85^AnDni*ZNqDtDN;Orp)=lxtX&%;t?Rqguv-n=&!x9 z_tM7FqiZ8f*Id|Y$p^ByND)if2GZUL-ThlU!Iix{nD8}AfKO6@XUR5%{8#+n>Vm9{ zb10bt>sEL)oq<0pBRg1@6M4r|-)j;txgKr9vD(f*%s|2txO~7^p>y>h1UjFT9i1WL zOI!))=`QW}I5!(?5bB$-?So5XVb}MZn(fX+;m=WkrN-g_n{k1b5pVvc+&NUSC#}xq z#DRwak6}IBaCQQ0SowB3^6Mv^XPxwc}9F&ycaH64Q8cGb(34^Pi;x6vmQ8+&^Xp-5_Jm8jU2ZGn6Ts* z*wGJO>+gpMZ}t`#)=`hPZM&s>G zd_&5I5x#~n&|x2Z-PW)dY&)E^oc4N)6r6JakuL+emo6))l;bVio*}W=nXJf!qa5B+ z2=@SS((WL5jT%W9Evz{dg^VPeOU4@`-ekbt_u#BMFn_3*vP@6h(4~&EEvE$xkI!NB zZN6aNS1X|6LJ~H=FP9$RQnnN9vS}UyTWl?sNg#9uL+~N9P2f{V^a7btCIf&!v~n*mnB3 z41Yo!m1hdNZ{rRtDYKL`2_(!fnQsJ$_o_%Acd80l`;1fVdN_;Rrn6EC)j^MAj{q2? z-x3hm40!`}IH?V0T{q{$oNKKtFANpNmf{3{{B5ruc%^mHKwv*iE+q5HdA6s$JYvk*k2|8CHf&4c@u#}QA?X8+lMaQmP#4n#e&1Ck_ z6@{!1F|S?X<(jmn@0Ww$8^YT&DsF_A3EfCDF#!lRcQg%`<@txBbLO#d(E^wc zl2iK{1Y9E<`|7DirHi<|KxoNh9S=;O0p|5xV!%K|gVOw1 zznr51BP})FSX0Qe3j!G$uPia3728t^RjVg=pqzKe80%`hZB-d(`A#_dr48t+GFo7O zMUE5u`g)MN2+zt@0#y!63w;uhY=%7T1N@c~vb&{W!boLg6^P&rB>h2fjDowbwN8$T zJ@6WL+9z0ffOjdLr46pPA#3jQZTrO!5E5O3@!w;@n z0TL|g^Se-Q1rMGG88X-uvN`<=el;OP!0^maQxDk81JHXtYorFis zY3$ZM0=j|?aPnc)#|IO~E^G%oX90`cXSRK+`<@64+4>t$5c>&_!J1IoiRHvMj;O*! zCCIR#KPKYxhHpk<_5nFe%`1GmehT5n>?5L!k(=~eU{Yp`_CA3Ki^wJlx9F8}R+;z4 zU0>!t*ERK9ym!!X%G(V)*W6S0UV(7L4D}A?H1MNtrQqd3_);113MwLSY0QwXu|PuB zq;K^x>g8Aw8H***oTFNDH2H%{csEI?6;wjL9fU7o0K`>7+f?UopktGlaj{`roKeSY ztlC4Pgxhj5)#&nDH;5Sop>CO=U6?Dj!Yk)kH}So;W+}qRo{Us1gpw?J8Q*?MIE(NL zVUGa+B7T|R9hSZg>|;CSPvzxu7sfBU-OafF@*U{sj*hI$k}FplH${ zjZwgveT@7@><$7i0)%?wOY+3)wQn1#b{Kp0kydp|AXk?z8_JQ5;qrU9Kzq_Z(H);k zAagwn#u$i|Eld=W68yb|nS74m8IOF|-zHrcTQwWMkyqQn+t`t#D}V6HLp4!28c7Le z07E#M1W}n|e85Yag?S*6kXwnY^gqtS^Lp>?4zMh>zI3}MeO2wo_gQ%@o>a&9brLB5 zYrH=~^C5bZt&TSZtcJ1;@p2A9Ei4 zSP|xu=>h%(@qV~OR|2`1d-N(*kngF;+NxXC7rOFg%=sErck{!8mFsMdzH5l68f(pL zuNpi!@FgCOo~lx>CTpYC*kkmBSb2j!^(4PZ?3MCBY4oPFsbg|d!GF3~$kx)WUPR(##j zCC^~(&t^8Ep^I4216WgJ%`>^i0+w(~@)+#c?LxjM@6u}SaxK6kIZHOV1Iu8$Y(D9O zH@ggo9=^D$Ul<0`>62|LNy7Vpkn6E1bTO_5-kD)><1Uu5?b)}j+A4Ovz-`~zVo%j4 z(cIXNUIC0zwb0bbva>)~9Pri%>M#DLJ$~!7kMO^|P3yADW($MPJf1b10()Cr-QaAM zy&LkdPGn*|70pvSn}{!+UPBRXybxi?2euH=gUxc1+zzzHfyrD|cGIp(_W3SoIUOzm`vyCW^*o4vT zQU%^P*GC&LIJ}lh=$5r8CwU^_4=tR0|g{g<^vpH$ApM~TV+9rgk#SoQ+`>Px^j3yVw9buwp0lhWtmWWa zWdU*XWB_(MUPV?%DUtZrCy99fl5?I3K92~+)XuXBA$6mF1-Y{Xzk_t&rN4zuoWIb3 zV^y-9X5a+ktoUY*P_BGRvxpJgvNGhb_`U}LSpGUTl8A^L$>r{*8w<7x#?C|zg14=h zg+@wniOKx7;=s8Lc7(8AmQ{A(bz>|^;nOF^MAe3qmis*zja)@^6hRdc!2+dc51Anxo_?+J?*R~n01Kr38(z-Y!G44Q2E=A2V0RwN@ zc@0bYogo-n=CpQby4o1tK?gM3g=K7zKu;=wuh#y6h`PNdqASY;?<4Vx*GF+~9O!dR zng}${NY$S*sh^R3qBSFXR{e2X?*4{^U59`#2fHIrm_i?QE=2+zTu2tm%%iMPqdt7; z4)Hm=O~jr}XOYgFK%yWHbUUMI3~P{iJQBNB0u5dPc@BWrE5F_09|&^-Tn9p5xiZ|d+(Ee1m4=K1EZgTfX0jJE zL63ml1;jv_Tp>R>R4bz8(-+OppE-Wm9cDkJ4=yjDKOiX1Ai+v5xnhfffiw6wxCVe? z#7l{e??O)tUycIOE7%n@hZ}wSA`XYSaXz7@{`KRxke5uMI-R?uhZRCtIE_em*s8kF z*4*6o;%4lS(?yJ}4}Z`QMWwsSkohu!*@`U&dOkXV4FPTIy-lot7@Q_%zt#}W#R?*^ zXIct1k(JiPthqT~;Cy76Q~_@I*aM2)_P|G3{20%cKx*RU$#iYp+LB+TSH%gPC5#%_ z&e!k+H7wvT!fJiwz77LjK9A`VFYIj=k;Y7Qvy8B5hg`?m|#ZXatyGw;~Oc$by8no zC)#>`P#~(FWR^jVJf54G9q4VkA?)6La!nAoj6cZ|+!1yXgLmt8z8HDCF^VBJU4yUy zmeS~771vMd+JTNdlG$^@Sdo#r3y3NGl9XK=Z|fdFZYE&TmjtmPHO?>h0~M$*3apLo zBw@>=ES);M2;;2e#w4TE=8*n9+vt+Ixuo$u;_OMsO4(3+J3#D9$ma7ecw8?&BTQF+ z+P4?}dOE|}BrY&4*Pu7;jPKU4hziSukjAzfpzCL=Hou;p*fnA$-o7c}`IONkw&9cc zTNb|q?E$d((yc{`-)|gW6atEM-#;0dP^zL`NS&<(pWZUoIy{nd~4HI9)2hl-Lj5q z)wT2V^Pf+Bqow!I7uQv_yt0wcct*Pp6rpy{Y0A`SOeFl_)V$~S8ZOz5%L4db*yIJk z*3D!fvoD%xyYN;Cw93Gvx05KKH2P~VzTKmFX4+J}RXtC9sC7r#kK)#1e)(Sbp)W_AuO&BUuTMN`v42Iz&DWYBsh~$Znzcfi|Sy%IMwb< zN-uHrNv?Ii=67DDd@Ai^-={EpQ7cBCO&v4T=yfc^>~LFU*10z>;yv=cgDmcLEMXyz zRwJK}GSYhe;saIIRIBq&22XO(eKi80c^GEEC0+vKdX|j>X)Ffx`^2^kZvWgo0W{b; zBiRwVb(ws-SG~2W%e3&JvB$0Mb_X6$0iAx006(5M*Y#F>oc1N2zz-ZMD3LTk*w2Cbh@?1$zK!)9C9o1dmNq{vVxr~#+~tW`%&K$1|s5lxQzyEp=uW}S-wDF z_kNqBmBa5)cuFG~UkJM3e+nBC3}Fws1`1w$mA>BP5A)aEeU3#Lau}1EgS9wGjFZ74 zp2K|yinVdzJpjQg=`U=Uk(0OKjOwx-1%D_9#ETr_3F@pvCSgQv|A+oK8J!Z1lG1t$ z!mN({7KJ@B>i0JhJK-N`%E0q52D`coDU>8puwrfIE+lF&po&?Wk4<7P3Vo8$->|zA z_y%XeWQj}qh(PI$v4LAeh2Z{^>l9O?K6FMXOD0A3s_R=kMa8|BNC#z~7$3JVCr^PX zR;)bZJ7f4%^!x%~{k4PfpWhmd z$-kmYC;MVgL>xXN0$a-3Mz^MR4y*)`?SR$A*3odbCDv&we%)`-bxKk{=SCRYN1u6c zff-)BIf;R8YGquEbbnc!4xHGdvsF`SxU%SWWrI)r)PG_CUXK>qT{>Bx$xQVkRMedTR zvBhK(&Rw(d+2`n*R|dJSr#U+_1;1M-ByzYWPagSg{}D8K%7<~3?hkh+&Yx{JF#gHY zG>UjPMUeI(g0A_^5N{?-xIB|J0z!E`vc{JwW)uDPuPS6F9TewBq}ItK`KxB;HW_IM z9Z8Yn=C9j8Xa(MM6k}t>xDx6vtQKz+*7R`3I@90_)EvB`p#jbtROh!-aJiktNEppz zeqaR^ug~Q%=(~vuUl6U>6dPMTEb}RV1oq5t_YXEtgN(HEs{FxF)fyKMc{2rV{zm`G z>v@-6%n=abzGIJ4ET8+CG9GZ=Qa%vF9$)wF+I+un(+f7~u1;*MIr$)7^5*T`@ySms zr)F7eCqItNo`UCCkBf_i>ykL%2s~0sR?%|3$CFcKG0_j-1`PTbH4cn{aO;SH2M~EK z#whgZtF@_$iEF(#)^5Y#dbjB64<qo($PG;#)b6uYYFn1s@A6QaRRs!skkooa6V?Y(%)7Xn?fa&coTka zde#&(7(g2PI7``B8=I6JIz|~PRC;r8tLZ|%=(+?_k0v)Nt^^y5uCW4tkwDt^@IE6V zMmN&+-~XPx1|PU&GJ+FS}WnFkDitL(=KE~V%w;#o- zV67D0S4jX>S5=go;!{ZkyI1WzP2S)#e+u8EH>iprR9CO0ucVgY3iLpCr;}PjYJ{nZ>{6)&J;F1GV zW5gV5wtQufzxP8dE^Rs}H{NK#oOLcfdE$!{!hVqgUg$GbY&S#DF#H(uM!n* zq|qe}f^Z*oH0%GV;7Y@qIJdA~uWc2VDk?2j(+V~U4QT^Z5lM<$M3&$JNDy+>A`psL zsXzi`YOSP*npy~}WMl~fL8uZ$ATU+7vWYAK*%EeS$wCrBAjxl z#;cSYd(kpNm$BkrO^ROBaI$DPUA+5>Yy#E*fk<;`GVj{O4Ou`PV)9iTODRRC{mHbl zuT_OTJ$hlG#hYN`x-X7$r${c`2!KUzLgt$Z@K7F@tGN?j?9FO(N;@0dl5UZt7n(p? zuJus@PSu5K^Yn*V0&+4JKDb59tkwg2#OJ8}x`cLGH}}8#dN7|Lp&j=zYc+_DY`{lt zZ~rasEk3Cos3H+Yuh?>Jg3~^=WikC?F6rLx`?R#Nzmv3S3|}}R0DAcYAU>^}Mdt0S zQsZ?Y8_*LUe4PFotH=B|M)+8`HCpNChef+f|E$OejHt88soH&4C$ObUBs7NRMu&{1 zKM?>GFT_8B<+Ux73X~WdEGA8SHTL8TO&`r%$aw0fs(ae-jbbPMPI0ZeqEeR*+N?TV zuVTCTfmEW2~c^O!Pb{+E=A*C0rjsHkimcM z(b%uhogAb8T`(j>u25kIEv86>{F4*VB0Y5W{4X^GsnK8L0`lRLW2$lEP-vNPpyOf7 zdBHD4Pj}3@B%^;ZNR~fIgr=`pme67(;DA#5#Vj7Xh=!yFnN%1)syfKprAsmfj`6EN zJD+#|x)FJ>AR?|=a0IVV(y?@pepD%~zYZ0jqcnb?pua?;)5DOD z^#b20o@KAi`&E5Ijx0OAby|xLXFsIh2{OsTG68q;tFGiQ+diz814=5!8HKihFiCpf zt<6-dn@b7Tzo2@dDuZ~TwsZ=j4>RsG>zh7E=CdYY;b%di$BAKsHxB9Dt&)22962>p zM47nb6!xQCc9%|{rP$5He3Ro0yzstHZO zUx)7hLJ|J4Vm0Pl{Q*dW=o;_rP6aS~Ca|Sl#3StkLI~J5tFlx|DzPio{^cQuF=HR2 z+bFv2U%Ye5^Psh)>V&R_8<|MSdCtfWC^OBhW^mxe%lZa|Eok!|}3i z)X4{3>-w2)C{C?H9 z;&9!jm19a24(e(!MOV9Ssx0sW_!G4u_h^)DbV$|ZE>+-;|2qX=$JcyLe4fx0-t7?g`^Z{%D9BTw|R#@Qn6!dPM^ z)v|D}YpqpLO1K3oi}G%-xMgr*fPSCW_JBUX-^^CDVI?YEQ<6p8rTfiJZNJ${49(|yX6TD4<>^`c;ZR2GBzb-ioWFOhdT0J4Az8uRzrlbsPM+80g74i zkx8d@V3A~VnQ`bIu1}BbrBcj8R+tQCKqjcBgi^=8RQ`y77INx8o|XLp(cSf?=)CJB zcF8N!dqY)zHpvj{COdbyKDX`EMq_%MjirG06SGyi9iOgw)Yh%1l^;_su4O~syHr+{+T19k|Zjf03nAx4I& z(}$0aBCBU%k|KkqzI1%w~c-k$1i|5RflH$K^SPsF2}tFk`JxJV!=et2!{vx* zD?f^-V|L`-@~kP#j4O_}l^t)Bw_AfX^|=h1nf6%&s_G?z47f;JA6-`ObKRIBpW##F zwVxP|$GCUIA0`!s@Zu&Px}lH4NcsCJA6v*{6HhE@N}B4KHkp^d$6X$9I`AE)u*CDk z?G8*7-wd11PFI~R?B0{-mmJ~cJZ^WvAwA4epd9QUf|}}iURSy!aWn@)&%5yoAQgpFJ=u4?5MxYHFLIHEi`m6_oOm$UxDp#@&`2xj&#_k$->o z@Jh~5kJ-W2YN{K8yoVFd{pU_l_o8r_dW-jWuas-REF84ftF<~>C!8o6cRNTv-zeoXa77& z`3yI&d!reQp=r~joV-N3!f_L!3LUn>E}fe%{aTIb&2N4*__ltg&;c8ZG`8%dl&2xSO5O>66oo9rahZCKh8?Z0k@Xm_p8i;Yb zqh9`HZeQw_e3Gei=j#Y%lLhi`d5`Zq=I=8Iy&jR!nM$=167qr*e!fE2jOXfFQ%p6-m135bB Z9K}X8(+^c&7_FIlR`95I?&/``. -The agent configuration is saved alongside as ``params/agent.yaml``, which the -evaluation scripts use together with the RL Games checkpoint. - -.. tip:: - - Remove ``--headless`` if you want to watch training in the viewer. - - -Policy Configuration -^^^^^^^^^^^^^^^^^^^^ - -The RL Games config defines both the policy network and the PPO hyperparameters, including: - -- a feed-forward MLP backbone -- an LSTM recurrent layer stack -- PPO learning rate, clipping, and rollout length -- the RL Games experiment name used for logs/checkpoints - -The configuration file is: - -.. code-block:: text - - isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml - -Key settings in this file include: - -- ``network.mlp.units`` for the actor-critic MLP sizes -- ``network.rnn`` for the recurrent policy/value architecture -- ``config.learning_rate`` for PPO optimization -- ``config.max_epochs`` for the training horizon -- ``config.name`` for the RL Games experiment name - -In this workflow, the experiment name is ``NistGearInsertionOscRlg``. - -Monitoring Training -^^^^^^^^^^^^^^^^^^^ - -During training, RL Games prints a summary to the console for each iteration, -including rollout statistics, losses, and timing information. Checkpoints are -saved periodically according to the settings in the YAML config. - -You should see output indicating that training has started: - -.. code-block:: text - - step: 0 - fps step: ... - fps total: ... - epoch: 1 - mean rewards: ... - mean episode lengths: ... - -If you are tuning the policy, the most useful signals to watch are: - -- the reward terms for alignment and insertion -- the episode length distribution -- the overall training stability across many iterations - - -Resume Training -^^^^^^^^^^^^^^^ - -To continue training from an existing checkpoint: - -.. code-block:: bash - - python isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ - --headless \ - --num_envs 4096 \ - --max_iterations 1000 \ - --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ - --checkpoint \ - nist_assembled_gear_mesh_osc \ - --rl_training_mode - -Multi-GPU Training -^^^^^^^^^^^^^^^^^^ - -Add ``--distributed`` to spread environments across all available GPUs: - -.. code-block:: bash - - python -m torch.distributed.run --nnodes=1 --nproc_per_node=2 \ - isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py \ - --headless \ - --num_envs 4096 \ - --max_iterations 1000 \ - --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ - --distributed \ - nist_assembled_gear_mesh_osc \ - --rl_training_mode - - -Expected Results -^^^^^^^^^^^^^^^^ - -A successful training run should produce: - -- RL Games console output for rollout collection and optimization -- checkpoint files under the RL Games log directory -- progressively improving insertion performance during evaluation - -.. image:: ../../../images/nist_gear_insertion_task.gif - :align: center - :height: 400px - -.. note:: - - Training performance depends on hardware, environment configuration, and random seed. - Because this is a contact-rich insertion task with randomized environment parameters, - final performance will vary with the exact number of training iterations. diff --git a/docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst b/docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst deleted file mode 100644 index 920ec4406..000000000 --- a/docs/pages/example_workflows/nist_gear_insertion/step_3_evaluation.rst +++ /dev/null @@ -1,187 +0,0 @@ -Closed-Loop Policy Inference and Evaluation -------------------------------------------- - -**Docker Container**: Base (see :doc:`../../quickstart/installation` for more details) - -:docker_run_default: - -Once inside the container, set the models directory if you plan to organize checkpoints locally: - -.. code-block:: bash - - export MODELS_DIR=models/isaaclab_arena/nist_gear_insertion - mkdir -p $MODELS_DIR - -This tutorial assumes you've completed :doc:`step_2_policy_training` and have a trained checkpoint, -or you can place a checkpoint in ``$MODELS_DIR`` and use it in the commands below. - -.. dropdown:: Download Pre-trained Model (skip preceding steps) - :animate: fade-in - - .. code-block:: bash - - hf download \ - nvidia/Arena-NIST-Gear-Insertion-RL-Task \ - --local-dir $MODELS_DIR/nist_gear_insertion_checkpoint - - After downloading, the checkpoint is at: - - ``$MODELS_DIR/nist_gear_insertion_checkpoint/best_NistGearInsertionOscRlg.pth`` - - Replace checkpoint paths in the examples below with this path. - - -Evaluation Methods -^^^^^^^^^^^^^^^^^^ - -There are three ways to evaluate a trained policy: - -1. **Single environment** (``policy_runner.py``): detailed evaluation with metrics -2. **Parallel environments** (``policy_runner.py``): larger-scale statistical evaluation -3. **Batch evaluation** (``eval_runner.py``): automated evaluation across multiple checkpoints - - -Method 1: Single Environment Evaluation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: bash - - python isaaclab_arena/evaluation/policy_runner.py \ - --checkpoint_path $MODELS_DIR/.pth \ - --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ - --policy_type rl_games \ - --num_episodes 20 \ - --num_envs 1 \ - --visualizer kit \ - nist_assembled_gear_mesh_osc - -.. note:: - - If you trained the model yourself, replace the checkpoint path with the path to your own checkpoint. - -Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, ``--agent_cfg_path``, etc.) -must come **before** the environment name. Environment-specific arguments must come **after** it. - -At the end of the run, metrics are printed to the console: - -.. code-block:: text - - Metrics: {'success_rate': 0.99, 'num_episodes': 1024} - -.. image:: ../../../images/nist_gear_insertion_task.gif - :align: center - :height: 400px - - -Method 2: Parallel Environment Evaluation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -For more statistically significant results, run across many environments in parallel. - -.. code-block:: bash - - python isaaclab_arena/evaluation/policy_runner.py \ - --checkpoint_path $MODELS_DIR/.pth \ - --agent_cfg_path isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml \ - --policy_type rl_games \ - --num_episodes 1024 \ - --num_envs 64 \ - --env_spacing 2.5 \ - --visualizer kit \ - nist_assembled_gear_mesh_osc - -At the end of the run, metrics are printed to the console: - -.. code-block:: text - - Metrics: {'success_rate': 0.99, 'num_episodes': 1024} - -.. image:: ../../../images/nist_gear_insertion_parallel.gif - :align: center - :height: 400px - - -Method 3: Batch Evaluation -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To evaluate multiple checkpoints in sequence, use ``eval_runner.py`` with a JSON config. -Here we evaluate checkpoints from the same training run. -The checkpoint paths should be replaced with the timestamped run directory in -``logs/rl_games/NistGearInsertionOscRlg/``. - -**1. Create an evaluation config** - -Create a file ``eval_config.json``: - -.. code-block:: json - - { - "jobs": [ - { - "name": "nist_gear_model_0500", - "policy_type": "rl_games", - "num_episodes": 1024, - "arena_env_args": { - "environment": "nist_assembled_gear_mesh_osc", - "num_envs": 64 - }, - "policy_config_dict": { - "checkpoint_path": "models/isaaclab_arena/nist_gear_insertion/model_0500.pth", - "agent_cfg_path": "isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml" - } - }, - { - "name": "nist_gear_model_1000", - "policy_type": "rl_games", - "num_episodes": 1024, - "arena_env_args": { - "environment": "nist_assembled_gear_mesh_osc", - "num_envs": 64 - }, - "policy_config_dict": { - "checkpoint_path": "models/isaaclab_arena/nist_gear_insertion/model_1000.pth", - "agent_cfg_path": "isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml" - } - } - ] - } - -**2. Run** - -.. code-block:: bash - - python isaaclab_arena/evaluation/eval_runner.py \ - --eval_jobs_config eval_config.json - -.. code-block:: text - - ====================================================================== - METRICS SUMMARY - ====================================================================== - - nist_gear_model_0500: - num_episodes 1024 - success_rate 0.95 - - nist_gear_model_1000: - num_episodes 1024 - success_rate 0.9900 - ====================================================================== - - -Understanding the Metrics -^^^^^^^^^^^^^^^^^^^^^^^^^ - -The NIST gear insertion task reports two metrics: - -- ``success_rate``: fraction of episodes where the gear is successfully inserted -- ``num_episodes``: number of completed episodes in the evaluation run - -A well-trained policy should show improving insertion performance as training progresses. -Results will vary with hardware, random seed, and randomization settings. - -.. note:: - - For evaluation, omit ``--rl_training_mode`` (default): success termination stays enabled so - metrics such as success rate are meaningful. For RL Games training, pass ``--rl_training_mode`` to - disable success termination. diff --git a/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst b/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst index 42c63c7ad..53904cf9c 100644 --- a/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst +++ b/docs/pages/example_workflows/reinforcement_learning_workflows/index.rst @@ -7,12 +7,10 @@ covering environment setup, policy training, and closed-loop evaluation. Currently, the following reinforcement learning workflow examples are provided: * :doc:`Franka Lift Object Task <../reinforcement_learning/index>` — Franka Panda, PhysX, joint-position control -* :doc:`NIST Gear Insertion Task <../nist_gear_insertion/index>` — Franka Panda, operational-space control, contact-rich insertion * :doc:`Dexsuite Kuka Allegro Lift Task (Newton) <../dexsuite_lift/index>` — Kuka + Allegro hand, **Newton** physics, dexterous manipulation .. toctree:: :maxdepth: 1 ../reinforcement_learning/index - ../nist_gear_insertion/index ../dexsuite_lift/index From fc6807d4f95ccfd48db71957403e9ae050450f52 Mon Sep 17 00:00:00 2001 From: odoherty Date: Wed, 6 May 2026 17:27:52 -0700 Subject: [PATCH 09/12] Fix NIST tests after registry refactor Signed-off-by: odoherty --- .../scripts/reinforcement_learning/train_rl_games.py | 8 ++++++-- isaaclab_arena/tests/test_nist_gear_insertion_task.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py index 516ffe8ff..d1d2408b5 100644 --- a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py +++ b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py @@ -52,10 +52,14 @@ def _normalize_legacy_positional_task(argv: list[str]) -> list[str]: if "--task" in argv or any(arg.startswith("--task=") for arg in argv): return argv - from isaaclab_arena_environments.cli import ExampleEnvironments + from isaaclab_arena.assets.registries import EnvironmentRegistry + from isaaclab_arena_environments.cli import ensure_environments_registered + + ensure_environments_registered() + env_registry = EnvironmentRegistry() for idx, arg in enumerate(argv): - if not arg.startswith("-") and arg in ExampleEnvironments: + if not arg.startswith("-") and env_registry.is_registered(arg): print("[WARN] Positional environment names are deprecated for RL-Games training; use --task instead.") return [*argv[:idx], "--task", arg, *argv[idx + 1 :]] return argv diff --git a/isaaclab_arena/tests/test_nist_gear_insertion_task.py b/isaaclab_arena/tests/test_nist_gear_insertion_task.py index 5af7981c7..94c78c7f3 100644 --- a/isaaclab_arena/tests/test_nist_gear_insertion_task.py +++ b/isaaclab_arena/tests/test_nist_gear_insertion_task.py @@ -5,7 +5,7 @@ from isaaclab.managers import ObservationGroupCfg as ObsGroup -from isaaclab_arena.assets.asset_registry import AssetRegistry +from isaaclab_arena.assets.registries import AssetRegistry from isaaclab_arena.embodiments.franka.franka import ( FrankaNistGearInsertionObservationsCfg, FrankaNistGearInsertionOscEmbodiment, From 4fedbfe7b14cfbfb5cfc3467c8b9ce1b1bc6e6ae Mon Sep 17 00:00:00 2001 From: odoherty Date: Thu, 7 May 2026 11:17:19 -0700 Subject: [PATCH 10/12] Defer NIST test imports until execution Signed-off-by: odoherty --- .../tests/test_nist_gear_insertion_task.py | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/isaaclab_arena/tests/test_nist_gear_insertion_task.py b/isaaclab_arena/tests/test_nist_gear_insertion_task.py index 94c78c7f3..53eaa1113 100644 --- a/isaaclab_arena/tests/test_nist_gear_insertion_task.py +++ b/isaaclab_arena/tests/test_nist_gear_insertion_task.py @@ -3,27 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from isaaclab.managers import ObservationGroupCfg as ObsGroup - -from isaaclab_arena.assets.registries import AssetRegistry -from isaaclab_arena.embodiments.franka.franka import ( - FrankaNistGearInsertionObservationsCfg, - FrankaNistGearInsertionOscEmbodiment, - franka_gripper_joint_setter, -) -from isaaclab_arena.tasks.events import place_gear_in_gripper -from isaaclab_arena.tasks.nist_gear_insertion_task import GearInsertionGeometryCfg, GraspCfg, NistGearInsertionTask -from isaaclab_arena.tasks.observations.gear_insertion_observations import ( - NistGearInsertionPolicyObservations, - peg_delta_from_held_gear_base, - peg_pos_in_env_frame, -) -from isaaclab_arena.tasks.rewards import gear_insertion_rewards -from isaaclab_arena.tasks.terminations import gear_dropped_from_gripper, gear_mesh_insertion_success -from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg -from isaaclab_arena_environments.nist_assembled_gearmesh_osc_environment import NISTAssembledGearMeshOSCEnvironment - - class _TestAsset: def __init__(self, name: str, object_min_z: float | None = None): self.name = name @@ -38,7 +17,9 @@ def _asset(name: str, object_min_z: float | None = None) -> _TestAsset: return _TestAsset(name=name, object_min_z=object_min_z) -def _nist_task(**kwargs) -> NistGearInsertionTask: +def _nist_task(**kwargs): + from isaaclab_arena.tasks.nist_gear_insertion_task import GearInsertionGeometryCfg, NistGearInsertionTask + return NistGearInsertionTask( assembled_board=_asset("nist_board_assembled"), held_gear=_asset("medium_nist_gear"), @@ -56,6 +37,14 @@ def _nist_task(**kwargs) -> NistGearInsertionTask: def test_franka_nist_gear_osc_embodiment_registers_task_terms(): + from isaaclab_arena.assets.registries import AssetRegistry + from isaaclab_arena.embodiments.franka.franka import ( + FrankaNistGearInsertionObservationsCfg, + FrankaNistGearInsertionOscEmbodiment, + ) + from isaaclab_arena.tasks.observations.gear_insertion_observations import NistGearInsertionPolicyObservations + from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg + asset_registry = AssetRegistry() embodiment_cls = asset_registry.get_asset_by_name("franka_nist_gear_osc") embodiment = embodiment_cls(fixed_asset_name="gears_and_base", peg_offset=(0.02025, 0.0, 0.025)) @@ -75,6 +64,8 @@ def test_franka_nist_gear_osc_embodiment_registers_task_terms(): def test_nist_object_library_uses_shared_nucleus_assets(): + from isaaclab_arena.assets.registries import AssetRegistry + asset_registry = AssetRegistry() expected_paths = { "gears_and_base": ( @@ -99,6 +90,13 @@ def test_nist_object_library_uses_shared_nucleus_assets(): def test_nist_task_observation_terms_use_task_geometry(): + from isaaclab.managers import ObservationGroupCfg as ObsGroup + + from isaaclab_arena.tasks.observations.gear_insertion_observations import ( + peg_delta_from_held_gear_base, + peg_pos_in_env_frame, + ) + obs_cfg = _nist_task().get_observation_cfg() assert isinstance(obs_cfg.task_obs, ObsGroup) @@ -119,6 +117,8 @@ def test_nist_task_observation_terms_use_task_geometry(): def test_nist_task_reward_terms_share_insertion_geometry(): + from isaaclab_arena.tasks.rewards import gear_insertion_rewards + rewards_cfg = _nist_task().get_rewards_cfg() keypoint_terms = [rewards_cfg.kp_baseline, rewards_cfg.kp_coarse, rewards_cfg.kp_fine] @@ -142,6 +142,11 @@ def test_nist_task_reward_terms_share_insertion_geometry(): def test_nist_task_events_use_embodiment_grasp_and_randomization_cfg(): + from isaaclab_arena.assets.registries import AssetRegistry + from isaaclab_arena.embodiments.franka.franka import franka_gripper_joint_setter + from isaaclab_arena.tasks.events import place_gear_in_gripper + from isaaclab_arena.tasks.nist_gear_insertion_task import GraspCfg + embodiment = AssetRegistry().get_asset_by_name("franka_nist_gear_osc")() task = _nist_task( grasp_cfg=GraspCfg(**embodiment.get_gear_insertion_grasp_config()), @@ -165,6 +170,10 @@ def test_nist_task_events_use_embodiment_grasp_and_randomization_cfg(): def test_nist_task_termination_terms_include_success_and_optional_drop_checks(): + from isaaclab_arena.embodiments.franka.franka import franka_gripper_joint_setter + from isaaclab_arena.tasks.nist_gear_insertion_task import GraspCfg + from isaaclab_arena.tasks.terminations import gear_dropped_from_gripper, gear_mesh_insertion_success + terminations_cfg = _nist_task(rl_training_mode=True).get_termination_cfg() assert terminations_cfg.success.func is gear_mesh_insertion_success @@ -191,6 +200,8 @@ def test_nist_task_termination_terms_include_success_and_optional_drop_checks(): def test_nist_environment_defaults_to_nist_franka_embodiment(): import argparse + from isaaclab_arena_environments.nist_assembled_gearmesh_osc_environment import NISTAssembledGearMeshOSCEnvironment + parser = argparse.ArgumentParser() NISTAssembledGearMeshOSCEnvironment.add_cli_args(parser) args_cli = parser.parse_args([]) From 4a7d61d5314267d9a5a6ccfe66e3f5d7d205b225 Mon Sep 17 00:00:00 2001 From: odoherty Date: Wed, 13 May 2026 13:54:49 -0700 Subject: [PATCH 11/12] Refine NIST gear insertion task structure Signed-off-by: odoherty --- isaaclab_arena/assets/object_library.py | 208 ++++----- isaaclab_arena/embodiments/__init__.py | 1 + isaaclab_arena/embodiments/franka/franka.py | 277 +---------- .../franka/nist_gear_insertion/__init__.py | 12 + .../franka/nist_gear_insertion/franka_osc.py | 193 ++++++++ .../franka/nist_gear_insertion/gear_grasp.py | 46 ++ .../policy/rl_games_action_policy.py | 70 ++- .../reinforcement_learning/train_rl_games.py | 141 ------ isaaclab_arena/tasks/events.py | 256 +--------- .../tasks/nist_gear_insertion/__init__.py | 16 + .../tasks/nist_gear_insertion/events.py | 318 +++++++++++++ .../tasks/nist_gear_insertion/geometry.py | 150 ++++++ .../tasks/nist_gear_insertion/observations.py | 59 +++ .../tasks/nist_gear_insertion/rewards.py | 234 ++++++++++ .../task.py} | 391 ++++++++-------- .../tasks/nist_gear_insertion/terminations.py | 70 +++ .../gear_insertion_observations.py | 336 -------------- .../tasks/rewards/gear_insertion_rewards.py | 297 ------------ isaaclab_arena/tasks/terminations.py | 61 +-- isaaclab_arena/terms/events.py | 25 +- .../tests/test_nist_gear_insertion_task.py | 362 +++++++++------ .../tests/test_rl_games_action_policy.py | 43 ++ isaaclab_arena/tests/test_train_rl_games.py | 73 --- .../mdp/nist_gear_insertion/__init__.py | 8 + .../mdp/nist_gear_insertion/franka_osc_cfg.py | 106 +++++ .../osc_action.py} | 277 +++++++---- .../nist_gear_insertion/osc_observations.py | 436 ++++++++++++++++++ .../mdp/nist_gear_insertion/osc_rewards.py | 197 ++++++++ ...nist_assembled_gearmesh_osc_environment.py | 82 ++-- .../nist_gear_insertion_osc_rl_games.yaml | 129 ------ 30 files changed, 2688 insertions(+), 2186 deletions(-) create mode 100644 isaaclab_arena/embodiments/franka/nist_gear_insertion/__init__.py create mode 100644 isaaclab_arena/embodiments/franka/nist_gear_insertion/franka_osc.py create mode 100644 isaaclab_arena/embodiments/franka/nist_gear_insertion/gear_grasp.py delete mode 100644 isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py create mode 100644 isaaclab_arena/tasks/nist_gear_insertion/__init__.py create mode 100644 isaaclab_arena/tasks/nist_gear_insertion/events.py create mode 100644 isaaclab_arena/tasks/nist_gear_insertion/geometry.py create mode 100644 isaaclab_arena/tasks/nist_gear_insertion/observations.py create mode 100644 isaaclab_arena/tasks/nist_gear_insertion/rewards.py rename isaaclab_arena/tasks/{nist_gear_insertion_task.py => nist_gear_insertion/task.py} (51%) create mode 100644 isaaclab_arena/tasks/nist_gear_insertion/terminations.py delete mode 100644 isaaclab_arena/tasks/observations/gear_insertion_observations.py delete mode 100644 isaaclab_arena/tasks/rewards/gear_insertion_rewards.py create mode 100644 isaaclab_arena/tests/test_rl_games_action_policy.py delete mode 100644 isaaclab_arena/tests/test_train_rl_games.py create mode 100644 isaaclab_arena_environments/mdp/nist_gear_insertion/__init__.py create mode 100644 isaaclab_arena_environments/mdp/nist_gear_insertion/franka_osc_cfg.py rename isaaclab_arena_environments/mdp/{nist_gear_insertion_osc_action.py => nist_gear_insertion/osc_action.py} (55%) create mode 100644 isaaclab_arena_environments/mdp/nist_gear_insertion/osc_observations.py create mode 100644 isaaclab_arena_environments/mdp/nist_gear_insertion/osc_rewards.py delete mode 100644 isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml diff --git a/isaaclab_arena/assets/object_library.py b/isaaclab_arena/assets/object_library.py index 4da689708..c14672292 100644 --- a/isaaclab_arena/assets/object_library.py +++ b/isaaclab_arena/assets/object_library.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: Apache-2.0 + from typing import Any import isaaclab.sim as sim_utils @@ -83,133 +84,6 @@ def __init__( super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose, scale=scale) -@register_asset -class GearsAndBase(LibraryObject): - """NIST gear base with SDF collision (gear_base as root prim).""" - - name = "gears_and_base" - tags = ["object"] - usd_path = ( - "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/" - "gearbase_and_gears_gearbase_root.usd" - ) - - spawn_cfg_addon = { - "rigid_props": sim_utils.RigidBodyPropertiesCfg( - disable_gravity=False, - max_depenetration_velocity=5.0, - kinematic_enabled=True, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, - ), - "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), - "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), - } - - def __init__( - self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None - ): - super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) - - -@register_asset -class MediumNistGear(LibraryObject): - """NIST medium gear (held asset for gear insertion).""" - - name = "medium_nist_gear" - tags = ["object"] - usd_path = "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_medium.usd" - - spawn_cfg_addon = { - "rigid_props": sim_utils.RigidBodyPropertiesCfg( - disable_gravity=True, - max_depenetration_velocity=5.0, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, - ), - "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), - "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), - } - - def __init__(self, prim_path: str | None = None, initial_pose: Pose | None = None): - super().__init__(prim_path=prim_path, initial_pose=initial_pose) - - -@register_asset -class NistBoardAssembled(LibraryObject): - """NIST fully assembled taskboard.""" - - name = "nist_board_assembled" - tags = ["nist", "object"] - usd_path = ( - "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/nist_board_assembled.usd" - ) - spawn_cfg_addon = { - "rigid_props": sim_utils.RigidBodyPropertiesCfg( - disable_gravity=False, - kinematic_enabled=True, - max_depenetration_velocity=5.0, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, - ), - "mass_props": sim_utils.MassPropertiesCfg(mass=None), - "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), - } - - def __init__( - self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None - ): - super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) - - -@register_asset -class NistGearBase(LibraryObject): - - name = "nist_gear_base" - tags = ["nist", "object"] - usd_path = "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_base.usd" - spawn_cfg_addon = { - "rigid_props": sim_utils.RigidBodyPropertiesCfg( - disable_gravity=False, - kinematic_enabled=True, - max_depenetration_velocity=5.0, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, - ), - "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), - "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), - } - - def __init__( - self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None - ): - super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) - - @register_asset class MustardBottle(LibraryObject): """ @@ -408,6 +282,86 @@ def __init__( super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose, scale=scale) +# NIST insertion assets use Arena's high-precision assembly preset to mirror +# Factory/Forge dense-contact settings for small-clearance gear insertion. +@register_asset +class GearsAndBase(LibraryObject): + """NIST gear base with SDF collision (gear_base as root prim).""" + + name = "gears_and_base" + tags = ["object"] + usd_path = f"{ISAACLAB_NUCLEUS_DIR}/Arena/assets/object_library/NIST/gearbase_and_gears_gearbase_root.usd" + + spawn_cfg_addon = { + "rigid_props": RIGID_BODY_PROPS_HIGH_PRECISION.replace(disable_gravity=False, kinematic_enabled=True), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class MediumNistGear(LibraryObject): + """NIST medium gear (held asset for gear insertion).""" + + name = "medium_nist_gear" + tags = ["object"] + usd_path = f"{ISAACLAB_NUCLEUS_DIR}/Arena/assets/object_library/NIST/gear_medium.usd" + + spawn_cfg_addon = { + "rigid_props": RIGID_BODY_PROPS_HIGH_PRECISION.replace(disable_gravity=True), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__(self, prim_path: str | None = None, initial_pose: Pose | None = None): + super().__init__(prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class NistBoardAssembled(LibraryObject): + """NIST fully assembled taskboard.""" + + name = "nist_board_assembled" + tags = ["nist", "object"] + usd_path = f"{ISAACLAB_NUCLEUS_DIR}/Arena/assets/object_library/NIST/nist_board_assembled.usd" + object_type = ObjectType.BASE + # The assembled board is visual context for this task; gears_and_base owns + # the physical peg and gear contacts used by observations, rewards, and success. + spawn_cfg_addon = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg(rigid_body_enabled=False), + "collision_props": sim_utils.CollisionPropertiesCfg(collision_enabled=False), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + +@register_asset +class NistGearBase(LibraryObject): + """NIST standalone gear base.""" + + name = "nist_gear_base" + tags = ["nist", "object"] + usd_path = f"{ISAACLAB_NUCLEUS_DIR}/Arena/assets/object_library/NIST/gear_base.usd" + spawn_cfg_addon = { + "rigid_props": RIGID_BODY_PROPS_HIGH_PRECISION.replace(disable_gravity=False, kinematic_enabled=True), + "mass_props": sim_utils.MassPropertiesCfg(mass=0.012), + "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + } + + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) + + @register_asset class BlueSortingBin(LibraryObject): """ diff --git a/isaaclab_arena/embodiments/__init__.py b/isaaclab_arena/embodiments/__init__.py index d63721b26..2b5d70824 100644 --- a/isaaclab_arena/embodiments/__init__.py +++ b/isaaclab_arena/embodiments/__init__.py @@ -6,6 +6,7 @@ from .agibot.agibot import * from .droid.droid import * from .franka.franka import * +from .franka.nist_gear_insertion.franka_osc import * from .g1.g1 import * from .galbot.galbot import * from .gr1t2.gr1t2 import * diff --git a/isaaclab_arena/embodiments/franka/franka.py b/isaaclab_arena/embodiments/franka/franka.py index 3f86b82c9..2cfc37f8a 100644 --- a/isaaclab_arena/embodiments/franka/franka.py +++ b/isaaclab_arena/embodiments/franka/franka.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: Apache-2.0 + import torch from collections.abc import Sequence from dataclasses import MISSING @@ -10,9 +11,7 @@ import isaaclab.envs.mdp as mdp_isaac_lab import isaaclab.sim as sim_utils import isaaclab.utils.math as PoseUtils -from isaaclab.actuators import ImplicitActuatorCfg from isaaclab.assets.articulation.articulation_cfg import ArticulationCfg -from isaaclab.controllers import OperationalSpaceControllerCfg from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg from isaaclab.envs import ManagerBasedRLMimicEnv from isaaclab.envs.mdp.actions.actions_cfg import ( @@ -39,9 +38,7 @@ from isaaclab_arena.embodiments.common.mimic_utils import get_rigid_and_articulated_object_poses from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase from isaaclab_arena.embodiments.franka.observations import gripper_pos -from isaaclab_arena.tasks.observations.gear_insertion_observations import NistGearInsertionPolicyObservations from isaaclab_arena.utils.pose import Pose -from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg _DEFAULT_CAMERA_OFFSET = Pose(position_xyz=(0.11, -0.031, -0.074), rotation_xyzw=(0.0, 0.0, -0.66262, -0.74896)) @@ -205,278 +202,6 @@ class FrankaJointPosActionsCfg: ) -_FRANKA_MIMIC_OSC_USD_PATH = f"{ISAACLAB_NUCLEUS_DIR}/Factory/franka_mimic.usd" - -# Assembly tasks use stiffer contact settings than the default Franka assets. -_FRANKA_MIMIC_OSC_RIGID_PROPS = sim_utils.RigidBodyPropertiesCfg( - disable_gravity=True, - max_depenetration_velocity=5.0, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, -) - -_FRANKA_MIMIC_OSC_ARTICULATION_PROPS = sim_utils.ArticulationRootPropertiesCfg( - enabled_self_collisions=False, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, -) - -_FRANKA_MIMIC_OSC_COLLISION_PROPS = sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0) - -# Default pose from the Factory-style Franka mimic asset. -_FRANKA_MIMIC_OSC_JOINT_POS = { - "panda_joint1": 0.0, - "panda_joint2": -0.569, - "panda_joint3": 0.0, - "panda_joint4": -2.810, - "panda_joint5": 0.0, - "panda_joint6": 3.037, - "panda_joint7": 0.741, - "panda_finger_joint2": 0.04, -} - -# OSC writes joint torques directly for the arm; the hand remains position controlled. -_FRANKA_MIMIC_OSC_ACTUATORS = { - "panda_arm1": ImplicitActuatorCfg( - joint_names_expr=["panda_joint[1-4]"], - stiffness=0.0, - damping=0.0, - friction=0.0, - armature=0.0, - effort_limit_sim=87, - velocity_limit_sim=124.6, - ), - "panda_arm2": ImplicitActuatorCfg( - joint_names_expr=["panda_joint[5-7]"], - stiffness=0.0, - damping=0.0, - friction=0.0, - armature=0.0, - effort_limit_sim=12, - velocity_limit_sim=149.5, - ), - "panda_hand": ImplicitActuatorCfg( - joint_names_expr=["panda_finger_joint[1-2]"], - effort_limit_sim=40.0, - velocity_limit_sim=0.04, - stiffness=7500.0, - damping=173.0, - friction=0.1, - armature=0.0, - ), -} - -# Franka mimic USD with contact sensors enabled for the wrist force body. -_FRANKA_MIMIC_OSC_CFG = ArticulationCfg( - prim_path="{ENV_REGEX_NS}/Robot", - spawn=sim_utils.UsdFileCfg( - usd_path=_FRANKA_MIMIC_OSC_USD_PATH, - activate_contact_sensors=True, - rigid_props=_FRANKA_MIMIC_OSC_RIGID_PROPS, - articulation_props=_FRANKA_MIMIC_OSC_ARTICULATION_PROPS, - collision_props=_FRANKA_MIMIC_OSC_COLLISION_PROPS, - ), - init_state=ArticulationCfg.InitialStateCfg( - joint_pos=_FRANKA_MIMIC_OSC_JOINT_POS, - pos=(0.0, 0.0, 0.0), - rot=(0.0, 0.0, 0.0, 1.0), - ), - actuators=_FRANKA_MIMIC_OSC_ACTUATORS, -) - -_NIST_GEAR_INSERTION_INITIAL_JOINT_POSE = [ - 0.561824, - 0.287201, - -0.543103, - -2.410188, - 0.507908, - 2.847644, - 0.454298, - 0.04, - 0.04, -] - - -def franka_gripper_joint_setter( - joint_pos: torch.Tensor, - row_indices: Sequence[int], - finger_joint_indices: Sequence[int], - width: float, -) -> None: - """Set Franka Panda finger joints to achieve a given total opening width.""" - for jid in finger_joint_indices: - joint_pos[row_indices, jid] = width / 2.0 - - -def _nist_gear_insertion_ee_frame_cfg() -> FrameTransformerCfg: - return FrameTransformerCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_link0", - debug_vis=False, - target_frames=[ - FrameTransformerCfg.FrameCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_fingertip_centered", - name="end_effector", - offset=OffsetCfg(pos=(0.0, 0.0, 0.0)), - ), - FrameTransformerCfg.FrameCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_rightfinger", - name="tool_rightfinger", - offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), - ), - FrameTransformerCfg.FrameCfg( - prim_path="{ENV_REGEX_NS}/Robot/panda_leftfinger", - name="tool_leftfinger", - offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), - ), - ], - ) - - -@register_asset -class FrankaNistGearInsertionOscEmbodiment(FrankaEmbodimentBase): - """Franka embodiment for NIST gear insertion with OSC torque control.""" - - name = "franka_nist_gear_osc" - - def __init__( - self, - enable_cameras: bool = False, - initial_pose: Pose | None = None, - initial_joint_pose: list[float] | None = None, - concatenate_observation_terms: bool = False, - arm_mode: ArmMode | None = None, - camera_offset: Pose | None = _DEFAULT_CAMERA_OFFSET, - is_tiled_camera: bool = False, - fixed_asset_name: str = "gears_and_base", - peg_offset: tuple[float, float, float] = (0.02025, 0.0, 0.025), - ): - super().__init__( - enable_cameras=enable_cameras, - initial_pose=initial_pose, - initial_joint_pose=initial_joint_pose, - concatenate_observation_terms=concatenate_observation_terms, - arm_mode=arm_mode, - camera_offset=camera_offset, - is_tiled_camera=is_tiled_camera, - ) - self.scene_config.robot = _FRANKA_MIMIC_OSC_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") - self.scene_config.ee_frame = _nist_gear_insertion_ee_frame_cfg() - self.set_initial_joint_pose(initial_joint_pose or _NIST_GEAR_INSERTION_INITIAL_JOINT_POSE) - self.action_config = FrankaNistGearInsertionOscActionsCfg( - fixed_asset_name=fixed_asset_name, - peg_offset=peg_offset, - ) - - self.observation_config = FrankaNistGearInsertionObservationsCfg( - fixed_asset_name=fixed_asset_name, - peg_offset=peg_offset, - fingertip_body_name=self.get_command_body_name(), - concatenate_observation_terms=self.concatenate_observation_terms, - ) - - self.reward_config.action_rate = None - self.reward_config.joint_vel = None - - def get_command_body_name(self) -> str: - return "panda_fingertip_centered" - - def get_gear_insertion_grasp_config(self) -> dict[str, object]: - return { - "hand_grasp_width": 0.03, - "hand_close_width": 0.03, - "gripper_joint_setter_func": franka_gripper_joint_setter, - "end_effector_body_name": "panda_hand", - "finger_joint_names": "panda_finger_joint[1-2]", - "grasp_rot_offset": [1.0, 0.0, 0.0, 0.0], - "grasp_offset": [0.02, 0.0, -0.128], - "arm_joint_names": "panda_joint[1-7]", - } - - -@configclass -class FrankaNistGearInsertionObservationsCfg: - """Observation specification for the NIST gear insertion OSC policy.""" - - @configclass - class PolicyCfg(ObsGroup): - """24-D NIST gear insertion policy observation.""" - - nist_gear_policy_obs: ObsTerm = MISSING - - def __post_init__(self): - self.enable_corruption = False - self.concatenate_terms = False - - policy: PolicyCfg = MISSING - - def __init__( - self, - fixed_asset_name: str = "gears_and_base", - peg_offset: tuple[float, float, float] = (0.02025, 0.0, 0.025), - fingertip_body_name: str = "panda_fingertip_centered", - concatenate_observation_terms: bool = False, - ): - self.policy = self.PolicyCfg() - self.policy.nist_gear_policy_obs = ObsTerm( - func=NistGearInsertionPolicyObservations, - params={ - "robot_name": "robot", - "board_name": fixed_asset_name, - "peg_offset": list(peg_offset), - "fingertip_body_name": fingertip_body_name, - "force_body_name": "force_sensor", - "pos_noise_level": 0.0, - "rot_noise_level_deg": 0.0, - "force_noise_level": 0.0, - }, - ) - self.policy.concatenate_terms = concatenate_observation_terms - - -@configclass -class FrankaNistGearInsertionOscActionsCfg: - """Action specification for the NIST gear insertion OSC Franka embodiment.""" - - arm_action: ActionTermCfg = MISSING - gripper_action: ActionTermCfg | None = None - - def __init__( - self, - fixed_asset_name: str = "gears_and_base", - peg_offset: tuple[float, float, float] = (0.02025, 0.0, 0.025), - ): - self.arm_action = NistGearInsertionOscActionCfg( - asset_name="robot", - joint_names=["panda_joint[1-7]"], - body_name="panda_fingertip_centered", - controller_cfg=OperationalSpaceControllerCfg( - target_types=["pose_abs"], - impedance_mode="fixed", - inertial_dynamics_decoupling=True, - partial_inertial_dynamics_decoupling=False, - gravity_compensation=False, - motion_stiffness_task=[565.0, 565.0, 565.0, 28.0, 28.0, 28.0], - motion_damping_ratio_task=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - nullspace_control="position", - nullspace_stiffness=10.0, - nullspace_damping_ratio=1.0, - ), - position_scale=1.0, - orientation_scale=1.0, - nullspace_joint_pos_target="default", - fixed_asset_name=fixed_asset_name, - peg_offset=peg_offset, - force_body_name="force_sensor", - ) - self.gripper_action = None - - @configclass class FrankaSceneCfg: """Additions to the scene configuration coming from the Franka embodiment.""" diff --git a/isaaclab_arena/embodiments/franka/nist_gear_insertion/__init__.py b/isaaclab_arena/embodiments/franka/nist_gear_insertion/__init__.py new file mode 100644 index 000000000..4a5683985 --- /dev/null +++ b/isaaclab_arena/embodiments/franka/nist_gear_insertion/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Franka embodiment components for NIST gear insertion.""" + +from .franka_osc import FrankaNistGearInsertionOscEmbodiment + +__all__ = [ + "FrankaNistGearInsertionOscEmbodiment", +] diff --git a/isaaclab_arena/embodiments/franka/nist_gear_insertion/franka_osc.py b/isaaclab_arena/embodiments/franka/nist_gear_insertion/franka_osc.py new file mode 100644 index 000000000..341f66232 --- /dev/null +++ b/isaaclab_arena/embodiments/franka/nist_gear_insertion/franka_osc.py @@ -0,0 +1,193 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Franka OSC embodiment for the NIST gear insertion task.""" + +from __future__ import annotations + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets.articulation.articulation_cfg import ArticulationCfg +from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg, OffsetCfg +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR + +from isaaclab_arena.assets.register import register_asset +from isaaclab_arena.embodiments.common.arm_mode import ArmMode +from isaaclab_arena.embodiments.franka.franka import _DEFAULT_CAMERA_OFFSET, FrankaEmbodimentBase +from isaaclab_arena.embodiments.franka.nist_gear_insertion.gear_grasp import get_franka_nist_gear_insertion_grasp_config +from isaaclab_arena.tasks.nist_gear_insertion.events import GraspCfg +from isaaclab_arena.utils.pose import Pose + +__all__ = [ + "FrankaNistGearInsertionOscEmbodiment", +] + +_FRANKA_MIMIC_OSC_USD_PATH = f"{ISAACLAB_NUCLEUS_DIR}/Factory/franka_mimic.usd" + +# Mirrors Isaac Lab Factory's franka_mimic OSC setup in +# isaaclab_tasks.direct.factory.factory_env_cfg. +_FRANKA_MIMIC_OSC_RIGID_PROPS = sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, +) + +_FRANKA_MIMIC_OSC_ARTICULATION_PROPS = sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, +) + +_FRANKA_MIMIC_OSC_COLLISION_PROPS = sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0) + +# Default pose from the Factory-style Franka mimic asset. +_FRANKA_MIMIC_OSC_JOINT_POS = { + "panda_joint1": 0.0, + "panda_joint2": -0.569, + "panda_joint3": 0.0, + "panda_joint4": -2.810, + "panda_joint5": 0.0, + "panda_joint6": 3.037, + "panda_joint7": 0.741, + "panda_finger_joint2": 0.04, +} + +# OSC writes joint torques directly for the arm; the hand remains position controlled. +_FRANKA_MIMIC_OSC_ACTUATORS = { + "panda_arm1": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[1-4]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit_sim=87, + velocity_limit_sim=124.6, + ), + "panda_arm2": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[5-7]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit_sim=12, + velocity_limit_sim=149.5, + ), + "panda_hand": ImplicitActuatorCfg( + joint_names_expr=["panda_finger_joint[1-2]"], + effort_limit_sim=40.0, + velocity_limit_sim=0.04, + stiffness=7500.0, + damping=173.0, + friction=0.1, + armature=0.0, + ), +} + +# Franka mimic USD with contact sensors enabled for the wrist force body. +_FRANKA_MIMIC_OSC_CFG = ArticulationCfg( + prim_path="{ENV_REGEX_NS}/Robot", + spawn=sim_utils.UsdFileCfg( + usd_path=_FRANKA_MIMIC_OSC_USD_PATH, + activate_contact_sensors=True, + rigid_props=_FRANKA_MIMIC_OSC_RIGID_PROPS, + articulation_props=_FRANKA_MIMIC_OSC_ARTICULATION_PROPS, + collision_props=_FRANKA_MIMIC_OSC_COLLISION_PROPS, + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos=_FRANKA_MIMIC_OSC_JOINT_POS, + pos=(0.0, 0.0, 0.0), + rot=(0.0, 0.0, 0.0, 1.0), + ), + actuators=_FRANKA_MIMIC_OSC_ACTUATORS, +) + +_GEAR_INSERTION_INITIAL_JOINT_POSE = [ + 0.561824, + 0.287201, + -0.543103, + -2.410188, + 0.507908, + 2.847644, + 0.454298, + 0.04, + 0.04, +] + + +def _gear_insertion_ee_frame_cfg() -> FrameTransformerCfg: + """Return Franka frames used by the gear insertion OSC policy. + + The policy commands the centered fingertip frame, while the finger frames + are exposed for grasp/reset diagnostics. + """ + return FrameTransformerCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_link0", + debug_vis=False, + target_frames=[ + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_fingertip_centered", + name="end_effector", + offset=OffsetCfg(pos=(0.0, 0.0, 0.0)), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_rightfinger", + name="tool_rightfinger", + offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/panda_leftfinger", + name="tool_leftfinger", + offset=OffsetCfg(pos=(0.0, 0.0, 0.046)), + ), + ], + ) + + +@register_asset +class FrankaNistGearInsertionOscEmbodiment(FrankaEmbodimentBase): + """Franka embodiment for NIST gear insertion with OSC torque control. + + The embodiment owns the Franka mimic robot setup, OSC command frame, and + grasp metadata. Environments wire scene-specific asset names and insertion + geometry into the action, observation, and reward configs. + """ + + name = "franka_nist_gear_insertion_osc" + + def __init__( + self, + enable_cameras: bool = False, + initial_pose: Pose | None = None, + initial_joint_pose: list[float] | None = None, + concatenate_observation_terms: bool = False, + arm_mode: ArmMode | None = None, + camera_offset: Pose | None = _DEFAULT_CAMERA_OFFSET, + is_tiled_camera: bool = False, + ): + super().__init__( + enable_cameras=enable_cameras, + initial_pose=initial_pose, + initial_joint_pose=initial_joint_pose, + concatenate_observation_terms=concatenate_observation_terms, + arm_mode=arm_mode, + camera_offset=camera_offset, + is_tiled_camera=is_tiled_camera, + ) + self.scene_config.robot = _FRANKA_MIMIC_OSC_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene_config.ee_frame = _gear_insertion_ee_frame_cfg() + self.set_initial_joint_pose(initial_joint_pose or _GEAR_INSERTION_INITIAL_JOINT_POSE) + + def get_command_body_name(self) -> str: + return "panda_fingertip_centered" + + def get_gear_insertion_grasp_config(self) -> GraspCfg: + return get_franka_nist_gear_insertion_grasp_config() diff --git a/isaaclab_arena/embodiments/franka/nist_gear_insertion/gear_grasp.py b/isaaclab_arena/embodiments/franka/nist_gear_insertion/gear_grasp.py new file mode 100644 index 000000000..9ad43b4d8 --- /dev/null +++ b/isaaclab_arena/embodiments/franka/nist_gear_insertion/gear_grasp.py @@ -0,0 +1,46 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Franka-specific grasp parameters for NIST gear insertion.""" + +from __future__ import annotations + +import torch +from collections.abc import Sequence + +from isaaclab_arena.tasks.nist_gear_insertion.events import GraspCfg + + +def franka_gripper_joint_setter( + joint_pos: torch.Tensor, + row_indices: Sequence[int], + finger_joint_indices: Sequence[int], + width: float, +) -> None: + """Set Franka Panda finger joints from a total gripper opening width.""" + for jid in finger_joint_indices: + joint_pos[row_indices, jid] = width / 2.0 + + +def get_franka_nist_gear_insertion_grasp_config() -> GraspCfg: + """Return reset grasp parameters for the Franka NIST gear insertion policy. + + The reset first opens the fingers to 3 cm while inverse kinematics moves + ``panda_hand`` to the gear, then closes the fingers to 0 m to hold the gear. + In Isaac Lab 3.0 xyzw convention, ``[1, 0, 0, 0]`` is an intentional + 180-degree rotation about X. The ``[0.02, 0.0, -0.128]`` translation is the + held-gear-root to ``panda_hand`` offset that centers the gear between the + fingertips. + """ + return GraspCfg( + hand_grasp_width=0.03, + hand_close_width=0.0, + gripper_joint_setter_func=franka_gripper_joint_setter, + end_effector_body_name="panda_hand", + finger_joint_names="panda_finger_joint[1-2]", + grasp_rot_offset=[1.0, 0.0, 0.0, 0.0], + grasp_offset=[0.02, 0.0, -0.128], + arm_joint_names="panda_joint[1-7]", + ) diff --git a/isaaclab_arena/policy/rl_games_action_policy.py b/isaaclab_arena/policy/rl_games_action_policy.py index 1971418ea..c5d1e3a37 100644 --- a/isaaclab_arena/policy/rl_games_action_policy.py +++ b/isaaclab_arena/policy/rl_games_action_policy.py @@ -24,6 +24,37 @@ from isaaclab_arena.policy.policy_base import PolicyBase +class _RlGamesInferenceEnvWrapper(RlGamesVecEnvWrapper): + """``RlGamesVecEnvWrapper`` that avoids extra simulator resets during inference. + + Mirrors :class:`_RslRlInferenceEnvWrapper`: ``policy_runner.py`` already + resets the environment before requesting the first action, so wrapper setup + should read the cached observation buffer instead of triggering a second + reset and recording a phantom episode. + """ + + def __init__( + self, + env: gym.Env, + rl_device: str, + clip_obs: float, + clip_actions: float, + obs_groups: dict[str, list[str]] | None = None, + concate_obs_group: bool = True, + ): + original_reset = env.reset + # Return cached obs during wrapper setup to avoid recording a phantom episode. + env.reset = lambda *args, **kwargs: (dict(env.unwrapped.obs_buf), {}) + try: + super().__init__(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_group) + finally: + env.reset = original_reset + + def process_observations(self, observation: GymSpacesDict) -> dict[str, torch.Tensor]: + """Process Arena observations through the RL-Games wrapper.""" + return self._process_obs(dict(observation)) + + @dataclass class RlGamesActionPolicyConfig: """Configuration for RL-Games action policy. @@ -34,7 +65,7 @@ class RlGamesActionPolicyConfig: checkpoint_path: str """Path to the RL-Games .pth checkpoint file.""" - agent_cfg_path: Path = None + agent_cfg_path: Path | None = None """Path to the RL-Games agent YAML configuration file. When using the CLI (``policy_runner.py``), this is set via ``--agent_cfg_path``. @@ -60,29 +91,25 @@ def from_cli_args(cls, args: argparse.Namespace) -> RlGamesActionPolicyConfig: class RlGamesActionPolicy(PolicyBase): """Policy that uses a trained RL-Games model for inference. - Wraps the RL-Games player for use with the Arena policy runner and eval runner. - Handles observation processing (concatenation, clipping) and LSTM state management. - + Wraps the RL-Games player for use with the Arena policy runner and eval + runner. Handles observation processing, clipping, and RNN state management. """ name = "rl_games" config_class = RlGamesActionPolicyConfig - def __init__(self, config: RlGamesActionPolicyConfig, args_cli: argparse.Namespace | None = None): + def __init__(self, config: RlGamesActionPolicyConfig): super().__init__(config) self.config: RlGamesActionPolicyConfig = config self._player: BasePlayer | None = None - self._wrapper: RlGamesVecEnvWrapper | None = None + self._wrapper: _RlGamesInferenceEnvWrapper | None = None self._rnn_initialized = False - self.args_cli = args_cli def _load_policy(self, env: gym.Env) -> None: - """Set up the RL-Games infrastructure and load the checkpoint. + """Set up RL-Games infrastructure and load the checkpoint.""" + if self.config.agent_cfg_path is None: + raise ValueError("RL-Games policy requires --agent_cfg_path.") - Creates the RlGamesVecEnvWrapper (for observation processing only), - registers the env with the RL-Games runner, creates the player, - and restores the checkpoint weights. - """ with open(self.config.agent_cfg_path) as f: agent_cfg = yaml.safe_load(f) @@ -99,7 +126,7 @@ def _load_policy(self, env: gym.Env) -> None: obs_groups = agent_cfg["params"]["env"].get("obs_groups") concate_obs = agent_cfg["params"]["env"].get("concate_obs_groups", True) - self._wrapper = RlGamesVecEnvWrapper( + self._wrapper = _RlGamesInferenceEnvWrapper( env, device, clip_obs, @@ -134,8 +161,7 @@ def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor: assert self._player is not None assert self._wrapper is not None - obs_copy = dict(observation) - obs_rlg = self._wrapper._process_obs(obs_copy) + obs_rlg = self._wrapper.process_observations(observation) obs_tensor = obs_rlg["obs"] if not self._rnn_initialized: @@ -157,16 +183,16 @@ def reset(self, env_ids: torch.Tensor | None = None) -> None: if not hasattr(self._player, "states") or self._player.states is None: return if env_ids is None: - for s in self._player.states: - s.zero_() + for state in self._player.states: + state.zero_() else: - for s in self._player.states: - s[:, env_ids, :] = 0.0 + for state in self._player.states: + state[:, env_ids, :] = 0.0 @classmethod def from_dict(cls, config_dict: dict) -> RlGamesActionPolicy: config = RlGamesActionPolicyConfig(**config_dict) - return cls(config, args_cli=None) + return cls(config) @staticmethod def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: @@ -175,7 +201,7 @@ def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars "--checkpoint_path", type=str, required=True, - help="Path to the .pth checkpoint file containing the RL-Games policy", + help="Path to the .pth checkpoint file containing the RL-Games policy.", ) group.add_argument( "--agent_cfg_path", @@ -200,4 +226,4 @@ def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars @staticmethod def from_args(args: argparse.Namespace) -> RlGamesActionPolicy: config = RlGamesActionPolicyConfig.from_cli_args(args) - return RlGamesActionPolicy(config, args_cli=args) + return RlGamesActionPolicy(config) diff --git a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py b/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py deleted file mode 100644 index d1d2408b5..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/train_rl_games.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Register an Arena environment, then delegate RL-Games training to Isaac Lab. - -This file is intentionally a thin compatibility wrapper. The training loop, -checkpoint handling, video recording, and RL-Games runner setup live in Isaac -Lab's ``scripts/reinforcement_learning/rl_games/train.py``. -""" - -from __future__ import annotations - -import argparse -import runpy -import sys -from pathlib import Path - -from isaaclab_arena.environments.isaaclab_interop import environment_registration_callback - -_FORWARDED_VALUE_OPTIONS = { - "--agent", - "--checkpoint", - "--device", - "--max_iterations", - "--num_envs", - "--ray-proc-id", - "--seed", - "--sigma", - "--task", - "--video_interval", - "--video_length", - "--wandb-entity", - "--wandb-name", - "--wandb-project-name", -} -_FORWARDED_FLAG_OPTIONS = { - "--distributed", - "--export_io_descriptors", - "--headless", - "--track", - "--video", -} -_DEPRECATED_VALUE_OPTIONS = { - "--agent_cfg_path", - "--experiment_name", -} - - -def _normalize_legacy_positional_task(argv: list[str]) -> list[str]: - if "--task" in argv or any(arg.startswith("--task=") for arg in argv): - return argv - - from isaaclab_arena.assets.registries import EnvironmentRegistry - from isaaclab_arena_environments.cli import ensure_environments_registered - - ensure_environments_registered() - env_registry = EnvironmentRegistry() - - for idx, arg in enumerate(argv): - if not arg.startswith("-") and env_registry.is_registered(arg): - print("[WARN] Positional environment names are deprecated for RL-Games training; use --task instead.") - return [*argv[:idx], "--task", arg, *argv[idx + 1 :]] - return argv - - -def _isaac_lab_rl_games_train_script() -> Path: - repo_root = Path(__file__).resolve().parents[3] - return repo_root / "submodules" / "IsaacLab" / "scripts" / "reinforcement_learning" / "rl_games" / "train.py" - - -def _copy_forwarded_args(argv: list[str]) -> list[str]: - forwarded: list[str] = [] - idx = 0 - while idx < len(argv): - arg = argv[idx] - option = arg.split("=", maxsplit=1)[0] - if option in _FORWARDED_FLAG_OPTIONS: - forwarded.append(arg) - elif option in _FORWARDED_VALUE_OPTIONS: - forwarded.append(arg) - if "=" not in arg and idx + 1 < len(argv): - idx += 1 - forwarded.append(argv[idx]) - elif option in _DEPRECATED_VALUE_OPTIONS and "=" not in arg and idx + 1 < len(argv): - idx += 1 - idx += 1 - return forwarded - - -def _drop_forwarded_args(argv: list[str]) -> list[str]: - remaining: list[str] = [] - idx = 0 - while idx < len(argv): - arg = argv[idx] - option = arg.split("=", maxsplit=1)[0] - if option in _FORWARDED_FLAG_OPTIONS: - idx += 1 - continue - if option in _FORWARDED_VALUE_OPTIONS: - if "=" not in arg and idx + 1 < len(argv): - idx += 1 - idx += 1 - continue - remaining.append(arg) - idx += 1 - return remaining - - -def _remove_deprecated_args(argv: list[str]) -> tuple[list[str], str | None]: - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--agent_cfg_path", default=None) - parser.add_argument("--experiment_name", default=None) - deprecated, remaining = parser.parse_known_args(argv) - if deprecated.agent_cfg_path is not None: - print("[WARN] --agent_cfg_path is ignored; the registered Arena environment provides rl_games_cfg_entry_point.") - return remaining, deprecated.experiment_name - - -def main() -> None: - original_args = _normalize_legacy_positional_task(sys.argv[1:]) - sys.argv = [sys.argv[0], *original_args] - lab_script = _isaac_lab_rl_games_train_script() - if not lab_script.is_file(): - raise FileNotFoundError(f"Isaac Lab RL-Games trainer not found: {lab_script}") - - remaining_args = environment_registration_callback() - remaining_args, experiment_name = _remove_deprecated_args(remaining_args) - remaining_args = _drop_forwarded_args(remaining_args) - - forwarded_args = _copy_forwarded_args(original_args) - if experiment_name is not None: - forwarded_args.append(f"agent.params.config.name={experiment_name}") - - sys.argv = [str(lab_script), *forwarded_args, *remaining_args] - runpy.run_path(str(lab_script), run_name="__main__") - - -if __name__ == "__main__": - main() diff --git a/isaaclab_arena/tasks/events.py b/isaaclab_arena/tasks/events.py index 066d51049..cb61b3108 100644 --- a/isaaclab_arena/tasks/events.py +++ b/isaaclab_arena/tasks/events.py @@ -3,19 +3,14 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Task-specific event terms.""" from __future__ import annotations import torch -from collections.abc import Callable -from typing import TYPE_CHECKING, Literal, cast +from typing import TYPE_CHECKING, Literal import isaaclab.utils.math as math_utils -import warp as wp -from isaaclab.assets import Articulation -from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg -from isaaclab_tasks.direct.automate import factory_control as fc +from isaaclab.managers import SceneEntityCfg from isaaclab_tasks.manager_based.manipulation.stack.mdp.franka_stack_events import sample_object_poses if TYPE_CHECKING: @@ -90,250 +85,3 @@ def randomize_poses_and_align_auxiliary_assets( rel_asset.write_root_velocity_to_sim( torch.zeros(1, 6, device=env.device), env_ids=torch.tensor([cur_env], device=env.device) ) - - -class place_gear_in_gripper(ManagerTermBase): - """Place the held gear in the gripper during reset.""" - - def __init__(self, cfg: EventTermCfg, env: ManagerBasedEnv): - super().__init__(cfg, env) - - robot_cfg: SceneEntityCfg = cfg.params.get("robot_cfg", SceneEntityCfg("robot")) - self._robot_name = robot_cfg.name - self.robot: Articulation = env.scene[robot_cfg.name] - - gear_cfg: SceneEntityCfg = cfg.params["gear_cfg"] - self._gear_name = gear_cfg.name - self.gear = env.scene[gear_cfg.name] - - self.hand_grasp_width: float = cast(float, cfg.params["hand_grasp_width"]) - self.hand_close_width: float = cast(float, cfg.params["hand_close_width"]) - self.gripper_joint_setter_func: Callable[..., None] = cast( - Callable[..., None], - cfg.params["gripper_joint_setter_func"], - ) - - grasp_rot_offset = cfg.params["grasp_rot_offset"] - self._grasp_rot_offset_values = tuple(grasp_rot_offset) - self.grasp_rot_offset_tensor = ( - torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32).unsqueeze(0).repeat(env.num_envs, 1) - ) - - grasp_offset = cfg.params["grasp_offset"] - self._grasp_offset_values = tuple(grasp_offset) - self.grasp_offset_tensor = torch.tensor(grasp_offset, device=env.device, dtype=torch.float32) - - end_effector_body_name: str = cast(str, cfg.params.get("end_effector_body_name", "panda_hand")) - finger_joint_names: str = cast(str, cfg.params.get("finger_joint_names", "panda_finger_joint[1-2]")) - self._eef_name = end_effector_body_name - self._finger_joint_names = finger_joint_names - self._resolve_robot_indices(end_effector_body_name, finger_joint_names) - - self._max_ik_iterations: int = cfg.params.get("max_ik_iterations", 10) - self._pos_threshold: float = cfg.params.get("pos_threshold", 1e-6) - self._rot_threshold: float = cfg.params.get("rot_threshold", 1e-6) - - def _resolve_robot_indices(self, end_effector_body_name: str, finger_joint_names: str) -> None: - eef_indices, _ = self.robot.find_bodies([end_effector_body_name]) - if not eef_indices: - raise ValueError(f"End-effector body '{end_effector_body_name}' not found in robot") - self.eef_idx: int = eef_indices[0] - self.jacobi_body_idx: int = self.eef_idx - 1 - - self.all_joints, _ = self.robot.find_joints([".*"]) - self.finger_joints, _ = self.robot.find_joints([finger_joint_names]) - if not self.finger_joints: - raise ValueError(f"Finger joints '{finger_joint_names}' not found in robot") - - def _set_gripper_width( - self, - joint_pos: torch.Tensor, - width: float, - gripper_joint_setter_func: Callable[..., None], - ) -> None: - row_indices = torch.arange(joint_pos.shape[0], device=joint_pos.device) - gripper_joint_setter_func(joint_pos, row_indices, self.finger_joints, width) - - def _sync_sim_state(self, env: ManagerBasedEnv) -> None: - env.scene.write_data_to_sim() - env.sim.forward() - env.scene.update(dt=0.0) - - def _update_cached_assets( - self, - env: ManagerBasedEnv, - robot_cfg: SceneEntityCfg, - gear_cfg: SceneEntityCfg | None, - end_effector_body_name: str, - finger_joint_names: str, - ) -> None: - """Refresh cached scene handles and indices when call-time cfgs change.""" - resolve_robot_indices = ( - end_effector_body_name != self._eef_name or finger_joint_names != self._finger_joint_names - ) - if robot_cfg.name != self._robot_name: - self._robot_name = robot_cfg.name - self.robot = env.scene[robot_cfg.name] - resolve_robot_indices = True - - if resolve_robot_indices: - self._resolve_robot_indices(end_effector_body_name, finger_joint_names) - self._eef_name = end_effector_body_name - self._finger_joint_names = finger_joint_names - - if gear_cfg is not None and gear_cfg.name != self._gear_name: - self._gear_name = gear_cfg.name - self.gear = env.scene[gear_cfg.name] - - def _get_grasp_offsets( - self, - env_ids: torch.Tensor, - num_envs: int, - device: torch.device, - grasp_rot_offset: list[float] | None, - grasp_offset: list[float] | None, - ) -> tuple[torch.Tensor, torch.Tensor]: - """Return batched grasp offsets for the selected environments.""" - if grasp_rot_offset is None or tuple(grasp_rot_offset) == self._grasp_rot_offset_values: - grasp_rot_offset_tensor = self.grasp_rot_offset_tensor[env_ids] - else: - grasp_rot_offset_tensor = torch.tensor(grasp_rot_offset, device=device, dtype=torch.float32).repeat( - num_envs, 1 - ) - - if grasp_offset is None or tuple(grasp_offset) == self._grasp_offset_values: - grasp_offset_batch = self.grasp_offset_tensor.unsqueeze(0).expand(num_envs, -1) - else: - grasp_offset_batch = ( - torch.tensor(grasp_offset, device=device, dtype=torch.float32).unsqueeze(0).expand(num_envs, -1) - ) - - return grasp_rot_offset_tensor, grasp_offset_batch - - def _run_grasp_ik( - self, - env: ManagerBasedEnv, - env_ids: torch.Tensor, - grasp_rot_offset: torch.Tensor, - grasp_offset: torch.Tensor, - max_ik_iterations: int, - pos_threshold: float, - rot_threshold: float, - ) -> torch.Tensor: - """Move the end-effector to the gear grasp pose using iterative DLS IK. - - Each iteration computes the world-frame grasp pose from the current gear pose, - applies one damped-least-squares joint update, and flushes the simulation state - so the next iteration sees updated body transforms. - """ - joint_vel = torch.zeros(len(env_ids), len(self.all_joints), device=env.device) - for _ in range(max_ik_iterations): - # Get the current robot state for this IK iteration. - joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() - joint_vel = wp.to_torch(self.robot.data.joint_vel)[env_ids].clone() - - # Build the target grasp pose from the current gear pose. - gear_pos_w = wp.to_torch(self.gear.data.root_link_pos_w)[env_ids].clone() - gear_quat_w = wp.to_torch(self.gear.data.root_link_quat_w)[env_ids].clone() - target_quat = math_utils.quat_mul(gear_quat_w, grasp_rot_offset) - target_pos = gear_pos_w + math_utils.quat_apply(target_quat, grasp_offset) - - # Compute the end-effector pose error. - eef_pos = wp.to_torch(self.robot.data.body_pos_w)[env_ids, self.eef_idx] - eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] - pos_error, aa_error = fc.get_pose_error( - fingertip_midpoint_pos=eef_pos, - fingertip_midpoint_quat=eef_quat, - ctrl_target_fingertip_midpoint_pos=target_pos, - ctrl_target_fingertip_midpoint_quat=target_quat, - jacobian_type="geometric", - rot_error_type="axis_angle", - ) - delta_hand_pose = torch.cat((pos_error, aa_error), dim=-1) - - if ( - torch.norm(pos_error, dim=-1).max() < pos_threshold - and torch.norm(aa_error, dim=-1).max() < rot_threshold - ): - break - - # Solve one DLS IK step. - jacobians = wp.to_torch(self.robot.root_view.get_jacobians()).clone() - jacobian = jacobians[env_ids, self.jacobi_body_idx, :, :] - delta_dof_pos = fc._get_delta_dof_pos( - delta_pose=delta_hand_pose, - ik_method="dls", - jacobian=jacobian, - device=env.device, - ) - - # Write the updated joint state and refresh sim tensors before the next iteration. - joint_pos = joint_pos + delta_dof_pos - joint_vel = torch.zeros_like(joint_pos) - self.robot.set_joint_position_target_index(target=joint_pos, env_ids=env_ids) - self.robot.set_joint_velocity_target_index(target=joint_vel, env_ids=env_ids) - self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) - self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) - self._sync_sim_state(env) - - return joint_vel - - def _write_gripper_width( - self, - env: ManagerBasedEnv, - env_ids: torch.Tensor, - joint_pos: torch.Tensor, - joint_vel: torch.Tensor, - width: float, - gripper_joint_setter_func: Callable[..., None], - ) -> None: - """Write one gripper width and flush the reset state to the simulator.""" - self._set_gripper_width(joint_pos, width, gripper_joint_setter_func) - self.robot.set_joint_position_target_index( - target=joint_pos, - joint_ids=self.all_joints, - env_ids=env_ids, - ) - self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) - self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) - self._sync_sim_state(env) - - def __call__( - self, - env: ManagerBasedEnv, - env_ids: torch.Tensor, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - gear_cfg: SceneEntityCfg | None = None, - hand_grasp_width: float | None = None, - hand_close_width: float | None = None, - gripper_joint_setter_func: Callable[..., None] | None = None, - end_effector_body_name: str = "panda_hand", - finger_joint_names: str = "panda_finger_joint[1-2]", - grasp_rot_offset: list[float] | None = None, - grasp_offset: list[float] | None = None, - max_ik_iterations: int | None = None, - pos_threshold: float | None = None, - rot_threshold: float | None = None, - ) -> None: - self._update_cached_assets(env, robot_cfg, gear_cfg, end_effector_body_name, finger_joint_names) - n = len(env_ids) - grasp_rot_offset_tensor, grasp_offset_batch = self._get_grasp_offsets( - env_ids, n, env.device, grasp_rot_offset, grasp_offset - ) - joint_vel = self._run_grasp_ik( - env=env, - env_ids=env_ids, - grasp_rot_offset=grasp_rot_offset_tensor, - grasp_offset=grasp_offset_batch, - max_ik_iterations=self._max_ik_iterations if max_ik_iterations is None else max_ik_iterations, - pos_threshold=self._pos_threshold if pos_threshold is None else pos_threshold, - rot_threshold=self._rot_threshold if rot_threshold is None else rot_threshold, - ) - - joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() - gripper_joint_setter = gripper_joint_setter_func or self.gripper_joint_setter_func - for width in ( - self.hand_grasp_width if hand_grasp_width is None else hand_grasp_width, - self.hand_close_width if hand_close_width is None else hand_close_width, - ): - self._write_gripper_width(env, env_ids, joint_pos, joint_vel, width, gripper_joint_setter) diff --git a/isaaclab_arena/tasks/nist_gear_insertion/__init__.py b/isaaclab_arena/tasks/nist_gear_insertion/__init__.py new file mode 100644 index 000000000..11d432141 --- /dev/null +++ b/isaaclab_arena/tasks/nist_gear_insertion/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Task modules for NIST gear insertion.""" + +from .events import GraspCfg, place_gear_in_gripper +from .task import GearInsertionGeometryCfg, NistGearInsertionRLTask + +__all__ = [ + "GearInsertionGeometryCfg", + "GraspCfg", + "NistGearInsertionRLTask", + "place_gear_in_gripper", +] diff --git a/isaaclab_arena/tasks/nist_gear_insertion/events.py b/isaaclab_arena/tasks/nist_gear_insertion/events.py new file mode 100644 index 000000000..45529df19 --- /dev/null +++ b/isaaclab_arena/tasks/nist_gear_insertion/events.py @@ -0,0 +1,318 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Reset event terms for NIST gear insertion tasks.""" + +from __future__ import annotations + +import torch +from collections.abc import Callable, Sequence +from dataclasses import field +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +import warp as wp +from isaaclab.assets import Articulation +from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg +from isaaclab.utils import configclass +from isaaclab_tasks.direct.automate import factory_control as fc + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv + + +@configclass +class GraspCfg: + """Embodiment-specific reset grasp parameters. + + The task owns the reset event, but the embodiment supplies the hand body, + finger joints, offsets, and gripper-width setter because those details are + robot-specific. + """ + + hand_grasp_width: float = 0.03 + """Opening width used before flushing the reset grasp state.""" + + hand_close_width: float = 0.0 + """Final gripper target width after the gear is placed.""" + + gripper_joint_setter_func: Callable[..., None] | None = None + """Callable that maps a total opening width to embodiment-specific joints.""" + + end_effector_body_name: str = "panda_hand" + """Robot body used as the IK end-effector for reset grasp placement.""" + + finger_joint_names: str = "panda_finger_joint[1-2]" + """Joint-name expression for the gripper fingers.""" + + grasp_rot_offset: list[float] = field(default_factory=lambda: [0.0, 0.0, 0.0, 1.0]) + """Gear-frame to end-effector grasp rotation in Isaac Lab xyzw convention.""" + + grasp_offset: list[float] = field(default_factory=lambda: [0.0, 0.0, 0.0]) + """Gear-frame translation from the gear root to the end-effector grasp pose.""" + + arm_joint_names: str = "panda_joint.*" + """Arm joints affected by task reset randomization.""" + + max_ik_iterations: int = 10 + """Maximum DLS IK updates used to place the hand at the grasp frame.""" + + pos_threshold: float = 1e-6 + """Position-error threshold for stopping reset-time IK.""" + + rot_threshold: float = 1e-6 + """Axis-angle error threshold for stopping reset-time IK.""" + + +class place_gear_in_gripper(ManagerTermBase): + """Place the held gear in the robot gripper during reset. + + The event follows the Factory/Forge reset pattern: solve IK to move the hand + to a configured grasp frame on the gear, open to the grasp width, flush the + simulator state, then set the final closed target. The final target is not + written as a teleport so the controller maintains the grasp after reset. + """ + + def __init__(self, cfg: EventTermCfg, env: ManagerBasedEnv): + super().__init__(cfg, env) + gear_cfg = cfg.params["gear_cfg"] + grasp_cfg = cfg.params["grasp_cfg"] + robot_cfg = cfg.params.get("robot_cfg", SceneEntityCfg("robot")) + self.robot: Articulation = env.scene[robot_cfg.name] + + self.gear = env.scene[gear_cfg.name] + + self.hand_grasp_width = grasp_cfg.hand_grasp_width + self.hand_close_width = grasp_cfg.hand_close_width + assert grasp_cfg.gripper_joint_setter_func is not None, "A gripper joint setter is required for grasp reset." + self.gripper_joint_setter_func = grasp_cfg.gripper_joint_setter_func + + # Cache default grasp offsets as tensors because this event runs every + # reset and often across thousands of environments. + self.grasp_rot_offset_tensor = ( + torch.tensor(grasp_cfg.grasp_rot_offset, device=env.device, dtype=torch.float32) + .unsqueeze(0) + .repeat(env.num_envs, 1) + ) + + self.grasp_offset_tensor = torch.tensor(grasp_cfg.grasp_offset, device=env.device, dtype=torch.float32) + + self._resolve_robot_indices(grasp_cfg.end_effector_body_name, grasp_cfg.finger_joint_names) + + self._max_ik_iterations = grasp_cfg.max_ik_iterations + self._pos_threshold = grasp_cfg.pos_threshold + self._rot_threshold = grasp_cfg.rot_threshold + + def _resolve_robot_indices(self, end_effector_body_name: str, finger_joint_names: str) -> None: + """Resolve the body and joint indices needed by reset-time IK.""" + eef_indices, _ = self.robot.find_bodies([end_effector_body_name]) + if not eef_indices: + raise ValueError(f"End-effector body '{end_effector_body_name}' not found in robot") + self.eef_idx = eef_indices[0] + # Body index is shifted by one when indexing ``root_view.get_jacobians()``. + self.jacobi_body_idx = self.eef_idx - 1 + assert self.jacobi_body_idx >= 0, "End-effector body must not be the articulation root." + + self.all_joints, _ = self.robot.find_joints([".*"]) + self.finger_joints, _ = self.robot.find_joints([finger_joint_names]) + if not self.finger_joints: + raise ValueError(f"Finger joints '{finger_joint_names}' not found in robot") + + def _set_gripper_width( + self, + joint_pos: torch.Tensor, + width: float, + gripper_joint_setter_func: Callable[..., None], + ) -> None: + """Update the selected finger joints for the requested opening width.""" + row_indices = torch.arange(joint_pos.shape[0], device=joint_pos.device) + gripper_joint_setter_func(joint_pos, row_indices, self.finger_joints, width) + + def _sync_sim_state(self, env: ManagerBasedEnv) -> None: + """Flush written joint state before reading body poses again.""" + env.scene.write_data_to_sim() + env.sim.forward() + env.scene.update(dt=0.0) + + def _run_grasp_ik( + self, + env: ManagerBasedEnv, + env_ids: Sequence[int] | slice | torch.Tensor, + num_envs: int, + grasp_rot_offset: torch.Tensor, + grasp_offset: torch.Tensor, + ) -> torch.Tensor: + """Move the end-effector to the gear grasp pose using iterative DLS IK. + + Each iteration computes the world-frame grasp pose from the current gear pose, + applies one damped-least-squares joint update, and flushes the simulation state + so the next iteration sees updated body transforms. + """ + joint_vel = torch.zeros(num_envs, len(self.all_joints), device=env.device) + for _ in range(self._max_ik_iterations): + joint_pos, joint_vel = self._read_joint_state(env_ids) + target_pos, target_quat = self._compute_grasp_target_pose(env_ids, grasp_rot_offset, grasp_offset) + delta_hand_pose, pos_error, aa_error = self._compute_pose_error(env_ids, target_pos, target_quat) + + if self._has_converged(pos_error, aa_error): + break + + delta_dof_pos = self._compute_dls_joint_delta(env, env_ids, delta_hand_pose) + joint_pos = joint_pos + delta_dof_pos + joint_vel = torch.zeros_like(joint_pos) + self._write_ik_joint_state(env, env_ids, joint_pos, joint_vel) + + return joint_vel + + def _read_joint_state( + self, + env_ids: Sequence[int] | slice | torch.Tensor, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return current joint positions and velocities for the reset IK solve.""" + joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() + joint_vel = wp.to_torch(self.robot.data.joint_vel)[env_ids].clone() + return joint_pos, joint_vel + + def _compute_grasp_target_pose( + self, + env_ids: Sequence[int] | slice | torch.Tensor, + grasp_rot_offset: torch.Tensor, + grasp_offset: torch.Tensor, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return the world-frame hand pose for the configured gear grasp.""" + gear_pos_w = wp.to_torch(self.gear.data.root_link_pos_w)[env_ids].clone() + gear_quat_w = wp.to_torch(self.gear.data.root_link_quat_w)[env_ids].clone() + target_quat = math_utils.quat_mul(gear_quat_w, grasp_rot_offset) + target_pos = gear_pos_w + math_utils.quat_apply(target_quat, grasp_offset) + return target_pos, target_quat + + def _compute_pose_error( + self, + env_ids: Sequence[int] | slice | torch.Tensor, + target_pos: torch.Tensor, + target_quat: torch.Tensor, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Return Cartesian pose error used by the Factory DLS IK routine.""" + eef_pos = wp.to_torch(self.robot.data.body_pos_w)[env_ids, self.eef_idx] + eef_quat = wp.to_torch(self.robot.data.body_quat_w)[env_ids, self.eef_idx] + pos_error, aa_error = fc.get_pose_error( + fingertip_midpoint_pos=eef_pos, + fingertip_midpoint_quat=eef_quat, + ctrl_target_fingertip_midpoint_pos=target_pos, + ctrl_target_fingertip_midpoint_quat=target_quat, + jacobian_type="geometric", + rot_error_type="axis_angle", + ) + return torch.cat((pos_error, aa_error), dim=-1), pos_error, aa_error + + def _has_converged(self, pos_error: torch.Tensor, aa_error: torch.Tensor) -> bool: + """Return whether the reset IK solve is within configured tolerances.""" + return ( + torch.norm(pos_error, dim=-1).max() < self._pos_threshold + and torch.norm(aa_error, dim=-1).max() < self._rot_threshold + ) + + def _compute_dls_joint_delta( + self, + env: ManagerBasedEnv, + env_ids: Sequence[int] | slice | torch.Tensor, + delta_hand_pose: torch.Tensor, + ) -> torch.Tensor: + """Return one damped-least-squares joint update for the hand pose error.""" + # Isaac Lab articulation Jacobians are indexed by non-root body id; + # the shifted index is resolved once in ``_resolve_robot_indices``. + jacobians = wp.to_torch(self.robot.root_view.get_jacobians()).clone() + jacobian = jacobians[env_ids, self.jacobi_body_idx, :, :] + return fc._get_delta_dof_pos( + delta_pose=delta_hand_pose, + ik_method="dls", + jacobian=jacobian, + device=env.device, + ) + + def _write_ik_joint_state( + self, + env: ManagerBasedEnv, + env_ids: Sequence[int] | slice | torch.Tensor, + joint_pos: torch.Tensor, + joint_vel: torch.Tensor, + ) -> None: + """Write one IK update and refresh tensors before the next iteration.""" + self.robot.set_joint_position_target_index(target=joint_pos, env_ids=env_ids) + self.robot.set_joint_velocity_target_index(target=joint_vel, env_ids=env_ids) + self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) + self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) + self._sync_sim_state(env) + + def _write_gripper_width( + self, + env: ManagerBasedEnv, + env_ids: Sequence[int] | slice | torch.Tensor, + joint_pos: torch.Tensor, + joint_vel: torch.Tensor, + width: float, + gripper_joint_setter_func: Callable[..., None], + ) -> None: + """Write one gripper width and flush the reset state to the simulator.""" + self._set_gripper_width(joint_pos, width, gripper_joint_setter_func) + self.robot.set_joint_position_target_index( + target=joint_pos, + joint_ids=self.all_joints, + env_ids=env_ids, + ) + self.robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids) + self.robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids) + self._sync_sim_state(env) + + def _set_gripper_width_target( + self, + env_ids: Sequence[int] | slice | torch.Tensor, + joint_pos: torch.Tensor, + width: float, + gripper_joint_setter_func: Callable[..., None], + ) -> None: + """Set the final gripper target without teleporting the sim joint state.""" + self._set_gripper_width(joint_pos, width, gripper_joint_setter_func) + self.robot.set_joint_position_target_index( + target=joint_pos, + joint_ids=self.all_joints, + env_ids=env_ids, + ) + + def __call__( + self, + env: ManagerBasedEnv, + env_ids: Sequence[int] | slice | torch.Tensor, + gear_cfg: SceneEntityCfg, + grasp_cfg: GraspCfg, + robot_cfg: SceneEntityCfg | None = None, + ) -> None: + """Run the reset grasp sequence for ``env_ids``. + + The event manager passes ``gear_cfg``, ``grasp_cfg``, and ``robot_cfg`` + on every call; this term resolves and caches them in ``__init__``. + """ + grasp_rot_offset_tensor = self.grasp_rot_offset_tensor[env_ids] + num_envs = grasp_rot_offset_tensor.shape[0] + grasp_offset_batch = self.grasp_offset_tensor.unsqueeze(0).expand(num_envs, -1) + joint_vel = self._run_grasp_ik( + env=env, + env_ids=env_ids, + num_envs=num_envs, + grasp_rot_offset=grasp_rot_offset_tensor, + grasp_offset=grasp_offset_batch, + ) + + joint_pos = wp.to_torch(self.robot.data.joint_pos)[env_ids].clone() + + self._write_gripper_width( + env, + env_ids, + joint_pos, + joint_vel, + self.hand_grasp_width, + self.gripper_joint_setter_func, + ) + self._set_gripper_width_target(env_ids, joint_pos, self.hand_close_width, self.gripper_joint_setter_func) diff --git a/isaaclab_arena/tasks/nist_gear_insertion/geometry.py b/isaaclab_arena/tasks/nist_gear_insertion/geometry.py new file mode 100644 index 000000000..2ad2cd41f --- /dev/null +++ b/isaaclab_arena/tasks/nist_gear_insertion/geometry.py @@ -0,0 +1,150 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Shared geometry utilities for gear insertion. + +The frame math follows the Isaac Lab Factory/Forge insertion tasks: represent +the peg and held-gear insertion points as local offsets on their owning assets, +then compare those points in each environment frame. +""" + +from __future__ import annotations + +import torch + +import isaaclab.utils.math as math_utils +import warp as wp +from isaaclab.assets import RigidObject +from isaaclab.envs import ManagerBasedRLEnv +from isaaclab.managers import SceneEntityCfg + + +def resolve_offset_tensor( + values: list[float] | tuple[float, ...] | None, + cached_values: tuple[float, ...], + cached_tensor: torch.Tensor, + device: torch.device, +) -> torch.Tensor: + """Return an offset tensor on ``device``. + + Reward and termination terms cache their default offsets at construction. + Manager call-time parameters may still override those values, so this function + keeps the common "use cached tensor unless explicitly overridden" path in + one place. + """ + if values is None or tuple(values) == cached_values: + return cached_tensor + return torch.tensor(values, device=device, dtype=torch.float32) + + +def compute_asset_local_offset_pose( + env: ManagerBasedRLEnv, + asset: RigidObject, + offset: tuple[float, ...] | torch.Tensor, + num_envs: int | None = None, +) -> tuple[torch.Tensor, torch.Tensor]: + """Return an asset-local offset pose in each environment frame. + + Isaac Lab stores rigid-object root poses in world coordinates. Gear + insertion rewards and terminations compare geometry in the per-environment + frame, so this function subtracts ``env.scene.env_origins`` before rotating + the local offset into the asset root frame. + """ + num_envs = env.num_envs if num_envs is None else num_envs + root_pos = wp.to_torch(asset.data.root_pos_w)[:num_envs] - env.scene.env_origins[:num_envs] + root_quat = wp.to_torch(asset.data.root_quat_w)[:num_envs] + offset_tensor = torch.as_tensor(offset, device=env.device, dtype=torch.float32) + offset_tensor = offset_tensor.unsqueeze(0).expand(num_envs, 3) + return root_pos + math_utils.quat_apply(root_quat, offset_tensor), root_quat + + +def compute_asset_local_offset_pos( + env: ManagerBasedRLEnv, + asset: RigidObject, + offset: tuple[float, ...] | torch.Tensor, +) -> torch.Tensor: + """Return an asset-local offset position in each environment frame.""" + pos, _ = compute_asset_local_offset_pose(env, asset, offset) + return pos + + +def peg_pos_in_env_frame( + env: ManagerBasedRLEnv, + board_cfg: SceneEntityCfg, + peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), +) -> torch.Tensor: + """Return the target peg position in each environment frame. + + ``peg_offset`` is measured in the fixed board or gear-base asset frame. + """ + board: RigidObject = env.scene[board_cfg.name] + return compute_asset_local_offset_pos(env, board, peg_offset) + + +def held_gear_base_pos_in_env_frame( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + held_gear_base_offset: tuple[float, ...] = (0.0, 0.0, 0.0), +) -> torch.Tensor: + """Return the held gear insertion point in each environment frame. + + The held gear's root is not necessarily the point that should align with + the peg, so insertion logic compares this offset point against the peg. + """ + gear: RigidObject = env.scene[gear_cfg.name] + return compute_asset_local_offset_pos(env, gear, held_gear_base_offset) + + +def peg_delta_from_held_gear_base( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + board_cfg: SceneEntityCfg, + peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), + held_gear_base_offset: tuple[float, ...] = (0.0, 0.0, 0.0), +) -> torch.Tensor: + """Return the vector from the held gear insertion point to the peg.""" + held_base = held_gear_base_pos_in_env_frame(env, gear_cfg, held_gear_base_offset) + peg_pos = peg_pos_in_env_frame(env, board_cfg, peg_offset) + return peg_pos - held_base + + +def check_gear_insertion_geometry( + held_base_pos: torch.Tensor, + peg_pos: torch.Tensor, + gear_peg_height: float, + z_fraction: float, + xy_threshold: float, +) -> torch.Tensor: + """Return whether the gear is centered and inserted on the peg. + + Success is geometric: the insertion point must be within the XY tolerance + and below the configured fraction of the peg height. Lower Z values mean the + gear has moved farther down the peg. + """ + xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) + z_diff = held_base_pos[:, 2] - peg_pos[:, 2] + return (xy_dist < xy_threshold) & (z_diff < gear_peg_height * z_fraction) + + +def compute_gear_insertion_success( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + board_cfg: SceneEntityCfg, + peg_offset: tuple[float, ...] | torch.Tensor, + held_gear_base_offset: tuple[float, ...] | torch.Tensor, + gear_peg_height: float, + z_fraction: float, + xy_threshold: float, +) -> torch.Tensor: + """Return whether the held gear meets the insertion success geometry. + + This is the shared implementation used by rewards, terminations, and + OSC-specific auxiliary losses so they agree on the success label. + """ + gear: RigidObject = env.scene[gear_cfg.name] + board: RigidObject = env.scene[board_cfg.name] + held_base_pos, _ = compute_asset_local_offset_pose(env, gear, held_gear_base_offset) + peg_pos, _ = compute_asset_local_offset_pose(env, board, peg_offset) + return check_gear_insertion_geometry(held_base_pos, peg_pos, gear_peg_height, z_fraction, xy_threshold) diff --git a/isaaclab_arena/tasks/nist_gear_insertion/observations.py b/isaaclab_arena/tasks/nist_gear_insertion/observations.py new file mode 100644 index 000000000..1808cd331 --- /dev/null +++ b/isaaclab_arena/tasks/nist_gear_insertion/observations.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Generic observation terms for gear insertion tasks. + +This module intentionally contains reusable task observations. The packed +OSC policy observation lives in the environment MDP package because it depends +on OSC action-term state. +""" + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +import warp as wp +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import quat_unique + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def _resolve_body_id(robot: Articulation, body_name: str) -> int: + """Return the single body id matching ``body_name``.""" + body_ids, body_names = robot.find_bodies([body_name]) + if len(body_ids) != 1: + raise ValueError(f"Expected one body matching '{body_name}', found {len(body_ids)}: {body_names}.") + return body_ids[0] + + +def body_pos_in_env_frame( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Return a robot body position in each environment frame. + + Isaac Lab body positions are world-frame values; subtracting environment + origins makes the observation invariant to scene cloning offsets. + """ + robot: Articulation = env.scene[robot_cfg.name] + body_id = _resolve_body_id(robot, body_name) + return wp.to_torch(robot.data.body_pos_w)[:, body_id] - env.scene.env_origins + + +def body_quat_canonical( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + body_name: str = "panda_fingertip_centered", +) -> torch.Tensor: + """Return a robot body quaternion with a unique sign convention.""" + robot: Articulation = env.scene[robot_cfg.name] + body_id = _resolve_body_id(robot, body_name) + quat = wp.to_torch(robot.data.body_quat_w)[:, body_id] + return quat_unique(quat) diff --git a/isaaclab_arena/tasks/nist_gear_insertion/rewards.py b/isaaclab_arena/tasks/nist_gear_insertion/rewards.py new file mode 100644 index 000000000..423dce20a --- /dev/null +++ b/isaaclab_arena/tasks/nist_gear_insertion/rewards.py @@ -0,0 +1,234 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Reward terms for gear insertion tasks. + +The generic rewards in this module only depend on insertion geometry: keypoint +alignment around the peg and sparse bonuses for engagement or completion. +Controller-specific penalties live in the environment MDP package. +""" + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import RigidObject +from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg +from isaaclab.utils.math import combine_frame_transforms +from isaaclab_tasks.direct.factory.factory_utils import get_keypoint_offsets, squashing_fn + +from isaaclab_arena.tasks.nist_gear_insertion import geometry as gear_geometry + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +class _KeypointDistanceComputer: + """Keypoint distance calculator with reusable buffers. + + Factory/Forge insertion rewards compare multiple keypoints around the + target and held-object frames instead of comparing only frame origins. The + keypoint offsets and identity quaternions are allocated once here to avoid + per-step tensor construction. + """ + + def __init__(self, num_envs: int, device: torch.device, num_keypoints: int = 4): + self.offsets_base = get_keypoint_offsets(num_keypoints, device) + self.n_kp = self.offsets_base.shape[0] + self.identity_quat = ( + torch.tensor([[0.0, 0.0, 0.0, 1.0]], device=device, dtype=torch.float32) + .repeat(num_envs * self.n_kp, 1) + .contiguous() + ) + self.offsets_buf = torch.zeros(num_envs, self.n_kp, 3, device=device, dtype=torch.float32) + + def compute( + self, + pos_a: torch.Tensor, + quat_a: torch.Tensor, + pos_b: torch.Tensor, + quat_b: torch.Tensor, + scale: float = 1.0, + ) -> torch.Tensor: + """Return the mean L2 distance between transformed keypoint sets.""" + n = pos_a.shape[0] + offsets = self.offsets_base * scale + self.offsets_buf[:n] = offsets.unsqueeze(0) + off_flat = self.offsets_buf[:n].reshape(-1, 3) + iq = self.identity_quat[: n * self.n_kp] + + def _expand(t: torch.Tensor) -> torch.Tensor: + return t.unsqueeze(1).expand(-1, self.n_kp, -1).reshape(-1, t.shape[-1]) + + kp_a, _ = combine_frame_transforms(_expand(pos_a), _expand(quat_a), off_flat, iq) + kp_b, _ = combine_frame_transforms(_expand(pos_b), _expand(quat_b), off_flat, iq) + per_kp_dist = torch.norm(kp_b.reshape(n, self.n_kp, 3) - kp_a.reshape(n, self.n_kp, 3), p=2, dim=-1) + return per_kp_dist.mean(-1) + + +class gear_peg_keypoint_squashing(ManagerTermBase): + """Factory-style keypoint reward for gear-to-peg alignment. + + The term aligns the held gear insertion frame to the target peg frame using + a squashed keypoint distance. The squashing parameters are configured by the + reward group to provide coarse-to-fine shaping with the same geometry. + """ + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self._peg_offset_values = tuple(cfg.params["peg_offset"]) + self.peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) + self._held_gear_base_offset_values = tuple(cfg.params["held_gear_base_offset"]) + self.held_gear_base_offset = torch.tensor( + self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 + ) + self._xy_noise_range = cfg.params.get("peg_offset_xy_noise", 0.0) + self._num_keypoints: int = cfg.params.get("num_keypoints", 4) + self.kp = _KeypointDistanceComputer(env.num_envs, env.device, num_keypoints=self._num_keypoints) + self._offset_noise = torch.zeros(env.num_envs, 3, device=env.device, dtype=torch.float32) + + def reset(self, env_ids: torch.Tensor | None = None) -> None: + """Sample the per-episode XY target perturbation.""" + if self._xy_noise_range <= 0.0: + return + if env_ids is None: + env_ids = torch.arange(self._offset_noise.shape[0], device=self._offset_noise.device) + if len(env_ids) == 0: + return + + n = len(env_ids) + noise_dev = self._offset_noise.device + self._offset_noise[env_ids, 0] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range + self._offset_noise[env_ids, 1] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range + + def __call__( + self, + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + board_cfg: SceneEntityCfg, + peg_offset: list[float] | None = None, + held_gear_base_offset: list[float] | None = None, + keypoint_scale: float = 0.15, + num_keypoints: int = 4, + peg_offset_xy_noise: float = 0.0, + squash_a: float = 50.0, + squash_b: float = 2.0, + ) -> torch.Tensor: + """Return the squashed keypoint alignment reward.""" + self._validate_num_keypoints(num_keypoints) + held_base_pos, gear_quat = self._compute_held_base_pose(env, gear_cfg, held_gear_base_offset) + target_pos, target_quat = self._compute_target_pose(env, board_cfg, peg_offset) + target_pos = self._apply_target_noise(target_pos, peg_offset_xy_noise) + return self._compute_squashed_keypoint_reward( + target_pos, + target_quat, + held_base_pos, + gear_quat, + keypoint_scale, + squash_a, + squash_b, + ) + + def _validate_num_keypoints(self, num_keypoints: int) -> None: + """Validate that the reward uses the keypoint layout allocated at construction.""" + if num_keypoints != self._num_keypoints: + raise ValueError( + f"num_keypoints is fixed at term initialization. Expected {self._num_keypoints}, got {num_keypoints}." + ) + + def _compute_held_base_pose( + self, + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + held_gear_base_offset: list[float] | None, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return the held gear insertion-point pose in each environment frame.""" + gear: RigidObject = env.scene[gear_cfg.name] + held_gear_base_offset_tensor = gear_geometry.resolve_offset_tensor( + held_gear_base_offset, + self._held_gear_base_offset_values, + self.held_gear_base_offset, + env.device, + ) + return gear_geometry.compute_asset_local_offset_pose(env, gear, held_gear_base_offset_tensor) + + def _compute_target_pose( + self, + env: ManagerBasedRLEnv, + board_cfg: SceneEntityCfg, + peg_offset: list[float] | None, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return the target peg pose in each environment frame.""" + board: RigidObject = env.scene[board_cfg.name] + peg_offset_tensor = gear_geometry.resolve_offset_tensor( + peg_offset, self._peg_offset_values, self.peg_offset, env.device + ) + return gear_geometry.compute_asset_local_offset_pose(env, board, peg_offset_tensor) + + def _apply_target_noise(self, target_pos: torch.Tensor, peg_offset_xy_noise: float) -> torch.Tensor: + """Apply the per-reset XY target offset used for insertion robustness.""" + if peg_offset_xy_noise <= 0.0: + return target_pos + return target_pos + self._offset_noise[: target_pos.shape[0]] + + def _compute_squashed_keypoint_reward( + self, + target_pos: torch.Tensor, + target_quat: torch.Tensor, + held_base_pos: torch.Tensor, + gear_quat: torch.Tensor, + keypoint_scale: float, + squash_a: float, + squash_b: float, + ) -> torch.Tensor: + """Return the squashed mean distance between target and held-gear keypoints.""" + kp_dist = self.kp.compute(target_pos, target_quat, held_base_pos, gear_quat, scale=keypoint_scale) + return squashing_fn(kp_dist, squash_a, squash_b) + + +class gear_insertion_geometry_bonus(ManagerTermBase): + """Bonus when the gear satisfies the configured insertion geometry. + + Different reward terms reuse this class with different ``z_fraction`` values: + one for early peg engagement and one for the final success depth. + """ + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self._peg_offset_values = tuple(cfg.params["peg_offset"]) + self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) + self._held_gear_base_offset_values = tuple(cfg.params["held_gear_base_offset"]) + self._held_gear_base_offset = torch.tensor( + self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 + ) + + def __call__( + self, + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + board_cfg: SceneEntityCfg, + gear_peg_height: float, + z_fraction: float, + xy_threshold: float, + peg_offset: list[float] | None = None, + held_gear_base_offset: list[float] | None = None, + ) -> torch.Tensor: + """Return a binary insertion-geometry bonus as a float tensor.""" + return gear_geometry.compute_gear_insertion_success( + env, + gear_cfg, + board_cfg, + gear_geometry.resolve_offset_tensor(peg_offset, self._peg_offset_values, self._peg_offset, env.device), + gear_geometry.resolve_offset_tensor( + held_gear_base_offset, + self._held_gear_base_offset_values, + self._held_gear_base_offset, + env.device, + ), + gear_peg_height, + z_fraction, + xy_threshold, + ).float() diff --git a/isaaclab_arena/tasks/nist_gear_insertion_task.py b/isaaclab_arena/tasks/nist_gear_insertion/task.py similarity index 51% rename from isaaclab_arena/tasks/nist_gear_insertion_task.py rename to isaaclab_arena/tasks/nist_gear_insertion/task.py index 6b7c14e4f..7fd684fbc 100644 --- a/isaaclab_arena/tasks/nist_gear_insertion_task.py +++ b/isaaclab_arena/tasks/nist_gear_insertion/task.py @@ -3,14 +3,17 @@ # # SPDX-License-Identifier: Apache-2.0 -"""NIST gear insertion task.""" +"""NIST gear insertion task. + +The task wiring follows the structure of Isaac Lab Factory/Forge insertion +tasks, with Arena assets and task registration. +""" from __future__ import annotations import math import numpy as np -from collections.abc import Callable -from dataclasses import MISSING, dataclass, field +from dataclasses import MISSING, field import isaaclab.envs.mdp as mdp_isaac_lab from isaaclab.envs.common import ViewerCfg @@ -24,19 +27,26 @@ from isaaclab_arena.embodiments.common.arm_mode import ArmMode from isaaclab_arena.metrics.metric_base import MetricBase from isaaclab_arena.metrics.success_rate import SuccessRateMetric -from isaaclab_arena.tasks.events import place_gear_in_gripper -from isaaclab_arena.tasks.observations import gear_insertion_observations -from isaaclab_arena.tasks.rewards import gear_insertion_rewards from isaaclab_arena.tasks.task_base import TaskBase -from isaaclab_arena.tasks.terminations import gear_dropped_from_gripper, gear_mesh_insertion_success from isaaclab_arena.utils.cameras import get_viewer_cfg_look_at_object +from . import geometry as gear_insertion_geometry +from . import observations as gear_insertion_observations +from . import rewards as gear_insertion_rewards +from .events import GraspCfg, place_gear_in_gripper +from .terminations import gear_dropped_from_gripper, gear_mesh_insertion_success + _DEFAULT_PEG_OFFSET = (2.025e-2, 0.0, 0.0) -@dataclass +@configclass class GearInsertionGeometryCfg: - """Geometry parameters for the insertion target.""" + """Geometry parameters shared by observations, rewards, and terminations. + + Offsets are expressed in the local frame of their asset. ``peg_offset_for_obs`` + may differ from ``peg_offset_from_board`` when the policy should target the + peg tip while success checks use the peg base. + """ peg_offset_from_board: list[float] = field(default_factory=lambda: list(_DEFAULT_PEG_OFFSET)) peg_offset_for_obs: list[float] | None = None @@ -47,22 +57,14 @@ class GearInsertionGeometryCfg: peg_offset_xy_noise: float = 0.005 -@dataclass -class GraspCfg: - """Embodiment-specific reset grasp parameters.""" - - hand_grasp_width: float = 0.03 - hand_close_width: float = 0.0 - gripper_joint_setter_func: Callable | None = None - end_effector_body_name: str = "panda_hand" - finger_joint_names: str = "panda_finger_joint[1-2]" - grasp_rot_offset: list[float] = field(default_factory=lambda: [0.0, 0.0, 0.0, 1.0]) - grasp_offset: list[float] = field(default_factory=lambda: [0.0, 0.0, 0.0]) - arm_joint_names: str = "panda_joint.*" - +class NistGearInsertionRLTask(TaskBase): + """RL task for inserting the held gear onto a fixed NIST peg. -class NistGearInsertionTask(TaskBase): - """Task for inserting the held gear onto a fixed NIST peg.""" + The task owns scene-level wiring: generic observations and geometry rewards, + reset events, success/drop terminations, and success-rate metrics. The OSC + policy observation and controller penalties are configured by the + environment MDP layer because they depend on a specific action term. + """ def __init__( self, @@ -74,9 +76,10 @@ def __init__( episode_length_s: float | None = None, task_description: str | None = None, grasp_cfg: GraspCfg | None = None, + fingertip_body_name: str = "panda_fingertip_centered", enable_randomization: bool = False, disable_drop_terminations: bool = True, - rl_training_mode: bool = False, + disable_success_termination: bool = False, ): super().__init__(episode_length_s=episode_length_s, task_description=task_description) self.assembled_board = assembled_board @@ -86,17 +89,24 @@ def __init__( self._gear_base_asset = gear_base_asset if gear_base_asset is not None else assembled_board self.geometry_cfg = geometry_cfg if geometry_cfg is not None else GearInsertionGeometryCfg() self.grasp_cfg = grasp_cfg + self.fingertip_body_name = fingertip_body_name self.enable_randomization = enable_randomization self.disable_drop_terminations = disable_drop_terminations - self.rl_training_mode = rl_training_mode + self.disable_success_termination = disable_success_termination if self.task_description is None: self.task_description = f"Insert the {held_gear.name} onto the gear base on the {assembled_board.name}" def get_scene_cfg(self): - return None + """Return no additional scene config. + + Arena constructs the scene from assets supplied by the environment. + """ def get_observation_cfg(self): + """Return generic task observations for gear and peg geometry.""" geometry_cfg = self.geometry_cfg + # Policies can observe the peg tip while geometry rewards and + # terminations evaluate insertion against the peg base. peg_obs = ( geometry_cfg.peg_offset_for_obs if geometry_cfg.peg_offset_for_obs is not None @@ -107,9 +117,11 @@ def get_observation_cfg(self): board_name=self._gear_base_asset.name, peg_offset=peg_obs, held_gear_base_offset=geometry_cfg.held_gear_base_offset, + fingertip_body_name=self.fingertip_body_name, ) def get_rewards_cfg(self): + """Return geometry-only reward terms for the task.""" geometry_cfg = self.geometry_cfg return GearInsertionRewardsCfg( gear_name=self.held_gear.name, @@ -123,6 +135,7 @@ def get_rewards_cfg(self): ) def get_termination_cfg(self): + """Return success and optional drop termination terms.""" geometry_cfg = self.geometry_cfg success = TerminationTermCfg( func=gear_mesh_insertion_success, @@ -134,7 +147,7 @@ def get_termination_cfg(self): "gear_peg_height": geometry_cfg.gear_peg_height, "success_z_fraction": geometry_cfg.success_z_fraction, "xy_threshold": geometry_cfg.xy_threshold, - "rl_training": self.rl_training_mode, + "disable_success_termination": self.disable_success_termination, }, ) @@ -161,84 +174,98 @@ def get_termination_cfg(self): return cfg def get_events_cfg(self): + """Return reset and randomization events for gear insertion.""" cfg = GearInsertionEventsCfg() + self._add_grasp_reset_event(cfg) + if self.enable_randomization: + self._add_randomization_events(cfg) + return cfg + + def _add_grasp_reset_event(self, cfg: GearInsertionEventsCfg) -> None: + """Add the reset event that places the held gear in the gripper.""" grasp_cfg = self.grasp_cfg if grasp_cfg is not None and grasp_cfg.gripper_joint_setter_func is not None: + # The held gear does not use its default reset pose in this task; + # the embodiment-specific grasp event places it in the gripper. cfg.place_gear = EventTermCfg( func=place_gear_in_gripper, mode="reset", params={ "gear_cfg": SceneEntityCfg(self.held_gear.name), - "hand_grasp_width": grasp_cfg.hand_grasp_width, - "hand_close_width": grasp_cfg.hand_close_width, - "gripper_joint_setter_func": grasp_cfg.gripper_joint_setter_func, - "end_effector_body_name": grasp_cfg.end_effector_body_name, - "finger_joint_names": grasp_cfg.finger_joint_names, - "grasp_rot_offset": grasp_cfg.grasp_rot_offset, - "grasp_offset": grasp_cfg.grasp_offset, - }, - ) - if self.enable_randomization: - if grasp_cfg is None: - raise ValueError("NIST gear insertion randomization requires an embodiment grasp configuration.") - arm_joints = grasp_cfg.arm_joint_names - cfg.held_object_mass = EventTermCfg( - func=mdp_isaac_lab.randomize_rigid_body_mass, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg(self.held_gear.name), - "mass_distribution_params": (-0.005, 0.005), - "operation": "add", - "distribution": "uniform", - }, - ) - cfg.fixed_asset_pose = EventTermCfg( - func=mdp_isaac_lab.reset_root_state_uniform, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg(self._gear_base_asset.name), - "pose_range": { - "x": (0.0, 0.0), - "y": (0.0, 0.0), - "z": (0.0, 0.0), - "roll": (0.0, 0.0), - "pitch": (0.0, 0.0), - "yaw": (0.0, math.radians(15.0)), - }, - "velocity_range": {}, - }, - ) - cfg.robot_actuator_gains = EventTermCfg( - func=mdp_isaac_lab.randomize_actuator_gains, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg("robot", joint_names=arm_joints), - "stiffness_distribution_params": (0.75, 1.5), - "damping_distribution_params": (0.3, 3.0), - "operation": "scale", - "distribution": "log_uniform", + "grasp_cfg": grasp_cfg, }, ) - cfg.robot_joint_friction = EventTermCfg( - func=mdp_isaac_lab.randomize_joint_parameters, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg("robot", joint_names=arm_joints), - "friction_distribution_params": (0.3, 0.7), - "operation": "add", - "distribution": "uniform", + + def _add_randomization_events(self, cfg: GearInsertionEventsCfg) -> None: + """Add optional Factory/Forge-style domain randomization events.""" + grasp_cfg = self.grasp_cfg + assert grasp_cfg is not None, "NIST gear insertion randomization requires an embodiment grasp configuration." + arm_joints = grasp_cfg.arm_joint_names + + self._add_asset_randomization_events(cfg) + self._add_robot_randomization_events(cfg, arm_joints) + + def _add_asset_randomization_events(self, cfg: GearInsertionEventsCfg) -> None: + """Add reset randomization for the held gear and fixed gear base.""" + cfg.held_object_mass = EventTermCfg( + func=mdp_isaac_lab.randomize_rigid_body_mass, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg(self.held_gear.name), + "mass_distribution_params": (-0.005, 0.005), + "operation": "add", + "distribution": "uniform", + }, + ) + cfg.fixed_asset_pose = EventTermCfg( + func=mdp_isaac_lab.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg(self._gear_base_asset.name), + # Keep translational variation fixed; only the board yaw is + # randomized as in the Forge insertion setup. + "pose_range": { + "yaw": (0.0, math.radians(15.0)), }, - ) - return cfg + "velocity_range": {}, + }, + ) + + def _add_robot_randomization_events(self, cfg: GearInsertionEventsCfg, arm_joints: str) -> None: + """Add actuator and joint-parameter randomization for the arm.""" + cfg.robot_actuator_gains = EventTermCfg( + func=mdp_isaac_lab.randomize_actuator_gains, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=arm_joints), + "stiffness_distribution_params": (0.75, 1.5), + "damping_distribution_params": (0.3, 3.0), + "operation": "scale", + "distribution": "log_uniform", + }, + ) + cfg.robot_joint_friction = EventTermCfg( + func=mdp_isaac_lab.randomize_joint_parameters, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=arm_joints), + "friction_distribution_params": (0.3, 0.7), + "operation": "add", + "distribution": "uniform", + }, + ) def get_mimic_env_cfg(self, arm_mode: ArmMode): + """Raise because this task currently only defines the RL setup.""" del arm_mode raise NotImplementedError("NIST gear insertion does not define a Mimic configuration yet.") def get_metrics(self) -> list[MetricBase]: + """Return task metrics used during evaluation.""" return [SuccessRateMetric()] def get_viewer_cfg(self) -> ViewerCfg: + """Return a camera view focused on the held gear and peg area.""" return get_viewer_cfg_look_at_object( lookat_object=self.held_gear, offset=np.array([1.5, -0.5, 1.0]), @@ -247,17 +274,25 @@ def get_viewer_cfg(self) -> ViewerCfg: @configclass class GearInsertionTerminationsCfg: - """Termination terms for the gear insertion task.""" + """Termination terms for the gear insertion task. + + Success is always registered so metrics can read it. Drop checks are + optional and are disabled for recovery-focused RL training. + """ time_out: TerminationTermCfg = TerminationTermCfg(func=mdp_isaac_lab.time_out) success: TerminationTermCfg = MISSING - object_dropped: TerminationTermCfg | None = MISSING + object_dropped: TerminationTermCfg | None = None gear_dropped_from_gripper: TerminationTermCfg | None = None @configclass class GearInsertionEventsCfg: - """Reset and randomization events for gear insertion.""" + """Reset and randomization events for gear insertion. + + ``reset_all`` restores the base scene first; task-specific terms then place + the held gear in the gripper and apply optional domain randomization. + """ reset_all: EventTermCfg = EventTermCfg( func=mdp_isaac_lab.reset_scene_to_default, @@ -271,11 +306,35 @@ class GearInsertionEventsCfg: robot_joint_friction: EventTermCfg | None = None +@configclass +class GearInsertionTaskObsCfg(ObsGroup): + """Generic observation group for gear insertion task state. + + These terms expose object poses, peg geometry, robot joints, and noiseless + end-effector pose. Controller-specific policy packing remains outside this + generic task config. + """ + + gear_pos: ObsTerm = MISSING + gear_quat: ObsTerm = MISSING + peg_pos: ObsTerm = MISSING + board_quat: ObsTerm = MISSING + peg_delta: ObsTerm = MISSING + joint_pos: ObsTerm = MISSING + joint_vel: ObsTerm = MISSING + ee_pos_noiseless: ObsTerm = MISSING + ee_quat_noiseless: ObsTerm = MISSING + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + @configclass class GearInsertionObservationsCfg: - """Task-specific observations for the gear insertion task.""" + """Observation config for the generic gear insertion task state.""" - task_obs: ObsGroup = MISSING + task_obs: GearInsertionTaskObsCfg = MISSING def __init__( self, @@ -283,73 +342,67 @@ def __init__( board_name: str, peg_offset: list[float], held_gear_base_offset: list[float] | None = None, + fingertip_body_name: str = "panda_fingertip_centered", ): held_offset = held_gear_base_offset if held_gear_base_offset is not None else [2.025e-2, 0.0, 0.0] + self.task_obs = GearInsertionTaskObsCfg() - @configclass - class TaskObsCfg(ObsGroup): - gear_pos = ObsTerm( - func=mdp_isaac_lab.root_pos_w, - params={"asset_cfg": SceneEntityCfg(gear_name)}, - ) - gear_quat = ObsTerm( - func=mdp_isaac_lab.root_quat_w, - params={"make_quat_unique": True, "asset_cfg": SceneEntityCfg(gear_name)}, - ) - peg_pos = ObsTerm( - func=gear_insertion_observations.peg_pos_in_env_frame, - params={"board_cfg": SceneEntityCfg(board_name), "peg_offset": peg_offset}, - ) - board_quat = ObsTerm( - func=mdp_isaac_lab.root_quat_w, - params={"make_quat_unique": True, "asset_cfg": SceneEntityCfg(board_name)}, - ) - peg_delta = ObsTerm( - func=gear_insertion_observations.peg_delta_from_held_gear_base, - params={ - "gear_cfg": SceneEntityCfg(gear_name), - "board_cfg": SceneEntityCfg(board_name), - "peg_offset": peg_offset, - "held_gear_base_offset": held_offset, - }, - ) - joint_pos = ObsTerm( - func=mdp_isaac_lab.joint_pos_rel, - params={"asset_cfg": SceneEntityCfg("robot")}, - ) - joint_vel = ObsTerm( - func=mdp_isaac_lab.joint_vel_rel, - params={"asset_cfg": SceneEntityCfg("robot")}, - ) - ee_pos_noiseless = ObsTerm( - func=gear_insertion_observations.body_pos_in_env_frame, - params={"body_name": "panda_fingertip_centered"}, - ) - ee_quat_noiseless = ObsTerm( - func=gear_insertion_observations.body_quat_canonical, - params={"body_name": "panda_fingertip_centered"}, - ) - - def __post_init__(self): - self.enable_corruption = False - self.concatenate_terms = True - - self.task_obs = TaskObsCfg() + self.task_obs.gear_pos = ObsTerm( + func=mdp_isaac_lab.root_pos_w, + params={"asset_cfg": SceneEntityCfg(gear_name)}, + ) + self.task_obs.gear_quat = ObsTerm( + func=mdp_isaac_lab.root_quat_w, + params={"make_quat_unique": True, "asset_cfg": SceneEntityCfg(gear_name)}, + ) + self.task_obs.board_quat = ObsTerm( + func=mdp_isaac_lab.root_quat_w, + params={"make_quat_unique": True, "asset_cfg": SceneEntityCfg(board_name)}, + ) + self.task_obs.peg_pos = ObsTerm( + func=gear_insertion_geometry.peg_pos_in_env_frame, + params={"board_cfg": SceneEntityCfg(board_name), "peg_offset": peg_offset}, + ) + self.task_obs.peg_delta = ObsTerm( + func=gear_insertion_geometry.peg_delta_from_held_gear_base, + params={ + "gear_cfg": SceneEntityCfg(gear_name), + "board_cfg": SceneEntityCfg(board_name), + "peg_offset": peg_offset, + "held_gear_base_offset": held_offset, + }, + ) + self.task_obs.joint_pos = ObsTerm( + func=mdp_isaac_lab.joint_pos_rel, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + self.task_obs.joint_vel = ObsTerm( + func=mdp_isaac_lab.joint_vel_rel, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + self.task_obs.ee_pos_noiseless = ObsTerm( + func=gear_insertion_observations.body_pos_in_env_frame, + params={"body_name": fingertip_body_name}, + ) + self.task_obs.ee_quat_noiseless = ObsTerm( + func=gear_insertion_observations.body_quat_canonical, + params={"body_name": fingertip_body_name}, + ) @configclass class GearInsertionRewardsCfg: - """Reward terms for gear insertion.""" + """Reward terms for gear insertion. + + Keypoint shaping and insertion-depth bonuses mirror the Factory/Forge + assembly reward structure used for peg and gear insertion. + """ kp_baseline: RewardTermCfg = MISSING kp_coarse: RewardTermCfg = MISSING kp_fine: RewardTermCfg = MISSING engagement_bonus: RewardTermCfg = MISSING success_bonus: RewardTermCfg = MISSING - action_penalty_asset: RewardTermCfg = MISSING - action_grad_penalty: RewardTermCfg = MISSING - contact_penalty: RewardTermCfg = MISSING - success_pred_error: RewardTermCfg = MISSING def __init__( self, @@ -364,7 +417,7 @@ def __init__( ): gear_cfg = SceneEntityCfg(gear_name) board_cfg = SceneEntityCfg(board_name) - common_params = { + keypoint_params = { "gear_cfg": gear_cfg, "board_cfg": board_cfg, "peg_offset": peg_offset, @@ -373,7 +426,7 @@ def __init__( "num_keypoints": 4, "peg_offset_xy_noise": peg_offset_xy_noise, } - bonus_params = { + geometry_params = { "gear_cfg": gear_cfg, "board_cfg": board_cfg, "peg_offset": peg_offset, @@ -382,57 +435,33 @@ def __init__( "xy_threshold": xy_threshold, } + # The keypoint terms share geometry and differ only by squashing + # strength, giving coarse-to-fine shaping around the peg. self.kp_baseline = RewardTermCfg( func=gear_insertion_rewards.gear_peg_keypoint_squashing, weight=1.0, - params={**common_params, "squash_a": 5.0, "squash_b": 4.0}, + params={**keypoint_params, "squash_a": 5.0, "squash_b": 4.0}, ) self.kp_coarse = RewardTermCfg( func=gear_insertion_rewards.gear_peg_keypoint_squashing, weight=1.0, - params={**common_params, "squash_a": 50.0, "squash_b": 2.0}, + params={**keypoint_params, "squash_a": 50.0, "squash_b": 2.0}, ) self.kp_fine = RewardTermCfg( func=gear_insertion_rewards.gear_peg_keypoint_squashing, weight=1.0, - params={**common_params, "squash_a": 100.0, "squash_b": 0.0}, + params={**keypoint_params, "squash_a": 100.0, "squash_b": 0.0}, ) + + # Sparse insertion bonuses use the same success geometry as the task + # termination with different insertion-depth thresholds. self.engagement_bonus = RewardTermCfg( func=gear_insertion_rewards.gear_insertion_geometry_bonus, weight=1.0, - params={**bonus_params, "z_fraction": 0.90}, + params={**geometry_params, "z_fraction": 0.90}, ) self.success_bonus = RewardTermCfg( func=gear_insertion_rewards.gear_insertion_geometry_bonus, weight=1.0, - params={**bonus_params, "z_fraction": success_z_fraction}, - ) - self.action_penalty_asset = RewardTermCfg( - func=gear_insertion_rewards.osc_action_magnitude_penalty, - weight=-0.0005, - params={}, - ) - self.action_grad_penalty = RewardTermCfg( - func=gear_insertion_rewards.osc_action_delta_penalty, - weight=-0.01, - params={}, - ) - self.contact_penalty = RewardTermCfg( - func=gear_insertion_rewards.wrist_contact_force_penalty, - weight=-0.001, - params={}, - ) - self.success_pred_error = RewardTermCfg( - func=gear_insertion_rewards.success_prediction_error, - weight=-1.0, - params={ - "gear_cfg": gear_cfg, - "board_cfg": board_cfg, - "peg_offset": peg_offset, - "held_gear_base_offset": held_gear_base_offset, - "gear_peg_height": gear_peg_height, - "success_z_fraction": success_z_fraction, - "xy_threshold": xy_threshold, - "delay_until_ratio": 0.25, - }, + params={**geometry_params, "z_fraction": success_z_fraction}, ) diff --git a/isaaclab_arena/tasks/nist_gear_insertion/terminations.py b/isaaclab_arena/tasks/nist_gear_insertion/terminations.py new file mode 100644 index 000000000..b8b0d9a3d --- /dev/null +++ b/isaaclab_arena/tasks/nist_gear_insertion/terminations.py @@ -0,0 +1,70 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Termination terms for gear insertion tasks.""" + +from __future__ import annotations + +import torch + +import warp as wp +from isaaclab.assets import Articulation, RigidObject +from isaaclab.envs import ManagerBasedRLEnv +from isaaclab.managers import SceneEntityCfg + +from isaaclab_arena.tasks.nist_gear_insertion.geometry import compute_gear_insertion_success + + +def gear_mesh_insertion_success( + env: ManagerBasedRLEnv, + held_object_cfg: SceneEntityCfg, + fixed_object_cfg: SceneEntityCfg, + gear_base_offset: tuple[float, ...], + held_gear_base_offset: list[float] | None = None, + gear_peg_height: float = 0.02, + success_z_fraction: float = 0.30, + xy_threshold: float = 0.0025, + disable_success_termination: bool = False, +) -> torch.Tensor: + """Terminate when the held gear is centered on the peg and lowered to the success depth. + + When success termination is disabled, the same term remains registered but + always returns false. This keeps success-rate metrics available during RL + training while allowing policies to continue after first success. + """ + if disable_success_termination: + return torch.zeros(env.num_envs, dtype=torch.bool, device=env.device) + return compute_gear_insertion_success( + env=env, + gear_cfg=held_object_cfg, + board_cfg=fixed_object_cfg, + peg_offset=gear_base_offset, + held_gear_base_offset=held_gear_base_offset if held_gear_base_offset is not None else gear_base_offset, + gear_peg_height=gear_peg_height, + z_fraction=success_z_fraction, + xy_threshold=xy_threshold, + ) + + +def gear_dropped_from_gripper( + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ee_body_name: str = "panda_hand", + distance_threshold: float = 0.15, +) -> torch.Tensor: + """Reset when the gear has fallen too far from the end-effector. + + This optional guard is useful for evaluation and non-training rollouts. It + is disabled during recovery-focused training so the policy can learn from + off-nominal states. + """ + gear: RigidObject = env.scene[gear_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + eef_indices, _ = robot.find_bodies([ee_body_name]) + ee_pos = wp.to_torch(robot.data.body_pos_w)[:, eef_indices[0]] + gear_pos = wp.to_torch(gear.data.root_pos_w) + distance = torch.norm(gear_pos - ee_pos, dim=-1) + return distance > distance_threshold diff --git a/isaaclab_arena/tasks/observations/gear_insertion_observations.py b/isaaclab_arena/tasks/observations/gear_insertion_observations.py deleted file mode 100644 index 9f8c14831..000000000 --- a/isaaclab_arena/tasks/observations/gear_insertion_observations.py +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Observation terms for the NIST gear insertion task.""" - -from __future__ import annotations - -import math -import torch -import torch.nn.functional as F -from typing import TYPE_CHECKING - -import isaaclab.utils.math as math_utils -import warp as wp -from isaaclab.assets import Articulation, RigidObject -from isaaclab.managers import ManagerTermBase, SceneEntityCfg -from isaaclab.utils.math import axis_angle_from_quat, quat_unique - -from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import get_nist_gear_insertion_arm_action - -if TYPE_CHECKING: - from isaaclab.envs import ManagerBasedRLEnv - from isaaclab.managers import ObservationTermCfg - - -def _offset_pos_in_env_frame( - env: ManagerBasedRLEnv, - asset: RigidObject, - offset: tuple[float, ...] | torch.Tensor, -) -> torch.Tensor: - """Return an asset-local offset position in each environment frame.""" - root_pos = wp.to_torch(asset.data.root_pos_w) - env.scene.env_origins - root_quat = wp.to_torch(asset.data.root_quat_w) - if isinstance(offset, torch.Tensor): - offset_tensor = offset.to(device=env.device, dtype=torch.float32) - else: - offset_tensor = torch.tensor(offset, device=env.device, dtype=torch.float32) - offset_tensor = offset_tensor.unsqueeze(0).expand(env.num_envs, 3) - return root_pos + math_utils.quat_apply(root_quat, offset_tensor) - - -def peg_pos_in_env_frame( - env: ManagerBasedRLEnv, - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), -) -> torch.Tensor: - """Return the target peg position in each environment frame.""" - board: RigidObject = env.scene[board_cfg.name] - return _offset_pos_in_env_frame(env, board, peg_offset) - - -def held_gear_base_pos_in_env_frame( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), -) -> torch.Tensor: - """Return the held gear insertion point in each environment frame.""" - gear: RigidObject = env.scene[gear_cfg.name] - return _offset_pos_in_env_frame(env, gear, held_gear_base_offset) - - -def body_pos_in_env_frame( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "panda_fingertip_centered", -) -> torch.Tensor: - """Return a robot body position in each environment frame.""" - robot: Articulation = env.scene[robot_cfg.name] - body_id = robot.body_names.index(body_name) - return wp.to_torch(robot.data.body_pos_w)[:, body_id] - env.scene.env_origins - - -def body_quat_canonical( - env: ManagerBasedRLEnv, - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - body_name: str = "panda_fingertip_centered", -) -> torch.Tensor: - """Return a robot body quaternion with a unique sign convention.""" - robot: Articulation = env.scene[robot_cfg.name] - body_id = robot.body_names.index(body_name) - quat = wp.to_torch(robot.data.body_quat_w)[:, body_id] - return quat_unique(quat) - - -def check_gear_insertion_geometry( - held_base_pos: torch.Tensor, - peg_pos: torch.Tensor, - gear_peg_height: float, - z_fraction: float, - xy_threshold: float, -) -> torch.Tensor: - """Return whether the gear is centered and inserted on the peg. - - Args: - held_base_pos: Position of the held gear insertion base. - peg_pos: Position of the target peg. - gear_peg_height: Physical height of the peg. - z_fraction: Fraction of peg height used for the insertion threshold. - xy_threshold: Maximum XY distance from the peg center. - - Returns: - Boolean tensor indicating insertion success. - """ - xy_dist = torch.norm(held_base_pos[:, :2] - peg_pos[:, :2], dim=-1) - z_diff = held_base_pos[:, 2] - peg_pos[:, 2] - return (xy_dist < xy_threshold) & (z_diff < gear_peg_height * z_fraction) - - -def peg_delta_from_held_gear_base( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: tuple[float, ...] = (0.0, 0.0, 0.0), - held_gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), -) -> torch.Tensor: - """Return the vector from the held gear insertion point to the peg.""" - held_base = held_gear_base_pos_in_env_frame(env, gear_cfg, held_gear_base_offset) - peg_pos = peg_pos_in_env_frame(env, board_cfg, peg_offset) - return peg_pos - held_base - - -class NistGearInsertionPolicyObservations(ManagerTermBase): - """Policy observation term for OSC-based NIST gear insertion. - - Output layout (per env):: - - fingertip_pos_rel_fixed (3) - fingertip_quat (4) - ee_linvel (3) - ee_angvel (3) - ft_force (3) - force_threshold (1) - prev_actions (7) - """ - - def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): - super().__init__(cfg, env) - - p = cfg.params - self._robot_name: str = p.get("robot_name", "robot") - self._board_name: str = p.get("board_name", "gears_and_base") - self._peg_offset_values = tuple(p.get("peg_offset", [0.0, 0.0, 0.0])) - self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device) - self._fingertip_body: str = p.get("fingertip_body_name", "panda_fingertip_centered") - self._force_body: str = p.get("force_body_name", "force_sensor") - - self._pos_noise: float = p.get("pos_noise_level", 0.00025) - self._rot_noise_deg: float = p.get("rot_noise_level_deg", 0.1) - self._force_noise: float = p.get("force_noise_level", 1.0) - - n = env.num_envs - dev = env.device - - self._fingertip_idx: int | None = None - self._body_key = (self._robot_name, self._fingertip_body, self._force_body) - - self._flip_quats = torch.ones(n, device=dev) - self._prev_noisy_pos = torch.zeros(n, 3, device=dev) - self._prev_noisy_quat = torch.zeros(n, 4, device=dev) - self._prev_noisy_quat[:, 3] = 1.0 - - def _resolve_fingertip_idx( - self, - robot_name: str, - fingertip_body_name: str, - force_body_name: str, - ) -> int: - """Resolve the fingertip body index used by the observation term.""" - body_key = (robot_name, fingertip_body_name, force_body_name) - if self._fingertip_idx is not None and body_key == self._body_key: - return self._fingertip_idx - - robot: Articulation = self._env.scene[robot_name] - for body_name in (fingertip_body_name, force_body_name): - if body_name not in robot.body_names: - raise ValueError( - f"Body '{body_name}' is missing from robot '{robot_name}'. Use a USD that defines this " - "body or override the corresponding observation parameter." - ) - fingertip_idx = robot.body_names.index(fingertip_body_name) - if body_key == self._body_key: - self._fingertip_idx = fingertip_idx - return fingertip_idx - - def _resolve_peg_offset(self, peg_offset: list[float] | None, device: torch.device) -> torch.Tensor: - """Return the cached peg offset unless the manager supplies different values.""" - if peg_offset is None or tuple(peg_offset) == self._peg_offset_values: - return self._peg_offset - return torch.tensor(peg_offset, device=device, dtype=torch.float32) - - def _sample_noisy_pose( - self, - env: ManagerBasedRLEnv, - ft_pos: torch.Tensor, - ft_quat: torch.Tensor, - pos_noise_level: float, - rot_noise_level_deg: float, - ) -> tuple[torch.Tensor, torch.Tensor]: - """Return fingertip pose with configured sensor noise.""" - pos_noise = torch.randn(env.num_envs, 3, device=env.device) * pos_noise_level - noisy_pos = ft_pos + pos_noise - - rot_noise_axis = torch.randn(env.num_envs, 3, device=env.device) - rot_noise_axis = F.normalize(rot_noise_axis, dim=1, eps=1e-8) - rot_noise_angle = torch.randn(env.num_envs, device=env.device) * math.radians(rot_noise_level_deg) - noisy_quat = math_utils.quat_mul( - ft_quat, - math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), - ) - return noisy_pos, quat_unique(noisy_quat) - - def _compute_fingertip_target_delta( - self, - env: ManagerBasedRLEnv, - noisy_pos: torch.Tensor, - board_name: str, - peg_offset: torch.Tensor, - ) -> torch.Tensor: - """Return fingertip position relative to the noisy peg target.""" - board: RigidObject = env.scene[board_name] - arm_osc_action = get_nist_gear_insertion_arm_action(env) - peg_pos = _offset_pos_in_env_frame(env, board, peg_offset) - noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise - return noisy_pos - noisy_fixed_pos - - def _compute_velocities( - self, - noisy_pos: torch.Tensor, - noisy_quat: torch.Tensor, - dt: float, - ) -> tuple[torch.Tensor, torch.Tensor]: - """Estimate fingertip linear and angular velocity from noisy poses.""" - safe_dt = max(dt, 1e-6) - ee_linvel = (noisy_pos - self._prev_noisy_pos) / safe_dt - - rot_diff = math_utils.quat_mul(noisy_quat, math_utils.quat_conjugate(self._prev_noisy_quat)) - rot_diff = quat_unique(rot_diff) - ee_angvel = axis_angle_from_quat(rot_diff) / safe_dt - ee_angvel[:, 0:2] = 0.0 - - self._prev_noisy_pos[:] = noisy_pos - self._prev_noisy_quat[:] = noisy_quat - return ee_linvel, ee_angvel - - def _read_smoothed_force( - self, - env: ManagerBasedRLEnv, - arm_osc_action, - force_noise_level: float, - ) -> torch.Tensor: - """Return the smoothed wrist force with configured observation noise.""" - return arm_osc_action.force_smooth + torch.randn(env.num_envs, 3, device=env.device) * force_noise_level - - def reset(self, env_ids: list[int] | None = None): - """Reset cached noisy pose state for the selected environments.""" - if env_ids is None or len(env_ids) == 0: - return - - n = len(env_ids) - dev = self._env.device - - flip = torch.ones(n, device=dev) - flip[torch.rand(n, device=dev) > 0.5] = -1.0 - self._flip_quats[env_ids] = flip - - fingertip_idx = self._resolve_fingertip_idx(self._robot_name, self._fingertip_body, self._force_body) - robot: Articulation = self._env.scene[self._robot_name] - origins = self._env.scene.env_origins - self._prev_noisy_pos[env_ids] = wp.to_torch(robot.data.body_pos_w)[env_ids, fingertip_idx] - origins[env_ids] - reset_quat = wp.to_torch(robot.data.body_quat_w)[env_ids, fingertip_idx] - self._prev_noisy_quat[env_ids] = quat_unique(reset_quat) - - def __call__( - self, - env: ManagerBasedRLEnv, - robot_name: str | None = None, - board_name: str | None = None, - peg_offset: list[float] | None = None, - fingertip_body_name: str | None = None, - force_body_name: str | None = None, - pos_noise_level: float | None = None, - rot_noise_level_deg: float | None = None, - force_noise_level: float | None = None, - ) -> torch.Tensor: - """Return the 24-D policy observation tensor.""" - robot_name = self._robot_name if robot_name is None else robot_name - board_name = self._board_name if board_name is None else board_name - fingertip_body_name = self._fingertip_body if fingertip_body_name is None else fingertip_body_name - force_body_name = self._force_body if force_body_name is None else force_body_name - pos_noise_level = self._pos_noise if pos_noise_level is None else pos_noise_level - rot_noise_level_deg = self._rot_noise_deg if rot_noise_level_deg is None else rot_noise_level_deg - force_noise_level = self._force_noise if force_noise_level is None else force_noise_level - - peg_offset_tensor = self._resolve_peg_offset(peg_offset, env.device) - fingertip_idx = self._resolve_fingertip_idx(robot_name, fingertip_body_name, force_body_name) - - dt = env.step_dt - - robot: Articulation = env.scene[robot_name] - origins = env.scene.env_origins - - ft_pos = wp.to_torch(robot.data.body_pos_w)[:, fingertip_idx] - origins - ft_quat = wp.to_torch(robot.data.body_quat_w)[:, fingertip_idx] - - noisy_pos, noisy_quat_full = self._sample_noisy_pose(env, ft_pos, ft_quat, pos_noise_level, rot_noise_level_deg) - fingertip_pos_rel = self._compute_fingertip_target_delta(env, noisy_pos, board_name, peg_offset_tensor) - ee_linvel, ee_angvel = self._compute_velocities(noisy_pos, noisy_quat_full, dt) - - obs_quat = noisy_quat_full.clone() - # The gear is symmetric about the peg axis, so the policy observes the tilt quaternion up to sign. - obs_quat[:, [2, 3]] = 0.0 - obs_quat = obs_quat * self._flip_quats.unsqueeze(-1) - - arm_osc_action = get_nist_gear_insertion_arm_action(env) - noisy_force = self._read_smoothed_force(env, arm_osc_action, force_noise_level) - - force_threshold = arm_osc_action.contact_thresholds.unsqueeze(-1) - - prev_actions = arm_osc_action.smoothed_actions.clone() - prev_actions[:, 3:5] = 0.0 - - obs = torch.cat( - [ - fingertip_pos_rel, - obs_quat, - ee_linvel, - ee_angvel, - noisy_force, - force_threshold, - prev_actions, - ], - dim=-1, - ) - return torch.nan_to_num(obs, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) diff --git a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py b/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py deleted file mode 100644 index 312be010f..000000000 --- a/isaaclab_arena/tasks/rewards/gear_insertion_rewards.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Reward terms for the NIST gear insertion task.""" - -from __future__ import annotations - -import torch -from typing import TYPE_CHECKING - -import warp as wp -from isaaclab.assets import RigidObject -from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg -from isaaclab.utils.math import combine_frame_transforms, quat_apply -from isaaclab_tasks.direct.factory.factory_utils import get_keypoint_offsets, squashing_fn - -from isaaclab_arena.tasks.observations.gear_insertion_observations import check_gear_insertion_geometry -from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import get_nist_gear_insertion_arm_action - -if TYPE_CHECKING: - from isaaclab.envs import ManagerBasedRLEnv - - -class _KeypointDistanceComputer: - """Keypoint distance calculator with reusable buffers.""" - - def __init__(self, num_envs: int, device: torch.device, num_keypoints: int = 4): - self.offsets_base = get_keypoint_offsets(num_keypoints, device) - self.n_kp = self.offsets_base.shape[0] - self.identity_quat = ( - torch.tensor([[0.0, 0.0, 0.0, 1.0]], device=device, dtype=torch.float32) - .repeat(num_envs * self.n_kp, 1) - .contiguous() - ) - self.offsets_buf = torch.zeros(num_envs, self.n_kp, 3, device=device, dtype=torch.float32) - - def compute( - self, - pos_a: torch.Tensor, - quat_a: torch.Tensor, - pos_b: torch.Tensor, - quat_b: torch.Tensor, - scale: float = 1.0, - ) -> torch.Tensor: - """Return mean keypoint L2 distance.""" - n = pos_a.shape[0] - offsets = self.offsets_base * scale - self.offsets_buf[:n] = offsets.unsqueeze(0) - off_flat = self.offsets_buf[:n].reshape(-1, 3) - iq = self.identity_quat[: n * self.n_kp] - - def _expand(t: torch.Tensor) -> torch.Tensor: - return t.unsqueeze(1).expand(-1, self.n_kp, -1).reshape(-1, t.shape[-1]) - - kp_a, _ = combine_frame_transforms(_expand(pos_a), _expand(quat_a), off_flat, iq) - kp_b, _ = combine_frame_transforms(_expand(pos_b), _expand(quat_b), off_flat, iq) - per_kp_dist = torch.norm(kp_b.reshape(n, self.n_kp, 3) - kp_a.reshape(n, self.n_kp, 3), p=2, dim=-1) - return per_kp_dist.mean(-1) - - -def _resolve_offset_tensor( - values: list[float] | None, - cached_values: tuple[float, ...], - cached_tensor: torch.Tensor, - device: torch.device, -) -> torch.Tensor: - """Return the cached offset tensor unless the manager supplies different values.""" - if values is None or tuple(values) == cached_values: - return cached_tensor - return torch.tensor(values, device=device, dtype=torch.float32) - - -def _offset_pose_in_env_frame( - env: ManagerBasedRLEnv, - asset: RigidObject, - offset: torch.Tensor, - num_envs: int, -) -> tuple[torch.Tensor, torch.Tensor]: - """Return an asset-local offset pose in each environment frame.""" - root_pos = wp.to_torch(asset.data.root_pos_w)[:num_envs] - env.scene.env_origins[:num_envs] - root_quat = wp.to_torch(asset.data.root_quat_w)[:num_envs] - offset = offset.unsqueeze(0).expand(num_envs, 3) - return root_pos + quat_apply(root_quat, offset), root_quat - - -class gear_peg_keypoint_squashing(ManagerTermBase): - """Squashing-function keypoint reward for gear-to-peg alignment.""" - - def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): - super().__init__(cfg, env) - self._peg_offset_values = tuple(cfg.params.get("peg_offset", [0.0, 0.0, 0.0])) - self.peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) - self._held_gear_base_offset_values = tuple(cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0])) - self.held_gear_base_offset = torch.tensor( - self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 - ) - self._xy_noise_range = cfg.params.get("peg_offset_xy_noise", 0.0) - self._num_keypoints: int = cfg.params.get("num_keypoints", 4) - self.kp = _KeypointDistanceComputer(env.num_envs, env.device, num_keypoints=self._num_keypoints) - self._offset_noise = torch.zeros(env.num_envs, 3, device=env.device, dtype=torch.float32) - - def reset(self, env_ids: torch.Tensor | None = None) -> None: - if self._xy_noise_range <= 0.0: - return - if env_ids is None: - env_ids = torch.arange(self._offset_noise.shape[0], device=self._offset_noise.device) - elif isinstance(env_ids, slice): - env_ids = torch.arange(self._offset_noise.shape[0], device=self._offset_noise.device)[env_ids] - if len(env_ids) == 0: - return - - n = len(env_ids) - noise_dev = self._offset_noise.device - self._offset_noise[env_ids, 0] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range - self._offset_noise[env_ids, 1] = (torch.rand(n, device=noise_dev) * 2 - 1) * self._xy_noise_range - - def __call__( - self, - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] | None = None, - held_gear_base_offset: list[float] | None = None, - keypoint_scale: float = 0.15, - num_keypoints: int = 4, - peg_offset_xy_noise: float = 0.0, - squash_a: float = 50.0, - squash_b: float = 2.0, - ) -> torch.Tensor: - if num_keypoints != self._num_keypoints: - raise ValueError( - f"num_keypoints is fixed at term initialization. Expected {self._num_keypoints}, got {num_keypoints}." - ) - - n = env.num_envs - gear: RigidObject = env.scene[gear_cfg.name] - held_gear_base_offset_tensor = _resolve_offset_tensor( - held_gear_base_offset, - self._held_gear_base_offset_values, - self.held_gear_base_offset, - env.device, - ) - held_base_pos, gear_quat = _offset_pose_in_env_frame(env, gear, held_gear_base_offset_tensor, n) - - board: RigidObject = env.scene[board_cfg.name] - peg_offset_tensor = _resolve_offset_tensor(peg_offset, self._peg_offset_values, self.peg_offset, env.device) - target_pos, target_quat = _offset_pose_in_env_frame(env, board, peg_offset_tensor, n) - offset_noise = self._offset_noise[:n] if peg_offset_xy_noise > 0.0 else 0.0 - target_pos = target_pos + offset_noise - kp_dist = self.kp.compute(target_pos, target_quat, held_base_pos, gear_quat, scale=keypoint_scale) - return squashing_fn(kp_dist, squash_a, squash_b) - - -def _compute_gear_position_success( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg, - board_cfg: SceneEntityCfg, - peg_offset: torch.Tensor, - held_gear_base_offset: torch.Tensor, - gear_peg_height: float, - z_fraction: float, - xy_threshold: float, -) -> torch.Tensor: - """Return whether the gear meets the XY-centering and Z-depth thresholds.""" - gear: RigidObject = env.scene[gear_cfg.name] - held_base_pos, _ = _offset_pose_in_env_frame(env, gear, held_gear_base_offset, env.num_envs) - - board: RigidObject = env.scene[board_cfg.name] - peg_pos, _ = _offset_pose_in_env_frame(env, board, peg_offset, env.num_envs) - - return check_gear_insertion_geometry(held_base_pos, peg_pos, gear_peg_height, z_fraction, xy_threshold) - - -class gear_insertion_geometry_bonus(ManagerTermBase): - """Bonus when the gear satisfies the configured insertion geometry.""" - - def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): - super().__init__(cfg, env) - self._peg_offset_values = tuple(cfg.params.get("peg_offset", [0.0, 0.0, 0.0])) - self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) - self._held_gear_base_offset_values = tuple(cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0])) - self._held_gear_base_offset = torch.tensor( - self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 - ) - - def __call__( - self, - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] | None = None, - held_gear_base_offset: list[float] | None = None, - gear_peg_height: float = 0.02, - z_fraction: float = 0.05, - xy_threshold: float = 0.015, - ) -> torch.Tensor: - return _compute_gear_position_success( - env, - gear_cfg, - board_cfg, - _resolve_offset_tensor(peg_offset, self._peg_offset_values, self._peg_offset, env.device), - _resolve_offset_tensor( - held_gear_base_offset, - self._held_gear_base_offset_values, - self._held_gear_base_offset, - env.device, - ), - gear_peg_height, - z_fraction, - xy_threshold, - ).float() - - -class osc_action_magnitude_penalty(ManagerTermBase): - """Penalize large asset-relative position and yaw commands.""" - - def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: - action_term = get_nist_gear_insertion_arm_action(env) - pos_scale = action_term.position_thresholds.clamp_min(1.0e-6) - rot_scale = action_term.rotation_thresholds[:, 2].clamp_min(1.0e-6) - pos_error = torch.norm(action_term.delta_pos / pos_scale, p=2, dim=-1) - rot_error = torch.abs(action_term.delta_yaw) / rot_scale - return pos_error + rot_error - - -class osc_action_delta_penalty(ManagerTermBase): - """Penalize jerky actions using smoothed action deltas.""" - - def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: - action_term = get_nist_gear_insertion_arm_action(env) - return torch.norm( - action_term.smoothed_actions - action_term.previous_smoothed_actions, - p=2, - dim=-1, - ) - - -class wrist_contact_force_penalty(ManagerTermBase): - """Penalize wrist/contact force magnitude above per-episode threshold.""" - - def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: - action_term = get_nist_gear_insertion_arm_action(env) - force_mag = torch.norm(action_term.force_smooth, p=2, dim=-1) - return torch.nn.functional.relu(force_mag - action_term.contact_thresholds) - - -class success_prediction_error(ManagerTermBase): - """Penalize incorrect success predictions from the seventh action dimension.""" - - def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): - super().__init__(cfg, env) - self._pred_scale = 0.0 - self._held_gear_base_offset_values = tuple(cfg.params.get("held_gear_base_offset", [2.025e-2, 0.0, 0.0])) - self._held_gear_base_offset = torch.tensor( - self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 - ) - self._peg_offset_values = tuple(cfg.params.get("peg_offset", [0.0, 0.0, 0.0])) - self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) - - def __call__( - self, - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - board_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - peg_offset: list[float] | None = None, - held_gear_base_offset: list[float] | None = None, - gear_peg_height: float = 0.02, - success_z_fraction: float = 0.05, - xy_threshold: float = 0.0025, - delay_until_ratio: float = 0.25, - ) -> torch.Tensor: - true_success = _compute_gear_position_success( - env, - gear_cfg, - board_cfg, - _resolve_offset_tensor(peg_offset, self._peg_offset_values, self._peg_offset, env.device), - _resolve_offset_tensor( - held_gear_base_offset, - self._held_gear_base_offset_values, - self._held_gear_base_offset, - env.device, - ), - gear_peg_height, - success_z_fraction, - xy_threshold, - ) - - # Once enough environments have reached the success geometry, keep the auxiliary loss enabled. - if true_success.float().mean() >= delay_until_ratio: - self._pred_scale = 1.0 - - arm_osc_action = get_nist_gear_insertion_arm_action(env) - pred = (arm_osc_action.success_pred + 1.0) / 2.0 - error = torch.abs(true_success.float() - pred) - return error * self._pred_scale diff --git a/isaaclab_arena/tasks/terminations.py b/isaaclab_arena/tasks/terminations.py index 5ec081a2d..8559fc0a2 100644 --- a/isaaclab_arena/tasks/terminations.py +++ b/isaaclab_arena/tasks/terminations.py @@ -6,17 +6,14 @@ import math import torch -import isaaclab.utils.math as math_utils import warp as wp -from isaaclab.assets import Articulation, RigidObject +from isaaclab.assets import RigidObject from isaaclab.envs import ManagerBasedRLEnv from isaaclab.envs.mdp.terminations import root_height_below_minimum from isaaclab.managers import SceneEntityCfg from isaaclab.sensors.contact_sensor.contact_sensor import ContactSensor from isaaclab.utils.math import combine_frame_transforms -from isaaclab_arena.tasks.observations.gear_insertion_observations import check_gear_insertion_geometry - # NOTE(alexmillane, 2025.09.15): The velocity threshold is set high because some stationary # seem to generate a "small" velocity. @@ -261,59 +258,3 @@ def root_height_below_minimum_multi_objects( outs_tensor = torch.stack(outs, dim=0) # [X, N] terminated = outs_tensor.any(dim=0) # [N], bool return terminated - - -def gear_mesh_insertion_success( - env: ManagerBasedRLEnv, - held_object_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - fixed_object_cfg: SceneEntityCfg = SceneEntityCfg("gears_and_base"), - gear_base_offset: tuple[float, ...] = (2.025e-2, 0.0, 0.0), - held_gear_base_offset: list[float] | None = None, - gear_peg_height: float = 0.02, - success_z_fraction: float = 0.80, - xy_threshold: float = 0.0025, - rl_training: bool = False, -) -> torch.Tensor: - """Terminate when the gear is inserted onto the peg to the required depth. - - Checks that the held gear's base (root + held_gear_base_offset in gear frame) - is centered on the peg (XY) and lowered past a fraction of the peg height (Z). - Peg position is fixed_asset pose + gear_base_offset in the fixed asset's local frame. - - When ``rl_training`` is True, always returns False (no early termination) - but the term stays registered so that ``SuccessRateMetric`` can query it. - """ - if rl_training: - return torch.zeros(env.num_envs, dtype=torch.bool, device=env.device) - held_object: RigidObject = env.scene[held_object_cfg.name] - fixed_object: RigidObject = env.scene[fixed_object_cfg.name] - - held_root = wp.to_torch(held_object.data.root_pos_w) - env.scene.env_origins - held_quat = wp.to_torch(held_object.data.root_quat_w) - h_offset = held_gear_base_offset if held_gear_base_offset is not None else gear_base_offset - held_off = torch.tensor(h_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) - held_base_pos = held_root + math_utils.quat_apply(held_quat, held_off) - - fixed_pos = wp.to_torch(fixed_object.data.root_pos_w) - env.scene.env_origins - fixed_quat = wp.to_torch(fixed_object.data.root_quat_w) - offset = torch.tensor(gear_base_offset, device=env.device, dtype=torch.float32).unsqueeze(0).expand(env.num_envs, 3) - peg_pos = fixed_pos + math_utils.quat_apply(fixed_quat, offset) - - return check_gear_insertion_geometry(held_base_pos, peg_pos, gear_peg_height, success_z_fraction, xy_threshold) - - -def gear_dropped_from_gripper( - env: ManagerBasedRLEnv, - gear_cfg: SceneEntityCfg = SceneEntityCfg("medium_nist_gear"), - robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), - ee_body_name: str = "panda_hand", - distance_threshold: float = 0.15, -) -> torch.Tensor: - """Reset when the gear has fallen too far from the end-effector.""" - gear: RigidObject = env.scene[gear_cfg.name] - robot: Articulation = env.scene[robot_cfg.name] - eef_indices, _ = robot.find_bodies([ee_body_name]) - ee_pos = wp.to_torch(robot.data.body_pos_w)[:, eef_indices[0]] - gear_pos = wp.to_torch(gear.data.root_pos_w) - distance = torch.norm(gear_pos - ee_pos, dim=-1) - return distance > distance_threshold diff --git a/isaaclab_arena/terms/events.py b/isaaclab_arena/terms/events.py index 422361527..e0e859667 100644 --- a/isaaclab_arena/terms/events.py +++ b/isaaclab_arena/terms/events.py @@ -3,8 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from __future__ import annotations - import torch import warp as wp @@ -31,14 +29,12 @@ def set_object_pose( pose_t_xyz_q_xyzw = pose.to_tensor(device=env.device).repeat(num_envs, 1) pose_t_xyz_q_xyzw[:, :3] += env.scene.env_origins[env_ids] # Set the pose and velocity - asset.write_root_pose_to_sim_index(root_pose=pose_t_xyz_q_xyzw, env_ids=env_ids) + asset.write_root_pose_to_sim(pose_t_xyz_q_xyzw, env_ids=env_ids) if velocity is not None: vel = velocity.to_tensor(device=env.device).unsqueeze(0).expand(num_envs, -1) - asset.write_root_velocity_to_sim_index(root_velocity=vel, env_ids=env_ids) + asset.write_root_velocity_to_sim(vel, env_ids=env_ids) else: - asset.write_root_velocity_to_sim_index( - root_velocity=torch.zeros(num_envs, 6, device=env.device), env_ids=env_ids - ) + asset.write_root_velocity_to_sim(torch.zeros(num_envs, 6, device=env.device), env_ids=env_ids) def set_object_pose_per_env( @@ -61,11 +57,9 @@ def set_object_pose_per_env( pose_t_xyz_q_xyzw = pose.to_tensor(device=env.device).unsqueeze(0) pose_t_xyz_q_xyzw[0, :3] += env.scene.env_origins[cur_env, :] # Set the pose and velocity - asset.write_root_pose_to_sim_index( - root_pose=pose_t_xyz_q_xyzw, env_ids=torch.tensor([cur_env], device=env.device) - ) - asset.write_root_velocity_to_sim_index( - root_velocity=torch.zeros(1, 6, device=env.device), env_ids=torch.tensor([cur_env], device=env.device) + asset.write_root_pose_to_sim(pose_t_xyz_q_xyzw, env_ids=torch.tensor([cur_env], device=env.device)) + asset.write_root_velocity_to_sim( + torch.zeros(1, 6, device=env.device), env_ids=torch.tensor([cur_env], device=env.device) ) @@ -76,11 +70,10 @@ def reset_all_articulation_joints(env: ManagerBasedEnv, env_ids: torch.Tensor): default_root_state = wp.to_torch(articulation_asset.data.default_root_state)[env_ids].clone() default_root_state[:, 0:3] += env.scene.env_origins[env_ids] # set into the physics simulation - articulation_asset.write_root_pose_to_sim_index(root_pose=default_root_state[:, :7], env_ids=env_ids) - articulation_asset.write_root_velocity_to_sim_index(root_velocity=default_root_state[:, 7:], env_ids=env_ids) + articulation_asset.write_root_pose_to_sim(default_root_state[:, :7], env_ids=env_ids) + articulation_asset.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids=env_ids) # obtain default joint positions default_joint_pos = wp.to_torch(articulation_asset.data.default_joint_pos)[env_ids].clone() default_joint_vel = wp.to_torch(articulation_asset.data.default_joint_vel)[env_ids].clone() # set into the physics simulation - articulation_asset.write_joint_position_to_sim_index(position=default_joint_pos, env_ids=env_ids) - articulation_asset.write_joint_velocity_to_sim_index(velocity=default_joint_vel, env_ids=env_ids) + articulation_asset.write_joint_state_to_sim(default_joint_pos, default_joint_vel, env_ids=env_ids) diff --git a/isaaclab_arena/tests/test_nist_gear_insertion_task.py b/isaaclab_arena/tests/test_nist_gear_insertion_task.py index 53eaa1113..b488870ca 100644 --- a/isaaclab_arena/tests/test_nist_gear_insertion_task.py +++ b/isaaclab_arena/tests/test_nist_gear_insertion_task.py @@ -3,32 +3,39 @@ # # SPDX-License-Identifier: Apache-2.0 -class _TestAsset: - def __init__(self, name: str, object_min_z: float | None = None): - self.name = name - self.object_min_z = object_min_z - self.reset_pose = True +import argparse +import math +import torch +from dataclasses import dataclass - def disable_reset_pose(self) -> None: - self.reset_pose = False +import pytest + +_PEG_BASE_OFFSET = [0.02025, 0.0, 0.0] +_PEG_TIP_OFFSET = [0.02025, 0.0, 0.025] -def _asset(name: str, object_min_z: float | None = None) -> _TestAsset: - return _TestAsset(name=name, object_min_z=object_min_z) +@dataclass +class _DummyAsset: + name: str + object_min_z: float | None = None + reset_pose_disabled: bool = False + def disable_reset_pose(self) -> None: + self.reset_pose_disabled = True -def _nist_task(**kwargs): - from isaaclab_arena.tasks.nist_gear_insertion_task import GearInsertionGeometryCfg, NistGearInsertionTask - return NistGearInsertionTask( - assembled_board=_asset("nist_board_assembled"), - held_gear=_asset("medium_nist_gear"), - background_scene=_asset("table", object_min_z=0.0), - gear_base_asset=_asset("gears_and_base"), +def _make_nist_task(**kwargs): + from isaaclab_arena.tasks.nist_gear_insertion.task import GearInsertionGeometryCfg, NistGearInsertionRLTask + + return NistGearInsertionRLTask( + assembled_board=_DummyAsset("nist_board_assembled"), + held_gear=_DummyAsset("medium_nist_gear"), + background_scene=_DummyAsset("table", object_min_z=0.0), + gear_base_asset=_DummyAsset("gears_and_base"), geometry_cfg=GearInsertionGeometryCfg( - peg_offset_from_board=[0.02025, 0.0, 0.0], - peg_offset_for_obs=[0.02025, 0.0, 0.025], - held_gear_base_offset=[0.02025, 0.0, 0.0], + peg_offset_from_board=list(_PEG_BASE_OFFSET), + peg_offset_for_obs=list(_PEG_TIP_OFFSET), + held_gear_base_offset=list(_PEG_BASE_OFFSET), success_z_fraction=0.20, xy_threshold=0.0025, ), @@ -36,174 +43,223 @@ def _nist_task(**kwargs): ) -def test_franka_nist_gear_osc_embodiment_registers_task_terms(): - from isaaclab_arena.assets.registries import AssetRegistry - from isaaclab_arena.embodiments.franka.franka import ( - FrankaNistGearInsertionObservationsCfg, - FrankaNistGearInsertionOscEmbodiment, - ) - from isaaclab_arena.tasks.observations.gear_insertion_observations import NistGearInsertionPolicyObservations - from isaaclab_arena_environments.mdp.nist_gear_insertion_osc_action import NistGearInsertionOscActionCfg - - asset_registry = AssetRegistry() - embodiment_cls = asset_registry.get_asset_by_name("franka_nist_gear_osc") - embodiment = embodiment_cls(fixed_asset_name="gears_and_base", peg_offset=(0.02025, 0.0, 0.025)) - - assert embodiment_cls is FrankaNistGearInsertionOscEmbodiment - assert isinstance(embodiment.action_config.arm_action, NistGearInsertionOscActionCfg) - assert embodiment.action_config.arm_action.fixed_asset_name == "gears_and_base" - assert embodiment.action_config.arm_action.peg_offset == (0.02025, 0.0, 0.025) - assert embodiment.action_config.arm_action.body_name == "panda_fingertip_centered" - - assert isinstance(embodiment.observation_config, FrankaNistGearInsertionObservationsCfg) - policy_obs = embodiment.observation_config.policy - assert policy_obs.nist_gear_policy_obs.func is NistGearInsertionPolicyObservations - assert policy_obs.nist_gear_policy_obs.params["board_name"] == "gears_and_base" - assert policy_obs.nist_gear_policy_obs.params["peg_offset"] == [0.02025, 0.0, 0.025] - assert policy_obs.nist_gear_policy_obs.params["fingertip_body_name"] == "panda_fingertip_centered" - - -def test_nist_object_library_uses_shared_nucleus_assets(): - from isaaclab_arena.assets.registries import AssetRegistry - - asset_registry = AssetRegistry() - expected_paths = { - "gears_and_base": ( - "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/" - "gearbase_and_gears_gearbase_root.usd" - ), - "medium_nist_gear": ( - "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_medium.usd" - ), - "nist_board_assembled": ( - "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/" - "nist_board_assembled.usd" - ), - "nist_gear_base": ( - "omniverse://isaac-dev.ov.nvidia.com/Isaac/IsaacLab/Arena/assets/object_library/NIST/gear_base.usd" - ), - } +def test_task_geometry_config_feeds_observations_rewards_and_success(): + from isaaclab_arena.tasks.nist_gear_insertion import geometry, rewards + from isaaclab_arena.tasks.nist_gear_insertion.terminations import gear_mesh_insertion_success - for asset_name, expected_path in expected_paths.items(): - asset_cls = asset_registry.get_asset_by_name(asset_name) - assert asset_cls.usd_path == expected_path + task = _make_nist_task(disable_success_termination=True) + obs_cfg = task.get_observation_cfg() + reward_cfg = task.get_rewards_cfg() + termination_cfg = task.get_termination_cfg() -def test_nist_task_observation_terms_use_task_geometry(): - from isaaclab.managers import ObservationGroupCfg as ObsGroup + assert task.held_gear.reset_pose_disabled + assert obs_cfg.task_obs.peg_pos.func is geometry.peg_pos_in_env_frame + assert obs_cfg.task_obs.peg_pos.params["board_cfg"].name == "gears_and_base" + assert obs_cfg.task_obs.peg_pos.params["peg_offset"] == _PEG_TIP_OFFSET + + assert obs_cfg.task_obs.peg_delta.func is geometry.peg_delta_from_held_gear_base + assert obs_cfg.task_obs.peg_delta.params["gear_cfg"].name == "medium_nist_gear" + assert obs_cfg.task_obs.peg_delta.params["held_gear_base_offset"] == _PEG_BASE_OFFSET + assert obs_cfg.task_obs.concatenate_terms + + for term in (reward_cfg.kp_baseline, reward_cfg.kp_coarse, reward_cfg.kp_fine): + assert term.func is rewards.gear_peg_keypoint_squashing + assert term.params["peg_offset"] == _PEG_BASE_OFFSET + assert term.params["held_gear_base_offset"] == _PEG_BASE_OFFSET - from isaaclab_arena.tasks.observations.gear_insertion_observations import ( - peg_delta_from_held_gear_base, - peg_pos_in_env_frame, + assert reward_cfg.engagement_bonus.func is rewards.gear_insertion_geometry_bonus + assert reward_cfg.engagement_bonus.params["z_fraction"] == 0.90 + assert reward_cfg.success_bonus.func is rewards.gear_insertion_geometry_bonus + assert reward_cfg.success_bonus.params["z_fraction"] == 0.20 + + assert termination_cfg.success.func is gear_mesh_insertion_success + assert termination_cfg.success.params["gear_base_offset"] == _PEG_BASE_OFFSET + assert termination_cfg.success.params["held_gear_base_offset"] == _PEG_BASE_OFFSET + assert termination_cfg.success.params["disable_success_termination"] + assert termination_cfg.object_dropped is None + assert termination_cfg.gear_dropped_from_gripper is None + + +def test_task_events_add_grasp_reset_and_optional_randomization(): + from isaaclab_arena.embodiments.franka.nist_gear_insertion.gear_grasp import franka_gripper_joint_setter + from isaaclab_arena.tasks.nist_gear_insertion.events import GraspCfg, place_gear_in_gripper + + grasp_cfg = GraspCfg( + gripper_joint_setter_func=franka_gripper_joint_setter, + arm_joint_names="panda_joint[1-7]", ) - obs_cfg = _nist_task().get_observation_cfg() + events_cfg = _make_nist_task(grasp_cfg=grasp_cfg).get_events_cfg() - assert isinstance(obs_cfg.task_obs, ObsGroup) - assert obs_cfg.task_obs.peg_pos.func is peg_pos_in_env_frame - assert obs_cfg.task_obs.peg_pos.params["board_cfg"].name == "gears_and_base" - assert obs_cfg.task_obs.peg_pos.params["peg_offset"] == [0.02025, 0.0, 0.025] + assert events_cfg.place_gear.func is place_gear_in_gripper + assert events_cfg.place_gear.params["gear_cfg"].name == "medium_nist_gear" + assert events_cfg.place_gear.params["grasp_cfg"].gripper_joint_setter_func is franka_gripper_joint_setter + assert events_cfg.place_gear.params["grasp_cfg"].arm_joint_names == "panda_joint[1-7]" + assert events_cfg.held_object_mass is None + assert events_cfg.fixed_asset_pose is None + assert events_cfg.robot_actuator_gains is None + assert events_cfg.robot_joint_friction is None - assert obs_cfg.task_obs.peg_delta.func is peg_delta_from_held_gear_base - assert obs_cfg.task_obs.peg_delta.params["gear_cfg"].name == "medium_nist_gear" - assert obs_cfg.task_obs.peg_delta.params["board_cfg"].name == "gears_and_base" - assert obs_cfg.task_obs.peg_delta.params["held_gear_base_offset"] == [0.02025, 0.0, 0.0] + randomized_cfg = _make_nist_task(grasp_cfg=grasp_cfg, enable_randomization=True).get_events_cfg() - assert obs_cfg.task_obs.ee_pos_noiseless.func.__name__ == "body_pos_in_env_frame" - assert obs_cfg.task_obs.ee_pos_noiseless.params["body_name"] == "panda_fingertip_centered" - assert obs_cfg.task_obs.ee_quat_noiseless.func.__name__ == "body_quat_canonical" - assert obs_cfg.task_obs.ee_quat_noiseless.params["body_name"] == "panda_fingertip_centered" - assert obs_cfg.task_obs.concatenate_terms + assert randomized_cfg.held_object_mass.params["asset_cfg"].name == "medium_nist_gear" + assert randomized_cfg.fixed_asset_pose.params["asset_cfg"].name == "gears_and_base" + assert randomized_cfg.fixed_asset_pose.params["pose_range"]["yaw"] == (0.0, math.radians(15.0)) + assert randomized_cfg.robot_actuator_gains.params["asset_cfg"].joint_names == "panda_joint[1-7]" + assert randomized_cfg.robot_joint_friction.params["asset_cfg"].joint_names == "panda_joint[1-7]" + with pytest.raises(AssertionError): + _make_nist_task(enable_randomization=True).get_events_cfg() -def test_nist_task_reward_terms_share_insertion_geometry(): - from isaaclab_arena.tasks.rewards import gear_insertion_rewards - rewards_cfg = _nist_task().get_rewards_cfg() +def test_grasp_cfg_width_setter_maps_total_width_to_finger_joints(): + from isaaclab_arena.embodiments.franka.nist_gear_insertion.gear_grasp import ( + franka_gripper_joint_setter, + get_franka_nist_gear_insertion_grasp_config, + ) - keypoint_terms = [rewards_cfg.kp_baseline, rewards_cfg.kp_coarse, rewards_cfg.kp_fine] - for term in keypoint_terms: - assert term.func is gear_insertion_rewards.gear_peg_keypoint_squashing - assert term.params["gear_cfg"].name == "medium_nist_gear" - assert term.params["board_cfg"].name == "gears_and_base" - assert term.params["peg_offset"] == [0.02025, 0.0, 0.0] - assert term.params["held_gear_base_offset"] == [0.02025, 0.0, 0.0] + grasp_cfg = get_franka_nist_gear_insertion_grasp_config() + joint_pos = torch.zeros(2, 9) - assert rewards_cfg.engagement_bonus.func is gear_insertion_rewards.gear_insertion_geometry_bonus - assert rewards_cfg.success_bonus.func is gear_insertion_rewards.gear_insertion_geometry_bonus - assert rewards_cfg.engagement_bonus.params["z_fraction"] == 0.90 - assert rewards_cfg.success_bonus.params["z_fraction"] == 0.20 - assert rewards_cfg.success_bonus.params["xy_threshold"] == 0.0025 + franka_gripper_joint_setter(joint_pos, torch.tensor([0, 1]), [7, 8], width=0.04) - assert rewards_cfg.action_penalty_asset.func is gear_insertion_rewards.osc_action_magnitude_penalty - assert rewards_cfg.action_grad_penalty.func is gear_insertion_rewards.osc_action_delta_penalty - assert rewards_cfg.contact_penalty.func is gear_insertion_rewards.wrist_contact_force_penalty - assert rewards_cfg.success_pred_error.func is gear_insertion_rewards.success_prediction_error + assert torch.allclose(joint_pos[:, 7], torch.full((2,), 0.02)) + assert torch.allclose(joint_pos[:, 8], torch.full((2,), 0.02)) + assert grasp_cfg.gripper_joint_setter_func is franka_gripper_joint_setter + assert grasp_cfg.hand_grasp_width == 0.03 + assert grasp_cfg.hand_close_width == 0.0 + assert grasp_cfg.grasp_rot_offset == [1.0, 0.0, 0.0, 0.0] + assert grasp_cfg.grasp_offset == [0.02, 0.0, -0.128] -def test_nist_task_events_use_embodiment_grasp_and_randomization_cfg(): - from isaaclab_arena.assets.registries import AssetRegistry - from isaaclab_arena.embodiments.franka.franka import franka_gripper_joint_setter - from isaaclab_arena.tasks.events import place_gear_in_gripper - from isaaclab_arena.tasks.nist_gear_insertion_task import GraspCfg +def test_franka_osc_action_observation_and_reward_configs_use_scene_geometry(): + from isaaclab_arena_environments.mdp.nist_gear_insertion import osc_rewards + from isaaclab_arena_environments.mdp.nist_gear_insertion.franka_osc_cfg import ( + FrankaNistGearInsertionObservationsCfg, + FrankaNistGearInsertionOscActionsCfg, + ) + from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_action import ACTION_DIM, NistGearInsertionOscActionCfg + from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_observations import ( + POLICY_OBS_DIM, + POLICY_OBS_LAYOUT, + NistGearInsertionPolicyObservations, + ) - embodiment = AssetRegistry().get_asset_by_name("franka_nist_gear_osc")() - task = _nist_task( - grasp_cfg=GraspCfg(**embodiment.get_gear_insertion_grasp_config()), - enable_randomization=True, + action_cfg = FrankaNistGearInsertionOscActionsCfg( + fixed_asset_name="gears_and_base", + peg_offset=tuple(_PEG_TIP_OFFSET), + ) + obs_cfg = FrankaNistGearInsertionObservationsCfg( + fixed_asset_name="gears_and_base", + peg_offset=tuple(_PEG_TIP_OFFSET), + fingertip_body_name="panda_fingertip_centered", + concatenate_observation_terms=True, + ) + reward_cfg = osc_rewards.NistGearInsertionOscRewardsCfg( + gear_name="medium_nist_gear", + board_name="gears_and_base", + peg_offset=_PEG_BASE_OFFSET, + held_gear_base_offset=_PEG_BASE_OFFSET, + gear_peg_height=0.02, + success_z_fraction=0.20, + xy_threshold=0.0025, ) - events_cfg = task.get_events_cfg() + assert isinstance(action_cfg.arm_action, NistGearInsertionOscActionCfg) + assert action_cfg.arm_action.fixed_asset_name == "gears_and_base" + assert action_cfg.arm_action.peg_offset == tuple(_PEG_TIP_OFFSET) + assert action_cfg.arm_action.body_name == "panda_fingertip_centered" + assert action_cfg.gripper_action is None + assert ACTION_DIM == 7 + + policy_obs = obs_cfg.policy.nist_gear_insertion_policy_obs + assert policy_obs.func is NistGearInsertionPolicyObservations + assert policy_obs.params["board_name"] == "gears_and_base" + assert policy_obs.params["peg_offset"] == _PEG_TIP_OFFSET + assert policy_obs.params["fingertip_body_name"] == "panda_fingertip_centered" + assert obs_cfg.policy.concatenate_terms + assert POLICY_OBS_DIM == sum(size for _, size in POLICY_OBS_LAYOUT) + + assert reward_cfg.action_magnitude_penalty.func is osc_rewards.osc_action_magnitude_penalty + assert reward_cfg.action_delta_penalty.func is osc_rewards.osc_action_delta_penalty + assert reward_cfg.contact_penalty.func is osc_rewards.wrist_contact_force_penalty + assert reward_cfg.success_prediction_error.func is osc_rewards.success_prediction_error + assert reward_cfg.success_prediction_error.params["peg_offset"] == _PEG_BASE_OFFSET + assert reward_cfg.success_prediction_error.params["success_z_fraction"] == 0.20 + + +def test_gear_insertion_geometry_success_thresholds(): + from isaaclab_arena.tasks.nist_gear_insertion.geometry import check_gear_insertion_geometry + + held_base_pos = torch.tensor([ + [0.0, 0.0, 0.001], + [0.01, 0.0, 0.001], + [0.0, 0.0, 0.01], + ]) + peg_pos = torch.zeros(3, 3) + + success = check_gear_insertion_geometry( + held_base_pos=held_base_pos, + peg_pos=peg_pos, + gear_peg_height=0.02, + z_fraction=0.2, + xy_threshold=0.0025, + ) - assert events_cfg.place_gear.func is place_gear_in_gripper - assert events_cfg.place_gear.params["gear_cfg"].name == "medium_nist_gear" - assert events_cfg.place_gear.params["hand_grasp_width"] == 0.03 - assert events_cfg.place_gear.params["hand_close_width"] == 0.03 - assert events_cfg.place_gear.params["gripper_joint_setter_func"] is franka_gripper_joint_setter - assert events_cfg.place_gear.params["end_effector_body_name"] == "panda_hand" - assert events_cfg.place_gear.params["finger_joint_names"] == "panda_finger_joint[1-2]" + assert success.tolist() == [True, False, False] - assert events_cfg.fixed_asset_pose.params["asset_cfg"].name == "gears_and_base" - assert events_cfg.held_object_mass.params["asset_cfg"].name == "medium_nist_gear" - assert events_cfg.robot_actuator_gains.params["asset_cfg"].joint_names == "panda_joint[1-7]" - assert events_cfg.robot_joint_friction.params["asset_cfg"].joint_names == "panda_joint[1-7]" +def test_osc_yaw_mapping_wraps_to_policy_interval(): + from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_action import ( + _target_yaw_to_action, + _wrap_yaw_to_action_range, + ) -def test_nist_task_termination_terms_include_success_and_optional_drop_checks(): - from isaaclab_arena.embodiments.franka.franka import franka_gripper_joint_setter - from isaaclab_arena.tasks.nist_gear_insertion_task import GraspCfg - from isaaclab_arena.tasks.terminations import gear_dropped_from_gripper, gear_mesh_insertion_success + yaw = torch.tensor([-math.pi, -math.pi / 2.0, 0.0, math.pi / 2.0, 0.75 * math.pi, math.pi]) + wrapped_yaw = _wrap_yaw_to_action_range(yaw) + action = _target_yaw_to_action(wrapped_yaw) - terminations_cfg = _nist_task(rl_training_mode=True).get_termination_cfg() + assert torch.allclose(wrapped_yaw, torch.tensor([-math.pi, -math.pi / 2.0, 0.0, math.pi / 2.0, -math.pi, -math.pi])) + assert torch.all(action <= 1.0) + assert torch.all(action >= -1.0) - assert terminations_cfg.success.func is gear_mesh_insertion_success - assert terminations_cfg.success.params["held_object_cfg"].name == "medium_nist_gear" - assert terminations_cfg.success.params["fixed_object_cfg"].name == "gears_and_base" - assert terminations_cfg.success.params["gear_base_offset"] == [0.02025, 0.0, 0.0] - assert terminations_cfg.success.params["held_gear_base_offset"] == [0.02025, 0.0, 0.0] - assert terminations_cfg.success.params["rl_training"] - assert terminations_cfg.object_dropped is None - assert terminations_cfg.gear_dropped_from_gripper is None - drop_cfg = _nist_task( - grasp_cfg=GraspCfg(gripper_joint_setter_func=franka_gripper_joint_setter), - disable_drop_terminations=False, - ).get_termination_cfg() +def test_policy_observation_layout_masks_symmetric_quaternion_and_computes_velocity(): + from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_observations import ( + POLICY_OBS_LAYOUT, + PREV_ACTION_DIM, + _compute_pose_velocities, + _make_reported_quat, + ) - assert drop_cfg.object_dropped is not None - assert drop_cfg.object_dropped.params["minimum_height"] == 0.0 - assert drop_cfg.gear_dropped_from_gripper.func is gear_dropped_from_gripper - assert drop_cfg.gear_dropped_from_gripper.params["gear_cfg"].name == "medium_nist_gear" - assert drop_cfg.gear_dropped_from_gripper.params["ee_body_name"] == "panda_hand" + assert POLICY_OBS_LAYOUT[-1] == ("prev_actions", PREV_ACTION_DIM) + flip = torch.tensor([1.0, -1.0]) + noisy_quat = torch.tensor([[0.1, 0.2, 0.3, 0.4], [-0.2, 0.1, -0.4, 0.3]]) + reported_quat = _make_reported_quat(noisy_quat, flip) -def test_nist_environment_defaults_to_nist_franka_embodiment(): - import argparse + assert torch.allclose(reported_quat[:, 2:], torch.zeros(2, 2)) + assert torch.allclose(reported_quat[:, :2], noisy_quat[:, :2] * flip.unsqueeze(-1)) + prev_pos = torch.zeros(2, 3) + noisy_pos = torch.tensor([[0.01, 0.0, 0.0], [0.0, 0.02, 0.0]]) + ee_linvel, ee_angvel = _compute_pose_velocities(prev_pos, reported_quat, noisy_pos, reported_quat, dt=0.05) + + assert torch.allclose(ee_linvel, noisy_pos / 0.05) + assert torch.allclose(ee_angvel, torch.zeros_like(ee_angvel), atol=1e-6) + + +def test_nist_osc_environment_cli_is_fixed_to_specialized_embodiment(): from isaaclab_arena_environments.nist_assembled_gearmesh_osc_environment import NISTAssembledGearMeshOSCEnvironment parser = argparse.ArgumentParser() NISTAssembledGearMeshOSCEnvironment.add_cli_args(parser) + args_cli = parser.parse_args([]) - assert args_cli.embodiment == "franka_nist_gear_osc" + assert not hasattr(args_cli, "embodiment") + assert not args_cli.disable_success_termination + assert parser.parse_args(["--rl_training_mode"]).disable_success_termination + with pytest.raises(SystemExit): + parser.parse_args(["--embodiment", "franka_ik"]) diff --git a/isaaclab_arena/tests/test_rl_games_action_policy.py b/isaaclab_arena/tests/test_rl_games_action_policy.py new file mode 100644 index 000000000..d92fe9495 --- /dev/null +++ b/isaaclab_arena/tests/test_rl_games_action_policy.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +import torch + + +class _DummyUnwrappedEnv: + def __init__(self): + self.obs_buf = {"policy": torch.ones(1, 3)} + + +class _DummyEnv: + def __init__(self): + self.unwrapped = _DummyUnwrappedEnv() + self.num_resets = 0 + + def reset(self, *args, **kwargs): + self.num_resets += 1 + return {"policy": torch.zeros(1, 3)}, {"reset": True} + + +def test_rl_games_wrapper_init_reuses_current_observation_buffer(monkeypatch): + from isaaclab_arena.policy import rl_games_action_policy + + env = _DummyEnv() + original_reset = env.reset + observed = {} + + def _mock_base_init(self, env, *args, **kwargs): + obs, info = env.reset() + observed["obs"] = obs + observed["info"] = info + self.env = env + + monkeypatch.setattr(rl_games_action_policy.RlGamesVecEnvWrapper, "__init__", _mock_base_init) + rl_games_action_policy._RlGamesInferenceEnvWrapper(env, "cuda:0", torch.inf, torch.inf) + + assert env.reset == original_reset + assert env.num_resets == 0 + assert observed["info"] == {} + assert torch.equal(observed["obs"]["policy"], env.unwrapped.obs_buf["policy"]) diff --git a/isaaclab_arena/tests/test_train_rl_games.py b/isaaclab_arena/tests/test_train_rl_games.py deleted file mode 100644 index ada00f5fc..000000000 --- a/isaaclab_arena/tests/test_train_rl_games.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -from isaaclab_arena.scripts.reinforcement_learning import train_rl_games - - -def test_copy_forwarded_args_preserves_isaac_lab_options(): - argv = [ - "--task", - "nist_assembled_gear_mesh_osc", - "--num_envs=64", - "--headless", - "--agent_cfg_path", - "legacy.yaml", - "--embodiment", - "franka_nist_gear_osc", - ] - - assert train_rl_games._copy_forwarded_args(argv) == [ - "--task", - "nist_assembled_gear_mesh_osc", - "--num_envs=64", - "--headless", - ] - - -def test_drop_forwarded_args_keeps_environment_specific_options(): - argv = [ - "--task", - "nist_assembled_gear_mesh_osc", - "--num_envs", - "64", - "--headless", - "--embodiment", - "franka_nist_gear_osc", - "--rl_training_mode", - ] - - assert train_rl_games._drop_forwarded_args(argv) == [ - "--embodiment", - "franka_nist_gear_osc", - "--rl_training_mode", - ] - - -def test_remove_deprecated_args_returns_experiment_name(): - remaining, experiment_name = train_rl_games._remove_deprecated_args([ - "--agent_cfg_path", - "legacy.yaml", - "--experiment_name", - "test_run", - "--rl_training_mode", - ]) - - assert remaining == ["--rl_training_mode"] - assert experiment_name == "test_run" - - -def test_normalize_legacy_positional_task(): - assert train_rl_games._normalize_legacy_positional_task(["nist_assembled_gear_mesh_osc", "--num_envs", "64"]) == [ - "--task", - "nist_assembled_gear_mesh_osc", - "--num_envs", - "64", - ] - - -def test_normalize_legacy_positional_task_keeps_explicit_task(): - argv = ["--task", "nist_assembled_gear_mesh_osc", "--num_envs", "64"] - - assert train_rl_games._normalize_legacy_positional_task(argv) == argv diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion/__init__.py b/isaaclab_arena_environments/mdp/nist_gear_insertion/__init__.py new file mode 100644 index 000000000..adf64306f --- /dev/null +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Controller-specific MDP terms for NIST gear insertion environments.""" + +from .franka_osc_cfg import FrankaNistGearInsertionObservationsCfg, FrankaNistGearInsertionOscActionsCfg diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion/franka_osc_cfg.py b/isaaclab_arena_environments/mdp/nist_gear_insertion/franka_osc_cfg.py new file mode 100644 index 000000000..431fc18d5 --- /dev/null +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion/franka_osc_cfg.py @@ -0,0 +1,106 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Franka OSC action and observation configs for NIST gear insertion.""" + +from __future__ import annotations + +from dataclasses import MISSING + +from isaaclab.controllers import OperationalSpaceControllerCfg +from isaaclab.managers import ActionTermCfg +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.utils import configclass + +from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_action import NistGearInsertionOscActionCfg +from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_observations import NistGearInsertionPolicyObservations + + +@configclass +class FrankaNistGearInsertionObservationsCfg: + """Policy observations for a Franka gear-insertion OSC environment. + + The fixed asset name and peg offset are supplied by the environment because + they describe scene geometry. + """ + + @configclass + class PolicyCfg(ObsGroup): + """Gear-insertion policy observation group.""" + + nist_gear_insertion_policy_obs: ObsTerm = MISSING + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = False + + policy: PolicyCfg = MISSING + + def __init__( + self, + fixed_asset_name: str, + peg_offset: tuple[float, float, float], + fingertip_body_name: str = "panda_fingertip_centered", + concatenate_observation_terms: bool = False, + ): + self.policy = self.PolicyCfg() + self.policy.nist_gear_insertion_policy_obs = ObsTerm( + func=NistGearInsertionPolicyObservations, + params={ + "robot_name": "robot", + "board_name": fixed_asset_name, + "peg_offset": list(peg_offset), + "fingertip_body_name": fingertip_body_name, + "force_body_name": "force_sensor", + "pos_noise_level": 0.0, + "rot_noise_level_deg": 0.0, + "force_noise_level": 0.0, + }, + ) + self.policy.concatenate_terms = concatenate_observation_terms + + +@configclass +class FrankaNistGearInsertionOscActionsCfg: + """Action terms for the Franka gear-insertion OSC policy. + + The seven-dimensional arm policy controls the insertion OSC term. The + gripper is not exposed as a policy action; reset events place the held gear + in the hand and maintain the grasp target. + """ + + arm_action: ActionTermCfg = MISSING + gripper_action: ActionTermCfg | None = None + + def __init__( + self, + fixed_asset_name: str, + peg_offset: tuple[float, float, float], + ): + self.arm_action = NistGearInsertionOscActionCfg( + asset_name="robot", + joint_names=["panda_joint[1-7]"], + body_name="panda_fingertip_centered", + controller_cfg=OperationalSpaceControllerCfg( + target_types=["pose_abs"], + impedance_mode="fixed", + inertial_dynamics_decoupling=True, + partial_inertial_dynamics_decoupling=False, + gravity_compensation=False, + motion_stiffness_task=[565.0, 565.0, 565.0, 28.0, 28.0, 28.0], + motion_damping_ratio_task=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + nullspace_control="position", + nullspace_stiffness=10.0, + nullspace_damping_ratio=1.0, + ), + position_scale=1.0, + orientation_scale=1.0, + nullspace_joint_pos_target="default", + fixed_asset_name=fixed_asset_name, + peg_offset=peg_offset, + force_body_name="force_sensor", + ) + self.gripper_action = None diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py b/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_action.py similarity index 55% rename from isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py rename to isaaclab_arena_environments/mdp/nist_gear_insertion/osc_action.py index f5203af83..7b09013ac 100644 --- a/isaaclab_arena_environments/mdp/nist_gear_insertion_osc_action.py +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_action.py @@ -3,10 +3,12 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Operational-space action term for NIST gear insertion with a Franka arm. +"""Operational-space action term for Franka gear insertion. This module converts normalized policy commands into end-effector pose targets -for the OSC controller and applies task-specific filtering around peg contact. +for the OSC controller. The policy targets the insertion peg directly, while +this term applies the filtering, command limits, force smoothing, and reset-time +randomization used by the Isaac Lab Forge gear-insertion setup. """ from __future__ import annotations @@ -14,6 +16,7 @@ import math import torch from collections.abc import Sequence +from dataclasses import MISSING from typing import TYPE_CHECKING import isaaclab.utils.math as math_utils @@ -25,24 +28,43 @@ from isaaclab_tasks.direct.factory.factory_utils import wrap_yaw from isaaclab_tasks.direct.forge.forge_utils import get_random_prop_gains +from isaaclab_arena.tasks.nist_gear_insertion.geometry import compute_asset_local_offset_pos + if TYPE_CHECKING: from isaaclab.envs import ManagerBasedEnv +ACTION_DIM = 7 +POS_SLICE = slice(0, 3) +QUAT_SLICE = slice(3, 7) +ROLL_IDX = 3 +YAW_IDX = 5 +SUCCESS_IDX = 6 +ROLL_PITCH_SLICE = slice(ROLL_IDX, YAW_IDX) +ROT_ROLL_IDX = 0 +ROT_PITCH_IDX = 1 +ROT_YAW_IDX = 2 +# The trained policy uses a gripper-down orientation and controls only yaw. +# The interval avoids the wrist-frame discontinuity behind the end effector. +YAW_RANGE_RAD = math.radians(270.0) +YAW_MIN_RAD = -math.pi +YAW_MAX_RAD = math.pi / 2.0 + + def _action_to_target_yaw(action: torch.Tensor) -> torch.Tensor: """Map normalized policy action to the commanded yaw interval.""" - return -math.pi + math.radians(270.0) * (action + 1.0) / 2.0 + return YAW_MIN_RAD + YAW_RANGE_RAD * (action + 1.0) / 2.0 def _target_yaw_to_action(yaw: torch.Tensor) -> torch.Tensor: """Map commanded yaw back to the normalized policy interval.""" - return (yaw + math.pi) / math.radians(270.0) * 2.0 - 1.0 + return (yaw - YAW_MIN_RAD) / YAW_RANGE_RAD * 2.0 - 1.0 def _wrap_yaw_to_action_range(yaw: torch.Tensor) -> torch.Tensor: - """Represent wrapped yaw in the policy command interval.""" - yaw = torch.where(yaw > math.pi / 2, yaw - 2 * math.pi, yaw) - return torch.where(yaw < -math.pi, yaw + 2 * math.pi, yaw) + """Wrap yaw and clamp the excluded wrist sector to the policy interval.""" + yaw = torch.where(yaw > YAW_MAX_RAD, yaw - 2.0 * math.pi, yaw) + return torch.clamp(yaw, YAW_MIN_RAD, YAW_MAX_RAD) def _gripper_down_to_yaw_frame_quat(num_envs: int, device: torch.device) -> torch.Tensor: @@ -62,7 +84,7 @@ def get_nist_gear_insertion_arm_action( try: action_term = env.action_manager.get_term(term_name) except KeyError as exc: - raise KeyError(f"Action term '{term_name}' is required for NIST gear insertion.") from exc + raise KeyError(f"Action term '{term_name}' is required for gear insertion.") from exc if not isinstance(action_term, NistGearInsertionOscAction): raise TypeError( f"Action term '{term_name}' must be {NistGearInsertionOscAction.__name__}; " @@ -72,34 +94,40 @@ def get_nist_gear_insertion_arm_action( class NistGearInsertionOscAction(OperationalSpaceControllerAction): - """Operational-space control action term for NIST gear insertion.""" + """Operational-space action term for the 7-D NIST gear insertion policy. + + The policy layout is ``xyz, roll, pitch, yaw, success``. Only ``xyz`` and + ``yaw`` are sent to the OSC controller; roll and pitch are intentionally + ignored for the gripper-down insertion strategy. The final channel is an + auxiliary success prediction consumed by the OSC reward terms. + """ cfg: NistGearInsertionOscActionCfg def __init__(self, cfg: NistGearInsertionOscActionCfg, env: ManagerBasedEnv): super().__init__(cfg, env) - self._smoothed_actions = torch.zeros(self.num_envs, 7, device=self.device) - self.ema_factor = torch.full((self.num_envs, 1), 0.05, device=self.device) - self._pos_bounds = torch.tensor(cfg.pos_action_bounds, device=self.device) + self._smoothed_actions = torch.zeros(self.num_envs, ACTION_DIM, device=self.device) + self._ema_factor = torch.full((self.num_envs, 1), 0.05, device=self.device) + self._position_action_bounds = torch.tensor(cfg.pos_action_bounds, device=self.device) pos_threshold = torch.tensor(cfg.pos_action_threshold, device=self.device) rot_threshold = torch.tensor(cfg.rot_action_threshold, device=self.device) - self._default_pos_thresh = pos_threshold.unsqueeze(0).expand(self.num_envs, -1).clone() - self._default_rot_thresh = rot_threshold.unsqueeze(0).expand(self.num_envs, -1).clone() - self._pos_thresh = self._default_pos_thresh.clone() - self._rot_thresh = self._default_rot_thresh.clone() + self._default_position_step_limits = pos_threshold.unsqueeze(0).expand(self.num_envs, -1).clone() + self._default_rotation_step_limits = rot_threshold.unsqueeze(0).expand(self.num_envs, -1).clone() + self._position_step_limits = self._default_position_step_limits.clone() + self._rotation_step_limits = self._default_rotation_step_limits.clone() self._peg_offset = torch.tensor(cfg.peg_offset, device=self.device) self._fixed_pos_noise_levels = torch.tensor(cfg.fixed_pos_noise_levels, device=self.device) - self.fixed_pos_noise = torch.zeros(self.num_envs, 3, device=self.device) - self.contact_thresholds = torch.full((self.num_envs,), 7.5, device=self.device) - self.force_smooth = torch.zeros(self.num_envs, 3, device=self.device) - self._prev_smoothed_actions = torch.zeros(self.num_envs, 7, device=self.device) - - self.delta_pos = torch.zeros(self.num_envs, 3, device=self.device) - self.delta_yaw = torch.zeros(self.num_envs, device=self.device) - self.success_pred = torch.full((self.num_envs,), -1.0, device=self.device) + self._fixed_pos_noise = torch.zeros(self.num_envs, 3, device=self.device) + self._contact_thresholds = torch.full((self.num_envs,), 7.5, device=self.device) + self._smoothed_force = torch.zeros(self.num_envs, 3, device=self.device) + self._prev_smoothed_actions = torch.zeros(self.num_envs, ACTION_DIM, device=self.device) + + self._delta_pos = torch.zeros(self.num_envs, 3, device=self.device) + self._delta_yaw = torch.zeros(self.num_envs, device=self.device) + self._success_prediction = torch.full((self.num_envs,), -1.0, device=self.device) self._pos_dead_zone = torch.tensor(cfg.pos_dead_zone, device=self.device).unsqueeze(0) self._rot_dead_zone = cfg.rot_dead_zone self._force_body_idx: int | None = None @@ -107,12 +135,8 @@ def __init__(self, cfg: NistGearInsertionOscActionCfg, env: ManagerBasedEnv): def _get_peg_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: """Return the peg target position in each environment frame.""" - origins = self._env.scene.env_origins board = self._env.scene[self.cfg.fixed_asset_name] - pos = wp.to_torch(board.data.root_pos_w) - origins - quat = wp.to_torch(board.data.root_quat_w) - offset = self._peg_offset.unsqueeze(0).expand(pos.shape[0], 3) - peg_pos = pos + math_utils.quat_apply(quat, offset) + peg_pos = compute_asset_local_offset_pos(self._env, board, self._peg_offset) if env_ids is not None: return peg_pos[env_ids] return peg_pos @@ -120,7 +144,7 @@ def _get_peg_pos(self, env_ids: torch.Tensor | None = None) -> torch.Tensor: @property def action_dim(self) -> int: """Number of policy actions consumed by the OSC term.""" - return 7 + return ACTION_DIM @property def smoothed_actions(self) -> torch.Tensor: @@ -135,23 +159,54 @@ def previous_smoothed_actions(self) -> torch.Tensor: @property def position_thresholds(self) -> torch.Tensor: """Per-environment position command limits after reset randomization.""" - return self._pos_thresh + return self._position_step_limits @property def rotation_thresholds(self) -> torch.Tensor: """Per-environment orientation command limits after reset randomization.""" - return self._rot_thresh + return self._rotation_step_limits + + @property + def fixed_pos_noise(self) -> torch.Tensor: + """Per-environment fixed-asset position noise sampled at reset.""" + return self._fixed_pos_noise + + @property + def contact_thresholds(self) -> torch.Tensor: + """Per-environment wrist-force thresholds sampled at reset.""" + return self._contact_thresholds + + @property + def smoothed_force(self) -> torch.Tensor: + """EMA-filtered wrist force owned by the action term.""" + return self._smoothed_force + + @property + def delta_pos(self) -> torch.Tensor: + """Last unclipped peg-relative position delta requested by the policy.""" + return self._delta_pos + + @property + def delta_yaw(self) -> torch.Tensor: + """Last unclipped yaw delta requested by the policy.""" + return self._delta_yaw + + @property + def success_prediction(self) -> torch.Tensor: + """Auxiliary success prediction from the final OSC action channel.""" + return self._success_prediction def process_actions(self, actions: torch.Tensor): + """Filter policy actions and write the corresponding OSC command.""" self._update_smoothed_force() self._update_smoothed_actions(actions) self._compute_ee_pose() # Convert normalized policy output into a bounded OSC pose command. - ee_pos_b = self._ee_pose_b[:, :3] - ee_quat_b = self._ee_pose_b[:, 3:7] - self._processed_actions[:, :3] = self._compute_target_position(ee_pos_b) - self._processed_actions[:, 3:7] = self._compute_target_orientation(ee_quat_b) + ee_pos_b = self._ee_pose_b[:, POS_SLICE] + ee_quat_b = self._ee_pose_b[:, QUAT_SLICE] + self._processed_actions[:, POS_SLICE] = self._compute_bounded_target_position_from_policy_action(ee_pos_b) + self._processed_actions[:, QUAT_SLICE] = self._compute_bounded_target_orientation_from_policy_yaw(ee_quat_b) self._compute_task_frame_pose() self._osc.set_command( @@ -164,8 +219,8 @@ def _update_smoothed_actions(self, actions: torch.Tensor) -> None: actions = torch.nan_to_num(actions, nan=0.0, posinf=1.0, neginf=-1.0).clamp(-1.0, 1.0) self._raw_actions[:] = actions self._prev_smoothed_actions[:] = self._smoothed_actions - self._smoothed_actions[:] = self.ema_factor * actions + (1.0 - self.ema_factor) * self._smoothed_actions - self.success_pred[:] = self._smoothed_actions[:, 6] + self._smoothed_actions[:] = self._ema_factor * actions + (1.0 - self._ema_factor) * self._smoothed_actions + self._success_prediction[:] = self._smoothed_actions[:, SUCCESS_IDX] def _ensure_force_body_idx(self) -> None: """Resolve the wrist force-sensor body index.""" @@ -181,28 +236,37 @@ def _ensure_force_body_idx(self) -> None: self._force_body_idx = body_ids[0] def _update_smoothed_force(self) -> None: - """Update the wrist-force EMA used by observations and rewards.""" + """Update the wrist-force EMA owned by the action term.""" self._ensure_force_body_idx() raw_force = wp.to_torch(self._asset.root_view.get_link_incoming_joint_force())[:, self._force_body_idx, :3] raw_force = torch.nan_to_num(raw_force, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) - self.force_smooth[:] = ( - self._force_smoothing_factor * raw_force + (1.0 - self._force_smoothing_factor) * self.force_smooth + self._smoothed_force[:] = ( + self._force_smoothing_factor * raw_force + (1.0 - self._force_smoothing_factor) * self._smoothed_force ) - def _compute_target_position(self, ee_pos_b: torch.Tensor) -> torch.Tensor: - """Return the next end-effector position command in the robot base frame.""" - peg_pos_b = self._get_peg_pos() + self.fixed_pos_noise + def _compute_bounded_target_position_from_policy_action(self, ee_pos_b: torch.Tensor) -> torch.Tensor: + """Return a bounded peg-relative position target for stable contact search. - pos_actions = self._smoothed_actions[:, :3] - target_pos = peg_pos_b + pos_actions * self._pos_bounds + The policy selects a target around the noisy peg estimate, but the OSC + command is clipped to a small per-step delta so contact corrections do + not jump through the gear or peg geometry. + """ + peg_pos_b = self._get_peg_pos() + self._fixed_pos_noise - self.delta_pos[:] = target_pos - ee_pos_b - clipped_delta = self._clip_delta_with_dead_zone(self.delta_pos, self._pos_thresh, self._pos_dead_zone) + pos_actions = self._smoothed_actions[:, POS_SLICE] + target_pos = peg_pos_b + pos_actions * self._position_action_bounds + + self._delta_pos[:] = target_pos - ee_pos_b + clipped_delta = self._clip_delta_with_dead_zone( + self._delta_pos, + self._position_step_limits, + self._pos_dead_zone, + ) return ee_pos_b + clipped_delta - def _compute_target_orientation(self, ee_quat_b: torch.Tensor) -> torch.Tensor: - """Return the next end-effector orientation command in the robot base frame.""" - target_yaw = _action_to_target_yaw(self._smoothed_actions[:, 5]) + def _compute_bounded_target_orientation_from_policy_yaw(self, ee_quat_b: torch.Tensor) -> torch.Tensor: + """Return a bounded gripper-down yaw target around the gear symmetry axis.""" + target_yaw = _action_to_target_yaw(self._smoothed_actions[:, YAW_IDX]) target_quat = self._target_quat_from_yaw(target_yaw) desired_xyz = self._clip_orientation_delta(ee_quat_b, target_quat) return math_utils.quat_from_euler_xyz( @@ -224,16 +288,20 @@ def _clip_orientation_delta(self, ee_quat_b: torch.Tensor, target_quat: torch.Te desired_roll, desired_pitch, desired_yaw = math_utils.euler_xyz_from_quat(target_quat, wrap_to_2pi=True) desired_xyz = torch.stack([desired_roll, desired_pitch, desired_yaw], dim=1) - desired_xyz[:, 0] = self._clip_roll_delta(curr_roll, desired_roll) - desired_xyz[:, 1] = self._clip_pitch_delta(curr_pitch, desired_pitch) - desired_xyz[:, 2] = self._clip_yaw_delta(curr_yaw, desired_yaw) + desired_xyz[:, ROT_ROLL_IDX] = self._clip_roll_delta(curr_roll, desired_roll) + desired_xyz[:, ROT_PITCH_IDX] = self._clip_pitch_delta(curr_pitch, desired_pitch) + desired_xyz[:, ROT_YAW_IDX] = self._clip_yaw_delta(curr_yaw, desired_yaw) return desired_xyz def _clip_roll_delta(self, curr_roll: torch.Tensor, desired_roll: torch.Tensor) -> torch.Tensor: """Return roll target after applying the per-step rotation limit.""" desired_roll = torch.where(desired_roll < 0.0, desired_roll + 2 * math.pi, desired_roll) delta_roll = desired_roll - curr_roll - clipped_roll = torch.clamp(delta_roll, -self._rot_thresh[:, 0], self._rot_thresh[:, 0]) + clipped_roll = torch.clamp( + delta_roll, + -self._rotation_step_limits[:, ROT_ROLL_IDX], + self._rotation_step_limits[:, ROT_ROLL_IDX], + ) return curr_roll + clipped_roll def _clip_pitch_delta(self, curr_pitch: torch.Tensor, desired_pitch: torch.Tensor) -> torch.Tensor: @@ -242,7 +310,11 @@ def _clip_pitch_delta(self, curr_pitch: torch.Tensor, desired_pitch: torch.Tenso desired_pitch = torch.where(desired_pitch < 0.0, desired_pitch + 2 * math.pi, desired_pitch) desired_pitch_w = torch.where(desired_pitch > math.pi, desired_pitch - 2 * math.pi, desired_pitch) delta_pitch = desired_pitch_w - curr_pitch_w - clipped_pitch = torch.clamp(delta_pitch, -self._rot_thresh[:, 1], self._rot_thresh[:, 1]) + clipped_pitch = torch.clamp( + delta_pitch, + -self._rotation_step_limits[:, ROT_PITCH_IDX], + self._rotation_step_limits[:, ROT_PITCH_IDX], + ) return curr_pitch_w + clipped_pitch def _clip_yaw_delta(self, curr_yaw: torch.Tensor, desired_yaw: torch.Tensor) -> torch.Tensor: @@ -250,10 +322,10 @@ def _clip_yaw_delta(self, curr_yaw: torch.Tensor, desired_yaw: torch.Tensor) -> curr_yaw = wrap_yaw(curr_yaw) desired_yaw = wrap_yaw(desired_yaw) - self.delta_yaw[:] = desired_yaw - curr_yaw + self._delta_yaw[:] = desired_yaw - curr_yaw clipped_yaw = self._clip_delta_with_dead_zone( - self.delta_yaw, - self._rot_thresh[:, 2], + self._delta_yaw, + self._rotation_step_limits[:, ROT_YAW_IDX], self._rot_dead_zone, ) return curr_yaw + clipped_yaw @@ -278,71 +350,90 @@ def _ee_quat_to_yaw_action(self, ee_quat: torch.Tensor) -> torch.Tensor: return _target_yaw_to_action(target_yaw) def reset(self, env_ids: Sequence[int] | None = None) -> None: + env_ids_index, num_envs = self._normalize_env_ids(env_ids) + if num_envs == 0: + return + super().reset(env_ids) + self._reset_action_filter(env_ids_index, num_envs) + self._reset_command_thresholds(env_ids_index, num_envs) + self._reset_contact_state(env_ids_index, num_envs) + self._reset_initial_smoothed_actions(env_ids_index) + self._reset_debug_state(env_ids_index) + + def _normalize_env_ids(self, env_ids: Sequence[int] | None) -> tuple[Sequence[int] | slice, int]: + """Return an index object and count for tensor resets.""" if env_ids is None: - env_ids = slice(None) - n = self.num_envs - elif isinstance(env_ids, slice): - n = len(range(*env_ids.indices(self.num_envs))) - elif len(env_ids) == 0: - return - else: - n = len(env_ids) + return slice(None), self.num_envs + return env_ids, len(env_ids) + def _reset_action_filter(self, env_ids: Sequence[int] | slice, num_envs: int) -> None: + """Randomize the EMA factor used to smooth policy actions this episode.""" lo, hi = self.cfg.ema_factor_range - self.ema_factor[env_ids] = lo + torch.rand(n, 1, device=self.device) * (hi - lo) + self._ema_factor[env_ids] = lo + torch.rand(num_envs, 1, device=self.device) * (hi - lo) - self._pos_thresh[env_ids] = get_random_prop_gains( - self._default_pos_thresh[env_ids], + def _reset_command_thresholds(self, env_ids: Sequence[int] | slice, num_envs: int) -> None: + """Randomize per-step OSC command limits and peg-position noise.""" + self._position_step_limits[env_ids] = get_random_prop_gains( + self._default_position_step_limits[env_ids], self.cfg.pos_threshold_noise_level, - n, + num_envs, self.device, ) - self._rot_thresh[env_ids] = get_random_prop_gains( - self._default_rot_thresh[env_ids], + self._rotation_step_limits[env_ids] = get_random_prop_gains( + self._default_rotation_step_limits[env_ids], self.cfg.rot_threshold_noise_level, - n, + num_envs, self.device, ) + self._fixed_pos_noise[env_ids] = torch.randn(num_envs, 3, device=self.device) * self._fixed_pos_noise_levels - self.fixed_pos_noise[env_ids] = torch.randn(n, 3, device=self.device) * self._fixed_pos_noise_levels - + def _reset_contact_state(self, env_ids: Sequence[int] | slice, num_envs: int) -> None: + """Randomize contact threshold and clear the smoothed force state.""" ct_lo, ct_hi = self.cfg.contact_threshold_range - self.contact_thresholds[env_ids] = ct_lo + torch.rand(n, device=self.device) * (ct_hi - ct_lo) - - self.force_smooth[env_ids] = 0.0 + self._contact_thresholds[env_ids] = ct_lo + torch.rand(num_envs, device=self.device) * (ct_hi - ct_lo) + self._smoothed_force[env_ids] = 0.0 + def _reset_initial_smoothed_actions(self, env_ids: Sequence[int] | slice) -> None: + """Seed smoothed actions from the current EE pose to avoid a reset-time command jump.""" self._compute_ee_pose() - ee_pos = self._ee_pose_b[env_ids, :3] - ee_quat = self._ee_pose_b[env_ids, 3:7] + ee_pos = self._ee_pose_b[env_ids, POS_SLICE] + ee_quat = self._ee_pose_b[env_ids, QUAT_SLICE] - peg_pos = self._get_peg_pos(env_ids) + self.fixed_pos_noise[env_ids] + peg_pos = self._get_peg_pos(env_ids) + self._fixed_pos_noise[env_ids] - pos_actions = (ee_pos - peg_pos) / self._pos_bounds - self._smoothed_actions[env_ids, 0:3] = pos_actions + pos_actions = (ee_pos - peg_pos) / self._position_action_bounds + self._smoothed_actions[env_ids, POS_SLICE] = pos_actions yaw_action = self._ee_quat_to_yaw_action(ee_quat) - self._smoothed_actions[env_ids, 3:5] = 0.0 - self._smoothed_actions[env_ids, 5] = yaw_action - self._smoothed_actions[env_ids, 6] = -1.0 + self._smoothed_actions[env_ids, ROLL_PITCH_SLICE] = 0.0 + self._smoothed_actions[env_ids, YAW_IDX] = yaw_action + self._smoothed_actions[env_ids, SUCCESS_IDX] = -1.0 self._prev_smoothed_actions[env_ids] = self._smoothed_actions[env_ids] - self.delta_pos[env_ids] = 0.0 - self.delta_yaw[env_ids] = 0.0 - self.success_pred[env_ids] = -1.0 + + def _reset_debug_state(self, env_ids: Sequence[int] | slice) -> None: + """Clear diagnostic buffers used by observations and rewards.""" + self._delta_pos[env_ids] = 0.0 + self._delta_yaw[env_ids] = 0.0 + self._success_prediction[env_ids] = -1.0 @configclass class NistGearInsertionOscActionCfg(OperationalSpaceControllerActionCfg): - """Config for :class:`NistGearInsertionOscAction`.""" + """Config for :class:`NistGearInsertionOscAction`. + + Environments provide the fixed asset name and peg offset because those + values are scene geometry, not robot configuration. + """ class_type: type[ActionTerm] = NistGearInsertionOscAction - fixed_asset_name: str = "gears_and_base" + fixed_asset_name: str = MISSING """Name of the fixed asset that contains the insertion peg.""" - peg_offset: tuple[float, float, float] = (0.0, 0.0, 0.0) + peg_offset: tuple[float, float, float] = MISSING """Local-frame peg offset on :attr:`fixed_asset_name`.""" fixed_pos_noise_levels: tuple[float, float, float] = (0.001, 0.001, 0.001) diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_observations.py b/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_observations.py new file mode 100644 index 000000000..9739c49f5 --- /dev/null +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_observations.py @@ -0,0 +1,436 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""OSC policy observations for Franka NIST gear insertion. + +The observation term is paired with :class:`NistGearInsertionOscAction`: it exposes +the peg-relative fingertip state, smoothed wrist force, force threshold, and +filtered previous action values expected by the trained 24-D policy observation. +""" + +from __future__ import annotations + +import math +import torch +import torch.nn.functional as F +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +import warp as wp +from isaaclab.assets import Articulation +from isaaclab.managers import ManagerTermBase +from isaaclab.utils.math import axis_angle_from_quat, quat_unique + +from isaaclab_arena.tasks.nist_gear_insertion.geometry import compute_asset_local_offset_pos +from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_action import get_nist_gear_insertion_arm_action + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + from isaaclab.managers import ObservationTermCfg + + +FINGERTIP_POS_REL_DIM = 3 +FINGERTIP_QUAT_DIM = 4 +EE_LINVEL_DIM = 3 +EE_ANGVEL_DIM = 3 +FT_FORCE_DIM = 3 +FORCE_THRESHOLD_DIM = 1 +PREV_ACTION_DIM = 7 +POLICY_OBS_LAYOUT = ( + ("fingertip_pos_rel_fixed", FINGERTIP_POS_REL_DIM), + ("fingertip_quat", FINGERTIP_QUAT_DIM), + ("ee_linvel", EE_LINVEL_DIM), + ("ee_angvel", EE_ANGVEL_DIM), + ("ft_force", FT_FORCE_DIM), + ("force_threshold", FORCE_THRESHOLD_DIM), + ("prev_actions", PREV_ACTION_DIM), +) +POLICY_OBS_DIM = sum(dim for _, dim in POLICY_OBS_LAYOUT) +PREV_ACTION_ROLL_PITCH_SLICE = slice(3, 5) + + +@dataclass(frozen=True) +class NistGearInsertionPolicyObsParams: + """Resolved config values for :class:`NistGearInsertionPolicyObservations`.""" + + robot_name: str = "robot" + board_name: str = "fixed_asset" + peg_offset: tuple[float, float, float] = (0.0, 0.0, 0.0) + fingertip_body_name: str = "panda_fingertip_centered" + force_body_name: str = "force_sensor" + """Body whose presence is validated because the paired action term reads wrist force.""" + pos_noise_level: float = 0.00025 + rot_noise_level_deg: float = 0.1 + force_noise_level: float = 1.0 + + @classmethod + def from_dict(cls, params: dict) -> NistGearInsertionPolicyObsParams: + return cls( + robot_name=params.get("robot_name", cls.robot_name), + board_name=params.get("board_name", cls.board_name), + peg_offset=tuple(params.get("peg_offset", cls.peg_offset)), + fingertip_body_name=params.get("fingertip_body_name", cls.fingertip_body_name), + force_body_name=params.get("force_body_name", cls.force_body_name), + pos_noise_level=params.get("pos_noise_level", cls.pos_noise_level), + rot_noise_level_deg=params.get("rot_noise_level_deg", cls.rot_noise_level_deg), + force_noise_level=params.get("force_noise_level", cls.force_noise_level), + ) + + +def _make_reported_quat(quat: torch.Tensor, flip: torch.Tensor) -> torch.Tensor: + """Return the symmetry-reduced quaternion channels exposed to the policy. + + Gear rotation about the peg axis is handled through the yaw action. For + checkpoint parity, the packed observation keeps the original x/y channels + and zeros z/w instead of renormalizing the quaternion. + """ + reported_quat = quat.clone() + reported_quat[:, [2, 3]] = 0.0 + return reported_quat * flip.unsqueeze(-1) + + +def _compute_pose_velocities( + prev_pos: torch.Tensor, + prev_quat: torch.Tensor, + pos: torch.Tensor, + quat: torch.Tensor, + dt: float, +) -> tuple[torch.Tensor, torch.Tensor]: + """Estimate finite-difference linear velocity and yaw angular velocity.""" + safe_dt = max(dt, 1e-6) + linvel = (pos - prev_pos) / safe_dt + + rot_diff = math_utils.quat_mul(quat, math_utils.quat_conjugate(prev_quat)) + rot_diff = quat_unique(rot_diff) + angvel = axis_angle_from_quat(rot_diff) / safe_dt + angvel[:, 0:2] = 0.0 + return linvel, angvel + + +class NistGearInsertionPolicyObservations(ManagerTermBase): + """Policy observation term for OSC-based gear insertion. + + The term reads state owned by :class:`NistGearInsertionOscAction`, including + reset-time peg noise, smoothed force, per-episode force threshold, and + previous filtered actions. The action term owns the wrist-force EMA and + updates it at most once per simulation step. + + Output layout (per env):: + + fingertip_pos_rel_fixed (3) + fingertip_quat (4) + ee_linvel (3) + ee_angvel (3) + ft_force (3) + force_threshold (1) + prev_actions (7) + """ + + def __init__(self, cfg: ObservationTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + + self._params = NistGearInsertionPolicyObsParams.from_dict(cfg.params) + self._robot_name = self._params.robot_name + self._board_name = self._params.board_name + self._peg_offset_values = self._params.peg_offset + self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device) + self._fingertip_body = self._params.fingertip_body_name + self._force_body = self._params.force_body_name + + self._pos_noise = self._params.pos_noise_level + self._rot_noise_deg = self._params.rot_noise_level_deg + self._force_noise = self._params.force_noise_level + + n = env.num_envs + dev = env.device + + self._fingertip_idx: int | None = None + self._body_key = (self._robot_name, self._fingertip_body, self._force_body) + + self._flip_quats = torch.ones(n, device=dev) + self._prev_noisy_pos = torch.zeros(n, 3, device=dev) + self._prev_noisy_quat = torch.zeros(n, 4, device=dev) + self._prev_noisy_quat[:, 3] = 1.0 + + def _resolve_fingertip_idx( + self, + robot_name: str, + fingertip_body_name: str, + force_body_name: str, + ) -> int: + """Resolve the fingertip body index used by the observation term.""" + body_key = (robot_name, fingertip_body_name, force_body_name) + if self._fingertip_idx is not None and body_key == self._body_key: + return self._fingertip_idx + + robot: Articulation = self._env.scene[robot_name] + for body_name in (fingertip_body_name, force_body_name): + body_ids, body_names = robot.find_bodies([body_name]) + if not body_ids: + raise ValueError( + f"Body '{body_name}' is missing from robot '{robot_name}'. Use a USD that defines this " + "body or override the corresponding observation parameter. " + f"Matches: {body_names}." + ) + + fingertip_ids, _ = robot.find_bodies([fingertip_body_name]) + fingertip_idx = fingertip_ids[0] + if body_key == self._body_key: + self._fingertip_idx = fingertip_idx + return fingertip_idx + + def _resolve_peg_offset(self, peg_offset: list[float] | None, device: torch.device) -> torch.Tensor: + """Return the cached peg offset unless the manager supplies different values.""" + if peg_offset is None or tuple(peg_offset) == self._peg_offset_values: + return self._peg_offset + return torch.tensor(peg_offset, device=device, dtype=torch.float32) + + def _sample_noisy_pose( + self, + env: ManagerBasedRLEnv, + ft_pos: torch.Tensor, + ft_quat: torch.Tensor, + pos_noise_level: float, + rot_noise_level_deg: float, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return fingertip pose with configured sensor noise.""" + pos_noise = torch.randn(env.num_envs, 3, device=env.device) * pos_noise_level + noisy_pos = ft_pos + pos_noise + + rot_noise_axis = torch.randn(env.num_envs, 3, device=env.device) + rot_noise_axis = F.normalize(rot_noise_axis, dim=1, eps=1e-8) + rot_noise_angle = torch.randn(env.num_envs, device=env.device) * math.radians(rot_noise_level_deg) + noisy_quat = math_utils.quat_mul( + ft_quat, + math_utils.quat_from_angle_axis(rot_noise_angle, rot_noise_axis), + ) + return noisy_pos, quat_unique(noisy_quat) + + def _compute_fingertip_target_delta( + self, + env: ManagerBasedRLEnv, + noisy_pos: torch.Tensor, + board_name: str, + peg_offset: torch.Tensor, + arm_osc_action, + ) -> torch.Tensor: + """Return fingertip position relative to the noisy peg target.""" + board = env.scene[board_name] + peg_pos = compute_asset_local_offset_pos(env, board, peg_offset) + noisy_fixed_pos = peg_pos + arm_osc_action.fixed_pos_noise + return noisy_pos - noisy_fixed_pos + + def _compute_velocities( + self, + noisy_pos: torch.Tensor, + obs_quat: torch.Tensor, + dt: float, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Estimate fingertip linear and angular velocity from noisy poses.""" + ee_linvel, ee_angvel = _compute_pose_velocities( + self._prev_noisy_pos, + self._prev_noisy_quat, + noisy_pos, + obs_quat, + dt, + ) + + self._prev_noisy_pos[:] = noisy_pos + self._prev_noisy_quat[:] = obs_quat + return ee_linvel, ee_angvel + + def _read_smoothed_force( + self, + env: ManagerBasedRLEnv, + arm_osc_action, + force_noise_level: float, + ) -> torch.Tensor: + """Return the smoothed wrist force with configured observation noise.""" + force = arm_osc_action.smoothed_force + return force + torch.randn(env.num_envs, 3, device=env.device) * force_noise_level + + def _read_fingertip_pose( + self, + env: ManagerBasedRLEnv, + robot_name: str, + fingertip_idx: int, + ) -> tuple[torch.Tensor, torch.Tensor]: + """Return fingertip pose in the environment frame.""" + robot: Articulation = env.scene[robot_name] + origins = env.scene.env_origins + fingertip_pos = wp.to_torch(robot.data.body_pos_w)[:, fingertip_idx] - origins + fingertip_quat = wp.to_torch(robot.data.body_quat_w)[:, fingertip_idx] + return fingertip_pos, fingertip_quat + + def _compute_fingertip_observations( + self, + env: ManagerBasedRLEnv, + robot_name: str, + board_name: str, + peg_offset: torch.Tensor, + fingertip_idx: int, + pos_noise_level: float, + rot_noise_level_deg: float, + arm_osc_action, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Return target-relative pose and velocity observations for the fingertip.""" + ft_pos, ft_quat = self._read_fingertip_pose(env, robot_name, fingertip_idx) + noisy_pos, noisy_quat_full = self._sample_noisy_pose(env, ft_pos, ft_quat, pos_noise_level, rot_noise_level_deg) + fingertip_pos_rel = self._compute_fingertip_target_delta(env, noisy_pos, board_name, peg_offset, arm_osc_action) + obs_quat = _make_reported_quat(noisy_quat_full, self._flip_quats) + ee_linvel, ee_angvel = self._compute_velocities(noisy_pos, noisy_quat_full, env.step_dt) + return fingertip_pos_rel, obs_quat, ee_linvel, ee_angvel + + def _compute_force_action_observations( + self, + env: ManagerBasedRLEnv, + arm_osc_action, + force_noise_level: float, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Return wrist-force, force-threshold, and previous-action observations.""" + noisy_force = self._read_smoothed_force(env, arm_osc_action, force_noise_level) + force_threshold = arm_osc_action.contact_thresholds.unsqueeze(-1) + + prev_actions = arm_osc_action.smoothed_actions.clone() + prev_actions[:, PREV_ACTION_ROLL_PITCH_SLICE] = 0.0 + return noisy_force, force_threshold, prev_actions + + def _pack_observation( + self, + fingertip_pos_rel: torch.Tensor, + obs_quat: torch.Tensor, + ee_linvel: torch.Tensor, + ee_angvel: torch.Tensor, + noisy_force: torch.Tensor, + force_threshold: torch.Tensor, + prev_actions: torch.Tensor, + ) -> torch.Tensor: + """Concatenate the policy observation and enforce the documented layout.""" + obs = torch.cat( + [ + fingertip_pos_rel, + obs_quat, + ee_linvel, + ee_angvel, + noisy_force, + force_threshold, + prev_actions, + ], + dim=-1, + ) + assert obs.shape[-1] == POLICY_OBS_DIM, f"Expected {POLICY_OBS_DIM}D gear insertion policy obs." + return torch.nan_to_num(obs, nan=0.0, posinf=100.0, neginf=-100.0).clamp(-100.0, 100.0) + + def _resolve_call_params( + self, + robot_name: str | None, + board_name: str | None, + fingertip_body_name: str | None, + force_body_name: str | None, + pos_noise_level: float | None, + rot_noise_level_deg: float | None, + force_noise_level: float | None, + ) -> tuple[str, str, str, str, float, float, float]: + """Resolve optional call-time overrides against the configured defaults.""" + return ( + self._robot_name if robot_name is None else robot_name, + self._board_name if board_name is None else board_name, + self._fingertip_body if fingertip_body_name is None else fingertip_body_name, + self._force_body if force_body_name is None else force_body_name, + self._pos_noise if pos_noise_level is None else pos_noise_level, + self._rot_noise_deg if rot_noise_level_deg is None else rot_noise_level_deg, + self._force_noise if force_noise_level is None else force_noise_level, + ) + + def reset(self, env_ids: list[int] | None = None): + """Reset noisy pose history and quaternion sign for selected environments.""" + env_ids, num_envs = self._normalize_env_ids(env_ids) + if num_envs == 0: + return + + dev = self._env.device + + flip = torch.ones(num_envs, device=dev) + flip[torch.rand(num_envs, device=dev) > 0.5] = -1.0 + self._flip_quats[env_ids] = flip + + fingertip_idx = self._resolve_fingertip_idx(self._robot_name, self._fingertip_body, self._force_body) + robot: Articulation = self._env.scene[self._robot_name] + origins = self._env.scene.env_origins + self._prev_noisy_pos[env_ids] = wp.to_torch(robot.data.body_pos_w)[env_ids, fingertip_idx] - origins[env_ids] + reset_quat = wp.to_torch(robot.data.body_quat_w)[env_ids, fingertip_idx] + self._prev_noisy_quat[env_ids] = quat_unique(reset_quat) + + def _normalize_env_ids( + self, + env_ids: list[int] | torch.Tensor | None, + ) -> tuple[torch.Tensor | slice, int]: + """Return an index object and count for observation reset buffers.""" + if env_ids is None: + return slice(None), self._env.num_envs + env_ids = torch.as_tensor(env_ids, device=self._env.device, dtype=torch.long) + return env_ids, env_ids.numel() + + def __call__( + self, + env: ManagerBasedRLEnv, + robot_name: str | None = None, + board_name: str | None = None, + peg_offset: list[float] | None = None, + fingertip_body_name: str | None = None, + force_body_name: str | None = None, + pos_noise_level: float | None = None, + rot_noise_level_deg: float | None = None, + force_noise_level: float | None = None, + ) -> torch.Tensor: + """Return the 24-D policy observation tensor.""" + ( + robot_name, + board_name, + fingertip_body_name, + force_body_name, + pos_noise_level, + rot_noise_level_deg, + force_noise_level, + ) = self._resolve_call_params( + robot_name, + board_name, + fingertip_body_name, + force_body_name, + pos_noise_level, + rot_noise_level_deg, + force_noise_level, + ) + + peg_offset_tensor = self._resolve_peg_offset(peg_offset, env.device) + fingertip_idx = self._resolve_fingertip_idx(robot_name, fingertip_body_name, force_body_name) + arm_osc_action = get_nist_gear_insertion_arm_action(env) + + fingertip_pos_rel, obs_quat, ee_linvel, ee_angvel = self._compute_fingertip_observations( + env, + robot_name, + board_name, + peg_offset_tensor, + fingertip_idx, + pos_noise_level, + rot_noise_level_deg, + arm_osc_action, + ) + noisy_force, force_threshold, prev_actions = self._compute_force_action_observations( + env, + arm_osc_action, + force_noise_level, + ) + + return self._pack_observation( + fingertip_pos_rel, + obs_quat, + ee_linvel, + ee_angvel, + noisy_force, + force_threshold, + prev_actions, + ) diff --git a/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_rewards.py b/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_rewards.py new file mode 100644 index 000000000..e1bc01d7b --- /dev/null +++ b/isaaclab_arena_environments/mdp/nist_gear_insertion/osc_rewards.py @@ -0,0 +1,197 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""OSC-specific reward terms for NIST gear insertion. + +These terms consume public state from :class:`NistGearInsertionOscAction`, such as +filtered actions, command deltas, smoothed force, and the auxiliary success +prediction channel. +""" + +from __future__ import annotations + +import torch +from dataclasses import MISSING + +from isaaclab.envs import ManagerBasedRLEnv +from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg +from isaaclab.utils import configclass + +from isaaclab_arena.tasks.nist_gear_insertion import geometry as gear_geometry +from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_action import get_nist_gear_insertion_arm_action + + +@configclass +class NistGearInsertionOscRewardsCfg: + """Config group for OSC-specific gear insertion rewards. + + Environments pass scene-specific asset names and insertion geometry so this + OSC reward group remains reusable across gear-insertion scenes. + """ + + action_magnitude_penalty: RewardTermCfg = MISSING + action_delta_penalty: RewardTermCfg = MISSING + contact_penalty: RewardTermCfg = MISSING + success_prediction_error: RewardTermCfg = MISSING + + def __init__( + self, + gear_name: str, + board_name: str, + peg_offset: list[float] | tuple[float, ...], + held_gear_base_offset: list[float] | tuple[float, ...], + gear_peg_height: float, + success_z_fraction: float, + xy_threshold: float, + ): + """Initialize OSC reward terms with shared insertion geometry.""" + geometry_params = { + "gear_cfg": SceneEntityCfg(gear_name), + "board_cfg": SceneEntityCfg(board_name), + "peg_offset": list(peg_offset), + "held_gear_base_offset": list(held_gear_base_offset), + "gear_peg_height": gear_peg_height, + "xy_threshold": xy_threshold, + } + + self.action_magnitude_penalty = RewardTermCfg( + func=osc_action_magnitude_penalty, + weight=-0.0005, + params={}, + ) + self.action_delta_penalty = RewardTermCfg( + func=osc_action_delta_penalty, + weight=-0.01, + params={}, + ) + self.contact_penalty = RewardTermCfg( + func=wrist_contact_force_penalty, + weight=-0.001, + params={}, + ) + self.success_prediction_error = RewardTermCfg( + func=success_prediction_error, + weight=-1.0, + params={ + **geometry_params, + "success_z_fraction": success_z_fraction, + "delay_until_ratio": 0.25, + }, + ) + + +class osc_action_magnitude_penalty(ManagerTermBase): + """Penalize large asset-relative position and yaw commands.""" + + def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: + action_term = get_nist_gear_insertion_arm_action(env) + position_thresholds = action_term.position_thresholds.clamp_min(1.0e-6) + yaw_thresholds = action_term.rotation_thresholds[:, 2].clamp_min(1.0e-6) + pos_error = torch.norm(action_term.delta_pos / position_thresholds, p=2, dim=-1) + rot_error = torch.abs(action_term.delta_yaw) / yaw_thresholds + return pos_error + rot_error + + +class osc_action_delta_penalty(ManagerTermBase): + """Penalize jerky actions using smoothed action deltas.""" + + def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: + action_term = get_nist_gear_insertion_arm_action(env) + return torch.norm( + action_term.smoothed_actions - action_term.previous_smoothed_actions, + p=2, + dim=-1, + ) + + +class wrist_contact_force_penalty(ManagerTermBase): + """Penalize wrist/contact force magnitude above per-episode threshold.""" + + def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor: + action_term = get_nist_gear_insertion_arm_action(env) + force_mag = torch.norm(action_term.smoothed_force, p=2, dim=-1) + return torch.nn.functional.relu(force_mag - action_term.contact_thresholds) + + +class success_prediction_error(ManagerTermBase): + """Penalize incorrect success predictions from the seventh action channel.""" + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self._prediction_loss_weight = 0.0 + self._held_gear_base_offset_values = tuple(cfg.params["held_gear_base_offset"]) + self._held_gear_base_offset = torch.tensor( + self._held_gear_base_offset_values, device=env.device, dtype=torch.float32 + ) + self._peg_offset_values = tuple(cfg.params["peg_offset"]) + self._peg_offset = torch.tensor(self._peg_offset_values, device=env.device, dtype=torch.float32) + + def __call__( + self, + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + board_cfg: SceneEntityCfg, + gear_peg_height: float, + success_z_fraction: float, + xy_threshold: float, + peg_offset: list[float] | None = None, + held_gear_base_offset: list[float] | None = None, + delay_until_ratio: float = 0.25, + ) -> torch.Tensor: + true_success = self._compute_true_success( + env, + gear_cfg, + board_cfg, + peg_offset, + held_gear_base_offset, + gear_peg_height, + success_z_fraction, + xy_threshold, + ) + self._update_prediction_loss_weight(true_success, delay_until_ratio) + pred = self._read_success_prediction(env) + return self._compute_prediction_error(true_success, pred) + + def _compute_true_success( + self, + env: ManagerBasedRLEnv, + gear_cfg: SceneEntityCfg, + board_cfg: SceneEntityCfg, + peg_offset: list[float] | None, + held_gear_base_offset: list[float] | None, + gear_peg_height: float, + success_z_fraction: float, + xy_threshold: float, + ) -> torch.Tensor: + """Return the geometric success label used by the auxiliary prediction loss.""" + return gear_geometry.compute_gear_insertion_success( + env, + gear_cfg, + board_cfg, + gear_geometry.resolve_offset_tensor(peg_offset, self._peg_offset_values, self._peg_offset, env.device), + gear_geometry.resolve_offset_tensor( + held_gear_base_offset, + self._held_gear_base_offset_values, + self._held_gear_base_offset, + env.device, + ), + gear_peg_height, + success_z_fraction, + xy_threshold, + ) + + def _update_prediction_loss_weight(self, true_success: torch.Tensor, delay_until_ratio: float) -> None: + """Enable the auxiliary loss once enough environments have reached success.""" + if true_success.float().mean() >= delay_until_ratio: + self._prediction_loss_weight = 1.0 + + def _read_success_prediction(self, env: ManagerBasedRLEnv) -> torch.Tensor: + """Read the success-probability prediction from the OSC action term.""" + arm_osc_action = get_nist_gear_insertion_arm_action(env) + return (arm_osc_action.success_prediction + 1.0) / 2.0 + + def _compute_prediction_error(self, true_success: torch.Tensor, pred: torch.Tensor) -> torch.Tensor: + """Return the gated auxiliary success-prediction error.""" + return torch.abs(true_success.float() - pred) * self._prediction_loss_weight diff --git a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py index 2a48234e9..67f3db72a 100644 --- a/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py +++ b/isaaclab_arena_environments/nist_assembled_gearmesh_osc_environment.py @@ -8,10 +8,22 @@ from __future__ import annotations import argparse +from typing import TYPE_CHECKING from isaaclab_arena.assets.register import register_environment from isaaclab_arena_environments.example_environment_base import ExampleEnvironmentBase +if TYPE_CHECKING: + from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment + +_FRANKA_NIST_GEAR_INSERTION_OSC_EMBODIMENT = "franka_nist_gear_insertion_osc" +_PEG_TIP_OFFSET = (0.02025, 0.0, 0.025) +_PEG_BASE_OFFSET = (0.02025, 0.0, 0.0) +_GEAR_PEG_HEIGHT = 0.02 +_SUCCESS_Z_FRACTION = 0.20 +_XY_THRESHOLD = 0.0025 +_EPISODE_LENGTH_S = 15.0 + @register_environment class NISTAssembledGearMeshOSCEnvironment(ExampleEnvironmentBase): @@ -19,24 +31,19 @@ class NISTAssembledGearMeshOSCEnvironment(ExampleEnvironmentBase): name: str = "nist_assembled_gear_mesh_osc" - def get_env(self, args_cli: argparse.Namespace): + def get_env(self, args_cli: argparse.Namespace) -> IsaacLabArenaEnvironment: import isaaclab.sim as sim_utils import isaaclab_arena_environments.mdp as mdp from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment from isaaclab_arena.scene.scene import Scene - from isaaclab_arena.tasks.nist_gear_insertion_task import ( - GearInsertionGeometryCfg, - GraspCfg, - NistGearInsertionTask, - ) + from isaaclab_arena.tasks.nist_gear_insertion.task import GearInsertionGeometryCfg, NistGearInsertionRLTask from isaaclab_arena.utils.pose import Pose - - peg_tip_offset = (0.02025, 0.0, 0.025) - peg_base_offset = (0.02025, 0.0, 0.0) - success_z_fraction = 0.20 - xy_threshold = 0.0025 - episode_length_s = 15.0 + from isaaclab_arena_environments.mdp.nist_gear_insertion.franka_osc_cfg import ( + FrankaNistGearInsertionObservationsCfg, + FrankaNistGearInsertionOscActionsCfg, + ) + from isaaclab_arena_environments.mdp.nist_gear_insertion.osc_rewards import NistGearInsertionOscRewardsCfg table = self.asset_registry.get_asset_by_name("table")() assembled_board = self.asset_registry.get_asset_by_name("nist_board_assembled")() @@ -45,13 +52,29 @@ def get_env(self, args_cli: argparse.Namespace): light_spawner_cfg = sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=1500.0) light = self.asset_registry.get_asset_by_name("light")(spawner_cfg=light_spawner_cfg) - embodiment = self.asset_registry.get_asset_by_name(args_cli.embodiment)( + embodiment = self.asset_registry.get_asset_by_name(_FRANKA_NIST_GEAR_INSERTION_OSC_EMBODIMENT)( enable_cameras=args_cli.enable_cameras, concatenate_observation_terms=True, + ) + embodiment.action_config = FrankaNistGearInsertionOscActionsCfg( fixed_asset_name=gears_and_base.name, - peg_offset=peg_tip_offset, + peg_offset=_PEG_TIP_OFFSET, + ) + embodiment.observation_config = FrankaNistGearInsertionObservationsCfg( + fixed_asset_name=gears_and_base.name, + peg_offset=_PEG_TIP_OFFSET, + fingertip_body_name=embodiment.get_command_body_name(), + concatenate_observation_terms=embodiment.concatenate_observation_terms, + ) + embodiment.reward_config = NistGearInsertionOscRewardsCfg( + gear_name=medium_gear.name, + board_name=gears_and_base.name, + peg_offset=_PEG_BASE_OFFSET, + held_gear_base_offset=_PEG_BASE_OFFSET, + gear_peg_height=_GEAR_PEG_HEIGHT, + success_z_fraction=_SUCCESS_Z_FRACTION, + xy_threshold=_XY_THRESHOLD, ) - if args_cli.teleop_device is not None: teleop_device = self.device_registry.get_device_by_name(args_cli.teleop_device)() else: @@ -67,24 +90,24 @@ def get_env(self, args_cli: argparse.Namespace): ) scene = Scene(assets=[table, assembled_board, medium_gear, gears_and_base, light]) - grasp_cfg = GraspCfg(**embodiment.get_gear_insertion_grasp_config()) geometry_cfg = GearInsertionGeometryCfg( - peg_offset_from_board=list(peg_base_offset), - peg_offset_for_obs=list(peg_tip_offset), - success_z_fraction=success_z_fraction, - xy_threshold=xy_threshold, + peg_offset_from_board=list(_PEG_BASE_OFFSET), + peg_offset_for_obs=list(_PEG_TIP_OFFSET), + success_z_fraction=_SUCCESS_Z_FRACTION, + xy_threshold=_XY_THRESHOLD, ) - task = NistGearInsertionTask( + task = NistGearInsertionRLTask( assembled_board=assembled_board, held_gear=medium_gear, background_scene=table, gear_base_asset=gears_and_base, geometry_cfg=geometry_cfg, - episode_length_s=episode_length_s, - grasp_cfg=grasp_cfg, + episode_length_s=_EPISODE_LENGTH_S, + grasp_cfg=embodiment.get_gear_insertion_grasp_config(), + fingertip_body_name=embodiment.get_command_body_name(), enable_randomization=True, - rl_training_mode=args_cli.rl_training_mode, + disable_success_termination=args_cli.disable_success_termination, ) return IsaacLabArenaEnvironment( @@ -94,18 +117,21 @@ def get_env(self, args_cli: argparse.Namespace): task=task, teleop_device=teleop_device, env_cfg_callback=mdp.assembly_env_cfg_callback, - rl_framework_entry_point="rl_games_cfg_entry_point", - rl_policy_cfg="isaaclab_arena_examples.policy:nist_gear_insertion_osc_rl_games.yaml", ) @staticmethod def add_cli_args(parser: argparse.ArgumentParser) -> None: - parser.add_argument("--embodiment", type=str, default="franka_nist_gear_osc", help="Robot embodiment") parser.add_argument( "--teleop_device", type=str, default=None, help="Teleoperation device (e.g., keyboard, spacemouse)" ) + parser.add_argument( + "--disable_success_termination", + action="store_true", + help="Disable success termination during training.", + ) parser.add_argument( "--rl_training_mode", + dest="disable_success_termination", action="store_true", - help="Disable success termination (use when training with RL-Games).", + help="Alias for --disable_success_termination.", ) diff --git a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml b/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml deleted file mode 100644 index 2e7a55eba..000000000 --- a/isaaclab_arena_examples/policy/nist_gear_insertion_osc_rl_games.yaml +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -# RL Games PPO config for NIST gear insertion (operational-space torque control). -# Network and hyperparameters align with common assembly peg-insert RL benchmarks. - -params: - seed: 0 - algo: - name: a2c_continuous - - env: - clip_actions: 1.0 - obs_groups: - obs: [policy, task_obs] - states: [policy, task_obs] - - model: - name: continuous_a2c_logstd - - network: - name: actor_critic - separate: False - - space: - continuous: - mu_activation: None - sigma_activation: tanh - mu_init: - name: default - sigma_init: - name: const_initializer - val: 0 - fixed_sigma: False - mlp: - units: [512, 128, 64] - activation: elu - d2rl: False - - initializer: - name: default - regularizer: - name: None - - rnn: - name: lstm - units: 1024 - layers: 2 - before_mlp: True - concat_input: True - layer_norm: True - - load_checkpoint: False - load_path: "" - - config: - name: NistGearInsertionOscRlg - device: cuda:0 - full_experiment_name: test - env_name: rlgpu - multi_gpu: False - ppo: True - mixed_precision: True - normalize_input: True - normalize_value: True - value_bootstrap: True - num_actors: 128 - reward_shaper: - scale_value: 1.0 - normalize_advantage: True - gamma: 0.995 - tau: 0.95 - learning_rate: 1.0e-4 - lr_schedule: adaptive - schedule_type: standard - kl_threshold: 0.008 - score_to_win: 20000 - max_epochs: 4000 - save_best_after: 10 - save_frequency: 10 - print_stats: True - grad_norm: 1.0 - entropy_coef: 0.0 - truncate_grads: True - e_clip: 0.2 - horizon_length: 128 - minibatch_size: 512 - mini_epochs: 4 - critic_coef: 2 - clip_value: True - seq_length: 128 - bounds_loss_coef: 0.0001 - - central_value_config: - minibatch_size: 512 - mini_epochs: 4 - learning_rate: 1e-4 - lr_schedule: adaptive - kl_threshold: 0.008 - clip_value: True - normalize_input: True - truncate_grads: True - - network: - name: actor_critic - central_value: True - - mlp: - units: [512, 128, 64] - activation: elu - d2rl: False - - initializer: - name: default - regularizer: - name: None - - rnn: - name: lstm - units: 1024 - layers: 2 - before_mlp: True - concat_input: True - layer_norm: True - - player: - deterministic: True From 70ed06e9667e9da3c2ef51994bb6f3797227560b Mon Sep 17 00:00:00 2001 From: Seun Doherty Date: Wed, 13 May 2026 14:12:19 -0700 Subject: [PATCH 12/12] Update isaaclab_arena/assets/object_library.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- isaaclab_arena/assets/object_library.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/isaaclab_arena/assets/object_library.py b/isaaclab_arena/assets/object_library.py index c14672292..eb8b686d1 100644 --- a/isaaclab_arena/assets/object_library.py +++ b/isaaclab_arena/assets/object_library.py @@ -318,8 +318,10 @@ class MediumNistGear(LibraryObject): "collision_props": sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), } - def __init__(self, prim_path: str | None = None, initial_pose: Pose | None = None): - super().__init__(prim_path=prim_path, initial_pose=initial_pose) + def __init__( + self, instance_name: str | None = None, prim_path: str | None = None, initial_pose: Pose | None = None + ): + super().__init__(instance_name=instance_name, prim_path=prim_path, initial_pose=initial_pose) @register_asset