6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
```diff
@@ -36,26 +36,32 @@ repos:
       - id: check-yaml
       - id: check-toml
       - id: end-of-file-fixer
+        exclude: ^(outputs/|examples/hil_serl_simulation_training/outputs/)
       - id: trailing-whitespace
+        exclude: ^(outputs/|examples/hil_serl_simulation_training/outputs/)
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.12.4
     hooks:
       - id: ruff-format
+        exclude: ^(outputs/|examples/hil_serl_simulation_training/outputs/)
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
+        exclude: ^(outputs/|examples/hil_serl_simulation_training/outputs/)
 
   - repo: https://github.com/adhtruong/mirrors-typos
     rev: v1.34.0
     hooks:
       - id: typos
         args: [--force-exclude]
+        exclude: ^(outputs/|examples/hil_serl_simulation_training/outputs/)
 
   - repo: https://github.com/asottile/pyupgrade
     rev: v3.20.0
     hooks:
       - id: pyupgrade
         args: [--py310-plus]
+        exclude: ^(outputs/|examples/hil_serl_simulation_training/outputs/)
 
   ##### Markdown Quality #####
   - repo: https://github.com/rbubley/mirrors-prettier
```
92 changes: 92 additions & 0 deletions examples/hil_serl_simulation_training/hi_rl_test_gamepad.json
@@ -0,0 +1,92 @@
```json
{
  "type": "hil",
  "wrapper": {
    "gripper_penalty": -0.02,
    "display_cameras": false,
    "add_joint_velocity_to_observation": true,
    "add_ee_pose_to_observation": true,
    "crop_params_dict": {
      "observation.images.front": [0, 0, 128, 128],
      "observation.images.wrist": [0, 0, 128, 128]
    },
    "resize_size": [128, 128],
    "control_time_s": 40.0,
    "use_gripper": true,
    "fixed_reset_joint_positions": [0.0, 0.195, 0.0, -2.43, 0.0, 2.62, 0.785],
    "reset_time_s": 2.0,
    "control_mode": "gamepad"
  },
  "use_viewer": true,
  "use_gamepad": true,
  "name": "franka_sim",
  "mode": "record",
  "repo_id": "username/panda-pick-gamepad",
  "dataset_root": "./recordings_static_gamepad",
  "task": "PandaPickCubeGamepad-v0",
  "num_episodes": 30,
  "episode": 0,
  "pretrained_policy_name_or_path": null,
  "device": "cuda",
  "push_to_hub": true,
  "fps": 10,
  "random_block_position": true,
  "features": {
    "observation.images.front": { "type": "VISUAL", "shape": [3, 128, 128] },
    "observation.images.wrist": { "type": "VISUAL", "shape": [3, 128, 128] },
    "observation.state": { "type": "STATE", "shape": [18] },
    "action": { "type": "ACTION", "shape": [4] }
  },
  "features_map": {
    "observation.images.front": "observation.images.front",
    "observation.images.wrist": "observation.images.wrist",
    "observation.state": "observation.state",
    "action": "action"
  },
  "reward_classifier_pretrained_path": null,
  "number_of_steps_after_success": 0
}
```
@@ -0,0 +1,98 @@
# LeRobot Training Guide: Panda Cube Picking

A concise guide for training a robot policy to pick cubes using LeRobot's HIL-SERL framework.

## Installation

Install LeRobot with HIL-SERL support as shown in the [official documentation](https://huggingface.co/docs/lerobot/hilserl):

```bash
pip install -e ".[hilserl]"
```

## Recording Demonstrations

Record demonstration episodes, which are used to pretrain the reinforcement learning policy before online training:

```bash
python -m lerobot.scripts.rl.gym_manipulator --config_path examples/hil_serl_simulation_training/hi_rl_test_gamepad.json
```

**Important Notes:**

- **Recommended**: Collect 30-40 episodes, including some failed picking attempts
- **Control**: Use a gamepad for finer end-effector control (the provided configuration already enables it)
- **Dataset Storage**:
  - `dataset_root` is the local folder where the dataset is saved
  - `repo_id` is the Hugging Face repository the dataset is uploaded to
- **Prerequisite**: Log in to the Hugging Face CLI with a write-access token so datasets can be pushed, and replace the `username` placeholder in `hi_rl_test_gamepad.json` and `train_gym_hil_env_gamepad.json`
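The placeholder swap from the prerequisite above can be sketched as follows. This is a toy illustration: `demo-user` is a hypothetical account name, and the in-memory dict stands in for loading and re-saving the actual JSON files with `json.load`/`json.dump`.

```python
# Sketch: point `repo_id` at your own Hub account before recording.
# `demo-user` is a placeholder; use the account `huggingface-cli whoami` reports.
import json

HF_USER = "demo-user"

# Stand-in for json.load()-ing hi_rl_test_gamepad.json.
config = {"repo_id": "username/panda-pick-gamepad"}

dataset_name = config["repo_id"].split("/", 1)[1]
config["repo_id"] = f"{HF_USER}/{dataset_name}"
print(json.dumps(config))  # → {"repo_id": "demo-user/panda-pick-gamepad"}
```

The same substitution must be applied to `train_gym_hil_env_gamepad.json`.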

## Training the Policy

Start both the learner and actor using these commands:

```bash
# Terminal 1: Start learner
python -m lerobot.scripts.rl.learner --config_path examples/hil_serl_simulation_training/train_gym_hil_env_gamepad.json

# Terminal 2: Start actor
python -m lerobot.scripts.rl.actor --config_path examples/hil_serl_simulation_training/train_gym_hil_env_gamepad.json
```

The learner initializes its parameters from the offline demonstration dataset recorded earlier.
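Conceptually, the two processes form a loop: the actor rolls out the current policy and streams transitions to the learner, which trains on them and periodically publishes fresh weights back to the actor. The single-process toy sketch below illustrates only that producer/consumer pattern; it is not LeRobot's implementation, which runs the two scripts as separate communicating processes.

```python
# Toy sketch of the actor/learner split; not LeRobot's actual code.
import queue
import threading

transitions: queue.Queue = queue.Queue()
policy = {"updates": 0}  # stand-in for network weights

def actor(num_steps: int) -> None:
    # Roll out the (stand-in) policy and stream experience to the learner.
    for step in range(num_steps):
        transitions.put({"obs": step, "action": 0.0, "reward": 0.0})

def learner(num_steps: int) -> None:
    # Consume experience and perform (stand-in) gradient updates.
    for _ in range(num_steps):
        transitions.get()
        policy["updates"] += 1

threads = [
    threading.Thread(target=actor, args=(5,)),
    threading.Thread(target=learner, args=(5,)),
]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(policy["updates"])  # → 5
```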

## Human Intervention Strategy

**Important**: Give the policy a few episodes to attempt the pick on its own before intervening.

**Best practices** for human intervention (see [official guide](https://huggingface.co/docs/lerobot/hilserl#guide-to-human-interventions)):

1. **Guide the end effector back to the object** if it's drifting away from the cube
2. **Guide the end effector above the cube** and let the policy try to pick it and raise it
3. **Completely pick the object** if needed

**Progression Strategy:**

- As training progresses, make interventions less aggressive
- Once the policy can pick the cube on its own, intervene only to guide the end effector to the approximate area of the object

## Training Results

A rollout of the fully trained policy is shown below:

![Panda Cube Picking Demo](../../media/hil-serl-sim/panda-hil-serl.gif)

## Resuming Training from Checkpoint

Checkpoints are automatically saved based on the `save_freq` parameter in `train_gym_hil_env_gamepad.json` and are stored in the `outputs` folder.

To resume training, launch both the learner and the actor with `--policy.path` pointing at the saved checkpoint:

```bash
# Terminal 1: Resume learner
python -m lerobot.scripts.rl.learner --config_path examples/hil_serl_simulation_training/train_gym_hil_env_gamepad.json --policy.path=examples/hil_serl_simulation_training/outputs/train/2025-08-05/13-45-02_default/checkpoints/0002000/pretrained_model

# Terminal 2: Resume actor
python -m lerobot.scripts.rl.actor --config_path examples/hil_serl_simulation_training/train_gym_hil_env_gamepad.json --policy.path=examples/hil_serl_simulation_training/outputs/train/2025-08-05/13-45-02_default/checkpoints/0002000/pretrained_model
```

Where `policy.path` is the full path to the locally saved checkpoints.

## Monitoring Training

Enable the Weights & Biases option in `train_gym_hil_env_gamepad.json` to follow training metrics live.
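Assuming the training config follows LeRobot's usual layout, the relevant fragment looks roughly like the one below. The key names and project value here are an assumption for illustration; check your `train_gym_hil_env_gamepad.json` for the exact fields.

```json
"wandb": {
  "enable": true,
  "project": "panda-hil-serl"
}
```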

After some training, the intervention rate curve should look roughly like this:

![Training Intervention Rate](../../media/hil-serl-sim/train_intervention_rate.png)

## Configuration Files

- `hi_rl_test_gamepad.json`: Recording configuration
- `train_gym_hil_env_gamepad.json`: Training configuration

## Key Parameters

- **Save Frequency**: Controlled by `save_freq` in training config
- **Checkpoint Location**: `outputs/train/YYYY-MM-DD/HH-MM-SS_jobname/checkpoints/`
- **Policy Path**: Points to `pretrained_model/` directory within checkpoints