Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/bitbots_misc/bitbots_bringup/launch/motion.launch
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@
</node>
</group>

<!-- launch the RL motion nodes (stand-up policy etc.) -->
<include file="$(find-pkg-share bitbots_rl_motion)/launch/rl_motion.launch">
<arg name="sim" value="$(var sim)"/>
</include>

<!-- launch the hcm -->
<include file="$(find-pkg-share bitbots_hcm)/launch/hcm.launch">
<arg name="sim" value="$(var sim)"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""HCM action element that triggers the RL standup-back policy.

Modeled on ``PlayAnimationDynup`` from ``play_animation.py``: the action
holds a goal open on a ROS action server while the standup policy runs and
publishes joint commands. When the DSD pops this action (e.g. because the
robot is no longer detected as fallen), the goal is canceled and the policy
node stops publishing.

Notes
-----
* Reuses the existing ``bitbots_msgs/Dynup`` action type, with
``direction = "rl_standup_back"``. No new action message is required.
* Creates its own ``ActionClient`` rather than relying on a blackboard field,
so ``hcm_blackboard.py`` does not need to be modified.
* This action is wired into ``hcm.dsd``: the FALLEN_BACK / FREE branch runs
  ``@RLStandupBack`` in place of ``@PlayAnimationStandupBack``.
"""

from action_msgs.msg import GoalStatus
from rclpy.action import ActionClient

from bitbots_hcm.hcm_dsd.actions import AbstractHCMActionElement
from bitbots_msgs.action import Dynup

GOAL_DIRECTION = "rl_standup_back"
ACTION_NAME = "rl_standup_back"


class RLStandupBack(AbstractHCMActionElement):
    """DSD action that runs the RL standup-back policy until cancelled.

    Holds a ``Dynup`` goal (``direction = "rl_standup_back"``) open on the
    RL standup action server while the policy publishes joint commands.
    When the DSD pops this action, the goal is cancelled so the policy node
    stops publishing.
    """

    def __init__(self, blackboard, dsd, parameters):
        super().__init__(blackboard, dsd, parameters)
        # True until the goal has been sent on the first perform() tick.
        self.first_perform = True
        # Own ActionClient so hcm_blackboard.py does not need a new field.
        self._action_client = ActionClient(self.blackboard.node, Dynup, ACTION_NAME)
        # Future returned by send_goal_async(); None until a goal was sent.
        self._goal_future = None

    def perform(self, reevaluate=False):
        """Send the goal on the first tick, then wait for it to finish."""
        # Don't reevaluate while the policy is running — same pattern as
        # PlayAnimationDynup for FRONT/BACK directions.
        self.do_not_reevaluate()

        if self.first_perform:
            if not self._start_goal():
                self.blackboard.node.get_logger().error(
                    "Could not start RL standup-back goal. Aborting action."
                )
                return self.pop()
            self.first_perform = False
            return

        if self._goal_finished():
            return self.pop()

    def on_pop(self):
        """Cancel the goal if the action is popped before completion."""
        super().on_pop()
        if self._goal_future is None:
            return
        if not self._goal_future.done():
            # The goal was sent but the server has not responded yet.
            # Cancel as soon as the goal handle arrives — returning here
            # without scheduling a cancel (as a naive implementation would)
            # leaks a running goal and the policy keeps publishing after
            # the DSD has moved on.
            self._goal_future.add_done_callback(self._cancel_goal_handle)
            return
        self._cancel_goal_handle(self._goal_future)

    # ------------------------------------------------------------------ helpers

    def _cancel_goal_handle(self, goal_future) -> None:
        """Cancel the goal held by *goal_future* if it is accepted and running."""
        goal_handle = goal_future.result()
        if goal_handle is None or not goal_handle.accepted:
            return
        if not self._goal_finished():
            goal_handle.cancel_goal_async()

    def _start_goal(self) -> bool:
        """Send the standup-back goal; return False if the server is absent.

        Reuses the HCM's animation-server wait time so start-up behaviour
        matches the animation-based actions.
        """
        wait_param = self.blackboard.node.get_parameter("hcm.anim_server_wait_time").value
        if not self._action_client.wait_for_server(timeout_sec=wait_param):
            self.blackboard.node.get_logger().warn(
                "RL standup-back action server not running; cannot start goal."
            )
            return False

        goal = Dynup.Goal()
        goal.direction = GOAL_DIRECTION
        goal.from_hcm = True
        self._goal_future = self._action_client.send_goal_async(
            goal, feedback_callback=self._feedback_cb
        )
        return True

    def _feedback_cb(self, msg):
        """Forward the policy's percent-done feedback to the DSD debug output."""
        feedback: Dynup.Feedback = msg.feedback
        self.publish_debug_data("RL Standup Back Percent Done", str(feedback.percent_done))

    def _goal_finished(self) -> bool:
        """Return True once the goal future reached a terminal state."""
        if self._goal_future is None or not self._goal_future.done():
            return False
        if self._goal_future.cancelled():
            return True
        goal_handle = self._goal_future.result()
        if goal_handle is None:
            return False
        return goal_handle.status in (
            GoalStatus.STATUS_SUCCEEDED,
            GoalStatus.STATUS_CANCELED,
            GoalStatus.STATUS_ABORTED,
        )
2 changes: 1 addition & 1 deletion src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ $StartHCM
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationWalkReady
FALLEN_BACK --> $GameControllerStop
STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationStandupBack
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @RLStandupBack
FALLEN_RIGHT --> $GameControllerStop
STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @PlayAnimationWalkReady
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import numpy as np


class ObservationHistory:
    """Fixed-length rolling buffer of stacked one-step observations.

    Replicates HoST's observation stacking: the actor input is ``length``
    consecutive one-step observations of size ``one_step_dim`` laid out
    back-to-back in one flat float32 vector. ``update`` discards the oldest
    frame and appends the newest at the end. On construction (and after
    ``reset``) every frame is zero, matching HoST's buffer state right
    after an environment reset.
    """

    def __init__(self, length: int, one_step_dim: int):
        self._length = length
        self._one_step_dim = one_step_dim
        self._buffer = np.zeros(length * one_step_dim, dtype=np.float32)

    def reset(self) -> None:
        """Zero the entire history, as after an environment reset."""
        self._buffer.fill(0.0)

    def update(self, current_obs: np.ndarray) -> None:
        """Drop the oldest frame and append *current_obs* as the newest."""
        assert current_obs.shape == (self._one_step_dim,), (
            f"expected one-step obs of shape ({self._one_step_dim},), got {current_obs.shape}"
        )
        # View the flat buffer as (length, one_step_dim) frames and shift
        # every frame one slot towards the front, in place.
        frames = self._buffer.reshape(self._length, self._one_step_dim)
        frames[:-1] = frames[1:]
        frames[-1] = current_obs.astype(np.float32, copy=False)

    def get(self) -> np.ndarray:
        """Return the flat stacked observation vector (a live view, not a copy)."""
        return self._buffer

    @property
    def total_dim(self) -> int:
        """Length of the vector returned by :meth:`get`."""
        return self._length * self._one_step_dim
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@


class Phase:
_phase: np.ndarray = np.array([0.0, np.pi], dtype=np.float32)
_phase_dt: Optional[float]

def __init__(self, node):
Expand All @@ -14,12 +13,15 @@ def __init__(self, node):
self._control_dt = self._node.get_parameter("phase.control_dt").value
self._gait_frequency = self._node.get_parameter("phase.gait_frequency").value
self._phase_dt = 2 * np.pi * self._gait_frequency * self._control_dt
initial = self._node.get_parameter("phase.initial_phase").value
self._phase = np.array(initial, dtype=np.float32)
self._use_phase = True
else:
self._control_dt = None
self._gait_frequency = None
self._phase_dt = None
self._use_phase = False
self._phase = np.zeros(2, dtype=np.float32)
self._node.get_logger().warning("No phase was found! Using policy without phase!")

self._obs_phase = None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
standup_back_node:
ros__parameters:
model: pi_supine_training7.onnx

joints:
# 22-DOF order as reported by Isaac Gym for the HoST pi_plus_ground task.
# The ONNX policy expects observations and produces actions in exactly
# this order, so do not reorder.
ordered_relevant_joint_names: [
"head_yaw_joint",
"head_pitch_joint",
"l_hip_pitch_joint",
"l_hip_roll_joint",
"l_thigh_joint",
"l_calf_joint",
"l_ankle_pitch_joint",
"l_ankle_roll_joint",
"l_shoulder_pitch_joint",
"l_shoulder_roll_joint",
"l_upper_arm_joint",
"l_elbow_joint",
"r_hip_pitch_joint",
"r_hip_roll_joint",
"r_thigh_joint",
"r_calf_joint",
"r_ankle_pitch_joint",
"r_ankle_roll_joint",
"r_shoulder_pitch_joint",
"r_shoulder_roll_joint",
"r_upper_arm_joint",
"r_elbow_joint",
]

# walkready_state is unused by this policy (HoST builds q_target from the
# current joint position, not from a default pose). Declared as 22 zeros
# only to satisfy the parameter declaration in RLNode.
walkready_state: [
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
]

obs_scales:
ang_vel: 0.25
dof_pos: 1.0
dof_vel: 0.05

action_scale: 1.0
action_rescale_obs: 1.0 # constant slot in the HoST observation vector

history:
length: 6
one_step_obs_dim: 73

# Number of policy ticks at the start of each goal during which the policy
# observes a zero vector and no joint commands are published, mirroring the
# `unactuated_timesteps` warmup HoST uses during training. Set to 0 to
# disable.
unactuated_steps: 30

phase:
# HoST does not use a phase signal.
    use_phase: false
control_dt: 0.02
gait_frequency: 0.0

providers: ["CPUExecutionProvider"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
walk_node:
ros__parameters:
# Filename of the ONNX model inside bitbots_rl_motion/models/
model: policy_walk.onnx

joints:
# 12-DOF leg-joint order matching the Pi_plus walk policy training setup.
# Observation and action vectors must follow exactly this order.
ordered_relevant_joint_names: [
"r_hip_pitch_joint",
"r_hip_roll_joint",
"r_thigh_joint",
"r_calf_joint",
"r_ankle_pitch_joint",
"r_ankle_roll_joint",
"l_hip_pitch_joint",
"l_hip_roll_joint",
"l_thigh_joint",
"l_calf_joint",
"l_ankle_pitch_joint",
"l_ankle_roll_joint",
]

# Default (walkready) pose the policy is trained relative to.
# joint_angle_obs = q_current - walkready_state
# joint_command = action * 0.5 + walkready_state
walkready_state: [0.6, 0.0, 0.0, 1.2, 0.6, 0.0, -0.6, 0.0, 0.0, -1.2, -0.6, 0.0]

# PD gains for the Pi_plus leg joints.
kp: [30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0]
kd: [1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]

phase:
use_phase: true
control_dt: 0.02
gait_frequency: 1.5
# Initial phase [π/2, -π/2] matching the walk policy training setup.
initial_phase: [1.5707963267948966, -1.5707963267948966]

providers: ["CPUExecutionProvider"]
11 changes: 9 additions & 2 deletions src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,15 @@ def __init__(self, node):

def get_command(self):
    """Return the policy command vector ``[vx, vy, wz, stop]`` as float32.

    The first three entries are the commanded planar velocities from the
    latest ``cmd_vel``; the fourth is the stop flag (1.0 when
    :meth:`get_stop_signal` is True, else 0.0).

    Note: the span previously contained the superseded 3-element version of
    this method alongside the new one (diff residue); only the 4-element
    version is kept.
    """
    assert self._cmd_vel is not None
    stop_signal = float(self.get_stop_signal())
    return np.array(
        [self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z, stop_signal],
        dtype=np.float32,
    )

def get_stop_signal(self) -> bool:
    """Return True when the latest ``cmd_vel`` requests a stop.

    Any non-zero ``angular.x`` is treated as the stop flag.
    NOTE(review): ``angular.x`` appears to be repurposed as a boolean stop
    signal here — confirm this convention with the cmd_vel publisher.
    """
    assert self._cmd_vel is not None
    return self._cmd_vel.angular.x != 0.0

def has_data(self):
    """Return True once ``self._cmd_vel`` has been set (a command was received)."""
    return self._cmd_vel is not None
Expand Down
11 changes: 9 additions & 2 deletions src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ def __init__(self, node):

self._ordered_relevant_joint_names = self._node.get_parameter("joints.ordered_relevant_joint_names").value
self._walkready_state = self._node.get_parameter("joints.walkready_state").value
self._previous_action: np.ndarray = np.zeros(len(self._ordered_relevant_joint_names), dtype=np.float32)
n = len(self._ordered_relevant_joint_names)
kp_param = self._node.get_parameter("joints.kp").value
kd_param = self._node.get_parameter("joints.kd").value
self._kp = list(kp_param) if len(kp_param) > 1 else [kp_param[0]] * n
self._kd = list(kd_param) if len(kd_param) > 1 else [kd_param[0]] * n
self._previous_action: np.ndarray = np.zeros(n, dtype=np.float32)
self._joint_state: Optional[JointState] = None

self._joint_state_sub = self._node.create_subscription(
Expand Down Expand Up @@ -61,7 +66,9 @@ def get_joint_commands(self, onnx_pred):
joint_command.positions = onnx_pred * 0.5 + self._walkready_state
joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.max_currents = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.kp = self._kp
joint_command.kd = self._kd

return joint_command

Expand Down
Loading
Loading