diff --git a/src/bitbots_misc/bitbots_bringup/launch/motion.launch b/src/bitbots_misc/bitbots_bringup/launch/motion.launch
index b32d55461..e2a5967d7 100644
--- a/src/bitbots_misc/bitbots_bringup/launch/motion.launch
+++ b/src/bitbots_misc/bitbots_bringup/launch/motion.launch
@@ -42,6 +42,11 @@
+
+
+
+
+
diff --git a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py
new file mode 100644
index 000000000..6c8fa695d
--- /dev/null
+++ b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py
@@ -0,0 +1,102 @@
+"""HCM action element that triggers the RL standup-back policy.
+
+Modeled on ``PlayAnimationDynup`` from ``play_animation.py``: the action
+holds a goal open on a ROS action server while the standup policy runs and
+publishes joint commands. When the DSD pops this action (e.g. because the
+robot is no longer detected as fallen), the goal is canceled and the policy
+node stops publishing.
+
+Notes
+-----
+* Reuses the existing ``bitbots_msgs/Dynup`` action type, with
+ ``direction = "rl_standup_back"``. No new action message is required.
+* Creates its own ``ActionClient`` rather than relying on a blackboard field,
+ so ``hcm_blackboard.py`` does not need to be modified.
+* The action is wired into the FALLEN_BACK / FREE branch of ``hcm.dsd`` in
+  this change, replacing ``@PlayAnimationStandupBack``.
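+* For manual testing, a goal can be sent from the CLI (a sketch; assumes the
+  default namespace)::
+
+      ros2 action send_goal /rl_standup_back bitbots_msgs/action/Dynup \
+          "{direction: rl_standup_back, from_hcm: true}"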
+"""
+
+from action_msgs.msg import GoalStatus
+from rclpy.action import ActionClient
+
+from bitbots_hcm.hcm_dsd.actions import AbstractHCMActionElement
+from bitbots_msgs.action import Dynup
+
+GOAL_DIRECTION = "rl_standup_back"
+ACTION_NAME = "rl_standup_back"
+
+
+class RLStandupBack(AbstractHCMActionElement):
+ """DSD action that runs the RL standup-back policy until cancelled."""
+
+ def __init__(self, blackboard, dsd, parameters):
+ super().__init__(blackboard, dsd, parameters)
+ self.first_perform = True
+ self._action_client = ActionClient(self.blackboard.node, Dynup, ACTION_NAME)
+ self._goal_future = None
+
+ def perform(self, reevaluate=False):
+        # Don't reevaluate while the policy is running; this is the same
+        # pattern PlayAnimationDynup uses for the FRONT/BACK directions.
+ self.do_not_reevaluate()
+
+ if self.first_perform:
+ if not self._start_goal():
+ self.blackboard.node.get_logger().error(
+ "Could not start RL standup-back goal. Aborting action."
+ )
+ return self.pop()
+ self.first_perform = False
+ return
+
+ if self._goal_finished():
+ return self.pop()
+
+    def on_pop(self):
+        """Cancel the goal if the action is popped before completion."""
+        super().on_pop()
+        if self._goal_future is None:
+            return
+        if not self._goal_future.done():
+            # The goal response has not arrived yet, so there is no handle to
+            # cancel. Cancel it as soon as the response comes in.
+            def _cancel_when_ready(future):
+                goal_handle = future.result()
+                if goal_handle is not None and goal_handle.accepted:
+                    goal_handle.cancel_goal_async()
+
+            self._goal_future.add_done_callback(_cancel_when_ready)
+            return
+        goal_handle = self._goal_future.result()
+        if goal_handle is None or not goal_handle.accepted:
+            return
+        if not self._goal_finished():
+            goal_handle.cancel_goal_async()
+
+ # ------------------------------------------------------------------ helpers
+
+ def _start_goal(self) -> bool:
+ wait_param = self.blackboard.node.get_parameter("hcm.anim_server_wait_time").value
+ if not self._action_client.wait_for_server(timeout_sec=wait_param):
+ self.blackboard.node.get_logger().warn(
+ "RL standup-back action server not running; cannot start goal."
+ )
+ return False
+
+ goal = Dynup.Goal()
+ goal.direction = GOAL_DIRECTION
+ goal.from_hcm = True
+ self._goal_future = self._action_client.send_goal_async(
+ goal, feedback_callback=self._feedback_cb
+ )
+ return True
+
+ def _feedback_cb(self, msg):
+ feedback: Dynup.Feedback = msg.feedback
+ self.publish_debug_data("RL Standup Back Percent Done", str(feedback.percent_done))
+
+ def _goal_finished(self) -> bool:
+ if self._goal_future is None or not self._goal_future.done():
+ return False
+ if self._goal_future.cancelled():
+ return True
+ goal_handle = self._goal_future.result()
+ if goal_handle is None:
+ return False
+ return goal_handle.status in (
+ GoalStatus.STATUS_SUCCEEDED,
+ GoalStatus.STATUS_CANCELED,
+ GoalStatus.STATUS_ABORTED,
+ )
diff --git a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
index ccd5e67ea..36f76cf8d 100644
--- a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
+++ b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
@@ -34,7 +34,7 @@ $StartHCM
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationWalkReady
FALLEN_BACK --> $GameControllerStop
STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
- FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationStandupBack
+ FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @RLStandupBack
FALLEN_RIGHT --> $GameControllerStop
STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @PlayAnimationWalkReady
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py
new file mode 100644
index 000000000..61291a3e6
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py
@@ -0,0 +1,35 @@
+import numpy as np
+
+
+class ObservationHistory:
+ """Rolling history buffer that mirrors HoST's observation stacking.
+
+ HoST builds the actor input by stacking ``length`` consecutive one-step
+ observations of dimension ``one_step_dim``. New frames are appended at the
+ end and the oldest frame is dropped from the front. The buffer is
+ initialised with zeros, matching the training-time buffer state right after
+ a reset.
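+
+    A minimal sketch of the rolling behaviour (toy sizes, not the real dims)::
+
+        >>> h = ObservationHistory(length=2, one_step_dim=2)
+        >>> h.update(np.array([1.0, 2.0], dtype=np.float32))
+        >>> h.update(np.array([3.0, 4.0], dtype=np.float32))
+        >>> h.get().tolist()
+        [1.0, 2.0, 3.0, 4.0]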
+ """
+
+ def __init__(self, length: int, one_step_dim: int):
+ self._length = length
+ self._one_step_dim = one_step_dim
+ self._buffer = np.zeros(length * one_step_dim, dtype=np.float32)
+
+ def reset(self) -> None:
+ self._buffer.fill(0.0)
+
+ def update(self, current_obs: np.ndarray) -> None:
+ assert current_obs.shape == (self._one_step_dim,), (
+ f"expected one-step obs of shape ({self._one_step_dim},), got {current_obs.shape}"
+ )
+ # shift left by one frame, append the new frame at the end
+ self._buffer[: -self._one_step_dim] = self._buffer[self._one_step_dim :]
+ self._buffer[-self._one_step_dim :] = current_obs.astype(np.float32, copy=False)
+
+ def get(self) -> np.ndarray:
+ return self._buffer
+
+ @property
+ def total_dim(self) -> int:
+ return self._length * self._one_step_dim
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
index 6b57b3640..91ee7479a 100644
--- a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
+++ b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
@@ -4,7 +4,6 @@
class Phase:
- _phase: np.ndarray = np.array([0.0, np.pi], dtype=np.float32)
_phase_dt: Optional[float]
def __init__(self, node):
@@ -14,12 +13,15 @@ def __init__(self, node):
self._control_dt = self._node.get_parameter("phase.control_dt").value
self._gait_frequency = self._node.get_parameter("phase.gait_frequency").value
self._phase_dt = 2 * np.pi * self._gait_frequency * self._control_dt
+ initial = self._node.get_parameter("phase.initial_phase").value
+ self._phase = np.array(initial, dtype=np.float32)
self._use_phase = True
else:
self._control_dt = None
self._gait_frequency = None
self._phase_dt = None
self._use_phase = False
+ self._phase = np.zeros(2, dtype=np.float32)
self._node.get_logger().warning("No phase was found! Using policy without phase!")
self._obs_phase = None
diff --git a/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml
new file mode 100644
index 000000000..05dd1fb8a
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml
@@ -0,0 +1,66 @@
+standup_back_node:
+ ros__parameters:
+ model: pi_supine_training7.onnx
+
+ joints:
+ # 22-DOF order as reported by Isaac Gym for the HoST pi_plus_ground task.
+ # The ONNX policy expects observations and produces actions in exactly
+ # this order, so do not reorder.
+ ordered_relevant_joint_names: [
+ "head_yaw_joint",
+ "head_pitch_joint",
+ "l_hip_pitch_joint",
+ "l_hip_roll_joint",
+ "l_thigh_joint",
+ "l_calf_joint",
+ "l_ankle_pitch_joint",
+ "l_ankle_roll_joint",
+ "l_shoulder_pitch_joint",
+ "l_shoulder_roll_joint",
+ "l_upper_arm_joint",
+ "l_elbow_joint",
+ "r_hip_pitch_joint",
+ "r_hip_roll_joint",
+ "r_thigh_joint",
+ "r_calf_joint",
+ "r_ankle_pitch_joint",
+ "r_ankle_roll_joint",
+ "r_shoulder_pitch_joint",
+ "r_shoulder_roll_joint",
+ "r_upper_arm_joint",
+ "r_elbow_joint",
+ ]
+
+ # walkready_state is unused by this policy (HoST builds q_target from the
+ # current joint position, not from a default pose). Declared as 22 zeros
+ # only to satisfy the parameter declaration in RLNode.
+ walkready_state: [
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ ]
+
+ obs_scales:
+ ang_vel: 0.25
+ dof_pos: 1.0
+ dof_vel: 0.05
+
+ action_scale: 1.0
+ action_rescale_obs: 1.0 # constant slot in the HoST observation vector
+
+ history:
+ length: 6
+ one_step_obs_dim: 73
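+    # Dimension check: 3 (ang_vel) + 3 (gravity) + 22 (dof_pos) + 22 (dof_vel)
+    # + 22 (prev_action) + 1 (action_rescale) = 73 per step; stacked 6x this
+    # yields 438 values fed to the policy.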
+
+ # Number of policy ticks at the start of each goal during which the policy
+ # observes a zero vector and no joint commands are published, mirroring the
+ # `unactuated_timesteps` warmup HoST uses during training. Set to 0 to
+ # disable.
+ unactuated_steps: 30
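+  # With control_dt = 0.02 s this amounts to 30 * 0.02 = 0.6 s of warmup.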
+
+ phase:
+ # HoST does not use a phase signal.
+ use_phase: False
+ control_dt: 0.02
+ gait_frequency: 0.0
+
+ providers: ["CPUExecutionProvider"]
diff --git a/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml
new file mode 100644
index 000000000..1abc1a1b0
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml
@@ -0,0 +1,40 @@
+walk_node:
+ ros__parameters:
+ # Filename of the ONNX model inside bitbots_rl_motion/models/
+ model: policy_walk.onnx
+
+ joints:
+ # 12-DOF leg-joint order matching the Pi_plus walk policy training setup.
+ # Observation and action vectors must follow exactly this order.
+ ordered_relevant_joint_names: [
+ "r_hip_pitch_joint",
+ "r_hip_roll_joint",
+ "r_thigh_joint",
+ "r_calf_joint",
+ "r_ankle_pitch_joint",
+ "r_ankle_roll_joint",
+ "l_hip_pitch_joint",
+ "l_hip_roll_joint",
+ "l_thigh_joint",
+ "l_calf_joint",
+ "l_ankle_pitch_joint",
+ "l_ankle_roll_joint",
+ ]
+
+ # Default (walkready) pose the policy is trained relative to.
+ # joint_angle_obs = q_current - walkready_state
+ # joint_command = action * 0.5 + walkready_state
+ walkready_state: [0.6, 0.0, 0.0, 1.2, 0.6, 0.0, -0.6, 0.0, 0.0, -1.2, -0.6, 0.0]
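+    # Example: an action of 0.2 on r_hip_pitch_joint commands
+    # 0.2 * 0.5 + 0.6 = 0.7 rad.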
+
+ # PD gains for the Pi_plus leg joints.
+ kp: [30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0]
+ kd: [1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]
+
+ phase:
+ use_phase: true
+ control_dt: 0.02
+ gait_frequency: 1.5
+ # Initial phase [π/2, -π/2] matching the walk policy training setup.
+ initial_phase: [1.5707963267948966, -1.5707963267948966]
+
+ providers: ["CPUExecutionProvider"]
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
index f20641394..bab868686 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
@@ -16,8 +16,15 @@ def __init__(self, node):
def get_command(self):
assert self._cmd_vel is not None
- command = np.array([self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z], dtype=np.float32)
- return command
+ stop_signal = float(self.get_stop_signal())
+ return np.array(
+ [self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z, stop_signal],
+ dtype=np.float32,
+ )
+
+ def get_stop_signal(self) -> bool:
+ assert self._cmd_vel is not None
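+        # Convention (assumed from the sender side): angular.x carries no walk
+        # velocity, so any non-zero value is interpreted as an explicit stop flag.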
+ return self._cmd_vel.angular.x != 0.0
def has_data(self):
return self._cmd_vel is not None
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
index a1871fd59..b7e8fc89f 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
@@ -13,7 +13,12 @@ def __init__(self, node):
self._ordered_relevant_joint_names = self._node.get_parameter("joints.ordered_relevant_joint_names").value
self._walkready_state = self._node.get_parameter("joints.walkready_state").value
- self._previous_action: np.ndarray = np.zeros(len(self._ordered_relevant_joint_names), dtype=np.float32)
+ n = len(self._ordered_relevant_joint_names)
+ kp_param = self._node.get_parameter("joints.kp").value
+ kd_param = self._node.get_parameter("joints.kd").value
+        # A single value is broadcast to every joint; otherwise one gain per joint.
+        self._kp = list(kp_param) if len(kp_param) > 1 else [kp_param[0]] * n
+        self._kd = list(kd_param) if len(kd_param) > 1 else [kd_param[0]] * n
+        assert len(self._kp) == n and len(self._kd) == n, "joints.kp/joints.kd must be scalar or one value per joint"
+ self._previous_action: np.ndarray = np.zeros(n, dtype=np.float32)
self._joint_state: Optional[JointState] = None
self._joint_state_sub = self._node.create_subscription(
@@ -61,7 +66,9 @@ def get_joint_commands(self, onnx_pred):
joint_command.positions = onnx_pred * 0.5 + self._walkready_state
joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
- joint_command.max_currents = [-1.0] * len(self._ordered_relevant_joint_names)
+ joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
+ joint_command.kp = self._kp
+ joint_command.kd = self._kd
return joint_command
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py
new file mode 100644
index 000000000..d77b52b24
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py
@@ -0,0 +1,78 @@
+from typing import Optional
+
+import numpy as np
+from sensor_msgs.msg import JointState
+
+from bitbots_msgs.msg import JointCommand
+from handlers.handler import Handler
+
+
+class RawJointHandler(Handler):
+ """Joint handler for HoST-style policies.
+
+ Differs from ``JointHandler`` in two aspects that are HoST-specific:
+
+ * ``get_raw_angle_data`` returns absolute joint positions (no walkready
+ subtraction). HoST observations are built from raw ``dof_pos``.
+ * ``get_joint_commands`` builds a ``JointCommand`` with
+ ``q_target = q_current + action * action_scale`` (HoST control law,
+ ``host_ground.py:_compute_torques``), not the walking-style
+ ``action * scale + walkready_state``.
+
+ The existing ``JointHandler`` is left untouched so that walking and kick
+ policies keep their current behaviour.
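+
+    For example, with ``action_scale = 1.0``, a current position of 0.3 rad and
+    an action of 0.1 gives ``q_target = 0.3 + 0.1 * 1.0 = 0.4`` rad.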
+ """
+
+ def __init__(self, node, action_scale: float):
+ self._node = node
+ self._action_scale = float(action_scale)
+
+ self._ordered_relevant_joint_names = self._node.get_parameter(
+ "joints.ordered_relevant_joint_names"
+ ).value
+
+ self._joint_state: Optional[JointState] = None
+ self._joint_state_sub = self._node.create_subscription(
+ JointState, "joint_states", self._joint_state_callback, 10
+ )
+
+ def _joint_state_callback(self, msg: JointState) -> None:
+ self._joint_state = msg
+
+ def has_data(self) -> bool:
+ return self._joint_state is not None
+
+ def get_raw_angle_data(self) -> np.ndarray:
+ assert self._joint_state is not None
+ return np.array(
+ [
+ self._joint_state.position[self._joint_state.name.index(name)]
+ for name in self._ordered_relevant_joint_names
+ ],
+ dtype=np.float32,
+ )
+
+ def get_velocity_data(self) -> np.ndarray:
+ assert self._joint_state is not None
+ return np.array(
+ [
+ self._joint_state.velocity[self._joint_state.name.index(name)]
+ for name in self._ordered_relevant_joint_names
+ ],
+ dtype=np.float32,
+ )
+
+ def get_joint_commands(self, onnx_pred: np.ndarray) -> JointCommand:
+ assert self._joint_state is not None
+ q_current = self.get_raw_angle_data()
+ q_target = q_current + onnx_pred.astype(np.float32) * self._action_scale
+
+ joint_command = JointCommand()
+ joint_command.header.stamp = self._joint_state.header.stamp
+ joint_command.from_hcm = True
+ joint_command.joint_names = list(self._ordered_relevant_joint_names)
+ joint_command.positions = q_target.tolist()
+ joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
+ joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
+ joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
+ return joint_command
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
index 0c454cddc..7205de78a 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
@@ -13,6 +13,10 @@
RobotControlState.KICKING,
)
+GETTING_UP_STATES = (
+ RobotControlState.CONTROLLABLE,
+ RobotControlState.GETTING_UP,
+)
class RobotStateHandler(Handler):
def __init__(self, node):
@@ -31,3 +35,6 @@ def is_walkable(self):
def is_kickable(self):
return self._robot_state in KICKABLE_STATES
+
+ def is_getting_up(self):
+ return self._robot_state in GETTING_UP_STATES
diff --git a/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch b/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
index 8436dce76..29749de3e 100644
--- a/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
+++ b/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
@@ -2,10 +2,11 @@
+
-
+
@@ -14,4 +15,11 @@
+
+
+
+
+
+
+
diff --git a/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx b/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx
new file mode 100644
index 000000000..622ed6865
Binary files /dev/null and b/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx differ
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
index 1568d1b35..fa61088c3 100644
--- a/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
@@ -3,7 +3,6 @@
from pathlib import Path
import numpy as np
-import onnx
import onnxruntime as rt
import rclpy
from ament_index_python import get_package_share_directory
@@ -25,9 +24,13 @@ def __init__(self, node_name: str):
self.declare_parameter("phase.control_dt", 0.0)
self.declare_parameter("phase.gait_frequency", 0.0)
self.declare_parameter("phase.use_phase", False)
+ self.declare_parameter("phase.initial_phase", [0.0, 3.141592653589793])
self.declare_parameter("providers", ["CPUExecutionProvider"])
self.declare_parameter("joints.ordered_relevant_joint_names", [""])
self.declare_parameter("joints.walkready_state", [0.0])
+ self.declare_parameter("joints.kp", [-1.0])
+ self.declare_parameter("joints.kd", [-1.0])
+ self.declare_parameter("command.include_stop_signal", False)
model = self.get_parameter("model").value
self.get_logger().info(f"Loaded model: {model}")
@@ -67,6 +70,9 @@ def _timer_callback(self):
if self.allowed_states():
self.publisher(onnx_pred)
+ self._phase_update_hook()
+
+ def _phase_update_hook(self):
if self._phase.check_phase_set():
phase_tp1 = self._phase.get_phase() + self._phase.get_phase_dt()
self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi)
@@ -85,10 +91,9 @@ def load_model(self, model):
# Load the ONNX model
self._onnx_session = rt.InferenceSession(self._onnx_model_path, providers=self.get_parameter("providers").value)
- self._onnx_model = onnx.load(self._onnx_model_path)
- self._onnx_input_name = [inp.name for inp in self._onnx_model.graph.input]
- self._onnx_output_name = [out.name for out in self._onnx_model.graph.output]
+ self._onnx_input_name = [inp.name for inp in self._onnx_session.get_inputs()]
+ self._onnx_output_name = [out.name for out in self._onnx_session.get_outputs()]
self._handlers = []
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py
new file mode 100644
index 000000000..a89285419
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py
@@ -0,0 +1,272 @@
+"""Standup-back policy node.
+
+Wraps the HoST standup-back ONNX policy (trained with
+``host_ground.py`` / ``pi_plus_config_ground.py``) as a ROS node. The node:
+
+* Builds the 73-dimensional one-step observation in the order HoST expects
+ (``base_ang_vel``, ``projected_gravity``, raw ``dof_pos``, ``dof_vel``,
+ previous action, ``action_rescale``).
+* Stacks the last ``history.length`` one-step observations into the policy
+ input (``ObservationHistory``).
+* Mirrors the ``unactuated_timesteps`` warmup from training: for the first
+ ``unactuated_steps`` ticks after a goal is accepted, the observation is
+ zeroed, the previous action is forced to zero, and no joint commands are
+ published.
+* Exposes a ROS action server on ``/rl_standup_back`` using the existing
+ ``bitbots_msgs/Dynup`` action type with ``direction = "rl_standup_back"``,
+  matching the dynup action-server pattern. While a goal is active, the
+  policy publishes joint targets on ``dynup_motor_goals``, the same topic
+  dynup uses, with
+  ``q_target = q_current + onnx_action * action_scale``.
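+
+Manual smoke test (a sketch; assumes the default namespace and that the model
+config has been loaded, e.g. via the rl_motion launch file)::
+
+    ros2 run bitbots_rl_motion standup_back_node
+    ros2 topic echo /dynup_motor_goals   # joint targets appear while a goal runs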
+"""
+
+import threading
+
+import numpy as np
+import rclpy
+from handlers.gravity_handler import GravityHandler
+from handlers.gyro_handler import GyroHandler
+from handlers.raw_joint_handler import RawJointHandler
+from handlers.robot_state_handler import RobotStateHandler
+from rclpy.action import ActionServer, CancelResponse, GoalResponse
+from rclpy.callback_groups import ReentrantCallbackGroup
+from rclpy.duration import Duration
+from rclpy.executors import MultiThreadedExecutor
+
+from bitbots_msgs.action import Dynup
+from bitbots_msgs.msg import JointCommand
+from bitbots_rl_motion.observation_history import ObservationHistory
+from nodes.rl_node import RLNode
+
+GOAL_DIRECTION = "rl_standup_back"
+
+
+class StandupBackNode(RLNode):
+ def __init__(self):
+ # Configures self._phase, self._previous_action; loads parameters.
+ super().__init__(node_name="standup_back_node")
+
+ # Standup-specific parameters.
+ self.declare_parameter("obs_scales.ang_vel", 0.25)
+ self.declare_parameter("obs_scales.dof_pos", 1.0)
+ self.declare_parameter("obs_scales.dof_vel", 0.05)
+ self.declare_parameter("action_scale", 1.0)
+ self.declare_parameter("action_rescale_obs", 1.0)
+ self.declare_parameter("history.length", 6)
+ self.declare_parameter("history.one_step_obs_dim", 73)
+ self.declare_parameter("unactuated_steps", 30)
+ self.declare_parameter("goal_timeout_sec", 8.0)
+
+ self._ang_vel_scale = float(self.get_parameter("obs_scales.ang_vel").value)
+ self._dof_pos_scale = float(self.get_parameter("obs_scales.dof_pos").value)
+ self._dof_vel_scale = float(self.get_parameter("obs_scales.dof_vel").value)
+ self._action_scale = float(self.get_parameter("action_scale").value)
+ self._action_rescale_obs = float(self.get_parameter("action_rescale_obs").value)
+ self._history_length = int(self.get_parameter("history.length").value)
+ self._one_step_obs_dim = int(self.get_parameter("history.one_step_obs_dim").value)
+ self._unactuated_steps = int(self.get_parameter("unactuated_steps").value)
+ self._goal_timeout_sec = float(self.get_parameter("goal_timeout_sec").value)
+
+ self._num_joints = len(self.get_parameter("joints.ordered_relevant_joint_names").value)
+
+ # Sanity: 3 (ang_vel) + 3 (gravity) + N (dof_pos) + N (dof_vel) + N (prev_act) + 1 (action_rescale)
+ expected_one_step = 3 + 3 + self._num_joints * 3 + 1
+ if expected_one_step != self._one_step_obs_dim:
+ self.get_logger().warning(
+ f"history.one_step_obs_dim ({self._one_step_obs_dim}) does not match the "
+ f"expected dimension based on joints count ({expected_one_step}). "
+ "Check the config and the ONNX policy interface."
+ )
+
+        # Publisher: reuses the dynup motor-goal topic (see module docstring).
+ self._joint_command_pub = self.create_publisher(JointCommand, "dynup_motor_goals", 10)
+
+ # Handlers
+ self._gyro_handler = GyroHandler(self)
+ self._gravity_handler = GravityHandler(self)
+ self._raw_joint_handler = RawJointHandler(self, action_scale=self._action_scale)
+ self._robot_state_handler = RobotStateHandler(self)
+
+ # History buffer (stacked observations).
+ self._history = ObservationHistory(
+ length=self._history_length,
+ one_step_dim=self._one_step_obs_dim,
+ )
+
+ # Goal/episode state.
+ self._goal_lock = threading.Lock()
+ self._active = False
+ self._tick = 0
+ self._was_getting_up = False
+
+ # Action server (Dynup action type, same as the dynup pattern).
+ self._action_server_callback_group = ReentrantCallbackGroup()
+ self._action_server = ActionServer(
+ self,
+ Dynup,
+ "rl_standup_back",
+ execute_callback=self._execute_callback,
+ goal_callback=self._goal_callback,
+ cancel_callback=self._cancel_callback,
+ callback_group=self._action_server_callback_group,
+ )
+
+ # Load model. RLNode collects all Handler instances from self.__dict__,
+ # subscribes to them, and starts the timer.
+ model = self.get_parameter("model").value
+ self.load_model(model)
+
+ # ------------------------------------------------------------------ obs
+
+ def _build_one_step_obs(self) -> np.ndarray:
+ gyro = self._gyro_handler.get_gyro() * self._ang_vel_scale
+ gravity = self._gravity_handler.get_gravity()
+ dof_pos = self._raw_joint_handler.get_raw_angle_data() * self._dof_pos_scale
+ dof_vel = self._raw_joint_handler.get_velocity_data() * self._dof_vel_scale
+ prev_action = self._previous_action.get_previous_action()
+ action_rescale = np.array([self._action_rescale_obs], dtype=np.float32)
+
+ return np.concatenate(
+ [gyro, gravity, dof_pos, dof_vel, prev_action, action_rescale]
+ ).astype(np.float32)
+
+ def obs(self) -> np.ndarray:
+ one_step = self._build_one_step_obs()
+
+ # During the unactuated warmup, HoST trains with a fully zeroed
+ # observation. Reproduce that here so the policy sees the same input
+ # distribution.
+ if self._active and self._tick < self._unactuated_steps:
+ one_step = np.zeros_like(one_step)
+
+ self._history.update(one_step)
+ return self._history.get()
+
+ # -------------------------------------------------------------- publish
+
+ def publisher(self, onnx_pred: np.ndarray) -> None:
+ joint_command = self._raw_joint_handler.get_joint_commands(onnx_pred)
+ self._joint_command_pub.publish(joint_command)
+
+    def allowed_states(self) -> bool:
+        # Publishing requires an active goal, a completed warmup, and a robot
+        # state in which getting up is permitted.
+        return self._robot_state_handler.is_getting_up() and self._active and self._tick >= self._unactuated_steps
+
+ # ----------------------------------------------- override timer callback
+
+ def _timer_callback(self):
+ """Tick override: keep ``_previous_action`` zero during the warmup.
+
+ RLNode's default callback unconditionally stores the ONNX output as
+ the previous action. HoST training instead force-zeroes the action
+ during the unactuated phase, so the previous-action slot in the
+ observation only becomes non-zero after the warmup ends. We mirror
+ that here.
+ """
+ sensors_ready, missing_handler = self._all_sensors_ready()
+ if not sensors_ready:
+ self.get_logger().warning(
+ f"Waiting for all sensors to be available. Following handler hasn't got the needed information: {missing_handler}",
+ throttle_duration_sec=1.0,
+ )
+ return
+
+ # Reset episode state on rising edge into GETTING_UP so the warmup,
+ # history, and previous_action match the per-episode setup HoST trained
+ # with.
+ is_getting_up = self._robot_state_handler.is_getting_up()
+ if is_getting_up and not self._was_getting_up:
+ self._history.reset()
+ self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+ self._tick = 0
+ self._was_getting_up = is_getting_up
+
+ observation = self.obs()
+
+ onnx_input = {self._onnx_input_name[0]: observation.reshape(1, -1)}
+ onnx_pred = self._onnx_session.run(self._onnx_output_name, onnx_input)[0][0]
+
+ if self.allowed_states():
+ self._previous_action.set_previous_action(onnx_pred)
+ self.publisher(onnx_pred)
+ else:
+ self._previous_action.set_previous_action(np.zeros_like(onnx_pred))
+
+ if is_getting_up:
+ self._tick += 1
+
+ # ----------------------------------------------------- action server cbs
+
+ def _goal_callback(self, goal_request: Dynup.Goal) -> GoalResponse:
+ if goal_request.direction != GOAL_DIRECTION:
+ self.get_logger().warning(
+ f"Rejecting goal with direction='{goal_request.direction}'. "
+ f"This server only handles direction='{GOAL_DIRECTION}'."
+ )
+ return GoalResponse.REJECT
+
+ with self._goal_lock:
+ if self._active:
+ self.get_logger().warning(
+ "Rejecting standup-back goal: another goal is already active."
+ )
+ return GoalResponse.REJECT
+ return GoalResponse.ACCEPT
+
+ def _cancel_callback(self, goal_handle) -> CancelResponse:
+ self.get_logger().info("Cancel requested for standup-back goal.")
+ return CancelResponse.ACCEPT
+
+ def _execute_callback(self, goal_handle):
+ self.get_logger().info("Standup-back goal accepted; resetting episode state.")
+ with self._goal_lock:
+ self._history.reset()
+ self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+ self._tick = 0
+ self._active = True
+
+        # Hold the action open until either:
+        # - the client cancels (e.g. the HCM detects the robot is upright
+        #   again, mirroring how dynup gets cancelled when no longer needed), or
+        # - the hard timeout elapses. The policy has no built-in done signal,
+        #   so each attempt is capped, letting the DSD re-evaluate (retry or
+        #   move on) instead of hanging on this action forever.
+ start_time = self.get_clock().now()
+ timeout = Duration(seconds=self._goal_timeout_sec)
+ rate = self.create_rate(20)
+ try:
+ while rclpy.ok():
+ if goal_handle.is_cancel_requested:
+ goal_handle.canceled()
+ self.get_logger().info("Standup-back goal canceled.")
+ return Dynup.Result(successful=False)
+ if self.get_clock().now() - start_time >= timeout:
+ self.get_logger().info(
+ f"Standup-back goal hit {self._goal_timeout_sec:.1f}s timeout; "
+ "returning control to HCM."
+ )
+ goal_handle.succeed()
+ return Dynup.Result(successful=True)
+ rate.sleep()
+ finally:
+ with self._goal_lock:
+ self._active = False
+ self._tick = 0
+ self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+ self._history.reset()
+ self.destroy_rate(rate)
+
+ goal_handle.abort()
+ return Dynup.Result(successful=False)
+
+
+def main():
+ rclpy.init()
+ node = StandupBackNode()
+ executor = MultiThreadedExecutor()
+ executor.add_node(node)
+ try:
+ executor.spin()
+ finally:
+ node.destroy_node()
+ rclpy.shutdown()
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
index f28f46bd2..86424b180 100644
--- a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
@@ -47,6 +47,22 @@ def publisher(self, onnx_pred):
joint_command = self._joint_handler.get_joint_commands(onnx_pred)
self._joint_command_pub.publish(joint_command)
+ def _phase_update_hook(self):
+ if not self._phase.check_phase_set():
+ return
+ phase = self._phase.get_phase()
+ if self._command_handler.get_stop_signal():
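+            # On stop, keep stepping the phase until it is close to one of the
+            # two symmetric anchors, then latch it there so the gait freezes at
+            # a consistent point in the cycle (assumption: these anchors match
+            # the phases the policy was trained to come to rest in).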
+ anchors = [
+ np.array([-np.pi / 2, np.pi / 2], dtype=np.float32),
+ np.array([np.pi / 2, -np.pi / 2], dtype=np.float32),
+ ]
+ nearest = min(anchors, key=lambda a: np.linalg.norm(phase - a))
+ if np.linalg.norm(phase - nearest) < 0.1:
+ self._phase.set_phase(nearest)
+ return
+ phase_tp1 = phase + self._phase.get_phase_dt()
+ self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi)
+
# states in which the policy executes
def allowed_states(self):
allowed_to_move = self._robot_state_handler.is_walkable() and np.any(self._command_handler.get_command() != 0.0)
diff --git a/src/bitbots_motion/bitbots_rl_motion/setup.py b/src/bitbots_motion/bitbots_rl_motion/setup.py
index 106dc782c..e83be072e 100644
--- a/src/bitbots_motion/bitbots_rl_motion/setup.py
+++ b/src/bitbots_motion/bitbots_rl_motion/setup.py
@@ -29,6 +29,7 @@
"console_scripts": [
"walk_node = nodes.walk_node:main",
"kick_node = nodes.kick_node:main",
+ "standup_back_node = nodes.standup_back_node:main",
],
},
)