diff --git a/src/bitbots_misc/bitbots_bringup/launch/motion.launch b/src/bitbots_misc/bitbots_bringup/launch/motion.launch
index b32d55461..e2a5967d7 100644
--- a/src/bitbots_misc/bitbots_bringup/launch/motion.launch
+++ b/src/bitbots_misc/bitbots_bringup/launch/motion.launch
@@ -42,6 +42,11 @@
+
+
+
+
+
diff --git a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py
new file mode 100644
index 000000000..6c8fa695d
--- /dev/null
+++ b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py
@@ -0,0 +1,102 @@
+"""HCM action element that triggers the RL standup-back policy.
+
+Modeled on ``PlayAnimationDynup`` from ``play_animation.py``: the action
+holds a goal open on a ROS action server while the standup policy runs and
+publishes joint commands. When the DSD pops this action (e.g. because the
+robot is no longer detected as fallen), the goal is canceled and the policy
+node stops publishing.
+
+Notes
+-----
+* Reuses the existing ``bitbots_msgs/Dynup`` action type, with
+  ``direction = "rl_standup_back"``. No new action message is required.
+* Creates its own ``ActionClient`` rather than relying on a blackboard field,
+  so ``hcm_blackboard.py`` does not need to be modified.
+* Wired into ``hcm.dsd`` by this change: the FALLEN_BACK / FREE branch now
+  calls ``@RLStandupBack`` instead of ``@PlayAnimationStandupBack`` (see the
+  ``hcm.dsd`` hunk below).
+"""
+
+from action_msgs.msg import GoalStatus
+from rclpy.action import ActionClient
+
+from bitbots_hcm.hcm_dsd.actions import AbstractHCMActionElement
+from bitbots_msgs.action import Dynup
+
+GOAL_DIRECTION = "rl_standup_back"
+ACTION_NAME = "rl_standup_back"
+
+
+class RLStandupBack(AbstractHCMActionElement):
+    """DSD action that runs the RL standup-back policy until cancelled."""
+
+    def __init__(self, blackboard, dsd, parameters):
+        super().__init__(blackboard, dsd, parameters)
+        self.first_perform = True
+        self._action_client = ActionClient(self.blackboard.node, Dynup, ACTION_NAME)
+        self._goal_future = None
+
+    def perform(self, reevaluate=False):
+        # Don't reevaluate while the policy is running — same pattern as
+        # PlayAnimationDynup for FRONT/BACK directions.
+        self.do_not_reevaluate()
+
+        if self.first_perform:
+            if not self._start_goal():
+                self.blackboard.node.get_logger().error(
+                    "Could not start RL standup-back goal. Aborting action."
+                )
+                return self.pop()
+            self.first_perform = False
+            return
+
+        if self._goal_finished():
+            return self.pop()
+
+    def on_pop(self):
+        """Cancel the goal if the action is popped before completion."""
+        super().on_pop()
+        if self._goal_future is None:
+            return
+        if not self._goal_future.done():
+            # The goal response is still in flight; cancel as soon as the
+            # handle arrives, otherwise the policy would keep running after
+            # the DSD has already moved on.
+            self._goal_future.add_done_callback(self._cancel_goal_handle)
+            return
+        self._cancel_goal_handle(self._goal_future)
+
+    def _cancel_goal_handle(self, future) -> None:
+        goal_handle = future.result()
+        if goal_handle is None or not goal_handle.accepted:
+            return
+        if not self._goal_finished():
+            goal_handle.cancel_goal_async()
+
+    # ------------------------------------------------------------------ helpers
+
+    def _start_goal(self) -> bool:
+        wait_param = self.blackboard.node.get_parameter("hcm.anim_server_wait_time").value
+        if not self._action_client.wait_for_server(timeout_sec=wait_param):
+            self.blackboard.node.get_logger().warn(
+                "RL standup-back action server not running; cannot start goal."
+            )
+            return False
+
+        goal = Dynup.Goal()
+        goal.direction = GOAL_DIRECTION
+        goal.from_hcm = True
+        self._goal_future = self._action_client.send_goal_async(
+            goal, feedback_callback=self._feedback_cb
+        )
+        return True
+
+    def _feedback_cb(self, msg):
+        feedback: Dynup.Feedback = msg.feedback
+        self.publish_debug_data("RL Standup Back Percent Done", str(feedback.percent_done))
+
+    def _goal_finished(self) -> bool:
+        if self._goal_future is None or not self._goal_future.done():
+            return False
+        if self._goal_future.cancelled():
+            return True
+        goal_handle = self._goal_future.result()
+        if goal_handle is None:
+            return False
+        return goal_handle.status in (
+            GoalStatus.STATUS_SUCCEEDED,
+            GoalStatus.STATUS_CANCELED,
+            GoalStatus.STATUS_ABORTED,
+        )
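As a quick smoke test of the reused ``Dynup`` interface, the same goal can be sent from a standalone script. A minimal sketch: the action name, type, and goal fields come from the file above; the probe node name and timeout are illustrative.

```python
# Minimal probe client for the standup-back action server. Action name and
# goal fields match the diff; the node name and timeout are illustrative.
import rclpy
from rclpy.action import ActionClient
from rclpy.node import Node

from bitbots_msgs.action import Dynup


def main():
    rclpy.init()
    node = Node("rl_standup_back_probe")  # hypothetical probe node
    client = ActionClient(node, Dynup, "rl_standup_back")
    if not client.wait_for_server(timeout_sec=5.0):
        node.get_logger().error("rl_standup_back server not reachable")
        rclpy.shutdown()
        return

    goal = Dynup.Goal()
    goal.direction = "rl_standup_back"
    goal.from_hcm = True

    send_future = client.send_goal_async(goal)
    rclpy.spin_until_future_complete(node, send_future)
    goal_handle = send_future.result()
    if goal_handle is None or not goal_handle.accepted:
        node.get_logger().error("goal rejected")
        rclpy.shutdown()
        return

    result_future = goal_handle.get_result_async()
    rclpy.spin_until_future_complete(node, result_future)
    node.get_logger().info(f"successful: {result_future.result().result.successful}")
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```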
diff --git a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
index ccd5e67ea..36f76cf8d 100644
--- a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
+++ b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
@@ -34,7 +34,7 @@ $StartHCM
         FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationWalkReady
     FALLEN_BACK --> $GameControllerStop
         STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
-        FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationStandupBack
+        FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @RLStandupBack
     FALLEN_RIGHT --> $GameControllerStop
         STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
         FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @PlayAnimationWalkReady
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py
new file mode 100644
index 000000000..61291a3e6
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py
@@ -0,0 +1,35 @@
+import numpy as np
+
+
+class ObservationHistory:
+    """Rolling history buffer that mirrors HoST's observation stacking.
+
+    HoST builds the actor input by stacking ``length`` consecutive one-step
+    observations of dimension ``one_step_dim``. New frames are appended at the
+    end and the oldest frame is dropped from the front. The buffer is
+    initialised with zeros, matching the training-time buffer state right after
+    a reset.
+    """
+
+    def __init__(self, length: int, one_step_dim: int):
+        self._length = length
+        self._one_step_dim = one_step_dim
+        self._buffer = np.zeros(length * one_step_dim, dtype=np.float32)
+
+    def reset(self) -> None:
+        self._buffer.fill(0.0)
+
+    def update(self, current_obs: np.ndarray) -> None:
+        assert current_obs.shape == (self._one_step_dim,), (
+            f"expected one-step obs of shape ({self._one_step_dim},), got {current_obs.shape}"
+        )
+        # shift left by one frame, append the new frame at the end
+        self._buffer[: -self._one_step_dim] = self._buffer[self._one_step_dim :]
+        self._buffer[-self._one_step_dim :] = current_obs.astype(np.float32, copy=False)
+
+    def get(self) -> np.ndarray:
+        return self._buffer
+
+    @property
+    def total_dim(self) -> int:
+        return self._length * self._one_step_dim
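A short usage sketch of the buffer semantics with toy dimensions (3 frames of dim 2 instead of 6 x 73). The zero initialisation shown here is also the state the standup node's warmup reproduces after each reset.

```python
# Sketch: FIFO semantics of ObservationHistory, with toy dimensions.
import numpy as np

from bitbots_rl_motion.observation_history import ObservationHistory

hist = ObservationHistory(length=3, one_step_dim=2)
hist.update(np.array([1.0, 1.0], dtype=np.float32))
hist.update(np.array([2.0, 2.0], dtype=np.float32))
# Oldest frames sit at the front, newest at the back:
assert hist.get().tolist() == [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]
hist.update(np.array([3.0, 3.0], dtype=np.float32))
assert hist.get().tolist() == [1.0, 1.0, 2.0, 2.0, 3.0, 3.0]  # zeros dropped
hist.reset()
assert not hist.get().any()  # back to the post-reset training state
```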
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
index 6b57b3640..91ee7479a 100644
--- a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
+++ b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
@@ -4,7 +4,6 @@


 class Phase:
-    _phase: np.ndarray = np.array([0.0, np.pi], dtype=np.float32)
     _phase_dt: Optional[float]

     def __init__(self, node):
@@ -14,12 +13,15 @@ def __init__(self, node):
             self._control_dt = self._node.get_parameter("phase.control_dt").value
             self._gait_frequency = self._node.get_parameter("phase.gait_frequency").value
             self._phase_dt = 2 * np.pi * self._gait_frequency * self._control_dt
+            initial = self._node.get_parameter("phase.initial_phase").value
+            self._phase = np.array(initial, dtype=np.float32)
             self._use_phase = True
         else:
             self._control_dt = None
             self._gait_frequency = None
             self._phase_dt = None
             self._use_phase = False
+            self._phase = np.zeros(2, dtype=np.float32)
             self._node.get_logger().warning("No phase was found! Using policy without phase!")

         self._obs_phase = None
diff --git a/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml
new file mode 100644
index 000000000..05dd1fb8a
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml
@@ -0,0 +1,66 @@
+standup_back_node:
+  ros__parameters:
+    model: pi_supine_training7.onnx
+
+    joints:
+      # 22-DOF order as reported by Isaac Gym for the HoST pi_plus_ground task.
+      # The ONNX policy expects observations and produces actions in exactly
+      # this order, so do not reorder.
+      ordered_relevant_joint_names: [
+        "head_yaw_joint",
+        "head_pitch_joint",
+        "l_hip_pitch_joint",
+        "l_hip_roll_joint",
+        "l_thigh_joint",
+        "l_calf_joint",
+        "l_ankle_pitch_joint",
+        "l_ankle_roll_joint",
+        "l_shoulder_pitch_joint",
+        "l_shoulder_roll_joint",
+        "l_upper_arm_joint",
+        "l_elbow_joint",
+        "r_hip_pitch_joint",
+        "r_hip_roll_joint",
+        "r_thigh_joint",
+        "r_calf_joint",
+        "r_ankle_pitch_joint",
+        "r_ankle_roll_joint",
+        "r_shoulder_pitch_joint",
+        "r_shoulder_roll_joint",
+        "r_upper_arm_joint",
+        "r_elbow_joint",
+      ]
+
+      # walkready_state is unused by this policy (HoST builds q_target from the
+      # current joint position, not from a default pose). Declared as 22 zeros
+      # only to satisfy the parameter declaration in RLNode.
+      walkready_state: [
+        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+      ]
+
+    obs_scales:
+      ang_vel: 0.25
+      dof_pos: 1.0
+      dof_vel: 0.05
+
+    action_scale: 1.0
+    action_rescale_obs: 1.0  # constant slot in the HoST observation vector
+
+    history:
+      length: 6
+      one_step_obs_dim: 73
+
+    # Number of policy ticks at the start of each goal during which the policy
+    # observes a zero vector and no joint commands are published, mirroring the
+    # `unactuated_timesteps` warmup HoST uses during training. Set to 0 to
+    # disable.
+    unactuated_steps: 30
+
+    phase:
+      # HoST does not use a phase signal.
+      use_phase: False
+      control_dt: 0.02
+      gait_frequency: 0.0
+
+    providers: ["CPUExecutionProvider"]
diff --git a/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml
new file mode 100644
index 000000000..1abc1a1b0
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml
@@ -0,0 +1,40 @@
+walk_node:
+  ros__parameters:
+    # Filename of the ONNX model inside bitbots_rl_motion/models/
+    model: policy_walk.onnx
+
+    joints:
+      # 12-DOF leg-joint order matching the Pi_plus walk policy training setup.
+      # Observation and action vectors must follow exactly this order.
+      ordered_relevant_joint_names: [
+        "r_hip_pitch_joint",
+        "r_hip_roll_joint",
+        "r_thigh_joint",
+        "r_calf_joint",
+        "r_ankle_pitch_joint",
+        "r_ankle_roll_joint",
+        "l_hip_pitch_joint",
+        "l_hip_roll_joint",
+        "l_thigh_joint",
+        "l_calf_joint",
+        "l_ankle_pitch_joint",
+        "l_ankle_roll_joint",
+      ]
+
+      # Default (walkready) pose the policy is trained relative to.
+      #   joint_angle_obs = q_current - walkready_state
+      #   joint_command   = action * 0.5 + walkready_state
+      walkready_state: [0.6, 0.0, 0.0, 1.2, 0.6, 0.0, -0.6, 0.0, 0.0, -1.2, -0.6, 0.0]
+
+      # PD gains for the Pi_plus leg joints.
+      kp: [30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0]
+      kd: [1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]
+
+    phase:
+      use_phase: true
+      control_dt: 0.02
+      gait_frequency: 1.5
+      # Initial phase [π/2, -π/2] matching the walk policy training setup.
+      initial_phase: [1.5707963267948966, -1.5707963267948966]
+
+    providers: ["CPUExecutionProvider"]
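The ``one_step_obs_dim: 73`` and the resulting ONNX input width follow directly from the 22-joint list; a minimal check mirroring the sanity test in ``standup_back_node.py``, plus an ``onnxruntime`` probe of the model interface. The model path relative to the package's ``models/`` directory and the expected shapes are assumptions to verify, not guarantees.

```python
# Dimension check for pi_plus_standup_back_model_config.yaml, plus a probe of
# the ONNX interface the config has to match.
import onnxruntime as rt

num_joints = 22
one_step = 3 + 3 + 3 * num_joints + 1  # ang_vel + gravity + pos/vel/prev_action + rescale
assert one_step == 73                  # history.one_step_obs_dim
stacked = 6 * one_step                 # history.length frames
assert stacked == 438                  # ONNX input width the policy should expect

session = rt.InferenceSession(
    "models/pi_supine_training7.onnx", providers=["CPUExecutionProvider"]
)
print([(i.name, i.shape) for i in session.get_inputs()])   # expect a 1 x 438 input
print([(o.name, o.shape) for o in session.get_outputs()])  # expect 22 action values
```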
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
index f20641394..bab868686 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
@@ -16,8 +16,15 @@
     def get_command(self):
         assert self._cmd_vel is not None
-        command = np.array([self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z], dtype=np.float32)
-        return command
+        stop_signal = float(self.get_stop_signal())
+        return np.array(
+            [self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z, stop_signal],
+            dtype=np.float32,
+        )
+
+    def get_stop_signal(self) -> bool:
+        assert self._cmd_vel is not None
+        return self._cmd_vel.angular.x != 0.0

     def has_data(self):
         return self._cmd_vel is not None
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
index a1871fd59..b7e8fc89f 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
@@ -13,7 +13,12 @@ def __init__(self, node):
         self._ordered_relevant_joint_names = self._node.get_parameter("joints.ordered_relevant_joint_names").value
         self._walkready_state = self._node.get_parameter("joints.walkready_state").value

-        self._previous_action: np.ndarray = np.zeros(len(self._ordered_relevant_joint_names), dtype=np.float32)
+        n = len(self._ordered_relevant_joint_names)
+        kp_param = self._node.get_parameter("joints.kp").value
+        kd_param = self._node.get_parameter("joints.kd").value
+        self._kp = list(kp_param) if len(kp_param) > 1 else [kp_param[0]] * n
+        self._kd = list(kd_param) if len(kd_param) > 1 else [kd_param[0]] * n
+        self._previous_action: np.ndarray = np.zeros(n, dtype=np.float32)

         self._joint_state: Optional[JointState] = None
         self._joint_state_sub = self._node.create_subscription(
@@ -61,7 +66,9 @@ def get_joint_commands(self, onnx_pred):
         joint_command.positions = onnx_pred * 0.5 + self._walkready_state
         joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
         joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
-        joint_command.max_currents = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.kp = self._kp
+        joint_command.kd = self._kd

         return joint_command
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py
new file mode 100644
index 000000000..d77b52b24
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py
@@ -0,0 +1,78 @@
+from typing import Optional
+
+import numpy as np
+from sensor_msgs.msg import JointState
+
+from bitbots_msgs.msg import JointCommand
+from handlers.handler import Handler
+
+
+class RawJointHandler(Handler):
+    """Joint handler for HoST-style policies.
+
+    Differs from ``JointHandler`` in two aspects that are HoST-specific:
+
+    * ``get_raw_angle_data`` returns absolute joint positions (no walkready
+      subtraction). HoST observations are built from raw ``dof_pos``.
+    * ``get_joint_commands`` builds a ``JointCommand`` with
+      ``q_target = q_current + action * action_scale`` (HoST control law,
+      ``host_ground.py:_compute_torques``), not the walking-style
+      ``action * scale + walkready_state``.
+
+    The existing ``JointHandler`` is left untouched so that walking and kick
+    policies keep their current behaviour.
+    """
+
+    def __init__(self, node, action_scale: float):
+        self._node = node
+        self._action_scale = float(action_scale)
+
+        self._ordered_relevant_joint_names = self._node.get_parameter(
+            "joints.ordered_relevant_joint_names"
+        ).value
+
+        self._joint_state: Optional[JointState] = None
+        self._joint_state_sub = self._node.create_subscription(
+            JointState, "joint_states", self._joint_state_callback, 10
+        )
+
+    def _joint_state_callback(self, msg: JointState) -> None:
+        self._joint_state = msg
+
+    def has_data(self) -> bool:
+        return self._joint_state is not None
+
+    def get_raw_angle_data(self) -> np.ndarray:
+        assert self._joint_state is not None
+        return np.array(
+            [
+                self._joint_state.position[self._joint_state.name.index(name)]
+                for name in self._ordered_relevant_joint_names
+            ],
+            dtype=np.float32,
+        )
+
+    def get_velocity_data(self) -> np.ndarray:
+        assert self._joint_state is not None
+        return np.array(
+            [
+                self._joint_state.velocity[self._joint_state.name.index(name)]
+                for name in self._ordered_relevant_joint_names
+            ],
+            dtype=np.float32,
+        )
+
+    def get_joint_commands(self, onnx_pred: np.ndarray) -> JointCommand:
+        assert self._joint_state is not None
+        q_current = self.get_raw_angle_data()
+        q_target = q_current + onnx_pred.astype(np.float32) * self._action_scale
+
+        joint_command = JointCommand()
+        joint_command.header.stamp = self._joint_state.header.stamp
+        joint_command.from_hcm = True
+        joint_command.joint_names = list(self._ordered_relevant_joint_names)
+        joint_command.positions = q_target.tolist()
+        joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
+        return joint_command
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
index 0c454cddc..7205de78a 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
@@ -13,6 +13,10 @@
     RobotControlState.KICKING,
 )

+GETTING_UP_STATES = (
+    RobotControlState.CONTROLLABLE,
+    RobotControlState.GETTING_UP,
+)

 class RobotStateHandler(Handler):
     def __init__(self, node):
@@ -31,3 +35,6 @@ def is_walkable(self):

     def is_kickable(self):
         return self._robot_state in KICKABLE_STATES
+
+    def is_getting_up(self):
+        return self._robot_state in GETTING_UP_STATES
diff --git a/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch b/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
index 8436dce76..29749de3e 100644
--- a/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
+++ b/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
@@ -2,10 +2,11 @@
+
+
-
+
@@ -14,4 +15,11 @@
+
+
+
+
+
+
+
diff --git a/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx b/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx
new file mode 100644
index 000000000..622ed6865
Binary files /dev/null and b/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx differ
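The two handlers above implement two different action-to-target mappings; a toy-number contrast of the walking law (offsets from a fixed walkready pose) and the HoST law (offsets from the current pose, so the policy effectively integrates its own actions over time):

```python
# Sketch: the two action-to-target mappings side by side, with toy numbers.
import numpy as np

action = np.array([0.2, -0.1], dtype=np.float32)
walkready = np.array([0.6, 0.0], dtype=np.float32)
q_current = np.array([0.55, 0.05], dtype=np.float32)

# JointHandler (walking): anchored to the walkready pose.
q_walk = action * 0.5 + walkready   # -> [0.70, -0.05]

# RawJointHandler (HoST standup): anchored to the current pose, action_scale=1.0.
q_host = q_current + action * 1.0   # -> [0.75, -0.05]

print(q_walk, q_host)
```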
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
index 1568d1b35..fa61088c3 100644
--- a/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
@@ -3,7 +3,6 @@
 from pathlib import Path

 import numpy as np
-import onnx
 import onnxruntime as rt
 import rclpy
 from ament_index_python import get_package_share_directory
@@ -25,9 +24,13 @@ def __init__(self, node_name: str):
         self.declare_parameter("phase.control_dt", 0.0)
         self.declare_parameter("phase.gait_frequency", 0.0)
         self.declare_parameter("phase.use_phase", False)
+        self.declare_parameter("phase.initial_phase", [0.0, 3.141592653589793])
         self.declare_parameter("providers", ["CPUExecutionProvider"])
         self.declare_parameter("joints.ordered_relevant_joint_names", [""])
         self.declare_parameter("joints.walkready_state", [0.0])
+        self.declare_parameter("joints.kp", [-1.0])
+        self.declare_parameter("joints.kd", [-1.0])
+        self.declare_parameter("command.include_stop_signal", False)

         model = self.get_parameter("model").value
         self.get_logger().info(f"Loaded model: {model}")
@@ -67,6 +70,9 @@ def _timer_callback(self):
         if self.allowed_states():
             self.publisher(onnx_pred)

+        self._phase_update_hook()
+
+    def _phase_update_hook(self):
         if self._phase.check_phase_set():
             phase_tp1 = self._phase.get_phase() + self._phase.get_phase_dt()
             self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi)
@@ -85,10 +91,9 @@ def load_model(self, model):

         # Load the ONNX model
         self._onnx_session = rt.InferenceSession(self._onnx_model_path, providers=self.get_parameter("providers").value)
-        self._onnx_model = onnx.load(self._onnx_model_path)

-        self._onnx_input_name = [inp.name for inp in self._onnx_model.graph.input]
-        self._onnx_output_name = [out.name for out in self._onnx_model.graph.output]
+        self._onnx_input_name = [inp.name for inp in self._onnx_session.get_inputs()]
+        self._onnx_output_name = [out.name for out in self._onnx_session.get_outputs()]

         self._handlers = []
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py
new file mode 100644
index 000000000..a89285419
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py
@@ -0,0 +1,272 @@
+"""Standup-back policy node.
+
+Wraps the HoST standup-back ONNX policy (trained with
+``host_ground.py`` / ``pi_plus_config_ground.py``) as a ROS node. The node:
+
+* Builds the 73-dimensional one-step observation in the order HoST expects
+  (``base_ang_vel``, ``projected_gravity``, raw ``dof_pos``, ``dof_vel``,
+  previous action, ``action_rescale``).
+* Stacks the last ``history.length`` one-step observations into the policy
+  input (``ObservationHistory``).
+* Mirrors the ``unactuated_timesteps`` warmup from training: for the first
+  ``unactuated_steps`` ticks after a goal is accepted, the observation is
+  zeroed, the previous action is forced to zero, and no joint commands are
+  published.
+* Exposes a ROS action server on ``/rl_standup_back`` using the existing
+  ``bitbots_msgs/Dynup`` action type with ``direction = "rl_standup_back"``,
+  matching the dynup action-server pattern. While a goal is active, the
+  policy publishes joint targets on ``dynup_motor_goals`` with
+  ``q_target = q_current + onnx_action * action_scale``.
+"""
+
+import threading
+
+import numpy as np
+import rclpy
+from handlers.gravity_handler import GravityHandler
+from handlers.gyro_handler import GyroHandler
+from handlers.raw_joint_handler import RawJointHandler
+from handlers.robot_state_handler import RobotStateHandler
+from rclpy.action import ActionServer, CancelResponse, GoalResponse
+from rclpy.callback_groups import ReentrantCallbackGroup
+from rclpy.duration import Duration
+from rclpy.executors import MultiThreadedExecutor
+
+from bitbots_msgs.action import Dynup
+from bitbots_msgs.msg import JointCommand
+from bitbots_rl_motion.observation_history import ObservationHistory
+from nodes.rl_node import RLNode
+
+GOAL_DIRECTION = "rl_standup_back"
+
+
+class StandupBackNode(RLNode):
+    def __init__(self):
+        # Configures self._phase, self._previous_action; loads parameters.
+        super().__init__(node_name="standup_back_node")
+
+        # Standup-specific parameters.
+        self.declare_parameter("obs_scales.ang_vel", 0.25)
+        self.declare_parameter("obs_scales.dof_pos", 1.0)
+        self.declare_parameter("obs_scales.dof_vel", 0.05)
+        self.declare_parameter("action_scale", 1.0)
+        self.declare_parameter("action_rescale_obs", 1.0)
+        self.declare_parameter("history.length", 6)
+        self.declare_parameter("history.one_step_obs_dim", 73)
+        self.declare_parameter("unactuated_steps", 30)
+        self.declare_parameter("goal_timeout_sec", 8.0)
+
+        self._ang_vel_scale = float(self.get_parameter("obs_scales.ang_vel").value)
+        self._dof_pos_scale = float(self.get_parameter("obs_scales.dof_pos").value)
+        self._dof_vel_scale = float(self.get_parameter("obs_scales.dof_vel").value)
+        self._action_scale = float(self.get_parameter("action_scale").value)
+        self._action_rescale_obs = float(self.get_parameter("action_rescale_obs").value)
+        self._history_length = int(self.get_parameter("history.length").value)
+        self._one_step_obs_dim = int(self.get_parameter("history.one_step_obs_dim").value)
+        self._unactuated_steps = int(self.get_parameter("unactuated_steps").value)
+        self._goal_timeout_sec = float(self.get_parameter("goal_timeout_sec").value)
+
+        self._num_joints = len(self.get_parameter("joints.ordered_relevant_joint_names").value)
+
+        # Sanity: 3 (ang_vel) + 3 (gravity) + N (dof_pos) + N (dof_vel) + N (prev_act) + 1 (action_rescale)
+        expected_one_step = 3 + 3 + self._num_joints * 3 + 1
+        if expected_one_step != self._one_step_obs_dim:
+            self.get_logger().warning(
+                f"history.one_step_obs_dim ({self._one_step_obs_dim}) does not match the "
+                f"expected dimension based on joints count ({expected_one_step}). "
+                "Check the config and the ONNX policy interface."
+            )
+
+        # Publisher
+        self._joint_command_pub = self.create_publisher(JointCommand, "dynup_motor_goals", 10)
+
+        # Handlers
+        self._gyro_handler = GyroHandler(self)
+        self._gravity_handler = GravityHandler(self)
+        self._raw_joint_handler = RawJointHandler(self, action_scale=self._action_scale)
+        self._robot_state_handler = RobotStateHandler(self)
+
+        # History buffer (stacked observations).
+        self._history = ObservationHistory(
+            length=self._history_length,
+            one_step_dim=self._one_step_obs_dim,
+        )
+
+        # Goal/episode state.
+        self._goal_lock = threading.Lock()
+        self._active = False
+        self._tick = 0
+        self._was_getting_up = False
+
+        # Action server (Dynup action type, same as the dynup pattern).
+        self._action_server_callback_group = ReentrantCallbackGroup()
+        self._action_server = ActionServer(
+            self,
+            Dynup,
+            "rl_standup_back",
+            execute_callback=self._execute_callback,
+            goal_callback=self._goal_callback,
+            cancel_callback=self._cancel_callback,
+            callback_group=self._action_server_callback_group,
+        )
+
+        # Load model. RLNode collects all Handler instances from self.__dict__,
+        # subscribes to them, and starts the timer.
+        model = self.get_parameter("model").value
+        self.load_model(model)
+
+    # ------------------------------------------------------------------ obs
+
+    def _build_one_step_obs(self) -> np.ndarray:
+        gyro = self._gyro_handler.get_gyro() * self._ang_vel_scale
+        gravity = self._gravity_handler.get_gravity()
+        dof_pos = self._raw_joint_handler.get_raw_angle_data() * self._dof_pos_scale
+        dof_vel = self._raw_joint_handler.get_velocity_data() * self._dof_vel_scale
+        prev_action = self._previous_action.get_previous_action()
+        action_rescale = np.array([self._action_rescale_obs], dtype=np.float32)
+
+        return np.concatenate(
+            [gyro, gravity, dof_pos, dof_vel, prev_action, action_rescale]
+        ).astype(np.float32)
+
+    def obs(self) -> np.ndarray:
+        one_step = self._build_one_step_obs()
+
+        # During the unactuated warmup, HoST trains with a fully zeroed
+        # observation. Reproduce that here so the policy sees the same input
+        # distribution.
+        if self._active and self._tick < self._unactuated_steps:
+            one_step = np.zeros_like(one_step)
+
+        self._history.update(one_step)
+        return self._history.get()
+
+    # -------------------------------------------------------------- publish
+
+    def publisher(self, onnx_pred: np.ndarray) -> None:
+        joint_command = self._raw_joint_handler.get_joint_commands(onnx_pred)
+        self._joint_command_pub.publish(joint_command)
+
+    def allowed_states(self) -> bool:
+        # Publishing requires an HCM state compatible with getting up, plus an
+        # active goal that is past the unactuated warmup.
+        return self._robot_state_handler.is_getting_up() and self._active and self._tick >= self._unactuated_steps
+
+    # ----------------------------------------------- override timer callback
+
+    def _timer_callback(self):
+        """Tick override: keep ``_previous_action`` zero during the warmup.
+
+        RLNode's default callback unconditionally stores the ONNX output as
+        the previous action. HoST training instead force-zeroes the action
+        during the unactuated phase, so the previous-action slot in the
+        observation only becomes non-zero after the warmup ends. We mirror
+        that here.
+        """
+        sensors_ready, missing_handler = self._all_sensors_ready()
+        if not sensors_ready:
+            self.get_logger().warning(
+                f"Waiting for all sensors to be available. Following handler hasn't got the needed information: {missing_handler}",
+                throttle_duration_sec=1.0,
+            )
+            return
+
+        # Reset episode state on the rising edge into GETTING_UP so the warmup,
+        # history, and previous_action match the per-episode setup HoST trained
+        # with.
+        is_getting_up = self._robot_state_handler.is_getting_up()
+        if is_getting_up and not self._was_getting_up:
+            self._history.reset()
+            self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+            self._tick = 0
+        self._was_getting_up = is_getting_up
+
+        observation = self.obs()
+
+        onnx_input = {self._onnx_input_name[0]: observation.reshape(1, -1)}
+        onnx_pred = self._onnx_session.run(self._onnx_output_name, onnx_input)[0][0]
+
+        if self.allowed_states():
+            self._previous_action.set_previous_action(onnx_pred)
+            self.publisher(onnx_pred)
+        else:
+            self._previous_action.set_previous_action(np.zeros_like(onnx_pred))
+
+        if is_getting_up:
+            self._tick += 1
+
+    # ----------------------------------------------------- action server cbs
+
+    def _goal_callback(self, goal_request: Dynup.Goal) -> GoalResponse:
+        if goal_request.direction != GOAL_DIRECTION:
+            self.get_logger().warning(
+                f"Rejecting goal with direction='{goal_request.direction}'. "
+                f"This server only handles direction='{GOAL_DIRECTION}'."
+            )
+            return GoalResponse.REJECT
+
+        with self._goal_lock:
+            if self._active:
+                self.get_logger().warning(
+                    "Rejecting standup-back goal: another goal is already active."
+                )
+                return GoalResponse.REJECT
+        return GoalResponse.ACCEPT
+
+    def _cancel_callback(self, goal_handle) -> CancelResponse:
+        self.get_logger().info("Cancel requested for standup-back goal.")
+        return CancelResponse.ACCEPT
+
+    def _execute_callback(self, goal_handle):
+        self.get_logger().info("Standup-back goal accepted; resetting episode state.")
+        with self._goal_lock:
+            self._history.reset()
+            self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+            self._tick = 0
+            self._active = True
+
+        # Hold the action open until either:
+        # - the client cancels (e.g. HCM detects the robot is upright again,
+        #   mirroring how dynup gets cancelled when no longer needed), or
+        # - the hard timeout elapses. The policy has no built-in done signal,
+        #   so we cap each attempt so the DSD can re-evaluate (retry or move
+        #   on) instead of hanging on this action forever.
+        start_time = self.get_clock().now()
+        timeout = Duration(seconds=self._goal_timeout_sec)
+        rate = self.create_rate(20)
+        try:
+            while rclpy.ok():
+                if goal_handle.is_cancel_requested:
+                    goal_handle.canceled()
+                    self.get_logger().info("Standup-back goal canceled.")
+                    return Dynup.Result(successful=False)
+                if self.get_clock().now() - start_time >= timeout:
+                    self.get_logger().info(
+                        f"Standup-back goal hit {self._goal_timeout_sec:.1f}s timeout; "
+                        "returning control to HCM."
+                    )
+                    goal_handle.succeed()
+                    return Dynup.Result(successful=True)
+                rate.sleep()
+        finally:
+            with self._goal_lock:
+                self._active = False
+                self._tick = 0
+                self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+                self._history.reset()
+            self.destroy_rate(rate)
+
+        goal_handle.abort()
+        return Dynup.Result(successful=False)
+
+
+def main():
+    rclpy.init()
+    node = StandupBackNode()
+    executor = MultiThreadedExecutor()
+    executor.add_node(node)
+    try:
+        executor.spin()
+    finally:
+        node.destroy_node()
+        rclpy.shutdown()
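The node blocks inside ``_execute_callback`` (``rate.sleep()`` in a loop), which only works because the action server sits in a ``ReentrantCallbackGroup`` and the node is spun by a ``MultiThreadedExecutor``; with a mutually exclusive group and a single-threaded executor, the blocking goal would starve the policy timer and the rate would never be serviced. A stripped-down sketch of that pattern; all names here are illustrative, not from the diff.

```python
# Sketch: blocking execute callback alongside a live timer. Illustrative only.
import rclpy
from rclpy.action import ActionServer
from rclpy.callback_groups import ReentrantCallbackGroup
from rclpy.executors import MultiThreadedExecutor
from rclpy.node import Node

from bitbots_msgs.action import Dynup


class BlockingServerDemo(Node):
    def __init__(self):
        super().__init__("blocking_server_demo")
        self.create_timer(0.02, lambda: None)  # stands in for the policy tick
        self._server = ActionServer(
            self,
            Dynup,
            "demo_action",
            execute_callback=self._execute,
            callback_group=ReentrantCallbackGroup(),  # timer and goal run in parallel
        )

    def _execute(self, goal_handle):
        rate = self.create_rate(20)
        for _ in range(100):  # blocks ~5 s; the timer keeps ticking meanwhile
            rate.sleep()
        goal_handle.succeed()
        return Dynup.Result(successful=True)


def main():
    rclpy.init()
    node = BlockingServerDemo()
    executor = MultiThreadedExecutor()  # needed so rate.sleep() can be serviced
    executor.add_node(node)
    executor.spin()


if __name__ == "__main__":
    main()
```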
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
index f28f46bd2..86424b180 100644
--- a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
@@ -47,6 +47,22 @@ def publisher(self, onnx_pred):
         joint_command = self._joint_handler.get_joint_commands(onnx_pred)
         self._joint_command_pub.publish(joint_command)

+    def _phase_update_hook(self):
+        if not self._phase.check_phase_set():
+            return
+        phase = self._phase.get_phase()
+        if self._command_handler.get_stop_signal():
+            anchors = [
+                np.array([-np.pi / 2, np.pi / 2], dtype=np.float32),
+                np.array([np.pi / 2, -np.pi / 2], dtype=np.float32),
+            ]
+            nearest = min(anchors, key=lambda a: np.linalg.norm(phase - a))
+            if np.linalg.norm(phase - nearest) < 0.1:
+                self._phase.set_phase(nearest)
+                return
+        phase_tp1 = phase + self._phase.get_phase_dt()
+        self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi)
+
     # states in which the policy executes
     def allowed_states(self):
         allowed_to_move = self._robot_state_handler.is_walkable() and np.any(self._command_handler.get_command() != 0.0)
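The stop signal read by ``CommandHandler.get_stop_signal()`` is the otherwise unused ``angular.x`` slot of the velocity command (a biped's ``cmd_vel`` never carries a roll rate). A sketch of requesting a clean stop and of the anchor-snap condition used by ``_phase_update_hook`` above; the ``cmd_vel`` topic name is an assumption, since the subscription itself is outside this diff.

```python
# Sketch: raising the stop bit and checking the anchor-snap condition.
import numpy as np
import rclpy
from geometry_msgs.msg import Twist
from rclpy.node import Node

rclpy.init()
node = Node("walk_stop_demo")  # hypothetical demo node
pub = node.create_publisher(Twist, "cmd_vel", 10)

msg = Twist()        # zero velocities: stand still...
msg.angular.x = 1.0  # ...and any non-zero angular.x marks the stop signal
pub.publish(msg)

# On the policy side, the phase is frozen once it comes within 0.1 rad of one
# of the two trained anchor phases (symmetric double support):
phase = np.array([np.pi / 2 - 0.05, -np.pi / 2 + 0.05], dtype=np.float32)
anchors = [np.array([-np.pi / 2, np.pi / 2]), np.array([np.pi / 2, -np.pi / 2])]
nearest = min(anchors, key=lambda a: np.linalg.norm(phase - a))
print(np.linalg.norm(phase - nearest) < 0.1)  # True: the phase snaps and holds

rclpy.shutdown()
```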
+ ) + goal_handle.succeed() + return Dynup.Result(successful=True) + rate.sleep() + finally: + with self._goal_lock: + self._active = False + self._tick = 0 + self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32)) + self._history.reset() + self.destroy_rate(rate) + + goal_handle.abort() + return Dynup.Result(successful=False) + + +def main(): + rclpy.init() + node = StandupBackNode() + executor = MultiThreadedExecutor() + executor.add_node(node) + try: + executor.spin() + finally: + node.destroy_node() + rclpy.shutdown() diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py index f28f46bd2..86424b180 100644 --- a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py +++ b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py @@ -47,6 +47,22 @@ def publisher(self, onnx_pred): joint_command = self._joint_handler.get_joint_commands(onnx_pred) self._joint_command_pub.publish(joint_command) + def _phase_update_hook(self): + if not self._phase.check_phase_set(): + return + phase = self._phase.get_phase() + if self._command_handler.get_stop_signal(): + anchors = [ + np.array([-np.pi / 2, np.pi / 2], dtype=np.float32), + np.array([np.pi / 2, -np.pi / 2], dtype=np.float32), + ] + nearest = min(anchors, key=lambda a: np.linalg.norm(phase - a)) + if np.linalg.norm(phase - nearest) < 0.1: + self._phase.set_phase(nearest) + return + phase_tp1 = phase + self._phase.get_phase_dt() + self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi) + # states in which the policy executes def allowed_states(self): allowed_to_move = self._robot_state_handler.is_walkable() and np.any(self._command_handler.get_command() != 0.0) diff --git a/src/bitbots_motion/bitbots_rl_motion/setup.py b/src/bitbots_motion/bitbots_rl_motion/setup.py index 106dc782c..e83be072e 100644 --- a/src/bitbots_motion/bitbots_rl_motion/setup.py +++ b/src/bitbots_motion/bitbots_rl_motion/setup.py @@ -29,6 +29,7 @@ "console_scripts": [ "walk_node = nodes.walk_node:main", "kick_node = nodes.kick_node:main", + "standup_back_node = nodes.standup_back_node:main", ], }, )