Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/bitbots_misc/bitbots_bringup/launch/motion.launch
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@
</node>
</group>

<!-- launch the RL motion nodes (stand-up policy etc.) -->
<include file="$(find-pkg-share bitbots_rl_motion)/launch/rl_motion.launch">
<arg name="sim" value="$(var sim)"/>
</include>

<!-- launch the hcm -->
<include file="$(find-pkg-share bitbots_hcm)/launch/hcm.launch">
<arg name="sim" value="$(var sim)"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""HCM action element that triggers the RL standup-back policy.

Modeled on ``PlayAnimationDynup`` from ``play_animation.py``: the action
holds a goal open on a ROS action server while the standup policy runs and
publishes joint commands. When the DSD pops this action (e.g. because the
robot is no longer detected as fallen), the goal is canceled and the policy
node stops publishing.

Notes
-----
* Reuses the existing ``bitbots_msgs/Dynup`` action type, with
``direction = "rl_standup_back"``. No new action message is required.
* Creates its own ``ActionClient`` rather than relying on a blackboard field,
so ``hcm_blackboard.py`` does not need to be modified.
* This action is wired into ``hcm.dsd``: the FALLEN_BACK / FREE branch runs
  ``@RLStandupBack`` in place of ``@PlayAnimationStandupBack``.
"""

from action_msgs.msg import GoalStatus
from rclpy.action import ActionClient

from bitbots_hcm.hcm_dsd.actions import AbstractHCMActionElement
from bitbots_msgs.action import Dynup

GOAL_DIRECTION = "rl_standup_back"
ACTION_NAME = "rl_standup_back"


class RLStandupBack(AbstractHCMActionElement):
    """DSD action that runs the RL standup-back policy until cancelled.

    Holds a ``Dynup`` goal (``direction = "rl_standup_back"``) open on the
    RL standup action server while the policy publishes joint commands.
    When the DSD pops this action, the goal is cancelled so the policy node
    stops publishing.
    """

    def __init__(self, blackboard, dsd, parameters):
        super().__init__(blackboard, dsd, parameters)
        # True until the goal has been sent on the first perform() tick.
        self.first_perform = True
        # Own ActionClient so hcm_blackboard.py does not need a new field.
        self._action_client = ActionClient(self.blackboard.node, Dynup, ACTION_NAME)
        # Future returned by send_goal_async(); None until a goal was sent.
        self._goal_future = None

    def perform(self, reevaluate=False):
        """Send the goal on the first tick, then wait for it to finish."""
        # Don't reevaluate while the policy is running — same pattern as
        # PlayAnimationDynup for FRONT/BACK directions.
        self.do_not_reevaluate()

        if self.first_perform:
            if not self._start_goal():
                self.blackboard.node.get_logger().error(
                    "Could not start RL standup-back goal. Aborting action."
                )
                return self.pop()
            self.first_perform = False
            return

        if self._goal_finished():
            return self.pop()

    def on_pop(self):
        """Cancel the goal if the action is popped before completion."""
        super().on_pop()
        if self._goal_future is None:
            return
        if not self._goal_future.done():
            # The goal was sent but the server has not responded yet.
            # Cancel as soon as the goal handle arrives — returning here
            # without scheduling a cancel (as a naive implementation would)
            # leaks a running goal and the policy keeps publishing after
            # the DSD has moved on.
            self._goal_future.add_done_callback(self._cancel_goal_handle)
            return
        self._cancel_goal_handle(self._goal_future)

    # ------------------------------------------------------------------ helpers

    def _cancel_goal_handle(self, goal_future) -> None:
        """Cancel the goal held by *goal_future* if it is accepted and running."""
        goal_handle = goal_future.result()
        if goal_handle is None or not goal_handle.accepted:
            return
        if not self._goal_finished():
            goal_handle.cancel_goal_async()

    def _start_goal(self) -> bool:
        """Send the standup-back goal; return False if the server is absent.

        Reuses the HCM's animation-server wait time so start-up behaviour
        matches the animation-based actions.
        """
        wait_param = self.blackboard.node.get_parameter("hcm.anim_server_wait_time").value
        if not self._action_client.wait_for_server(timeout_sec=wait_param):
            self.blackboard.node.get_logger().warn(
                "RL standup-back action server not running; cannot start goal."
            )
            return False

        goal = Dynup.Goal()
        goal.direction = GOAL_DIRECTION
        goal.from_hcm = True
        self._goal_future = self._action_client.send_goal_async(
            goal, feedback_callback=self._feedback_cb
        )
        return True

    def _feedback_cb(self, msg):
        """Forward the policy's percent-done feedback to the DSD debug output."""
        feedback: Dynup.Feedback = msg.feedback
        self.publish_debug_data("RL Standup Back Percent Done", str(feedback.percent_done))

    def _goal_finished(self) -> bool:
        """Return True once the goal future reached a terminal state."""
        if self._goal_future is None or not self._goal_future.done():
            return False
        if self._goal_future.cancelled():
            return True
        goal_handle = self._goal_future.result()
        if goal_handle is None:
            return False
        return goal_handle.status in (
            GoalStatus.STATUS_SUCCEEDED,
            GoalStatus.STATUS_CANCELED,
            GoalStatus.STATUS_ABORTED,
        )
2 changes: 1 addition & 1 deletion src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ $StartHCM
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationWalkReady
FALLEN_BACK --> $GameControllerStop
STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationStandupBack
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @RLStandupBack
FALLEN_RIGHT --> $GameControllerStop
STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @PlayAnimationWalkReady
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import numpy as np


class ObservationHistory:
    """Fixed-length rolling buffer of stacked one-step observations.

    Replicates HoST's observation stacking: the actor input is ``length``
    consecutive one-step observations of size ``one_step_dim`` laid out
    back-to-back in one flat float32 vector. ``update`` discards the oldest
    frame and appends the newest at the end. On construction (and after
    ``reset``) every frame is zero, matching HoST's buffer state right
    after an environment reset.
    """

    def __init__(self, length: int, one_step_dim: int):
        self._length = length
        self._one_step_dim = one_step_dim
        self._buffer = np.zeros(length * one_step_dim, dtype=np.float32)

    def reset(self) -> None:
        """Zero the entire history, as after an environment reset."""
        self._buffer.fill(0.0)

    def update(self, current_obs: np.ndarray) -> None:
        """Drop the oldest frame and append *current_obs* as the newest."""
        assert current_obs.shape == (self._one_step_dim,), (
            f"expected one-step obs of shape ({self._one_step_dim},), got {current_obs.shape}"
        )
        # View the flat buffer as (length, one_step_dim) frames and shift
        # every frame one slot towards the front, in place.
        frames = self._buffer.reshape(self._length, self._one_step_dim)
        frames[:-1] = frames[1:]
        frames[-1] = current_obs.astype(np.float32, copy=False)

    def get(self) -> np.ndarray:
        """Return the flat stacked observation vector (a live view, not a copy)."""
        return self._buffer

    @property
    def total_dim(self) -> int:
        """Length of the vector returned by :meth:`get`."""
        return self._length * self._one_step_dim
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@


class Phase:
_phase: np.ndarray = np.array([0.0, np.pi], dtype=np.float32)
_phase_dt: Optional[float]

def __init__(self, node):
Expand All @@ -14,12 +13,15 @@ def __init__(self, node):
self._control_dt = self._node.get_parameter("phase.control_dt").value
self._gait_frequency = self._node.get_parameter("phase.gait_frequency").value
self._phase_dt = 2 * np.pi * self._gait_frequency * self._control_dt
initial = self._node.get_parameter("phase.initial_phase").value
self._phase = np.array(initial, dtype=np.float32)
self._use_phase = True
else:
self._control_dt = None
self._gait_frequency = None
self._phase_dt = None
self._use_phase = False
self._phase = np.zeros(2, dtype=np.float32)
self._node.get_logger().warning("No phase was found! Using policy without phase!")

self._obs_phase = None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
standup_back_node:
ros__parameters:
model: pi_supine_training7.onnx

joints:
# 22-DOF order as reported by Isaac Gym for the HoST pi_plus_ground task.
# The ONNX policy expects observations and produces actions in exactly
# this order, so do not reorder.
ordered_relevant_joint_names: [
"head_yaw_joint",
"head_pitch_joint",
"l_hip_pitch_joint",
"l_hip_roll_joint",
"l_thigh_joint",
"l_calf_joint",
"l_ankle_pitch_joint",
"l_ankle_roll_joint",
"l_shoulder_pitch_joint",
"l_shoulder_roll_joint",
"l_upper_arm_joint",
"l_elbow_joint",
"r_hip_pitch_joint",
"r_hip_roll_joint",
"r_thigh_joint",
"r_calf_joint",
"r_ankle_pitch_joint",
"r_ankle_roll_joint",
"r_shoulder_pitch_joint",
"r_shoulder_roll_joint",
"r_upper_arm_joint",
"r_elbow_joint",
]

# walkready_state is unused by this policy (HoST builds q_target from the
# current joint position, not from a default pose). Declared as 22 zeros
# only to satisfy the parameter declaration in RLNode.
walkready_state: [
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
]

obs_scales:
ang_vel: 0.25
dof_pos: 1.0
dof_vel: 0.05

action_scale: 1.0
action_rescale_obs: 1.0 # constant slot in the HoST observation vector

history:
length: 6
one_step_obs_dim: 73

# Number of policy ticks at the start of each goal during which the policy
# observes a zero vector and no joint commands are published, mirroring the
# `unactuated_timesteps` warmup HoST uses during training. Set to 0 to
# disable.
unactuated_steps: 30

phase:
# HoST does not use a phase signal.
    use_phase: false
control_dt: 0.02
gait_frequency: 0.0

providers: ["CPUExecutionProvider"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
walk_node:
ros__parameters:
# Filename of the ONNX model inside bitbots_rl_motion/models/
model: policy_walk.onnx

joints:
# 12-DOF leg-joint order matching the Pi_plus walk policy training setup.
# Observation and action vectors must follow exactly this order.
ordered_relevant_joint_names: [
"r_hip_pitch_joint",
"r_hip_roll_joint",
"r_thigh_joint",
"r_calf_joint",
"r_ankle_pitch_joint",
"r_ankle_roll_joint",
"l_hip_pitch_joint",
"l_hip_roll_joint",
"l_thigh_joint",
"l_calf_joint",
"l_ankle_pitch_joint",
"l_ankle_roll_joint",
]

# Default (walkready) pose the policy is trained relative to.
# joint_angle_obs = q_current - walkready_state
# joint_command = action * 0.5 + walkready_state
walkready_state: [0.6, 0.0, 0.0, 1.2, 0.6, 0.0, -0.6, 0.0, 0.0, -1.2, -0.6, 0.0]

# PD gains for the Pi_plus leg joints.
kp: [30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0]
kd: [1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]

phase:
use_phase: true
control_dt: 0.02
gait_frequency: 1.5
# Initial phase [π/2, -π/2] matching the walk policy training setup.
initial_phase: [1.5707963267948966, -1.5707963267948966]

providers: ["CPUExecutionProvider"]
11 changes: 9 additions & 2 deletions src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,15 @@ def __init__(self, node):

def get_command(self):
    """Return the policy command vector ``[vx, vy, wz, stop]`` as float32.

    The first three entries are the commanded planar velocities from the
    latest ``cmd_vel``; the fourth is the stop flag (1.0 when
    :meth:`get_stop_signal` is True, else 0.0).

    Note: the span previously contained the superseded 3-element version of
    this method alongside the new one (diff residue); only the 4-element
    version is kept.
    """
    assert self._cmd_vel is not None
    stop_signal = float(self.get_stop_signal())
    return np.array(
        [self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z, stop_signal],
        dtype=np.float32,
    )

def get_stop_signal(self) -> bool:
    """Return True when the latest ``cmd_vel`` requests a stop.

    Any non-zero ``angular.x`` is treated as the stop flag.
    NOTE(review): ``angular.x`` appears to be repurposed as a boolean stop
    signal here — confirm this convention with the cmd_vel publisher.
    """
    assert self._cmd_vel is not None
    return self._cmd_vel.angular.x != 0.0

def has_data(self):
    """Return True once ``self._cmd_vel`` has been set (a command was received)."""
    return self._cmd_vel is not None
Expand Down
11 changes: 9 additions & 2 deletions src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ def __init__(self, node):

self._ordered_relevant_joint_names = self._node.get_parameter("joints.ordered_relevant_joint_names").value
self._walkready_state = self._node.get_parameter("joints.walkready_state").value
self._previous_action: np.ndarray = np.zeros(len(self._ordered_relevant_joint_names), dtype=np.float32)
n = len(self._ordered_relevant_joint_names)
kp_param = self._node.get_parameter("joints.kp").value
kd_param = self._node.get_parameter("joints.kd").value
self._kp = list(kp_param) if len(kp_param) > 1 else [kp_param[0]] * n
self._kd = list(kd_param) if len(kd_param) > 1 else [kd_param[0]] * n
self._previous_action: np.ndarray = np.zeros(n, dtype=np.float32)
self._joint_state: Optional[JointState] = None

self._joint_state_sub = self._node.create_subscription(
Expand Down Expand Up @@ -61,7 +66,9 @@ def get_joint_commands(self, onnx_pred):
joint_command.positions = onnx_pred * 0.5 + self._walkready_state
joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.max_currents = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
joint_command.kp = self._kp
joint_command.kd = self._kd

return joint_command

Expand Down
Loading
Loading