diff --git a/src/bitbots_misc/bitbots_bringup/launch/motion.launch b/src/bitbots_misc/bitbots_bringup/launch/motion.launch
index b32d55461..e2a5967d7 100644
--- a/src/bitbots_misc/bitbots_bringup/launch/motion.launch
+++ b/src/bitbots_misc/bitbots_bringup/launch/motion.launch
@@ -42,6 +42,11 @@
+
+
+
+
+
diff --git a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py
new file mode 100644
index 000000000..6c8fa695d
--- /dev/null
+++ b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/actions/rl_standup_back.py
@@ -0,0 +1,102 @@
+"""HCM action element that triggers the RL standup-back policy.
+
+Modeled on ``PlayAnimationDynup`` from ``play_animation.py``: the action
+holds a goal open on a ROS action server while the standup policy runs and
+publishes joint commands. When the DSD pops this action (e.g. because the
+robot is no longer detected as fallen), the goal is canceled and the policy
+node stops publishing.
+
+Notes
+-----
+* Reuses the existing ``bitbots_msgs/Dynup`` action type, with
+  ``direction = "rl_standup_back"``. No new action message is required.
+* Creates its own ``ActionClient`` rather than relying on a blackboard field,
+  so ``hcm_blackboard.py`` does not need to be modified.
+* Wired into ``hcm.dsd`` by this change: the FALLEN_BACK / FREE branch now
+  calls ``@RLStandupBack`` instead of ``@PlayAnimationStandupBack`` (see the
+  ``hcm.dsd`` hunk below).
+"""
+
+from action_msgs.msg import GoalStatus
+from rclpy.action import ActionClient
+
+from bitbots_hcm.hcm_dsd.actions import AbstractHCMActionElement
+from bitbots_msgs.action import Dynup
+
+GOAL_DIRECTION = "rl_standup_back"
+ACTION_NAME = "rl_standup_back"
+
+
+class RLStandupBack(AbstractHCMActionElement):
+    """DSD action that runs the RL standup-back policy until cancelled."""
+
+    def __init__(self, blackboard, dsd, parameters):
+        super().__init__(blackboard, dsd, parameters)
+        self.first_perform = True
+        self._action_client = ActionClient(self.blackboard.node, Dynup, ACTION_NAME)
+        self._goal_future = None
+
+    def perform(self, reevaluate=False):
+        # Don't reevaluate while the policy is running — same pattern as
+        # PlayAnimationDynup for FRONT/BACK directions.
+        self.do_not_reevaluate()
+
+        if self.first_perform:
+            if not self._start_goal():
+                self.blackboard.node.get_logger().error(
+                    "Could not start RL standup-back goal. Aborting action."
+                )
+                return self.pop()
+            self.first_perform = False
+            return
+
+        if self._goal_finished():
+            return self.pop()
+
+    def on_pop(self):
+        """Cancel the goal if the action is popped before completion."""
+        super().on_pop()
+        if self._goal_future is None:
+            return
+        if not self._goal_future.done():
+            # The goal response is still in flight; cancel as soon as the
+            # handle arrives, otherwise the policy would keep running after
+            # the DSD has already moved on.
+            self._goal_future.add_done_callback(self._cancel_goal_handle)
+            return
+        self._cancel_goal_handle(self._goal_future)
+
+    def _cancel_goal_handle(self, future) -> None:
+        goal_handle = future.result()
+        if goal_handle is None or not goal_handle.accepted:
+            return
+        if not self._goal_finished():
+            goal_handle.cancel_goal_async()
+
+    # ------------------------------------------------------------------ helpers
+
+    def _start_goal(self) -> bool:
+        wait_param = self.blackboard.node.get_parameter("hcm.anim_server_wait_time").value
+        if not self._action_client.wait_for_server(timeout_sec=wait_param):
+            self.blackboard.node.get_logger().warn(
+                "RL standup-back action server not running; cannot start goal."
+            )
+            return False
+
+        goal = Dynup.Goal()
+        goal.direction = GOAL_DIRECTION
+        goal.from_hcm = True
+        self._goal_future = self._action_client.send_goal_async(
+            goal, feedback_callback=self._feedback_cb
+        )
+        return True
+
+    def _feedback_cb(self, msg):
+        feedback: Dynup.Feedback = msg.feedback
+        self.publish_debug_data("RL Standup Back Percent Done", str(feedback.percent_done))
+
+    def _goal_finished(self) -> bool:
+        if self._goal_future is None or not self._goal_future.done():
+            return False
+        if self._goal_future.cancelled():
+            return True
+        goal_handle = self._goal_future.result()
+        if goal_handle is None:
+            return False
+        return goal_handle.status in (
+            GoalStatus.STATUS_SUCCEEDED,
+            GoalStatus.STATUS_CANCELED,
+            GoalStatus.STATUS_ABORTED,
+        )
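As a quick smoke test of the reused ``Dynup`` interface, the same goal can be sent from a standalone script. A minimal sketch: the action name, type, and goal fields come from the file above; the probe node name and timeout are illustrative.

```python
# Minimal probe client for the standup-back action server. Action name and
# goal fields match the diff; the node name and timeout are illustrative.
import rclpy
from rclpy.action import ActionClient
from rclpy.node import Node

from bitbots_msgs.action import Dynup


def main():
    rclpy.init()
    node = Node("rl_standup_back_probe")  # hypothetical probe node
    client = ActionClient(node, Dynup, "rl_standup_back")
    if not client.wait_for_server(timeout_sec=5.0):
        node.get_logger().error("rl_standup_back server not reachable")
        rclpy.shutdown()
        return

    goal = Dynup.Goal()
    goal.direction = "rl_standup_back"
    goal.from_hcm = True

    send_future = client.send_goal_async(goal)
    rclpy.spin_until_future_complete(node, send_future)
    goal_handle = send_future.result()
    if goal_handle is None or not goal_handle.accepted:
        node.get_logger().error("goal rejected")
        rclpy.shutdown()
        return

    result_future = goal_handle.get_result_async()
    rclpy.spin_until_future_complete(node, result_future)
    node.get_logger().info(f"successful: {result_future.result().result.successful}")
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```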
diff --git a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
index ccd5e67ea..36f76cf8d 100644
--- a/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
+++ b/src/bitbots_motion/bitbots_hcm/bitbots_hcm/hcm_dsd/hcm.dsd
@@ -34,7 +34,7 @@ $StartHCM
         FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationWalkReady
     FALLEN_BACK --> $GameControllerStop
         STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
-        FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @PlayAnimationStandupBack
+        FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @RobotStateGettingUp, @Complain, @RLStandupBack
     FALLEN_RIGHT --> $GameControllerStop
         STOPPED --> @RobotStateFallen, @CancelGoals, @StopWalking, @Wait
         FREE --> @RobotStateFallen, @CancelGoals, @StopWalking, @PlayAnimationWalkReady
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py
new file mode 100644
index 000000000..61291a3e6
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/observation_history.py
@@ -0,0 +1,35 @@
+import numpy as np
+
+
+class ObservationHistory:
+    """Rolling history buffer that mirrors HoST's observation stacking.
+
+    HoST builds the actor input by stacking ``length`` consecutive one-step
+    observations of dimension ``one_step_dim``. New frames are appended at the
+    end and the oldest frame is dropped from the front. The buffer is
+    initialised with zeros, matching the training-time buffer state right after
+    a reset.
+    """
+
+    def __init__(self, length: int, one_step_dim: int):
+        self._length = length
+        self._one_step_dim = one_step_dim
+        self._buffer = np.zeros(length * one_step_dim, dtype=np.float32)
+
+    def reset(self) -> None:
+        self._buffer.fill(0.0)
+
+    def update(self, current_obs: np.ndarray) -> None:
+        assert current_obs.shape == (self._one_step_dim,), (
+            f"expected one-step obs of shape ({self._one_step_dim},), got {current_obs.shape}"
+        )
+        # shift left by one frame, append the new frame at the end
+        self._buffer[: -self._one_step_dim] = self._buffer[self._one_step_dim :]
+        self._buffer[-self._one_step_dim :] = current_obs.astype(np.float32, copy=False)
+
+    def get(self) -> np.ndarray:
+        return self._buffer
+
+    @property
+    def total_dim(self) -> int:
+        return self._length * self._one_step_dim
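A short usage sketch of the buffer semantics with toy dimensions (3 frames of dim 2 instead of 6 x 73). The zero initialisation shown here is also the state the standup node's warmup reproduces after each reset.

```python
# Sketch: FIFO semantics of ObservationHistory, with toy dimensions.
import numpy as np

from bitbots_rl_motion.observation_history import ObservationHistory

hist = ObservationHistory(length=3, one_step_dim=2)
hist.update(np.array([1.0, 1.0], dtype=np.float32))
hist.update(np.array([2.0, 2.0], dtype=np.float32))
# Oldest frames sit at the front, newest at the back:
assert hist.get().tolist() == [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]
hist.update(np.array([3.0, 3.0], dtype=np.float32))
assert hist.get().tolist() == [1.0, 1.0, 2.0, 2.0, 3.0, 3.0]  # zeros dropped
hist.reset()
assert not hist.get().any()  # back to the post-reset training state
```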
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
index 6b57b3640..91ee7479a 100644
--- a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
+++ b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
@@ -4,7 +4,6 @@


 class Phase:
-    _phase: np.ndarray = np.array([0.0, np.pi], dtype=np.float32)
     _phase_dt: Optional[float]

     def __init__(self, node):
@@ -14,12 +13,15 @@ def __init__(self, node):
             self._control_dt = self._node.get_parameter("phase.control_dt").value
             self._gait_frequency = self._node.get_parameter("phase.gait_frequency").value
             self._phase_dt = 2 * np.pi * self._gait_frequency * self._control_dt
+            initial = self._node.get_parameter("phase.initial_phase").value
+            self._phase = np.array(initial, dtype=np.float32)
             self._use_phase = True
         else:
             self._control_dt = None
             self._gait_frequency = None
             self._phase_dt = None
             self._use_phase = False
+            self._phase = np.zeros(2, dtype=np.float32)
             self._node.get_logger().warning("No phase was found! Using policy without phase!")

         self._obs_phase = None
diff --git a/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml
new file mode 100644
index 000000000..05dd1fb8a
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_standup_back_model_config.yaml
@@ -0,0 +1,66 @@
+standup_back_node:
+  ros__parameters:
+    model: pi_supine_training7.onnx
+
+    joints:
+      # 22-DOF order as reported by Isaac Gym for the HoST pi_plus_ground task.
+      # The ONNX policy expects observations and produces actions in exactly
+      # this order, so do not reorder.
+      ordered_relevant_joint_names: [
+        "head_yaw_joint",
+        "head_pitch_joint",
+        "l_hip_pitch_joint",
+        "l_hip_roll_joint",
+        "l_thigh_joint",
+        "l_calf_joint",
+        "l_ankle_pitch_joint",
+        "l_ankle_roll_joint",
+        "l_shoulder_pitch_joint",
+        "l_shoulder_roll_joint",
+        "l_upper_arm_joint",
+        "l_elbow_joint",
+        "r_hip_pitch_joint",
+        "r_hip_roll_joint",
+        "r_thigh_joint",
+        "r_calf_joint",
+        "r_ankle_pitch_joint",
+        "r_ankle_roll_joint",
+        "r_shoulder_pitch_joint",
+        "r_shoulder_roll_joint",
+        "r_upper_arm_joint",
+        "r_elbow_joint",
+      ]
+
+      # walkready_state is unused by this policy (HoST builds q_target from the
+      # current joint position, not from a default pose). Declared as 22 zeros
+      # only to satisfy the parameter declaration in RLNode.
+      walkready_state: [
+        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+      ]
+
+    obs_scales:
+      ang_vel: 0.25
+      dof_pos: 1.0
+      dof_vel: 0.05
+
+    action_scale: 1.0
+    action_rescale_obs: 1.0  # constant slot in the HoST observation vector
+
+    history:
+      length: 6
+      one_step_obs_dim: 73
+
+    # Number of policy ticks at the start of each goal during which the policy
+    # observes a zero vector and no joint commands are published, mirroring the
+    # `unactuated_timesteps` warmup HoST uses during training. Set to 0 to
+    # disable.
+    unactuated_steps: 30
+
+    phase:
+      # HoST does not use a phase signal.
+      use_phase: False
+      control_dt: 0.02
+      gait_frequency: 0.0
+
+    providers: ["CPUExecutionProvider"]
diff --git a/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml
new file mode 100644
index 000000000..1abc1a1b0
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/configs/pi_plus_walking_model_config.yaml
@@ -0,0 +1,40 @@
+walk_node:
+  ros__parameters:
+    # Filename of the ONNX model inside bitbots_rl_motion/models/
+    model: policy_walk.onnx
+
+    joints:
+      # 12-DOF leg-joint order matching the Pi_plus walk policy training setup.
+      # Observation and action vectors must follow exactly this order.
+      ordered_relevant_joint_names: [
+        "r_hip_pitch_joint",
+        "r_hip_roll_joint",
+        "r_thigh_joint",
+        "r_calf_joint",
+        "r_ankle_pitch_joint",
+        "r_ankle_roll_joint",
+        "l_hip_pitch_joint",
+        "l_hip_roll_joint",
+        "l_thigh_joint",
+        "l_calf_joint",
+        "l_ankle_pitch_joint",
+        "l_ankle_roll_joint",
+      ]
+
+      # Default (walkready) pose the policy is trained relative to.
+      #   joint_angle_obs = q_current - walkready_state
+      #   joint_command   = action * 0.5 + walkready_state
+      walkready_state: [0.6, 0.0, 0.0, 1.2, 0.6, 0.0, -0.6, 0.0, 0.0, -1.2, -0.6, 0.0]
+
+      # PD gains for the Pi_plus leg joints.
+      kp: [30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0]
+      kd: [1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]
+
+    phase:
+      use_phase: true
+      control_dt: 0.02
+      gait_frequency: 1.5
+      # Initial phase [π/2, -π/2] matching the walk policy training setup.
+      initial_phase: [1.5707963267948966, -1.5707963267948966]
+
+    providers: ["CPUExecutionProvider"]
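The ``one_step_obs_dim: 73`` and the resulting ONNX input width follow directly from the 22-joint list; a minimal check mirroring the sanity test in ``standup_back_node.py``, plus an ``onnxruntime`` probe of the model interface. The model path relative to the package's ``models/`` directory and the expected shapes are assumptions to verify, not guarantees.

```python
# Dimension check for pi_plus_standup_back_model_config.yaml, plus a probe of
# the ONNX interface the config has to match.
import onnxruntime as rt

num_joints = 22
one_step = 3 + 3 + 3 * num_joints + 1  # ang_vel + gravity + pos/vel/prev_action + rescale
assert one_step == 73                  # history.one_step_obs_dim
stacked = 6 * one_step                 # history.length frames
assert stacked == 438                  # ONNX input width the policy should expect

session = rt.InferenceSession(
    "models/pi_supine_training7.onnx", providers=["CPUExecutionProvider"]
)
print([(i.name, i.shape) for i in session.get_inputs()])   # expect a 1 x 438 input
print([(o.name, o.shape) for o in session.get_outputs()])  # expect 22 action values
```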
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
index f20641394..bab868686 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/command_handler.py
@@ -16,8 +16,15 @@
     def get_command(self):
         assert self._cmd_vel is not None
-        command = np.array([self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z], dtype=np.float32)
-        return command
+        stop_signal = float(self.get_stop_signal())
+        return np.array(
+            [self._cmd_vel.linear.x, self._cmd_vel.linear.y, self._cmd_vel.angular.z, stop_signal],
+            dtype=np.float32,
+        )
+
+    def get_stop_signal(self) -> bool:
+        assert self._cmd_vel is not None
+        return self._cmd_vel.angular.x != 0.0

     def has_data(self):
         return self._cmd_vel is not None
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
index a1871fd59..b7e8fc89f 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/joint_handler.py
@@ -13,7 +13,12 @@ def __init__(self, node):
         self._ordered_relevant_joint_names = self._node.get_parameter("joints.ordered_relevant_joint_names").value
         self._walkready_state = self._node.get_parameter("joints.walkready_state").value

-        self._previous_action: np.ndarray = np.zeros(len(self._ordered_relevant_joint_names), dtype=np.float32)
+        n = len(self._ordered_relevant_joint_names)
+        kp_param = self._node.get_parameter("joints.kp").value
+        kd_param = self._node.get_parameter("joints.kd").value
+        self._kp = list(kp_param) if len(kp_param) > 1 else [kp_param[0]] * n
+        self._kd = list(kd_param) if len(kd_param) > 1 else [kd_param[0]] * n
+        self._previous_action: np.ndarray = np.zeros(n, dtype=np.float32)

         self._joint_state: Optional[JointState] = None
         self._joint_state_sub = self._node.create_subscription(
@@ -61,7 +66,9 @@ def get_joint_commands(self, onnx_pred):
         joint_command.positions = onnx_pred * 0.5 + self._walkready_state
         joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
         joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
-        joint_command.max_currents = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.kp = self._kp
+        joint_command.kd = self._kd

         return joint_command
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py
new file mode 100644
index 000000000..d77b52b24
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/raw_joint_handler.py
@@ -0,0 +1,78 @@
+from typing import Optional
+
+import numpy as np
+from sensor_msgs.msg import JointState
+
+from bitbots_msgs.msg import JointCommand
+from handlers.handler import Handler
+
+
+class RawJointHandler(Handler):
+    """Joint handler for HoST-style policies.
+
+    Differs from ``JointHandler`` in two aspects that are HoST-specific:
+
+    * ``get_raw_angle_data`` returns absolute joint positions (no walkready
+      subtraction). HoST observations are built from raw ``dof_pos``.
+    * ``get_joint_commands`` builds a ``JointCommand`` with
+      ``q_target = q_current + action * action_scale`` (HoST control law,
+      ``host_ground.py:_compute_torques``), not the walking-style
+      ``action * scale + walkready_state``.
+
+    The existing ``JointHandler`` is left untouched so that walking and kick
+    policies keep their current behaviour.
+    """
+
+    def __init__(self, node, action_scale: float):
+        self._node = node
+        self._action_scale = float(action_scale)
+
+        self._ordered_relevant_joint_names = self._node.get_parameter(
+            "joints.ordered_relevant_joint_names"
+        ).value
+
+        self._joint_state: Optional[JointState] = None
+        self._joint_state_sub = self._node.create_subscription(
+            JointState, "joint_states", self._joint_state_callback, 10
+        )
+
+    def _joint_state_callback(self, msg: JointState) -> None:
+        self._joint_state = msg
+
+    def has_data(self) -> bool:
+        return self._joint_state is not None
+
+    def get_raw_angle_data(self) -> np.ndarray:
+        assert self._joint_state is not None
+        return np.array(
+            [
+                self._joint_state.position[self._joint_state.name.index(name)]
+                for name in self._ordered_relevant_joint_names
+            ],
+            dtype=np.float32,
+        )
+
+    def get_velocity_data(self) -> np.ndarray:
+        assert self._joint_state is not None
+        return np.array(
+            [
+                self._joint_state.velocity[self._joint_state.name.index(name)]
+                for name in self._ordered_relevant_joint_names
+            ],
+            dtype=np.float32,
+        )
+
+    def get_joint_commands(self, onnx_pred: np.ndarray) -> JointCommand:
+        assert self._joint_state is not None
+        q_current = self.get_raw_angle_data()
+        q_target = q_current + onnx_pred.astype(np.float32) * self._action_scale
+
+        joint_command = JointCommand()
+        joint_command.header.stamp = self._joint_state.header.stamp
+        joint_command.from_hcm = True
+        joint_command.joint_names = list(self._ordered_relevant_joint_names)
+        joint_command.positions = q_target.tolist()
+        joint_command.velocities = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.accelerations = [-1.0] * len(self._ordered_relevant_joint_names)
+        joint_command.max_torques = [-1.0] * len(self._ordered_relevant_joint_names)
+        return joint_command
diff --git a/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py b/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
index 0c454cddc..7205de78a 100644
--- a/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
+++ b/src/bitbots_motion/bitbots_rl_motion/handlers/robot_state_handler.py
@@ -13,6 +13,10 @@
     RobotControlState.KICKING,
 )

+GETTING_UP_STATES = (
+    RobotControlState.CONTROLLABLE,
+    RobotControlState.GETTING_UP,
+)

 class RobotStateHandler(Handler):
     def __init__(self, node):
@@ -31,3 +35,6 @@ def is_walkable(self):

     def is_kickable(self):
         return self._robot_state in KICKABLE_STATES
+
+    def is_getting_up(self):
+        return self._robot_state in GETTING_UP_STATES
diff --git a/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch b/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
index 8436dce76..29749de3e 100644
--- a/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
+++ b/src/bitbots_motion/bitbots_rl_motion/launch/rl_motion.launch
@@ -2,10 +2,11 @@
+
+
-
+
@@ -14,4 +15,11 @@
+
+
+
+
+
+
+
diff --git a/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx b/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx
new file mode 100644
index 000000000..622ed6865
Binary files /dev/null and b/src/bitbots_motion/bitbots_rl_motion/models/pi_supine_training7.onnx differ
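The two handlers above implement two different action-to-target mappings; a toy-number contrast of the walking law (offsets from a fixed walkready pose) and the HoST law (offsets from the current pose, so the policy effectively integrates its own actions over time):

```python
# Sketch: the two action-to-target mappings side by side, with toy numbers.
import numpy as np

action = np.array([0.2, -0.1], dtype=np.float32)
walkready = np.array([0.6, 0.0], dtype=np.float32)
q_current = np.array([0.55, 0.05], dtype=np.float32)

# JointHandler (walking): anchored to the walkready pose.
q_walk = action * 0.5 + walkready   # -> [0.70, -0.05]

# RawJointHandler (HoST standup): anchored to the current pose, action_scale=1.0.
q_host = q_current + action * 1.0   # -> [0.75, -0.05]

print(q_walk, q_host)
```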
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
index 1568d1b35..fa61088c3 100644
--- a/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/rl_node.py
@@ -3,7 +3,6 @@
 from pathlib import Path

 import numpy as np
-import onnx
 import onnxruntime as rt
 import rclpy
 from ament_index_python import get_package_share_directory
@@ -25,9 +24,13 @@ def __init__(self, node_name: str):
         self.declare_parameter("phase.control_dt", 0.0)
         self.declare_parameter("phase.gait_frequency", 0.0)
         self.declare_parameter("phase.use_phase", False)
+        self.declare_parameter("phase.initial_phase", [0.0, 3.141592653589793])
         self.declare_parameter("providers", ["CPUExecutionProvider"])
         self.declare_parameter("joints.ordered_relevant_joint_names", [""])
         self.declare_parameter("joints.walkready_state", [0.0])
+        self.declare_parameter("joints.kp", [-1.0])
+        self.declare_parameter("joints.kd", [-1.0])
+        self.declare_parameter("command.include_stop_signal", False)

         model = self.get_parameter("model").value
         self.get_logger().info(f"Loaded model: {model}")
@@ -67,6 +70,9 @@ def _timer_callback(self):
         if self.allowed_states():
             self.publisher(onnx_pred)

+        self._phase_update_hook()
+
+    def _phase_update_hook(self):
         if self._phase.check_phase_set():
             phase_tp1 = self._phase.get_phase() + self._phase.get_phase_dt()
             self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi)
@@ -85,10 +91,9 @@ def load_model(self, model):

         # Load the ONNX model
         self._onnx_session = rt.InferenceSession(self._onnx_model_path, providers=self.get_parameter("providers").value)
-        self._onnx_model = onnx.load(self._onnx_model_path)

-        self._onnx_input_name = [inp.name for inp in self._onnx_model.graph.input]
-        self._onnx_output_name = [out.name for out in self._onnx_model.graph.output]
+        self._onnx_input_name = [inp.name for inp in self._onnx_session.get_inputs()]
+        self._onnx_output_name = [out.name for out in self._onnx_session.get_outputs()]

         self._handlers = []
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py
new file mode 100644
index 000000000..a89285419
--- /dev/null
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/standup_back_node.py
@@ -0,0 +1,272 @@
+"""Standup-back policy node.
+
+Wraps the HoST standup-back ONNX policy (trained with
+``host_ground.py`` / ``pi_plus_config_ground.py``) as a ROS node. The node:
+
+* Builds the 73-dimensional one-step observation in the order HoST expects
+  (``base_ang_vel``, ``projected_gravity``, raw ``dof_pos``, ``dof_vel``,
+  previous action, ``action_rescale``).
+* Stacks the last ``history.length`` one-step observations into the policy
+  input (``ObservationHistory``).
+* Mirrors the ``unactuated_timesteps`` warmup from training: for the first
+  ``unactuated_steps`` ticks after a goal is accepted, the observation is
+  zeroed, the previous action is forced to zero, and no joint commands are
+  published.
+* Exposes a ROS action server on ``/rl_standup_back`` using the existing
+  ``bitbots_msgs/Dynup`` action type with ``direction = "rl_standup_back"``,
+  matching the dynup action-server pattern. While a goal is active, the
+  policy publishes joint targets on ``dynup_motor_goals`` with
+  ``q_target = q_current + onnx_action * action_scale``.
+"""
+
+import threading
+
+import numpy as np
+import rclpy
+from handlers.gravity_handler import GravityHandler
+from handlers.gyro_handler import GyroHandler
+from handlers.raw_joint_handler import RawJointHandler
+from handlers.robot_state_handler import RobotStateHandler
+from rclpy.action import ActionServer, CancelResponse, GoalResponse
+from rclpy.callback_groups import ReentrantCallbackGroup
+from rclpy.duration import Duration
+from rclpy.executors import MultiThreadedExecutor
+
+from bitbots_msgs.action import Dynup
+from bitbots_msgs.msg import JointCommand
+from bitbots_rl_motion.observation_history import ObservationHistory
+from nodes.rl_node import RLNode
+
+GOAL_DIRECTION = "rl_standup_back"
+
+
+class StandupBackNode(RLNode):
+    def __init__(self):
+        # Configures self._phase, self._previous_action; loads parameters.
+        super().__init__(node_name="standup_back_node")
+
+        # Standup-specific parameters.
+        self.declare_parameter("obs_scales.ang_vel", 0.25)
+        self.declare_parameter("obs_scales.dof_pos", 1.0)
+        self.declare_parameter("obs_scales.dof_vel", 0.05)
+        self.declare_parameter("action_scale", 1.0)
+        self.declare_parameter("action_rescale_obs", 1.0)
+        self.declare_parameter("history.length", 6)
+        self.declare_parameter("history.one_step_obs_dim", 73)
+        self.declare_parameter("unactuated_steps", 30)
+        self.declare_parameter("goal_timeout_sec", 8.0)
+
+        self._ang_vel_scale = float(self.get_parameter("obs_scales.ang_vel").value)
+        self._dof_pos_scale = float(self.get_parameter("obs_scales.dof_pos").value)
+        self._dof_vel_scale = float(self.get_parameter("obs_scales.dof_vel").value)
+        self._action_scale = float(self.get_parameter("action_scale").value)
+        self._action_rescale_obs = float(self.get_parameter("action_rescale_obs").value)
+        self._history_length = int(self.get_parameter("history.length").value)
+        self._one_step_obs_dim = int(self.get_parameter("history.one_step_obs_dim").value)
+        self._unactuated_steps = int(self.get_parameter("unactuated_steps").value)
+        self._goal_timeout_sec = float(self.get_parameter("goal_timeout_sec").value)
+
+        self._num_joints = len(self.get_parameter("joints.ordered_relevant_joint_names").value)
+
+        # Sanity: 3 (ang_vel) + 3 (gravity) + N (dof_pos) + N (dof_vel) + N (prev_act) + 1 (action_rescale)
+        expected_one_step = 3 + 3 + self._num_joints * 3 + 1
+        if expected_one_step != self._one_step_obs_dim:
+            self.get_logger().warning(
+                f"history.one_step_obs_dim ({self._one_step_obs_dim}) does not match the "
+                f"expected dimension based on joints count ({expected_one_step}). "
+                "Check the config and the ONNX policy interface."
+            )
+
+        # Publisher
+        self._joint_command_pub = self.create_publisher(JointCommand, "dynup_motor_goals", 10)
+
+        # Handlers
+        self._gyro_handler = GyroHandler(self)
+        self._gravity_handler = GravityHandler(self)
+        self._raw_joint_handler = RawJointHandler(self, action_scale=self._action_scale)
+        self._robot_state_handler = RobotStateHandler(self)
+
+        # History buffer (stacked observations).
+        self._history = ObservationHistory(
+            length=self._history_length,
+            one_step_dim=self._one_step_obs_dim,
+        )
+
+        # Goal/episode state.
+        self._goal_lock = threading.Lock()
+        self._active = False
+        self._tick = 0
+        self._was_getting_up = False
+
+        # Action server (Dynup action type, same as the dynup pattern).
+        self._action_server_callback_group = ReentrantCallbackGroup()
+        self._action_server = ActionServer(
+            self,
+            Dynup,
+            "rl_standup_back",
+            execute_callback=self._execute_callback,
+            goal_callback=self._goal_callback,
+            cancel_callback=self._cancel_callback,
+            callback_group=self._action_server_callback_group,
+        )
+
+        # Load model. RLNode collects all Handler instances from self.__dict__,
+        # subscribes to them, and starts the timer.
+        model = self.get_parameter("model").value
+        self.load_model(model)
+
+    # ------------------------------------------------------------------ obs
+
+    def _build_one_step_obs(self) -> np.ndarray:
+        gyro = self._gyro_handler.get_gyro() * self._ang_vel_scale
+        gravity = self._gravity_handler.get_gravity()
+        dof_pos = self._raw_joint_handler.get_raw_angle_data() * self._dof_pos_scale
+        dof_vel = self._raw_joint_handler.get_velocity_data() * self._dof_vel_scale
+        prev_action = self._previous_action.get_previous_action()
+        action_rescale = np.array([self._action_rescale_obs], dtype=np.float32)
+
+        return np.concatenate(
+            [gyro, gravity, dof_pos, dof_vel, prev_action, action_rescale]
+        ).astype(np.float32)
+
+    def obs(self) -> np.ndarray:
+        one_step = self._build_one_step_obs()
+
+        # During the unactuated warmup, HoST trains with a fully zeroed
+        # observation. Reproduce that here so the policy sees the same input
+        # distribution.
+        if self._active and self._tick < self._unactuated_steps:
+            one_step = np.zeros_like(one_step)
+
+        self._history.update(one_step)
+        return self._history.get()
+
+    # -------------------------------------------------------------- publish
+
+    def publisher(self, onnx_pred: np.ndarray) -> None:
+        joint_command = self._raw_joint_handler.get_joint_commands(onnx_pred)
+        self._joint_command_pub.publish(joint_command)
+
+    def allowed_states(self) -> bool:
+        # Publishing requires an HCM state compatible with getting up, plus an
+        # active goal that is past the unactuated warmup.
+        return self._robot_state_handler.is_getting_up() and self._active and self._tick >= self._unactuated_steps
+
+    # ----------------------------------------------- override timer callback
+
+    def _timer_callback(self):
+        """Tick override: keep ``_previous_action`` zero during the warmup.
+
+        RLNode's default callback unconditionally stores the ONNX output as
+        the previous action. HoST training instead force-zeroes the action
+        during the unactuated phase, so the previous-action slot in the
+        observation only becomes non-zero after the warmup ends. We mirror
+        that here.
+        """
+        sensors_ready, missing_handler = self._all_sensors_ready()
+        if not sensors_ready:
+            self.get_logger().warning(
+                f"Waiting for all sensors to be available. Following handler hasn't got the needed information: {missing_handler}",
+                throttle_duration_sec=1.0,
+            )
+            return
+
+        # Reset episode state on the rising edge into GETTING_UP so the warmup,
+        # history, and previous_action match the per-episode setup HoST trained
+        # with.
+        is_getting_up = self._robot_state_handler.is_getting_up()
+        if is_getting_up and not self._was_getting_up:
+            self._history.reset()
+            self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+            self._tick = 0
+        self._was_getting_up = is_getting_up
+
+        observation = self.obs()
+
+        onnx_input = {self._onnx_input_name[0]: observation.reshape(1, -1)}
+        onnx_pred = self._onnx_session.run(self._onnx_output_name, onnx_input)[0][0]
+
+        if self.allowed_states():
+            self._previous_action.set_previous_action(onnx_pred)
+            self.publisher(onnx_pred)
+        else:
+            self._previous_action.set_previous_action(np.zeros_like(onnx_pred))
+
+        if is_getting_up:
+            self._tick += 1
+
+    # ----------------------------------------------------- action server cbs
+
+    def _goal_callback(self, goal_request: Dynup.Goal) -> GoalResponse:
+        if goal_request.direction != GOAL_DIRECTION:
+            self.get_logger().warning(
+                f"Rejecting goal with direction='{goal_request.direction}'. "
+                f"This server only handles direction='{GOAL_DIRECTION}'."
+            )
+            return GoalResponse.REJECT
+
+        with self._goal_lock:
+            if self._active:
+                self.get_logger().warning(
+                    "Rejecting standup-back goal: another goal is already active."
+                )
+                return GoalResponse.REJECT
+        return GoalResponse.ACCEPT
+
+    def _cancel_callback(self, goal_handle) -> CancelResponse:
+        self.get_logger().info("Cancel requested for standup-back goal.")
+        return CancelResponse.ACCEPT
+
+    def _execute_callback(self, goal_handle):
+        self.get_logger().info("Standup-back goal accepted; resetting episode state.")
+        with self._goal_lock:
+            self._history.reset()
+            self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+            self._tick = 0
+            self._active = True
+
+        # Hold the action open until either:
+        # - the client cancels (e.g. HCM detects the robot is upright again,
+        #   mirroring how dynup gets cancelled when no longer needed), or
+        # - the hard timeout elapses. The policy has no built-in done signal,
+        #   so we cap each attempt so the DSD can re-evaluate (retry or move
+        #   on) instead of hanging on this action forever.
+        start_time = self.get_clock().now()
+        timeout = Duration(seconds=self._goal_timeout_sec)
+        rate = self.create_rate(20)
+        try:
+            while rclpy.ok():
+                if goal_handle.is_cancel_requested:
+                    goal_handle.canceled()
+                    self.get_logger().info("Standup-back goal canceled.")
+                    return Dynup.Result(successful=False)
+                if self.get_clock().now() - start_time >= timeout:
+                    self.get_logger().info(
+                        f"Standup-back goal hit {self._goal_timeout_sec:.1f}s timeout; "
+                        "returning control to HCM."
+                    )
+                    goal_handle.succeed()
+                    return Dynup.Result(successful=True)
+                rate.sleep()
+        finally:
+            with self._goal_lock:
+                self._active = False
+                self._tick = 0
+                self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32))
+                self._history.reset()
+            self.destroy_rate(rate)
+
+        goal_handle.abort()
+        return Dynup.Result(successful=False)
+
+
+def main():
+    rclpy.init()
+    node = StandupBackNode()
+    executor = MultiThreadedExecutor()
+    executor.add_node(node)
+    try:
+        executor.spin()
+    finally:
+        node.destroy_node()
+        rclpy.shutdown()
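The node blocks inside ``_execute_callback`` (``rate.sleep()`` in a loop), which only works because the action server sits in a ``ReentrantCallbackGroup`` and the node is spun by a ``MultiThreadedExecutor``; with a mutually exclusive group and a single-threaded executor, the blocking goal would starve the policy timer and the rate would never be serviced. A stripped-down sketch of that pattern; all names here are illustrative, not from the diff.

```python
# Sketch: blocking execute callback alongside a live timer. Illustrative only.
import rclpy
from rclpy.action import ActionServer
from rclpy.callback_groups import ReentrantCallbackGroup
from rclpy.executors import MultiThreadedExecutor
from rclpy.node import Node

from bitbots_msgs.action import Dynup


class BlockingServerDemo(Node):
    def __init__(self):
        super().__init__("blocking_server_demo")
        self.create_timer(0.02, lambda: None)  # stands in for the policy tick
        self._server = ActionServer(
            self,
            Dynup,
            "demo_action",
            execute_callback=self._execute,
            callback_group=ReentrantCallbackGroup(),  # timer and goal run in parallel
        )

    def _execute(self, goal_handle):
        rate = self.create_rate(20)
        for _ in range(100):  # blocks ~5 s; the timer keeps ticking meanwhile
            rate.sleep()
        goal_handle.succeed()
        return Dynup.Result(successful=True)


def main():
    rclpy.init()
    node = BlockingServerDemo()
    executor = MultiThreadedExecutor()  # needed so rate.sleep() can be serviced
    executor.add_node(node)
    executor.spin()


if __name__ == "__main__":
    main()
```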
diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
index f28f46bd2..86424b180 100644
--- a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
+++ b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py
@@ -47,6 +47,22 @@ def publisher(self, onnx_pred):
         joint_command = self._joint_handler.get_joint_commands(onnx_pred)
         self._joint_command_pub.publish(joint_command)

+    def _phase_update_hook(self):
+        if not self._phase.check_phase_set():
+            return
+        phase = self._phase.get_phase()
+        if self._command_handler.get_stop_signal():
+            anchors = [
+                np.array([-np.pi / 2, np.pi / 2], dtype=np.float32),
+                np.array([np.pi / 2, -np.pi / 2], dtype=np.float32),
+            ]
+            nearest = min(anchors, key=lambda a: np.linalg.norm(phase - a))
+            if np.linalg.norm(phase - nearest) < 0.1:
+                self._phase.set_phase(nearest)
+                return
+        phase_tp1 = phase + self._phase.get_phase_dt()
+        self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi)
+
     # states in which the policy executes
     def allowed_states(self):
         allowed_to_move = self._robot_state_handler.is_walkable() and np.any(self._command_handler.get_command() != 0.0)
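The stop signal read by ``CommandHandler.get_stop_signal()`` is the otherwise unused ``angular.x`` slot of the velocity command (a biped's ``cmd_vel`` never carries a roll rate). A sketch of requesting a clean stop and of the anchor-snap condition used by ``_phase_update_hook`` above; the ``cmd_vel`` topic name is an assumption, since the subscription itself is outside this diff.

```python
# Sketch: raising the stop bit and checking the anchor-snap condition.
import numpy as np
import rclpy
from geometry_msgs.msg import Twist
from rclpy.node import Node

rclpy.init()
node = Node("walk_stop_demo")  # hypothetical demo node
pub = node.create_publisher(Twist, "cmd_vel", 10)

msg = Twist()        # zero velocities: stand still...
msg.angular.x = 1.0  # ...and any non-zero angular.x marks the stop signal
pub.publish(msg)

# On the policy side, the phase is frozen once it comes within 0.1 rad of one
# of the two trained anchor phases (symmetric double support):
phase = np.array([np.pi / 2 - 0.05, -np.pi / 2 + 0.05], dtype=np.float32)
anchors = [np.array([-np.pi / 2, np.pi / 2]), np.array([np.pi / 2, -np.pi / 2])]
nearest = min(anchors, key=lambda a: np.linalg.norm(phase - a))
print(np.linalg.norm(phase - nearest) < 0.1)  # True: the phase snaps and holds

rclpy.shutdown()
```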
+ ) + goal_handle.succeed() + return Dynup.Result(successful=True) + rate.sleep() + finally: + with self._goal_lock: + self._active = False + self._tick = 0 + self._previous_action.set_previous_action(np.zeros(self._num_joints, dtype=np.float32)) + self._history.reset() + self.destroy_rate(rate) + + goal_handle.abort() + return Dynup.Result(successful=False) + + +def main(): + rclpy.init() + node = StandupBackNode() + executor = MultiThreadedExecutor() + executor.add_node(node) + try: + executor.spin() + finally: + node.destroy_node() + rclpy.shutdown() diff --git a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py index f28f46bd2..86424b180 100644 --- a/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py +++ b/src/bitbots_motion/bitbots_rl_motion/nodes/walk_node.py @@ -47,6 +47,22 @@ def publisher(self, onnx_pred): joint_command = self._joint_handler.get_joint_commands(onnx_pred) self._joint_command_pub.publish(joint_command) + def _phase_update_hook(self): + if not self._phase.check_phase_set(): + return + phase = self._phase.get_phase() + if self._command_handler.get_stop_signal(): + anchors = [ + np.array([-np.pi / 2, np.pi / 2], dtype=np.float32), + np.array([np.pi / 2, -np.pi / 2], dtype=np.float32), + ] + nearest = min(anchors, key=lambda a: np.linalg.norm(phase - a)) + if np.linalg.norm(phase - nearest) < 0.1: + self._phase.set_phase(nearest) + return + phase_tp1 = phase + self._phase.get_phase_dt() + self._phase.set_phase(np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi) + # states in which the policy executes def allowed_states(self): allowed_to_move = self._robot_state_handler.is_walkable() and np.any(self._command_handler.get_command() != 0.0) diff --git a/src/bitbots_motion/bitbots_rl_motion/setup.py b/src/bitbots_motion/bitbots_rl_motion/setup.py index 106dc782c..e83be072e 100644 --- a/src/bitbots_motion/bitbots_rl_motion/setup.py +++ b/src/bitbots_motion/bitbots_rl_motion/setup.py @@ -29,6 +29,7 @@ "console_scripts": [ "walk_node = nodes.walk_node:main", "kick_node = nodes.kick_node:main", + "standup_back_node = nodes.standup_back_node:main", ], }, )