From f0a017ee024a4226b8ac9cd2d4ae62549630e47f Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Wed, 1 Apr 2026 19:24:27 +0200
Subject: [PATCH 01/30] first data classes for compilation visualization

---
 src/mqt/predictor/rl/predictor_visualizer.py | 107 +++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 src/mqt/predictor/rl/predictor_visualizer.py

diff --git a/src/mqt/predictor/rl/predictor_visualizer.py b/src/mqt/predictor/rl/predictor_visualizer.py
new file mode 100644
index 000000000..5ccf9497d
--- /dev/null
+++ b/src/mqt/predictor/rl/predictor_visualizer.py
@@ -0,0 +1,107 @@
+# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM
+# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH
+# All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+# Licensed under the MIT License
+
+"""Visualization module for recording and exporting the RL compilation process."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import qiskit.qasm2 as qasm2
+
+if TYPE_CHECKING:
+    from qiskit import QuantumCircuit
+
+
+@dataclass
+class DeviceMetadata:
+    """Metadata containing information about the target quantum device for compilation."""
+
+    description: str
+    device_qubits: int
+
+
+@dataclass
+class InputCircuitMetadata:
+    """Metadata containing information about the initial, uncompiled quantum circuit."""
+
+    name: str
+    num_qubits: int
+
+
+@dataclass
+class CompilationStep:
+    """A snapshot of the circuit state and environment metrics at a single timestep.
+
+    Attributes:
+        step_index: The current step number in the reinforcement learning episode.
+        action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords').
+        reward: The calculated reward value for applying this specific action.
+        current_depth: The depth of the quantum circuit after the action was applied.
+        is_terminal: A flag indicating if the compilation process has concluded.
+        circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format.
+    """
+
+    step_index: int
+    action: str
+    reward: float
+    current_depth: int
+    is_terminal: bool
+    circuit_qasm: str
+
+
+@dataclass
+class CompilationVisualizer:
+    """Aggregates compilation steps and metadata for export to a JSON file.
+
+    This class acts as an in-memory buffer during the reinforcement learning compilation
+    process. It tracks the physical transformations of the circuit and exports the
+    entire episode as a structured JSON file upon termination.
+
+    Attributes:
+        device: The target device metadata.
+        input_circuit: The uncompiled circuit metadata.
+        steps: An ordered list of CompilationStep snapshots.
+    """
+
+    device: DeviceMetadata
+    input_circuit: InputCircuitMetadata
+    steps: list[CompilationStep] = field(default_factory=list)
+
+    def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None:
+        """Records a single compilation action and the resulting circuit state.
+
+        Args:
+            step_index: The current step number in the environment.
+            action: The name of the compilation pass that was just applied.
+            reward: The calculated reward for the applied pass.
+            current_qc: The current Qiskit QuantumCircuit object after the pass.
+            done: Boolean indicating if this is the final step of the compilation.
+        """
+        new_step = CompilationStep(
+            step_index=step_index,
+            action=action,
+            reward=round(reward, 6),
+            current_depth=current_qc.depth(),
+            is_terminal=done,
+            circuit_qasm=qasm2.dumps(current_qc),
+        )
+        self.steps.append(new_step)
+
+    def save_to_json(self, filepath: str | Path) -> None:
+        """Serializes the metadata and all recorded steps to a JSON file.
+
+        Args:
+            filepath: The destination path or filename for the output JSON file.
+        """
+        # asdict() seamlessly converts the nested subclasses into a clean dictionary!
+        with Path(filepath).open("w", encoding="utf-8") as f:
+            json.dump(asdict(self), f, indent=4)

From 78ca6b6c4e5e94c0f3e954f7e05f8a45f557b62f Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 4 Apr 2026 12:08:20 +0200
Subject: [PATCH 02/30] First Running Draft of the new tracer.py

- renamed CompilationVisualizer to CompilationTracer
- included first draft of actual tracing logic inside predictorenv.py
- adjusted qcompile.py and predictor.py accordingly to pass tracer_output_path through to the environment
---
 src/mqt/predictor/qcompile.py                 |  8 ++++-
 src/mqt/predictor/rl/predictor.py             | 11 ++++--
 src/mqt/predictor/rl/predictorenv.py          | 34 +++++++++++++++++++
 .../rl/{predictor_visualizer.py => tracer.py} |  2 +-
 4 files changed, 51 insertions(+), 4 deletions(-)
 rename src/mqt/predictor/rl/{predictor_visualizer.py => tracer.py} (99%)

diff --git a/src/mqt/predictor/qcompile.py b/src/mqt/predictor/qcompile.py
index d65be982e..a1f8240f0 100644
--- a/src/mqt/predictor/qcompile.py
+++ b/src/mqt/predictor/qcompile.py
@@ -16,6 +16,8 @@
 from mqt.predictor.rl import rl_compile
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     from qiskit import QuantumCircuit
 
     from mqt.predictor.reward import figure_of_merit
@@ -24,16 +26,20 @@
 def qcompile(
     qc: QuantumCircuit,
     figure_of_merit: figure_of_merit = "expected_fidelity",
+    tracer_output_path: str | Path | None = None,
 ) -> tuple[QuantumCircuit, list[str], str]:
     """Compiles a given quantum circuit to a device with the highest predicted figure of merit.
 
     Arguments:
         qc: The quantum circuit to be compiled.
         figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity".
+        tracer_output_path: If provided, enables compiler tracing and exports the JSON log to this path/directory.
 
     Returns:
         A tuple containing the compiled quantum circuit, the compilation information, and the name of the device used for compilation.
     """
     predicted_device = predict_device_for_figure_of_merit(qc, figure_of_merit=figure_of_merit)
-    res = rl_compile(qc, device=predicted_device, figure_of_merit=figure_of_merit)
+    res = rl_compile(
+        qc, device=predicted_device, figure_of_merit=figure_of_merit, tracer_output_path=tracer_output_path
+    )
     return *res, predicted_device
diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py
index 1f75b1901..7053c9eb0 100644
--- a/src/mqt/predictor/rl/predictor.py
+++ b/src/mqt/predictor/rl/predictor.py
@@ -38,12 +38,16 @@ def __init__(
         device: Target,
         path_training_circuits: Path | None = None,
         logger_level: int = logging.INFO,
+        tracer_output_path: str | Path | None = None,
     ) -> None:
         """Initializes the Predictor object."""
         logger.setLevel(logger_level)
 
         self.env = PredictorEnv(
-            reward_function=figure_of_merit, device=device, path_training_circuits=path_training_circuits
+            reward_function=figure_of_merit,
+            device=device,
+            path_training_circuits=path_training_circuits,
+            tracer_output_path=tracer_output_path,
         )
         self.device_name = device.description
         self.figure_of_merit = figure_of_merit
@@ -154,6 +158,7 @@ def rl_compile(
     device: Target | None,
     figure_of_merit: figure_of_merit | None = "expected_fidelity",
     predictor_singleton: Predictor | None = None,
+    tracer_output_path: str | Path | None = None,
 ) -> tuple[QuantumCircuit, list[str]]:
     """Compiles a given quantum circuit to a device optimizing for the given figure of merit.
 
@@ -162,6 +167,7 @@ def rl_compile(
         device: The device to compile to.
         figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity".
         predictor_singleton: A predictor object that is used for compilation to reduce compilation time when compiling multiple quantum circuits. If None, a new predictor object is created. Defaults to None.
+        tracer_output_path: If provided, enables compiler tracing and exports the JSON log to the specified path.
 
     Returns:
         A tuple containing the compiled quantum circuit and the compilation information. If compilation fails, False is returned.
@@ -176,8 +182,9 @@ def rl_compile(
         if device is None:
             msg = "device must not be None if predictor_singleton is None."
             raise ValueError(msg)
-        predictor = Predictor(figure_of_merit=figure_of_merit, device=device)
+        predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path)
     else:
         predictor = predictor_singleton
+        predictor.env.tracer_output_path = tracer_output_path
 
     return predictor.compile_as_predicted(qc)
diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 5541e260e..fc2967cc4 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -86,6 +86,11 @@
     postprocess_vf2postlayout,
     prepare_noise_data,
 )
+from mqt.predictor.rl.tracer import (
+    CompilationTracer,
+    DeviceMetadata,
+    InputCircuitMetadata,
+)
 from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl
 
 logger = logging.getLogger("mqt-predictor")
@@ -102,6 +107,7 @@ def __init__(
         path_training_circuits: Path | None = None,
         reward_scale: float = 1.0,
         no_effect_penalty: float = -0.001,
+        tracer_output_path: str | Path | None = None,
     ) -> None:
         """Initializes the PredictorEnv object.
 
@@ -112,6 +118,7 @@ def __init__(
             path_training_circuits: The path to the training circuits folder. Defaults to None, which uses the default path.
             reward_scale: Scaling factor for rewards/penalties proportional to fidelity changes.
             no_effect_penalty: Step penalty applied when an action does not change the circuit (no-op).
+            tracer_output_path: Whether to enable compilation tracing. If provided, this will export a JSON file at the end of the compilation process. Defaults to None.
 
         Raises:
             ValueError: If the reward function is "estimated_success_probability" and no calibration data is available for the device or if the reward function is "estimated_hellinger_distance" and no trained model is available for the device.
@@ -130,6 +137,8 @@ def __init__(
         self.actions_structure_preserving_indices = []  # Actions that preserves the mapping and native gates
         self.used_actions: list[str] = []
         self.device = device
+        self.tracer_output_path = tracer_output_path
+        self.tracer = None
 
         logger.info("MDP: " + mdp)
         self.mdp = mdp
@@ -258,6 +267,24 @@ def _log_step_reward(self, step_index: int, action_name: str, reward_val: float,
                 reward_val,
             )
 
+        if self.tracer is not None and self.tracer_output_path is not None:
+            self.tracer.record_step(
+                step_index=step_index,
+                action=action_name,
+                reward=reward_val,
+                current_qc=self.state,
+                done=done,
+            )
+
+            if done:
+                out_path = Path(self.tracer_output_path)
+
+                if out_path.is_dir() or not out_path.suffix:
+                    out_path = out_path / f"visualization_{self.current_circuit_name}.json"
+
+                self.tracer.save_to_json(out_path)
+                logger.info("✅TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve())
+
     def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any, Any]]:
         """Run one environment step.
 
@@ -464,9 +491,16 @@ def reset(
 
         self.prev_reward = None
         self.prev_reward_kind = None
+        self.tracer = None
 
         self.num_qubits_uncompiled_circuit = self.state.num_qubits
         self.has_parameterized_gates = len(self.state.parameters) > 0
+
+        if self.tracer_output_path is not None:
+            device_meta = DeviceMetadata(self.device.description, self.device.num_qubits)
+            input_meta = InputCircuitMetadata(self.current_circuit_name, self.num_qubits_uncompiled_circuit)
+            self.tracer = CompilationTracer(device=device_meta, input_circuit=input_meta)
+
         logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
 
         return create_feature_dict(self.state), {}
diff --git a/src/mqt/predictor/rl/predictor_visualizer.py b/src/mqt/predictor/rl/tracer.py
similarity index 99%
rename from src/mqt/predictor/rl/predictor_visualizer.py
rename to src/mqt/predictor/rl/tracer.py
index 5ccf9497d..f7d834a2f 100644
--- a/src/mqt/predictor/rl/predictor_visualizer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -59,7 +59,7 @@ class CompilationStep:
 
 
 @dataclass
-class CompilationVisualizer:
+class CompilationTracer:
     """Aggregates compilation steps and metadata for export to a JSON file.
 
     This class acts as an in-memory buffer during the reinforcement learning compilation

From d29b926a5b3c02c71ef275f129f870e9466fa7a8 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 4 Apr 2026 13:02:51 +0200
Subject: [PATCH 03/30] cleaned up tracer initialization

---
 src/mqt/predictor/rl/predictorenv.py |  6 +-----
 src/mqt/predictor/rl/tracer.py       | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index fc2967cc4..377d4d654 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -88,8 +88,6 @@
 )
 from mqt.predictor.rl.tracer import (
     CompilationTracer,
-    DeviceMetadata,
-    InputCircuitMetadata,
 )
 from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl
 
@@ -497,9 +495,7 @@ def reset(
         self.has_parameterized_gates = len(self.state.parameters) > 0
 
         if self.tracer_output_path is not None:
-            device_meta = DeviceMetadata(self.device.description, self.device.num_qubits)
-            input_meta = InputCircuitMetadata(self.current_circuit_name, self.num_qubits_uncompiled_circuit)
-            self.tracer = CompilationTracer(device=device_meta, input_circuit=input_meta)
+            self.tracer = CompilationTracer.from_initial_state(self.device, self.state, self.current_circuit_name)
 
         logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
 
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index f7d834a2f..f4f4086ec 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -19,6 +19,7 @@
 
 if TYPE_CHECKING:
     from qiskit import QuantumCircuit
+    from qiskit.transpiler import Target
 
 
 @dataclass
@@ -35,6 +36,8 @@ class InputCircuitMetadata:
 
     name: str
     num_qubits: int
+    depth: int
+    circuit_qasm: str
 
 
 @dataclass
@@ -76,6 +79,22 @@ class CompilationTracer:
     input_circuit: InputCircuitMetadata
     steps: list[CompilationStep] = field(default_factory=list)
 
+    @classmethod
+    def from_initial_state(cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str) -> CompilationTracer:
+        """Alternative constructor to build the tracer more conveniently from the environment's initial state."""
+        device_meta = DeviceMetadata(
+            description=device.description,
+            device_qubits=device.num_qubits,
+        )
+        input_meta = InputCircuitMetadata(
+            name=circuit_name,
+            num_qubits=input_circuit.num_qubits,
+            depth=input_circuit.depth(),
+            circuit_qasm=qasm2.dumps(input_circuit),
+        )
+
+        return cls(device=device_meta, input_circuit=input_meta)
+
     def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None:
         """Records a single compilation action and the resulting circuit state.
 
@@ -102,6 +121,5 @@ def save_to_json(self, filepath: str | Path) -> None:
         Args:
             filepath: The destination path or filename for the output JSON file.
         """
-        # asdict() seamlessly converts the nested subclasses into a clean dictionary!
         with Path(filepath).open("w", encoding="utf-8") as f:
             json.dump(asdict(self), f, indent=4)

From 4fdddfb9f394214a9769bcd0011cff84140f41e0 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 4 Apr 2026 14:53:53 +0200
Subject: [PATCH 04/30] extended capture of device metadata

The DeviceMetadata class now also includes the device's native gates, its topology and gate calibration data.
---
 src/mqt/predictor/rl/tracer.py | 76 ++++++++++++++++++++++++++++------
 1 file changed, 64 insertions(+), 12 deletions(-)

diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index f4f4086ec..e1ba4a193 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -19,7 +19,24 @@
 
 if TYPE_CHECKING:
     from qiskit import QuantumCircuit
-    from qiskit.transpiler import Target
+    from qiskit.transpiler import InstructionProperties, Target
+
+
+@dataclass
+class TopologyEdge:
+    """Represents a topology edge between two qubits."""
+
+    control: int
+    target: int
+
+
+@dataclass
+class GateCalibration:
+    """Calibration data for a specific gate on a specific set of qubits."""
+
+    qubits: list[int]
+    duration: float | None
+    error: float | None
 
 
 @dataclass
@@ -28,6 +45,9 @@ class DeviceMetadata:
 
     description: str
     device_qubits: int
+    native_gates: list[str]
+    topology: list[TopologyEdge]
+    calibration_data: dict[str, list[GateCalibration]]
 
 
 @dataclass
@@ -82,17 +102,8 @@ class CompilationTracer:
     @classmethod
     def from_initial_state(cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str) -> CompilationTracer:
         """Alternative constructor to build the tracer more conveniently from the environment's initial state."""
-        device_meta = DeviceMetadata(
-            description=device.description,
-            device_qubits=device.num_qubits,
-        )
-        input_meta = InputCircuitMetadata(
-            name=circuit_name,
-            num_qubits=input_circuit.num_qubits,
-            depth=input_circuit.depth(),
-            circuit_qasm=qasm2.dumps(input_circuit),
-        )
-
+        device_meta = cls._extract_device_metadata(device)
+        input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name)
         return cls(device=device_meta, input_circuit=input_meta)
 
     def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None:
@@ -123,3 +134,44 @@ def save_to_json(self, filepath: str | Path) -> None:
         """
         with Path(filepath).open("w", encoding="utf-8") as f:
             json.dump(asdict(self), f, indent=4)
+
+    @staticmethod
+    def _extract_circuit_metadata(input_circuit: QuantumCircuit, circuit_name: str) -> InputCircuitMetadata:
+        """Internal helper to parse the initial quantum circuit."""
+        return InputCircuitMetadata(
+            name=circuit_name,
+            num_qubits=input_circuit.num_qubits,
+            depth=input_circuit.depth(),
+            circuit_qasm=qasm2.dumps(input_circuit),
+        )
+
+    @staticmethod
+    def _extract_device_metadata(device: Target) -> DeviceMetadata:
+        """Internal helper to extract topology and calibration data from the device."""
+        native_gates = list(device.operation_names)
+        cmap = device.build_coupling_map()
+        topology = [TopologyEdge(control=c, target=t) for c, t in cmap] if cmap is not None else []
+        calibration_data: dict[str, list[GateCalibration]] = {}
+
+        for gate_name in native_gates:
+            gate_calibrations = []
+            props: InstructionProperties
+            qubit_tuples: tuple[int, ...]
+
+            for qubit_tuples, props in device[gate_name].items():
+                if qubit_tuples is None or props is None:
+                    continue
+
+                gate_calibrations.append(
+                    GateCalibration(qubits=list(qubit_tuples), duration=props.duration, error=props.error)
+                )
+
+            calibration_data[gate_name] = gate_calibrations
+
+        return DeviceMetadata(
+            description=device.description,
+            device_qubits=device.num_qubits,
+            native_gates=native_gates,
+            topology=topology,
+            calibration_data=calibration_data,
+        )

From ea7751eec0ecf132ce529196a80d338ea1f7b35e Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:31:56 +0200
Subject: [PATCH 05/30] included tracking of figure_of_merit over time

- each compilation step is now associated with its figure of merit value and its kind (exact vs. approximate)
---
 src/mqt/predictor/rl/predictorenv.py | 38 ++++++++++++++++++++++++----
 src/mqt/predictor/rl/tracer.py       | 37 ++++++++++++++++++++++++---
 2 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 377d4d654..3fc0c36da 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -248,7 +248,9 @@ def _apply_and_update(self, action: int) -> QuantumCircuit | None:
 
         return altered_qc
 
-    def _log_step_reward(self, step_index: int, action_name: str, reward_val: float, done: bool) -> None:
+    def _log_step_reward(
+        self, step_index: int, action_name: str, reward_val: float, fom_value: float, fom_kind: str, done: bool
+    ) -> None:
         """Log the chosen action and resulting reward for the current episode step."""
         logger.info(
             "Episode %d step %d: action=%s reward=%.6f",
@@ -271,6 +273,8 @@ def _log_step_reward(self, step_index: int, action_name: str, reward_val: float,
                 action=action_name,
                 reward=reward_val,
                 current_qc=self.state,
+                fom_value=fom_value,
+                fom_kind=fom_kind,
                 done=done,
             )
 
@@ -305,7 +309,14 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
 
         altered_qc = self._apply_and_update(action)
         if altered_qc is None:
-            self._log_step_reward(step_index, action_name, 0.0, done=True)
+            self._log_step_reward(
+                step_index=step_index,
+                action_name=action_name,
+                reward_val=0.0,
+                fom_value=0.0,
+                fom_kind="exact",
+                done=True,
+            )
             return create_feature_dict(self.state), 0.0, True, False, {}
 
         done = action == self.action_terminate_index
@@ -313,7 +324,14 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
         if self.reward_function == "estimated_hellinger_distance":
             reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0
             self.state._layout = self.layout  # noqa: SLF001
-            self._log_step_reward(step_index, action_name, reward_val, done)
+            self._log_step_reward(
+                step_index=step_index,
+                action_name=action_name,
+                reward_val=reward_val,
+                fom_value=reward_val,
+                fom_kind="exact",
+                done=done,
+            )
             return create_feature_dict(self.state), reward_val, done, False, {}
 
         # Lazy init: compute prev_reward only once per episode (or if missing)
@@ -342,7 +360,15 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             self.prev_reward, self.prev_reward_kind = new_val, new_kind
 
         obs = create_feature_dict(self.state)
-        self._log_step_reward(step_index, action_name, reward_val, done)
+        self._log_step_reward(
+            step_index=step_index,
+            action_name=action_name,
+            reward_val=reward_val,
+            fom_value=self.prev_reward,
+            fom_kind=self.prev_reward_kind,
+            done=done,
+        )
+
         return obs, reward_val, done, False, {}
 
     def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
@@ -495,7 +521,9 @@ def reset(
         self.has_parameterized_gates = len(self.state.parameters) > 0
 
         if self.tracer_output_path is not None:
-            self.tracer = CompilationTracer.from_initial_state(self.device, self.state, self.current_circuit_name)
+            self.tracer = CompilationTracer.from_initial_state(
+                self.device, self.state, self.current_circuit_name, self.reward_function
+            )
 
         logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
 
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index e1ba4a193..e01ab6f6f 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -57,6 +57,7 @@ class InputCircuitMetadata:
     name: str
     num_qubits: int
     depth: int
+    figure_of_merit: str
     circuit_qasm: str
 
 
@@ -69,6 +70,9 @@ class CompilationStep:
         action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords').
         reward: The calculated reward value for applying this specific action.
         current_depth: The depth of the quantum circuit after the action was applied.
+        total_gates: The total number of gates included in the circuit.
+        fom_value: The figure of merit value for this compilation pass.
+        fom_kind: The kind of fom value: 'exact' or 'approx'.
         is_terminal: A flag indicating if the compilation process has concluded.
         circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format.
     """
@@ -77,6 +81,9 @@ class CompilationStep:
     action: str
     reward: float
     current_depth: int
+    total_gates: int
+    fom_value: float
+    fom_kind: str
     is_terminal: bool
     circuit_qasm: str
 
@@ -100,13 +107,24 @@ class CompilationTracer:
     steps: list[CompilationStep] = field(default_factory=list)
 
     @classmethod
-    def from_initial_state(cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str) -> CompilationTracer:
+    def from_initial_state(
+        cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str
+    ) -> CompilationTracer:
         """Alternative constructor to build the tracer more conveniently from the environment's initial state."""
         device_meta = cls._extract_device_metadata(device)
-        input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name)
+        input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name, figure_of_merit)
         return cls(device=device_meta, input_circuit=input_meta)
 
-    def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None:
+    def record_step(
+        self,
+        step_index: int,
+        action: str,
+        reward: float,
+        current_qc: QuantumCircuit,
+        fom_value: float,
+        fom_kind: str,
+        done: bool,
+    ) -> None:
         """Records a single compilation action and the resulting circuit state.
 
         Args:
@@ -114,13 +132,21 @@ def record_step(self, step_index: int, action: str, reward: float, current_qc: Q
             action: The name of the compilation pass that was just applied.
             reward: The calculated reward for the applied pass.
             current_qc: The current Qiskit QuantumCircuit object after the pass.
+            fom_value: The figure of merit value for the compilation pass.
+            fom_kind: The kind of fom value: 'exact' or 'approx'.
             done: Boolean indicating if this is the final step of the compilation.
         """
+        present_ops_dict = current_qc.count_ops()
+        total_gates = sum(present_ops_dict.values()) if present_ops_dict else 0
+
         new_step = CompilationStep(
             step_index=step_index,
             action=action,
             reward=round(reward, 6),
             current_depth=current_qc.depth(),
+            total_gates=total_gates,
+            fom_value=round(fom_value, 6),
+            fom_kind=fom_kind,
             is_terminal=done,
             circuit_qasm=qasm2.dumps(current_qc),
         )
@@ -136,12 +162,15 @@ def save_to_json(self, filepath: str | Path) -> None:
             json.dump(asdict(self), f, indent=4)
 
     @staticmethod
-    def _extract_circuit_metadata(input_circuit: QuantumCircuit, circuit_name: str) -> InputCircuitMetadata:
+    def _extract_circuit_metadata(
+        input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str
+    ) -> InputCircuitMetadata:
         """Internal helper to parse the initial quantum circuit."""
         return InputCircuitMetadata(
             name=circuit_name,
             num_qubits=input_circuit.num_qubits,
             depth=input_circuit.depth(),
+            figure_of_merit=figure_of_merit,
             circuit_qasm=qasm2.dumps(input_circuit),
         )
 

From bf3bde3b64a93969e304774c9a963f78cea9d582 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sun, 5 Apr 2026 13:54:27 +0200
Subject: [PATCH 06/30] adjusted gate counting & included feature vector values
 for compilation steps

- gate count no longer includes barriers
- program_communication, critical_depth, entanglement_ratio, parallelism, liveness are now also captured for each compilation step
---
 src/mqt/predictor/rl/predictorenv.py | 23 +++++++++++++++++++----
 src/mqt/predictor/rl/tracer.py       | 26 +++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 3fc0c36da..771935433 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -14,10 +14,13 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+import numpy as np
+
 if TYPE_CHECKING:
     from collections.abc import Callable
 
     from bqskit import Circuit
+    from numpy.typing import NDArray
     from pytket._tket.passes import BasePass as TketBasePass
     from pytket.circuit import Node
     from qiskit.passmanager.base_tasks import Task
@@ -34,7 +37,6 @@
 from math import isclose
 from typing import cast
 
-import numpy as np
 from bqskit.ext import bqskit_to_qiskit, qiskit_to_bqskit
 from gymnasium import Env
 from gymnasium.spaces import Box, Dict, Discrete
@@ -249,7 +251,14 @@ def _apply_and_update(self, action: int) -> QuantumCircuit | None:
         return altered_qc
 
     def _log_step_reward(
-        self, step_index: int, action_name: str, reward_val: float, fom_value: float, fom_kind: str, done: bool
+        self,
+        step_index: int,
+        action_name: str,
+        reward_val: float,
+        fom_value: float,
+        fom_kind: str,
+        feature_vector: dict[str, int | NDArray[np.float32]],
+        done: bool,
     ) -> None:
         """Log the chosen action and resulting reward for the current episode step."""
         logger.info(
@@ -275,6 +284,7 @@ def _log_step_reward(
                 current_qc=self.state,
                 fom_value=fom_value,
                 fom_kind=fom_kind,
+                features=feature_vector,
                 done=done,
             )
 
@@ -309,30 +319,34 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
 
         altered_qc = self._apply_and_update(action)
         if altered_qc is None:
+            obs = create_feature_dict(self.state)
             self._log_step_reward(
                 step_index=step_index,
                 action_name=action_name,
                 reward_val=0.0,
                 fom_value=0.0,
                 fom_kind="exact",
+                feature_vector=obs,
                 done=True,
             )
-            return create_feature_dict(self.state), 0.0, True, False, {}
+            return obs, 0.0, True, False, {}
 
         done = action == self.action_terminate_index
 
         if self.reward_function == "estimated_hellinger_distance":
             reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0
             self.state._layout = self.layout  # noqa: SLF001
+            obs = create_feature_dict(self.state)
             self._log_step_reward(
                 step_index=step_index,
                 action_name=action_name,
                 reward_val=reward_val,
                 fom_value=reward_val,
                 fom_kind="exact",
+                feature_vector=obs,
                 done=done,
             )
-            return create_feature_dict(self.state), reward_val, done, False, {}
+            return obs, reward_val, done, False, {}
 
         # Lazy init: compute prev_reward only once per episode (or if missing)
         if self.prev_reward is None:
@@ -366,6 +380,7 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             reward_val=reward_val,
             fom_value=self.prev_reward,
             fom_kind=self.prev_reward_kind,
+            feature_vector=obs,
             done=done,
         )
 
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index e01ab6f6f..5a54c3aa6 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -18,6 +18,8 @@
 import qiskit.qasm2 as qasm2
 
 if TYPE_CHECKING:
+    import numpy as np
+    from numpy.typing import NDArray
     from qiskit import QuantumCircuit
     from qiskit.transpiler import InstructionProperties, Target
 
@@ -81,11 +83,17 @@ class CompilationStep:
     action: str
     reward: float
     current_depth: int
+    num_qubits: int
     total_gates: int
     fom_value: float
     fom_kind: str
     is_terminal: bool
     circuit_qasm: str
+    program_communication: float
+    critical_depth: float
+    entanglement_ratio: float
+    parallelism: float
+    liveness: float
 
 
 @dataclass
@@ -123,6 +131,7 @@ def record_step(
         current_qc: QuantumCircuit,
         fom_value: float,
         fom_kind: str,
+        features: dict[str, int | NDArray[np.float32]],
         done: bool,
     ) -> None:
         """Records a single compilation action and the resulting circuit state.
@@ -134,21 +143,28 @@ def record_step(
             current_qc: The current Qiskit QuantumCircuit object after the pass.
             fom_value: The figure of merit value for the compilation pass.
             fom_kind: The kind of fom value: 'exact' or 'approx'.
+            features: The quantum circuit's feature vector used by the RL agent.
             done: Boolean indicating if this is the final step of the compilation.
         """
         present_ops_dict = current_qc.count_ops()
-        total_gates = sum(present_ops_dict.values()) if present_ops_dict else 0
+        total_gates = sum(count for gate, count in present_ops_dict.items() if gate != "barrier")
 
         new_step = CompilationStep(
             step_index=step_index,
             action=action,
             reward=round(reward, 6),
             current_depth=current_qc.depth(),
+            num_qubits=current_qc.num_qubits,
             total_gates=total_gates,
             fom_value=round(fom_value, 6),
             fom_kind=fom_kind,
             is_terminal=done,
             circuit_qasm=qasm2.dumps(current_qc),
+            program_communication=self._extract_float(features["program_communication"]),
+            critical_depth=self._extract_float(features["critical_depth"]),
+            entanglement_ratio=self._extract_float(features["entanglement_ratio"]),
+            parallelism=self._extract_float(features["parallelism"]),
+            liveness=self._extract_float(features["liveness"]),
         )
         self.steps.append(new_step)
 
@@ -204,3 +220,11 @@ def _extract_device_metadata(device: Target) -> DeviceMetadata:
             topology=topology,
             calibration_data=calibration_data,
         )
+
+    @staticmethod
+    def _extract_float(val: int | NDArray[np.float32]) -> float:
+        """Safely extracts a float from a scalar or a 1D NumPy array to satisfy linter requirements."""
+        if isinstance(val, (int, float)):
+            return float(val)
+        # If it is not an int or float, the linter now safely assumes it is an array
+        return float(val[0])

From 3ece49d5505c8542bf1e7ac313b21109c303fa60 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Tue, 7 Apr 2026 16:50:06 +0200
Subject: [PATCH 07/30] restructuring of CompilationTracer class

- initial circuit is now captured as first value in the steps array (step_index 0), removed InputCircuitMetadata class from tracer.py
- predictorenv.py now calculates the initial figure_of_merit value in the reset() method, which removes the need for the None-check inside step() (step() can only be called after reset() has been called on the environment, so prev_reward and prev_reward_kind are always set)
- changed data type for prev_reward and prev_reward_kind to float and str, respectively, and provided initial values.
---
 src/mqt/predictor/rl/predictorenv.py | 25 ++++++------
 src/mqt/predictor/rl/tracer.py       | 58 +++++++++++++---------------
 2 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 771935433..71ffc1c40 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -221,8 +221,8 @@ def __init__(
         self.readout_err: dict[Node, float] | None = None
         self.reward_scale = reward_scale
         self.no_effect_penalty = no_effect_penalty
-        self.prev_reward: float | None = None
-        self.prev_reward_kind: str | None = None
+        self.prev_reward: float = 0.0
+        self.prev_reward_kind: str = "unknown"
         self.episode_count = 0
         self.current_circuit_name = "<unknown>"
         self.err_by_gate: dict[str, float] = {}
@@ -348,10 +348,6 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             )
             return obs, reward_val, done, False, {}
 
-        # Lazy init: compute prev_reward only once per episode (or if missing)
-        if self.prev_reward is None:
-            self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto")
-
         if done:
             assert action in self.valid_actions, "Terminate action is not valid but was chosen."
             self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="exact")
@@ -527,22 +523,29 @@ def reset(
             self.valid_actions = self.actions_synthesis_indices + self.actions_opt_indices
 
         self.error_occurred = False
-
-        self.prev_reward = None
-        self.prev_reward_kind = None
         self.tracer = None
 
         self.num_qubits_uncompiled_circuit = self.state.num_qubits
         self.has_parameterized_gates = len(self.state.parameters) > 0
 
+        # create baseline values
+        obs = create_feature_dict(self.state)
+        self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto")
+
         if self.tracer_output_path is not None:
             self.tracer = CompilationTracer.from_initial_state(
-                self.device, self.state, self.current_circuit_name, self.reward_function
+                device=self.device,
+                input_circuit=self.state,
+                circuit_name=self.current_circuit_name,
+                figure_of_merit=self.reward_function,
+                features=obs,
+                initial_fom=self.prev_reward,
+                fom_kind=self.prev_reward_kind,
             )
 
         logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
 
-        return create_feature_dict(self.state), {}
+        return obs, {}
 
     def action_masks(self) -> list[bool]:
         """Returns a list of valid actions for the current state."""
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index 5a54c3aa6..aab3232ea 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -52,17 +52,6 @@ class DeviceMetadata:
     calibration_data: dict[str, list[GateCalibration]]
 
 
-@dataclass
-class InputCircuitMetadata:
-    """Metadata containing information about the initial, uncompiled quantum circuit."""
-
-    name: str
-    num_qubits: int
-    depth: int
-    figure_of_merit: str
-    circuit_qasm: str
-
-
 @dataclass
 class CompilationStep:
     """A snapshot of the circuit state and environment metrics at a single timestep.
@@ -105,23 +94,44 @@ class CompilationTracer:
     entire episode as a structured JSON file upon termination.
 
     Attributes:
+        circuit_name: The name of the circuit being compiled.
+        figure_of_merit: The chosen figure of merit for this compilation.
         device: The target device metadata.
-        input_circuit: The uncompiled circuit metadata.
         steps: An ordered list of CompilationStep snapshots.
     """
 
+    circuit_name: str
+    figure_of_merit: str
     device: DeviceMetadata
-    input_circuit: InputCircuitMetadata
     steps: list[CompilationStep] = field(default_factory=list)
 
     @classmethod
     def from_initial_state(
-        cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str
+        cls,
+        device: Target,
+        input_circuit: QuantumCircuit,
+        circuit_name: str,
+        figure_of_merit: str,
+        features: dict[str, int | NDArray[np.float32]],
+        initial_fom: float,
+        fom_kind: str,
     ) -> CompilationTracer:
         """Alternative constructor to build the tracer more conveniently from the environment's initial state."""
         device_meta = cls._extract_device_metadata(device)
-        input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name, figure_of_merit)
-        return cls(device=device_meta, input_circuit=input_meta)
+        tracer = cls(circuit_name=circuit_name, figure_of_merit=figure_of_merit, device=device_meta)
+
+        tracer.record_step(
+            step_index=0,
+            action="Baseline",
+            reward=0.0,
+            current_qc=input_circuit,
+            fom_value=initial_fom,
+            fom_kind=fom_kind,
+            features=features,
+            done=False,
+        )
+
+        return tracer
 
     def record_step(
         self,
@@ -177,19 +187,6 @@ def save_to_json(self, filepath: str | Path) -> None:
         with Path(filepath).open("w", encoding="utf-8") as f:
             json.dump(asdict(self), f, indent=4)
 
-    @staticmethod
-    def _extract_circuit_metadata(
-        input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str
-    ) -> InputCircuitMetadata:
-        """Internal helper to parse the initial quantum circuit."""
-        return InputCircuitMetadata(
-            name=circuit_name,
-            num_qubits=input_circuit.num_qubits,
-            depth=input_circuit.depth(),
-            figure_of_merit=figure_of_merit,
-            circuit_qasm=qasm2.dumps(input_circuit),
-        )
-
     @staticmethod
     def _extract_device_metadata(device: Target) -> DeviceMetadata:
         """Internal helper to extract topology and calibration data from the device."""
@@ -224,7 +221,6 @@ def _extract_device_metadata(device: Target) -> DeviceMetadata:
     @staticmethod
     def _extract_float(val: int | NDArray[np.float32]) -> float:
         """Safely extracts a float from a scalar or a 1D NumPy array to satisfy linter requirements."""
-        if isinstance(val, (int, float)):
+        if isinstance(val, int):
             return float(val)
-        # If it is not an int or float, the linter now safely assumes it is an array
         return float(val[0])

From 8a82b849d4706a8485c80ef95dea31f52c342d7b Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Tue, 7 Apr 2026 20:55:31 +0200
Subject: [PATCH 08/30] included tracing of MDP state evolution

The tracer now tracks the MDP policy as well as the MDP state evolution throughout the compilation.
---
 src/mqt/predictor/rl/predictorenv.py | 22 ++++++++++++++++++++--
 src/mqt/predictor/rl/tracer.py       | 28 +++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 71ffc1c40..9a96c2f7e 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -277,6 +277,7 @@ def _log_step_reward(
             )
 
         if self.tracer is not None and self.tracer_output_path is not None:
+            synthesized, laid_out, routed = self._get_mdp_state()
             self.tracer.record_step(
                 step_index=step_index,
                 action=action_name,
@@ -285,6 +286,9 @@ def _log_step_reward(
                 fom_value=fom_value,
                 fom_kind=fom_kind,
                 features=feature_vector,
+                synthesized=synthesized,
+                laid_out=laid_out,
+                routed=routed,
                 done=done,
             )
 
@@ -533,14 +537,20 @@ def reset(
         self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto")
 
         if self.tracer_output_path is not None:
+            synthesized, laid_out, routed = self._get_mdp_state()
+
             self.tracer = CompilationTracer.from_initial_state(
                 device=self.device,
                 input_circuit=self.state,
                 circuit_name=self.current_circuit_name,
                 figure_of_merit=self.reward_function,
+                mdp_policy=self.mdp,
                 features=obs,
                 initial_fom=self.prev_reward,
                 fom_kind=self.prev_reward_kind,
+                synthesized=synthesized,
+                laid_out=laid_out,
+                routed=routed,
             )
 
         logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
@@ -938,15 +948,23 @@ def is_circuit_routed(self, circuit: QuantumCircuit, coupling_map: CouplingMap)
                     return False
         return True
 
-    def determine_valid_actions_for_state(self) -> list[int]:
-        """Determine valid actions based on circuit state: synthesized, mapped, routed."""
+    def _get_mdp_state(self) -> tuple[bool, bool, bool]:
+        """Determine the current MDP state of the circuit.
+
+        Returns:
+            A tuple with boolean values describing the state of the circuit (synthesized, laid_out, routed)
+        """
         synthesized = self.is_circuit_synthesized(self.state)
         laid_out = self.is_circuit_laid_out(self.state, self.layout) if self.layout else False
         # Routing is only allowed after layout
         routed = (
             self.is_circuit_routed(self.state, CouplingMap(self.device.build_coupling_map())) if laid_out else False
         )
+        return synthesized, laid_out, routed
 
+    def determine_valid_actions_for_state(self) -> list[int]:
+        """Determine valid actions based on circuit state: synthesized, mapped, routed."""
+        synthesized, laid_out, routed = self._get_mdp_state()
         actions = []
         # Initial state
         if not synthesized and not laid_out and not routed:
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index aab3232ea..ed7ed5ed5 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -64,6 +64,9 @@ class CompilationStep:
         total_gates: The total number of gates included in the circuit.
         fom_value: The figure of merit value for this compilation pass.
         fom_kind: The kind of fom value: 'exact' or 'approx'.
+        synthesized: Whether the circuit has already been synthesized.
+        laid_out: Whether the circuit has already been laid out.
+        routed: Whether the circuit has already been routed.
         is_terminal: A flag indicating if the compilation process has concluded.
         circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format.
     """
@@ -76,6 +79,9 @@ class CompilationStep:
     total_gates: int
     fom_value: float
     fom_kind: str
+    synthesized: bool
+    laid_out: bool
+    routed: bool
     is_terminal: bool
     circuit_qasm: str
     program_communication: float
@@ -96,12 +102,14 @@ class CompilationTracer:
     Attributes:
         circuit_name: The name of the circuit being compiled.
         figure_of_merit: The chosen figure of merit for this compilation.
+        mdp_policy: The MDP transition policy.
         device: The target device metadata.
         steps: An ordered list of CompilationStep snapshots.
     """
 
     circuit_name: str
     figure_of_merit: str
+    mdp_policy: str
     device: DeviceMetadata
     steps: list[CompilationStep] = field(default_factory=list)
 
@@ -112,13 +120,19 @@ def from_initial_state(
         input_circuit: QuantumCircuit,
         circuit_name: str,
         figure_of_merit: str,
+        mdp_policy: str,
         features: dict[str, int | NDArray[np.float32]],
         initial_fom: float,
         fom_kind: str,
+        synthesized: bool,
+        laid_out: bool,
+        routed: bool,
     ) -> CompilationTracer:
         """Alternative constructor to build the tracer more conveniently from the environment's initial state."""
         device_meta = cls._extract_device_metadata(device)
-        tracer = cls(circuit_name=circuit_name, figure_of_merit=figure_of_merit, device=device_meta)
+        tracer = cls(
+            circuit_name=circuit_name, figure_of_merit=figure_of_merit, mdp_policy=mdp_policy, device=device_meta
+        )
 
         tracer.record_step(
             step_index=0,
@@ -128,6 +142,9 @@ def from_initial_state(
             fom_value=initial_fom,
             fom_kind=fom_kind,
             features=features,
+            synthesized=synthesized,
+            laid_out=laid_out,
+            routed=routed,
             done=False,
         )
 
@@ -142,6 +159,9 @@ def record_step(
         fom_value: float,
         fom_kind: str,
         features: dict[str, int | NDArray[np.float32]],
+        synthesized: bool,
+        laid_out: bool,
+        routed: bool,
         done: bool,
     ) -> None:
         """Records a single compilation action and the resulting circuit state.
@@ -154,6 +174,9 @@ def record_step(
             fom_value: The figure of merit value for the compilation pass.
             fom_kind: The kind of fom value: 'exact' or 'approx'.
             features: The quantum circuit's feature vector used by the RL agent.
+            synthesized: Whether the circuit has already been synthesized.
+            laid_out: Whether the circuit has already been laid out.
+            routed: Whether the circuit has already been routed.
             done: Boolean indicating if this is the final step of the compilation.
         """
         present_ops_dict = current_qc.count_ops()
@@ -175,6 +198,9 @@ def record_step(
             entanglement_ratio=self._extract_float(features["entanglement_ratio"]),
             parallelism=self._extract_float(features["parallelism"]),
             liveness=self._extract_float(features["liveness"]),
+            synthesized=synthesized,
+            laid_out=laid_out,
+            routed=routed,
         )
         self.steps.append(new_step)
 

From 4b3368a6fe6619cc512e55df71f0af0d881960f2 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 11 Apr 2026 13:30:46 +0200
Subject: [PATCH 09/30] CompilationStep now includes gate count per operation
 values

---
 src/mqt/predictor/rl/tracer.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index ed7ed5ed5..cea681235 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -61,6 +61,8 @@ class CompilationStep:
         action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords').
         reward: The calculated reward value for applying this specific action.
         current_depth: The depth of the quantum circuit after the action was applied.
+        num_qubits: The number of qubits in the circuit.
+        gates_per_operation: The number of gates per operation occurring in the circuit.
         total_gates: The total number of gates included in the circuit.
         fom_value: The figure of merit value for this compilation pass.
         fom_kind: The kind of fom value: 'exact' or 'approx'.
@@ -76,6 +78,7 @@ class CompilationStep:
     reward: float
     current_depth: int
     num_qubits: int
+    gates_per_operation: dict[str, int]
     total_gates: int
     fom_value: float
     fom_kind: str
@@ -179,8 +182,12 @@ def record_step(
             routed: Whether the circuit has already been routed.
             done: Boolean indicating if this is the final step of the compilation.
         """
-        present_ops_dict = current_qc.count_ops()
-        total_gates = sum(count for gate, count in present_ops_dict.items() if gate != "barrier")
+        present_ops_dict: dict[str, int] = {
+            str(gate_name): int(count)
+            for gate_name, count in current_qc.count_ops().items()
+            if str(gate_name) != "barrier"
+        }
+        total_gates = sum(present_ops_dict.values())
 
         new_step = CompilationStep(
             step_index=step_index,
@@ -188,6 +195,7 @@ def record_step(
             reward=round(reward, 6),
             current_depth=current_qc.depth(),
             num_qubits=current_qc.num_qubits,
+            gates_per_operation=present_ops_dict,
             total_gates=total_gates,
             fom_value=round(fom_value, 6),
             fom_kind=fom_kind,

From 88b8fb6c5b85ef32765604e6e49c39b12478620a Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 11 Apr 2026 14:40:33 +0200
Subject: [PATCH 10/30] expected_fidelity is now permanently included in
 tracing

This happens regardless of what the actual figure of merit is
---
 src/mqt/predictor/rl/predictorenv.py | 35 ++++++++++++++++++++--------
 src/mqt/predictor/rl/tracer.py       | 35 +++++++++-------------------
 2 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 9a96c2f7e..49567962f 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -278,11 +278,25 @@ def _log_step_reward(
 
         if self.tracer is not None and self.tracer_output_path is not None:
             synthesized, laid_out, routed = self._get_mdp_state()
+
+            if self.reward_function == "expected_fidelity":
+                fidelity_val = fom_value
+                fidelity_kind = fom_kind
+            else:
+                fidelity_val = (
+                    expected_fidelity(qc=self.state, device=self.device)
+                    if (synthesized and routed)
+                    else approx_expected_fidelity(qc=self.state, device=self.device, error_rates=self.err_by_gate)
+                )
+                fidelity_kind = "exact" if (synthesized and routed) else "approx"
+
             self.tracer.record_step(
                 step_index=step_index,
                 action=action_name,
                 reward=reward_val,
                 current_qc=self.state,
+                expected_fidelity=fidelity_val,
+                fidelity_kind=fidelity_kind,
                 fom_value=fom_value,
                 fom_kind=fom_kind,
                 features=feature_vector,
@@ -536,24 +550,25 @@ def reset(
         obs = create_feature_dict(self.state)
         self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto")
 
-        if self.tracer_output_path is not None:
-            synthesized, laid_out, routed = self._get_mdp_state()
+        logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
 
+        if self.tracer_output_path is not None:
             self.tracer = CompilationTracer.from_initial_state(
                 device=self.device,
-                input_circuit=self.state,
                 circuit_name=self.current_circuit_name,
                 figure_of_merit=self.reward_function,
                 mdp_policy=self.mdp,
-                features=obs,
-                initial_fom=self.prev_reward,
-                fom_kind=self.prev_reward_kind,
-                synthesized=synthesized,
-                laid_out=laid_out,
-                routed=routed,
             )
 
-        logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
+            self._log_step_reward(
+                step_index=0,
+                action_name="Baseline",
+                reward_val=0.0,
+                fom_value=self.prev_reward,
+                fom_kind=self.prev_reward_kind,
+                feature_vector=obs,
+                done=False,
+            )
 
         return obs, {}
 
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index cea681235..ac8ac67e5 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -64,6 +64,8 @@ class CompilationStep:
         num_qubits: The number of qubits in the circuit.
         gates_per_operation: The number of gates per operation occurring in the circuit.
         total_gates: The total number of gates included in the circuit.
+        expected_fidelity: The expected fidelity of the circuit.
+        fidelity_kind: The kind of fidelity value: 'exact' or 'approx'.
         fom_value: The figure of merit value for this compilation pass.
         fom_kind: The kind of fom value: 'exact' or 'approx'.
         synthesized: Whether the circuit has already been synthesized.
@@ -80,6 +82,8 @@ class CompilationStep:
     num_qubits: int
     gates_per_operation: dict[str, int]
     total_gates: int
+    expected_fidelity: float
+    fidelity_kind: str
     fom_value: float
     fom_kind: str
     synthesized: bool
@@ -120,45 +124,24 @@ class CompilationTracer:
     def from_initial_state(
         cls,
         device: Target,
-        input_circuit: QuantumCircuit,
         circuit_name: str,
         figure_of_merit: str,
         mdp_policy: str,
-        features: dict[str, int | NDArray[np.float32]],
-        initial_fom: float,
-        fom_kind: str,
-        synthesized: bool,
-        laid_out: bool,
-        routed: bool,
     ) -> CompilationTracer:
         """Alternative constructor to build the tracer more conveniently from the environment's initial state."""
         device_meta = cls._extract_device_metadata(device)
-        tracer = cls(
+        return cls(
             circuit_name=circuit_name, figure_of_merit=figure_of_merit, mdp_policy=mdp_policy, device=device_meta
         )
 
-        tracer.record_step(
-            step_index=0,
-            action="Baseline",
-            reward=0.0,
-            current_qc=input_circuit,
-            fom_value=initial_fom,
-            fom_kind=fom_kind,
-            features=features,
-            synthesized=synthesized,
-            laid_out=laid_out,
-            routed=routed,
-            done=False,
-        )
-
-        return tracer
-
     def record_step(
         self,
         step_index: int,
         action: str,
         reward: float,
         current_qc: QuantumCircuit,
+        expected_fidelity: float,
+        fidelity_kind: str,
         fom_value: float,
         fom_kind: str,
         features: dict[str, int | NDArray[np.float32]],
@@ -174,6 +157,8 @@ def record_step(
             action: The name of the compilation pass that was just applied.
             reward: The calculated reward for the applied pass.
             current_qc: The current Qiskit QuantumCircuit object after the pass.
+            expected_fidelity: The expected fidelity of the circuit after applying the pass.
+            fidelity_kind: The kind of fidelity value: 'exact' or 'approx'.
             fom_value: The figure of merit value for the compilation pass.
             fom_kind: The kind of fom value: 'exact' or 'approx'.
             features: The quantum circuit's feature vector used by the RL agent.
@@ -197,6 +182,8 @@ def record_step(
             num_qubits=current_qc.num_qubits,
             gates_per_operation=present_ops_dict,
             total_gates=total_gates,
+            expected_fidelity=round(expected_fidelity, 6),
+            fidelity_kind=fidelity_kind,
             fom_value=round(fom_value, 6),
             fom_kind=fom_kind,
             is_terminal=done,

From 2e24f2120a24d1eb242c7d7517e09bbabe4c26fe Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 11 Apr 2026 14:46:09 +0200
Subject: [PATCH 11/30] included schema version & timestamp values

---
 src/mqt/predictor/rl/tracer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index ac8ac67e5..43cbfff75 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -11,6 +11,7 @@
 from __future__ import annotations
 
 import json
+import time
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -111,6 +112,8 @@ class CompilationTracer:
         figure_of_merit: The chosen figure of merit for this compilation.
         mdp_policy: The MDP transition policy.
         device: The target device metadata.
+        schema_version: The version of this schema. Upgrade in case of schema changes to maintain compatibility with tracer frontend.
+        timestamp: A timestamp indicating start of the compilation.
         steps: An ordered list of CompilationStep snapshots.
     """
 
@@ -118,6 +121,8 @@ class CompilationTracer:
     figure_of_merit: str
     mdp_policy: str
     device: DeviceMetadata
+    schema_version: str = "1.0.0"
+    timestamp: float = field(default_factory=time.time)
     steps: list[CompilationStep] = field(default_factory=list)
 
     @classmethod

From e84d0f568b345bd533bdfc6b3319edd217e8933e Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sat, 11 Apr 2026 18:57:25 +0200
Subject: [PATCH 12/30] added more doc comments

---
 src/mqt/predictor/rl/tracer.py | 37 ++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index 43cbfff75..11fc615b1 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -27,7 +27,12 @@
 
 @dataclass
 class TopologyEdge:
-    """Represents a topology edge between two qubits."""
+    """Represents a topology edge between two qubits.
+
+    Attributes:
+        control: The control qubit index.
+        target: The target qubit index.
+    """
 
     control: int
     target: int
@@ -35,7 +40,13 @@ class TopologyEdge:
 
 @dataclass
 class GateCalibration:
-    """Calibration data for a specific gate on a specific set of qubits."""
+    """Calibration data for a specific gate on a specific set of qubits.
+
+    Attributes:
+        qubits: The qubits that the calibration data applies to.
+        duration: The instruction's execution duration (in seconds) on the specified set of qubits.
+        error: The average error rate for the instruction on the specified set of qubits.
+    """
 
     qubits: list[int]
     duration: float | None
@@ -44,7 +55,14 @@ class GateCalibration:
 
 @dataclass
 class DeviceMetadata:
-    """Metadata containing information about the target quantum device for compilation."""
+    """Metadata containing information about the target quantum device for compilation.
+
+    Attributes:
+        description: The name of the quantum device.
+        device_qubits: The number of qubits available on the device.
+        native_gates: A set of gates native to this device.
+        calibration_data: The calibration data for this device per native instruction.
+    """
 
     description: str
     device_qubits: int
@@ -74,6 +92,10 @@ class CompilationStep:
         routed: Whether the circuit has already been routed.
         is_terminal: A flag indicating if the compilation process has concluded.
         circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format.
+        program_communication: The program communication value for the current circuit.
+        entanglement_ratio: The entanglement ratio for the current circuit.
+        parallelism: The parallelism value for the current circuit.
+        liveness: The liveness value for the current circuit.
     """
 
     step_index: int
@@ -133,7 +155,14 @@ def from_initial_state(
         figure_of_merit: str,
         mdp_policy: str,
     ) -> CompilationTracer:
-        """Alternative constructor to build the tracer more conveniently from the environment's initial state."""
+        """Alternative constructor to build the tracer more conveniently from the environment's initial state.
+
+        Args:
+            device: The target device for which compilation is performed.
+            circuit_name: The name of the circuit being compiled.
+            figure_of_merit: The chosen figure of merit for this compilation.
+            mdp_policy: The MDP transition policy.
+        """
         device_meta = cls._extract_device_metadata(device)
         return cls(
             circuit_name=circuit_name, figure_of_merit=figure_of_merit, mdp_policy=mdp_policy, device=device_meta

From 2b3bf4fb2681033424276f65bc4ced6be5a00c07 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 12 Apr 2026 11:29:00 +0000
Subject: [PATCH 13/30] =?UTF-8?q?=F0=9F=8E=A8=20pre-commit=20fixes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mqt/predictor/rl/actions.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/mqt/predictor/rl/actions.py b/src/mqt/predictor/rl/actions.py
index 0e32d63d2..0e0e31be6 100644
--- a/src/mqt/predictor/rl/actions.py
+++ b/src/mqt/predictor/rl/actions.py
@@ -10,7 +10,6 @@
 
 from __future__ import annotations
 
-import os
 import sys
 import warnings
 from collections import defaultdict
@@ -18,8 +17,6 @@
 from enum import Enum
 from typing import TYPE_CHECKING
 
-from bqskit import MachineModel
-from bqskit import compile as bqskit_compile
 from pytket.architecture import Architecture
 from pytket.passes import (
     CliffordSimp,
@@ -78,7 +75,6 @@
 
 from mqt.predictor.rl.parsing import (
     PreProcessTKETRoutingAfterQiskitLayout,
-    get_bqskit_native_gates,
 )
 
 IS_WIN_PY313 = sys.platform == "win32" and sys.version_info[:2] == (3, 13)
@@ -366,7 +362,7 @@ def remove_action(name: str) -> None:
     )
 )
 
-#register_action(
+# register_action(
 #    DeviceDependentAction(
 #        "BQSKitO2",
 #        CompilationOrigin.BQSKIT,
@@ -380,7 +376,7 @@ def remove_action(name: str) -> None:
 #            num_workers=-1,
 #        ),
 #    )
-#)
+# )
 
 register_action(
     DeviceDependentAction(
@@ -526,7 +522,7 @@ def remove_action(name: str) -> None:
     )
 )
 
-#register_action(
+# register_action(
 #    DeviceDependentAction(
 #        "BQSKitMapping",
 #        CompilationOrigin.BQSKIT,
@@ -548,7 +544,7 @@ def remove_action(name: str) -> None:
 #            )
 #        ),
 #    )
-#)
+# )
 
 register_action(
     DeviceDependentAction(
@@ -561,7 +557,7 @@ def remove_action(name: str) -> None:
     )
 )
 
-#register_action(
+# register_action(
 #    DeviceDependentAction(
 #        "BQSKitSynthesis",
 #        CompilationOrigin.BQSKIT,
@@ -578,7 +574,7 @@ def remove_action(name: str) -> None:
 #            )
 #        ),
 #    )
-#)
+# )
 
 register_action(
     DeviceIndependentAction(

From efc94a35148b369e068e05bf23da5726fb5845f3 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Sun, 12 Apr 2026 13:55:10 +0200
Subject: [PATCH 14/30] revert accidental inclusion of local model and gitkeep
 files

---
 .../trained_clf_expected_fidelity.joblib      | Bin 81951 -> 81263 bytes
 .../training_circuits_compiled/.gitkeep       |   0
 .../training_data_aggregated/.gitkeep         |   0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/mqt/predictor/ml/training_data/training_circuits_compiled/.gitkeep
 create mode 100644 src/mqt/predictor/ml/training_data/training_data_aggregated/.gitkeep

diff --git a/src/mqt/predictor/ml/training_data/trained_model/trained_clf_expected_fidelity.joblib b/src/mqt/predictor/ml/training_data/trained_model/trained_clf_expected_fidelity.joblib
index e040b02848e55c97687245e66317c870ebaac433..576f8f16c6055c8eff5117795dd32304a7a13e87 100644
GIT binary patch
delta 7187
zcmZu!c_5VQ7anUO<(5=hNN!w}NJ<MwT`iVm%f5^yTV(8OzQNeZGLkG=l7vcGD{6{L
zmT0-HlvF5^s6^t&d;0S?e{_7$`<(4L=bib=W%7NQ;4L5J;!%-TsQAG(#Ita7T5L>o
znn9YZHjncjU0q#m74{E4M=nLS8UIW6)_DuhXwIqTzD{Fn*Z<8^V7Qd+U9TaMAZT;g
z+sDP-*2lvu%~{Z!?Zw$c@k~!o(`G&l--|G7oh<5zHQ4l!SdQogV%3yuXDxGJ4Dl-~
z&l5a!`Z}?X7d;|2P8{QG?604F=CE=*2)<XPlvqirYXs+n5uBoN3zd+AS0N;UdW|4D
zxSqoVx3^s-=YQ)WKIMk|H4F@7ybtk-WK2-RAvjaW;Q{fBl0iK9DZy#Wf3y>v-@ItX
zQZyRZ)fxi)bVG;rO6SP)h#e`2mOj=-o_oJwVp<<6(Ors(Q$J&&mnTs<5206t|83hP
zVj0hG%vwo)nGds;IlrWV;Gv^;U^YL~IOHh-bXJuULVq7;)~XK_b`t#b*kyw2i_JL9
z@LgNF3EtjMlwrz$Yngpk|6SQd@IgInC}mgk>@z2(=`+DcYwr`@P$%I@@|5%dH5;%b
zkR+s;Cfp{3xMh!tLxMC)XVgk&N$U~D8P8M28K3-0I%dx4#GSBLA%)UhObDN2tM-h<
z5!+4p%r?2Zv(KCqIczxAqM9OvJ9BRm8_|NZF*Qf2=WfIf#kV5PZH`j!4kR6N8boje
zVux^KclogM94*A@eq|)?s1))=ap`E4M{zXzlN2u3PQjOi$Xi23K%*ZgozY9L^bq`m
z8<C#&JCV$e^OqbdBpZ)PEY2c)^^(3fgfDi3D8cDcB+78Mq`oG2;-2bR%Y1PO6<47)
z{6`L^#V~RAUUHo{d&<bN&~BX}4z%JnB4xLS0M5Sl9r9zlNCKwmvL_@C?_I>L;&C(z
zak#uayr^ioY8BxLs34!JD<B8Vuq9;nv_0+5387es1Y%adBLSFdW~k8SLnyJADH$2F
zQjIJhO?nO{@b#>i^=J03L)<9+EQxoEeTCRYf3k$M?m~i7v^`0c#33rai4d-Y<`aAN
z5jyxUK<V9X6cYU2L4q?+tR>4tb2yhr@E^Oe;HUf;M?mKc!5KUle~lUP4JMH9TR&SU
zMWZ=HG!r8C;sRoIj}Zr2rv~CGpAnz<N;;#})L>@|i?FjN+9>J#tthpa!0Z<bjpjW@
zaOM{U>{$FHHjv<kJJ6mp6K5tr?P)k}FRwYyI)!nM1SsBycmq1nR*7Pu6&EmZt|;;;
zVJuYO8S<M=vExfGk?*!9m&B_)kA>2z5wGJbB=?`T{WVtlRtq~DlEMxeHzQuB*Gd9-
zrC@>sXEDJb1NqXuDD|1$s2uA*EQsM-Lj3+pz@53m#5g6xIO5WPDm_97iNJ~`s<5*u
zLzHOUDpaCw0VY;(!VaFlLH=`D<QMJ4r5odJWK;KcQ9SZ3XLN-<Kz7I3x|(B5Q1UR4
z%GeMIi@CGDy?*Nio{d`9lDMMaw+542>Rl8}De>`UYMH^3Fy&pp^`jtVvwUoq^Km$4
zBeqH^B@iZp<tjAY<Dn(xpm$h)D!i{aBy0TD7it#l5Es1a3vWk$@qEjTg?Wp5s;`bS
zDG0W=@)}te1h1PnwV5}tAiGS-zPKR_uJ1i?U+sAaWSX4YEmIN+9}avs6pr!+Bipdl
z0iOt9XQpZLZH)!lP+k2W=>Z_+%_|wh34<4BUCi3|`oYH}_J*l%cF?roQNr8LUeLAv
z{TudR3~&W(<ivjPhD!5G)k`-}QJ_AkXf^j@G`#Q=8=c7ThKu4&one~6FdkvJcG@lq
zws9F;4B!ueuc}6c<}(TusqpolI1mZBpR(IxYD3^amCmu+Pk}HkP?N`Yya`F&@jVb=
z2k&w_bouV3gH&J^Xp5&nVqwz8=_9A0Y|c4Z?rnKcmDg}BoSH0w9&ITj(Mv4o;8}dA
zz&aC_Y@OdV7+VdAswt(<wk5;+FGFXvm!^WGI;U;DQW<cs{GqqAG7Q`n7Wt;y<->2u
z=nF5doQE|o<qC^=li*48iTSjFVo?66d%I`49I}|+l2u9^5Y)Kb$dg?Qk|`4>pPfDl
zM>~I}P#@S8FtTNHpk728NId1S%if#~uKND$Lc>T{HM--S87Ch+IbYQi_oRbQ{XR#f
zng9qXe7EwG?=h&?VBJi690z+d^v-Db_yAwh4fCOsEKtjQ(LA{|9lEljBR%}W;iSvw
zEZ(kEu+u9PWyKzcLAojEYOulS1UJR2!Ic9!HpN`_5-dnyPL#wv^@dY5+Mk-${lLQ9
z?8Q=n7|;&jGF$Z{6xK*ruROf)5FDtUZrI-#3y*1My>)V3A(6pkty|B87RiyU&hIf0
zVO6{47Rwxtr4@;-9tehu>57|RmlIgEZ1?L94TEi^OOC`vGhw)wGXMK%Qax<i+`g$V
zm<3I((`L*6WkKiqzDq8;kuWzh$xS<@93E@QM82|5g;3Ls#=eqM;A}j2vy<%w%N?(+
zE|~U#1xsbujaNm(53#tW;R7*{zbxLXmpc)92OZ9i-AIOO_MJkz?}foz@8fxtWi?z6
z@&4>9Uk;;`OswjcYa#IW^RzJOikW2$zuT4mv=lV2XFRj-xePZYB>NnyE`S#OjNLMY
zn=qd6D`B2m9K816RanZC0?tlrZ>tNkA*<<>YTw&qaP&%Xb-}49Q0;9Ej+!)w1B=SI
z;bJTl71z7I+n)sdUvk}_t%(Lsz|qo6&LKcueQ%K3XO#~t<rPxf4&DHt`Qt`+N{>U<
zoKGvhisysi^45b>+l#@f_lc~MJR71Py0iKvj>E%scM}(_j)h{;#m7D`iiL&-u8{6|
zELicQaFMuVGUR66y_~g?1=R^=*5y7&VetvEOS|^O!<S{(<~rL3!ad4x>2;St3gjDK
z!kz;m5Fp8C<f&{9zwX-a-=uB=R;|%0FLRE-qir>scCJC-XD(v;@)`>+zOdJ|l@Ed2
zDO~THOFW>Ae$~7v#}n363A}A=ItKCT3KFIU9xxK^dtzkjIE0O6oHb}?!@xibPkfCn
z_&6A=JlGHcCTo4~Mqj2<z*FIJTIUUCU|s0q-?<|gG--*#VLZt|`}OJK=_G$J&gf1u
zuxCN!)a{dW(==FN-G0yCi3wZhSxmh%3j@=Chhg8hR8W_5H_vQz2ICutBa{z?!xCjH
z`kRND(5n3O`~4&)49!n9o>N6ZDRbSxseev_W=dSyJ?dN#bQo*<_*{vDMJ5Nud7h+0
zbr$!Oa7Q#eyk)s8Kr;#?TFiBYy)r;vo040`9SCDPw>ZpM!hk0Q7B6oXSU|0pP8Utx
z8+r@+^Cc{zpe>fWE2Z8Ik{lN59eL#ocl15ELu8YIhijj5QhqXUkL)U|3kZeVal7@Y
z3q#4!5#5|CQJM~BtnJqv_l3h|8@?5-MSdXU)2-!Rm<hFAcb{%lbODvlWfNy(LZE+O
z2bY7uY3QmyojP{T59Uhk+M3rJ1w!5vH6DA@;4zP!->*4A&?uP`m3{a)C}#UdiZ4F~
zL8JW|Tz7pydbIo;<5dLgnf$K9-#wWOb#^VRiP8w@Wvh3Tw>Sasn($*<PPve+s%+%_
zk0-2tU--ymC>B1nD>^r8#X{Y9z$tNwF!*~^%_(4^7nrI!|F}6F#ikzYr5t_9?_@`H
z7_rXmRAM=|juPA7%p_LNfsJ^+8?kvdEMhx?GKlRi^CmW?GlE!=;dEjZB)o|2SnWrw
zUQ-IOk{V9LQuK^K@<(OX31T}UJc-TQgo&Nyu^{yTba2i>Jb5`5q}PNAM2>_Izn%KX
zm&`vxo|!X0va|0@!z&OM3Bvg0jAOGnMN6Cd)7f)>LKIQQ1iT_A$+LnBgV^q8LBu9#
zBF=Qcj`fx!?iIr!=fm5vpyUEnLbe6tD>MX?^St~xVtGqu-<>ELt)m_j#H@)W2T}_7
zR><JOkuavC2u{<-w>(-62C{sCQi^J2k@MOyEL^V%<1m@o1TX%OK&*r6%q5-qxT&M%
zxXd(6%>bsINa&D2iIOw1vy8}a;$SK9$6;Rpc^(!*Nn3yX>8uX<vOAGKIF1d71o#lX
zfKVc_lIM`0Jb(%f*GAx5V)o3>{cL5DOFf9Cg<=8~78XQ1i?gTkVuG@fG{U2gB7gW0
zMV{v=U>s%x@`p=937*!6iS^u3X@PmD?Bt4abS$sIit++cLLDEJlx~5u;@n5P@+Ai1
zcsP^5ONDX92`cBvbMhlhpy7|p{EhDv!5JIzmYMEE{v=-x!SfU;Of2bw2Yc3)lLI;z
zZZl&7rBrc8I~<CE%3JZ;WmsdO9Z5Kvyc~=(=#AHp?t<4Y`6k(R+MsM6iLc;{*SDK0
zMk)9uF#&xNmuns^lMw4(qXQ=r1DmGca&;==EhrbBS)!Q_Jqi_Ke8bL}{J0YV>M`Ve
zDL>-FPp}~7ejFv09Ei(7|8tKO3h_Gew%{x|Be)CYuduMf)tES029;nuz(5(wm_XsD
zCrQW{!^W~J|KQL5EFBe1&J~J@f)tJ8gbqKCpo4S)N=GZlZ@_@f*je_>Pt9znc@@YH
z7)M1i5)m)Y#}WK&$AU!Va7nv$P>JVq7{{js8_+YIEr_Dg0(#;}VD<7pi65c@j0&vO
z^c8kMo5q4_!?92|Z=C(`3G6Jx0uy&!LghLdu~3m5yake>7@xTu6=lAki8GU**1rxb
zQdo@ll|G6I6b@n_x(Uv>I|P@gc;?4-R*B&ah&!J`CEN@VSJ;ekYP}J!Jc<o4GhB&Z
zZ4oXXGv*}hn+;U{7gn0whaF6=Lg|Kgp`_jap>zVN*g?l2-j0sT$Y<V5U{n9?qIR}P
zQM)c1LHk?%`zzY5VQI}GrROp(Fv4;%6Ft2PcHF$|!l&c_Iu~!J+dW|b^Lyiyd5`r#
z`Hsh{AZd5FVl&n*v3KTWE9dj^_H}yjd*zV|VI2o(st#MvQxpy*ZVuNhri@{c;{nAg
z-~nz$>5H~rN5E!hlMVmpX-dt~<QCrrR~-nPuD>I{mH~5>Qgyi=+QSMrs4W{~z`)7{
z@}G>IVO+;y`%#e}YF?4@%YXCQgWjcXzj_CEc(U;Fj#uM!aOvl8$2Xh8#y6o}F-sl6
zFjX`N)@s75Lb0ro4inh_+Rm&d!xYvOBp4P?+e3HSX`{N=lqEDiGu9~9@`mOLdxwu3
z55xP-L3Y<m4nxq|6{kPS`ao-Y(M|y^W0-q>Oh#3}77Cxp>{o8JhjT;WdAmi-;c&zC
z#~}j-@a(#gimhq|G%ooV{Vi4>Ru$C+oJn;8ca6U~zZiOh9JiOb6&C~6n=oiHQh}g5
zw@*@vvWkGYp`Y^k-dh06WO=Y>GaVX5Q(SvA7$APH;_M3<JqSp>-xmJH5465l9ad6g
z!gI~#A{u&jaP?5_`z5wPux8!-rxxcO!6|XS*GngT=xrGG<3t6+Eq5-(v~w;nzTaG#
z^@IhgJt1434*7$Wn6&)WRyy2SvMHI$d0+|0D|f$2rZXW$sCh&A2V3YU^boN=U;#GO
zoXU^o4$yp6`#ful5qOB-+V?!&7^=*+vQmF*!X1sNkCjFKpt@t-Deu%n;I9~UGu6=*
zvXtN2$E-<&hQlI*Uu|qbc+$G6MBEQN?)jbxH8O=}yMaOZ<#gEQ$D$Tw=2}7IvY)5U
zi~55G3m&O*{R`Sh^;!gWnZVJA`IATYdcgw4wM*Uh)8J9lW3PQNP7skl;Iw|z5m=bn
zlw%fU1q)BhO7}z_0$l?!(Sir=@VP2=B2Q=~tb7nSVjW@vU%NwU)W*C(^yW7cVVPYZ
zvHYq{`J^4(o1*^alX5-}H3iq>obS^??1hT-UmX_UIa$%>7ZN@5pR->5w*|*xK~6Qi
zR&s&l?;l&X_A`L@!<8&WZ7DcC?XUfAzAcpI2p^>$B*V$ryUWyqY@uj-UmojMIMk#s
zo$I$D1cW8`=;+1T!hn6|4bwO$&<hq(;(r<pj?|V_;W9!-AX(A>Z*!nAxMfFb*QVz~
z?N{%~?>6or<}v?Wgl7>1)z^Md;9`MfdbxwsE@u#^Kh<3?=mlqV{Ua_31;G7}UDMes
zY~U|S+1B!<EjaJ39%p_xgu>F`)oeFYSUG>C^hpCf=$HB)*D??dEkkk2(E*k~?YL~+
z_Rk>)(3B3&wQPt03x{v#Po1{{nG*w7Pq-e3(ec0EuDX>10fU`VcSBqusO&-VXoMFC
z+LozxTe!mW?<|44zs-PA$X-*&u!g6XcWMXl89-lG!1lBLcA$SXa(`$512(&RdL7>B
z0-=`isq_(jcsd+e*P=wH;Qlv>Hm-)fpyo6+F|^1M#OH}^-nM!#D81<!uH0=07P>Za
zv66I{7k;EvW}78^xahR~d|@aE?QLF^?BoG`L#sWCJlsGu^g;fde_Vm7T`c+60|yYg
zBq$KZZv)3QYI9@PI{}xFh8<V51srjCHuJ^d4)3ahe`=Xi4&b~mXYQttnou!XvND@(
z4i~P6Z{gu`fx6FMbpjjJVA%c8&v+AiNL(iO;R+W8Hx24+J*Q0Iw?lvB0x?HOI2iBH
z=57b`Pnez$m9qu?c*_L?f>uylZ>uHn#vLL+Nh%@N7#?2Q@nG&(BQRU_d!=RbVfaUN
z_kShSej7LzRTX2c%7i5<g0x<KPq5B(wY;3}0@VUO|Ared;6-q)7eD<Nv^j1OaA`6G
zxz<&;^F?i7$%l<Wxpu)|yu#q+n6@$a2=e3(tT%#Ny#~sOs~J#L7xZd0+YrpdM6Y)$
zIKq9W^}D;K9boX#mdeWc*3jSG(?+Q@+Jf$aD_6Zz1Hrho?$>{YCJ?H9``Tea3$XlW
zTg5zAcW@T6{Jmn@3|hSs+6)^HfYllKPLXkUi2CWIMNJ(B+lz6Z8hdPE#M-;Z*vk)u
zmyEAor>YIP@5SzmKDGe=?$V1N1g+t#fAAKCx3VCT{&C&AY7=m@J*gusO=G}4`ENGS
z`eyL`&8qsQOPY|jir!VG8VIKXP30mc55mB2+voHxhoF<TFecc@5qy7i?^$iE3vCyS
z&*=-zTqpXfu7}rjDyU97?nSWzG@7hy!py(U4z7ta#`So`x7QtW{@(%n`j9;PcdtT)
faDN^jJE#9S`@-j6|BqO4eW>;SMySaA5vln<8Zg~i

delta 7872
zcmb7}c{J30`^U$YJ%c1dld@({MfhHm$QFr|r7~kD`<l>9s1%_nDrwrJx+|rkMJX-r
zaz{dGqYafr*0McguJ3)$@BDuEbDrndAAOwH^?tvv>$A-0oPPWkJjxa->*o_#qGwb-
z#MdJrvoL>G#?E|~d`$;|AZsTlCkNxaA)$ag#<Ukg5|i9N<tD<?*Z2bj@*Jwv1Wq3*
z%!{eAVrgcHdHcu228S_YBck(z#ImHl!~A2Je#}^=ceH<09JiLAMT^-K>d%Z0*Nck{
z4vo?KKZ=<AEGb9Q<wBbB^0l_*r{rs92}LA?`$y-8M2Ju*SposUvH2l!Q$Naa9YL%J
zzkh2K{d<5IZcfM+t_L%waR&^F@XHTL;-Wa8Eb&NYbZjs))SH)`!iCs5gw6f&e>y`_
zr!vyeB^(*;?-$JS<&N_oEj!U7$+P&zdPhb__%QzqUncsb|MrDO1pY5Ci+e)<X3iYa
z?C0+r91|Q7?i~;p?i<Vf;lF5{hhCu=--u}cf6!-1#xTPoL;by33I4%>L9zKxF}wo~
z62UVn_}d>DI(1+F@qmg@FB%)o437zjhz|QNJe(;H-6u;rI6U4zI>z7o|6@MMYZ&Jf
zm-nd3IxkR6lGPM{8YtX9SFNt`RwC&MZ@q6>!dpM|T;;8PZpbN;$P4*+dbUsya?80S
zzlHj)ImoN0A;0yM$JzD^k#~JQHI>Vv_*SB!cor6trO?n-i-x-f$S(^cHzWCWW#qRh
z$lWJUZ$a`<8uG-ykbetCz05OSW6I1oQ=CO%3mRj=g)C%|4IIcF(8Vz5eGM;&vfcyr
z<|OxjfDH^)kdHKCgTiXmC+i~jyn;E^<c1G~b2*E923y~tVgC&@c&MS_)pO+KXkZtS
zJ1hEt{AwHOH(8^8#sG50XXKJ(<HhyJ&*4c>>XxHk=Ibe#>QCXT;~Cid%&<UviH4;&
zG3cQL^5e4P4z8hox+n6VuTlT04E4uMk^inmUV8<(xDv?+k%v9F%9?rv>}~6@aMm0f
zuzS(qr;psJ1r6_>VPi+~k#@<TzPJ=0<BEj8rt&E9L8ZL)&_R6EPogoW<QqQrgh|xf
z{>dT><Xx)BVqsfU9WQ{=jE$p>k?$aP&{K!p)DRn3l8;oz9S2%KzM)&nn3HkoC@*hw
z!A0Kc_@#!oeq!Cig3|o5DZp+%f(6fu9G+msqDwq}VGw!y4&)3a<i)!m@N^j`F(=0u
zb8gGs=kX2$)c+7be$gED2h5J~>MTnyEEtrd;g3cPDyia4%1%0ljcrN(GYWU0@)3FE
zebj>lFNcy)fjn6k8`H02&e$X5&L2>}ua(DH>=zVl?EDxDBYZfJVhI{_`jH!_VB<U5
zs8=+_2K=$uAYdAD+d9-2hofHd1m+YzLVZ&h@*43eJ&QsqC3ohNg+cG1p&_FZgQy44
zP*RJ$2{0%|8h3nH7aJ=bLA}mV)Wb09InBt|7a~{F#48wKa&x%-DQ*k!E-f};5WO4?
z8C!Uq5(&sVI<WDX2dJMmfeqq!B5%Kn17((?K6WGWcyZ)Dg2)G+Vg7FJKvVtM*V?gg
zfDa#q-X*+(8)i7^Ar=Phh{qF>t;WXguQ4bUuYh9OhJ2(6dDJY-nIO4|C*}maz>{dM
zsIleV0fo}_6AeSpu@EDJ11ZizgGx6xp7?<cKAl4S)+4AVYHRZ9?=NG$$Zu8N9pDBE
z&KsPE1dL$DZF~QF>8oL$eFF2ar97;gc{^17#ysd}Tu;+kz66*V?~J}YR)vm^i&2s`
zi{Wj<AGxP5xI#)Q(b-aD1v4Mky_OT12X$%(N6u!Mg6BqdlJ{{P$li442|u41jE5Au
zN!zc4=8V&nk!7=3@RF@@Zs55h=*|+?n0?a(W*5gieC$Mrk+Yu`(t-hGebpz&1g*eL
zJ^h{4&sngmhjC_(s0oxFG1-0Y03H5P4LLrqmIkJ@s@0noSV2Q#v3lhbBWS3|n)gD|
z0CYaNNB_z&2f0**!Tcr%Fh07zHT|U$ENr%Dt{P#PLRH*3@%NS{uxrnJ|Ll39;E+@m
zd1<i{^k!}1d-B5q1_F$f-n>(Q<M6fKYMBYVU9x+kX0rwCI;G{l=8P@8EbVphZ7_ff
zH4YJVt%g9YX#D0SCI@12N>;az8^f(NjdOR$Xn}C<_vAbF?r>tJ&ZSkshOl->W#1Q;
zf+a-gZrLg&W&|^LsV?M%ErBQ7a!xYdnuE}61*aTWDm0#&KO<qYF*w{)%WOMi0&2<o
zn7t<!!OO(6!ge#P;TYrihDcLS_-mPcvFHBTP(^#^9DQpA9F}l>(Hg|f4UWqxADR#C
z4?1qI4KRnB<LArb!dC)Kb+n1K@0Awp`SoMt7JD<OQE_UUZe|7Nh>hQ$>S#lsl#_Ib
zq#o?E3Ao%omk!!DzQ_j|T0ywit__z=^x*#5q>DY(ZjkX|<5_><nXt!O@WPP4GSCl3
zm>pYT0Fq|rUh7t=!3fh_QaO7rJh=MkTg_fNB$}T%_p8nb7^M;{r_K-)uzgd%In34)
z&Q{u*hi!3#w*FVcDGH9zEE7*F$XE+$k9tKOw`+rRUCJ{4WHV@dwEtuJj!^i$uSoWR
z$|5jP_qACz5&)Vf9uBqqjE0x;YStFVy`b!JSIy*JUpU;+D_dU`3@O^>`(3XD!Vb%w
znNMZIz)qfZpMK0P0{kvBYTrH8h9SFYgB9uPpxtHXU~s?|xcf%qZiLG$Xa&m-XL}de
zIA>bmCp}Y;?MkIQD%XY$KCRh%AG(6qisBzSj{cxBr@g7x(;Aj=0&eS7SioxGa2n@q
z0(6*&zPXbp2kHv0$)in%Q0eOR{Cq|@c(K~IDw&;`16CmylMY|?ff=G1KX<KDh8#~?
z#Jm$5K~Z7#&iZ_RkQItB(hQG<gZxU_757&`5L35`Rj>w>*tvaHsheTMba{4hH5K?7
zqBljRGr>)tqIg9v7Q&Wq(Hfnx2EzNlg<tbn1B$&1o$8HPkiR9x|G*g+V3j~m>C7xY
zaFyH;*%O`&e4oFIrhc9cU=yO(nY;orKfPREQ(^>d!JI}psw}j8I2LRuAqRu9ag~dT
z*Fn0|w&y;xH9*1s_SQ+N2k7k--LBijy}~wuYMT&8SgS5{#PJIqvV?bcW{<81%aPnQ
zR%f*#$YxzJb%8w#0{t)OD(|+3f){gM>$@t!fGJZTpma4Pc6a7gHQT~QzeK5ZyG>!|
zA9s!BRH}md)~!2zJ}rSj*OiAC$f!bB@g3okE$ZO2<Z5?Qw-aPPnf0kQMhC8MqyD%i
zuL`}HOx><ZXV~>|{WifQXOOyk$YuF`4e<Ol-ZpfH<qHN>V~VP|4!9ad4L3eD0!5c2
zPgTwZLLk-N{OG7UXrG_l5x~}jw%S*h?2SBNdg`guxyJ!U4Htg;S>g`S%9YAG34X9D
zV$*|F)M+q$S@pWZ*M+blT<VI<t!UslZt}V-tN>A>1-dzx=#c02xbohyRLE3Kdl$ws
zT?HD~C8VOSC4-&Auk2shQBbvQKQP|N!I1f@wY!Tqf#4bXrGc7A_%S5C-8j?@wo~sa
zL~dfkynvX!LaL$Au;gQm;AS`I%<HNZv<?EhWq~qmr){8ap_S5N<pS!r%+nm6ID@Ba
zqbvWb7%<lzv~S7QgTSZpcUewzbiw*`52r?O3rrp>uUccj4q_F&dkib%KzQcELt)Q%
z!O+4E{muzjsJiF9=Z%&Yq(wdJmr>aOA0&z%-5b{gn~gVq>kAveFR85a((&9Aefw44
zw7?i%YrP*$Z)HNT{Eo|%#1L?fxLcWQ><oQ5*>8lhS3;X|&dQ#0J2#Np)9jS(ZU9|<
z3fp1~9KhkU{HX=~Oppl~<Mg#H2jz=k9-n9jqtR^+)A;>CY2|>&QAH{YthWm_k=B5`
zbxYSY_B-d*KUu-@@a6q}%v}=2TWt@g@z(YMCU2#9MDo@`6Xf(<<P2Nnl^c=M;*fK$
zM)35#eW|>au^l;G5%r}ysIOjtoW;41hK3Jlpgh4sFBJ_O4>YikA#Xp8dXY)g)7_Cb
zNTa@sY)pwl9;rn(Rwf%uAm=>e<*+CjE?D3UGk6noO0a>6EOLfF8W=l}Q`O0Vypgjn
zA)jSOa!FEu0(rGO$@7uZ_K^8jQ=G;9w}~%lI6@lMqk%>x4O@{H$|0w#Bd3=k57H+a
zq#$qaM_$T6-Y$%sU5lJziM(2e%cuIYKVTt}+%boNhUx<3ngnTZM$Ty^^+w3)Qpgz%
z$Z4V^_d!lMj-2X;oDHO&dj(VdDI#KI!G<i5cVAdV1_dLp9>XB_C+Xay;|{(GxicFd
zZ-W<d${FMayYcQD$Tz`W;luM)dm=BiNMrE;!wd~&)3H#t8yj;*@J%pAurXx<A6>gS
z>LW{0Z$O?z^+)9NCe-IraG+}P(Xl&`mu@CEAj3ifXD;p_NSbUMOY%l+V5>p$2C~7)
zXx`u=mobPogoDz#-+NOhL#aT0I|Fx2*T?*qhmcdMFvoyphlSii(x8F`Ho1eYuef6d
zd1uw+=bIjhdbR=TtLKp?v6O5;UKxYjaR&MQ@%PCwoKepzD@FrnF$Vqpxu=mkW>}B|
z72?s&+KT$>3gnb4$a{m3)5(utdj`o>$$?&=o{@x{YQf_y3Wxjaf9k|IqBzmZG2Ag-
z19`g=d8G3(h?9z(YJ%L|lx#4GSI|I1Udu+_u8(@U2y)Jb?RZ67r#Op3p(@AlfcqdC
z*kWkNy%WphoKobBTIAJK<P=-7aWLL-gD_sDNIluu7WHN1H)G)(l8Ydx3UCMF_NPRO
zk&npRpVzRx2`8m@p@ChF2HH3t-OB+yA%+(6_UFiHi%`!XkW=WGlY52CNhEVLkyHCq
z!Di~{X^XHxB|rCU@~f0W{!*bWMuR~r2GImi&mq598Zy)J>Km7_*6vNnl+RHCc?H?v
zilVu2qDI*vbMH)O7Fg#PYuHD)8;l+9v6&<;_%5-@sS<?V8IKtoW%~&B>*wRkUX2p7
zWlyEX-jo2F2%(UNC4B@{WZ*CP7sJF)Nw19blo_CSR=cJ(_$zVXNac2?qEX^wj^?}3
z=mFw=_+ra%Z@;mK60f54yk&z#@on=bD`pCUveWOme{P^c-lj0trh!3X!uSP<{ieb#
z*8$g-rxI{LY1-JPmSJLK?CGwO4>RCuf=xdA;ZMR(bi0sr(KwN3l4d9TeU#X8=-2VJ
zhq{U8&6mSvPfZXDOTq;PZV13BLTv4!w>`w;MJjG5_p^qH6t#kaM2G3{y51~di^E&u
z=Fc;G)XaVnzF+UQo#vN-AUV<Tf^U7q*q@U^$8LTn7A*UI`*+m{(L#SPV1IXn7_+dy
z6EuIAh?vWN?s&8y97?X7uFC(85O#Ial{^1|SU*3h@7Zx7xRN*b!lb}2B76TxY)-`(
zk-k*d(}gAXkqGMU7kh6vL^OXivdnb<MjSYG#`NkUIhfr4F0CrMgUB9a?-tViMYIT|
zcXpi<gB`79YcwuQ5`xSA(pZ%=4b+TeJ4|F|z>C)FyZGdW36`_u{@5R)V9T^EH}(HP
z+^Gn3UZ65WyfpiuHg}#N%nemC8ayoq+BYXCtflFF#H5?%Wu;^>aPL@r+iQ&olywGp
z4#|BXnE9C=JI8wo=9jg}LTo<R>tnrVy#*gwpHlR2YUYPo7ZQ|48fCyCey{w*v-d>*
zsA1U!`)=aej+|<H7$bDH-?S6EUPp9i&doS?ew3J>VY&L};~5}iD(M;@I6^Eqyq+au
z%8-GRMl%UpRetEW77~*aBmjGS%i}amsIb-Na-fBP9mtj*Ju~!Tf~c!Gn>U9c0Zj)@
z^H*uOfMd<H7VU%mM9Pi3Ryo$oL0{-{1rb08r!pD8fWHJFZDrNMz(rc{ab9p%os$HN
z-t2kA3Xq5IQj@)NCY0gOH`a)*kGLd+hE=H--RmTpX#K&<A4ovkm7FJ9+)qQQGhe^@
z=2qgYbL#ew+yRROM~&Xlhl#^t<?&w(J`$RuG2f4#Q-f~WN9$+r1i;%)&+S0L4Di!D
z<#^s=nAoq}dUM$lQz#KxopGeBjrcTPCVyp?6gd5`h+aC~4VqYiql?|fzY{}Q^?tGm
z3qkc5n>n$2Cba)?r@A0m0@VG^=H%z7!`#(g(siX`5U1M8?<1f9tEF`P^uMnF>9hq^
z$w_n&us+Hee@%gn$ISX=!;GO*PwDyo%EhoM??(UZ8-}o<Bk00489Mx_wl|tc(FNmL
zlam*SL4q}s-hH^l!V+SN%Ji55y6~>^+Q|z;KZw+m-G#cZ_`qIvz36zn2&~)Mk(n6x
zlMrt;<b3B7g3<x+!>R09Ae_KVykYmAsLWjXY{)<n!jrnJIJtv_c%-YNgOLarxfFjI
z-6;*>RolcS;?zOTqgOU{`!I2}{MV`EW>pq!&@RZy&(#H+;fsOB*$)XnE$_35I&x6?
zcutax>IiXSWmxWc>q#O!vgG88#D3yvde~k~>3LwpzI08*w4WF;e_MTUogmC_iJd02
zKoP`aQ*zVKG!tsyzHD4t@R9Iy<R5Z1>?Otq2DZ_1M~UNMwcD2;d`&EHo3To^gCzmq
z8s5YonlVTe^?up;G?5?HN*<O8@!<zA+uL6aS~`jO7PP+4LbKqdSZ$~mTN?KEsjh1j
zqrm0PhQ8RH@=&~GLEG&&GVn0>g!lWBCZZrOOW))z6%Mz(EIs;e5v*yJlu%yULj;QJ
z|2%zE79O4sIH0Y&2F$P2)=97^f*^VR+vd1*N0487=HkP7)^IDab+lyjJL21V*WN#;
z(_z!{(wbi`3!(d}hd`o?B-lts-!QZHfGC6hiL2=v5VA~tkL^=65NpxU?NV6`kGp#+
zI#&byF3P*z9z}sYAH6(QeeNNGMrQvyP@@Gg2Y21DT{K1*+-v@W^;vwBxOx3W>u@d|
z=1*MErp{gt9VO#GhLrW8MS8>1?e6;U{IiN(u{RasqMgn^44NdEQ448%f&?LypcM%e
z{w7M@48PRJ{Nf(<=-G;PanNZi_<Wc9741#u&v+oC2uXK@ZGYAnKtn=Q<6=HJm}i&0
zv?5j%Ix?QEoYXuh4Ekwll4XC45owLx<83}v@bRqo>wYB%)uV69dl-TcA$zp7Ku{Xe
zGx&Ez>S)5rw?0fW)pjD)F*j;kw={fEPVuT%Fe<Na_2HMT;{I=%W=J*n?;yXByWAz@
xUb)qq2H}4y<cW9d<vD&bDBtjzBb4|3^UJ*PucQAYEEn&t`^SXxul?KY{sm&gYybcN

diff --git a/src/mqt/predictor/ml/training_data/training_circuits_compiled/.gitkeep b/src/mqt/predictor/ml/training_data/training_circuits_compiled/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/mqt/predictor/ml/training_data/training_data_aggregated/.gitkeep b/src/mqt/predictor/ml/training_data/training_data_aggregated/.gitkeep
new file mode 100644
index 000000000..e69de29bb

From 2c76fbc1625dabda7d96c132f1542ec614494158 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 16:57:53 +0200
Subject: [PATCH 15/30] Added test that checks tracer presence and superficial
 semantic correctness

---
 tests/compilation/test_tracer.py | 57 ++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 tests/compilation/test_tracer.py

diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
new file mode 100644
index 000000000..d0b5087cf
--- /dev/null
+++ b/tests/compilation/test_tracer.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM
+# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH
+# All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+# Licensed under the MIT License
+
+"""Tests for the CompilationTracer."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from mqt.bench import BenchmarkLevel, get_benchmark
+
+from mqt.predictor.qcompile import qcompile
+from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata
+
+
+def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
+    """Test that the compilation tracer correctly generates a JSON file when a path is provided."""
+    trace_file = tmp_path / "test_trace.json"
+    qc = get_benchmark("ghz", level=BenchmarkLevel.INDEP, circuit_size=3)
+    _compiled_qc, _compilation_info, _selected_device = qcompile(
+        qc, figure_of_merit="expected_fidelity", tracer_output_path=str(trace_file)
+    )
+
+    assert trace_file.exists(), "Tracer JSON file was not generated."
+    assert trace_file.is_file(), "Tracer output path is not a valid file."
+
+    with Path(trace_file).open(encoding="utf-8") as f:
+        trace_data = json.load(f)
+
+    assert "circuit_name" in trace_data, "Tracer JSON is missing the circuit name."
+    assert "mdp_policy" in trace_data, "Tracer JSON is missing the mdp policy."
+    assert "device" in trace_data, "Tracer JSON is missing the device information."
+    assert "schema_version" in trace_data, "Tracer JSON is missing the schema version."
+    assert "timestamp" in trace_data, "Tracer JSON is missing the timestamp."
+    assert "steps" in trace_data, "Tracer JSON is missing the steps array."
+
+    assert len(trace_data["steps"]) > 0, "Tracer did not record any compilation steps."
+    assert trace_data["steps"][0]["action"] == "Baseline"
+    assert trace_data["schema_version"] == "1.0.0"
+
+    try:
+        # initialize from JSON (throws if the structures don't match)
+        DeviceMetadata(**trace_data["device"])
+        CompilationStep(**trace_data["steps"][0])
+
+    except TypeError as e:
+        # pytest.fail instantly stops the test and prints your custom error message
+        pytest.fail(
+            f"Semantic Validation Failed! The generated JSON does not match your Python dataclasses. Error: {e}"
+        )

From 6f2784a946bc1073c604dcf684c774806165fea6 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 17:08:13 +0200
Subject: [PATCH 16/30] adjusted CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b81df38f..6cde189b1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel
 - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**])
+- ✨ Added CompilationTracer that exports collects compilation information and exports it to a JSON file ([**@linus-hologram**])
 - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**])
 - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**])
 
@@ -74,6 +75,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool
 [**@denialhaag**]: https://github.com/denialhaag
 [**@bachase**]: https://github.com/bachase
 [**@Shaobo-Zhou**]: https://github.com/Shaobo-Zhou
+[**@linus-hologram**]: https://github.com/linus-hologram
 
 <!-- General links -->
 

From 14eb26d4b88cdaa6336d79bf7f97dd65d25a240e Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 19:16:28 +0200
Subject: [PATCH 17/30] Update CHANGELOG.md

wording fix

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: Linus <42340116+linus-hologram@users.noreply.github.com>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6cde189b1..3bbf5684b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel
 - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**])
-- ✨ Added CompilationTracer that exports collects compilation information and exports it to a JSON file ([**@linus-hologram**])
+- ✨ Added CompilationTracer that collects compilation information and exports it to a JSON file ([`#641`]) ([**@linus-hologram**])
 - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**])
 - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**])
 

From d0f0506797a1b3ef8ff5be8afc3f91c6aaf5f530 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 19:28:58 +0200
Subject: [PATCH 18/30] incorporated changes from coderabbit :)

---
 CHANGELOG.md                      |  2 +-
 src/mqt/predictor/rl/predictor.py |  8 +++++++-
 src/mqt/predictor/rl/tracer.py    | 13 +++++++++++--
 tests/compilation/test_tracer.py  |  7 +++++--
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3bbf5684b..dc916d5a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel
 - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**])
-- ✨ Added CompilationTracer that collects compilation information and exports it to a JSON file ([`#641`]) ([**@linus-hologram**])
+- ✨ Added CompilationTracer that collects compilation information and exports it to a JSON file ([#641]) ([**@linus-hologram**])
 - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**])
 - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**])
 
diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py
index dcd3ef5a5..d97a161c3 100644
--- a/src/mqt/predictor/rl/predictor.py
+++ b/src/mqt/predictor/rl/predictor.py
@@ -209,4 +209,10 @@ def rl_compile(
         predictor = predictor_singleton
         predictor.env.tracer_output_path = tracer_output_path
 
-    return predictor.compile_as_predicted(qc)
+    result = predictor.compile_as_predicted(qc)
+
+    # Reset tracer path to prevent leakage to subsequent calls
+    if predictor_singleton is not None:
+        predictor.env.tracer_output_path = None
+
+    return result
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index 11fc615b1..ba172845b 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -16,7 +16,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-import qiskit.qasm2 as qasm2
+from qiskit import qasm2
+from qiskit.qasm2 import QASM2ExportError
 
 if TYPE_CHECKING:
     import numpy as np
@@ -221,7 +222,7 @@ def record_step(
             fom_value=round(fom_value, 6),
             fom_kind=fom_kind,
             is_terminal=done,
-            circuit_qasm=qasm2.dumps(current_qc),
+            circuit_qasm=self._safe_qasm_dumps(current_qc),
             program_communication=self._extract_float(features["program_communication"]),
             critical_depth=self._extract_float(features["critical_depth"]),
             entanglement_ratio=self._extract_float(features["entanglement_ratio"]),
@@ -279,3 +280,11 @@ def _extract_float(val: int | NDArray[np.float32]) -> float:
         if isinstance(val, int):
             return float(val)
         return float(val[0])
+
+    @staticmethod
+    def _safe_qasm_dumps(qc: QuantumCircuit) -> str:
+        """Safely export circuit to QASM2, returning error message on failure."""
+        try:
+            return qasm2.dumps(qc)
+        except QASM2ExportError as e:
+            return f"QASM2 export failed: {e}"
diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
index d0b5087cf..1a2e57f22 100644
--- a/tests/compilation/test_tracer.py
+++ b/tests/compilation/test_tracer.py
@@ -11,7 +11,7 @@
 from __future__ import annotations
 
 import json
-from pathlib import Path
+from typing import TYPE_CHECKING
 
 import pytest
 from mqt.bench import BenchmarkLevel, get_benchmark
@@ -19,6 +19,9 @@
 from mqt.predictor.qcompile import qcompile
 from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata
 
+if TYPE_CHECKING:
+    from pathlib import Path
+
 
 def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     """Test that the compilation tracer correctly generates a JSON file when a path is provided."""
@@ -31,7 +34,7 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     assert trace_file.exists(), "Tracer JSON file was not generated."
     assert trace_file.is_file(), "Tracer output path is not a valid file."
 
-    with Path(trace_file).open(encoding="utf-8") as f:
+    with trace_file.open(encoding="utf-8") as f:
         trace_data = json.load(f)
 
     assert "circuit_name" in trace_data, "Tracer JSON is missing the circuit name."

From 393568397dfdb9399f91aadc74608fa7930cbe09 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 19:37:45 +0200
Subject: [PATCH 19/30] made unit test for tracer more powerful

- now also checks semantics of the first and last compilation steps
- now also checks structural integrity of the compilation steps
---
 tests/compilation/test_tracer.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
index 1a2e57f22..079ed7135 100644
--- a/tests/compilation/test_tracer.py
+++ b/tests/compilation/test_tracer.py
@@ -44,17 +44,22 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     assert "timestamp" in trace_data, "Tracer JSON is missing the timestamp."
     assert "steps" in trace_data, "Tracer JSON is missing the steps array."
 
-    assert len(trace_data["steps"]) > 0, "Tracer did not record any compilation steps."
-    assert trace_data["steps"][0]["action"] == "Baseline"
+    assert len(trace_data["steps"]) > 1, "Tracer should record subsequent compilation steps beyond the Baseline."
+    assert trace_data["steps"][0]["action"] == "Baseline", "First step must be Baseline."
     assert trace_data["schema_version"] == "1.0.0"
 
+    last_step_data = trace_data["steps"][-1]
+    assert last_step_data.get("isTerminal") is True, "The final compilation step must be marked as terminal."
+
     try:
-        # initialize from JSON (throws if the structures don't match)
+        # Initialize from JSON (throws if the structures don't match)
         DeviceMetadata(**trace_data["device"])
+
+        # Semantically validate both the first and the last steps
         CompilationStep(**trace_data["steps"][0])
+        CompilationStep(**last_step_data)
 
     except TypeError as e:
-        # pytest.fail instantly stops the test and prints your custom error message
         pytest.fail(
             f"Semantic Validation Failed! The generated JSON does not match your Python dataclasses. Error: {e}"
         )

From 62000bb1d97136596ec4ecb496eae9fa7f5f6c72 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 19:41:18 +0200
Subject: [PATCH 20/30] incorporate CodeRabbit feedback

---
 src/mqt/predictor/rl/tracer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index ba172845b..7957b702a 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -240,7 +240,9 @@ def save_to_json(self, filepath: str | Path) -> None:
         Args:
             filepath: The destination path or filename for the output JSON file.
         """
-        with Path(filepath).open("w", encoding="utf-8") as f:
+        path = Path(filepath)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("w", encoding="utf-8") as f:
             json.dump(asdict(self), f, indent=4)
 
     @staticmethod

From 90f4bb329eeaf530af4386b4d3e521d3a44930cb Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 23:09:23 +0200
Subject: [PATCH 21/30] added missing PR link

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dc916d5a1..dd4d38b26 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -66,6 +66,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool
 [#393]: https://github.com/munich-quantum-toolkit/predictor/pull/393
 [#385]: https://github.com/munich-quantum-toolkit/predictor/pull/385
 [#360]: https://github.com/munich-quantum-toolkit/predictor/pull/360
+[#641]: https://github.com/munich-quantum-toolkit/predictor/pull/641
 
 <!-- Contributor -->
 

From 42e5d406a30126af73bde58b4f60373d09928292 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 23:13:27 +0200
Subject: [PATCH 22/30] restore tracer_output_path of predictor singleton
 instance in predictor.py after compilation

---
 src/mqt/predictor/rl/predictor.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py
index d97a161c3..9305acc84 100644
--- a/src/mqt/predictor/rl/predictor.py
+++ b/src/mqt/predictor/rl/predictor.py
@@ -205,14 +205,11 @@ def rl_compile(
             msg = "device must not be None if predictor_singleton is None."
             raise ValueError(msg)
         predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path)
-    else:
-        predictor = predictor_singleton
-        predictor.env.tracer_output_path = tracer_output_path
-
-    result = predictor.compile_as_predicted(qc)
-
-    # Reset tracer path to prevent leakage to subsequent calls
-    if predictor_singleton is not None:
-        predictor.env.tracer_output_path = None
-
-    return result
+        return predictor.compile_as_predicted(qc)
+    predictor = predictor_singleton
+    original_tracer_output_path = predictor.env.tracer_output_path
+    predictor.env.tracer_output_path = tracer_output_path
+    try:
+        return predictor.compile_as_predicted(qc)
+    finally:
+        predictor.env.tracer_output_path = original_tracer_output_path

From a84658d0f698b0ea6d3f25ba41cd5199dbad0540 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 13 Apr 2026 23:28:09 +0200
Subject: [PATCH 23/30] fixed incorrect key for last_step_data and added
 comment

---
 src/mqt/predictor/rl/predictor.py | 2 ++
 tests/compilation/test_tracer.py  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py
index 9305acc84..ee9321f87 100644
--- a/src/mqt/predictor/rl/predictor.py
+++ b/src/mqt/predictor/rl/predictor.py
@@ -206,6 +206,8 @@ def rl_compile(
             raise ValueError(msg)
         predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path)
         return predictor.compile_as_predicted(qc)
+
+    # use singleton and restore tracer path afterward
     predictor = predictor_singleton
     original_tracer_output_path = predictor.env.tracer_output_path
     predictor.env.tracer_output_path = tracer_output_path
diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
index 079ed7135..162c256f6 100644
--- a/tests/compilation/test_tracer.py
+++ b/tests/compilation/test_tracer.py
@@ -49,7 +49,7 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     assert trace_data["schema_version"] == "1.0.0"
 
     last_step_data = trace_data["steps"][-1]
-    assert last_step_data.get("isTerminal") is True, "The final compilation step must be marked as terminal."
+    assert last_step_data.get("is_terminal") is True, "The final compilation step must be marked as terminal."
 
     try:
         # Initialize from JSON (throws if the structures don't match)

From e4d03c41573a336535db9a3a7bcdb07bb3274f2d Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Tue, 14 Apr 2026 21:33:04 +0200
Subject: [PATCH 24/30] added model training in case test_tracer.py is run out
 of order or separately

---
 tests/compilation/test_tracer.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
index 162c256f6..babea29c1 100644
--- a/tests/compilation/test_tracer.py
+++ b/tests/compilation/test_tracer.py
@@ -15,8 +15,11 @@
 
 import pytest
 from mqt.bench import BenchmarkLevel, get_benchmark
+from mqt.bench.targets.devices import get_device
 
 from mqt.predictor.qcompile import qcompile
+from mqt.predictor.rl.helper import get_path_trained_model
+from mqt.predictor.rl.predictor import Predictor
 from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata
 
 if TYPE_CHECKING:
@@ -27,6 +30,16 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     """Test that the compilation tracer correctly generates a JSON file when a path is provided."""
     trace_file = tmp_path / "test_trace.json"
     qc = get_benchmark("ghz", level=BenchmarkLevel.INDEP, circuit_size=3)
+
+    figure_of_merit = "expected_fidelity"
+    device = get_device("ibm_falcon_127")
+    model_name = "model_" + figure_of_merit + "_" + device.description
+    model_path = get_path_trained_model() / (model_name + ".zip")
+
+    if not model_path.exists():
+        predictor = Predictor(figure_of_merit="expected_fidelity", device=device)
+        predictor.train_model(timesteps=1000, test=True)
+
     _compiled_qc, _compilation_info, _selected_device = qcompile(
         qc, figure_of_merit="expected_fidelity", tracer_output_path=str(trace_file)
     )

From 1dafd89dd03ed0e90ebc4d0845d06771a101e19e Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Tue, 14 Apr 2026 21:56:23 +0200
Subject: [PATCH 25/30] switched test to usage of rl_compile instead of
 qcompile

---
 tests/compilation/test_tracer.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
index babea29c1..1625023fd 100644
--- a/tests/compilation/test_tracer.py
+++ b/tests/compilation/test_tracer.py
@@ -17,9 +17,8 @@
 from mqt.bench import BenchmarkLevel, get_benchmark
 from mqt.bench.targets.devices import get_device
 
-from mqt.predictor.qcompile import qcompile
 from mqt.predictor.rl.helper import get_path_trained_model
-from mqt.predictor.rl.predictor import Predictor
+from mqt.predictor.rl.predictor import Predictor, rl_compile
 from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata
 
 if TYPE_CHECKING:
@@ -27,7 +26,11 @@
 
 
 def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
-    """Test that the compilation tracer correctly generates a JSON file when a path is provided."""
+    """Test that the compilation tracer correctly generates a JSON file when a path is provided.
+
+    Args:
+        tmp_path: Pytest-provided temporary directory used for the trace output file.
+    """
     trace_file = tmp_path / "test_trace.json"
     qc = get_benchmark("ghz", level=BenchmarkLevel.INDEP, circuit_size=3)
 
@@ -37,11 +40,11 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     model_path = get_path_trained_model() / (model_name + ".zip")
 
     if not model_path.exists():
-        predictor = Predictor(figure_of_merit="expected_fidelity", device=device)
+        predictor = Predictor(figure_of_merit=figure_of_merit, device=device)
         predictor.train_model(timesteps=1000, test=True)
 
-    _compiled_qc, _compilation_info, _selected_device = qcompile(
-        qc, figure_of_merit="expected_fidelity", tracer_output_path=str(trace_file)
+    _compiled_qc, _compilation_info = rl_compile(
+        qc, device=device, figure_of_merit=figure_of_merit, tracer_output_path=str(trace_file)
     )
 
     assert trace_file.exists(), "Tracer JSON file was not generated."

From 4564a55747c3e7045a9b56b282208c2cedbd56f1 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Wed, 15 Apr 2026 19:58:49 +0200
Subject: [PATCH 26/30] incorporated first round of feedback

- separate method inside predictorenv.py that handles collection of data for tracing
- qasm3 circuit strings are now used instead of qasm2
---
 src/mqt/predictor/rl/predictorenv.py | 55 +++++++++++++++++-----------
 src/mqt/predictor/rl/tracer.py       | 17 ++-------
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index d8c66befb..a74bedf5f 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -298,7 +298,7 @@ def export_circuit(self, qc: QuantumCircuit | None = None) -> QuantumCircuit:
         exported._layout = self.layout  # noqa: SLF001
         return exported
 
-    def _log_step_reward(
+    def _collect_tracer_data(
         self,
         step_index: int,
         action_name: str,
@@ -308,22 +308,7 @@ def _log_step_reward(
         feature_vector: dict[str, int | NDArray[np.float32]],
         done: bool,
     ) -> None:
-        """Log the chosen action and resulting reward for the current episode step."""
-        logger.info(
-            "Episode %d step %d: action=%s reward=%.6f",
-            self.episode_count,
-            step_index,
-            action_name,
-            reward_val,
-        )
-        if done:
-            logger.info(
-                "Episode %d finished: circuit=%s final_reward=%.6f",
-                self.episode_count,
-                self.current_circuit_name,
-                reward_val,
-            )
-
+        """Collects the current compilation state and sends it to the tracer."""
         if self.tracer is not None and self.tracer_output_path is not None:
             synthesized, laid_out, routed = self._get_compilation_state_flags()
 
@@ -362,6 +347,30 @@ def _log_step_reward(
 
                 self.tracer.save_to_json(out_path)
                 logger.info("✅TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve())
+        return
+
+    def _log_step_reward(
+        self,
+        step_index: int,
+        action_name: str,
+        reward_val: float,
+        done: bool,
+    ) -> None:
+        """Log the chosen action and resulting reward for the current episode step."""
+        logger.info(
+            "Episode %d step %d: action=%s reward=%.6f",
+            self.episode_count,
+            step_index,
+            action_name,
+            reward_val,
+        )
+        if done:
+            logger.info(
+                "Episode %d finished: circuit=%s final_reward=%.6f",
+                self.episode_count,
+                self.current_circuit_name,
+                reward_val,
+            )
 
     def _get_compilation_state_flags(self) -> tuple[bool, bool, bool]:
         """Return `(synthesized, laid_out, routed)` for the current circuit state."""
@@ -404,7 +413,8 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
         altered_qc = self._apply_and_update(action)
         if altered_qc is None:
             obs = self._create_observation()
-            self._log_step_reward(
+            self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=True)
+            self._collect_tracer_data(
                 step_index=step_index,
                 action_name=action_name,
                 reward_val=0.0,
@@ -421,7 +431,8 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0
             self.state._layout = self.layout  # noqa: SLF001
             obs = self._create_observation()
-            self._log_step_reward(
+            self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done)
+            self._collect_tracer_data(
                 step_index=step_index,
                 action_name=action_name,
                 reward_val=reward_val,
@@ -457,7 +468,8 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             self.prev_reward, self.prev_reward_kind = new_val, new_kind
 
         obs = self._create_observation()
-        self._log_step_reward(
+        self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done)
+        self._collect_tracer_data(
             step_index=step_index,
             action_name=action_name,
             reward_val=reward_val,
@@ -627,6 +639,7 @@ def reset(
         logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
 
         if self.tracer_output_path is not None:
+            logger.info("Tracing enabled for compilation...")
             self.tracer = CompilationTracer.from_initial_state(
                 device=self.device,
                 circuit_name=self.current_circuit_name,
@@ -634,7 +647,7 @@ def reset(
                 mdp_policy=self.mdp,
             )
 
-            self._log_step_reward(
+            self._collect_tracer_data(
                 step_index=0,
                 action_name="Baseline",
                 reward_val=0.0,
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index 7957b702a..e1d4cde3e 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -16,8 +16,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-from qiskit import qasm2
-from qiskit.qasm2 import QASM2ExportError
+from qiskit import qasm3
 
 if TYPE_CHECKING:
     import numpy as np
@@ -92,7 +91,7 @@ class CompilationStep:
         laid_out: Whether the circuit has already been laid out.
         routed: Whether the circuit has already been routed.
         is_terminal: A flag indicating if the compilation process has concluded.
-        circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format.
+        circuit_qasm3: The structural representation of the circuit in OpenQASM 3.0 format.
         program_communication: The program communication value for the current circuit.
         entanglement_ratio: The entanglement ratio for the current circuit.
         parallelism: The parallelism value for the current circuit.
@@ -114,7 +113,7 @@ class CompilationStep:
     laid_out: bool
     routed: bool
     is_terminal: bool
-    circuit_qasm: str
+    circuit_qasm3: str
     program_communication: float
     critical_depth: float
     entanglement_ratio: float
@@ -222,7 +221,7 @@ def record_step(
             fom_value=round(fom_value, 6),
             fom_kind=fom_kind,
             is_terminal=done,
-            circuit_qasm=self._safe_qasm_dumps(current_qc),
+            circuit_qasm3=qasm3.dumps(current_qc),
             program_communication=self._extract_float(features["program_communication"]),
             critical_depth=self._extract_float(features["critical_depth"]),
             entanglement_ratio=self._extract_float(features["entanglement_ratio"]),
@@ -282,11 +281,3 @@ def _extract_float(val: int | NDArray[np.float32]) -> float:
         if isinstance(val, int):
             return float(val)
         return float(val[0])
-
-    @staticmethod
-    def _safe_qasm_dumps(qc: QuantumCircuit) -> str:
-        """Safely export circuit to QASM2, returning error message on failure."""
-        try:
-            return qasm2.dumps(qc)
-        except QASM2ExportError as e:
-            return f"QASM2 export failed: {e}"

From 2424d3c07e0f90f2d8dcabac5d61ada4eac071e6 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Thu, 16 Apr 2026 21:19:25 +0200
Subject: [PATCH 27/30] incorporated feedback

- tracer now collects all possible figures of merit
- included cleaner methods for calculating the supported figures of merit and a draft for a rewrite of the calculate_reward function
- included corresponding tests
---
 src/mqt/predictor/rl/predictorenv.py | 256 +++++++++++++++++++++++++--
 src/mqt/predictor/rl/tracer.py       |  60 ++++---
 tests/compilation/test_tracer.py     |  16 ++
 3 files changed, 298 insertions(+), 34 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index a74bedf5f..3e221a287 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -90,6 +90,8 @@
 )
 from mqt.predictor.rl.tracer import (
     CompilationTracer,
+    FigureOfMeritMetrics,
+    FOMMetric,
 )
 from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl
 
@@ -171,6 +173,7 @@ def __init__(
         self.used_actions: list[str] = []
         self.device = device
         self.tracer_output_path = tracer_output_path
+        self.hellinger_model = None
         self.tracer = None
 
         logger.info("MDP: " + mdp)
@@ -312,26 +315,49 @@ def _collect_tracer_data(
         if self.tracer is not None and self.tracer_output_path is not None:
             synthesized, laid_out, routed = self._get_compilation_state_flags()
 
+            # Collect figures of merit
+            hd_metric: FOMMetric | None = None
+            cd_metric: FOMMetric
+            ef_metric: FOMMetric
+            esp_metric: FOMMetric | None = None
+
             if self.reward_function == "expected_fidelity":
-                fidelity_val = fom_value
-                fidelity_kind = fom_kind
+                ef_metric = FOMMetric(value=fom_value, kind=fom_kind)
             else:
-                fidelity_val = (
-                    expected_fidelity(qc=self.state, device=self.device)
-                    if (synthesized and routed)
-                    else approx_expected_fidelity(qc=self.state, device=self.device, error_rates=self.err_by_gate)
-                )
-                fidelity_kind = "exact" if (synthesized and routed) else "approx"
+                val, kind = self.calculate_expected_fidelity(qc=self.state, mode="auto")
+                ef_metric = FOMMetric(value=val, kind=kind)
+
+            if self.reward_function == "estimated_success_probability":
+                esp_metric = FOMMetric(value=fom_value, kind=fom_kind)
+            elif esp_data_available(self.device):
+                val, kind = self.calculate_estimated_success_probability(qc=self.state, mode="auto")
+                esp_metric = FOMMetric(value=val, kind=kind)
+
+            if self.reward_function == "critical_depth":
+                cd_metric = FOMMetric(value=fom_value, kind=fom_kind)
+            else:
+                val, kind = self.calculate_critical_depth(qc=self.state)
+                cd_metric = FOMMetric(value=val, kind=kind)
+
+            if self.reward_function == "estimated_hellinger_distance":
+                hd_metric = FOMMetric(value=fom_value, kind=fom_kind)
+            elif self.hellinger_model is not None:
+                val, kind = self.calculate_estimated_hellinger_distance(qc=self.state)
+                hd_metric = FOMMetric(value=val, kind=kind)
+
+            metrics = FigureOfMeritMetrics(
+                expected_fidelity=ef_metric,
+                success_probability=esp_metric,
+                critical_depth=cd_metric,
+                hellinger_distance=hd_metric,
+            )
 
             self.tracer.record_step(
                 step_index=step_index,
                 action=action_name,
                 reward=reward_val,
                 current_qc=self.state,
-                expected_fidelity=fidelity_val,
-                fidelity_kind=fidelity_kind,
-                fom_value=fom_value,
-                fom_kind=fom_kind,
+                figures_of_merit=metrics,
                 features=feature_vector,
                 synthesized=synthesized,
                 laid_out=laid_out,
@@ -481,6 +507,204 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
 
         return obs, reward_val, done, False, {}
 
+    def _resolve_evaluation_kind(self, qc: QuantumCircuit, mode: str) -> tuple[str, Any]:
+        """Resolves whether to use 'exact' or 'approx' evaluation based on the circuit state."""
+        reward_layout = getattr(qc, "_layout", None)
+        if reward_layout is None:
+            reward_layout = self.layout
+
+        if mode == "exact":
+            return "exact", reward_layout
+        if mode == "approx":
+            return "approx", reward_layout
+
+        # "auto" resolution
+        only_native = self.is_circuit_synthesized(qc)
+        laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False
+        mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False
+
+        kind = "exact" if (only_native and laid_out and mapped) else "approx"
+        return kind, reward_layout
+
+    def _prepare_exact_qc(self, qc: QuantumCircuit, reward_layout: TranspileLayout | Layout | None) -> QuantumCircuit:
+        """Prepares the circuit for exact evaluation by exporting it if necessary."""
+        if reward_layout is None or getattr(qc, "_layout", None) is not None:
+            return qc
+        return self.export_circuit(qc)
+
+    def calculate_expected_fidelity(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
+        """Calculates the expected fidelity for the given quantum circuit.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+            mode:
+                Selects how the method chooses between exact and approximate evaluation:
+
+                - ``"auto"`` (default): determines computation automatically.
+                - ``"exact"``: always compute the exact, calibration-aware metric.
+                - ``"approx"``: always compute the approximate, transpile-based proxy.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the expected fidelity (float).
+            - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used.
+        """
+        if qc is None:
+            qc = self.state
+
+        kind, reward_layout = self._resolve_evaluation_kind(qc, mode)
+
+        if kind == "exact":
+            exact_qc = self._prepare_exact_qc(qc, reward_layout)
+            return expected_fidelity(exact_qc, self.device), "exact"
+
+        self._ensure_device_averages_cached()
+        val = approx_expected_fidelity(qc, device=self.device, error_rates=self.err_by_gate)
+        return val, "approx"
+
+    def calculate_estimated_success_probability(
+        self, qc: QuantumCircuit | None = None, mode: str = "auto"
+    ) -> tuple[float, str]:
+        """Calculates the estimated success probability (ESP) for the given quantum circuit.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+            mode:
+                Selects how the method chooses between exact and approximate evaluation:
+
+                - ``"auto"`` (default): determines computation automatically.
+                - ``"exact"``: always compute the exact, calibration-aware metric.
+                - ``"approx"``: always compute the approximate, transpile-based proxy.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the estimated success probability (float).
+            - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used.
+        """
+        if qc is None:
+            qc = self.state
+
+        kind, reward_layout = self._resolve_evaluation_kind(qc, mode)
+
+        if kind == "exact":
+            exact_qc = self._prepare_exact_qc(qc, reward_layout)
+            return estimated_success_probability(exact_qc, self.device), "exact"
+
+        self._ensure_device_averages_cached()
+        feats = calc_supermarq_features(qc)
+        val = approx_estimated_success_probability(
+            qc,
+            device=self.device,
+            error_rates=self.err_by_gate,
+            gate_durations=self.dur_by_gate,
+            tbar=self.tbar,
+            par_feature=float(feats.parallelism),
+            liv_feature=float(feats.liveness),
+            n_qubits=int(qc.num_qubits),
+        )
+        return val, "approx"
+
+    def calculate_critical_depth(self, qc: QuantumCircuit | None = None) -> tuple[float, str]:
+        """Calculates the critical depth for the given quantum circuit.
+
+        Note:
+            Critical depth is always computed exactly.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the critical depth (float).
+            - ``kind`` is always ``"exact"``.
+        """
+        if qc is None:
+            qc = self.state
+        return crit_depth(qc), "exact"
+
+    def calculate_estimated_hellinger_distance(self, qc: QuantumCircuit | None = None) -> tuple[float, str]:
+        """Calculates the estimated Hellinger distance for the given quantum circuit.
+
+        Note:
+            Hellinger distance is always computed exactly using the environment's
+            pretrained machine learning model.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the estimated Hellinger distance (float).
+            - ``kind`` is always ``"exact"``.
+        """
+        if qc is None:
+            qc = self.state
+        return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact"
+
+    # -----------------------------------------------------------------------------------------------------
+    # MARK: New, cleaner method for reward calculation, functionally identical to original calculate_reward
+    #       It might be worth using this method (unless you plan to change the original implementation),
+    #       since this now cleanly uses above methods that are also required by the tracer data collection.
+    #       @flowerthrower
+    # -----------------------------------------------------------------------------------------------------
+
+    # def calculate_reward_new(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
+    #     """Compute the reward for a circuit and report whether it was computed exactly or approximately.
+    #
+    #     This environment supports two evaluation regimes for selected figures of merit:
+    #
+    #     - **Exact**: uses the calibration-aware implementation on the full circuit/device
+    #     (e.g., uses the device Target calibration data as-is).
+    #     - **Approximate**: uses a transpile-based proxy:
+    #     the circuit is transpiled to the device's basis gates and the resulting basis-gate
+    #     counts are combined with cached **per-basis-gate** calibration statistics
+    #     (error rates and durations) to estimate the metric. This approximation ignores
+    #     additional mapping/routing overhead beyond what is reflected in the transpiled
+    #     basis-gate counts.
+    #
+    #     Args:
+    #         qc:
+    #             Circuit to evaluate. If ``None``, evaluates the environment's current state.
+    #         mode:
+    #             Selects how the method chooses between exact and approximate evaluation:
+    #
+    #             - ``"auto"`` (default): compute the exact metric if the circuit is already
+    #             **native and mapped** for the device; otherwise compute the approximate metric.
+    #             - ``"exact"``: always compute the exact, calibration-aware metric.
+    #             - ``"approx"``: always compute the approximate, transpile-based proxy.
+    #
+    #     Returns:
+    #         A pair ``(value, kind)`` where:
+    #
+    #         - ``value`` is the scalar reward value (typically in ``[0, 1]`` for EF/ESP).
+    #         - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used.
+    #     """
+    #     if qc is None:
+    #         qc = self.state
+    #
+    #     if self.reward_function == "expected_fidelity":
+    #         return self.calculate_expected_fidelity(qc, mode)
+    #
+    #     if self.reward_function == "estimated_success_probability":
+    #         return self.calculate_estimated_success_probability(qc, mode)
+    #
+    #     if self.reward_function == "critical_depth":
+    #         return self.calculate_critical_depth(qc)
+    #
+    #     if self.reward_function == "estimated_hellinger_distance":
+    #         return self.calculate_estimated_hellinger_distance(qc)
+    #
+    #     # Fallback for other unknown / not-yet-implemented reward functions:
+    #     logger.warning(
+    #         "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.",
+    #         self.reward_function,
+    #     )
+    #     return 0.0, "exact"
+
     def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
         """Compute the reward for a circuit and report whether it was computed exactly or approximately.
 
@@ -640,6 +864,14 @@ def reset(
 
         if self.tracer_output_path is not None:
             logger.info("Tracing enabled for compilation...")
+
+            if self.reward_function != "estimated_hellinger_distance":
+                self.hellinger_model = None
+                hellinger_model_path = get_hellinger_model_path(self.device)
+                if hellinger_model_path.is_file():
+                    # load the model so it can be used in _collect_tracer_data
+                    self.hellinger_model = load(hellinger_model_path)
+
             self.tracer = CompilationTracer.from_initial_state(
                 device=self.device,
                 circuit_name=self.current_circuit_name,
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
index e1d4cde3e..34374efad 100644
--- a/src/mqt/predictor/rl/tracer.py
+++ b/src/mqt/predictor/rl/tracer.py
@@ -71,6 +71,36 @@ class DeviceMetadata:
     calibration_data: dict[str, list[GateCalibration]]
 
 
+@dataclass
+class FOMMetric:
+    """A single figure-of-merit value together with the way it was calculated.
+
+    Attributes:
+        value: The figure-of-merit value.
+        kind: How ``value`` was calculated: "exact" or "approx".
+    """
+
+    value: float
+    kind: str
+
+
+@dataclass
+class FigureOfMeritMetrics:
+    """The figures of merit recorded for a circuit, each stored as an ``FOMMetric``.
+
+    Attributes:
+        expected_fidelity: The expected fidelity of the circuit.
+        critical_depth: The critical depth of the circuit.
+        hellinger_distance: The Hellinger distance of the circuit, or ``None`` if unavailable.
+        success_probability: The success probability of the circuit, or ``None`` if unavailable.
+    """
+
+    expected_fidelity: FOMMetric
+    critical_depth: FOMMetric
+    hellinger_distance: FOMMetric | None
+    success_probability: FOMMetric | None
+
+
 @dataclass
 class CompilationStep:
     """A snapshot of the circuit state and environment metrics at a single timestep.
@@ -83,16 +113,14 @@ class CompilationStep:
         num_qubits: The number of qubits in the circuit.
         gates_per_operation: The number of gates per operation occurring in the circuit.
         total_gates: The total number of gates included in the circuit.
-        expected_fidelity: The expected fidelity of the circuit.
-        fidelity_kind: The kind of fidelity value: 'exact' or 'approx'.
-        fom_value: The figure of merit value for this compilation pass.
-        fom_kind: The kind of fom value: 'exact' or 'approx'.
+        figures_of_merit: The figure of merit values for the current circuit.
         synthesized: Whether the circuit has already been synthesized.
         laid_out: Whether the circuit has already been laid out.
         routed: Whether the circuit has already been routed.
         is_terminal: A flag indicating if the compilation process has concluded.
         circuit_qasm3: The structural representation of the circuit in OpenQASM 3.0 format.
         program_communication: The program communication value for the current circuit.
+        raw_critical_depth: The raw critical depth of the circuit.
         entanglement_ratio: The entanglement ratio for the current circuit.
         parallelism: The parallelism value for the current circuit.
         liveness: The liveness value for the current circuit.
@@ -105,17 +133,14 @@ class CompilationStep:
     num_qubits: int
     gates_per_operation: dict[str, int]
     total_gates: int
-    expected_fidelity: float
-    fidelity_kind: str
-    fom_value: float
-    fom_kind: str
+    figures_of_merit: FigureOfMeritMetrics
     synthesized: bool
     laid_out: bool
     routed: bool
     is_terminal: bool
     circuit_qasm3: str
     program_communication: float
-    critical_depth: float
+    raw_critical_depth: float
     entanglement_ratio: float
     parallelism: float
     liveness: float
@@ -174,10 +199,7 @@ def record_step(
         action: str,
         reward: float,
         current_qc: QuantumCircuit,
-        expected_fidelity: float,
-        fidelity_kind: str,
-        fom_value: float,
-        fom_kind: str,
+        figures_of_merit: FigureOfMeritMetrics,
         features: dict[str, int | NDArray[np.float32]],
         synthesized: bool,
         laid_out: bool,
@@ -191,10 +213,7 @@ def record_step(
             action: The name of the compilation pass that was just applied.
             reward: The calculated reward for the applied pass.
             current_qc: The current Qiskit QuantumCircuit object after the pass.
-            expected_fidelity: The expected fidelity of the circuit after applying the pass.
-            fidelity_kind: The kind of fidelity value: 'exact' or 'approx'.
-            fom_value: The figure of merit value for the compilation pass.
-            fom_kind: The kind of fom value: 'exact' or 'approx'.
+            figures_of_merit: The available figures of merit for the current circuit.
             features: The quantum circuit's feature vector used by the RL agent.
             synthesized: Whether the circuit has already been synthesized.
             laid_out: Whether the circuit has already been laid out.
@@ -216,14 +235,11 @@ def record_step(
             num_qubits=current_qc.num_qubits,
             gates_per_operation=present_ops_dict,
             total_gates=total_gates,
-            expected_fidelity=round(expected_fidelity, 6),
-            fidelity_kind=fidelity_kind,
-            fom_value=round(fom_value, 6),
-            fom_kind=fom_kind,
+            figures_of_merit=figures_of_merit,
             is_terminal=done,
             circuit_qasm3=qasm3.dumps(current_qc),
             program_communication=self._extract_float(features["program_communication"]),
-            critical_depth=self._extract_float(features["critical_depth"]),
+            raw_critical_depth=self._extract_float(features["critical_depth"]),
             entanglement_ratio=self._extract_float(features["entanglement_ratio"]),
             parallelism=self._extract_float(features["parallelism"]),
             liveness=self._extract_float(features["liveness"]),
diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
index 1625023fd..bafe68073 100644
--- a/tests/compilation/test_tracer.py
+++ b/tests/compilation/test_tracer.py
@@ -67,6 +67,22 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     last_step_data = trace_data["steps"][-1]
     assert last_step_data.get("is_terminal") is True, "The final compilation step must be marked as terminal."
 
+    # Verify Figures of Merit
+    fom_data = last_step_data.get("figures_of_merit")
+    assert fom_data is not None, "Figures of merit dictionary is missing from the trace step."
+
+    # always calculated ones
+    assert fom_data.get("expected_fidelity") is not None, "Expected fidelity failed to populate."
+    assert fom_data.get("critical_depth") is not None, "Critical depth fallback failed."
+
+    # for this device ESP should be populated
+    assert fom_data.get("success_probability") is not None, "ESP fallback calculation failed."
+    assert "value" in fom_data["success_probability"], "ESP is missing its float value."
+    assert "kind" in fom_data["success_probability"], "ESP is missing its kind string."
+
+    # for this device HD should fallback to None
+    assert fom_data.get("hellinger_distance") is None, "Hellinger distance should be null when model is missing."
+
     try:
         # Initialize from JSON (throws if the structures don't match)
         DeviceMetadata(**trace_data["device"])

From 0c42d4a044f840685e10973aafaf407dee756e84 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Thu, 16 Apr 2026 21:42:15 +0200
Subject: [PATCH 28/30] minor cleanup

---
 src/mqt/predictor/rl/predictorenv.py | 1 -
 tests/compilation/test_tracer.py     | 7 +++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 3e221a287..0d4967823 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -373,7 +373,6 @@ def _collect_tracer_data(
 
                 self.tracer.save_to_json(out_path)
                 logger.info("✅TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve())
-        return
 
     def _log_step_reward(
         self,
diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
index bafe68073..6f3b49477 100644
--- a/tests/compilation/test_tracer.py
+++ b/tests/compilation/test_tracer.py
@@ -80,8 +80,11 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
     assert "value" in fom_data["success_probability"], "ESP is missing its float value."
     assert "kind" in fom_data["success_probability"], "ESP is missing its kind string."
 
-    # for this device HD should fallback to None
-    assert fom_data.get("hellinger_distance") is None, "Hellinger distance should be null when model is missing."
+    # It is valid for HD to be None (model missing) or a populated dictionary (model exists)
+    hd_metric = fom_data.get("hellinger_distance")
+    if hd_metric is not None:
+        assert "value" in hd_metric, "Hellinger distance is missing its float value."
+        assert "kind" in hd_metric, "Hellinger distance is missing its kind string."
 
     try:
         # Initialize from JSON (throws if the structures don't match)

From 197ebc555f9498242c43134625b10bc84453e794 Mon Sep 17 00:00:00 2001
From: Linus <42340116+linus-hologram@users.noreply.github.com>
Date: Mon, 20 Apr 2026 20:16:51 +0200
Subject: [PATCH 29/30] added rewritten, cleaner version of the
 calculate_reward function + feedback incorporation

---
 src/mqt/predictor/rl/predictorenv.py | 136 +++------------------------
 1 file changed, 15 insertions(+), 121 deletions(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 0d4967823..08b53e7ea 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -456,7 +456,7 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0
             self.state._layout = self.layout  # noqa: SLF001
             obs = self._create_observation()
-            self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done)
+            self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done)
             self._collect_tracer_data(
                 step_index=step_index,
                 action_name=action_name,
@@ -493,7 +493,7 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             self.prev_reward, self.prev_reward_kind = new_val, new_kind
 
         obs = self._create_observation()
-        self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done)
+        self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done)
         self._collect_tracer_data(
             step_index=step_index,
             action_name=action_name,
@@ -644,66 +644,6 @@ def calculate_estimated_hellinger_distance(self, qc: QuantumCircuit | None = Non
             qc = self.state
         return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact"
 
-    # -----------------------------------------------------------------------------------------------------
-    # MARK: New, cleaner method for reward calculation, functionally identical to original calculate_reward
-    #       It might be worth using this method (unless you plan to change the original implementation),
-    #       since this now cleanly uses above methods that are also required by the tracer data collection.
-    #       @flowerthrower
-    # -----------------------------------------------------------------------------------------------------
-
-    # def calculate_reward_new(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
-    #     """Compute the reward for a circuit and report whether it was computed exactly or approximately.
-    #
-    #     This environment supports two evaluation regimes for selected figures of merit:
-    #
-    #     - **Exact**: uses the calibration-aware implementation on the full circuit/device
-    #     (e.g., uses the device Target calibration data as-is).
-    #     - **Approximate**: uses a transpile-based proxy:
-    #     the circuit is transpiled to the device's basis gates and the resulting basis-gate
-    #     counts are combined with cached **per-basis-gate** calibration statistics
-    #     (error rates and durations) to estimate the metric. This approximation ignores
-    #     additional mapping/routing overhead beyond what is reflected in the transpiled
-    #     basis-gate counts.
-    #
-    #     Args:
-    #         qc:
-    #             Circuit to evaluate. If ``None``, evaluates the environment's current state.
-    #         mode:
-    #             Selects how the method chooses between exact and approximate evaluation:
-    #
-    #             - ``"auto"`` (default): compute the exact metric if the circuit is already
-    #             **native and mapped** for the device; otherwise compute the approximate metric.
-    #             - ``"exact"``: always compute the exact, calibration-aware metric.
-    #             - ``"approx"``: always compute the approximate, transpile-based proxy.
-    #
-    #     Returns:
-    #         A pair ``(value, kind)`` where:
-    #
-    #         - ``value`` is the scalar reward value (typically in ``[0, 1]`` for EF/ESP).
-    #         - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used.
-    #     """
-    #     if qc is None:
-    #         qc = self.state
-    #
-    #     if self.reward_function == "expected_fidelity":
-    #         return self.calculate_expected_fidelity(qc, mode)
-    #
-    #     if self.reward_function == "estimated_success_probability":
-    #         return self.calculate_estimated_success_probability(qc, mode)
-    #
-    #     if self.reward_function == "critical_depth":
-    #         return self.calculate_critical_depth(qc)
-    #
-    #     if self.reward_function == "estimated_hellinger_distance":
-    #         return self.calculate_estimated_hellinger_distance(qc)
-    #
-    #     # Fallback for other unknown / not-yet-implemented reward functions:
-    #     logger.warning(
-    #         "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.",
-    #         self.reward_function,
-    #     )
-    #     return 0.0, "exact"
-
     def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
         """Compute the reward for a circuit and report whether it was computed exactly or approximately.
 
@@ -738,70 +678,24 @@ def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto")
         if qc is None:
             qc = self.state
 
-        # Reward functions that are always computed exactly.
-        if self.reward_function not in {"expected_fidelity", "estimated_success_probability"}:
-            if self.reward_function == "critical_depth":
-                return crit_depth(qc), "exact"
-            if self.reward_function == "estimated_hellinger_distance":
-                return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact"
-            # Fallback for other unknown / not-yet-implemented reward functions:
-            logger.warning(
-                "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.",
-                self.reward_function,
-            )
-            return 0.0, "exact"
-
-        reward_layout = cast("TranspileLayout | Layout | None", getattr(qc, "_layout", None))
-        if reward_layout is None:
-            # use the env layout if the circuit has no attached layout
-            # (e.g., if it's an intermediate state or a newly exported copy)
-            reward_layout = self.layout
-
-        # Dual-path evaluation (exact vs. approximate) for EF / ESP.
-        if mode == "exact":
-            kind = "exact"
-        elif mode == "approx":
-            kind = "approx"
-        else:  # "auto"
-            only_native = self.is_circuit_synthesized(qc)
-            laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False
-            mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False
-
-            kind = "exact" if (only_native and laid_out and mapped) else "approx"
-
-        if kind == "exact":
-            exact_qc = (
-                qc if reward_layout is None or getattr(qc, "_layout", None) is not None else self.export_circuit(qc)
-            )
-            if self.reward_function == "expected_fidelity":
-                return expected_fidelity(exact_qc, self.device), "exact"
-
-            return estimated_success_probability(exact_qc, self.device), "exact"
+        if self.reward_function == "expected_fidelity":
+            return self.calculate_expected_fidelity(qc, mode)
 
-        # Approximate metrics use per-basis-gate averages cached from device calibration
-        self._ensure_device_averages_cached()
+        if self.reward_function == "estimated_success_probability":
+            return self.calculate_estimated_success_probability(qc, mode)
 
-        if self.reward_function == "expected_fidelity":
-            val = approx_expected_fidelity(
-                qc,
-                device=self.device,
-                error_rates=self.err_by_gate,
-            )
-            return val, "approx"
+        if self.reward_function == "critical_depth":
+            return self.calculate_critical_depth(qc)
 
-        feats = calc_supermarq_features(qc)
+        if self.reward_function == "estimated_hellinger_distance":
+            return self.calculate_estimated_hellinger_distance(qc)
 
-        val = approx_estimated_success_probability(
-            qc,
-            device=self.device,
-            error_rates=self.err_by_gate,
-            gate_durations=self.dur_by_gate,
-            tbar=self.tbar,
-            par_feature=float(feats.parallelism),
-            liv_feature=float(feats.liveness),
-            n_qubits=int(qc.num_qubits),
+        # Fallback for other unknown / not-yet-implemented reward functions:
+        logger.warning(
+            "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.",
+            self.reward_function,
         )
-        return val, "approx"
+        return 0.0, "exact"
 
     def render(self) -> None:
         """Renders the current state."""

From 5c2ecd6038fe2642ce82790f944754e4a7e104f3 Mon Sep 17 00:00:00 2001
From: flowerthrower <flowerthrower@users.noreply.github.com>
Date: Mon, 20 Apr 2026 23:42:56 +0200
Subject: [PATCH 30/30] =?UTF-8?q?=E2=8F=AA=20remove=20merge=20leftover=20l?=
 =?UTF-8?q?ine?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mqt/predictor/rl/predictorenv.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 08b53e7ea..ecc481063 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -454,7 +454,6 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
 
         if self.reward_function == "estimated_hellinger_distance":
             reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0
-            self.state._layout = self.layout  # noqa: SLF001
             obs = self._create_observation()
             self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done)
             self._collect_tracer_data(