From f0a017ee024a4226b8ac9cd2d4ae62549630e47f Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Wed, 1 Apr 2026 19:24:27 +0200 Subject: [PATCH 01/30] first data classes for compilation visualization --- src/mqt/predictor/rl/predictor_visualizer.py | 107 +++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/mqt/predictor/rl/predictor_visualizer.py diff --git a/src/mqt/predictor/rl/predictor_visualizer.py b/src/mqt/predictor/rl/predictor_visualizer.py new file mode 100644 index 000000000..5ccf9497d --- /dev/null +++ b/src/mqt/predictor/rl/predictor_visualizer.py @@ -0,0 +1,107 @@ +# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM +# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH +# All rights reserved. +# +# SPDX-License-Identifier: MIT +# +# Licensed under the MIT License + +"""Visualization module for recording and exporting the RL compilation process.""" + +from __future__ import annotations + +import json +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import TYPE_CHECKING + +import qiskit.qasm2 as qasm2 + +if TYPE_CHECKING: + from qiskit import QuantumCircuit + + +@dataclass +class DeviceMetadata: + """Metadata containing information about the target quantum device for compilation.""" + + description: str + device_qubits: int + + +@dataclass +class InputCircuitMetadata: + """Metadata containing information about the initial, uncompiled quantum circuit.""" + + name: str + num_qubits: int + + +@dataclass +class CompilationStep: + """A snapshot of the circuit state and environment metrics at a single timestep. + + Attributes: + step_index: The current step number in the reinforcement learning episode. + action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords'). + reward: The calculated reward value for applying this specific action. 
+ current_depth: The depth of the quantum circuit after the action was applied. + is_terminal: A flag indicating if the compilation process has concluded. + circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format. + """ + + step_index: int + action: str + reward: float + current_depth: int + is_terminal: bool + circuit_qasm: str + + +@dataclass +class CompilationVisualizer: + """Aggregates compilation steps and metadata for export to a JSON file. + + This class acts as an in-memory buffer during the reinforcement learning compilation + process. It tracks the physical transformations of the circuit and exports the + entire episode as a structured JSON file upon termination. + + Attributes: + device: The target device metadata. + input_circuit: The uncompiled circuit metadata. + steps: An ordered list of CompilationStep snapshots. + """ + + device: DeviceMetadata + input_circuit: InputCircuitMetadata + steps: list[CompilationStep] = field(default_factory=list) + + def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None: + """Records a single compilation action and the resulting circuit state. + + Args: + step_index: The current step number in the environment. + action: The name of the compilation pass that was just applied. + reward: The calculated reward for the applied pass. + current_qc: The current Qiskit QuantumCircuit object after the pass. + done: Boolean indicating if this is the final step of the compilation. + """ + new_step = CompilationStep( + step_index=step_index, + action=action, + reward=round(reward, 6), + current_depth=current_qc.depth(), + is_terminal=done, + circuit_qasm=qasm2.dumps(current_qc), + ) + self.steps.append(new_step) + + def save_to_json(self, filepath: str | Path) -> None: + """Serializes the metadata and all recorded steps to a JSON file. + + Args: + filepath: The destination path or filename for the output JSON file. 
+ """ + # asdict() seamlessly converts the nested subclasses into a clean dictionary! + with Path(filepath).open("w", encoding="utf-8") as f: + json.dump(asdict(self), f, indent=4) From 78ca6b6c4e5e94c0f3e954f7e05f8a45f557b62f Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 4 Apr 2026 12:08:20 +0200 Subject: [PATCH 02/30] First Running Draft of the new tracer.py - renamed CompilationVisualizer to CompilationTracer - included first draft of actual tracing logic inside predictorenv.py - adjusted qcompile.py and predictor.py accordingly to route through the tracer_output_path --- src/mqt/predictor/qcompile.py | 8 ++++- src/mqt/predictor/rl/predictor.py | 11 ++++-- src/mqt/predictor/rl/predictorenv.py | 34 +++++++++++++++++++ .../rl/{predictor_visualizer.py => tracer.py} | 2 +- 4 files changed, 51 insertions(+), 4 deletions(-) rename src/mqt/predictor/rl/{predictor_visualizer.py => tracer.py} (99%) diff --git a/src/mqt/predictor/qcompile.py b/src/mqt/predictor/qcompile.py index d65be982e..a1f8240f0 100644 --- a/src/mqt/predictor/qcompile.py +++ b/src/mqt/predictor/qcompile.py @@ -16,6 +16,8 @@ from mqt.predictor.rl import rl_compile if TYPE_CHECKING: + from pathlib import Path + from qiskit import QuantumCircuit from mqt.predictor.reward import figure_of_merit @@ -24,16 +26,20 @@ def qcompile( qc: QuantumCircuit, figure_of_merit: figure_of_merit = "expected_fidelity", + tracer_output_path: str | Path | None = None, ) -> tuple[QuantumCircuit, list[str], str]: """Compiles a given quantum circuit to a device with the highest predicted figure of merit. Arguments: qc: The quantum circuit to be compiled. figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity". + tracer_output_path: If provided, enables compiler tracing and exports the JSON log to this path/directory. 
Returns: A tuple containing the compiled quantum circuit, the compilation information, and the name of the device used for compilation. """ predicted_device = predict_device_for_figure_of_merit(qc, figure_of_merit=figure_of_merit) - res = rl_compile(qc, device=predicted_device, figure_of_merit=figure_of_merit) + res = rl_compile( + qc, device=predicted_device, figure_of_merit=figure_of_merit, tracer_output_path=tracer_output_path + ) return *res, predicted_device diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py index 1f75b1901..7053c9eb0 100644 --- a/src/mqt/predictor/rl/predictor.py +++ b/src/mqt/predictor/rl/predictor.py @@ -38,12 +38,16 @@ def __init__( device: Target, path_training_circuits: Path | None = None, logger_level: int = logging.INFO, + tracer_output_path: str | Path | None = None, ) -> None: """Initializes the Predictor object.""" logger.setLevel(logger_level) self.env = PredictorEnv( - reward_function=figure_of_merit, device=device, path_training_circuits=path_training_circuits + reward_function=figure_of_merit, + device=device, + path_training_circuits=path_training_circuits, + tracer_output_path=tracer_output_path, ) self.device_name = device.description self.figure_of_merit = figure_of_merit @@ -154,6 +158,7 @@ def rl_compile( device: Target | None, figure_of_merit: figure_of_merit | None = "expected_fidelity", predictor_singleton: Predictor | None = None, + tracer_output_path: str | Path | None = None, ) -> tuple[QuantumCircuit, list[str]]: """Compiles a given quantum circuit to a device optimizing for the given figure of merit. @@ -162,6 +167,7 @@ def rl_compile( device: The device to compile to. figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity". predictor_singleton: A predictor object that is used for compilation to reduce compilation time when compiling multiple quantum circuits. If None, a new predictor object is created. Defaults to None. 
+ tracer_output_path: If provided, enables compiler tracing and exports the JSON log to the specified path. Returns: A tuple containing the compiled quantum circuit and the compilation information. If compilation fails, False is returned. @@ -176,8 +182,9 @@ def rl_compile( if device is None: msg = "device must not be None if predictor_singleton is None." raise ValueError(msg) - predictor = Predictor(figure_of_merit=figure_of_merit, device=device) + predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path) else: predictor = predictor_singleton + predictor.env.tracer_output_path = tracer_output_path return predictor.compile_as_predicted(qc) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 5541e260e..fc2967cc4 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -86,6 +86,11 @@ postprocess_vf2postlayout, prepare_noise_data, ) +from mqt.predictor.rl.tracer import ( + CompilationTracer, + DeviceMetadata, + InputCircuitMetadata, +) from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl logger = logging.getLogger("mqt-predictor") @@ -102,6 +107,7 @@ def __init__( path_training_circuits: Path | None = None, reward_scale: float = 1.0, no_effect_penalty: float = -0.001, + tracer_output_path: str | Path | None = None, ) -> None: """Initializes the PredictorEnv object. @@ -112,6 +118,7 @@ def __init__( path_training_circuits: The path to the training circuits folder. Defaults to None, which uses the default path. reward_scale: Scaling factor for rewards/penalties proportional to fidelity changes. no_effect_penalty: Step penalty applied when an action does not change the circuit (no-op). + tracer_output_path: Whether to enable compilation tracing. If provided, this will export a JSON file at the end of the compilation process. Defaults to None. 
Raises: ValueError: If the reward function is "estimated_success_probability" and no calibration data is available for the device or if the reward function is "estimated_hellinger_distance" and no trained model is available for the device. @@ -130,6 +137,8 @@ def __init__( self.actions_structure_preserving_indices = [] # Actions that preserves the mapping and native gates self.used_actions: list[str] = [] self.device = device + self.tracer_output_path = tracer_output_path + self.tracer = None logger.info("MDP: " + mdp) self.mdp = mdp @@ -258,6 +267,24 @@ def _log_step_reward(self, step_index: int, action_name: str, reward_val: float, reward_val, ) + if self.tracer is not None and self.tracer_output_path is not None: + self.tracer.record_step( + step_index=step_index, + action=action_name, + reward=reward_val, + current_qc=self.state, + done=done, + ) + + if done: + out_path = Path(self.tracer_output_path) + + if out_path.is_dir() or not out_path.suffix: + out_path = out_path / f"visualization_{self.current_circuit_name}.json" + + self.tracer.save_to_json(out_path) + logger.info("✅TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve()) + def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any, Any]]: """Run one environment step. 
@@ -464,9 +491,16 @@ def reset( self.prev_reward = None self.prev_reward_kind = None + self.tracer = None self.num_qubits_uncompiled_circuit = self.state.num_qubits self.has_parameterized_gates = len(self.state.parameters) > 0 + + if self.tracer_output_path is not None: + device_meta = DeviceMetadata(self.device.description, self.device.num_qubits) + input_meta = InputCircuitMetadata(self.current_circuit_name, self.num_qubits_uncompiled_circuit) + self.tracer = CompilationTracer(device=device_meta, input_circuit=input_meta) + logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) return create_feature_dict(self.state), {} diff --git a/src/mqt/predictor/rl/predictor_visualizer.py b/src/mqt/predictor/rl/tracer.py similarity index 99% rename from src/mqt/predictor/rl/predictor_visualizer.py rename to src/mqt/predictor/rl/tracer.py index 5ccf9497d..f7d834a2f 100644 --- a/src/mqt/predictor/rl/predictor_visualizer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -59,7 +59,7 @@ class CompilationStep: @dataclass -class CompilationVisualizer: +class CompilationTracer: """Aggregates compilation steps and metadata for export to a JSON file. 
This class acts as an in-memory buffer during the reinforcement learning compilation From d29b926a5b3c02c71ef275f129f870e9466fa7a8 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 4 Apr 2026 13:02:51 +0200 Subject: [PATCH 03/30] cleaned up tracer initialization --- src/mqt/predictor/rl/predictorenv.py | 6 +----- src/mqt/predictor/rl/tracer.py | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index fc2967cc4..377d4d654 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -88,8 +88,6 @@ ) from mqt.predictor.rl.tracer import ( CompilationTracer, - DeviceMetadata, - InputCircuitMetadata, ) from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl @@ -497,9 +495,7 @@ def reset( self.has_parameterized_gates = len(self.state.parameters) > 0 if self.tracer_output_path is not None: - device_meta = DeviceMetadata(self.device.description, self.device.num_qubits) - input_meta = InputCircuitMetadata(self.current_circuit_name, self.num_qubits_uncompiled_circuit) - self.tracer = CompilationTracer(device=device_meta, input_circuit=input_meta) + self.tracer = CompilationTracer.from_initial_state(self.device, self.state, self.current_circuit_name) logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index f7d834a2f..f4f4086ec 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -19,6 +19,7 @@ if TYPE_CHECKING: from qiskit import QuantumCircuit + from qiskit.transpiler import Target @dataclass @@ -35,6 +36,8 @@ class InputCircuitMetadata: name: str num_qubits: int + depth: int + circuit_qasm: str @dataclass @@ -76,6 +79,22 @@ class CompilationTracer: input_circuit: InputCircuitMetadata steps: 
list[CompilationStep] = field(default_factory=list) + @classmethod + def from_initial_state(cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str) -> CompilationTracer: + """Alternative constructor to build the tracer more conveniently from the environment's initial state.""" + device_meta = DeviceMetadata( + description=device.description, + device_qubits=device.num_qubits, + ) + input_meta = InputCircuitMetadata( + name=circuit_name, + num_qubits=input_circuit.num_qubits, + depth=input_circuit.depth(), + circuit_qasm=qasm2.dumps(input_circuit), + ) + + return cls(device=device_meta, input_circuit=input_meta) + def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None: """Records a single compilation action and the resulting circuit state. @@ -102,6 +121,5 @@ def save_to_json(self, filepath: str | Path) -> None: Args: filepath: The destination path or filename for the output JSON file. """ - # asdict() seamlessly converts the nested subclasses into a clean dictionary! with Path(filepath).open("w", encoding="utf-8") as f: json.dump(asdict(self), f, indent=4) From 4fdddfb9f394214a9769bcd0011cff84140f41e0 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 4 Apr 2026 14:53:53 +0200 Subject: [PATCH 04/30] extended capture of device metadata The DeviceMetadata class now also includes the device's native gates, its topology and gate calibration data. 
--- src/mqt/predictor/rl/tracer.py | 76 ++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index f4f4086ec..e1ba4a193 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -19,7 +19,24 @@ if TYPE_CHECKING: from qiskit import QuantumCircuit - from qiskit.transpiler import Target + from qiskit.transpiler import InstructionProperties, Target + + +@dataclass +class TopologyEdge: + """Represents a topology edge between two qubits.""" + + control: int + target: int + + +@dataclass +class GateCalibration: + """Calibration data for a specific gate on a specific set of qubits.""" + + qubits: list[int] + duration: float | None + error: float | None @dataclass @@ -28,6 +45,9 @@ class DeviceMetadata: description: str device_qubits: int + native_gates: list[str] + topology: list[TopologyEdge] + calibration_data: dict[str, list[GateCalibration]] @dataclass @@ -82,17 +102,8 @@ class CompilationTracer: @classmethod def from_initial_state(cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str) -> CompilationTracer: """Alternative constructor to build the tracer more conveniently from the environment's initial state.""" - device_meta = DeviceMetadata( - description=device.description, - device_qubits=device.num_qubits, - ) - input_meta = InputCircuitMetadata( - name=circuit_name, - num_qubits=input_circuit.num_qubits, - depth=input_circuit.depth(), - circuit_qasm=qasm2.dumps(input_circuit), - ) - + device_meta = cls._extract_device_metadata(device) + input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name) return cls(device=device_meta, input_circuit=input_meta) def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None: @@ -123,3 +134,44 @@ def save_to_json(self, filepath: str | Path) -> None: """ with Path(filepath).open("w", encoding="utf-8") as f: 
json.dump(asdict(self), f, indent=4) + + @staticmethod + def _extract_circuit_metadata(input_circuit: QuantumCircuit, circuit_name: str) -> InputCircuitMetadata: + """Internal helper to parse the initial quantum circuit.""" + return InputCircuitMetadata( + name=circuit_name, + num_qubits=input_circuit.num_qubits, + depth=input_circuit.depth(), + circuit_qasm=qasm2.dumps(input_circuit), + ) + + @staticmethod + def _extract_device_metadata(device: Target) -> DeviceMetadata: + """Internal helper to extract topology and calibration data from the device.""" + native_gates = list(device.operation_names) + cmap = device.build_coupling_map() + topology = [TopologyEdge(control=c, target=t) for c, t in cmap] if cmap is not None else [] + calibration_data: dict[str, list[GateCalibration]] = {} + + for gate_name in native_gates: + gate_calibrations = [] + props: InstructionProperties + qubit_tuples: tuple[int, ...] + + for qubit_tuples, props in device[gate_name].items(): + if qubit_tuples is None or props is None: + continue + + gate_calibrations.append( + GateCalibration(qubits=list(qubit_tuples), duration=props.duration, error=props.error) + ) + + calibration_data[gate_name] = gate_calibrations + + return DeviceMetadata( + description=device.description, + device_qubits=device.num_qubits, + native_gates=native_gates, + topology=topology, + calibration_data=calibration_data, + ) From ea7751eec0ecf132ce529196a80d338ea1f7b35e Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:31:56 +0200 Subject: [PATCH 05/30] included tracking of figure_of_merit over time - each compilation step is now associated with its figure of merit value and its kind (exact vs. 
approximate) --- src/mqt/predictor/rl/predictorenv.py | 38 ++++++++++++++++++++++++---- src/mqt/predictor/rl/tracer.py | 37 ++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 377d4d654..3fc0c36da 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -248,7 +248,9 @@ def _apply_and_update(self, action: int) -> QuantumCircuit | None: return altered_qc - def _log_step_reward(self, step_index: int, action_name: str, reward_val: float, done: bool) -> None: + def _log_step_reward( + self, step_index: int, action_name: str, reward_val: float, fom_value: float, fom_kind: str, done: bool + ) -> None: """Log the chosen action and resulting reward for the current episode step.""" logger.info( "Episode %d step %d: action=%s reward=%.6f", @@ -271,6 +273,8 @@ def _log_step_reward(self, step_index: int, action_name: str, reward_val: float, action=action_name, reward=reward_val, current_qc=self.state, + fom_value=fom_value, + fom_kind=fom_kind, done=done, ) @@ -305,7 +309,14 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any altered_qc = self._apply_and_update(action) if altered_qc is None: - self._log_step_reward(step_index, action_name, 0.0, done=True) + self._log_step_reward( + step_index=step_index, + action_name=action_name, + reward_val=0.0, + fom_value=0.0, + fom_kind="exact", + done=True, + ) return create_feature_dict(self.state), 0.0, True, False, {} done = action == self.action_terminate_index @@ -313,7 +324,14 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any if self.reward_function == "estimated_hellinger_distance": reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0 self.state._layout = self.layout # noqa: SLF001 - self._log_step_reward(step_index, action_name, reward_val, done) + self._log_step_reward( + 
step_index=step_index, + action_name=action_name, + reward_val=reward_val, + fom_value=reward_val, + fom_kind="exact", + done=done, + ) return create_feature_dict(self.state), reward_val, done, False, {} # Lazy init: compute prev_reward only once per episode (or if missing) @@ -342,7 +360,15 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any self.prev_reward, self.prev_reward_kind = new_val, new_kind obs = create_feature_dict(self.state) - self._log_step_reward(step_index, action_name, reward_val, done) + self._log_step_reward( + step_index=step_index, + action_name=action_name, + reward_val=reward_val, + fom_value=self.prev_reward, + fom_kind=self.prev_reward_kind, + done=done, + ) + return obs, reward_val, done, False, {} def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]: @@ -495,7 +521,9 @@ def reset( self.has_parameterized_gates = len(self.state.parameters) > 0 if self.tracer_output_path is not None: - self.tracer = CompilationTracer.from_initial_state(self.device, self.state, self.current_circuit_name) + self.tracer = CompilationTracer.from_initial_state( + self.device, self.state, self.current_circuit_name, self.reward_function + ) logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index e1ba4a193..e01ab6f6f 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -57,6 +57,7 @@ class InputCircuitMetadata: name: str num_qubits: int depth: int + figure_of_merit: str circuit_qasm: str @@ -69,6 +70,9 @@ class CompilationStep: action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords'). reward: The calculated reward value for applying this specific action. current_depth: The depth of the quantum circuit after the action was applied. + total_gates: The total number of gates included in the circuit. 
+ fom_value: The figure of merit value for this compilation pass. + fom_kind: The kind of fom value: 'exact' or 'approx'. is_terminal: A flag indicating if the compilation process has concluded. circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format. """ @@ -77,6 +81,9 @@ class CompilationStep: action: str reward: float current_depth: int + total_gates: int + fom_value: float + fom_kind: str is_terminal: bool circuit_qasm: str @@ -100,13 +107,24 @@ class CompilationTracer: steps: list[CompilationStep] = field(default_factory=list) @classmethod - def from_initial_state(cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str) -> CompilationTracer: + def from_initial_state( + cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str + ) -> CompilationTracer: """Alternative constructor to build the tracer more conveniently from the environment's initial state.""" device_meta = cls._extract_device_metadata(device) - input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name) + input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name, figure_of_merit) return cls(device=device_meta, input_circuit=input_meta) - def record_step(self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, done: bool) -> None: + def record_step( + self, + step_index: int, + action: str, + reward: float, + current_qc: QuantumCircuit, + fom_value: float, + fom_kind: str, + done: bool, + ) -> None: """Records a single compilation action and the resulting circuit state. Args: @@ -114,13 +132,21 @@ def record_step(self, step_index: int, action: str, reward: float, current_qc: Q action: The name of the compilation pass that was just applied. reward: The calculated reward for the applied pass. current_qc: The current Qiskit QuantumCircuit object after the pass. + fom_value: The figure of merit value for the compilation pass. + fom_kind: The kind of fom value: 'exact' or 'approx'. 
done: Boolean indicating if this is the final step of the compilation. """ + present_ops_dict = current_qc.count_ops() + total_gates = sum(present_ops_dict.values()) if present_ops_dict else 0 + new_step = CompilationStep( step_index=step_index, action=action, reward=round(reward, 6), current_depth=current_qc.depth(), + total_gates=total_gates, + fom_value=round(fom_value, 6), + fom_kind=fom_kind, is_terminal=done, circuit_qasm=qasm2.dumps(current_qc), ) @@ -136,12 +162,15 @@ def save_to_json(self, filepath: str | Path) -> None: json.dump(asdict(self), f, indent=4) @staticmethod - def _extract_circuit_metadata(input_circuit: QuantumCircuit, circuit_name: str) -> InputCircuitMetadata: + def _extract_circuit_metadata( + input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str + ) -> InputCircuitMetadata: """Internal helper to parse the initial quantum circuit.""" return InputCircuitMetadata( name=circuit_name, num_qubits=input_circuit.num_qubits, depth=input_circuit.depth(), + figure_of_merit=figure_of_merit, circuit_qasm=qasm2.dumps(input_circuit), ) From bf3bde3b64a93969e304774c9a963f78cea9d582 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sun, 5 Apr 2026 13:54:27 +0200 Subject: [PATCH 06/30] adjusted gate counting & included feature vector values for compilation steps - gate count does now not include barriers - program_communication, critical_depth, entanglement_ratio, parallelism, liveness are now also captured for each compilation step --- src/mqt/predictor/rl/predictorenv.py | 23 +++++++++++++++++++---- src/mqt/predictor/rl/tracer.py | 26 +++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 3fc0c36da..771935433 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -14,10 +14,13 @@ from pathlib import Path from typing import TYPE_CHECKING, 
Any +import numpy as np + if TYPE_CHECKING: from collections.abc import Callable from bqskit import Circuit + from numpy.typing import NDArray from pytket._tket.passes import BasePass as TketBasePass from pytket.circuit import Node from qiskit.passmanager.base_tasks import Task @@ -34,7 +37,6 @@ from math import isclose from typing import cast -import numpy as np from bqskit.ext import bqskit_to_qiskit, qiskit_to_bqskit from gymnasium import Env from gymnasium.spaces import Box, Dict, Discrete @@ -249,7 +251,14 @@ def _apply_and_update(self, action: int) -> QuantumCircuit | None: return altered_qc def _log_step_reward( - self, step_index: int, action_name: str, reward_val: float, fom_value: float, fom_kind: str, done: bool + self, + step_index: int, + action_name: str, + reward_val: float, + fom_value: float, + fom_kind: str, + feature_vector: dict[str, int | NDArray[np.float32]], + done: bool, ) -> None: """Log the chosen action and resulting reward for the current episode step.""" logger.info( @@ -275,6 +284,7 @@ def _log_step_reward( current_qc=self.state, fom_value=fom_value, fom_kind=fom_kind, + features=feature_vector, done=done, ) @@ -309,30 +319,34 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any altered_qc = self._apply_and_update(action) if altered_qc is None: + obs = create_feature_dict(self.state) self._log_step_reward( step_index=step_index, action_name=action_name, reward_val=0.0, fom_value=0.0, fom_kind="exact", + feature_vector=obs, done=True, ) - return create_feature_dict(self.state), 0.0, True, False, {} + return obs, 0.0, True, False, {} done = action == self.action_terminate_index if self.reward_function == "estimated_hellinger_distance": reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0 self.state._layout = self.layout # noqa: SLF001 + obs = create_feature_dict(self.state) self._log_step_reward( step_index=step_index, action_name=action_name, reward_val=reward_val, fom_value=reward_val, 
fom_kind="exact", + feature_vector=obs, done=done, ) - return create_feature_dict(self.state), reward_val, done, False, {} + return obs, reward_val, done, False, {} # Lazy init: compute prev_reward only once per episode (or if missing) if self.prev_reward is None: @@ -366,6 +380,7 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any reward_val=reward_val, fom_value=self.prev_reward, fom_kind=self.prev_reward_kind, + feature_vector=obs, done=done, ) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index e01ab6f6f..5a54c3aa6 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -18,6 +18,8 @@ import qiskit.qasm2 as qasm2 if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray from qiskit import QuantumCircuit from qiskit.transpiler import InstructionProperties, Target @@ -81,11 +83,17 @@ class CompilationStep: action: str reward: float current_depth: int + num_qubits: int total_gates: int fom_value: float fom_kind: str is_terminal: bool circuit_qasm: str + program_communication: float + critical_depth: float + entanglement_ratio: float + parallelism: float + liveness: float @dataclass @@ -123,6 +131,7 @@ def record_step( current_qc: QuantumCircuit, fom_value: float, fom_kind: str, + features: dict[str, int | NDArray[np.float32]], done: bool, ) -> None: """Records a single compilation action and the resulting circuit state. @@ -134,21 +143,28 @@ def record_step( current_qc: The current Qiskit QuantumCircuit object after the pass. fom_value: The figure of merit value for the compilation pass. fom_kind: The kind of fom value: 'exact' or 'approx'. + features: The quantum circuit's feature vector used by the RL agent. done: Boolean indicating if this is the final step of the compilation. 
""" present_ops_dict = current_qc.count_ops() - total_gates = sum(present_ops_dict.values()) if present_ops_dict else 0 + total_gates = sum(count for gate, count in present_ops_dict.items() if gate != "barrier") new_step = CompilationStep( step_index=step_index, action=action, reward=round(reward, 6), current_depth=current_qc.depth(), + num_qubits=current_qc.num_qubits, total_gates=total_gates, fom_value=round(fom_value, 6), fom_kind=fom_kind, is_terminal=done, circuit_qasm=qasm2.dumps(current_qc), + program_communication=self._extract_float(features["program_communication"]), + critical_depth=self._extract_float(features["critical_depth"]), + entanglement_ratio=self._extract_float(features["entanglement_ratio"]), + parallelism=self._extract_float(features["parallelism"]), + liveness=self._extract_float(features["liveness"]), ) self.steps.append(new_step) @@ -204,3 +220,11 @@ def _extract_device_metadata(device: Target) -> DeviceMetadata: topology=topology, calibration_data=calibration_data, ) + + @staticmethod + def _extract_float(val: int | NDArray[np.float32]) -> float: + """Safely extracts a float from a scalar or a 1D NumPy array to satisfy linter requirements.""" + if isinstance(val, (int, float)): + return float(val) + # If it is not an int or float, the linter now safely assumes it is an array + return float(val[0]) From 3ece49d5505c8542bf1e7ac313b21109c303fa60 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Tue, 7 Apr 2026 16:50:06 +0200 Subject: [PATCH 07/30] restructuring of CompilationTracer class - initial circuit is now captured as first value in the steps array (step_index 0), removed InputCircuitMetadata class from tracer.py - predictorenv.py was adjusted to calculate the initial figure_of_merit value already in the reset() method, stripping away the None-check inside the step method (step() can only ever be called after reset() was called on the environment, thereby the prev_reward and prev_reward_kind 
always have to be set) - changed data type for prev_reward and prev_reward_kind to float and str, respectively, and provided initial values. --- src/mqt/predictor/rl/predictorenv.py | 25 ++++++------ src/mqt/predictor/rl/tracer.py | 58 +++++++++++++--------------- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 771935433..71ffc1c40 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -221,8 +221,8 @@ def __init__( self.readout_err: dict[Node, float] | None = None self.reward_scale = reward_scale self.no_effect_penalty = no_effect_penalty - self.prev_reward: float | None = None - self.prev_reward_kind: str | None = None + self.prev_reward: float = 0.0 + self.prev_reward_kind: str = "unknown" self.episode_count = 0 self.current_circuit_name = "" self.err_by_gate: dict[str, float] = {} @@ -348,10 +348,6 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any ) return obs, reward_val, done, False, {} - # Lazy init: compute prev_reward only once per episode (or if missing) - if self.prev_reward is None: - self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto") - if done: assert action in self.valid_actions, "Terminate action is not valid but was chosen." 
self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="exact") @@ -527,22 +523,29 @@ def reset( self.valid_actions = self.actions_synthesis_indices + self.actions_opt_indices self.error_occurred = False - - self.prev_reward = None - self.prev_reward_kind = None self.tracer = None self.num_qubits_uncompiled_circuit = self.state.num_qubits self.has_parameterized_gates = len(self.state.parameters) > 0 + # create baseline values + obs = create_feature_dict(self.state) + self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto") + if self.tracer_output_path is not None: self.tracer = CompilationTracer.from_initial_state( - self.device, self.state, self.current_circuit_name, self.reward_function + device=self.device, + input_circuit=self.state, + circuit_name=self.current_circuit_name, + figure_of_merit=self.reward_function, + features=obs, + initial_fom=self.prev_reward, + fom_kind=self.prev_reward_kind, ) logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) - return create_feature_dict(self.state), {} + return obs, {} def action_masks(self) -> list[bool]: """Returns a list of valid actions for the current state.""" diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index 5a54c3aa6..aab3232ea 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -52,17 +52,6 @@ class DeviceMetadata: calibration_data: dict[str, list[GateCalibration]] -@dataclass -class InputCircuitMetadata: - """Metadata containing information about the initial, uncompiled quantum circuit.""" - - name: str - num_qubits: int - depth: int - figure_of_merit: str - circuit_qasm: str - - @dataclass class CompilationStep: """A snapshot of the circuit state and environment metrics at a single timestep. @@ -105,23 +94,44 @@ class CompilationTracer: entire episode as a structured JSON file upon termination. Attributes: + circuit_name: The name of the circuit being compiled. 
+ figure_of_merit: The chosen figure of merit for this compilation. device: The target device metadata. - input_circuit: The uncompiled circuit metadata. steps: An ordered list of CompilationStep snapshots. """ + circuit_name: str + figure_of_merit: str device: DeviceMetadata - input_circuit: InputCircuitMetadata steps: list[CompilationStep] = field(default_factory=list) @classmethod def from_initial_state( - cls, device: Target, input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str + cls, + device: Target, + input_circuit: QuantumCircuit, + circuit_name: str, + figure_of_merit: str, + features: dict[str, int | NDArray[np.float32]], + initial_fom: float, + fom_kind: str, ) -> CompilationTracer: """Alternative constructor to build the tracer more conveniently from the environment's initial state.""" device_meta = cls._extract_device_metadata(device) - input_meta = cls._extract_circuit_metadata(input_circuit, circuit_name, figure_of_merit) - return cls(device=device_meta, input_circuit=input_meta) + tracer = cls(circuit_name=circuit_name, figure_of_merit=figure_of_merit, device=device_meta) + + tracer.record_step( + step_index=0, + action="Baseline", + reward=0.0, + current_qc=input_circuit, + fom_value=initial_fom, + fom_kind=fom_kind, + features=features, + done=False, + ) + + return tracer def record_step( self, @@ -177,19 +187,6 @@ def save_to_json(self, filepath: str | Path) -> None: with Path(filepath).open("w", encoding="utf-8") as f: json.dump(asdict(self), f, indent=4) - @staticmethod - def _extract_circuit_metadata( - input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str - ) -> InputCircuitMetadata: - """Internal helper to parse the initial quantum circuit.""" - return InputCircuitMetadata( - name=circuit_name, - num_qubits=input_circuit.num_qubits, - depth=input_circuit.depth(), - figure_of_merit=figure_of_merit, - circuit_qasm=qasm2.dumps(input_circuit), - ) - @staticmethod def _extract_device_metadata(device: Target) -> 
DeviceMetadata: """Internal helper to extract topology and calibration data from the device.""" @@ -224,7 +221,6 @@ def _extract_device_metadata(device: Target) -> DeviceMetadata: @staticmethod def _extract_float(val: int | NDArray[np.float32]) -> float: """Safely extracts a float from a scalar or a 1D NumPy array to satisfy linter requirements.""" - if isinstance(val, (int, float)): + if isinstance(val, int): return float(val) - # If it is not an int or float, the linter now safely assumes it is an array return float(val[0]) From 8a82b849d4706a8485c80ef95dea31f52c342d7b Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Tue, 7 Apr 2026 20:55:31 +0200 Subject: [PATCH 08/30] included tracing of MDP state evolution The tracer now tracks the MDP policy as well as the MDP state evolution throughout the compilation. --- src/mqt/predictor/rl/predictorenv.py | 22 ++++++++++++++++++++-- src/mqt/predictor/rl/tracer.py | 28 +++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 71ffc1c40..9a96c2f7e 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -277,6 +277,7 @@ def _log_step_reward( ) if self.tracer is not None and self.tracer_output_path is not None: + synthesized, laid_out, routed = self._get_mdp_state() self.tracer.record_step( step_index=step_index, action=action_name, @@ -285,6 +286,9 @@ def _log_step_reward( fom_value=fom_value, fom_kind=fom_kind, features=feature_vector, + synthesized=synthesized, + laid_out=laid_out, + routed=routed, done=done, ) @@ -533,14 +537,20 @@ def reset( self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto") if self.tracer_output_path is not None: + synthesized, laid_out, routed = self._get_mdp_state() + self.tracer = CompilationTracer.from_initial_state( device=self.device, input_circuit=self.state, 
circuit_name=self.current_circuit_name, figure_of_merit=self.reward_function, + mdp_policy=self.mdp, features=obs, initial_fom=self.prev_reward, fom_kind=self.prev_reward_kind, + synthesized=synthesized, + laid_out=laid_out, + routed=routed, ) logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) @@ -938,15 +948,23 @@ def is_circuit_routed(self, circuit: QuantumCircuit, coupling_map: CouplingMap) return False return True - def determine_valid_actions_for_state(self) -> list[int]: - """Determine valid actions based on circuit state: synthesized, mapped, routed.""" + def _get_mdp_state(self) -> tuple[bool, bool, bool]: + """Determine the current MDP state of the circuit. + + Returns: + A tuple with boolean values describing the state of the circuit (synthesized, laid_out, routed) + """ synthesized = self.is_circuit_synthesized(self.state) laid_out = self.is_circuit_laid_out(self.state, self.layout) if self.layout else False # Routing is only allowed after layout routed = ( self.is_circuit_routed(self.state, CouplingMap(self.device.build_coupling_map())) if laid_out else False ) + return synthesized, laid_out, routed + def determine_valid_actions_for_state(self) -> list[int]: + """Determine valid actions based on circuit state: synthesized, mapped, routed.""" + synthesized, laid_out, routed = self._get_mdp_state() actions = [] # Initial state if not synthesized and not laid_out and not routed: diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index aab3232ea..ed7ed5ed5 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -64,6 +64,9 @@ class CompilationStep: total_gates: The total number of gates included in the circuit. fom_value: The figure of merit value for this compilation pass. fom_kind: The kind of fom value: 'exact' or 'approx'. + synthesized: Whether the circuit has already been synthesized. + laid_out: Whether the circuit has already been laid out. 
+ routed: Whether the circuit has already been routed. is_terminal: A flag indicating if the compilation process has concluded. circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format. """ @@ -76,6 +79,9 @@ class CompilationStep: total_gates: int fom_value: float fom_kind: str + synthesized: bool + laid_out: bool + routed: bool is_terminal: bool circuit_qasm: str program_communication: float @@ -96,12 +102,14 @@ class CompilationTracer: Attributes: circuit_name: The name of the circuit being compiled. figure_of_merit: The chosen figure of merit for this compilation. + mdp_policy: The MDP transition policy. device: The target device metadata. steps: An ordered list of CompilationStep snapshots. """ circuit_name: str figure_of_merit: str + mdp_policy: str device: DeviceMetadata steps: list[CompilationStep] = field(default_factory=list) @@ -112,13 +120,19 @@ def from_initial_state( input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str, + mdp_policy: str, features: dict[str, int | NDArray[np.float32]], initial_fom: float, fom_kind: str, + synthesized: bool, + laid_out: bool, + routed: bool, ) -> CompilationTracer: """Alternative constructor to build the tracer more conveniently from the environment's initial state.""" device_meta = cls._extract_device_metadata(device) - tracer = cls(circuit_name=circuit_name, figure_of_merit=figure_of_merit, device=device_meta) + tracer = cls( + circuit_name=circuit_name, figure_of_merit=figure_of_merit, mdp_policy=mdp_policy, device=device_meta + ) tracer.record_step( step_index=0, @@ -128,6 +142,9 @@ def from_initial_state( fom_value=initial_fom, fom_kind=fom_kind, features=features, + synthesized=synthesized, + laid_out=laid_out, + routed=routed, done=False, ) @@ -142,6 +159,9 @@ def record_step( fom_value: float, fom_kind: str, features: dict[str, int | NDArray[np.float32]], + synthesized: bool, + laid_out: bool, + routed: bool, done: bool, ) -> None: """Records a single compilation action 
and the resulting circuit state. @@ -154,6 +174,9 @@ def record_step( fom_value: The figure of merit value for the compilation pass. fom_kind: The kind of fom value: 'exact' or 'approx'. features: The quantum circuit's feature vector used by the RL agent. + synthesized: Whether the circuit has already been synthesized. + laid_out: Whether the circuit has already been laid out. + routed: Whether the circuit has already been routed. done: Boolean indicating if this is the final step of the compilation. """ present_ops_dict = current_qc.count_ops() @@ -175,6 +198,9 @@ def record_step( entanglement_ratio=self._extract_float(features["entanglement_ratio"]), parallelism=self._extract_float(features["parallelism"]), liveness=self._extract_float(features["liveness"]), + synthesized=synthesized, + laid_out=laid_out, + routed=routed, ) self.steps.append(new_step) From 4b3368a6fe6619cc512e55df71f0af0d881960f2 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 11 Apr 2026 13:30:46 +0200 Subject: [PATCH 09/30] CompilationStep now includes gate count per operation values --- src/mqt/predictor/rl/tracer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index ed7ed5ed5..cea681235 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -61,6 +61,8 @@ class CompilationStep: action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords'). reward: The calculated reward value for applying this specific action. current_depth: The depth of the quantum circuit after the action was applied. + num_qubits: The number of qubits in the circuit. + gates_per_operation: The number of gates per operation occurring in the circuit. total_gates: The total number of gates included in the circuit. fom_value: The figure of merit value for this compilation pass. 
fom_kind: The kind of fom value: 'exact' or 'approx'. @@ -76,6 +78,7 @@ class CompilationStep: reward: float current_depth: int num_qubits: int + gates_per_operation: dict[str, int] total_gates: int fom_value: float fom_kind: str @@ -179,8 +182,12 @@ def record_step( routed: Whether the circuit has already been routed. done: Boolean indicating if this is the final step of the compilation. """ - present_ops_dict = current_qc.count_ops() - total_gates = sum(count for gate, count in present_ops_dict.items() if gate != "barrier") + present_ops_dict: dict[str, int] = { + str(gate_name): int(count) + for gate_name, count in current_qc.count_ops().items() + if str(gate_name) != "barrier" + } + total_gates = sum(present_ops_dict.values()) new_step = CompilationStep( step_index=step_index, @@ -188,6 +195,7 @@ def record_step( reward=round(reward, 6), current_depth=current_qc.depth(), num_qubits=current_qc.num_qubits, + gates_per_operation=present_ops_dict, total_gates=total_gates, fom_value=round(fom_value, 6), fom_kind=fom_kind, From 88b8fb6c5b85ef32765604e6e49c39b12478620a Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 11 Apr 2026 14:40:33 +0200 Subject: [PATCH 10/30] expected_fidelity is now permanently included in tracing This happens regardless of what the actual figure of merit is --- src/mqt/predictor/rl/predictorenv.py | 35 ++++++++++++++++++++-------- src/mqt/predictor/rl/tracer.py | 35 +++++++++------------------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 9a96c2f7e..49567962f 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -278,11 +278,25 @@ def _log_step_reward( if self.tracer is not None and self.tracer_output_path is not None: synthesized, laid_out, routed = self._get_mdp_state() + + if self.reward_function == "expected_fidelity": + fidelity_val = fom_value + 
fidelity_kind = fom_kind + else: + fidelity_val = ( + expected_fidelity(qc=self.state, device=self.device) + if (synthesized and routed) + else approx_expected_fidelity(qc=self.state, device=self.device, error_rates=self.err_by_gate) + ) + fidelity_kind = "exact" if (synthesized and routed) else "approx" + self.tracer.record_step( step_index=step_index, action=action_name, reward=reward_val, current_qc=self.state, + expected_fidelity=fidelity_val, + fidelity_kind=fidelity_kind, fom_value=fom_value, fom_kind=fom_kind, features=feature_vector, @@ -536,24 +550,25 @@ def reset( obs = create_feature_dict(self.state) self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto") - if self.tracer_output_path is not None: - synthesized, laid_out, routed = self._get_mdp_state() + logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) + if self.tracer_output_path is not None: self.tracer = CompilationTracer.from_initial_state( device=self.device, - input_circuit=self.state, circuit_name=self.current_circuit_name, figure_of_merit=self.reward_function, mdp_policy=self.mdp, - features=obs, - initial_fom=self.prev_reward, - fom_kind=self.prev_reward_kind, - synthesized=synthesized, - laid_out=laid_out, - routed=routed, ) - logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) + self._log_step_reward( + step_index=0, + action_name="Baseline", + reward_val=0.0, + fom_value=self.prev_reward, + fom_kind=self.prev_reward_kind, + feature_vector=obs, + done=False, + ) return obs, {} diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index cea681235..ac8ac67e5 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -64,6 +64,8 @@ class CompilationStep: num_qubits: The number of qubits in the circuit. gates_per_operation: The number of gates per operation occurring in the circuit. 
total_gates: The total number of gates included in the circuit. + expected_fidelity: The expected fidelity of the circuit. + fidelity_kind: The kind of fidelity value: 'exact' or 'approx'. fom_value: The figure of merit value for this compilation pass. fom_kind: The kind of fom value: 'exact' or 'approx'. synthesized: Whether the circuit has already been synthesized. @@ -80,6 +82,8 @@ class CompilationStep: num_qubits: int gates_per_operation: dict[str, int] total_gates: int + expected_fidelity: float + fidelity_kind: str fom_value: float fom_kind: str synthesized: bool @@ -120,45 +124,24 @@ class CompilationTracer: def from_initial_state( cls, device: Target, - input_circuit: QuantumCircuit, circuit_name: str, figure_of_merit: str, mdp_policy: str, - features: dict[str, int | NDArray[np.float32]], - initial_fom: float, - fom_kind: str, - synthesized: bool, - laid_out: bool, - routed: bool, ) -> CompilationTracer: """Alternative constructor to build the tracer more conveniently from the environment's initial state.""" device_meta = cls._extract_device_metadata(device) - tracer = cls( + return cls( circuit_name=circuit_name, figure_of_merit=figure_of_merit, mdp_policy=mdp_policy, device=device_meta ) - tracer.record_step( - step_index=0, - action="Baseline", - reward=0.0, - current_qc=input_circuit, - fom_value=initial_fom, - fom_kind=fom_kind, - features=features, - synthesized=synthesized, - laid_out=laid_out, - routed=routed, - done=False, - ) - - return tracer - def record_step( self, step_index: int, action: str, reward: float, current_qc: QuantumCircuit, + expected_fidelity: float, + fidelity_kind: str, fom_value: float, fom_kind: str, features: dict[str, int | NDArray[np.float32]], @@ -174,6 +157,8 @@ def record_step( action: The name of the compilation pass that was just applied. reward: The calculated reward for the applied pass. current_qc: The current Qiskit QuantumCircuit object after the pass. 
+ expected_fidelity: The expected fidelity of the circuit after applying the pass. + fidelity_kind: The kind of fidelity value: 'exact' or 'approx'. fom_value: The figure of merit value for the compilation pass. fom_kind: The kind of fom value: 'exact' or 'approx'. features: The quantum circuit's feature vector used by the RL agent. @@ -197,6 +182,8 @@ def record_step( num_qubits=current_qc.num_qubits, gates_per_operation=present_ops_dict, total_gates=total_gates, + expected_fidelity=round(expected_fidelity, 6), + fidelity_kind=fidelity_kind, fom_value=round(fom_value, 6), fom_kind=fom_kind, is_terminal=done, From 2e24f2120a24d1eb242c7d7517e09bbabe4c26fe Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 11 Apr 2026 14:46:09 +0200 Subject: [PATCH 11/30] included schema version & timestamp values --- src/mqt/predictor/rl/tracer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index ac8ac67e5..43cbfff75 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -11,6 +11,7 @@ from __future__ import annotations import json +import time from dataclasses import asdict, dataclass, field from pathlib import Path from typing import TYPE_CHECKING @@ -111,6 +112,8 @@ class CompilationTracer: figure_of_merit: The chosen figure of merit for this compilation. mdp_policy: The MDP transition policy. device: The target device metadata. + schema_version: The version of this schema. Upgrade in case of schema changes to maintain compatibility with tracer frontend. + timestamp: A timestamp indicating start of the compilation. steps: An ordered list of CompilationStep snapshots. 
""" @@ -118,6 +121,8 @@ class CompilationTracer: figure_of_merit: str mdp_policy: str device: DeviceMetadata + schema_version: str = "1.0.0" + timestamp: float = field(default_factory=time.time) steps: list[CompilationStep] = field(default_factory=list) @classmethod From e84d0f568b345bd533bdfc6b3319edd217e8933e Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sat, 11 Apr 2026 18:57:25 +0200 Subject: [PATCH 12/30] added more doc comments --- src/mqt/predictor/rl/tracer.py | 37 ++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index 43cbfff75..11fc615b1 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -27,7 +27,12 @@ @dataclass class TopologyEdge: - """Represents a topology edge between two qubits.""" + """Represents a topology edge between two qubits. + + Attributes: + control: The control qubit index. + target: The target qubit index. + """ control: int target: int @@ -35,7 +40,13 @@ class TopologyEdge: @dataclass class GateCalibration: - """Calibration data for a specific gate on a specific set of qubits.""" + """Calibration data for a specific gate on a specific set of qubits. + + Attributes: + qubits: The qubits that the calibration data applies to. + duration: The instructions execution duration (in seconds) on the specified set of qubits. + error: The average error rate for the instruction on the specified set of qubits. + """ qubits: list[int] duration: float | None @@ -44,7 +55,14 @@ class GateCalibration: @dataclass class DeviceMetadata: - """Metadata containing information about the target quantum device for compilation.""" + """Metadata containing information about the target quantum device for compilation. + + Attributes: + description: The name of the quantum device. + device_qubits: The number of qubits available on the device. 
+ native_gates: A set of gates native to this device. + calibration_data: The calibration data for this device per native instruction. + """ description: str device_qubits: int @@ -74,6 +92,10 @@ class CompilationStep: routed: Whether the circuit has already been routed. is_terminal: A flag indicating if the compilation process has concluded. circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format. + program_communication: The program communication value for the current circuit. + entanglement_ratio: The entanglement ratio for the current circuit. + parallelism: The parallelism value for the current circuit. + liveness: The liveness value for the current circuit. """ step_index: int @@ -133,7 +155,14 @@ def from_initial_state( figure_of_merit: str, mdp_policy: str, ) -> CompilationTracer: - """Alternative constructor to build the tracer more conveniently from the environment's initial state.""" + """Alternative constructor to build the tracer more conveniently from the environment's initial state. + + Args: + device: The target device for which compilation is performed. + circuit_name: The name of the circuit being compiled. + figure_of_merit: The chosen figure of merit for this compilation. + mdp_policy: The MDP transition policy. 
+ """ device_meta = cls._extract_device_metadata(device) return cls( circuit_name=circuit_name, figure_of_merit=figure_of_merit, mdp_policy=mdp_policy, device=device_meta From 2b3bf4fb2681033424276f65bc4ced6be5a00c07 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 11:29:00 +0000 Subject: [PATCH 13/30] =?UTF-8?q?=F0=9F=8E=A8=20pre-commit=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mqt/predictor/rl/actions.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/mqt/predictor/rl/actions.py b/src/mqt/predictor/rl/actions.py index 0e32d63d2..0e0e31be6 100644 --- a/src/mqt/predictor/rl/actions.py +++ b/src/mqt/predictor/rl/actions.py @@ -10,7 +10,6 @@ from __future__ import annotations -import os import sys import warnings from collections import defaultdict @@ -18,8 +17,6 @@ from enum import Enum from typing import TYPE_CHECKING -from bqskit import MachineModel -from bqskit import compile as bqskit_compile from pytket.architecture import Architecture from pytket.passes import ( CliffordSimp, @@ -78,7 +75,6 @@ from mqt.predictor.rl.parsing import ( PreProcessTKETRoutingAfterQiskitLayout, - get_bqskit_native_gates, ) IS_WIN_PY313 = sys.platform == "win32" and sys.version_info[:2] == (3, 13) @@ -366,7 +362,7 @@ def remove_action(name: str) -> None: ) ) -#register_action( +# register_action( # DeviceDependentAction( # "BQSKitO2", # CompilationOrigin.BQSKIT, @@ -380,7 +376,7 @@ def remove_action(name: str) -> None: # num_workers=-1, # ), # ) -#) +# ) register_action( DeviceDependentAction( @@ -526,7 +522,7 @@ def remove_action(name: str) -> None: ) ) -#register_action( +# register_action( # DeviceDependentAction( # "BQSKitMapping", # CompilationOrigin.BQSKIT, @@ -548,7 +544,7 @@ def remove_action(name: str) -> None: # ) # ), # ) -#) +# ) register_action( DeviceDependentAction( @@ -561,7 +557,7 @@ 
def remove_action(name: str) -> None: ) ) -#register_action( +# register_action( # DeviceDependentAction( # "BQSKitSynthesis", # CompilationOrigin.BQSKIT, @@ -578,7 +574,7 @@ def remove_action(name: str) -> None: # ) # ), # ) -#) +# ) register_action( DeviceIndependentAction( From efc94a35148b369e068e05bf23da5726fb5845f3 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Sun, 12 Apr 2026 13:55:10 +0200 Subject: [PATCH 14/30] revert accidental inclusion of local model and gitkeep files --- .../trained_clf_expected_fidelity.joblib | Bin 81951 -> 81263 bytes .../training_circuits_compiled/.gitkeep | 0 .../training_data_aggregated/.gitkeep | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/mqt/predictor/ml/training_data/training_circuits_compiled/.gitkeep create mode 100644 src/mqt/predictor/ml/training_data/training_data_aggregated/.gitkeep diff --git a/src/mqt/predictor/ml/training_data/trained_model/trained_clf_expected_fidelity.joblib b/src/mqt/predictor/ml/training_data/trained_model/trained_clf_expected_fidelity.joblib index e040b02848e55c97687245e66317c870ebaac433..576f8f16c6055c8eff5117795dd32304a7a13e87 100644 GIT binary patch delta 7187 zcmZu!c_5VQ7anUO<(5=hNN!w}NJo znn9YZHjncjU0q#m74{E4M=nLS8UIW6)_DuhXwIqTzD{Fn*Z<8^V7Qd+U9TaMAZT;g z+sDP-*2lvu%~{Z!?Zw$c@k~!o(`G&l--|G7oh<5zHQ4l!SdQogV%3yuXDxGJ4Dl-~ z&l5a!`Z}?X7d;|2P8{QG?604F=CE=*2)v-@ItX zQZyRZ)fxi)bVG;rO6SP)h#e`2mOj=-o_oJwVp<<6(Ors(Q$J&&mnTs<5206t|83hP zVj0hG%vwo)nGds;IlrWV;Gv^;U^YL~IOHh-bXJuULVq7;)~XK_b`t#b*kyw2i_JL9 z@LgNF3EtjMlwrz$Yngpk|6SQd@IgInC}mgk>@z2(=`+DcYwr`@P$%I@@|5%dH5;%b zkR+s;Cfp{3xMh!tLxMC)XVgk&N$U~D8P8M28K3-0I%dx4#GSBLA%)UhObDN2tM-h< z5!+4p%r?2Zv(KCqIczxAqM9OvJ9BRm8_|NZF*Qf2=WfIf#kV5PZH`j!4kR6N8boje zVux^KclogM94*A@eq|)?s1))=ap`E4M{zXzlN2u3PQjOi$Xi23K%*ZgozY9L^bq`m z8L;&C(z zak#uayr^ioY8BxLs34!JDi^=J03L)<9+EQxoEeTCRYf3k$M?m~i7v^`0c#33rai4d-Y<`aAN z5jyxUK4tb2yhr@E^Oe;HUf;M?mKc!5KUle~lUP4JMH9TR&SU 
zMWZ=HG!r8C;sRoIj}Zr2rv~CGpAnz@|i?FjN+9>J#tthpa!0Z{$FHHjv2z5wGJbB=?`T{WVtlRtq~DlEMxeHzQuB*Gd9- zrC@>sXEDJb1NqXuDD|1$s2uA*EQsM-Lj3+pz@53m#5g6xIO5WPDm_97iNJ~`s<5*u zLzHOUDpaCw0VY;(!VaFlLH=`DRl8}De>`UYMH^3Fy&pp^`jtVvwUoq^Km$4 zBeqH^B@iZpZ_+%_|wh34<4BUCi3|`oYH}_J*l%cF?roQNr8LUeLAv z{TudR3~&W(2u z=nF5doQE|opPfDl zM>~I}P#@S8FtTNHpk728NId1S%if#~uKND$Lc>T{HM--S87Ch+IbYQi_oRbQ{XR#f zng9qXe7EwG?=h&?VBJi690z+d^v-Db_yAwh4fCOsEKtjQ(LA{|9lEljBR%}W;iSvw zEZ(kEu+u9PWyKzcLAojEYOulS1UJR2!Ic9!HpN`_5-dnyPL#wv^@dY5+Mk-${lLQ9 z?8Q=n7|;&jGF$Z{6xK*ruROf)5FDtUZrI-#3y*1My>)V3A(6pkty|B87RiyU&hIf0 zVO6{47Rwxtr4@;-9tehu>57|RmlIgEZ1?L94TEi^OOC`vGhw)wGXMK%Qax9Uk;;`OswjcYa#IW^RzJOikW2$zuT4mv=lV2XFRj-xePZYB>NnyE`S#OjNLMY zn=qd6D`B2m9K816RanZC0?tlrZ>tNkA*<<>YTw&qaP&%Xb-}49Q0;9Ej+!)w1B=SI z;bJTl71z7I+n)sdUvk}_t%(Lsz|qo6&LKcueQ%K3XO#~tU< zoKGvhisysi^45b>+l#@f_lc~MJR71Py0iKvj>E%scM}(_j)h{;#m7D`iiL&-u8{6| zELicQaFMuVGUR66y_~g?1=R^=*5y7&VetvEOS|^O!2;St3gjDK z!kz;m5Fp8CscCJC-XD(v;@)`>+zOdJ|l@Ed2 zDO~THOFW>Ae$~7v#}n363A}A=ItKCT3KFIU9xxK^dtzkjIE0O6oHb}?!@xibPkfCn z_&6A=JlGHcCTo4~Mqj2N6ZDRbSxseev_W=dSyJ?dN#bQo*<_*{vDMJ5Nud7h+0 zbr$!Oa7Q#eyk)s8Kr;#?TFiBYy)r;vo040`9SCDPw>ZpM!hk0Q7B6oXSU|0pP8Utx z8+r@+^Cc{zpe>fWE2Z8Ik{lN59eL#ocl15ELu8YIhijj5QhqXUkL)U|3kZeVal7@Y z3q#4!5#5|CQJM~BtnJqv_l3h|8@?5-MSdXU)2-!Rm*4A&?uP`m3{a)C}#UdiZ4F~ zL8JW|Tz7pydbIo;<5dLgnf$K9-#wWOb#^VRiP8w@Wvh3Tw>Sasn($*B1nD>^r8#X{Y9z$tNwF!*~^%_(4^7nrI!|F}6F#ikzYr5t_9?_@`H z7_rXmRAM=|juPA7%p_LNfsJ^+8?kvdEMhx?GKlRi^CmW?GlE!=;dEjZB)o|2SnWrw zUQ-IOk{V9LQuK^K@<(OX31T}UJc-TQgo&Nyu^{yTba2i>Jb5`5q}PNAM2>_Izn%KX zm&`vxo|!X0va|0@!z&OM3Bvg0jAOGnMN6Cd)7f)>LKIQQ1iT_A$+LnBgV^q8LBu9# zBF=Qcj`fx!?iIr!=fm5vpyUEnLbe6tD>MX?^St~xVtGqu-<>ELt)m_j#H@)W2T}_7 zR>(-62C{sCQi^J2k@MOyEL^V%<1m@o1TX%OK&*r6%q5-qxT&M% zxXd(6%>bsINa&D2iIOw1vy8}a;$SK9$6;Rpc^(!*Nn3yX>8uX{cL5DOFf9Cg<=8~78XQ1i?gTkVuG@fG{U2gB7gW0 zMV{v=U>s%x@`p=937*!6iS^u3X@PmD?Bt4abS$sIit++cLLDEJlx~5u;@n5P@+Ai1 zcsP^5ONDX92`cBvbMhlhpy7|p{EhDv!5JIzmYMEE{v=-x!SfU;Of2bw2Yc3)lLI;z zZZl&7rBrc8I~M`Ve 
zDL>-FPp}~7ejFv09Ei(7|8tKO3h_Gew%{x|Be)CYuduMf)tES029;nuz(5(wm_XsD zCrQW{!^W~J|KQL5EFBe1&J~J@f)tJ8gbqKCpo4S)N=GZlZ@_@f*je_>Pt9znc@@YH z7)M1i5)m)Y#}WK&$AU!Va7nv$P>JVq7{{js8_+YIEr_Dg0(#;}VD<7pi65c@j0&vO z^c8kMo5q4_!?92|Z=C(`3G6Jx0uy&!LghLdu~3m5yake>7@xTu6=lAki8GU**1rxb zQdo@ll|G6I6b@n_x(Uv>I|P@gc;?4-R*B&ah&!J`CEN@VSJ;ekYP}J!JczVN*g?l2-j0sT$Y5H~rN5E!hlMVmpX-dt~I{mH~5>Qgyi=+QSMrs4W{~z`)7{ z@}G>IVO+;y`%#e}YF?4@%YXCQgWjcXzj_CEc(U;Fj#uM!aOvl8$2Xh8#y6o}F-sl6 zFjX`N)@s75Lb0ro4inh_+Rm&d!xYvOBp4P?+e3HSX`{N=lqEDiGu9~9@`mOLdxwu3 z55xP-L3YOh#3}77Cxp>{o8JhjT;WdAmi-;c&zC z#~}j-@a(#gimhq|G%ooV{Vi4>Ru$C+oJn;8ca6U~zZiOh9JiOb6&C~6n=oiHQh}g5 zw@*@vvWkGYp`Y^k-dh06WO=Y>GaVX5Q(SvA7$APH;_M3-xmJH5465l9ad6g z!gI~#A{u&jaP?5_`z5wPux8!-rxxcO!6|XS*GngT=xrGG<3t6+Eq5-(v~w;nzTaG# z^@IhgJt1434*7$Wn6&)WRyy2SvMHI$d0+|0D|f$2rZXW$sCh&A2V3YU^boN=U;#GO zoXU^o4$yp6`#ful5qOB-+V?!&7^=*+vQmF*!X1sNkCjFKpt@t-Deu%n;I9~UGu6=* zvXtN2$E-<&hQlI*Uu|qbc+$G6MBEQN?)jbxH8O=}yMaOZ<#gEQ$D$Tw=2}7IvY)5U zi~55G3m&O*{R`Sh^;!gWnZVJA`IATYdcgw4wM*Uh)8J9lW3PQNP7skl;Iw|z5m=bn zlw%fU1q)BhO7}z_0$l?!(Sir=@VP2=B2Q=~tb7nSVjW@vU%NwU)W*C(^yW7cVVPYZ zvHYq{`J^4(o1*^alX5-}H3iq>obS^??1hT-UmX_UIa$%>7ZN@5pR->5w*|*xK~6Qi zR&s&l?;l&X_A`L@!<8&WZ7DcC?XUfAzAcpI2p^>$B*V$ryUWyqY@uj-UmojMIMk#s zo$I$D1cW8`=;+1T!hn6|4bwO$&Z*!nAxMfFb*QVz~ z?N{%~?>6or<}v?Wgl7>1)z^Md;9`MfdbxwsE@u#^Kh<3?=mlqV{Ua_31;G7}UDMes zY~U|S+1B!F`)oeFYSUG>C^hpCf=$HB)*D??dEkkk2(E*k~?YL~+ z_Rk>)(3B3&wQPt03x{v#Po1{{nG*w7Pq-e3(ec0EuDX>10fU`VcSBqusO&-VXoMFC z+LozxTe!mW?<|44zs-PA$X-*&u!g6XcWMXl89-lG!1lBLcA$SXa(`$512(&RdL7>B z0-=`isq_(jcsd+e*P=wH;Qlv>Hm-)fpyo6+F|^1M#OH}^-nM!#D81Za zv66I{7k;EvW}78^xahR~d|@aE?QLF^?BoG`L#sWCJlsGu^g;fde_Vm7T`c+60|yYg zBq$KZZv)3QYI9@PI{}xFh8uylZ>uHn#vLL+Nh%@N7#?2Q@nG&(BQRU_d!=RbVfaUN z_kShSej7LzRTX2c%7i5{YCJ?H9``Tea3$XlW zTg5zAcW@T6{Jmn@3|hSs+6)^HfYllKPLXkUi2CWIMNJ(B+lz6Z8hdPE#M-;Z*vk)u zmyEAor>YIP@5SzmKDGe=?$V1N1g+t#fAAKCx3VCT{&C&AY7=m@J*gusO=G}4`ENGS z`eyL`&8qsQOPY|jir!VG8VIKXP30mc55mB2+voHxhoF;SMySaA5vln<8Zg~i delta 7872 
zcmb7}c{J30`^U$YJ%c1dld@({MfhHm$QFr|r7~kD`9s1%_nDrwrJx+|rkMJX-r zaz{dGqYafr*0McguJ3)$@BDuEbDrndAAOwH^?tvv>$A-0oPPWkJjxa->*o_#qGwb- z#MdJrvoL>G#?E|~d`$;|AZsTlCkNxaA)$ag#d%Z0*Nck{ z4vo?KKZ=y`_ zr!vyeB^(*;?-$JS<&N_oEj!U7$+P&zdPhb__%QzqUncsb|MrDO1pY5Ci+e)L9zKxF}wo~ z62UVn_}d>DI(1+F@qmg@FB%)o437zjhz|QNJe(;H-6u;rI6U4zI>z7o|6@MMYZ&Jf zm-nd3IxkR6lGPM{8YtX9SFNt`RwC&MZ@q6>!dpM|T;;8PZpbN;$P4*+dbUsya?80S zzlHj)ImoN0A;0yM$JzD^k#~JQHI>Vv_*SB!cor6trO?n-i-x-f$S(^cHzWCWW#qRh z$lWJUZ$a`<8uG-ykbetCz05OSW6I1oQ=CO%3mRj=g)C%|4IIcF(8Vz5eGM;&vfcyr z<|OxjfDH^)kdHKCgTiXmC+i~jyn;E^>XxHk=Ibe#>QCXT;~Cid%&}~6@aMm0f zuzS(qr;psJ1r6_>VPi+~k#@dT>k?$aP&{K!p)DRn3l8;oz9S2%KzM)&nn3HkoC@*hw z!A0Kc_@#!oeq!Cig3|o5DZp+%f(6fu9G+msqDwq}VGw!y4&)3aMTnyEEtrd;g3cPDyia4%1%0ljcrN(GYWU0@)3FE zebj>lFNcy)fjn6k8`H02&e$X5&L2>}ua(DH>=zVl?EDxDBYZfJVhI{_`jH!_VBt;WXguQ4bUuYh9OhJ2(6dDJY-nIO4|C*}maz>{dM zsIleV0fo}_6AeSpu@EDJ11ZizgGx6xp7?8oL$eFF2ar97;gc{^17#ysd}Tu;+kz66*V?~J}YR)vm^i&2s` zi{Wj(Q;az8^f(NjdOR$Xn}C<_vAbF?r>tJ&ZSkshOl->W#1Q; zf+a-gZrLg&W&|^LsV?M%ErBQ7a!xYdnuE}61*aTWDm0#&KO(Hg|f4UWqxADR#C z4?1qI4KRnBhQ$>S#lsl#_Ib zq#o?E3Ao%omk!!DzQ_j|T0ywit__z=^x*#5q>DY(ZjkX|<5_>pYT0Fq|rUh7t=!3fh_QaO7rJh=MkTg_fNB$}T%_p8nb7^M;{r_K-)uzgd%In34) z&Q{u*hi!3#w*FVcDGH9zEE7*F$XE+$k9tKOw`+rRUCJ{4WHV@dwEtuJj!^i$uSoWR z$|5jP_qACz5&)Vf9uBqqjE0x;YStFVy`b!JSIy*JUpU;+D_dU`3@O^>`(3XD!Vb%w znNMZIz)qfZpMK0P0{kvBYTrH8h9SFYgB9uPpxtHXU~s?|xcf%qZiLG$Xa&m-XL}de zIA>bmCp}Y;?MkIQD%XY$KCRh%AG(6qisBzSj{cxBr@g7x(;Aj=0&eS7SioxGa2n@q z0(6*&zPXbp2kHv0$)in%Q0eOR{Cq|@c(K~IDw&;`16CmylMY|?ff=G1KXw>*tvaHsheTMba{4hH5K?7 zqBljRGr>)tqIg9v7Q&Wq(Hfnx2EzNlgC7xY zaFyH;*%O`&e4oFIrhc9cU=yO(nY;orKfPREQ(^>d!JI}psw}j8I2LRuAqRu9ag~dT z*Fn0|w&y;xH9*1s_SQ+N2k7k--LBijy}~wuYMT&8SgS5{#PJIqvV?bcW{<81%aPnQ zR%f*#$YxzJb%8w#0{t)OD(|+3f){gM>$@t!fGJZTpma4Pc6a7gHQT~QzeK5ZyG>!| zA9s!BRH}md)~!2zJ}rSj*OiAC$f!bB@g3okE$ZO2|{WifQXOOyk$YuF`4eV~VP|4!9ad4L3eD0!5c2 zPgTwZLLk-N{OG7UXrG_l5x~}jw%S*h?2SBNdg`guxyJ!U4Htg;S>g`S%9YAG34X9D zV$*|F)M+q$S@pWZ*M+blTA>1-dzx=#c02xbohyRLE3Kdl$ws 
zT?HD~C8VOSC4-&Auk2shQBbvQKQP|N!I1f@wY!Tqf#4bXrGc7A_%S5C-8j?@wo~sa zL~dfkynvX!LaL$Au;gQm;AS`I%uUccj4q_F&dkib%KzQcELt)Q% z!O+4E{muzjsJiF9=Z%&Yq(wdJmr>aOA0&z%-5b{gn~gVq>kAveFR85a((&9Aefw44 zw7?i%YrP*$Z)HNT{Eo|%#1L?fxLcWQ>8lhS3;X|&dQ#0J2#Np)9jS(ZU9|< z3fp1~9KhkU{HX=~Oppl~CY2|>&QAH{YthWm_k=B5` zbxYSY_B-d*KUu-@@a6q}%v}=2TWt@g@z(YMCU2#9MDo@`6Xf(<au^l;G5%r}ysIOjtoW;41hK3Jlpgh4sFBJ_O4>YikA#Xp8dXY)g)7_Cb zNTa@sY)pwl9;rn(Rwf%uAm=>e<*+CjE?D3UGk6noO0a>6EOLfF8W=l}Q`O0Vypgjn zA)jSOa!FEu0(rGO$@7uZ_K^8jQ=G;9w}~%lI6@lMqk%>x4O@{H$|0w#Bd3=k57H+a zq#$qaM_$T6-Y$%sU5lJziM(2e%cuIYKVTt}+%boNhUx<3ngnTZM$Ty^^+w3)Qpgz% z$Z4V^_d!lMj-2X;oDHO&dj(VdDI#KI!GXB_C+Xay;|{(GxicFd zZ-WgS z>LW{0Z$O?z^+)9NCe-IraG+}P(Xl&`mu@CEAj3ifXD;p_NSbUMOY%l+V5>p$2C~7) zXx`u=mobPogoDz#-+NOhL#aT0I|Fx2*T?*qhmcdMFvoyphlSii(x8F`Ho1eYuef6d zd1uw+=bIjhdbR=TtLKp?v6O5;UKxYjaR&MQ@%PCwoKepzD@FrnF$Vqpxu=mkW>}B| z72?s&+KT$>3gnb4$a{m3)5(utdj`o>$$?&=o{@x{YQf_y3Wxjaf9k|IqBzmZG2Ag- z19`g=d8G3(h?9z(YJ%L|lx#4GSI|I1Udu+_u8(@U2y)Jb?RZ67r#Op3p(@AlfcqdC z*kWkNy%WphoKobBTIAJKKm7_*6vNnl+RHCc?H?v zilVu2qDI*vbMH)O7Fg#PYuHD)8;l+9v6&<;_%5-@sS*wRkUX2p7 zWlyEX-jo2F2%(UNC4B@{WZ*CP7sJF)Nw19blo_CSR=cJ(_$zVXNac2?qEX^wj^?}3 z=mFw=_+ra%Z@;mK60f54yk&z#@on=bD`pCUveWOme{P^c-lj0trh!3X!uSP<{ieb# z*8$g-rxI{LY1-JPmSJLK?CGwO4>RCuf=xdA;ZMR(bi0sr(KwN3l4d9TeU#X8=-2VJ zhq{U8&6mSvPfZXDOTq;PZV13BLTv4!w>`w;MJjG5_p^qH6t#kaM2G3{y51~di^E&u z=Fc;G)XaVnzF+UQo#vN-AUVC)FyZGdW36`_u{@5R)V9T^EH}(HP z+^Gn3UZ65WyfpiuHg}#N%nemC8ayoq+BYXCtflFF#H5?%Wu;^>aPL@r+iQ&olywGp z4#|BXnE9C=JI8wo=9jg}LTotnrVy#*gwpHlR2YUYPo7ZQ|48fCyCey{w*v-d>* zsA1U!`)=aej+|VY&L};~5}iD(M;@I6^Eqyq+au z%8-GRMl%UpRetEW77~*aBmjGS%i}amsIb-Na-fBP9mtj*Ju~!Tf~c!Gn>U9c0Zj)@ z^H*uOfMdn$2Cba)?r@A0m0@VG^=H%z7!`#(g(siX`5U1M8?<1f9tEF`P^uMnF>9hq^ z$w_n&us+Hee@%gn$ISX=!;GO*PwDyo%EhoM??(UZ8-}o^+Q|z;KZw+m-G#cZ_`qIvz36zn2&~)Mk(n6x zlMrt;R09Ae_KVykYmAsLWjXY{)RolcS;?zOTqgOU{`!I2}{MV`EW>pq!&@RZy&(#H+;fsOB*$)XnE$_35I&x6? 
zcutax>IiXSWmxWc>q#O!vgG88#D3yvde~k~>3LwpzI08*w4WF;e_MTUogmC_iJd02 zKoP`aQ*zVKG!tsyzHD4t@R9Iy?Otq2DZ_1M~UNMwcD2;d`&EHo3To^gCzmq z8s5YonlVTe^?up;G?5?HN*g!lWBCZZrOOW))z6%Mz(EIs;e5v*yJlu%yULj;QJ z|2%zE79O4sIH0Y&2F$P2)=97^f*^VR+vd1*N0487=HkP7)^IDab+lyjJL21V*WN#; z(_z!{(wbi`3!(d}hd`o?B-lts-!QZHfGC6hiL2=v5VA~tkL^=65NpxU?NV6`kGp#+ zI#&byF3P*z9z}sYAH6(QeeNNGMrQvyP@@Gg2Y21DT{K1*+-v@W^;vwBxOx3W>u@d| z=1*MErp{gt9VO#GhLrW8MS8>1?e6;U{IiN(u{RasqMgn^44NdEQ448%f&?LypcM%e z{w7M@48PRJ{Nf(<=-G;PanNZi_wYB%)uV69dl-TcA$zp7Ku{Xe zGx&Ez>S)5rw?0fW)pjD)F*j;kw={fEPVuT%Fe Date: Mon, 13 Apr 2026 16:57:53 +0200 Subject: [PATCH 15/30] Added test that checks tracer presence and superficial semantic correctness --- tests/compilation/test_tracer.py | 57 ++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/compilation/test_tracer.py diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py new file mode 100644 index 000000000..d0b5087cf --- /dev/null +++ b/tests/compilation/test_tracer.py @@ -0,0 +1,57 @@ +# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM +# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH +# All rights reserved. 
+# +# SPDX-License-Identifier: MIT +# +# Licensed under the MIT License + +"""Tests for the CompilationTracer.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from mqt.bench import BenchmarkLevel, get_benchmark + +from mqt.predictor.qcompile import qcompile +from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata + + +def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: + """Test that the compilation tracer correctly generates a JSON file when a path is provided.""" + trace_file = tmp_path / "test_trace.json" + qc = get_benchmark("ghz", level=BenchmarkLevel.INDEP, circuit_size=3) + _compiled_qc, _compilation_info, _selected_device = qcompile( + qc, figure_of_merit="expected_fidelity", tracer_output_path=str(trace_file) + ) + + assert trace_file.exists(), "Tracer JSON file was not generated." + assert trace_file.is_file(), "Tracer output path is not a valid file." + + with Path(trace_file).open(encoding="utf-8") as f: + trace_data = json.load(f) + + assert "circuit_name" in trace_data, "Tracer JSON is missing the circuit name." + assert "mdp_policy" in trace_data, "Tracer JSON is missing the mdp policy." + assert "device" in trace_data, "Tracer JSON is missing the device information." + assert "schema_version" in trace_data, "Tracer JSON is missing the schema version." + assert "timestamp" in trace_data, "Tracer JSON is missing the timestamp." + assert "steps" in trace_data, "Tracer JSON is missing the steps array." + + assert len(trace_data["steps"]) > 0, "Tracer did not record any compilation steps." 
+ assert trace_data["steps"][0]["action"] == "Baseline" + assert trace_data["schema_version"] == "1.0.0" + + try: + # initialize from JSON (throws if the structures don't match) + DeviceMetadata(**trace_data["device"]) + CompilationStep(**trace_data["steps"][0]) + + except TypeError as e: + # pytest.fail instantly stops the test and prints your custom error message + pytest.fail( + f"Semantic Validation Failed! The generated JSON does not match your Python dataclasses. Error: {e}" + ) From 6f2784a946bc1073c604dcf684c774806165fea6 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:08:13 +0200 Subject: [PATCH 16/30] adjusted CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b81df38f..6cde189b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**]) - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**]) - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**]) +- ✨ Added CompilationTracer that exports collects compilation information and exports it to a JSON file ([**@linus-hologram**]) - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**]) - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**]) @@ -74,6 +75,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool [**@denialhaag**]: https://github.com/denialhaag [**@bachase**]: https://github.com/bachase [**@Shaobo-Zhou**]: https://github.com/Shaobo-Zhou +[**@linus-hologram**]: https://github.com/linus-hologram From 
14eb26d4b88cdaa6336d79bf7f97dd65d25a240e Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:16:28 +0200 Subject: [PATCH 17/30] Update CHANGELOG.md wording fix Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Linus <42340116+linus-hologram@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cde189b1..3bbf5684b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**]) - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**]) - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**]) -- ✨ Added CompilationTracer that exports collects compilation information and exports it to a JSON file ([**@linus-hologram**]) +- ✨ Added CompilationTracer that collects compilation information and exports it to a JSON file ([`#641`]) ([**@linus-hologram**]) - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**]) - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**]) From d0f0506797a1b3ef8ff5be8afc3f91c6aaf5f530 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:28:58 +0200 Subject: [PATCH 18/30] incorporated changes from coderabbit :) --- CHANGELOG.md | 2 +- src/mqt/predictor/rl/predictor.py | 8 +++++++- src/mqt/predictor/rl/tracer.py | 13 +++++++++++-- tests/compilation/test_tracer.py | 7 +++++-- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 3bbf5684b..dc916d5a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**]) - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**]) - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**]) -- ✨ Added CompilationTracer that collects compilation information and exports it to a JSON file ([`#641`]) ([**@linus-hologram**]) +- ✨ Added CompilationTracer that collects compilation information and exports it to a JSON file ([#641]) ([**@linus-hologram**]) - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**]) - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**]) diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py index dcd3ef5a5..d97a161c3 100644 --- a/src/mqt/predictor/rl/predictor.py +++ b/src/mqt/predictor/rl/predictor.py @@ -209,4 +209,10 @@ def rl_compile( predictor = predictor_singleton predictor.env.tracer_output_path = tracer_output_path - return predictor.compile_as_predicted(qc) + result = predictor.compile_as_predicted(qc) + + # Reset tracer path to prevent leakage to subsequent calls + if predictor_singleton is not None: + predictor.env.tracer_output_path = None + + return result diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index 11fc615b1..ba172845b 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -16,7 +16,8 @@ from pathlib import Path from typing import TYPE_CHECKING -import qiskit.qasm2 as qasm2 +from qiskit import qasm2 +from qiskit.qasm2 import QASM2ExportError if TYPE_CHECKING: import numpy as 
np @@ -221,7 +222,7 @@ def record_step( fom_value=round(fom_value, 6), fom_kind=fom_kind, is_terminal=done, - circuit_qasm=qasm2.dumps(current_qc), + circuit_qasm=self._safe_qasm_dumps(current_qc), program_communication=self._extract_float(features["program_communication"]), critical_depth=self._extract_float(features["critical_depth"]), entanglement_ratio=self._extract_float(features["entanglement_ratio"]), @@ -279,3 +280,11 @@ def _extract_float(val: int | NDArray[np.float32]) -> float: if isinstance(val, int): return float(val) return float(val[0]) + + @staticmethod + def _safe_qasm_dumps(qc: QuantumCircuit) -> str: + """Safely export circuit to QASM2, returning error message on failure.""" + try: + return qasm2.dumps(qc) + except QASM2ExportError as e: + return f"QASM2 export failed: {e}" diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py index d0b5087cf..1a2e57f22 100644 --- a/tests/compilation/test_tracer.py +++ b/tests/compilation/test_tracer.py @@ -11,7 +11,7 @@ from __future__ import annotations import json -from pathlib import Path +from typing import TYPE_CHECKING import pytest from mqt.bench import BenchmarkLevel, get_benchmark @@ -19,6 +19,9 @@ from mqt.predictor.qcompile import qcompile from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata +if TYPE_CHECKING: + from pathlib import Path + def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: """Test that the compilation tracer correctly generates a JSON file when a path is provided.""" @@ -31,7 +34,7 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: assert trace_file.exists(), "Tracer JSON file was not generated." assert trace_file.is_file(), "Tracer output path is not a valid file." - with Path(trace_file).open(encoding="utf-8") as f: + with trace_file.open(encoding="utf-8") as f: trace_data = json.load(f) assert "circuit_name" in trace_data, "Tracer JSON is missing the circuit name." 
From 393568397dfdb9399f91aadc74608fa7930cbe09 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:37:45 +0200 Subject: [PATCH 19/30] made unit test for tracer more powerful - now also checks semantics of the first and last compilation steps - now also checks structural integrity of the compilation steps --- tests/compilation/test_tracer.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py index 1a2e57f22..079ed7135 100644 --- a/tests/compilation/test_tracer.py +++ b/tests/compilation/test_tracer.py @@ -44,17 +44,22 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: assert "timestamp" in trace_data, "Tracer JSON is missing the timestamp." assert "steps" in trace_data, "Tracer JSON is missing the steps array." - assert len(trace_data["steps"]) > 0, "Tracer did not record any compilation steps." - assert trace_data["steps"][0]["action"] == "Baseline" + assert len(trace_data["steps"]) > 1, "Tracer should record subsequent compilation steps beyond the Baseline." + assert trace_data["steps"][0]["action"] == "Baseline", "First step must be Baseline." assert trace_data["schema_version"] == "1.0.0" + last_step_data = trace_data["steps"][-1] + assert last_step_data.get("isTerminal") is True, "The final compilation step must be marked as terminal." + try: - # initialize from JSON (throws if the structures don't match) + # Initialize from JSON (throws if the structures don't match) DeviceMetadata(**trace_data["device"]) + + # Semantically validate both the first and the last steps CompilationStep(**trace_data["steps"][0]) + CompilationStep(**last_step_data) except TypeError as e: - # pytest.fail instantly stops the test and prints your custom error message pytest.fail( f"Semantic Validation Failed! The generated JSON does not match your Python dataclasses. 
Error: {e}" ) From 62000bb1d97136596ec4ecb496eae9fa7f5f6c72 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:41:18 +0200 Subject: [PATCH 20/30] incorporate code rabbit feedback --- src/mqt/predictor/rl/tracer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index ba172845b..7957b702a 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -240,7 +240,9 @@ def save_to_json(self, filepath: str | Path) -> None: Args: filepath: The destination path or filename for the output JSON file. """ - with Path(filepath).open("w", encoding="utf-8") as f: + path = Path(filepath) + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as f: json.dump(asdict(self), f, indent=4) @staticmethod From 90f4bb329eeaf530af4386b4d3e521d3a44930cb Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 23:09:23 +0200 Subject: [PATCH 21/30] added missing PR link --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc916d5a1..dd4d38b26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool [#393]: https://github.com/munich-quantum-toolkit/predictor/pull/393 [#385]: https://github.com/munich-quantum-toolkit/predictor/pull/385 [#360]: https://github.com/munich-quantum-toolkit/predictor/pull/360 +[#641]: https://github.com/munich-quantum-toolkit/predictor/pull/641 From 42e5d406a30126af73bde58b4f60373d09928292 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 23:13:27 +0200 Subject: [PATCH 22/30] tracer_path restoration of predictor singleton instance in predictor.py after compilation --- src/mqt/predictor/rl/predictor.py | 19 
++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py index d97a161c3..9305acc84 100644 --- a/src/mqt/predictor/rl/predictor.py +++ b/src/mqt/predictor/rl/predictor.py @@ -205,14 +205,11 @@ def rl_compile( msg = "device must not be None if predictor_singleton is None." raise ValueError(msg) predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path) - else: - predictor = predictor_singleton - predictor.env.tracer_output_path = tracer_output_path - - result = predictor.compile_as_predicted(qc) - - # Reset tracer path to prevent leakage to subsequent calls - if predictor_singleton is not None: - predictor.env.tracer_output_path = None - - return result + return predictor.compile_as_predicted(qc) + predictor = predictor_singleton + original_tracer_output_path = predictor.env.tracer_output_path + predictor.env.tracer_output_path = tracer_output_path + try: + return predictor.compile_as_predicted(qc) + finally: + predictor.env.tracer_output_path = original_tracer_output_path From a84658d0f698b0ea6d3f25ba41cd5199dbad0540 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 13 Apr 2026 23:28:09 +0200 Subject: [PATCH 23/30] fixed incorrect key for last_step_data and added comment --- src/mqt/predictor/rl/predictor.py | 2 ++ tests/compilation/test_tracer.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py index 9305acc84..ee9321f87 100644 --- a/src/mqt/predictor/rl/predictor.py +++ b/src/mqt/predictor/rl/predictor.py @@ -206,6 +206,8 @@ def rl_compile( raise ValueError(msg) predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path) return predictor.compile_as_predicted(qc) + + # use singleton and restore tracer path afterward predictor = 
predictor_singleton original_tracer_output_path = predictor.env.tracer_output_path predictor.env.tracer_output_path = tracer_output_path diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py index 079ed7135..162c256f6 100644 --- a/tests/compilation/test_tracer.py +++ b/tests/compilation/test_tracer.py @@ -49,7 +49,7 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: assert trace_data["schema_version"] == "1.0.0" last_step_data = trace_data["steps"][-1] - assert last_step_data.get("isTerminal") is True, "The final compilation step must be marked as terminal." + assert last_step_data.get("is_terminal") is True, "The final compilation step must be marked as terminal." try: # Initialize from JSON (throws if the structures don't match) From e4d03c41573a336535db9a3a7bcdb07bb3274f2d Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Tue, 14 Apr 2026 21:33:04 +0200 Subject: [PATCH 24/30] added model training in case test_tracer.py is run out of order or separately --- tests/compilation/test_tracer.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py index 162c256f6..babea29c1 100644 --- a/tests/compilation/test_tracer.py +++ b/tests/compilation/test_tracer.py @@ -15,8 +15,11 @@ import pytest from mqt.bench import BenchmarkLevel, get_benchmark +from mqt.bench.targets.devices import get_device from mqt.predictor.qcompile import qcompile +from mqt.predictor.rl.helper import get_path_trained_model +from mqt.predictor.rl.predictor import Predictor from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata if TYPE_CHECKING: @@ -27,6 +30,16 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: """Test that the compilation tracer correctly generates a JSON file when a path is provided.""" trace_file = tmp_path / "test_trace.json" qc = get_benchmark("ghz", 
level=BenchmarkLevel.INDEP, circuit_size=3) + + figure_of_merit = "expected_fidelity" + device = get_device("ibm_falcon_127") + model_name = "model_" + figure_of_merit + "_" + device.description + model_path = get_path_trained_model() / (model_name + ".zip") + + if not model_path.exists(): + predictor = Predictor(figure_of_merit="expected_fidelity", device=device) + predictor.train_model(timesteps=1000, test=True) + _compiled_qc, _compilation_info, _selected_device = qcompile( qc, figure_of_merit="expected_fidelity", tracer_output_path=str(trace_file) ) From 1dafd89dd03ed0e90ebc4d0845d06771a101e19e Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Tue, 14 Apr 2026 21:56:23 +0200 Subject: [PATCH 25/30] switched test to usage of rl_compile instead of qcompile --- tests/compilation/test_tracer.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py index babea29c1..1625023fd 100644 --- a/tests/compilation/test_tracer.py +++ b/tests/compilation/test_tracer.py @@ -17,9 +17,8 @@ from mqt.bench import BenchmarkLevel, get_benchmark from mqt.bench.targets.devices import get_device -from mqt.predictor.qcompile import qcompile from mqt.predictor.rl.helper import get_path_trained_model -from mqt.predictor.rl.predictor import Predictor +from mqt.predictor.rl.predictor import Predictor, rl_compile from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata if TYPE_CHECKING: @@ -27,7 +26,11 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: - """Test that the compilation tracer correctly generates a JSON file when a path is provided.""" + """Test that the compilation tracer correctly generates a JSON file when a path is provided. + + Args: + tmp_path: Pytest-provided temporary directory used for the trace output file. 
+ """ trace_file = tmp_path / "test_trace.json" qc = get_benchmark("ghz", level=BenchmarkLevel.INDEP, circuit_size=3) @@ -37,11 +40,11 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: model_path = get_path_trained_model() / (model_name + ".zip") if not model_path.exists(): - predictor = Predictor(figure_of_merit="expected_fidelity", device=device) + predictor = Predictor(figure_of_merit=figure_of_merit, device=device) predictor.train_model(timesteps=1000, test=True) - _compiled_qc, _compilation_info, _selected_device = qcompile( - qc, figure_of_merit="expected_fidelity", tracer_output_path=str(trace_file) + _compiled_qc, _compilation_info = rl_compile( + qc, device=device, figure_of_merit=figure_of_merit, tracer_output_path=str(trace_file) ) assert trace_file.exists(), "Tracer JSON file was not generated." From 4564a55747c3e7045a9b56b282208c2cedbd56f1 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Wed, 15 Apr 2026 19:58:49 +0200 Subject: [PATCH 26/30] incorporated first round of feedback - separate method inside predictorenv.py that handles collection of data for tracing - qasm3 circuit strings are now used instead of qasm2 --- src/mqt/predictor/rl/predictorenv.py | 55 +++++++++++++++++----------- src/mqt/predictor/rl/tracer.py | 17 ++------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index d8c66befb..a74bedf5f 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -298,7 +298,7 @@ def export_circuit(self, qc: QuantumCircuit | None = None) -> QuantumCircuit: exported._layout = self.layout # noqa: SLF001 return exported - def _log_step_reward( + def _collect_tracer_data( self, step_index: int, action_name: str, @@ -308,22 +308,7 @@ def _log_step_reward( feature_vector: dict[str, int | NDArray[np.float32]], done: bool, ) -> None: - """Log the chosen action 
and resulting reward for the current episode step.""" - logger.info( - "Episode %d step %d: action=%s reward=%.6f", - self.episode_count, - step_index, - action_name, - reward_val, - ) - if done: - logger.info( - "Episode %d finished: circuit=%s final_reward=%.6f", - self.episode_count, - self.current_circuit_name, - reward_val, - ) - + """Collects the current compilation state and sends it to the tracer.""" if self.tracer is not None and self.tracer_output_path is not None: synthesized, laid_out, routed = self._get_compilation_state_flags() @@ -362,6 +347,30 @@ def _log_step_reward( self.tracer.save_to_json(out_path) logger.info("✅TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve()) + return + + def _log_step_reward( + self, + step_index: int, + action_name: str, + reward_val: float, + done: bool, + ) -> None: + """Log the chosen action and resulting reward for the current episode step.""" + logger.info( + "Episode %d step %d: action=%s reward=%.6f", + self.episode_count, + step_index, + action_name, + reward_val, + ) + if done: + logger.info( + "Episode %d finished: circuit=%s final_reward=%.6f", + self.episode_count, + self.current_circuit_name, + reward_val, + ) def _get_compilation_state_flags(self) -> tuple[bool, bool, bool]: """Return `(synthesized, laid_out, routed)` for the current circuit state.""" @@ -404,7 +413,8 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any altered_qc = self._apply_and_update(action) if altered_qc is None: obs = self._create_observation() - self._log_step_reward( + self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=True) + self._collect_tracer_data( step_index=step_index, action_name=action_name, reward_val=0.0, @@ -421,7 +431,8 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0 self.state._layout = self.layout # noqa: SLF001 obs = 
self._create_observation() - self._log_step_reward( + self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done) + self._collect_tracer_data( step_index=step_index, action_name=action_name, reward_val=reward_val, @@ -457,7 +468,8 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any self.prev_reward, self.prev_reward_kind = new_val, new_kind obs = self._create_observation() - self._log_step_reward( + self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done) + self._collect_tracer_data( step_index=step_index, action_name=action_name, reward_val=reward_val, @@ -627,6 +639,7 @@ def reset( logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) if self.tracer_output_path is not None: + logger.info("Tracing enabled for compilation...") self.tracer = CompilationTracer.from_initial_state( device=self.device, circuit_name=self.current_circuit_name, @@ -634,7 +647,7 @@ def reset( mdp_policy=self.mdp, ) - self._log_step_reward( + self._collect_tracer_data( step_index=0, action_name="Baseline", reward_val=0.0, diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index 7957b702a..e1d4cde3e 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -16,8 +16,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from qiskit import qasm2 -from qiskit.qasm2 import QASM2ExportError +from qiskit import qasm3 if TYPE_CHECKING: import numpy as np @@ -92,7 +91,7 @@ class CompilationStep: laid_out: Whether the circuit has already been laid out. routed: Whether the circuit has already been routed. is_terminal: A flag indicating if the compilation process has concluded. - circuit_qasm: The structural representation of the circuit in OpenQASM 2.0 format. + circuit_qasm3: The structural representation of the circuit in OpenQASM 3.0 format. 
program_communication: The program communication value for the current circuit. entanglement_ratio: The entanglement ratio for the current circuit. parallelism: The parallelism value for the current circuit. @@ -114,7 +113,7 @@ class CompilationStep: laid_out: bool routed: bool is_terminal: bool - circuit_qasm: str + circuit_qasm3: str program_communication: float critical_depth: float entanglement_ratio: float @@ -222,7 +221,7 @@ def record_step( fom_value=round(fom_value, 6), fom_kind=fom_kind, is_terminal=done, - circuit_qasm=self._safe_qasm_dumps(current_qc), + circuit_qasm3=qasm3.dumps(current_qc), program_communication=self._extract_float(features["program_communication"]), critical_depth=self._extract_float(features["critical_depth"]), entanglement_ratio=self._extract_float(features["entanglement_ratio"]), @@ -282,11 +281,3 @@ def _extract_float(val: int | NDArray[np.float32]) -> float: if isinstance(val, int): return float(val) return float(val[0]) - - @staticmethod - def _safe_qasm_dumps(qc: QuantumCircuit) -> str: - """Safely export circuit to QASM2, returning error message on failure.""" - try: - return qasm2.dumps(qc) - except QASM2ExportError as e: - return f"QASM2 export failed: {e}" From 2424d3c07e0f90f2d8dcabac5d61ada4eac071e6 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Thu, 16 Apr 2026 21:19:25 +0200 Subject: [PATCH 27/30] incorporated feedback - tracer now collects all possible figures of merit - included cleaner methods for calculating the supported figures of merit and a draft for a rewrite of the calculate_reward function - included corresponding tests --- src/mqt/predictor/rl/predictorenv.py | 256 +++++++++++++++++++++++++-- src/mqt/predictor/rl/tracer.py | 60 ++++--- tests/compilation/test_tracer.py | 16 ++ 3 files changed, 298 insertions(+), 34 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index a74bedf5f..3e221a287 100644 --- 
a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -90,6 +90,8 @@ ) from mqt.predictor.rl.tracer import ( CompilationTracer, + FigureOfMeritMetrics, + FOMMetric, ) from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl @@ -171,6 +173,7 @@ def __init__( self.used_actions: list[str] = [] self.device = device self.tracer_output_path = tracer_output_path + self.hellinger_model = None self.tracer = None logger.info("MDP: " + mdp) @@ -312,26 +315,49 @@ def _collect_tracer_data( if self.tracer is not None and self.tracer_output_path is not None: synthesized, laid_out, routed = self._get_compilation_state_flags() + # Collect figures of merit + hd_metric: FOMMetric | None = None + cd_metric: FOMMetric + ef_metric: FOMMetric + esp_metric: FOMMetric | None = None + if self.reward_function == "expected_fidelity": - fidelity_val = fom_value - fidelity_kind = fom_kind + ef_metric = FOMMetric(value=fom_value, kind=fom_kind) else: - fidelity_val = ( - expected_fidelity(qc=self.state, device=self.device) - if (synthesized and routed) - else approx_expected_fidelity(qc=self.state, device=self.device, error_rates=self.err_by_gate) - ) - fidelity_kind = "exact" if (synthesized and routed) else "approx" + val, kind = self.calculate_expected_fidelity(qc=self.state, mode="auto") + ef_metric = FOMMetric(value=val, kind=kind) + + if self.reward_function == "estimated_success_probability": + esp_metric = FOMMetric(value=fom_value, kind=fom_kind) + elif esp_data_available(self.device): + val, kind = self.calculate_estimated_success_probability(qc=self.state, mode="auto") + esp_metric = FOMMetric(value=val, kind=kind) + + if self.reward_function == "critical_depth": + cd_metric = FOMMetric(value=fom_value, kind=fom_kind) + else: + val, kind = self.calculate_critical_depth(qc=self.state) + cd_metric = FOMMetric(value=val, kind=kind) + + if self.reward_function == "estimated_hellinger_distance": + hd_metric = 
FOMMetric(value=fom_value, kind=fom_kind) + elif self.hellinger_model is not None: + val, kind = self.calculate_estimated_hellinger_distance(qc=self.state) + hd_metric = FOMMetric(value=val, kind=kind) + + metrics = FigureOfMeritMetrics( + expected_fidelity=ef_metric, + success_probability=esp_metric, + critical_depth=cd_metric, + hellinger_distance=hd_metric, + ) self.tracer.record_step( step_index=step_index, action=action_name, reward=reward_val, current_qc=self.state, - expected_fidelity=fidelity_val, - fidelity_kind=fidelity_kind, - fom_value=fom_value, - fom_kind=fom_kind, + figures_of_merit=metrics, features=feature_vector, synthesized=synthesized, laid_out=laid_out, @@ -481,6 +507,204 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any return obs, reward_val, done, False, {} + def _resolve_evaluation_kind(self, qc: QuantumCircuit, mode: str) -> tuple[str, Any]: + """Resolves whether to use 'exact' or 'approx' evaluation based on the circuit state.""" + reward_layout = getattr(qc, "_layout", None) + if reward_layout is None: + reward_layout = self.layout + + if mode == "exact": + return "exact", reward_layout + if mode == "approx": + return "approx", reward_layout + + # "auto" resolution + only_native = self.is_circuit_synthesized(qc) + laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False + mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False + + kind = "exact" if (only_native and laid_out and mapped) else "approx" + return kind, reward_layout + + def _prepare_exact_qc(self, qc: QuantumCircuit, reward_layout: TranspileLayout | Layout | None) -> QuantumCircuit: + """Prepares the circuit for exact evaluation by exporting it if necessary.""" + if reward_layout is None or getattr(qc, "_layout", None) is not None: + return qc + return self.export_circuit(qc) + + def calculate_expected_fidelity(self, qc: QuantumCircuit | None = None, mode: 
str = "auto") -> tuple[float, str]: + """Calculates the expected fidelity for the given quantum circuit. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. + mode: + Selects how the method chooses between exact and approximate evaluation: + + - ``"auto"`` (default): determines computation automatically. + - ``"exact"``: always compute the exact, calibration-aware metric. + - ``"approx"``: always compute the approximate, transpile-based proxy. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the expected fidelity (float). + - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used. + """ + if qc is None: + qc = self.state + + kind, reward_layout = self._resolve_evaluation_kind(qc, mode) + + if kind == "exact": + exact_qc = self._prepare_exact_qc(qc, reward_layout) + return expected_fidelity(exact_qc, self.device), "exact" + + self._ensure_device_averages_cached() + val = approx_expected_fidelity(qc, device=self.device, error_rates=self.err_by_gate) + return val, "approx" + + def calculate_estimated_success_probability( + self, qc: QuantumCircuit | None = None, mode: str = "auto" + ) -> tuple[float, str]: + """Calculates the estimated success probability (ESP) for the given quantum circuit. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. + mode: + Selects how the method chooses between exact and approximate evaluation: + + - ``"auto"`` (default): determines computation automatically. + - ``"exact"``: always compute the exact, calibration-aware metric. + - ``"approx"``: always compute the approximate, transpile-based proxy. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the estimated success probability (float). + - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used. 
+ """ + if qc is None: + qc = self.state + + kind, reward_layout = self._resolve_evaluation_kind(qc, mode) + + if kind == "exact": + exact_qc = self._prepare_exact_qc(qc, reward_layout) + return estimated_success_probability(exact_qc, self.device), "exact" + + self._ensure_device_averages_cached() + feats = calc_supermarq_features(qc) + val = approx_estimated_success_probability( + qc, + device=self.device, + error_rates=self.err_by_gate, + gate_durations=self.dur_by_gate, + tbar=self.tbar, + par_feature=float(feats.parallelism), + liv_feature=float(feats.liveness), + n_qubits=int(qc.num_qubits), + ) + return val, "approx" + + def calculate_critical_depth(self, qc: QuantumCircuit | None = None) -> tuple[float, str]: + """Calculates the critical depth for the given quantum circuit. + + Note: + Critical depth is always computed exactly. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the critical depth (float). + - ``kind`` is always ``"exact"``. + """ + if qc is None: + qc = self.state + return crit_depth(qc), "exact" + + def calculate_estimated_hellinger_distance(self, qc: QuantumCircuit | None = None) -> tuple[float, str]: + """Calculates the estimated Hellinger distance for the given quantum circuit. + + Note: + Hellinger distance is always computed exactly using the environment's + pretrained machine learning model. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the estimated Hellinger distance (float). + - ``kind`` is always ``"exact"``. 
+ """ + if qc is None: + qc = self.state + return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact" + + # ----------------------------------------------------------------------------------------------------- + # MARK: New, cleaner method for reward calculation, functionally identical to original calculate_reward + # It might be worth using this method (unless you plan to change the original implementation), + # since this now cleanly uses above methods that are also required by the tracer data collection. + # @flowerthrower + # ----------------------------------------------------------------------------------------------------- + + # def calculate_reward_new(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]: + # """Compute the reward for a circuit and report whether it was computed exactly or approximately. + # + # This environment supports two evaluation regimes for selected figures of merit: + # + # - **Exact**: uses the calibration-aware implementation on the full circuit/device + # (e.g., uses the device Target calibration data as-is). + # - **Approximate**: uses a transpile-based proxy: + # the circuit is transpiled to the device's basis gates and the resulting basis-gate + # counts are combined with cached **per-basis-gate** calibration statistics + # (error rates and durations) to estimate the metric. This approximation ignores + # additional mapping/routing overhead beyond what is reflected in the transpiled + # basis-gate counts. + # + # Args: + # qc: + # Circuit to evaluate. If ``None``, evaluates the environment's current state. + # mode: + # Selects how the method chooses between exact and approximate evaluation: + # + # - ``"auto"`` (default): compute the exact metric if the circuit is already + # **native and mapped** for the device; otherwise compute the approximate metric. + # - ``"exact"``: always compute the exact, calibration-aware metric. 
+ # - ``"approx"``: always compute the approximate, transpile-based proxy. + # + # Returns: + # A pair ``(value, kind)`` where: + # + # - ``value`` is the scalar reward value (typically in ``[0, 1]`` for EF/ESP). + # - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used. + # """ + # if qc is None: + # qc = self.state + # + # if self.reward_function == "expected_fidelity": + # return self.calculate_expected_fidelity(qc, mode) + # + # if self.reward_function == "estimated_success_probability": + # return self.calculate_estimated_success_probability(qc, mode) + # + # if self.reward_function == "critical_depth": + # return self.calculate_critical_depth(qc) + # + # if self.reward_function == "estimated_hellinger_distance": + # return self.calculate_estimated_hellinger_distance(qc) + # + # # Fallback for other unknown / not-yet-implemented reward functions: + # logger.warning( + # "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.", + # self.reward_function, + # ) + # return 0.0, "exact" + def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]: """Compute the reward for a circuit and report whether it was computed exactly or approximately. 
@@ -640,6 +864,14 @@ def reset( if self.tracer_output_path is not None: logger.info("Tracing enabled for compilation...") + + if self.reward_function != "estimated_hellinger_distance": + self.hellinger_model = None + hellinger_model_path = get_hellinger_model_path(self.device) + if hellinger_model_path.is_file(): + # load the model so it can be used in _collect_tracer_data + self.hellinger_model = load(hellinger_model_path) + self.tracer = CompilationTracer.from_initial_state( device=self.device, circuit_name=self.current_circuit_name, diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py index e1d4cde3e..34374efad 100644 --- a/src/mqt/predictor/rl/tracer.py +++ b/src/mqt/predictor/rl/tracer.py @@ -71,6 +71,36 @@ class DeviceMetadata: calibration_data: dict[str, list[GateCalibration]] +@dataclass +class FOMMetric: + """Represents a Figure of Merit metric value. + + Attributes: + value: The figure-of-merit value. + kind: The used calculation method of the value, "exact" or "approx". + """ + + value: float + kind: str + + +@dataclass +class FigureOfMeritMetrics: + """Data containing information about various figures of merit. + + Attributes: + expected_fidelity: The expected fidelity value of the circuit. + critical_depth: The critical depth of the circuit. + hellinger_distance: The hellinger distance of the circuit, if available. + success_probability: The success probability of the circuit, if available. + """ + + expected_fidelity: FOMMetric + critical_depth: FOMMetric + hellinger_distance: FOMMetric | None + success_probability: FOMMetric | None + + @dataclass class CompilationStep: """A snapshot of the circuit state and environment metrics at a single timestep. @@ -83,16 +113,14 @@ class CompilationStep: num_qubits: The number of qubits in the circuit. gates_per_operation: The number of gates per operation occurring in the circuit. total_gates: The total number of gates included in the circuit. 
- expected_fidelity: The expected fidelity of the circuit. - fidelity_kind: The kind of fidelity value: 'exact' or 'approx'. - fom_value: The figure of merit value for this compilation pass. - fom_kind: The kind of fom value: 'exact' or 'approx'. + figures_of_merit: The figure of merit values for the current circuit. synthesized: Whether the circuit has already been synthesized. laid_out: Whether the circuit has already been laid out. routed: Whether the circuit has already been routed. is_terminal: A flag indicating if the compilation process has concluded. circuit_qasm3: The structural representation of the circuit in OpenQASM 3.0 format. program_communication: The program communication value for the current circuit. + raw_critical_depth: The raw critical depth of the circuit. entanglement_ratio: The entanglement ratio for the current circuit. parallelism: The parallelism value for the current circuit. liveness: The liveness value for the current circuit. @@ -105,17 +133,14 @@ class CompilationStep: num_qubits: int gates_per_operation: dict[str, int] total_gates: int - expected_fidelity: float - fidelity_kind: str - fom_value: float - fom_kind: str + figures_of_merit: FigureOfMeritMetrics synthesized: bool laid_out: bool routed: bool is_terminal: bool circuit_qasm3: str program_communication: float - critical_depth: float + raw_critical_depth: float entanglement_ratio: float parallelism: float liveness: float @@ -174,10 +199,7 @@ def record_step( action: str, reward: float, current_qc: QuantumCircuit, - expected_fidelity: float, - fidelity_kind: str, - fom_value: float, - fom_kind: str, + figures_of_merit: FigureOfMeritMetrics, features: dict[str, int | NDArray[np.float32]], synthesized: bool, laid_out: bool, @@ -191,10 +213,7 @@ def record_step( action: The name of the compilation pass that was just applied. reward: The calculated reward for the applied pass. current_qc: The current Qiskit QuantumCircuit object after the pass. 
- expected_fidelity: The expected fidelity of the circuit after applying the pass. - fidelity_kind: The kind of fidelity value: 'exact' or 'approx'. - fom_value: The figure of merit value for the compilation pass. - fom_kind: The kind of fom value: 'exact' or 'approx'. + figures_of_merit: The available figures of merit for the current circuit. features: The quantum circuit's feature vector used by the RL agent. synthesized: Whether the circuit has already been synthesized. laid_out: Whether the circuit has already been laid out. @@ -216,14 +235,11 @@ def record_step( num_qubits=current_qc.num_qubits, gates_per_operation=present_ops_dict, total_gates=total_gates, - expected_fidelity=round(expected_fidelity, 6), - fidelity_kind=fidelity_kind, - fom_value=round(fom_value, 6), - fom_kind=fom_kind, + figures_of_merit=figures_of_merit, is_terminal=done, circuit_qasm3=qasm3.dumps(current_qc), program_communication=self._extract_float(features["program_communication"]), - critical_depth=self._extract_float(features["critical_depth"]), + raw_critical_depth=self._extract_float(features["critical_depth"]), entanglement_ratio=self._extract_float(features["entanglement_ratio"]), parallelism=self._extract_float(features["parallelism"]), liveness=self._extract_float(features["liveness"]), diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py index 1625023fd..bafe68073 100644 --- a/tests/compilation/test_tracer.py +++ b/tests/compilation/test_tracer.py @@ -67,6 +67,22 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: last_step_data = trace_data["steps"][-1] assert last_step_data.get("is_terminal") is True, "The final compilation step must be marked as terminal." + # Verify Figures of Merit + fom_data = last_step_data.get("figures_of_merit") + assert fom_data is not None, "Figures of merit dictionary is missing from the trace step." 
+ + # always calculated ones + assert fom_data.get("expected_fidelity") is not None, "Expected fidelity failed to populate." + assert fom_data.get("critical_depth") is not None, "Critical depth fallback failed." + + # for this device ESP should be populated + assert fom_data.get("success_probability") is not None, "ESP fallback calculation failed." + assert "value" in fom_data["success_probability"], "ESP is missing its float value." + assert "kind" in fom_data["success_probability"], "ESP is missing its kind string." + + # for this device HD should fallback to None + assert fom_data.get("hellinger_distance") is None, "Hellinger distance should be null when model is missing." + try: # Initialize from JSON (throws if the structures don't match) DeviceMetadata(**trace_data["device"]) From 0c42d4a044f840685e10973aafaf407dee756e84 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Thu, 16 Apr 2026 21:42:15 +0200 Subject: [PATCH 28/30] minor cleanup --- src/mqt/predictor/rl/predictorenv.py | 1 - tests/compilation/test_tracer.py | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 3e221a287..0d4967823 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -373,7 +373,6 @@ def _collect_tracer_data( self.tracer.save_to_json(out_path) logger.info("✅TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve()) - return def _log_step_reward( self, diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py index bafe68073..6f3b49477 100644 --- a/tests/compilation/test_tracer.py +++ b/tests/compilation/test_tracer.py @@ -80,8 +80,11 @@ def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None: assert "value" in fom_data["success_probability"], "ESP is missing its float value." assert "kind" in fom_data["success_probability"], "ESP is missing its kind string." 
- # for this device HD should fallback to None - assert fom_data.get("hellinger_distance") is None, "Hellinger distance should be null when model is missing." + # It is valid for HD to be None (model missing) or a populated dictionary (model exists) + hd_metric = fom_data.get("hellinger_distance") + if hd_metric is not None: + assert "value" in hd_metric, "Hellinger distance is missing its float value." + assert "kind" in hd_metric, "Hellinger distance is missing its kind string." try: # Initialize from JSON (throws if the structures don't match) From 197ebc555f9498242c43134625b10bc84453e794 Mon Sep 17 00:00:00 2001 From: Linus <42340116+linus-hologram@users.noreply.github.com> Date: Mon, 20 Apr 2026 20:16:51 +0200 Subject: [PATCH 29/30] added rewritten, cleaner version of the calculate_reward function + feedback incorporation --- src/mqt/predictor/rl/predictorenv.py | 136 +++------------------------ 1 file changed, 15 insertions(+), 121 deletions(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 0d4967823..08b53e7ea 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -456,7 +456,7 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0 self.state._layout = self.layout # noqa: SLF001 obs = self._create_observation() - self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done) + self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done) self._collect_tracer_data( step_index=step_index, action_name=action_name, @@ -493,7 +493,7 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any self.prev_reward, self.prev_reward_kind = new_val, new_kind obs = self._create_observation() - self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=done) + 
self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done) self._collect_tracer_data( step_index=step_index, action_name=action_name, @@ -644,66 +644,6 @@ def calculate_estimated_hellinger_distance(self, qc: QuantumCircuit | None = Non qc = self.state return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact" - # ----------------------------------------------------------------------------------------------------- - # MARK: New, cleaner method for reward calculation, functionally identical to original calculate_reward - # It might be worth using this method (unless you plan to change the original implementation), - # since this now cleanly uses above methods that are also required by the tracer data collection. - # @flowerthrower - # ----------------------------------------------------------------------------------------------------- - - # def calculate_reward_new(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]: - # """Compute the reward for a circuit and report whether it was computed exactly or approximately. - # - # This environment supports two evaluation regimes for selected figures of merit: - # - # - **Exact**: uses the calibration-aware implementation on the full circuit/device - # (e.g., uses the device Target calibration data as-is). - # - **Approximate**: uses a transpile-based proxy: - # the circuit is transpiled to the device's basis gates and the resulting basis-gate - # counts are combined with cached **per-basis-gate** calibration statistics - # (error rates and durations) to estimate the metric. This approximation ignores - # additional mapping/routing overhead beyond what is reflected in the transpiled - # basis-gate counts. - # - # Args: - # qc: - # Circuit to evaluate. If ``None``, evaluates the environment's current state. 
- # mode: - # Selects how the method chooses between exact and approximate evaluation: - # - # - ``"auto"`` (default): compute the exact metric if the circuit is already - # **native and mapped** for the device; otherwise compute the approximate metric. - # - ``"exact"``: always compute the exact, calibration-aware metric. - # - ``"approx"``: always compute the approximate, transpile-based proxy. - # - # Returns: - # A pair ``(value, kind)`` where: - # - # - ``value`` is the scalar reward value (typically in ``[0, 1]`` for EF/ESP). - # - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used. - # """ - # if qc is None: - # qc = self.state - # - # if self.reward_function == "expected_fidelity": - # return self.calculate_expected_fidelity(qc, mode) - # - # if self.reward_function == "estimated_success_probability": - # return self.calculate_estimated_success_probability(qc, mode) - # - # if self.reward_function == "critical_depth": - # return self.calculate_critical_depth(qc) - # - # if self.reward_function == "estimated_hellinger_distance": - # return self.calculate_estimated_hellinger_distance(qc) - # - # # Fallback for other unknown / not-yet-implemented reward functions: - # logger.warning( - # "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.", - # self.reward_function, - # ) - # return 0.0, "exact" - def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]: """Compute the reward for a circuit and report whether it was computed exactly or approximately. @@ -738,70 +678,24 @@ def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") if qc is None: qc = self.state - # Reward functions that are always computed exactly. 
- if self.reward_function not in {"expected_fidelity", "estimated_success_probability"}: - if self.reward_function == "critical_depth": - return crit_depth(qc), "exact" - if self.reward_function == "estimated_hellinger_distance": - return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact" - # Fallback for other unknown / not-yet-implemented reward functions: - logger.warning( - "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.", - self.reward_function, - ) - return 0.0, "exact" - - reward_layout = cast("TranspileLayout | Layout | None", getattr(qc, "_layout", None)) - if reward_layout is None: - # use the env layout if the circuit has no attached layout - # (e.g., if it's an intermediate state or a newly exported copy) - reward_layout = self.layout - - # Dual-path evaluation (exact vs. approximate) for EF / ESP. - if mode == "exact": - kind = "exact" - elif mode == "approx": - kind = "approx" - else: # "auto" - only_native = self.is_circuit_synthesized(qc) - laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False - mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False - - kind = "exact" if (only_native and laid_out and mapped) else "approx" - - if kind == "exact": - exact_qc = ( - qc if reward_layout is None or getattr(qc, "_layout", None) is not None else self.export_circuit(qc) - ) - if self.reward_function == "expected_fidelity": - return expected_fidelity(exact_qc, self.device), "exact" - - return estimated_success_probability(exact_qc, self.device), "exact" + if self.reward_function == "expected_fidelity": + return self.calculate_expected_fidelity(qc, mode) - # Approximate metrics use per-basis-gate averages cached from device calibration - self._ensure_device_averages_cached() + if self.reward_function == "estimated_success_probability": + return self.calculate_estimated_success_probability(qc, mode) - 
if self.reward_function == "expected_fidelity": - val = approx_expected_fidelity( - qc, - device=self.device, - error_rates=self.err_by_gate, - ) - return val, "approx" + if self.reward_function == "critical_depth": + return self.calculate_critical_depth(qc) - feats = calc_supermarq_features(qc) + if self.reward_function == "estimated_hellinger_distance": + return self.calculate_estimated_hellinger_distance(qc) - val = approx_estimated_success_probability( - qc, - device=self.device, - error_rates=self.err_by_gate, - gate_durations=self.dur_by_gate, - tbar=self.tbar, - par_feature=float(feats.parallelism), - liv_feature=float(feats.liveness), - n_qubits=int(qc.num_qubits), + # Fallback for other unknown / not-yet-implemented reward functions: + logger.warning( + "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.", + self.reward_function, ) - return val, "approx" + return 0.0, "exact" def render(self) -> None: """Renders the current state.""" From 5c2ecd6038fe2642ce82790f944754e4a7e104f3 Mon Sep 17 00:00:00 2001 From: flowerthrower Date: Mon, 20 Apr 2026 23:42:56 +0200 Subject: [PATCH 30/30] =?UTF-8?q?=E2=8F=AA=20remove=20merge=20leftover=20l?= =?UTF-8?q?ine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mqt/predictor/rl/predictorenv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 08b53e7ea..ecc481063 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -454,7 +454,6 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any if self.reward_function == "estimated_hellinger_distance": reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0 - self.state._layout = self.layout # noqa: SLF001 obs = self._create_observation() self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, 
done=done) self._collect_tracer_data(