# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""A reward space that compares two series of runtime measurements."""
from typing import List, Optional

import numpy as np
# NOTE: `import scipy` alone does not reliably expose the `scipy.stats`
# subpackage; it must be imported explicitly for `scipy.stats.kruskal`.
import scipy.stats

from compiler_gym.errors import BenchmarkInitError, ServiceError
from compiler_gym.spaces.reward import Reward
from compiler_gym.util.gym_type_hints import ActionType, ObservationType


class RuntimeSeriesReward(Reward):
    """Reward equal to the change in median runtime between successive steps,
    gated by a Kruskal-Wallis significance test so that measurement noise does
    not produce spurious rewards.
    """

    def __init__(
        self,
        runtime_count: int,
        warmup_count: int,
        default_value: int = 0,
    ):
        """Constructor.

        :param runtime_count: The number of runtime measurements expected in
            each observed series.
        :param warmup_count: The number of warmup runs performed before
            measuring (recorded here; the environment performs the warmups).
        :param default_value: The default reward value.
        """
        super().__init__(
            name="runtime",
            observation_spaces=["Runtime"],
            default_value=default_value,
            min=None,
            max=None,
            default_negates_returns=True,
            deterministic=False,
            platform_dependent=True,
        )
        self.runtime_count = runtime_count
        self.warmup_count = warmup_count
        # Runtimes of the unmodified benchmark; seeds `previous_runtimes` on
        # reset and is recomputed only when the benchmark changes.
        self.starting_runtimes: Optional[List[float]] = None
        # The series measured at the previous step, compared against in the
        # next call to update().
        self.previous_runtimes: Optional[List[float]] = None
        self.current_benchmark: Optional[str] = None

    def reset(self, benchmark, observation_view) -> None:
        # If we are changing the benchmark then check that it is runnable.
        if benchmark != self.current_benchmark:
            if not observation_view["IsRunnable"]:
                raise BenchmarkInitError(f"Benchmark is not runnable: {benchmark}")
            self.current_benchmark = benchmark
            self.starting_runtimes = None

        # Compute initial runtimes lazily so repeated resets on the same
        # benchmark do not re-measure.
        if self.starting_runtimes is None:
            self.starting_runtimes = observation_view["Runtime"]

        self.previous_runtimes = self.starting_runtimes

    def update(
        self,
        actions: List[ActionType],
        observations: List[ObservationType],
        observation_view,
    ) -> float:
        del actions  # unused
        del observation_view  # unused
        runtimes = observations[0]
        if len(runtimes) != self.runtime_count:
            raise ServiceError(
                f"Expected {self.runtime_count} runtimes but received {len(runtimes)}"
            )

        # Use the Kruskal-Wallis test to determine if the medians differ
        # between the two series of runtimes. Only a significant difference
        # (p < 0.05) yields a nonzero reward; otherwise the difference is
        # treated as measurement noise.
        # https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance
        _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes)
        # Reward is positive when runtime *decreases* (previous - current);
        # the original `current - previous` ordering rewarded slowdowns.
        reward = (
            np.median(self.previous_runtimes) - np.median(runtimes)
            if pval < 0.05
            else 0
        )
        self.previous_runtimes = runtimes
        return reward
from compiler_gym.envs.llvm import LlvmEnv
from compiler_gym.spaces import RuntimeReward, RuntimeSeriesReward
from compiler_gym.wrappers import CompilerEnvWrapper


class RuntimeSeriesEstimateReward(CompilerEnvWrapper):
    """LLVM environment wrapper that registers a :class:`RuntimeSeriesReward`
    reward space under the name ``"runtime"`` and selects it as the
    environment's default reward space.
    """

    def __init__(
        self,
        env: LlvmEnv,
        runtime_count: int = 30,
        warmup_count: int = 0,
    ):
        """Constructor.

        :param env: The environment to wrap.
        :param runtime_count: The number of times to execute the binary when
            estimating the runtime.
        :param warmup_count: The number of warmup runs of the binary to perform
            before measuring the runtime.
        """
        super().__init__(env)

        self.env.unwrapped.reward.add_space(
            RuntimeSeriesReward(
                runtime_count=runtime_count,
                warmup_count=warmup_count,
            )
        )
        self.env.unwrapped.reward_space = "runtime"

        # Configure the backend to produce the expected number of
        # measurements per observation.
        self.env.unwrapped.runtime_observation_count = runtime_count
        self.env.unwrapped.runtime_warmup_runs_count = warmup_count

    def fork(self) -> "RuntimeSeriesEstimateReward":
        fkd = self.env.fork()
        # Remove the inherited "runtime" space so that the new
        # RuntimeSeriesEstimateReward wrapper instance does not attempt to
        # redefine it, which would raise a warning.
        del fkd.unwrapped.reward.spaces["runtime"]
        return RuntimeSeriesEstimateReward(
            env=fkd,
            runtime_count=self.reward.spaces["runtime"].runtime_count,
            warmup_count=self.reward.spaces["runtime"].warmup_count,
        )
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import List, Optional

from compiler_gym.errors import BenchmarkInitError, ServiceError
from compiler_gym.spaces.reward import Reward
from compiler_gym.util.gym_type_hints import ActionType, ObservationType

import scipy
import numpy as np


class RuntimeSeriesReward(Reward):
    """Reward derived from the shift in median runtime between two
    consecutive series of measurements, gated by a Kruskal-Wallis test."""

    def __init__(
        self,
        runtime_count: int,
        warmup_count: int,
        default_value: int = 0,
    ):
        super().__init__(
            name="runtime",
            observation_spaces=["Runtime"],
            default_value=default_value,
            min=None,
            max=None,
            default_negates_returns=True,
            deterministic=False,
            platform_dependent=True,
        )
        self.runtime_count = runtime_count
        self.warmup_count = warmup_count
        self.starting_runtimes: List[float] = None
        self.previous_runtimes: List[float] = None
        self.current_benchmark: Optional[str] = None

    def reset(self, benchmark, observation_view) -> None:
        # A newly selected benchmark must first be verified as runnable.
        if benchmark != self.current_benchmark:
            if not observation_view["IsRunnable"]:
                raise BenchmarkInitError(f"Benchmark is not runnable: {benchmark}")
            self.current_benchmark = benchmark
            self.starting_runtimes = None

        # Measure the baseline runtimes once per benchmark.
        if self.starting_runtimes is None:
            self.starting_runtimes = observation_view["Runtime"]

        self.previous_runtimes = self.starting_runtimes

    def update(
        self,
        actions: List[ActionType],
        observations: List[ObservationType],
        observation_view,
    ) -> float:
        del actions  # unused
        del observation_view  # unused
        runtimes = observations[0]
        if len(runtimes) != self.runtime_count:
            raise ServiceError(
                f"Expected {self.runtime_count} runtimes but received {len(runtimes)}"
            )

        # Compare the two series with the Kruskal-Wallis test; only a
        # statistically significant difference in medians (p < 0.05) produces
        # a nonzero reward, computed as the difference of the two medians.
        # https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance
        _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes)
        if pval < 0.05:
            reward = np.median(runtimes) - np.median(self.previous_runtimes)
        else:
            reward = 0
        self.previous_runtimes = runtimes
        return reward
from compiler_gym.envs.llvm import LlvmEnv
from compiler_gym.spaces import RuntimeReward
from compiler_gym.spaces import RuntimeSeriesReward
from compiler_gym.wrappers import CompilerEnvWrapper


class RuntimeSeriesEstimateReward(CompilerEnvWrapper):
    """Wrap an LLVM environment with a series-based runtime reward space
    registered under the name ``"runtime"`` and selected as the default."""

    def __init__(
        self,
        env: LlvmEnv,
        runtime_count: int = 30,
        warmup_count: int = 0,
    ):
        """Constructor.

        :param env: The environment to wrap.
        :param runtime_count: Number of binary executions per runtime estimate.
        :param warmup_count: Number of warmup runs before measurement begins.
        """
        super().__init__(env)

        unwrapped = self.env.unwrapped
        unwrapped.reward.add_space(
            RuntimeSeriesReward(
                runtime_count=runtime_count,
                warmup_count=warmup_count,
            )
        )
        unwrapped.reward_space = "runtime"
        unwrapped.runtime_observation_count = runtime_count
        unwrapped.runtime_warmup_runs_count = warmup_count

    def fork(self) -> "RuntimeSeriesEstimateReward":
        forked = self.env.fork()
        # Drop the inherited "runtime" space: the new wrapper registers its
        # own copy and would otherwise warn about a redefinition.
        del forked.unwrapped.reward.spaces["runtime"]
        space = self.reward.spaces["runtime"]
        return RuntimeSeriesEstimateReward(
            env=forked,
            runtime_count=space.runtime_count,
            warmup_count=space.warmup_count,
        )
insertions(+) diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py index 73dd0c9c2..16c658865 100644 --- a/examples/llvm_autotuning/optimization_target.py +++ b/examples/llvm_autotuning/optimization_target.py @@ -116,4 +116,26 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float: return speedup + if self.value == OptimizationTarget.RUNTIME_SERIES: + with _RUNTIME_LOCK: + with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env: + new_env.reset() + new_env.runtime_observation_count = runtime_count + new_env.runtime_warmup_count = 0 + new_env.apply(env.state) + final_runtimes = new_env.observation.Runtime() + assert len(final_runtimes) == runtime_count + + new_env.reset() + new_env.send_param("llvm.apply_baseline_optimizations", "-O3") + o3_runtimes = new_env.observation.Runtime() + assert len(o3_runtimes) == runtime_count + + logger.debug("O3 runtimes: %s", o3_runtimes) + logger.debug("Final runtimes: %s", final_runtimes) + speedup = np.median(o3_runtimes) / max(np.median(final_runtimes), 1e-12) + logger.debug("Speedup: %.4f", speedup) + + return speedup + assert False, f"Unknown OptimizationTarget: {self.value}" From e1f68afd8659c1c5b186bbfb5bf78f41979f54ef Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Fri, 5 Aug 2022 17:04:37 -0500 Subject: [PATCH 06/19] Increase reward if two series are significantly different --- compiler_gym/spaces/runtime_series_reward.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py index 7179b1ac2..a2523b87e 100644 --- a/compiler_gym/spaces/runtime_series_reward.py +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -69,6 +69,7 @@ def update( # difference between the two medians. Otherwise, set the reward as 0. 
# https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes) - reward = np.median(runtimes) - np.median(self.previous_runtimes) if pval < 0.05 else 0 + diff = np.median(runtimes) - np.median(self.previous_runtimes) + reward = 2 * diff if pval < 0.05 else diff self.previous_runtimes = runtimes return reward From 24924183202bb45a317888e314c7916398888ac2 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Fri, 5 Aug 2022 21:22:46 -0500 Subject: [PATCH 07/19] Fix bug in reward calculation --- compiler_gym/spaces/runtime_series_reward.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py index a2523b87e..2d8fa3373 100644 --- a/compiler_gym/spaces/runtime_series_reward.py +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -69,7 +69,6 @@ def update( # difference between the two medians. Otherwise, set the reward as 0. 
# https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes) - diff = np.median(runtimes) - np.median(self.previous_runtimes) - reward = 2 * diff if pval < 0.05 else diff + reward = np.median(self.previous_runtimes) - np.median(runtimes) if pval < 0.05 else 0 self.previous_runtimes = runtimes return reward From 7e9f51145d2191066abb56a71660d079d1f0e919 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Mon, 22 Aug 2022 20:54:48 -0500 Subject: [PATCH 08/19] rename runtime to runtimeseries --- compiler_gym/spaces/runtime_series_reward.py | 2 +- compiler_gym/wrappers/llvm.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py index 2d8fa3373..739345e77 100644 --- a/compiler_gym/spaces/runtime_series_reward.py +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -20,7 +20,7 @@ def __init__( default_value: int = 0, ): super().__init__( - name="runtime", + name="runtimeseries", observation_spaces=["Runtime"], default_value=default_value, min=None, diff --git a/compiler_gym/wrappers/llvm.py b/compiler_gym/wrappers/llvm.py index 6ad1af65f..36529be5b 100644 --- a/compiler_gym/wrappers/llvm.py +++ b/compiler_gym/wrappers/llvm.py @@ -94,7 +94,7 @@ def __init__( warmup_count=warmup_count, ) ) - self.env.unwrapped.reward_space = "runtime" + self.env.unwrapped.reward_space = "runtimeseries" self.env.unwrapped.runtime_observation_count = runtime_count self.env.unwrapped.runtime_warmup_runs_count = warmup_count @@ -104,9 +104,9 @@ def fork(self) -> "RuntimeSeriesEstimateReward": # Remove the original "runtime" space so that we that new # RuntimeSeriesEstimateReward wrapper instance does not attempt to # redefine, raising a warning. 
- del fkd.unwrapped.reward.spaces["runtime"] + del fkd.unwrapped.reward.spaces["runtimeseries"] return RuntimeSeriesEstimateReward( env=fkd, - runtime_count=self.reward.spaces["runtime"].runtime_count, - warmup_count=self.reward.spaces["runtime"].warmup_count, + runtime_count=self.reward.spaces["runtimeseries"].runtime_count, + warmup_count=self.reward.spaces["runtimeseries"].warmup_count, ) From ceb4e213b218d4c392b1f0c20e38b52d879a64e0 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Mon, 22 Aug 2022 20:55:46 -0500 Subject: [PATCH 09/19] add check --- examples/llvm_autotuning/autotuners/nevergrad_.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/llvm_autotuning/autotuners/nevergrad_.py b/examples/llvm_autotuning/autotuners/nevergrad_.py index bacea33d8..4e1ae0cdb 100644 --- a/examples/llvm_autotuning/autotuners/nevergrad_.py +++ b/examples/llvm_autotuning/autotuners/nevergrad_.py @@ -29,7 +29,10 @@ def nevergrad( https://facebookresearch.github.io/nevergrad/ """ - if optimization_target == OptimizationTarget.RUNTIME: + if ( + optimization_target == OptimizationTarget.RUNTIME or + optimization_target == OptimizationTarget.RUNTIME_SERIES + ): def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() From 812e07c5376d5401d1b81da196196769735f3c4e Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Tue, 19 Jul 2022 12:39:27 -0500 Subject: [PATCH 10/19] Compare two runtime serires medians using Kruskal-Wallis test --- compiler_gym/spaces/runtime_series_reward.py | 74 ++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 compiler_gym/spaces/runtime_series_reward.py diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py new file mode 100644 index 000000000..7179b1ac2 --- /dev/null +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -0,0 +1,74 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import List, Optional

from compiler_gym.errors import BenchmarkInitError, ServiceError
from compiler_gym.spaces.reward import Reward
from compiler_gym.util.gym_type_hints import ActionType, ObservationType

import scipy
import numpy as np


class RuntimeSeriesReward(Reward):
    """Reward space comparing successive runtime measurement series: the
    reward is the median shift when a Kruskal-Wallis test deems it
    significant, and zero otherwise."""

    def __init__(
        self,
        runtime_count: int,
        warmup_count: int,
        default_value: int = 0,
    ):
        super().__init__(
            name="runtime",
            observation_spaces=["Runtime"],
            default_value=default_value,
            min=None,
            max=None,
            default_negates_returns=True,
            deterministic=False,
            platform_dependent=True,
        )
        self.runtime_count = runtime_count
        self.warmup_count = warmup_count
        self.starting_runtimes: List[float] = None
        self.previous_runtimes: List[float] = None
        self.current_benchmark: Optional[str] = None

    def reset(self, benchmark, observation_view) -> None:
        # Switching benchmarks requires a runnability check first.
        if benchmark != self.current_benchmark:
            if not observation_view["IsRunnable"]:
                raise BenchmarkInitError(f"Benchmark is not runnable: {benchmark}")
            self.current_benchmark = benchmark
            self.starting_runtimes = None

        # Initial runtimes are gathered only when absent (new benchmark or
        # first reset).
        if self.starting_runtimes is None:
            self.starting_runtimes = observation_view["Runtime"]

        self.previous_runtimes = self.starting_runtimes

    def update(
        self,
        actions: List[ActionType],
        observations: List[ObservationType],
        observation_view,
    ) -> float:
        del actions  # unused
        del observation_view  # unused
        runtimes = observations[0]
        if len(runtimes) != self.runtime_count:
            raise ServiceError(
                f"Expected {self.runtime_count} runtimes but received {len(runtimes)}"
            )

        # Kruskal-Wallis decides whether the two series' medians genuinely
        # differ; if so the reward is their difference, else zero.
        # https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance
        _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes)
        median_now = np.median(runtimes)
        median_prev = np.median(self.previous_runtimes)
        reward = median_now - median_prev if pval < 0.05 else 0
        self.previous_runtimes = runtimes
        return reward
from compiler_gym.envs.llvm import LlvmEnv
from compiler_gym.spaces import RuntimeReward
from compiler_gym.spaces import RuntimeSeriesReward
from compiler_gym.wrappers import CompilerEnvWrapper


class RuntimeSeriesEstimateReward(CompilerEnvWrapper):
    """Environment wrapper installing :class:`RuntimeSeriesReward` (as the
    ``"runtime"`` space) and making it the active reward space."""

    def __init__(
        self,
        env: LlvmEnv,
        runtime_count: int = 30,
        warmup_count: int = 0,
    ):
        """Constructor.

        :param env: The environment to wrap.
        :param runtime_count: How many times the binary is executed when
            estimating the runtime.
        :param warmup_count: How many warmup runs precede the measurement.
        """
        super().__init__(env)

        reward_space = RuntimeSeriesReward(
            runtime_count=runtime_count,
            warmup_count=warmup_count,
        )
        self.env.unwrapped.reward.add_space(reward_space)
        self.env.unwrapped.reward_space = "runtime"

        self.env.unwrapped.runtime_observation_count = runtime_count
        self.env.unwrapped.runtime_warmup_runs_count = warmup_count

    def fork(self) -> "RuntimeSeriesEstimateReward":
        child = self.env.fork()
        # The fork inherits the "runtime" space; delete it so the wrapper
        # constructed below can register it afresh without warning.
        del child.unwrapped.reward.spaces["runtime"]
        current = self.reward.spaces["runtime"]
        return RuntimeSeriesEstimateReward(
            env=child,
            runtime_count=current.runtime_count,
            warmup_count=current.warmup_count,
        )
insertions(+) diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py index 73dd0c9c2..16c658865 100644 --- a/examples/llvm_autotuning/optimization_target.py +++ b/examples/llvm_autotuning/optimization_target.py @@ -116,4 +116,26 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float: return speedup + if self.value == OptimizationTarget.RUNTIME_SERIES: + with _RUNTIME_LOCK: + with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env: + new_env.reset() + new_env.runtime_observation_count = runtime_count + new_env.runtime_warmup_count = 0 + new_env.apply(env.state) + final_runtimes = new_env.observation.Runtime() + assert len(final_runtimes) == runtime_count + + new_env.reset() + new_env.send_param("llvm.apply_baseline_optimizations", "-O3") + o3_runtimes = new_env.observation.Runtime() + assert len(o3_runtimes) == runtime_count + + logger.debug("O3 runtimes: %s", o3_runtimes) + logger.debug("Final runtimes: %s", final_runtimes) + speedup = np.median(o3_runtimes) / max(np.median(final_runtimes), 1e-12) + logger.debug("Speedup: %.4f", speedup) + + return speedup + assert False, f"Unknown OptimizationTarget: {self.value}" From 7d9c7c20137111429bcd362f2ff687e0eb8f9144 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Fri, 5 Aug 2022 17:04:37 -0500 Subject: [PATCH 13/19] Increase reward if two series are significantly different --- compiler_gym/spaces/runtime_series_reward.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py index 7179b1ac2..a2523b87e 100644 --- a/compiler_gym/spaces/runtime_series_reward.py +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -69,6 +69,7 @@ def update( # difference between the two medians. Otherwise, set the reward as 0. 
# https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes) - reward = np.median(runtimes) - np.median(self.previous_runtimes) if pval < 0.05 else 0 + diff = np.median(runtimes) - np.median(self.previous_runtimes) + reward = 2 * diff if pval < 0.05 else diff self.previous_runtimes = runtimes return reward From 262ef55a61dbd6b81c2b51628786f6255efd8f19 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Fri, 5 Aug 2022 21:22:46 -0500 Subject: [PATCH 14/19] Fix bug in reward calculation --- compiler_gym/spaces/runtime_series_reward.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py index a2523b87e..2d8fa3373 100644 --- a/compiler_gym/spaces/runtime_series_reward.py +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -69,7 +69,6 @@ def update( # difference between the two medians. Otherwise, set the reward as 0. 
# https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes) - diff = np.median(runtimes) - np.median(self.previous_runtimes) - reward = 2 * diff if pval < 0.05 else diff + reward = np.median(self.previous_runtimes) - np.median(runtimes) if pval < 0.05 else 0 self.previous_runtimes = runtimes return reward From b797e93298bd0435db6ff1bdb054f737e3f69595 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Mon, 22 Aug 2022 20:54:48 -0500 Subject: [PATCH 15/19] rename runtime to runtimeseries --- compiler_gym/spaces/runtime_series_reward.py | 2 +- compiler_gym/wrappers/llvm.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py index 2d8fa3373..739345e77 100644 --- a/compiler_gym/spaces/runtime_series_reward.py +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -20,7 +20,7 @@ def __init__( default_value: int = 0, ): super().__init__( - name="runtime", + name="runtimeseries", observation_spaces=["Runtime"], default_value=default_value, min=None, diff --git a/compiler_gym/wrappers/llvm.py b/compiler_gym/wrappers/llvm.py index 6ad1af65f..36529be5b 100644 --- a/compiler_gym/wrappers/llvm.py +++ b/compiler_gym/wrappers/llvm.py @@ -94,7 +94,7 @@ def __init__( warmup_count=warmup_count, ) ) - self.env.unwrapped.reward_space = "runtime" + self.env.unwrapped.reward_space = "runtimeseries" self.env.unwrapped.runtime_observation_count = runtime_count self.env.unwrapped.runtime_warmup_runs_count = warmup_count @@ -104,9 +104,9 @@ def fork(self) -> "RuntimeSeriesEstimateReward": # Remove the original "runtime" space so that we that new # RuntimeSeriesEstimateReward wrapper instance does not attempt to # redefine, raising a warning. 
- del fkd.unwrapped.reward.spaces["runtime"] + del fkd.unwrapped.reward.spaces["runtimeseries"] return RuntimeSeriesEstimateReward( env=fkd, - runtime_count=self.reward.spaces["runtime"].runtime_count, - warmup_count=self.reward.spaces["runtime"].warmup_count, + runtime_count=self.reward.spaces["runtimeseries"].runtime_count, + warmup_count=self.reward.spaces["runtimeseries"].warmup_count, ) From 40303161199c6f4611211ff0bf1952aa3c7974be Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Mon, 22 Aug 2022 20:55:46 -0500 Subject: [PATCH 16/19] add check --- examples/llvm_autotuning/autotuners/nevergrad_.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/llvm_autotuning/autotuners/nevergrad_.py b/examples/llvm_autotuning/autotuners/nevergrad_.py index bacea33d8..4e1ae0cdb 100644 --- a/examples/llvm_autotuning/autotuners/nevergrad_.py +++ b/examples/llvm_autotuning/autotuners/nevergrad_.py @@ -29,7 +29,10 @@ def nevergrad( https://facebookresearch.github.io/nevergrad/ """ - if optimization_target == OptimizationTarget.RUNTIME: + if ( + optimization_target == OptimizationTarget.RUNTIME or + optimization_target == OptimizationTarget.RUNTIME_SERIES + ): def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() From 330c1bbde40d571a1bcd37c53ddf99b179448081 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Thu, 25 Aug 2022 13:52:57 -0500 Subject: [PATCH 17/19] remove duplicate code --- .../llvm_autotuning/optimization_target.py | 27 +++---------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py index 16c658865..0672cd4c5 100644 --- a/examples/llvm_autotuning/optimization_target.py +++ b/examples/llvm_autotuning/optimization_target.py @@ -94,29 +94,10 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float: env.observation.ObjectTextSizeBytes(), 1 ) - if self.value == 
OptimizationTarget.RUNTIME: - with _RUNTIME_LOCK: - with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env: - new_env.reset() - new_env.runtime_observation_count = runtime_count - new_env.runtime_warmup_count = 0 - new_env.apply(env.state) - final_runtimes = new_env.observation.Runtime() - assert len(final_runtimes) == runtime_count - - new_env.reset() - new_env.send_param("llvm.apply_baseline_optimizations", "-O3") - o3_runtimes = new_env.observation.Runtime() - assert len(o3_runtimes) == runtime_count - - logger.debug("O3 runtimes: %s", o3_runtimes) - logger.debug("Final runtimes: %s", final_runtimes) - speedup = np.median(o3_runtimes) / max(np.median(final_runtimes), 1e-12) - logger.debug("Speedup: %.4f", speedup) - - return speedup - - if self.value == OptimizationTarget.RUNTIME_SERIES: + if ( + self.value == OptimizationTarget.RUNTIME or + self.value == OptimizationTarget.RUNTIME_SERIES + ): with _RUNTIME_LOCK: with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env: new_env.reset() From 77eba6fdc8cf0c501586eb427df431b0f52f286d Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Thu, 25 Aug 2022 13:56:56 -0500 Subject: [PATCH 18/19] documentation --- compiler_gym/wrappers/llvm.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/compiler_gym/wrappers/llvm.py b/compiler_gym/wrappers/llvm.py index 36529be5b..e50881084 100644 --- a/compiler_gym/wrappers/llvm.py +++ b/compiler_gym/wrappers/llvm.py @@ -70,7 +70,15 @@ def fork(self) -> "RuntimePointEstimateReward": ) class RuntimeSeriesEstimateReward(CompilerEnvWrapper): - """TODO: documentation + """LLVM wrapper that estimates the runtime of a program using N runtime + observations and uses it as the reward. + + This class wraps an LLVM environment and registers a new runtime reward + space. 
It is similar to the RuntimePointEstimateReward except that it only + computes runtime differences if the change in runtime is significantly + different from the runtimes in the previous step. + + See RuntimeSeriesReward for more details. """ def __init__( @@ -101,7 +109,7 @@ def __init__( def fork(self) -> "RuntimeSeriesEstimateReward": fkd = self.env.fork() - # Remove the original "runtime" space so that we that new + # Remove the original "runtimeseries" space so that the new # RuntimeSeriesEstimateReward wrapper instance does not attempt to # redefine, raising a warning. del fkd.unwrapped.reward.spaces["runtimeseries"] From a53276aa137ba49e5ef805f6a4de664903b138a0 Mon Sep 17 00:00:00 2001 From: Qingwei Lan Date: Thu, 25 Aug 2022 13:59:36 -0500 Subject: [PATCH 19/19] make RuntimeSeriesReward more readable --- compiler_gym/spaces/runtime_series_reward.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py index 739345e77..e01674bed 100644 --- a/compiler_gym/spaces/runtime_series_reward.py +++ b/compiler_gym/spaces/runtime_series_reward.py @@ -69,6 +69,17 @@ def update( # difference between the two medians. Otherwise, set the reward as 0. # https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes) - reward = np.median(self.previous_runtimes) - np.median(runtimes) if pval < 0.05 else 0 + + # If the pval is less than 0.05, this means that the current series of + # runtimes is significantly different from the previous series of + # runtimes. In this case, we compute the reward as the difference + # between the medians of the two series. + if pval < 0.05: + reward = np.median(self.previous_runtimes) - np.median(runtimes) + # If the runtimes are not significantly different, set reward as 0.
+ else: + reward = 0 + + # Update previous runtimes self.previous_runtimes = runtimes return reward