Skip to content
15 changes: 15 additions & 0 deletions adastop/benchopt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pandas as pd


def process_benchopt(file):
    """
    Turn a benchopt result file into a table of final objective values.

    For now, suppose that there is only one dataset.

    Parameters
    ----------
    file : str or path-like
        Parquet file of benchopt results. It must provide the columns
        ``solver_name``, ``objective_value`` and ``idx_rep``.

    Returns
    -------
    pandas.DataFrame
        One column per solver and one row per repetition, both in order of
        first appearance in the file. Each cell holds the last
        ``objective_value`` row found for that solver in that repetition.

    Raises
    ------
    KeyError
        If one of the three required columns is missing.
    IndexError
        If a solver has no row for one of the repetitions.
    """
    df = pd.read_parquet(file)
    df = df[["solver_name", "objective_value", "idx_rep"]]
    solvers = df["solver_name"].unique()
    df_ret = {name: [] for name in solvers}
    for rep in df["idx_rep"].unique():
        for solver in solvers:
            df_rep_solver = df.loc[(df["solver_name"] == solver) & (df["idx_rep"] == rep)]
            # Only the three columns selected above survive the projection, so
            # the value must be read from 'objective_value'; any other column
            # name raises KeyError here.
            df_ret[solver].append(df_rep_solver["objective_value"].iloc[-1])
    return pd.DataFrame(df_ret)
140 changes: 113 additions & 27 deletions adastop/cli.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,25 @@
import click
import pickle
import yaml
import os
from pathlib import Path
import subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from .benchopt import process_benchopt
from .compare_agents import MultipleAgentsComparator

LITTER_FILE = ".adastop_comparator.pkl"

@click.group()
@click.pass_context
def adastop(ctx):
"""
Program to perform adaptive stopping algorithm using csv file intput_file.
LITTER_FILE = ".adastop_comparator.pkl"

Use adastop sub-command --help to have help for a specific sub-command
"""
pass

@adastop.command()
@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
@click.argument('input_file',required = True, type=str)
@click.pass_context
def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
def compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
"""
Perform one step of adaptive stopping algorithm using csv file intput_file.
Perform one step of adaptive stopping algorithm using the dataframe df.
At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`.
"""
path_lf = Path(input_file).parent.absolute() / LITTER_FILE
df = pd.read_csv(input_file, index_col=0)

n_fits_per_group = len(df)
n_agents = len(df.columns)
if compare_to_first:
Expand All @@ -48,7 +32,13 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
with open(path_lf, 'rb') as fp:
comparator = pickle.load(fp)

Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in df.columns]
names = []
for i in range(len(comparator.agent_names)):
if i in comparator.current_comparisons.ravel():
names.append(comparator.agent_names[i])


Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in names]
if len(Z[0]) > comparator.K * n_fits_per_group:
raise ValueError('Error: you tried to use more group than what was initially declared, this is not allowed by the theory.')
assert "continue" in list(comparator.decisions.values()), "Test finished at last iteration."
Expand All @@ -57,13 +47,15 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
comparator = MultipleAgentsComparator(n_fits_per_group, n_groups,
n_permutations, comparisons,
alpha, beta, seed)
Z = [df[agent].values for agent in df.columns]
names = df.columns

Z = [df[agent].values for agent in names]

data = {df.columns[i] : Z[i] for i in range(len(df.columns))}
data = {names[i] : Z[i] for i in range(len(names))}
# recover also the data of agent that were decided.
if comparator.agent_names is not None:
for agent in comparator.agent_names:
if agent not in df.columns:
if agent not in data.keys():
data[agent]=comparator.eval_values[agent]

comparator.partial_compare(data, False)
Expand All @@ -86,6 +78,100 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
pickle.dump(comparator, fp)
click.echo("Comparator Saved")


@click.group()
@click.pass_context
def adastop(ctx):
    """
    Program to perform adaptive stopping algorithm using csv file input_file.

    Use adastop sub-command --help to have help for a specific sub-command
    """
    # The docstring above is what click prints for `adastop --help`, so it is
    # kept short. The group only acts as the parent of the sub-commands
    # registered with `@adastop.command()`; `ctx` is not used here.

@adastop.command()
@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
@click.argument('input_file',required = True, type=str)
@click.pass_context
def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
    """
    Perform one step of adaptive stopping algorithm using csv file input_file.
    At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`.
    """
    # The docstring doubles as the click help text, hence no parameter section.
    # The comparator state file is looked up in the directory of the csv file.
    path_lf = Path(input_file).parent.absolute() / LITTER_FILE
    # The first csv column is used as the row index, not as data.
    df = pd.read_csv(input_file, index_col=0)
    compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first)


@adastop.command()
@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
@click.option("--size-group", default=6, show_default=True, help="Number of benchopt repetitions run for each group.")
@click.argument('config_file',required = True, type=str)
@click.pass_context
def compare_benchopt(ctx, config_file, size_group, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
    """
    Perform one step of computing benchmark and then adaptive stopping algorithm.
    The benchmark is supposed to be in the current directory.

    WARNING: still experimental.
    """
    # Local import: only this command needs it (the docstring above is the
    # click help text, so implementation notes live in comments).
    import tempfile

    # The comparator state file is looked up next to the benchopt config file.
    path_lf = Path(config_file).parent.absolute() / LITTER_FILE

    comparator = None
    k = 0
    if os.path.isfile(path_lf):
        # NOTE(security): unpickling executes arbitrary code; only point this
        # command at a state file that adastop wrote itself.
        with open(path_lf, 'rb') as fp:
            comparator = pickle.load(fp)
        k = comparator.k

    # benchopt is asked to write "<run_name>" and the results are then read
    # back from the "outputs" directory of the current working directory.
    run_name = "adastop_result_file_" + str(k)
    csv_path = "outputs/" + run_name + ".csv"
    parquet_path = "outputs/" + run_name + ".parquet"

    if os.path.isfile(csv_path):
        # Results for group k were already computed and converted: reuse them
        # instead of running the benchmark again.
        df = pd.read_csv(csv_path, index_col=0)
    else:
        if k > 0:
            # Keep only the solvers that still take part in a comparison.
            active_ids = comparator.current_comparisons.ravel()
            undecided_solvers = [
                name
                for i, name in enumerate(comparator.agent_names)
                if i in active_ids
            ]

            with open(config_file, 'r') as file:
                # An empty YAML file loads as None; fall back to an empty mapping.
                config = yaml.safe_load(file) or {}
            config['solver'] = undecided_solvers

            # Write the restricted config to a private temporary file rather
            # than a fixed, shared path, and remove it once benchopt is done.
            tmp_config = tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False)
            try:
                with tmp_config:
                    yaml.dump(config, tmp_config)

                click.echo("Doing comparisons for "+str(len(undecided_solvers))+ " solvers: "+", ".join(undecided_solvers))
                subprocess.check_output(["benchopt", "run", ".", "--config", tmp_config.name,
                                         "--env", "-r", str(size_group),
                                         "--output", run_name])
            finally:
                os.remove(tmp_config.name)
        else:
            # initially, run everything
            subprocess.check_output(["benchopt", "run", ".", "--config",
                                     config_file, "--env", "-r", str(size_group),
                                     "--output", run_name])

        # Convert the benchopt output once and cache it as csv for later calls.
        df = process_benchopt(parquet_path)
        df.to_csv(csv_path)

    compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first)


@adastop.command()
@click.argument('folder',required = True, type=str)
@click.pass_context
Expand Down
3 changes: 2 additions & 1 deletion adastop/compare_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ def partial_compare(self, eval_values, verbose=True):
admissible_values_sup = values[
self.level_spent + icumulative_probas <= clevel
]
if len(np.unique(values)) < 1/clevel:
print("WARNING: too many values are equal, or size of group too small, the test may not be precise.")

if len(admissible_values_sup) > 0:
bk_sup = admissible_values_sup[0] # the minimum admissible value
Expand Down Expand Up @@ -324,7 +326,6 @@ def partial_compare(self, eval_values, verbose=True):
- np.mean(Z[comp[1]][: ((k + 1) * self.n[comp[1]])]
)
)

if Tmax > bk_sup:
id_reject = np.arange(len(current_decisions))[current_decisions== "continue"][imax]
current_decisions[id_reject] = "reject"
Expand Down
15 changes: 15 additions & 0 deletions adastop/data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pandas as pd


def process_benchopt(file):
    """
    Build a solver-by-repetition table of final objective values.

    For now, suppose that there is only one dataset.

    The parquet file `file` is read and reduced to the columns
    ``solver_name``, ``objective_value`` and ``idx_rep``. The returned
    DataFrame has one column per solver and one row per repetition (both in
    order of first appearance); each cell is the last ``objective_value`` row
    of that solver within that repetition.
    """
    results = pd.read_parquet(file)[["solver_name", "objective_value", "idx_rep"]]
    solver_names = results["solver_name"].unique()
    repetitions = results["idx_rep"].unique()

    def final_objective(solver, rep):
        # Last recorded objective among the rows of this (solver, repetition).
        selected = (results["solver_name"] == solver) & (results["idx_rep"] == rep)
        return results.loc[selected, "objective_value"].iloc[-1]

    return pd.DataFrame(
        {
            solver: [final_objective(solver, rep) for rep in repetitions]
            for solver in solver_names
        }
    )