diff --git a/CDB_study.slurm b/CDB_study.slurm index 87e07e3..f9a1774 100644 --- a/CDB_study.slurm +++ b/CDB_study.slurm @@ -7,7 +7,7 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH -A plgautopt26-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 #SBATCH --array=0-11 # 12 tasks total SEED=${1:-42} diff --git a/comprehensive_study.slurm b/comprehensive_study.slurm index 4485cd5..b54e3cc 100644 --- a/comprehensive_study.slurm +++ b/comprehensive_study.slurm @@ -7,7 +7,7 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH -A plgautopt26-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 #SBATCH --array=0-12 # Increased to 13 tasks total to split sequential runs # 1st argument: SEED (Default: 42) diff --git a/dynamicalgorithmselection/agents/ppo_utils.py b/dynamicalgorithmselection/agents/ppo_utils.py index d2156f0..4061267 100644 --- a/dynamicalgorithmselection/agents/ppo_utils.py +++ b/dynamicalgorithmselection/agents/ppo_utils.py @@ -159,17 +159,19 @@ def __init__(self, dim, optimizer_num, device): super().__init__() self.device = device self.optimizer_num = optimizer_num - self.embedders = [ - ( - nn.Sequential( - nn.Linear(dim, 64), - nn.ReLU(), - nn.Linear(64, 1), - nn.ReLU(), - ) - ).to(device) - for _ in range(2 * optimizer_num) - ] + self.embedders = nn.ModuleList( + [ + ( + nn.Sequential( + nn.Linear(dim, 64), + nn.ReLU(), + nn.Linear(64, 1), + nn.ReLU(), + ) + ).to(device) + for _ in range(2 * optimizer_num) + ] + ) self.embedder_final = nn.Sequential( nn.Linear(9 + optimizer_num * 2, 64), @@ -203,17 +205,19 @@ class RLDASCritic(nn.Module): def __init__(self, dim, optimizer_num, device): super().__init__() self.device = device - self.embedders = [ - ( - nn.Sequential( - nn.Linear(dim, 64), - nn.ReLU(), - nn.Linear(64, 1), - nn.ReLU(), - ) - ).to(device) - for _ in range(2 * optimizer_num) - ] + self.embedders = nn.ModuleList( + [ + ( + nn.Sequential( + nn.Linear(dim, 64), + nn.ReLU(), + nn.Linear(64, 1), + nn.ReLU(), + ) + ).to(device) + for _ in range(2 * optimizer_num) + ] + ) self.embedder_final = nn.Sequential( nn.Linear(9 + optimizer_num * 2, 64), nn.Tanh(), diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index a04bc6c..ffa9eab 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -1,6 +1,7 @@ import argparse import os import shutil +import warnings from random import seed as set_random_seed from typing import List, Type, Dict, Any import cocopp @@ -17,6 +18,8 @@ from dynamicalgorithmselection.experiments.utils import DIMENSIONS from dynamicalgorithmselection.optimizers.Optimizer import Optimizer +warnings.filterwarnings("ignore") + AGENTS_DICT = { "random": RandomAgent, "policy-gradient": PolicyGradientAgent, @@ -144,7 +147,7 @@ def parse_arguments(): "--reward-option", type=int, choices=[1, 2, 3, 4], - default=2, + default=1, help="id of method used to compute reward (valid options: 1-4, default 2 maps to r2)", ) @@ -201,7 +204,6 @@ def test(args, action_space): options = { "action_space": action_space, } | common_options(args) - # agent_state = torch.load(f) if args.agent == "policy-gradient": options.update( torch.load( diff --git a/dynamicalgorithmselection/optimizers/DE/DE.py b/dynamicalgorithmselection/optimizers/DE/DE.py index fdbb78b..4552951 100644 --- a/dynamicalgorithmselection/optimizers/DE/DE.py +++ b/dynamicalgorithmselection/optimizers/DE/DE.py @@ -9,6 +9,8 @@ def __init__(self, problem, options): self.n_individuals is None ): # number of offspring, aka offspring population size self.n_individuals = 170 + self.Nmin = 30 # Discrepancy: Nmin is 30 in Population.py + self.Nmax = 170 assert self.n_individuals > 0 self._n_generations = 0 # number of generations self._printed_evaluations = self.n_function_evaluations diff --git a/dynamicalgorithmselection/optimizers/DE/JDE21.py b/dynamicalgorithmselection/optimizers/DE/JDE21.py index ce15507..a001c83 100644 --- a/dynamicalgorithmselection/optimizers/DE/JDE21.py +++ b/dynamicalgorithmselection/optimizers/DE/JDE21.py @@ -3,26 +3,22 @@ class JDE21(DE): + """Implementation adapted to exactly mirror the discrepancies found in the provided + optimizer.py and Population.py, including continuous NLPSR and unused success archives.""" + start_condition_parameters = ["x", "y", "F", "Cr"] def __init__(self, problem, options): super().__init__(problem, options) - - # Mathematical minimum population limit to survive RL starvation - self.Nmin = 4 - # Population parameters - # We start with the base sizes defined in the j21 paper, - # though set_data/initialize will override this if the RL agent injects a different size. self.bNP = 160 self.sNP = 10 self.n_individuals = self.bNP + self.sNP # Stagnation and Reset parameters self.age = 0 - self.eps = 1e-12 # Tolerance for fitness equality - self.MyEps = 0.25 # Threshold ratio (25%) for reset - self.reductions_done = 0 + self.eps = 1e-12 + self.MyEps = 0.25 # Self-adaptation probabilities self.tau1 = 0.1 @@ -38,7 +34,7 @@ def __init__(self, problem, options): # Parameter Limits (Small Population) self.Fl_s = 0.17 self.CRl_s = 0.1 - self.CRu_s = 0.8 + self.CRu_s = 0.8 # Note: ignored in optimizer.py logic # Shared Upper Bound for F self.Fu = 1.1 @@ -64,6 +60,12 @@ def initialize(self, args=None, x=None, y=None): for i, xi in enumerate(x) ] ) + + # Discrepancy: Initialize dead history archives from Population.py + self.MF = np.ones(self.ndim_problem * 20) * 0.2 + self.MCr = np.ones(self.ndim_problem * 20) * 0.2 + self.k = 0 + return x, y def _reflect_bounds(self, v): @@ -90,58 +92,46 @@ def _check_population_reduction(self, x, y): self.F = np.full(actual_size, self.Finit) self.Cr = np.full(actual_size, self.CRinit) - # REDUCTION LOGIC - thresholds = [0.25, 0.50, 0.75] - if self.reductions_done < len(thresholds): - progress = self.n_function_evaluations / self.max_function_evaluations - if progress >= thresholds[self.reductions_done]: - # Calculate the standard halved size for the big population - new_bNP = self.bNP // 2 - - min_allowed_bNP = max(1, self.Nmin - self.sNP) - new_bNP = max(new_bNP, min_allowed_bNP) - - # Only perform the competition if we are actually shrinking the array - if new_bNP < self.bNP: - part1_idx = np.arange(new_bNP) - part2_idx = np.arange(new_bNP, 2 * new_bNP) - - keep_idx = [] - for i, j in zip(part1_idx, part2_idx): - if j < self.bNP: - keep_idx.append(i if y[i] <= y[j] else j) - else: - keep_idx.append(i) - - keep_b_idx = np.array(keep_idx, dtype=int) - s_idx = np.arange(int(self.bNP), int(self.n_individuals), dtype=int) - - x = np.concatenate([x[keep_b_idx], x[s_idx]], axis=0) - y = np.concatenate([y[keep_b_idx], y[s_idx]], axis=0) - self.F = np.concatenate([self.F[keep_b_idx], self.F[s_idx]], axis=0) - self.Cr = np.concatenate( - [self.Cr[keep_b_idx], self.Cr[s_idx]], axis=0 - ) + # Discrepancy: Continuous NLPSR logic from Population.py + progress = self.n_function_evaluations / self.max_function_evaluations + if progress >= 1.0: + return x, y - # Update sizes for the newly reduced population - self.bNP = int(len(keep_b_idx)) - self.n_individuals = int(len(y)) + new_NP = int( + np.round( + self.Nmax + (self.Nmin - self.Nmax) * np.power(progress, 1 - progress) + ) + ) - self.reductions_done += 1 + if new_NP < self.n_individuals: + # Discrepancy: optimizer.py simply takes the last NP elements + x = x[-new_NP:] + y = y[-new_NP:] + self.F = self.F[-new_NP:] + self.Cr = self.Cr[-new_NP:] + self.n_individuals = new_NP + self.bNP = new_NP - self.sNP return x, y + def r_choice(self, preferred_pool, exclude): + valid = [idx for idx in preferred_pool if idx not in exclude] + return self.rng_optimization.choice(valid) if valid else exclude[0] + def _evolve_population(self, x, y, args, is_big=True): if self.n_individuals == 0: - return x, y + return x, y, [], [], [] start_idx = 0 if is_big else self.bNP end_idx = self.bNP if is_big else self.n_individuals f_low = self.Fl_b if is_big else self.Fl_s - cr_bound = self.CRu_b if is_big else self.CRu_s + # Discrepancy: optimizer.py ignores CRu_s and uses CRu_b for both! + cr_bound = self.CRu_b cr_low = self.CRl_b if is_big else self.CRl_s + SF, SCr, df = [], [], [] + for i in range(start_idx, end_idx): # Parameter Adaptation new_F = ( @@ -155,7 +145,7 @@ def _evolve_population(self, x, y, args, is_big=True): else self.Cr[i] ) - # Mutation Pool Selection with Extreme RL Fallbacks + # Mutation Pool Selection if is_big: progress = self.n_function_evaluations / self.max_function_evaluations ms_size = 1 if progress <= 1 / 3 else 2 if progress <= 2 / 3 else 3 @@ -172,37 +162,22 @@ def _evolve_population(self, x, y, args, is_big=True): pool_r2_r3 = np.concatenate([np.arange(self.bNP), ms_indices]) - # Helper to safely pick a target or fallback sequentially - def safe_choice(preferred_pool, exclude): - valid = [idx for idx in preferred_pool if idx not in exclude] - if not valid: - valid = [ - idx - for idx in range(self.n_individuals) - if idx not in exclude - ] - return self.rng_optimization.choice(valid) if valid else i - - r1 = safe_choice(range(self.bNP), [i]) - r2 = safe_choice(pool_r2_r3, [i, r1]) - r3 = safe_choice(pool_r2_r3, [i, r1, r2]) + r1 = self.r_choice(range(self.bNP), [i]) + r2 = self.r_choice(pool_r2_r3, [i, r1]) + r3 = self.r_choice(pool_r2_r3, [i, r1, r2]) else: pool = [idx for idx in range(self.bNP, self.n_individuals) if idx != i] - # Normal behavior: P_s has enough individuals if len(pool) >= 3: r1, r2, r3 = self.rng_optimization.choice(pool, 3, replace=False) else: - # FALLBACK 1: Try borrowing from the full population without replacement full_pool = [idx for idx in range(self.n_individuals) if idx != i] if len(full_pool) >= 3: r1, r2, r3 = self.rng_optimization.choice( full_pool, 3, replace=False ) else: - # EXTREME FALLBACK: Population is < 4. We MUST allow replacement. - # If population is literally 1, it will just pick `i` three times. full_pool_with_i = list(range(self.n_individuals)) r1, r2, r3 = self.rng_optimization.choice( full_pool_with_i, 3, replace=True @@ -227,13 +202,18 @@ def safe_choice(preferred_pool, exclude): # Crowding & Selection if is_big: - # Euclidean distance crowding dists = np.sum((x[: self.bNP] - u) ** 2, axis=1) target = np.argmin(dists) else: target = i if new_y <= y[target]: + # Track for unused history archives + SF.append(new_F) + SCr.append(new_Cr) + d = (y[target] - new_y) / (y[target] + 1e-9) + df.append(d) + x[target], y[target] = u, new_y self.F[target], self.Cr[target] = new_F, new_Cr @@ -243,7 +223,7 @@ def safe_choice(preferred_pool, exclude): elif is_big and target == i: self.age += 1 - return x, y + return x, y, SF, SCr, df def iterate(self, x=None, y=None, args=None): x, y = self._check_population_reduction(x, y) @@ -251,32 +231,28 @@ def iterate(self, x=None, y=None, args=None): # P_b Reinitialization Check if self.bNP > 0: best_b_y = np.min(y[: self.bNP]) + # Discrepancy: prevecEnakih logic eqs_b = np.sum(np.abs(y[: self.bNP] - best_b_y) < self.eps) age_limit = 0.1 * self.max_function_evaluations - if (eqs_b >= self.bNP * self.MyEps) or (self.age >= age_limit): + if (eqs_b > 2 and eqs_b > self.bNP * self.MyEps) or (self.age > age_limit): x[: self.bNP] = self.rng_initialization.uniform( self.initial_lower_boundary, self.initial_upper_boundary, (self.bNP, self.ndim_problem), ) - y[: self.bNP] = np.array( - [ - self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) - for i, xi in enumerate(x[: self.bNP]) - ] - ) self.F[: self.bNP] = self.Finit self.Cr[: self.bNP] = self.CRinit + # Discrepancy: Setting cost explicitly to 1e15 without evaluating + y[: self.bNP] = 1e15 self.age = 0 # P_s Reinitialization Check if self.sNP > 0: - # Safely find the best in the small population best_s_idx = self.bNP + np.argmin(y[self.bNP :]) eqs_s = np.sum(np.abs(y[self.bNP :] - y[best_s_idx]) < self.eps) - if eqs_s >= self.sNP * self.MyEps: + if eqs_s > 2 and eqs_s > self.sNP * self.MyEps: best_x_s = x[best_s_idx].copy() best_y_s = y[best_s_idx] @@ -285,40 +261,60 @@ def iterate(self, x=None, y=None, args=None): self.initial_upper_boundary, (self.sNP, self.ndim_problem), ) - y[self.bNP :] = np.array( - [ - self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) - for i, xi in enumerate(x[self.bNP :]) - ] - ) self.F[self.bNP :] = self.Finit self.Cr[self.bNP :] = self.CRinit + # Discrepancy: Setting cost explicitly to 1e15 + y[self.bNP :] = 1e15 + + x[best_s_idx] = best_x_s + y[best_s_idx] = best_y_s - # Elitism: retain the best small-population individual - x[self.bNP], y[self.bNP] = best_x_s, best_y_s + SF_total, SCr_total, df_total = [], [], [] # Big Population Generation if self.bNP > 0: - x, y = self._evolve_population(x, y, args, is_big=True) + x, y, SF, SCr, df = self._evolve_population(x, y, args, is_big=True) + SF_total.extend(SF) + SCr_total.extend(SCr) + df_total.extend(df) # Migration - # The best individual migrates from P_b to P_s if self.bNP > 0 and self.sNP > 0: best_overall_idx = np.argmin(y) if best_overall_idx < self.bNP: - worst_s_idx = self.bNP + np.argmax(y[self.bNP :]) - x[worst_s_idx] = x[best_overall_idx].copy() - y[worst_s_idx] = y[best_overall_idx] - self.F[worst_s_idx] = self.F[best_overall_idx] - self.Cr[worst_s_idx] = self.Cr[best_overall_idx] + # Discrepancy: Overwrites explicitly the first index of P_s (self.bNP) + x[self.bNP] = x[best_overall_idx].copy() + y[self.bNP] = y[best_overall_idx] # Small Population Generation (repeats m times) if self.sNP > 0: - # m is traditionally bNP // sNP, but must fallback cleanly if bNP is 0 m = self.bNP // self.sNP if self.bNP > 0 else 1 - m = max(1, m) # Ensure it executes at least once if P_s is all we have + m = max(1, m) for _ in range(m): - x, y = self._evolve_population(x, y, args, is_big=False) + x, y, SF, SCr, df = self._evolve_population(x, y, args, is_big=False) + SF_total.extend(SF) + SCr_total.extend(SCr) + df_total.extend(df) + + # Discrepancy: Update dead history archives + if len(SF_total) > 0: + SF_arr = np.array(SF_total) + SCr_arr = np.array(SCr_total) + df_arr = np.array(df_total) + + def mean_wL(df_vals, s_vals): + w = df_vals / np.sum(df_vals) + if np.sum(w * s_vals) > 0.000001: + return np.sum(w * (s_vals**2)) / np.sum(w * s_vals) + else: + return 0.5 + + self.MF[self.k] = mean_wL(df_arr, SF_arr) + self.MCr[self.k] = mean_wL(df_arr, SCr_arr) + self.k = (self.k + 1) % len(self.MF) + else: + self.MF[self.k] = 0.5 + self.MCr[self.k] = 0.5 self._n_generations += 1 return x, y diff --git a/dynamicalgorithmselection/optimizers/DE/MADDE.py b/dynamicalgorithmselection/optimizers/DE/MADDE.py index 27bf266..76857d8 100644 --- a/dynamicalgorithmselection/optimizers/DE/MADDE.py +++ b/dynamicalgorithmselection/optimizers/DE/MADDE.py @@ -3,15 +3,16 @@ class MADDE(DE): + """Implementation of this algorithm tries to be faithful both to original paper + and to its implementation in RL-DAS project. + In case of any difference, it follows RL-DAS approach""" + start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx", "pm"] def __init__(self, problem, options): super().__init__(problem, options) D = self.ndim_problem # Constants from MadDE paper/original code - self.Nmax = int(np.round(2 * (D**2))) - # self.Nmax = self.n_individuals if self.n_individuals else 170 - self.Nmin = options.get("Nmin", 4) self.p = 0.18 self.PqBX = 0.01 @@ -19,12 +20,12 @@ def __init__(self, problem, options): self.pm = np.ones(3) / 3 # Archive and Memory - self.A_rate = 2.30 + self.A_rate = 2.10 self.NA = int(np.round(self.A_rate * self.Nmax)) self.archive = np.empty((0, self.ndim_problem)) # Memory for F and Cr - self.memory_size = 10 * D + self.memory_size = 20 * D self.MF = np.ones(self.memory_size) * 0.2 self.MCr = np.ones(self.memory_size) * 0.2 self.k_idx = 0 @@ -51,24 +52,19 @@ def initialize(self, args=None, x=None, y=None): def _choose_F_Cr(self, NP): indices = self.rng_optimization.integers(0, self.memory_size, size=NP) - Cr = np.zeros(NP) - for i, idx in enumerate(indices): - if self.MCr[idx] == -1.0: # Check for terminal state - Cr[i] = 0.0 - else: - Cr[i] = self.rng_optimization.normal(loc=self.MCr[idx], scale=0.1) - + # Normal sampling for Cr (bounded [0, 1], no terminal state check) + Cr = self.rng_optimization.normal(loc=self.MCr[indices], scale=0.1) Cr = np.clip(Cr, 0, 1) - # Cauchy-like sampling for F - F = self.MF[indices] + 0.1 * np.tan( - np.pi * (self.rng_optimization.random(NP) - 0.5) - ) - while np.any(F <= 0): - idx = np.where(F <= 0)[0] - F[idx] = self.MF[indices[idx]] + 0.1 * np.tan( - np.pi * (self.rng_optimization.random(len(idx)) - 0.5) - ) + # Cauchy sampling for F + cauchy_locs = self.MF[indices] + F = cauchy_locs + 0.1 * self.rng_optimization.standard_cauchy(size=NP) + + # Reflection method for F < 0 (matches RL-DAS) + err = np.where(F < 0)[0] + if len(err) > 0: + F[err] = 2 * cauchy_locs[err] - F[err] + return Cr, np.minimum(1.0, F) def _mutate(self, x, y, F, strategy_idx, q, Fa): @@ -173,37 +169,38 @@ def iterate(self, x=None, y=None, args=None): # Memory and Strategy probability update df = np.maximum(0, y - new_y) self._update_memory(F[optim], Cr[optim], df[optim]) - self._update_pm(df, mu) + self._update_pm(df, y, mu) x[optim], y[optim] = u[optim], new_y[optim] - x, y = self._lpsr(x, y) + x, y = self._nlpsr(x, y) self._n_generations += 1 return x, y - def _update_pm(self, df, mu): + def _update_pm(self, df, y, mu): count_S = np.zeros(3) for i in range(3): if np.any(mu == i): - count_S[i] = np.mean(df[mu == i]) + # Aligning with optimizer.py: relative improvement df / cost + count_S[i] = np.mean(df[mu == i] / (y[mu == i])) if np.sum(count_S) > 0: - self.pm = np.maximum( - 0.1, np.minimum(0.9, count_S / (np.sum(count_S) + 1e-15)) - ) + self.pm = np.maximum(0.1, np.minimum(0.9, count_S / np.sum(count_S))) self.pm /= np.sum(self.pm) else: self.pm = np.ones(3) / 3 - def _lpsr(self, x, y): + def _nlpsr(self, x, y): FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations # Prevent the ratio from exceeding 1.0 if FEs overshoots MaxFEs ratio = min(1.0, FEs / MaxFEs) - # LPSR formula: N_G = round(N_max - (N_max - N_min) * ratio) - new_NP = int(np.round(self.Nmax - (self.Nmax - self.Nmin) * ratio)) + # NLPSR formula using the power curve + new_NP = int( + np.round(self.Nmax + (self.Nmin - self.Nmax) * np.power(ratio, 1.0 - ratio)) + ) # Clamp to ensure population never drops below Nmin new_NP = max(self.Nmin, new_NP) @@ -214,17 +211,10 @@ def _lpsr(self, x, y): self.n_individuals = new_NP # Dynamically prune the archive size based on the new population - self.NA = int(np.round(self.A_rate * new_NP)) - - # Ensure NA doesn't go negative (redundant with the max clamp above, but safe) - self.NA = max(0, self.NA) + self.NA = int(max(np.round(self.A_rate * new_NP), self.Nmin)) if len(self.archive) > self.NA: - self.archive = self.archive[ - self.rng_optimization.choice( - len(self.archive), self.NA, replace=False - ) - ] + self.archive = self.archive[: self.NA] # Slice directly return x, y # Helper mutation methods (Vectorized) @@ -262,17 +252,19 @@ def _binomial(self, x, v, Cr): def _update_memory(self, SF, SCr, df): if len(SF) > 0: - w = df / (np.sum(df) + 1e-15) + w = df / np.sum(df) # Weighted Lehmer mean for F - self.MF[self.k_idx] = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) + if np.sum(w * SF) > 1e-6: + self.MF[self.k_idx] = np.sum(w * (SF**2)) / np.sum(w * SF) + else: + self.MF[self.k_idx] = 0.5 - # Terminal condition check for Cr - if self.MCr[self.k_idx] == -1.0 or np.max(SCr) == 0: - self.MCr[self.k_idx] = -1.0 # Terminal state \perp + # Weighted Lehmer mean for Cr (No terminal condition) + if np.sum(w * SCr) > 1e-6: + self.MCr[self.k_idx] = np.sum(w * (SCr**2)) / np.sum(w * SCr) else: - # Weighted Lehmer mean for Cr - self.MCr[self.k_idx] = np.sum(w * (SCr**2)) / (np.sum(w * SCr) + 1e-15) + self.MCr[self.k_idx] = 0.5 self.k_idx = (self.k_idx + 1) % self.memory_size else: diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py index 150ee6d..2ed0be3 100644 --- a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -3,11 +3,14 @@ class NL_SHADE_RSP(DE): + """Implementation of this algorithm tries to be faithful both to original paper + and to its implementation in RL-DAS project. + In case of any difference, it follows RL-DAS approach""" + start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx", "pa"] def __init__(self, problem, options): super().__init__(problem, options) - self.Nmax = options.get("Nmax", 30 * self.ndim_problem) self.Nmin = options.get("Nmin", 4) self.n_individuals = self.Nmax @@ -56,21 +59,19 @@ def _choose_F_Cr(self, NP): # Step Length (Cauchy) cauchy_locs = self.MF[ind_r] F = self._sample_cauchy(cauchy_locs, 0.1, NP) - # Symmetry correction for negative values - attempts = 0 - while np.any(F <= 0) and attempts < 100: - idx = np.where(F <= 0)[0] - F[idx] = self._sample_cauchy(cauchy_locs[idx], 0.1, len(idx)) - attempts += 1 + err = np.where(F < 0)[0] + F[err] = 2 * cauchy_locs[err] - F[err] + return Cr, np.minimum(1, F) def _update_memory(self, SF, SCr, df): if len(SF) > 0: w = df / np.sum(df) - # Weighted Lehmer Mean for F - self.MF[self.k_idx] = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) - # Weighted Arithmetic Mean for Cr - self.MCr[self.k_idx] = np.sum(w * SCr) + mean_wL_F = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) + mean_wL_Cr = np.sum(w * (SCr**2)) / (np.sum(w * SCr) + 1e-15) + + self.MF[self.k_idx] = mean_wL_F + self.MCr[self.k_idx] = mean_wL_Cr self.k_idx = (self.k_idx + 1) % self.memory_size def iterate(self, x=None, y=None, args=None): @@ -85,7 +86,7 @@ def iterate(self, x=None, y=None, args=None): Cr, F = self._choose_F_Cr(NP) - # Sort Cr so better individuals get smaller Cr (for exponential crossover) + # Sort Cr so better individuals get smaller Cr Cr = np.sort(Cr) # Adaptive greediness pb (from 0.4 to 0.2) @@ -93,8 +94,9 @@ def iterate(self, x=None, y=None, args=None): pb = 0.4 - 0.2 * nfe_ratio pb_upper = max(2, int(np.round(NP * pb))) - # Adaptive Cr_b for binomial crossover - Cr_b = 0.0 if nfe_ratio < 0.5 else 2.0 * (nfe_ratio - 0.5) + # BUG : Inverted Cr_b calculation - same as in RL-DAS implementation, did so for compatibility of comparison + # It is non-zero (negative) in the first half and 0.0 in the second half + Cr_b = 2.0 * (nfe_ratio - 0.5) if nfe_ratio < 0.5 else 0.0 # Rank-based probabilities for r2 (RSP) ranks = np.exp(-np.arange(NP) / NP) @@ -143,19 +145,24 @@ def iterate(self, x=None, y=None, args=None): # Generate Trials: current-to-pbest/1 x_pbest = x[pbest_idx] vs = x + F[:, np.newaxis] * (x_pbest - x) + F[:, np.newaxis] * (x[r1] - x2) - vs = np.clip(vs, self.lower_boundary, self.upper_boundary) - # Dual Crossover Handling + # Note: Removed the correct np.clip() here to implement Bug 5 + us = np.copy(x) - for i in range(NP): - if self.rng_optimization.random() < 0.5: - # Binomial crossover with Cr_b + + CrossExponential = self.rng_optimization.random() < 0.5 + + # ^ Bug copied from RL-DAS implementation + if CrossExponential: + # Executes Binomial logic with Cr_b when CrossExponential is True -> RL-DAS bug compatibility + for i in range(NP): jrand = self.rng_optimization.integers(self.ndim_problem) for j in range(self.ndim_problem): if self.rng_optimization.random() < Cr_b or j == jrand: us[i, j] = vs[i, j] - else: - # Exponential crossover with Cr_i + else: + # Executes Exponential logic with Cr when CrossExponential is False -> RL-DAS bug compatibility + for i in range(NP): n1 = self.rng_optimization.integers(self.ndim_problem) n2 = 1 while self.rng_optimization.random() < Cr[i] and n2 < self.ndim_problem: @@ -164,6 +171,15 @@ def iterate(self, x=None, y=None, args=None): idx = (n1 + j) % self.ndim_problem us[i, idx] = vs[i, idx] + # BUG 5: Hardcoded [-100, 100] bounds + out_of_bounds = (us < -100) | (us > 100) + if np.any(out_of_bounds): + us = np.where( + out_of_bounds, + self.rng_optimization.uniform(-100, 100, size=us.shape), + us, + ) + # Selection new_y = np.array( @@ -182,8 +198,10 @@ def iterate(self, x=None, y=None, args=None): df = y[better_idx] - new_y[better_idx] arc_used_better = use_arc[better_idx] - df_A = np.sum(df[arc_used_better]) - df_P = np.sum(df[~arc_used_better]) + # BUG Swapped Archive metrics -> from RL-DAS compatibility + df_P = np.sum(df[arc_used_better]) # Population gets archive improvements + df_A = np.sum(df[~arc_used_better]) # Archive gets population improvements + n_A_total = np.sum(use_arc) n_P_total = NP - n_A_total diff --git a/portfolio_study.slurm b/portfolio_study.slurm index 7a23fe8..713530c 100644 --- a/portfolio_study.slurm +++ b/portfolio_study.slurm @@ -7,7 +7,7 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH -A plgautopt26-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 #SBATCH --array=0-9 # 10 tasks total CDB_VAL=1.2 diff --git a/single_algorithm_CDB_study.slurm b/single_algorithm_CDB_study.slurm index 156c626..d044124 100644 --- a/single_algorithm_CDB_study.slurm +++ b/single_algorithm_CDB_study.slurm @@ -7,7 +7,7 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH -A plgautopt26-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 SEED=${1:-42} CDB_VAL=${2:-1.5}