|
| 1 | +"""Hyper optimization using a pure Python differential evolution strategy.""" |
| 2 | + |
| 3 | +from ..utils import get_rng |
| 4 | +from ._param_mapping import ( |
| 5 | + LCBOptimizer, |
| 6 | + build_params, |
| 7 | + convert_raw, |
| 8 | + num_params, |
| 9 | +) |
| 10 | +from .hyper import HyperOptLib, register_hyper_optlib |
| 11 | + |
| 12 | + |
class HyperDESampler:
    """A lightweight differential evolution optimizer operating in raw
    ``[-1, 1]`` parameter space.

    Each generation maintains a population of candidate vectors. New trial
    vectors are created using ``DE/rand/1/bin`` mutation and binomial
    crossover, then kept only if they score no worse than their parent
    (lower scores are better). A generation is only closed -- selection
    applied and the mutation factor decayed -- once at least
    ``population_size`` trials have been issued *and* every issued trial has
    been reported, so every population member is challenged at least once
    per generation, including in strictly serial ask/tell use.

    Parameters
    ----------
    space : dict[str, dict]
        The search space for a single contraction method.
    seed : None or int, optional
        Random seed.
    population_size : int or "auto", optional
        The population size. When ``"auto"`` it is chosen based on the mapped
        parameter dimension as ``max(8, 5 * ndim)``. Must be at least 4,
        since each mutation needs three members distinct from the target.
    mutation : float, optional
        The differential weight (F) applied to the difference vector.
    crossover : float, optional
        The crossover probability (CR) for binomial crossover.
    mutation_decay : float, optional
        Multiplicative decay applied to ``mutation`` after each completed
        generation.
    mutation_min : float, optional
        Lower bound for ``mutation``.
    mutation_max : float, optional
        Upper bound for ``mutation``.
    exponential_param_power : float, optional
        Passed through to the shared parameter mapping for ``FLOAT_EXP``
        parameters.

    Raises
    ------
    ValueError
        If the resolved ``population_size`` is smaller than 4.
    """

    # DE/rand/1 draws three donors that all differ from the target.
    _MIN_POPULATION = 4

    def __init__(
        self,
        space,
        seed=None,
        population_size="auto",
        mutation=0.8,
        crossover=0.7,
        mutation_decay=1.0,
        mutation_min=0.1,
        mutation_max=1.5,
        exponential_param_power=None,
    ):
        # NOTE(review): ``get_rng`` is assumed to return a
        # ``random.Random``-like object; this class relies on its
        # ``uniform``, ``sample``, ``randrange`` and ``random`` methods.
        self.rng = get_rng(seed)
        self.params = build_params(
            space, exponential_param_power=exponential_param_power
        )
        self.ndim = num_params(self.params)

        if population_size == "auto":
            population_size = max(8, 5 * self.ndim)
        if population_size < self._MIN_POPULATION:
            # fail early and clearly rather than inside ``rng.sample`` on
            # the first call to ``ask``
            raise ValueError(
                "population_size must be at least "
                f"{self._MIN_POPULATION} for DE/rand/1/bin, "
                f"got {population_size}."
            )
        self.population_size = population_size
        self.mutation = mutation
        self.crossover = crossover
        self.mutation_decay = mutation_decay
        self.mutation_min = mutation_min
        self.mutation_max = mutation_max

        # initialize population uniformly in [-1, 1]
        self._population = [
            tuple(self.rng.uniform(-1.0, 1.0) for _ in range(self.ndim))
            for _ in range(self.population_size)
        ]
        # nothing has been evaluated yet, so any finite trial score wins
        self._scores = [float("inf")] * self.population_size

        self._trial_counter = 0
        # not read anywhere in this class; kept so the attribute set is
        # unchanged for any external code that inspects it
        self._target_index = 0
        self._generation = None
        # trial number -> slot within the current generation
        self._trial_map = {}

    def _mutate(self, target_idx):
        """Create a trial vector for ``target_idx`` via DE/rand/1/bin.

        Returns a tuple of length ``ndim`` with every entry clipped to
        ``[-1, 1]``. For a zero-dimensional space the empty tuple is
        returned without consuming any random numbers.
        """
        if self.ndim == 0:
            # nothing to perturb, and ``randrange(0)`` below would raise
            return ()

        # pick three distinct indices, all different from target
        candidates = [
            i for i in range(self.population_size) if i != target_idx
        ]
        r0, r1, r2 = self.rng.sample(candidates, 3)
        x_r0 = self._population[r0]
        x_r1 = self._population[r1]
        x_r2 = self._population[r2]

        # mutation: v = x_r0 + F * (x_r1 - x_r2), clipped to the box
        donor = [
            min(max(a + self.mutation * (b - c), -1.0), 1.0)
            for a, b, c in zip(x_r0, x_r1, x_r2)
        ]

        # binomial crossover; ``j_rand`` guarantees that at least one
        # coordinate is inherited from the donor vector
        x_target = self._population[target_idx]
        j_rand = self.rng.randrange(self.ndim)
        trial = []
        for d, (mutant, parent) in enumerate(zip(donor, x_target)):
            # draw for every coordinate so the random stream does not
            # depend on where ``j_rand`` landed
            take_mutant = self.rng.random() < self.crossover
            trial.append(mutant if take_mutant or d == j_rand else parent)

        return tuple(trial)

    def _sample_generation(self):
        """Start a new generation with one trial per population member."""
        self._generation = {
            "trials": [],
            "trial_numbers": [],
            "target_indices": [],
            "scores": {},
            "next_index": 0,
        }
        for i in range(self.population_size):
            self._extend_generation(i)

    def _extend_generation(self, target_idx=None):
        """Append one more trial to the current generation.

        When ``target_idx`` is not given, targets are assigned cyclically
        so extra trials wrap around the population.
        """
        generation = self._generation
        slot = len(generation["trials"])
        if target_idx is None:
            # wrap around if we need more trials than population
            target_idx = slot % self.population_size

        trial_number = self._trial_counter
        self._trial_counter += 1

        generation["trials"].append(self._mutate(target_idx))
        generation["trial_numbers"].append(trial_number)
        generation["target_indices"].append(target_idx)
        self._trial_map[trial_number] = slot

    def _next_trial(self):
        """Issue the next raw trial as ``(trial_number, raw_vector)``.

        Starts a new generation if none is open, and grows the current one
        by a single trial if every prepared trial was already issued.
        """
        if self._generation is None:
            self._sample_generation()
        generation = self._generation

        if generation["next_index"] >= len(generation["trials"]):
            self._extend_generation()

        i = generation["next_index"]
        generation["next_index"] = i + 1
        return generation["trial_numbers"][i], generation["trials"][i]

    def ask(self):
        """Return the next candidate from the current generation.

        If all prepared candidates have been issued, grow the generation
        by one more sample.

        Returns
        -------
        trial_number : int
            Token to hand back to ``tell`` together with the score.
        params
            The raw vector passed through ``convert_raw`` with this
            sampler's parameter mapping.
        """
        trial_number, x = self._next_trial()
        return trial_number, convert_raw(self.params, x)

    def tell(self, trial_number, score):
        """Record a completed trial and perform selection if the
        generation is complete.

        A generation is complete once at least ``population_size`` trials
        have been issued and all issued trials have reported a score. Each
        trial vector that scores better than (or equal to) its target
        parent then replaces that parent in the population, and the
        mutation factor is decayed and clamped.

        Raises
        ------
        KeyError
            If ``trial_number`` is unknown or was already reported.
        """
        slot = self._trial_map.pop(trial_number)
        generation = self._generation
        generation["scores"][slot] = score

        issued = generation["next_index"]
        if issued < self.population_size:
            # not every population member has been challenged yet; closing
            # now would restart from slot 0 and starve the other members
            return
        if len(generation["scores"]) != issued:
            # some issued trials are still outstanding
            return

        # selection: compare each trial against its target
        # (only iterate over issued slots, not all pre-sampled ones)
        for slot_i in range(issued):
            target_idx = generation["target_indices"][slot_i]
            trial_score = generation["scores"][slot_i]
            if trial_score <= self._scores[target_idx]:
                self._population[target_idx] = generation["trials"][slot_i]
                self._scores[target_idx] = trial_score

        # decay mutation factor, keeping it inside the configured bounds
        self.mutation *= self.mutation_decay
        self.mutation = min(
            max(self.mutation, self.mutation_min), self.mutation_max
        )

        # the next ``ask`` starts a fresh generation
        self._generation = None
| 194 | + |
| 195 | + |
class DEOptLib(HyperOptLib):
    """Hyper-optimization backend driven by differential evolution.

    One ``HyperDESampler`` is kept per contraction method, while an
    ``LCBOptimizer`` decides which method is sampled next.
    """

    def setup(
        self,
        methods,
        space,
        optimizer=None,
        population_size="auto",
        mutation=0.8,
        crossover=0.7,
        mutation_decay=1.0,
        mutation_min=0.1,
        mutation_max=1.5,
        method_exploration=1.0,
        method_temperature=1.0,
        exponential_param_power=None,
        seed=None,
        **kwargs,
    ):
        """Create the method chooser and one DE sampler per method.

        Parameters
        ----------
        methods : list[str]
            The contraction methods to optimize over.
        space : dict[str, dict[str, dict]]
            The per-method hyperparameter search space.
        optimizer : HyperOptimizer, optional
            The parent optimizer. Its ``pre_dispatch`` attribute, when
            present, is used as a lower bound on the automatic population
            size.
        population_size : int or "auto", optional
            The population size shared by every method-specific DE sampler.
            With ``"auto"`` it is the largest of 8, ``pre_dispatch`` (at
            least 1) and five times the largest mapped dimension over all
            methods.
        mutation : float, optional
            Differential weight (F).
        crossover : float, optional
            Crossover probability (CR).
        mutation_decay, mutation_min, mutation_max : float, optional
            Parameters controlling mutation scale over generations.
        method_exploration : float, optional
            Exploration strength for the LCB-based method chooser.
        method_temperature : float, optional
            Noise temperature for the LCB-based method chooser.
        exponential_param_power : float, optional
            Passed to the shared parameter mapping for ``FLOAT_EXP``.
        seed : None or int, optional
            Random seed, handed unchanged to the chooser and every sampler.
        **kwargs
            Accepted but not used by this method.
        """
        if population_size == "auto":
            # mapped dimension of every method's space
            dims = [
                num_params(
                    build_params(
                        space[name],
                        exponential_param_power=exponential_param_power,
                    )
                )
                for name in methods
            ]
            widest = max(dims)
            # falls back to 1 when ``optimizer`` has no ``pre_dispatch``
            dispatch_floor = max(1, getattr(optimizer, "pre_dispatch", 1))
            population_size = max(8, dispatch_floor, 5 * widest)

        self._method_chooser = LCBOptimizer(
            options=methods,
            exploration=method_exploration,
            temperature=method_temperature,
            seed=seed,
        )

        # every sampler shares the same settings; only the space differs
        sampler_options = {
            "seed": seed,
            "population_size": population_size,
            "mutation": mutation,
            "crossover": crossover,
            "mutation_decay": mutation_decay,
            "mutation_min": mutation_min,
            "mutation_max": mutation_max,
            "exponential_param_power": exponential_param_power,
        }
        samplers = {}
        for name in methods:
            samplers[name] = HyperDESampler(space[name], **sampler_options)
        self._optimizers = samplers

    def get_setting(self):
        """Pick a contraction method and fetch its next candidate.

        Returns a dict with the chosen ``"method"``, the sampler's
        ``"params_token"`` (needed to report the result) and the
        ``"params"`` to try.
        """
        chosen = self._method_chooser.ask()
        token, values = self._optimizers[chosen].ask()
        return {
            "method": chosen,
            "params_token": token,
            "params": values,
        }

    def report_result(self, setting, trial, score):
        """Feed a finished trial's score to the chooser and its sampler.

        ``trial`` is accepted but not used here.
        """
        method = setting["method"]
        self._method_chooser.tell(method, score)
        self._optimizers[method].tell(setting["params_token"], score)
| 298 | + |
| 299 | + |
# Register the differential evolution backend under two names: the short
# name "de" and the alias "diffev".
register_hyper_optlib("de", DEOptLib)
register_hyper_optlib("diffev", DEOptLib)
0 commit comments