From 8d8e09239e17da165b6b936f774c767b78ca3200 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:01:16 +0100 Subject: [PATCH 01/11] add _BaseSMACAdapter --- src/hyperactive/opt/_adapters/__init__.py | 3 +- .../opt/_adapters/_base_smac_adapter.py | 458 ++++++++++++++++++ 2 files changed, 460 insertions(+), 1 deletion(-) create mode 100644 src/hyperactive/opt/_adapters/_base_smac_adapter.py diff --git a/src/hyperactive/opt/_adapters/__init__.py b/src/hyperactive/opt/_adapters/__init__.py index 6e40d407..cd8c4720 100644 --- a/src/hyperactive/opt/_adapters/__init__.py +++ b/src/hyperactive/opt/_adapters/__init__.py @@ -2,6 +2,7 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) from ._base_optuna_adapter import _BaseOptunaAdapter +from ._base_smac_adapter import _BaseSMACAdapter from ._gfo import _BaseGFOadapter -__all__ = ["_BaseOptunaAdapter", "_BaseGFOadapter"] +__all__ = ["_BaseOptunaAdapter", "_BaseGFOadapter", "_BaseSMACAdapter"] diff --git a/src/hyperactive/opt/_adapters/_base_smac_adapter.py b/src/hyperactive/opt/_adapters/_base_smac_adapter.py new file mode 100644 index 00000000..c3cebc7b --- /dev/null +++ b/src/hyperactive/opt/_adapters/_base_smac_adapter.py @@ -0,0 +1,458 @@ +"""Base adapter for SMAC3 optimizers.""" + +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import time + +import numpy as np + +from hyperactive.base import BaseOptimizer + +__all__ = ["_BaseSMACAdapter"] + + +class _BaseSMACAdapter(BaseOptimizer): + """Base adapter class for SMAC3 optimizers. + + This adapter handles the conversion between Hyperactive's interface and + SMAC3's facade-based optimization pattern. Key responsibilities: + + * Search space conversion to ConfigSpace format + * Score negation (SMAC minimizes, Hyperactive maximizes) + * Ask-tell optimization loop implementation + * Random state handling + * Time-based early stopping + + Extension interface for subclasses: + + * ``_get_facade_class``: Return the SMAC facade class + * ``_get_facade_kwargs``: Return facade-specific constructor kwargs + * ``_get_scenario_kwargs``: Return scenario-specific kwargs + + Notes + ----- + SMAC3 is designed for minimization, while Hyperactive uses maximization + (higher scores are better). This adapter negates scores when calling + ``smac.tell()`` to handle this difference. + + SMAC3 uses ConfigSpace for parameter space definition. This adapter + converts Hyperactive's simple dict format to ConfigSpace objects. + + Parameter type detection uses the following rules: + + * Tuple ``(int, int)``: Integer parameter + * Tuple ``(float, float)`` or mixed: Float parameter + * List/array: Categorical parameter + + For ambiguous cases like ``(1, 10)``, the adapter checks if both bounds + are Python ``int`` type. Use ``(1.0, 10.0)`` to force float interpretation. + """ + + _tags = { + "python_dependencies": ["smac", "ConfigSpace"], + "info:name": "SMAC3-based optimizer", + } + + def __init__( + self, + param_space=None, + n_iter=100, + max_time=None, + initialize=None, + random_state=None, + deterministic=True, + experiment=None, + ): + self.param_space = param_space + self.n_iter = n_iter + self.max_time = max_time + self.initialize = initialize + self.random_state = random_state + self.deterministic = deterministic + self.experiment = experiment + super().__init__() + + def _get_facade_class(self): + """Get the SMAC facade class to use. + + Returns + ------- + class + The SMAC facade class. Must be a class from ``smac``. 
+ + Raises + ------ + NotImplementedError + If not implemented by subclass. + """ + raise NotImplementedError( + "Subclasses must implement _get_facade_class to return " + "the SMAC facade class." + ) + + def _get_facade_kwargs(self): + """Get facade-specific keyword arguments for instantiation. + + Override this method in subclasses to pass algorithm-specific + parameters to the SMAC facade constructor. + + Returns + ------- + dict + Keyword arguments to pass to the facade constructor. + Default is an empty dict. + """ + return {} + + def _get_scenario_kwargs(self): + """Get scenario-specific keyword arguments. + + Override this method in subclasses to pass scenario-specific + parameters (e.g., min_budget, max_budget for multi-fidelity). + + Returns + ------- + dict + Keyword arguments to pass to the Scenario constructor. + Default is an empty dict. + """ + return {} + + def _convert_to_configspace(self, param_space): + """Convert Hyperactive parameter space to ConfigSpace. + + Handles conversion of different parameter formats: + + * Tuples ``(int, int)``: Converted to ``Integer`` + * Tuples ``(float, float)`` or mixed: Converted to ``Float`` + * Lists/arrays of values: Converted to ``Categorical`` + * numpy arrays: Treated same as lists + + Parameters + ---------- + param_space : dict[str, tuple | list | np.ndarray] + The parameter space to convert. Keys are parameter names, + values are either: + + - Tuple of (low, high) for ranges + - List/array of discrete values + + Returns + ------- + ConfigurationSpace + ConfigSpace ConfigurationSpace object. + + Raises + ------ + ValueError + If parameter space format is not supported. + + Examples + -------- + >>> adapter = _BaseSMACAdapter() + >>> space = {"x": (0.0, 1.0), "y": [1, 2, 3], "z": (1, 10)} + >>> cs = adapter._convert_to_configspace(space) + """ + from ConfigSpace import Categorical, ConfigurationSpace, Float, Integer + + cs = ConfigurationSpace(seed=self.random_state) + + for name, space in param_space.items(): + if isinstance(space, tuple) and len(space) == 2: + low, high = space + # Check if both bounds are strictly int type + if isinstance(low, int) and isinstance(high, int): + # Exclude bool since bool is subclass of int + if not isinstance(low, bool) and not isinstance(high, bool): + cs.add(Integer(name, bounds=(low, high))) + else: + # bool values -> treat as categorical + cs.add(Categorical(name, items=[low, high])) + else: + # Float range (includes mixed int/float like (1, 10.0)) + cs.add(Float(name, bounds=(float(low), float(high)))) + + elif isinstance(space, (list, np.ndarray)): + values = list(space) if isinstance(space, np.ndarray) else space + + if len(values) == 0: + raise ValueError(f"Empty parameter space for '{name}'") + + cs.add(Categorical(name, items=values)) + + else: + raise ValueError( + f"Unsupported parameter space type for '{name}': {type(space)}. " + "Expected tuple (low, high) or list of values." + ) + + return cs + + def _config_to_dict(self, config): + """Convert SMAC Configuration to parameter dictionary. + + Converts numpy scalar types to native Python types to ensure + compatibility with sklearn estimators and JSON serialization. + + Parameters + ---------- + config : Configuration + SMAC Configuration object. + + Returns + ------- + dict + Parameter dictionary with keys matching param_space. + All values are native Python types (str, int, float, bool). 
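+
+        Examples
+        --------
+        A minimal sketch of the conversion, with a plain dict standing
+        in for a ``Configuration``:
+
+        >>> import numpy as np
+        >>> raw = {"C": np.float64(1.5), "kernel": "rbf"}
+        >>> {k: v.item() if hasattr(v, "item") else v for k, v in raw.items()}
+        {'C': 1.5, 'kernel': 'rbf'}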
+ """ + params = dict(config) + # Convert numpy scalars to Python native types + for key, value in params.items(): + if hasattr(value, "item"): + # numpy scalar types have .item() method + params[key] = value.item() + return params + + def _create_target_function(self, experiment): + """Create a target function for SMAC optimization. + + SMAC expects a target function with signature: + ``target_function(config, seed=None) -> float`` + + This method creates such a function that wraps the experiment. + + Parameters + ---------- + experiment : BaseExperiment + The experiment to optimize. + + Returns + ------- + callable + Target function compatible with SMAC. + """ + + def target_function(config, seed=None): + params = self._config_to_dict(config) + score = experiment(params) + # Negate score since SMAC minimizes and Hyperactive maximizes + return -score + + return target_function + + def _setup_warm_start(self, smac, experiment, initialize): + """Set up warm start initialization if provided. + + Warm start points are evaluated and told to SMAC before + the main optimization loop begins. + + Parameters + ---------- + smac : AbstractFacade + The SMAC facade instance. + experiment : BaseExperiment + The experiment to evaluate. + initialize : dict or None + Initialization configuration. If contains "warm_start" key, + those points are evaluated and told to the optimizer. + + Returns + ------- + int + Number of warm start evaluations performed. + """ + from smac.runhistory.dataclasses import TrialValue + + if initialize is None: + return 0 + + if not isinstance(initialize, dict) or "warm_start" not in initialize: + return 0 + + warm_start_points = initialize["warm_start"] + if not isinstance(warm_start_points, list): + return 0 + + count = 0 + for point in warm_start_points: + # Evaluate the point + score = experiment(point) + + # Ask for a trial info and tell SMAC about this evaluation + info = smac.ask() + + # Tell optimizer about this evaluation (negate for minimization) + value = TrialValue(cost=-score, time=0.0) + smac.tell(info, value) + count += 1 + + return count + + def _solve(self, experiment, param_space, n_iter, max_time=None, **kwargs): + """Run the SMAC optimization loop. + + Implements the ask-tell pattern: + + 1. Create ConfigSpace from param_space + 2. Create Scenario with budget and constraints + 3. Create facade with target function + 4. Run optimization loop + 5. Return best parameters + + Parameters + ---------- + experiment : BaseExperiment + The experiment to optimize. + param_space : dict + The parameter space to search. + n_iter : int + Number of iterations (trials). + max_time : float, optional + Maximum time in seconds. If provided, optimization stops + when time limit is reached even if budget not exhausted. + **kwargs + Additional parameters (unused, for compatibility). + + Returns + ------- + dict + Best parameters found during optimization. 
+ """ + from smac import Scenario + from smac.runhistory.dataclasses import TrialValue + + # Convert search space to ConfigSpace format + configspace = self._convert_to_configspace(param_space) + + # Build scenario kwargs + scenario_kwargs = { + "configspace": configspace, + "n_trials": n_iter, + "deterministic": self.deterministic, + } + + # Add seed if provided + if self.random_state is not None: + scenario_kwargs["seed"] = self.random_state + + # Add time limit if provided + if max_time is not None: + scenario_kwargs["walltime_limit"] = max_time + + # Add subclass-specific scenario kwargs + scenario_kwargs.update(self._get_scenario_kwargs()) + + # Create scenario + scenario = Scenario(**scenario_kwargs) + + # Get facade class and kwargs + facade_cls = self._get_facade_class() + facade_kwargs = self._get_facade_kwargs() + + # Create target function + target_function = self._create_target_function(experiment) + + # Create facade instance + smac = facade_cls( + scenario=scenario, + target_function=target_function, + overwrite=True, # Allow overwriting previous runs + **facade_kwargs, + ) + + # Handle warm start initialization + warm_start_count = self._setup_warm_start(smac, experiment, self.initialize) + + # Track best result manually for early access + best_score = float("-inf") + best_params = None + + # Optimization loop using ask-tell interface + start_time = time.time() + remaining_budget = n_iter - warm_start_count + + for _ in range(remaining_budget): + # Check time limit + if max_time is not None and (time.time() - start_time) > max_time: + break + + # Ask for next configuration + info = smac.ask() + + # Extract parameters and evaluate + params = self._config_to_dict(info.config) + score = experiment(params) + + # Tell SMAC (negate for minimization) + value = TrialValue(cost=-score, time=0.0) + smac.tell(info, value) + + # Track best + if score > best_score: + best_score = score + best_params = params.copy() + + # Get incumbent (best found configuration) from intensifier + incumbent = smac.intensifier.get_incumbent() + if incumbent is not None: + final_params = self._config_to_dict(incumbent) + else: + # Fallback to manually tracked best + final_params = best_params + + # Store best score for access + self.best_score_ = best_score + + return final_params + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the optimizer. + + Returns + ------- + list of dict + List of parameter configurations for testing. 
+ """ + from sklearn.datasets import load_iris + from sklearn.svm import SVC + + from hyperactive.experiment.integrations import SklearnCvExperiment + + X, y = load_iris(return_X_y=True) + sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y) + + # Test with tuple ranges + params_tuples = { + "param_space": { + "C": (0.01, 10.0), + "gamma": (0.0001, 1.0), + }, + "n_iter": 10, + "experiment": sklearn_exp, + } + + # Test with discrete lists + params_lists = { + "param_space": { + "C": [0.01, 0.1, 1.0, 10.0], + "gamma": [0.0001, 0.001, 0.01, 0.1], + }, + "n_iter": 10, + "experiment": sklearn_exp, + } + + # Test with mixed types + from hyperactive.experiment.bench import Ackley + + ackley_exp = Ackley.create_test_instance() + params_bench = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 20, + "experiment": ackley_exp, + } + + return [params_tuples, params_lists, params_bench] From ec73b575f18be507908df801b14a57d84d2c037a Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:18:52 +0100 Subject: [PATCH 02/11] add multiple smac optimizers --- src/hyperactive/opt/smac/__init__.py | 60 ++++ src/hyperactive/opt/smac/_gaussian_process.py | 224 +++++++++++++++ src/hyperactive/opt/smac/_random_forest.py | 261 ++++++++++++++++++ src/hyperactive/opt/smac/_random_search.py | 226 +++++++++++++++ 4 files changed, 771 insertions(+) create mode 100644 src/hyperactive/opt/smac/__init__.py create mode 100644 src/hyperactive/opt/smac/_gaussian_process.py create mode 100644 src/hyperactive/opt/smac/_random_forest.py create mode 100644 src/hyperactive/opt/smac/_random_search.py diff --git a/src/hyperactive/opt/smac/__init__.py b/src/hyperactive/opt/smac/__init__.py new file mode 100644 index 00000000..d4d48eab --- /dev/null +++ b/src/hyperactive/opt/smac/__init__.py @@ -0,0 +1,60 @@ +"""SMAC3 optimization algorithms. + +This module provides wrappers for SMAC3 (Sequential Model-based Algorithm +Configuration), a versatile Bayesian optimization package developed by the +AutoML groups at the Universities of Hannover and Freiburg. + +Available Optimizers +-------------------- +SmacRandomForest + Optimizer using Random Forest surrogate model. + Best for mixed continuous/categorical/integer parameter spaces. + +SmacGaussianProcess + Optimizer using Gaussian Process surrogate model. + Best for continuous parameter spaces with small to moderate budgets. + +SmacRandomSearch + Random search baseline without surrogate model. + Useful for comparison and high-dimensional problems. + +Installation +------------ +SMAC3 requires additional dependencies. Install with:: + + pip install smac + +Or install hyperactive with SMAC support:: + + pip install hyperactive[smac] + +Examples +-------- +>>> from hyperactive.opt.smac import SmacRandomForest +>>> from hyperactive.experiment.bench import Ackley + +>>> ackley = Ackley.create_test_instance() +>>> optimizer = SmacRandomForest( +... param_space={"x0": (-5.0, 5.0), "x1": (-5.0, 5.0)}, +... n_iter=50, +... experiment=ackley, +... ) +>>> best_params = optimizer.solve() # doctest: +SKIP + +References +---------- +.. [1] Lindauer, M., et al. (2022). SMAC3: A Versatile Bayesian Optimization + Package for Hyperparameter Optimization. JMLR. 
+""" + +# copyright: hyperactive developers, MIT License (see LICENSE file) + +from ._gaussian_process import SmacGaussianProcess +from ._random_forest import SmacRandomForest +from ._random_search import SmacRandomSearch + +__all__ = [ + "SmacRandomForest", + "SmacGaussianProcess", + "SmacRandomSearch", +] diff --git a/src/hyperactive/opt/smac/_gaussian_process.py b/src/hyperactive/opt/smac/_gaussian_process.py new file mode 100644 index 00000000..4098bc66 --- /dev/null +++ b/src/hyperactive/opt/smac/_gaussian_process.py @@ -0,0 +1,224 @@ +"""SMAC3 Gaussian Process surrogate optimizer.""" + +# copyright: hyperactive developers, MIT License (see LICENSE file) + +from hyperactive.opt._adapters._base_smac_adapter import _BaseSMACAdapter + +__all__ = ["SmacGaussianProcess"] + + +class SmacGaussianProcess(_BaseSMACAdapter): + """SMAC3 optimizer with Gaussian Process surrogate model. + + This optimizer uses SMAC3's BlackBoxFacade, which combines a Gaussian + Process (GP) surrogate model with Expected Improvement (EI) acquisition + function. It's particularly effective for: + + * Continuous parameter spaces (float ranges) + * Small to moderate budgets (10-100 evaluations) + * Low-dimensional problems (typically < 20 dimensions) + * Problems where uncertainty estimates are valuable + + The Gaussian Process surrogate provides uncertainty estimates, which helps + balance exploration and exploitation. However, GPs scale cubically with + the number of observations, making this optimizer less suitable for + large budgets. + + Parameters + ---------- + param_space : dict[str, tuple | list] + The search space to explore. Dictionary with parameter names as keys. + Values can be: + + * Tuple ``(int, int)``: Integer range (e.g., ``(1, 100)``) + * Tuple ``(float, float)``: Float range (e.g., ``(0.01, 10.0)``) + * List of values: Categorical choices (NOT recommended for GP) + + Note: Gaussian Processes work best with continuous parameters. + For mixed or categorical spaces, consider using ``SmacRandomForest``. + + n_iter : int, default=100 + Number of optimization iterations (trials). For GP-based optimization, + 50-100 iterations is often sufficient due to the model's sample + efficiency. + + max_time : float, optional + Maximum optimization time in seconds. If provided, optimization stops + when time limit is reached even if ``n_iter`` not exhausted. + + initialize : dict, optional + Initialization configuration. Supports: + + * ``{"warm_start": [{"param1": val1, ...}, ...]}``: Start with + known good configurations. + + random_state : int, optional + Random seed for reproducibility. + + deterministic : bool, default=True + Whether the objective function is deterministic. + + experiment : BaseExperiment, optional + The experiment to optimize. + + Attributes + ---------- + best_params_ : dict + Best parameters found after calling ``solve()``. + + best_score_ : float + Score of the best parameters found. + + See Also + -------- + SmacRandomForest : Random Forest surrogate for mixed/categorical spaces. + SmacRandomSearch : Random search baseline. + + Notes + ----- + The Gaussian Process surrogate uses a Matern 5/2 kernel by default. + Key characteristics: + + * Provides uncertainty estimates for exploration + * Scales O(n^3) with number of observations + * Does not support instance-based optimization + * Best suited for continuous parameter spaces + + For problems with categorical parameters or large budgets, the + ``SmacRandomForest`` optimizer is recommended. + + References + ---------- + .. 
[1] Lindauer, M., et al. (2022). SMAC3: A Versatile Bayesian Optimization + Package for Hyperparameter Optimization. JMLR. + + .. [2] Snoek, J., Larochelle, H., & Adams, R. P. (2012). Practical Bayesian + Optimization of Machine Learning Algorithms. NeurIPS. + + Examples + -------- + Basic usage with a benchmark function: + + >>> from hyperactive.experiment.bench import Ackley + >>> from hyperactive.opt.smac import SmacGaussianProcess + + Create a benchmark experiment: + + >>> ackley = Ackley.create_test_instance() + + Configure the optimizer: + + >>> optimizer = SmacGaussianProcess( + ... param_space={ + ... "x0": (-5.0, 5.0), + ... "x1": (-5.0, 5.0), + ... }, + ... n_iter=50, + ... random_state=42, + ... experiment=ackley, + ... ) + + Run optimization: + + >>> best_params = optimizer.solve() # doctest: +SKIP + >>> print(best_params) # doctest: +SKIP + {'x0': 0.001, 'x1': -0.002} + + With scikit-learn hyperparameter optimization (continuous params only): + + >>> from hyperactive.experiment.integrations import SklearnCvExperiment + >>> from sklearn.datasets import load_iris + >>> from sklearn.svm import SVC + + >>> X, y = load_iris(return_X_y=True) + >>> sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y) + + Configure with continuous parameters: + + >>> optimizer = SmacGaussianProcess( + ... param_space={ + ... "C": (0.01, 100.0), + ... "gamma": (0.0001, 1.0), + ... }, + ... n_iter=50, # GP is sample-efficient + ... experiment=sklearn_exp, + ... ) + >>> best_params = optimizer.solve() # doctest: +SKIP + """ + + _tags = { + "info:name": "SMAC Gaussian Process", + "info:local_vs_global": "global", + "info:explore_vs_exploit": "balanced", + "info:compute": "middle", + "python_dependencies": ["smac", "ConfigSpace"], + } + + def __init__( + self, + param_space=None, + n_iter=100, + max_time=None, + initialize=None, + random_state=None, + deterministic=True, + experiment=None, + ): + super().__init__( + param_space=param_space, + n_iter=n_iter, + max_time=max_time, + initialize=initialize, + random_state=random_state, + deterministic=deterministic, + experiment=experiment, + ) + + def _get_facade_class(self): + """Get the BlackBoxFacade class. + + Returns + ------- + class + The SMAC BlackBoxFacade class. + """ + from smac import BlackBoxFacade + + return BlackBoxFacade + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the optimizer. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the parameter set to return. + + Returns + ------- + list of dict + List of parameter configurations for testing. 
+ + Examples + -------- + >>> params = SmacGaussianProcess.get_test_params() + >>> len(params) >= 1 + True + """ + # Only use continuous parameters for GP-based optimization + from hyperactive.experiment.bench import Ackley + + ackley_exp = Ackley.create_test_instance() + + params_continuous = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 20, + "experiment": ackley_exp, + "random_state": 42, + } + + return [params_continuous] diff --git a/src/hyperactive/opt/smac/_random_forest.py b/src/hyperactive/opt/smac/_random_forest.py new file mode 100644 index 00000000..88270aac --- /dev/null +++ b/src/hyperactive/opt/smac/_random_forest.py @@ -0,0 +1,261 @@ +"""SMAC3 Random Forest surrogate optimizer.""" + +# copyright: hyperactive developers, MIT License (see LICENSE file) + +from hyperactive.opt._adapters._base_smac_adapter import _BaseSMACAdapter + +__all__ = ["SmacRandomForest"] + + +class SmacRandomForest(_BaseSMACAdapter): + """SMAC3 optimizer with Random Forest surrogate model. + + This optimizer uses SMAC3's HyperparameterOptimizationFacade, which + combines a Random Forest surrogate model with Expected Improvement (EI) + acquisition function. It's particularly effective for: + + * Mixed continuous/categorical/integer parameter spaces + * Moderate to large budgets (50+ evaluations recommended) + * Problems where function evaluations are expensive + * Hyperparameter optimization of machine learning models + + The Random Forest surrogate model handles categorical parameters natively, + making this optimizer well-suited for search spaces with parameters like + ``kernel`` or ``activation`` that are categorical. + + Parameters + ---------- + param_space : dict[str, tuple | list] + The search space to explore. Dictionary with parameter names as keys. + Values can be: + + * Tuple ``(int, int)``: Integer range (e.g., ``(1, 100)``) + * Tuple ``(float, float)``: Float range (e.g., ``(0.01, 10.0)``) + * List of values: Categorical choices (e.g., ``["rbf", "linear"]``) + + For ambiguous integer tuples like ``(1, 10)``, both bounds must be + Python ``int`` type. Use ``(1.0, 10.0)`` for float ranges. + + n_iter : int, default=100 + Number of optimization iterations (trials). Each iteration evaluates + one configuration. + + max_time : float, optional + Maximum optimization time in seconds. If provided, optimization stops + when time limit is reached even if ``n_iter`` not exhausted. + + initialize : dict, optional + Initialization configuration. Supports: + + * ``{"warm_start": [{"param1": val1, ...}, ...]}``: Start with + known good configurations to seed the optimization. + + random_state : int, optional + Random seed for reproducibility. Controls both the surrogate model + and the acquisition function optimizer. + + deterministic : bool, default=True + Whether the objective function is deterministic. If False, SMAC will + use multiple seeds per configuration to estimate variance. + + n_initial_points : int, default=10 + Number of initial random configurations before starting model-based + optimization. More initial points improve the surrogate model quality + but delay exploitation. + + experiment : BaseExperiment, optional + The experiment to optimize. Can also be set via ``set_params()``. + + Attributes + ---------- + best_params_ : dict + Best parameters found after calling ``solve()``. + + best_score_ : float + Score of the best parameters found. + + See Also + -------- + SmacGaussianProcess : Gaussian Process surrogate for continuous spaces. 
+ SmacRandomSearch : Random search baseline using SMAC infrastructure. + + Notes + ----- + SMAC3 (Sequential Model-based Algorithm Configuration) was developed by + the AutoML groups at the Universities of Hannover and Freiburg. + + The optimizer internally uses: + + * Random Forest as surrogate model + * Expected Improvement with log transformation as acquisition function + * Sobol sequence for initial design + * Local + random search for acquisition optimization + + References + ---------- + .. [1] Lindauer, M., et al. (2022). SMAC3: A Versatile Bayesian Optimization + Package for Hyperparameter Optimization. JMLR. + + .. [2] Hutter, F., Hoos, H. H., & Leyton-Brown, K. (2011). Sequential + model-based optimization for general algorithm configuration. + LION 5. + + Examples + -------- + Basic usage with a benchmark function: + + >>> from hyperactive.experiment.bench import Ackley + >>> from hyperactive.opt.smac import SmacRandomForest + + Create a benchmark experiment: + + >>> ackley = Ackley.create_test_instance() + + Configure the optimizer: + + >>> optimizer = SmacRandomForest( + ... param_space={ + ... "x0": (-5.0, 5.0), + ... "x1": (-5.0, 5.0), + ... }, + ... n_iter=50, + ... random_state=42, + ... experiment=ackley, + ... ) + + Run optimization: + + >>> best_params = optimizer.solve() # doctest: +SKIP + >>> print(best_params) # doctest: +SKIP + {'x0': 0.001, 'x1': -0.002} + + With scikit-learn hyperparameter optimization: + + >>> from hyperactive.experiment.integrations import SklearnCvExperiment + >>> from sklearn.datasets import load_iris + >>> from sklearn.svm import SVC + + >>> X, y = load_iris(return_X_y=True) + >>> sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y) + + Configure with mixed parameter types: + + >>> optimizer = SmacRandomForest( + ... param_space={ + ... "C": (0.01, 100.0), # Float range + ... "gamma": (0.0001, 1.0), # Float range + ... "kernel": ["rbf", "linear"], # Categorical + ... }, + ... n_iter=100, + ... experiment=sklearn_exp, + ... ) + >>> best_params = optimizer.solve() # doctest: +SKIP + + Using warm start with known good configurations: + + >>> optimizer = SmacRandomForest( + ... param_space={"x0": (-5.0, 5.0), "x1": (-5.0, 5.0)}, + ... n_iter=50, + ... initialize={"warm_start": [{"x0": 0.0, "x1": 0.0}]}, + ... experiment=ackley, + ... ) + >>> best_params = optimizer.solve() # doctest: +SKIP + """ + + _tags = { + "info:name": "SMAC Random Forest", + "info:local_vs_global": "global", + "info:explore_vs_exploit": "balanced", + "info:compute": "middle", + "python_dependencies": ["smac", "ConfigSpace"], + } + + def __init__( + self, + param_space=None, + n_iter=100, + max_time=None, + initialize=None, + random_state=None, + deterministic=True, + n_initial_points=10, + experiment=None, + ): + self.n_initial_points = n_initial_points + + super().__init__( + param_space=param_space, + n_iter=n_iter, + max_time=max_time, + initialize=initialize, + random_state=random_state, + deterministic=deterministic, + experiment=experiment, + ) + + def _get_facade_class(self): + """Get the HyperparameterOptimizationFacade class. + + Returns + ------- + class + The SMAC HyperparameterOptimizationFacade class. + """ + from smac import HyperparameterOptimizationFacade + + return HyperparameterOptimizationFacade + + def _get_scenario_kwargs(self): + """Get scenario arguments. + + Returns + ------- + dict + Scenario arguments. 
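+
+        Notes
+        -----
+        ``n_initial_points`` has no ``Scenario`` counterpart. A sketch of
+        how it could be wired instead, assuming the SMAC3 v2 facade API::
+
+            initial_design = HyperparameterOptimizationFacade.get_initial_design(
+                scenario, n_configs=n_initial_points
+            )
+
+        with ``initial_design`` then passed to the facade constructor.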
+ """ + kwargs = {} + if self.n_initial_points is not None: + # SMAC uses this to determine initial design size + kwargs["n_workers"] = 1 # Single worker for sequential evaluation + return kwargs + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the optimizer. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the parameter set to return. + + Returns + ------- + list of dict + List of parameter configurations for testing. + + Examples + -------- + >>> params = SmacRandomForest.get_test_params() + >>> len(params) >= 1 + True + """ + params = super().get_test_params(parameter_set) + + # Add test with custom parameters + from hyperactive.experiment.bench import Ackley + + ackley_exp = Ackley.create_test_instance() + params.append( + { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 20, + "n_initial_points": 5, + "experiment": ackley_exp, + "random_state": 42, + } + ) + + return params diff --git a/src/hyperactive/opt/smac/_random_search.py b/src/hyperactive/opt/smac/_random_search.py new file mode 100644 index 00000000..dc230f18 --- /dev/null +++ b/src/hyperactive/opt/smac/_random_search.py @@ -0,0 +1,226 @@ +"""SMAC3 Random Search optimizer.""" + +# copyright: hyperactive developers, MIT License (see LICENSE file) + +from hyperactive.opt._adapters._base_smac_adapter import _BaseSMACAdapter + +__all__ = ["SmacRandomSearch"] + + +class SmacRandomSearch(_BaseSMACAdapter): + """SMAC3 Random Search optimizer. + + This optimizer uses SMAC3's RandomFacade, which performs pure random + search without any surrogate model. It's useful for: + + * Baseline comparison against model-based optimizers + * Problems where random search is competitive (high-dimensional) + * Sanity checking optimization setups + * Embarrassingly parallel optimization + + Random search samples configurations uniformly at random from the + parameter space. Despite its simplicity, it can be surprisingly + effective, especially in high-dimensional spaces where model-based + methods struggle. + + Parameters + ---------- + param_space : dict[str, tuple | list] + The search space to explore. Dictionary with parameter names as keys. + Values can be: + + * Tuple ``(int, int)``: Integer range (e.g., ``(1, 100)``) + * Tuple ``(float, float)``: Float range (e.g., ``(0.01, 10.0)``) + * List of values: Categorical choices (e.g., ``["rbf", "linear"]``) + + n_iter : int, default=100 + Number of random configurations to evaluate. + + max_time : float, optional + Maximum optimization time in seconds. + + initialize : dict, optional + Initialization configuration. Supports: + + * ``{"warm_start": [{"param1": val1, ...}, ...]}``: Start with + known configurations before random sampling. + + random_state : int, optional + Random seed for reproducibility. + + deterministic : bool, default=True + Whether the objective function is deterministic. + + experiment : BaseExperiment, optional + The experiment to optimize. + + Attributes + ---------- + best_params_ : dict + Best parameters found after calling ``solve()``. + + best_score_ : float + Score of the best parameters found. + + See Also + -------- + SmacRandomForest : Model-based optimizer with Random Forest surrogate. + SmacGaussianProcess : Model-based optimizer with Gaussian Process surrogate. 
+ + Notes + ----- + Random search has several advantages: + + * No model fitting overhead + * Trivially parallelizable + * No risk of model misspecification + * Works in any dimensional space + + However, it doesn't learn from previous evaluations, so it requires + more samples than model-based methods for most problems. + + References + ---------- + .. [1] Bergstra, J., & Bengio, Y. (2012). Random Search for Hyper-Parameter + Optimization. JMLR. + + .. [2] Lindauer, M., et al. (2022). SMAC3: A Versatile Bayesian Optimization + Package for Hyperparameter Optimization. JMLR. + + Examples + -------- + Basic usage with a benchmark function: + + >>> from hyperactive.experiment.bench import Ackley + >>> from hyperactive.opt.smac import SmacRandomSearch + + Create a benchmark experiment: + + >>> ackley = Ackley.create_test_instance() + + Configure the optimizer: + + >>> optimizer = SmacRandomSearch( + ... param_space={ + ... "x0": (-5.0, 5.0), + ... "x1": (-5.0, 5.0), + ... }, + ... n_iter=100, + ... random_state=42, + ... experiment=ackley, + ... ) + + Run optimization: + + >>> best_params = optimizer.solve() # doctest: +SKIP + >>> print(best_params) # doctest: +SKIP + {'x0': 0.5, 'x1': -0.3} + + Comparing random search with model-based optimization: + + >>> from hyperactive.experiment.integrations import SklearnCvExperiment + >>> from sklearn.datasets import load_iris + >>> from sklearn.svm import SVC + + >>> X, y = load_iris(return_X_y=True) + >>> sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y) + + >>> random_opt = SmacRandomSearch( + ... param_space={ + ... "C": (0.01, 100.0), + ... "gamma": (0.0001, 1.0), + ... "kernel": ["rbf", "linear"], + ... }, + ... n_iter=100, + ... experiment=sklearn_exp, + ... ) + >>> best_params = random_opt.solve() # doctest: +SKIP + """ + + _tags = { + "info:name": "SMAC Random Search", + "info:local_vs_global": "global", + "info:explore_vs_exploit": "explore", + "info:compute": "low", + "python_dependencies": ["smac", "ConfigSpace"], + } + + def __init__( + self, + param_space=None, + n_iter=100, + max_time=None, + initialize=None, + random_state=None, + deterministic=True, + experiment=None, + ): + super().__init__( + param_space=param_space, + n_iter=n_iter, + max_time=max_time, + initialize=initialize, + random_state=random_state, + deterministic=deterministic, + experiment=experiment, + ) + + def _get_facade_class(self): + """Get the RandomFacade class. + + Returns + ------- + class + The SMAC RandomFacade class. + """ + from smac import RandomFacade + + return RandomFacade + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the optimizer. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the parameter set to return. + + Returns + ------- + list of dict + List of parameter configurations for testing. 
+ + Examples + -------- + >>> params = SmacRandomSearch.get_test_params() + >>> len(params) >= 1 + True + """ + from hyperactive.experiment.bench import Ackley + + ackley_exp = Ackley.create_test_instance() + + # Test with continuous parameters + params_continuous = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 20, + "experiment": ackley_exp, + "random_state": 42, + } + + # Test with mixed parameters + params_mixed = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": [1, 2, 3, 4, 5], + }, + "n_iter": 20, + "experiment": ackley_exp, + "random_state": 42, + } + + return [params_continuous, params_mixed] From 5d981f252d3ae3147183cd5adb669572756c2ab3 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:26:31 +0100 Subject: [PATCH 03/11] add more tests to _BaseSMACAdapter --- .../opt/_adapters/_base_smac_adapter.py | 169 ++++++++++++++++-- 1 file changed, 159 insertions(+), 10 deletions(-) diff --git a/src/hyperactive/opt/_adapters/_base_smac_adapter.py b/src/hyperactive/opt/_adapters/_base_smac_adapter.py index c3cebc7b..203f5927 100644 --- a/src/hyperactive/opt/_adapters/_base_smac_adapter.py +++ b/src/hyperactive/opt/_adapters/_base_smac_adapter.py @@ -413,17 +413,32 @@ def get_test_params(cls, parameter_set="default"): ------- list of dict List of parameter configurations for testing. + + Notes + ----- + Test parameter sets cover: + + * Float tuple ranges (continuous parameters) + * Integer tuple ranges (discrete parameters) + * Categorical lists (string and numeric) + * Boolean categorical parameters + * Mixed parameter types (float + int + categorical) + * Warm start initialization + * Random state for reproducibility + * Deterministic vs non-deterministic settings """ from sklearn.datasets import load_iris from sklearn.svm import SVC + from hyperactive.experiment.bench import Ackley from hyperactive.experiment.integrations import SklearnCvExperiment X, y = load_iris(return_X_y=True) sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y) + ackley_exp = Ackley.create_test_instance() - # Test with tuple ranges - params_tuples = { + # Test 1: Float tuple ranges (continuous parameters) + params_float_tuples = { "param_space": { "C": (0.01, 10.0), "gamma": (0.0001, 1.0), @@ -432,8 +447,8 @@ def get_test_params(cls, parameter_set="default"): "experiment": sklearn_exp, } - # Test with discrete lists - params_lists = { + # Test 2: Categorical lists (discrete values) + params_categorical = { "param_space": { "C": [0.01, 0.1, 1.0, 10.0], "gamma": [0.0001, 0.001, 0.01, 0.1], @@ -442,17 +457,151 @@ def get_test_params(cls, parameter_set="default"): "experiment": sklearn_exp, } - # Test with mixed types - from hyperactive.experiment.bench import Ackley + # Test 3: Integer tuple ranges + params_integer_tuples = { + "param_space": { + "x0": (-5, 5), + "x1": (-5, 5), + }, + "n_iter": 10, + "experiment": ackley_exp, + } - ackley_exp = Ackley.create_test_instance() - params_bench = { + # Test 4: Mixed parameter types (float + categorical) + params_mixed_float_cat = { + "param_space": { + "C": (0.01, 100.0), + "gamma": (0.0001, 1.0), + "kernel": ["rbf", "linear", "poly"], + }, + "n_iter": 10, + "experiment": sklearn_exp, + } + + # Test 5: Mixed parameter types (int + float + categorical) + params_mixed_all = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": [1, 2, 3, 4, 5], + }, + "n_iter": 10, + "experiment": ackley_exp, + } + + # Test 6: Boolean categorical parameters + params_boolean_cat = { + "param_space": { + "C": (0.1, 10.0), + "shrinking": 
[True, False], + }, + "n_iter": 10, + "experiment": sklearn_exp, + } + + # Test 7: String categorical with many options + params_string_cat = { + "param_space": { + "C": (0.1, 10.0), + "kernel": ["rbf", "linear", "poly", "sigmoid"], + }, + "n_iter": 10, + "experiment": sklearn_exp, + } + + # Test 8: With random_state for reproducibility + params_random_state = { "param_space": { "x0": (-5.0, 5.0), "x1": (-5.0, 5.0), }, - "n_iter": 20, + "n_iter": 15, + "random_state": 42, + "experiment": ackley_exp, + } + + # Test 9: With deterministic=False (stochastic objective) + params_non_deterministic = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 10, + "deterministic": False, + "experiment": ackley_exp, + } + + # Test 10: With warm_start initialization + params_warm_start = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 15, + "initialize": {"warm_start": [{"x0": 0.0, "x1": 0.0}]}, + "experiment": ackley_exp, + } + + # Test 11: Multiple warm start points + params_multi_warm_start = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 15, + "initialize": { + "warm_start": [ + {"x0": 0.0, "x1": 0.0}, + {"x0": 1.0, "x1": -1.0}, + {"x0": -2.0, "x1": 2.0}, + ] + }, + "random_state": 123, + "experiment": ackley_exp, + } + + # Test 12: Large integer range + params_large_int_range = { + "param_space": { + "x0": (-100, 100), + "x1": (0, 1000), + }, + "n_iter": 10, + "experiment": ackley_exp, + } + + # Test 13: Small float range (high precision) + params_small_float_range = { + "param_space": { + "x0": (-0.001, 0.001), + "x1": (-0.001, 0.001), + }, + "n_iter": 10, + "experiment": ackley_exp, + } + + # Test 14: Asymmetric ranges + params_asymmetric = { + "param_space": { + "x0": (-10.0, 2.0), + "x1": (0.5, 100.0), + }, + "n_iter": 10, "experiment": ackley_exp, } - return [params_tuples, params_lists, params_bench] + return [ + params_float_tuples, + params_categorical, + params_integer_tuples, + params_mixed_float_cat, + params_mixed_all, + params_boolean_cat, + params_string_cat, + params_random_state, + params_non_deterministic, + params_warm_start, + params_multi_warm_start, + params_large_int_range, + params_small_float_range, + params_asymmetric, + ] From 739e2cf9ac2d702c23ba314bf90d39f518609a85 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:31:47 +0100 Subject: [PATCH 04/11] add tests to SmacGaussianProcess --- src/hyperactive/opt/smac/_gaussian_process.py | 225 +++++++++++++++++- 1 file changed, 220 insertions(+), 5 deletions(-) diff --git a/src/hyperactive/opt/smac/_gaussian_process.py b/src/hyperactive/opt/smac/_gaussian_process.py index 4098bc66..92440189 100644 --- a/src/hyperactive/opt/smac/_gaussian_process.py +++ b/src/hyperactive/opt/smac/_gaussian_process.py @@ -200,25 +200,240 @@ def get_test_params(cls, parameter_set="default"): list of dict List of parameter configurations for testing. + Notes + ----- + SmacGaussianProcess tests focus on continuous parameter spaces, + where Gaussian Processes excel. Tests cover: + + * Different dimensional spaces (1D to 4D) + * Various range sizes (narrow, wide, asymmetric) + * Sample efficiency (fewer iterations) + * Reproducibility with random_state + * sklearn continuous hyperparameter optimization + + Categorical parameters are intentionally excluded since GPs + work best with continuous spaces. Use SmacRandomForest for + mixed or categorical parameter spaces. 
+ Examples -------- >>> params = SmacGaussianProcess.get_test_params() >>> len(params) >= 1 True """ - # Only use continuous parameters for GP-based optimization + from sklearn.datasets import load_iris, load_wine + from sklearn.linear_model import Ridge + from sklearn.svm import SVR + from hyperactive.experiment.bench import Ackley + from hyperactive.experiment.integrations import SklearnCvExperiment + + # Create Ackley instances with different dimensions + ackley_1d = Ackley(d=1) + ackley_2d = Ackley.create_test_instance() # default is 2D + ackley_3d = Ackley(d=3) + ackley_4d = Ackley(d=4) + + X_iris, y_iris = load_iris(return_X_y=True) + X_wine, y_wine = load_wine(return_X_y=True) + + # Test GP-1: Basic 2D continuous space + params_2d_basic = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 20, + "experiment": ackley_2d, + "random_state": 42, + } + + # Test GP-2: 1D continuous space (simplest case) + params_1d = { + "param_space": { + "x0": (-10.0, 10.0), + }, + "n_iter": 15, + "experiment": ackley_1d, + "random_state": 42, + } + + # Test GP-3: 3D continuous space + params_3d = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + "x2": (-5.0, 5.0), + }, + "n_iter": 25, + "experiment": ackley_3d, + "random_state": 42, + } + + # Test GP-4: 4D continuous space (GP limit for efficiency) + params_4d = { + "param_space": { + "x0": (-3.0, 3.0), + "x1": (-3.0, 3.0), + "x2": (-3.0, 3.0), + "x3": (-3.0, 3.0), + }, + "n_iter": 30, + "experiment": ackley_4d, + "random_state": 42, + } + + # Test GP-5: Narrow range (high precision optimization) + params_narrow = { + "param_space": { + "x0": (-0.5, 0.5), + "x1": (-0.5, 0.5), + }, + "n_iter": 15, + "experiment": ackley_2d, + "random_state": 42, + } + + # Test GP-6: Wide range + params_wide = { + "param_space": { + "x0": (-100.0, 100.0), + "x1": (-100.0, 100.0), + }, + "n_iter": 20, + "experiment": ackley_2d, + "random_state": 42, + } + + # Test GP-7: Asymmetric ranges + params_asymmetric = { + "param_space": { + "x0": (-10.0, 2.0), + "x1": (0.001, 50.0), + }, + "n_iter": 20, + "experiment": ackley_2d, + "random_state": 42, + } + + # Test GP-8: Very small range (local optimization) + params_small = { + "param_space": { + "x0": (-0.01, 0.01), + "x1": (-0.01, 0.01), + }, + "n_iter": 15, + "experiment": ackley_2d, + "random_state": 42, + } + + # Test GP-9: Sample efficient (fewer iterations) + params_sample_efficient = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 10, + "experiment": ackley_2d, + "random_state": 42, + } - ackley_exp = Ackley.create_test_instance() + # Test GP-10: With reproducibility test (different seeds) + params_seed_42 = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 15, + "random_state": 42, + "experiment": ackley_2d, + } - params_continuous = { + # Test GP-11: Different random state + params_seed_123 = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 15, + "random_state": 123, + "experiment": ackley_2d, + } + + # Test GP-12: Sklearn SVR with continuous params only + svr_exp = SklearnCvExperiment(estimator=SVR(), X=X_iris, y=y_iris) + params_svr = { + "param_space": { + "C": (0.01, 100.0), + "gamma": (0.0001, 1.0), + "epsilon": (0.01, 1.0), + }, + "n_iter": 15, + "experiment": svr_exp, + "random_state": 42, + } + + # Test GP-13: Sklearn Ridge regression (simple continuous) + ridge_exp = SklearnCvExperiment(estimator=Ridge(), X=X_wine, y=y_wine) + params_ridge = { + "param_space": { + "alpha": 
(0.001, 100.0), + }, + "n_iter": 15, + "experiment": ridge_exp, + "random_state": 42, + } + + # Test GP-14: With warm_start + params_warm_start = { "param_space": { "x0": (-5.0, 5.0), "x1": (-5.0, 5.0), }, "n_iter": 20, - "experiment": ackley_exp, + "initialize": {"warm_start": [{"x0": 0.0, "x1": 0.0}]}, + "experiment": ackley_2d, + "random_state": 42, + } + + # Test GP-15: Non-deterministic setting + params_non_det = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 15, + "deterministic": False, + "experiment": ackley_2d, + "random_state": 42, + } + + # Test GP-16: Positive-only range + params_positive = { + "param_space": { + "x0": (0.1, 10.0), + "x1": (0.1, 10.0), + }, + "n_iter": 15, + "experiment": ackley_2d, "random_state": 42, } - return [params_continuous] + return [ + params_2d_basic, + params_1d, + params_3d, + params_4d, + params_narrow, + params_wide, + params_asymmetric, + params_small, + params_sample_efficient, + params_seed_42, + params_seed_123, + params_svr, + params_ridge, + params_warm_start, + params_non_det, + params_positive, + ] From bd7809aaada0b88b414831516df9082d9f01cae2 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:40:17 +0100 Subject: [PATCH 05/11] add tests to SmacRandomForest --- src/hyperactive/opt/smac/_random_forest.py | 157 ++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/opt/smac/_random_forest.py b/src/hyperactive/opt/smac/_random_forest.py index 88270aac..0cd89931 100644 --- a/src/hyperactive/opt/smac/_random_forest.py +++ b/src/hyperactive/opt/smac/_random_forest.py @@ -233,6 +233,15 @@ def get_test_params(cls, parameter_set="default"): list of dict List of parameter configurations for testing. + Notes + ----- + In addition to base class tests, SmacRandomForest adds: + + * Different n_initial_points values (controls exploration) + * Mixed parameter spaces (RF handles categoricals natively) + * Sklearn hyperparameter optimization with many param types + * Higher dimensional spaces + Examples -------- >>> params = SmacRandomForest.get_test_params() @@ -241,10 +250,137 @@ def get_test_params(cls, parameter_set="default"): """ params = super().get_test_params(parameter_set) - # Add test with custom parameters + from sklearn.datasets import load_iris, load_wine + from sklearn.ensemble import RandomForestClassifier + from sklearn.svm import SVC + from hyperactive.experiment.bench import Ackley + from hyperactive.experiment.integrations import SklearnCvExperiment + + # Create Ackley instances with different dimensions + ackley_exp = Ackley.create_test_instance() # 2D + ackley_3d = Ackley(d=3) + ackley_5d = Ackley(d=5) + + X_iris, y_iris = load_iris(return_X_y=True) + X_wine, y_wine = load_wine(return_X_y=True) + + # Test RF-1: Default n_initial_points + params.append( + { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 20, + "n_initial_points": 10, + "experiment": ackley_exp, + "random_state": 42, + } + ) + + # Test RF-2: Small n_initial_points (more exploitation) + params.append( + { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 15, + "n_initial_points": 3, + "experiment": ackley_exp, + "random_state": 42, + } + ) + + # Test RF-3: Large n_initial_points (more exploration) + params.append( + { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 25, + "n_initial_points": 15, + "experiment": ackley_exp, + "random_state": 42, + } + ) + + # Test RF-4: Mixed params - RF 
handles categoricals natively + sklearn_exp_svc = SklearnCvExperiment(estimator=SVC(), X=X_iris, y=y_iris) + params.append( + { + "param_space": { + "C": (0.01, 100.0), + "gamma": (0.0001, 1.0), + "kernel": ["rbf", "linear", "poly"], + "shrinking": [True, False], + }, + "n_iter": 15, + "n_initial_points": 5, + "experiment": sklearn_exp_svc, + "random_state": 42, + } + ) + + # Test RF-5: Comprehensive sklearn RF hyperparameter optimization + sklearn_exp_rf = SklearnCvExperiment( + estimator=RandomForestClassifier(random_state=42), + X=X_wine, + y=y_wine, + cv=3, + ) + params.append( + { + "param_space": { + "n_estimators": (10, 100), + "max_depth": (1, 15), + "min_samples_split": (2, 10), + "min_samples_leaf": (1, 5), + "max_features": ["sqrt", "log2"], + "bootstrap": [True, False], + }, + "n_iter": 15, + "n_initial_points": 5, + "experiment": sklearn_exp_rf, + "random_state": 42, + } + ) + + # Test RF-6: Integer + float + categorical combined (numeric categorical) + params.append( + { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-10, 10), + "x2": [-3.0, -1.0, 0.0, 1.0, 3.0], + }, + "n_iter": 15, + "n_initial_points": 5, + "experiment": ackley_3d, + "random_state": 42, + } + ) + + # Test RF-7: Higher dimensional space (RF scales well) + params.append( + { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + "x2": (-5.0, 5.0), + "x3": (-5.0, 5.0), + "x4": (-5.0, 5.0), + }, + "n_iter": 20, + "n_initial_points": 10, + "experiment": ackley_5d, + "random_state": 42, + } + ) - ackley_exp = Ackley.create_test_instance() + # Test RF-8: With warm_start and n_initial_points params.append( { "param_space": { @@ -253,9 +389,26 @@ def get_test_params(cls, parameter_set="default"): }, "n_iter": 20, "n_initial_points": 5, + "initialize": {"warm_start": [{"x0": 0.0, "x1": 0.0}]}, "experiment": ackley_exp, "random_state": 42, } ) + # Test RF-9: Many categorical options + params.append( + { + "param_space": { + "C": (0.1, 10.0), + "kernel": ["rbf", "linear", "poly", "sigmoid"], + "degree": [2, 3, 4, 5], + "gamma": ["scale", "auto"], + }, + "n_iter": 15, + "n_initial_points": 5, + "experiment": sklearn_exp_svc, + "random_state": 42, + } + ) + return params From 14cc20ed12ae2a6411fb79be2abbc67a70025be3 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:46:14 +0100 Subject: [PATCH 06/11] add tests to SmacRandomSearch --- src/hyperactive/opt/smac/_random_search.py | 263 ++++++++++++++++++++- 1 file changed, 259 insertions(+), 4 deletions(-) diff --git a/src/hyperactive/opt/smac/_random_search.py b/src/hyperactive/opt/smac/_random_search.py index dc230f18..e2b83917 100644 --- a/src/hyperactive/opt/smac/_random_search.py +++ b/src/hyperactive/opt/smac/_random_search.py @@ -191,17 +191,42 @@ def get_test_params(cls, parameter_set="default"): list of dict List of parameter configurations for testing. + Notes + ----- + SmacRandomSearch tests cover all parameter types since random + search works with any search space. 
Tests include: + + * Continuous parameters (float ranges) + * Integer parameters (int ranges) + * Categorical parameters (string, numeric, boolean) + * Mixed parameter types + * High-dimensional spaces + * Various iteration counts + * Reproducibility with random_state + * Warm start initialization + Examples -------- >>> params = SmacRandomSearch.get_test_params() >>> len(params) >= 1 True """ + from sklearn.datasets import load_iris, load_wine + from sklearn.ensemble import RandomForestClassifier + from sklearn.neighbors import KNeighborsClassifier + from sklearn.svm import SVC + from hyperactive.experiment.bench import Ackley + from hyperactive.experiment.integrations import SklearnCvExperiment - ackley_exp = Ackley.create_test_instance() + # Create Ackley instances with different dimensions + ackley_exp = Ackley.create_test_instance() # 2D + ackley_8d = Ackley(d=8) - # Test with continuous parameters + X_iris, y_iris = load_iris(return_X_y=True) + X_wine, y_wine = load_wine(return_X_y=True) + + # Test RS-1: Continuous parameters (float ranges) params_continuous = { "param_space": { "x0": (-5.0, 5.0), @@ -212,7 +237,7 @@ def get_test_params(cls, parameter_set="default"): "random_state": 42, } - # Test with mixed parameters + # Test RS-2: Mixed float + categorical params_mixed = { "param_space": { "x0": (-5.0, 5.0), @@ -223,4 +248,234 @@ def get_test_params(cls, parameter_set="default"): "random_state": 42, } - return [params_continuous, params_mixed] + # Test RS-3: Pure integer ranges + params_integers = { + "param_space": { + "x0": (-10, 10), + "x1": (0, 100), + }, + "n_iter": 20, + "experiment": ackley_exp, + "random_state": 42, + } + + # Test RS-4: Pure categorical (string values) + sklearn_exp_svc = SklearnCvExperiment(estimator=SVC(), X=X_iris, y=y_iris) + params_categorical_str = { + "param_space": { + "kernel": ["rbf", "linear", "poly", "sigmoid"], + "gamma": ["scale", "auto"], + }, + "n_iter": 15, + "experiment": sklearn_exp_svc, + "random_state": 42, + } + + # Test RS-5: Pure categorical (numeric values) + params_categorical_num = { + "param_space": { + "C": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0], + "gamma": [0.0001, 0.001, 0.01, 0.1, 1.0], + }, + "n_iter": 15, + "experiment": sklearn_exp_svc, + "random_state": 42, + } + + # Test RS-6: Boolean categorical + params_boolean = { + "param_space": { + "C": (0.1, 10.0), + "shrinking": [True, False], + "probability": [True, False], + }, + "n_iter": 15, + "experiment": sklearn_exp_svc, + "random_state": 42, + } + + # Test RS-7: Mixed all types (float + int + categorical) + params_mixed_all = { + "param_space": { + "C": (0.01, 100.0), + "degree": (2, 5), + "kernel": ["rbf", "linear", "poly"], + "shrinking": [True, False], + }, + "n_iter": 20, + "experiment": sklearn_exp_svc, + "random_state": 42, + } + + # Test RS-8: High-dimensional space (random search scales well) + params_high_dim = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + "x2": (-5.0, 5.0), + "x3": (-5.0, 5.0), + "x4": (-5.0, 5.0), + "x5": (-5.0, 5.0), + "x6": (-5.0, 5.0), + "x7": (-5.0, 5.0), + }, + "n_iter": 50, + "experiment": ackley_8d, + "random_state": 42, + } + + # Test RS-9: Very high iteration count (random search is cheap) + params_many_iter = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 100, + "experiment": ackley_exp, + "random_state": 42, + } + + # Test RS-10: Low iteration count + params_few_iter = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 5, + "experiment": ackley_exp, + 
"random_state": 42, + } + + # Test RS-11: Sklearn KNN with integer params + knn_exp = SklearnCvExperiment( + estimator=KNeighborsClassifier(), X=X_iris, y=y_iris + ) + params_knn = { + "param_space": { + "n_neighbors": (1, 20), + "leaf_size": (10, 50), + "p": [1, 2], + "weights": ["uniform", "distance"], + }, + "n_iter": 20, + "experiment": knn_exp, + "random_state": 42, + } + + # Test RS-12: Sklearn RandomForest with comprehensive space + rf_exp = SklearnCvExperiment( + estimator=RandomForestClassifier(random_state=42), + X=X_wine, + y=y_wine, + cv=3, + ) + params_rf = { + "param_space": { + "n_estimators": (10, 200), + "max_depth": (1, 20), + "min_samples_split": (2, 20), + "min_samples_leaf": (1, 10), + "max_features": ["sqrt", "log2"], + "bootstrap": [True, False], + "criterion": ["gini", "entropy"], + }, + "n_iter": 30, + "experiment": rf_exp, + "random_state": 42, + } + + # Test RS-13: With warm_start + params_warm_start = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 25, + "initialize": {"warm_start": [{"x0": 0.0, "x1": 0.0}]}, + "experiment": ackley_exp, + "random_state": 42, + } + + # Test RS-14: Multiple warm start points + params_multi_warm = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 25, + "initialize": { + "warm_start": [ + {"x0": 0.0, "x1": 0.0}, + {"x0": -2.0, "x1": 2.0}, + {"x0": 3.0, "x1": -3.0}, + ] + }, + "experiment": ackley_exp, + "random_state": 42, + } + + # Test RS-15: Non-deterministic setting + params_non_det = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 20, + "deterministic": False, + "experiment": ackley_exp, + "random_state": 42, + } + + # Test RS-16: Different random states for reproducibility + params_seed_0 = { + "param_space": { + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), + }, + "n_iter": 15, + "random_state": 0, + "experiment": ackley_exp, + } + + # Test RS-17: Large range values + params_large_range = { + "param_space": { + "x0": (-1000.0, 1000.0), + "x1": (-1000.0, 1000.0), + }, + "n_iter": 20, + "experiment": ackley_exp, + "random_state": 42, + } + + # Test RS-18: Small range values + params_small_range = { + "param_space": { + "x0": (-0.001, 0.001), + "x1": (-0.001, 0.001), + }, + "n_iter": 20, + "experiment": ackley_exp, + "random_state": 42, + } + + return [ + params_continuous, + params_mixed, + params_integers, + params_categorical_str, + params_categorical_num, + params_boolean, + params_mixed_all, + params_high_dim, + params_many_iter, + params_few_iter, + params_knn, + params_rf, + params_warm_start, + params_multi_warm, + params_non_det, + params_seed_0, + params_large_range, + params_small_range, + ] From 0d023d4ced2327c06d39b0773278cdf3efe3284b Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:57:27 +0100 Subject: [PATCH 07/11] distill tests --- .../opt/_adapters/_base_smac_adapter.py | 163 ++--------- src/hyperactive/opt/smac/_gaussian_process.py | 210 +------------- src/hyperactive/opt/smac/_random_forest.py | 156 +---------- src/hyperactive/opt/smac/_random_search.py | 263 +----------------- 4 files changed, 30 insertions(+), 762 deletions(-) diff --git a/src/hyperactive/opt/_adapters/_base_smac_adapter.py b/src/hyperactive/opt/_adapters/_base_smac_adapter.py index 203f5927..94e009b8 100644 --- a/src/hyperactive/opt/_adapters/_base_smac_adapter.py +++ b/src/hyperactive/opt/_adapters/_base_smac_adapter.py @@ -413,19 +413,6 @@ def get_test_params(cls, parameter_set="default"): ------- list of dict List of parameter 
configurations for testing. - - Notes - ----- - Test parameter sets cover: - - * Float tuple ranges (continuous parameters) - * Integer tuple ranges (discrete parameters) - * Categorical lists (string and numeric) - * Boolean categorical parameters - * Mixed parameter types (float + int + categorical) - * Warm start initialization - * Random state for reproducibility - * Deterministic vs non-deterministic settings """ from sklearn.datasets import load_iris from sklearn.svm import SVC @@ -437,28 +424,19 @@ def get_test_params(cls, parameter_set="default"): sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y) ackley_exp = Ackley.create_test_instance() - # Test 1: Float tuple ranges (continuous parameters) - params_float_tuples = { - "param_space": { - "C": (0.01, 10.0), - "gamma": (0.0001, 1.0), - }, - "n_iter": 10, - "experiment": sklearn_exp, - } - - # Test 2: Categorical lists (discrete values) - params_categorical = { + # Test 1: Continuous parameters (float tuples) + params_continuous = { "param_space": { - "C": [0.01, 0.1, 1.0, 10.0], - "gamma": [0.0001, 0.001, 0.01, 0.1], + "x0": (-5.0, 5.0), + "x1": (-5.0, 5.0), }, - "n_iter": 10, - "experiment": sklearn_exp, + "n_iter": 15, + "random_state": 42, + "experiment": ackley_exp, } - # Test 3: Integer tuple ranges - params_integer_tuples = { + # Test 2: Integer parameters + params_integer = { "param_space": { "x0": (-5, 5), "x1": (-5, 5), @@ -467,70 +445,19 @@ def get_test_params(cls, parameter_set="default"): "experiment": ackley_exp, } - # Test 4: Mixed parameter types (float + categorical) - params_mixed_float_cat = { + # Test 3: Mixed types (float + int + categorical + boolean) + params_mixed = { "param_space": { "C": (0.01, 100.0), - "gamma": (0.0001, 1.0), + "degree": (2, 5), "kernel": ["rbf", "linear", "poly"], - }, - "n_iter": 10, - "experiment": sklearn_exp, - } - - # Test 5: Mixed parameter types (int + float + categorical) - params_mixed_all = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": [1, 2, 3, 4, 5], - }, - "n_iter": 10, - "experiment": ackley_exp, - } - - # Test 6: Boolean categorical parameters - params_boolean_cat = { - "param_space": { - "C": (0.1, 10.0), "shrinking": [True, False], }, "n_iter": 10, "experiment": sklearn_exp, } - # Test 7: String categorical with many options - params_string_cat = { - "param_space": { - "C": (0.1, 10.0), - "kernel": ["rbf", "linear", "poly", "sigmoid"], - }, - "n_iter": 10, - "experiment": sklearn_exp, - } - - # Test 8: With random_state for reproducibility - params_random_state = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 15, - "random_state": 42, - "experiment": ackley_exp, - } - - # Test 9: With deterministic=False (stochastic objective) - params_non_deterministic = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 10, - "deterministic": False, - "experiment": ackley_exp, - } - - # Test 10: With warm_start initialization + # Test 4: With warm_start params_warm_start = { "param_space": { "x0": (-5.0, 5.0), @@ -541,67 +468,9 @@ def get_test_params(cls, parameter_set="default"): "experiment": ackley_exp, } - # Test 11: Multiple warm start points - params_multi_warm_start = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 15, - "initialize": { - "warm_start": [ - {"x0": 0.0, "x1": 0.0}, - {"x0": 1.0, "x1": -1.0}, - {"x0": -2.0, "x1": 2.0}, - ] - }, - "random_state": 123, - "experiment": ackley_exp, - } - - # Test 12: Large integer range - params_large_int_range = { - 
"param_space": { - "x0": (-100, 100), - "x1": (0, 1000), - }, - "n_iter": 10, - "experiment": ackley_exp, - } - - # Test 13: Small float range (high precision) - params_small_float_range = { - "param_space": { - "x0": (-0.001, 0.001), - "x1": (-0.001, 0.001), - }, - "n_iter": 10, - "experiment": ackley_exp, - } - - # Test 14: Asymmetric ranges - params_asymmetric = { - "param_space": { - "x0": (-10.0, 2.0), - "x1": (0.5, 100.0), - }, - "n_iter": 10, - "experiment": ackley_exp, - } - return [ - params_float_tuples, - params_categorical, - params_integer_tuples, - params_mixed_float_cat, - params_mixed_all, - params_boolean_cat, - params_string_cat, - params_random_state, - params_non_deterministic, + params_continuous, + params_integer, + params_mixed, params_warm_start, - params_multi_warm_start, - params_large_int_range, - params_small_float_range, - params_asymmetric, ] diff --git a/src/hyperactive/opt/smac/_gaussian_process.py b/src/hyperactive/opt/smac/_gaussian_process.py index 92440189..cc19253d 100644 --- a/src/hyperactive/opt/smac/_gaussian_process.py +++ b/src/hyperactive/opt/smac/_gaussian_process.py @@ -200,45 +200,23 @@ def get_test_params(cls, parameter_set="default"): list of dict List of parameter configurations for testing. - Notes - ----- - SmacGaussianProcess tests focus on continuous parameter spaces, - where Gaussian Processes excel. Tests cover: - - * Different dimensional spaces (1D to 4D) - * Various range sizes (narrow, wide, asymmetric) - * Sample efficiency (fewer iterations) - * Reproducibility with random_state - * sklearn continuous hyperparameter optimization - - Categorical parameters are intentionally excluded since GPs - work best with continuous spaces. Use SmacRandomForest for - mixed or categorical parameter spaces. 
- Examples -------- >>> params = SmacGaussianProcess.get_test_params() >>> len(params) >= 1 True """ - from sklearn.datasets import load_iris, load_wine - from sklearn.linear_model import Ridge + from sklearn.datasets import load_iris from sklearn.svm import SVR from hyperactive.experiment.bench import Ackley from hyperactive.experiment.integrations import SklearnCvExperiment - # Create Ackley instances with different dimensions - ackley_1d = Ackley(d=1) - ackley_2d = Ackley.create_test_instance() # default is 2D - ackley_3d = Ackley(d=3) - ackley_4d = Ackley(d=4) - - X_iris, y_iris = load_iris(return_X_y=True) - X_wine, y_wine = load_wine(return_X_y=True) + ackley_2d = Ackley.create_test_instance() + X, y = load_iris(return_X_y=True) - # Test GP-1: Basic 2D continuous space - params_2d_basic = { + # Test 1: Basic 2D continuous space + params_continuous = { "param_space": { "x0": (-5.0, 5.0), "x1": (-5.0, 5.0), @@ -248,120 +226,8 @@ def get_test_params(cls, parameter_set="default"): "random_state": 42, } - # Test GP-2: 1D continuous space (simplest case) - params_1d = { - "param_space": { - "x0": (-10.0, 10.0), - }, - "n_iter": 15, - "experiment": ackley_1d, - "random_state": 42, - } - - # Test GP-3: 3D continuous space - params_3d = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - "x2": (-5.0, 5.0), - }, - "n_iter": 25, - "experiment": ackley_3d, - "random_state": 42, - } - - # Test GP-4: 4D continuous space (GP limit for efficiency) - params_4d = { - "param_space": { - "x0": (-3.0, 3.0), - "x1": (-3.0, 3.0), - "x2": (-3.0, 3.0), - "x3": (-3.0, 3.0), - }, - "n_iter": 30, - "experiment": ackley_4d, - "random_state": 42, - } - - # Test GP-5: Narrow range (high precision optimization) - params_narrow = { - "param_space": { - "x0": (-0.5, 0.5), - "x1": (-0.5, 0.5), - }, - "n_iter": 15, - "experiment": ackley_2d, - "random_state": 42, - } - - # Test GP-6: Wide range - params_wide = { - "param_space": { - "x0": (-100.0, 100.0), - "x1": (-100.0, 100.0), - }, - "n_iter": 20, - "experiment": ackley_2d, - "random_state": 42, - } - - # Test GP-7: Asymmetric ranges - params_asymmetric = { - "param_space": { - "x0": (-10.0, 2.0), - "x1": (0.001, 50.0), - }, - "n_iter": 20, - "experiment": ackley_2d, - "random_state": 42, - } - - # Test GP-8: Very small range (local optimization) - params_small = { - "param_space": { - "x0": (-0.01, 0.01), - "x1": (-0.01, 0.01), - }, - "n_iter": 15, - "experiment": ackley_2d, - "random_state": 42, - } - - # Test GP-9: Sample efficient (fewer iterations) - params_sample_efficient = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 10, - "experiment": ackley_2d, - "random_state": 42, - } - - # Test GP-10: With reproducibility test (different seeds) - params_seed_42 = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 15, - "random_state": 42, - "experiment": ackley_2d, - } - - # Test GP-11: Different random state - params_seed_123 = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 15, - "random_state": 123, - "experiment": ackley_2d, - } - - # Test GP-12: Sklearn SVR with continuous params only - svr_exp = SklearnCvExperiment(estimator=SVR(), X=X_iris, y=y_iris) + # Test 2: Sklearn SVR with continuous params (GP's strength) + svr_exp = SklearnCvExperiment(estimator=SVR(), X=X, y=y) params_svr = { "param_space": { "C": (0.01, 100.0), @@ -373,67 +239,7 @@ def get_test_params(cls, parameter_set="default"): "random_state": 42, } - # Test GP-13: Sklearn Ridge regression 
(simple continuous) - ridge_exp = SklearnCvExperiment(estimator=Ridge(), X=X_wine, y=y_wine) - params_ridge = { - "param_space": { - "alpha": (0.001, 100.0), - }, - "n_iter": 15, - "experiment": ridge_exp, - "random_state": 42, - } - - # Test GP-14: With warm_start - params_warm_start = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 20, - "initialize": {"warm_start": [{"x0": 0.0, "x1": 0.0}]}, - "experiment": ackley_2d, - "random_state": 42, - } - - # Test GP-15: Non-deterministic setting - params_non_det = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 15, - "deterministic": False, - "experiment": ackley_2d, - "random_state": 42, - } - - # Test GP-16: Positive-only range - params_positive = { - "param_space": { - "x0": (0.1, 10.0), - "x1": (0.1, 10.0), - }, - "n_iter": 15, - "experiment": ackley_2d, - "random_state": 42, - } - return [ - params_2d_basic, - params_1d, - params_3d, - params_4d, - params_narrow, - params_wide, - params_asymmetric, - params_small, - params_sample_efficient, - params_seed_42, - params_seed_123, + params_continuous, params_svr, - params_ridge, - params_warm_start, - params_non_det, - params_positive, ] diff --git a/src/hyperactive/opt/smac/_random_forest.py b/src/hyperactive/opt/smac/_random_forest.py index 0cd89931..9ddab121 100644 --- a/src/hyperactive/opt/smac/_random_forest.py +++ b/src/hyperactive/opt/smac/_random_forest.py @@ -233,15 +233,6 @@ def get_test_params(cls, parameter_set="default"): list of dict List of parameter configurations for testing. - Notes - ----- - In addition to base class tests, SmacRandomForest adds: - - * Different n_initial_points values (controls exploration) - * Mixed parameter spaces (RF handles categoricals natively) - * Sklearn hyperparameter optimization with many param types - * Higher dimensional spaces - Examples -------- >>> params = SmacRandomForest.get_test_params() @@ -250,22 +241,11 @@ def get_test_params(cls, parameter_set="default"): """ params = super().get_test_params(parameter_set) - from sklearn.datasets import load_iris, load_wine - from sklearn.ensemble import RandomForestClassifier - from sklearn.svm import SVC - from hyperactive.experiment.bench import Ackley - from hyperactive.experiment.integrations import SklearnCvExperiment - # Create Ackley instances with different dimensions - ackley_exp = Ackley.create_test_instance() # 2D - ackley_3d = Ackley(d=3) - ackley_5d = Ackley(d=5) + ackley_exp = Ackley.create_test_instance() - X_iris, y_iris = load_iris(return_X_y=True) - X_wine, y_wine = load_wine(return_X_y=True) - - # Test RF-1: Default n_initial_points + # Test with n_initial_points (RF-specific parameter) params.append( { "param_space": { @@ -273,142 +253,10 @@ def get_test_params(cls, parameter_set="default"): "x1": (-5.0, 5.0), }, "n_iter": 20, - "n_initial_points": 10, - "experiment": ackley_exp, - "random_state": 42, - } - ) - - # Test RF-2: Small n_initial_points (more exploitation) - params.append( - { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 15, - "n_initial_points": 3, - "experiment": ackley_exp, - "random_state": 42, - } - ) - - # Test RF-3: Large n_initial_points (more exploration) - params.append( - { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 25, - "n_initial_points": 15, - "experiment": ackley_exp, - "random_state": 42, - } - ) - - # Test RF-4: Mixed params - RF handles categoricals natively - sklearn_exp_svc = SklearnCvExperiment(estimator=SVC(), 
X=X_iris, y=y_iris) - params.append( - { - "param_space": { - "C": (0.01, 100.0), - "gamma": (0.0001, 1.0), - "kernel": ["rbf", "linear", "poly"], - "shrinking": [True, False], - }, - "n_iter": 15, "n_initial_points": 5, - "experiment": sklearn_exp_svc, - "random_state": 42, - } - ) - - # Test RF-5: Comprehensive sklearn RF hyperparameter optimization - sklearn_exp_rf = SklearnCvExperiment( - estimator=RandomForestClassifier(random_state=42), - X=X_wine, - y=y_wine, - cv=3, - ) - params.append( - { - "param_space": { - "n_estimators": (10, 100), - "max_depth": (1, 15), - "min_samples_split": (2, 10), - "min_samples_leaf": (1, 5), - "max_features": ["sqrt", "log2"], - "bootstrap": [True, False], - }, - "n_iter": 15, - "n_initial_points": 5, - "experiment": sklearn_exp_rf, - "random_state": 42, - } - ) - - # Test RF-6: Integer + float + categorical combined (numeric categorical) - params.append( - { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-10, 10), - "x2": [-3.0, -1.0, 0.0, 1.0, 3.0], - }, - "n_iter": 15, - "n_initial_points": 5, - "experiment": ackley_3d, - "random_state": 42, - } - ) - - # Test RF-7: Higher dimensional space (RF scales well) - params.append( - { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - "x2": (-5.0, 5.0), - "x3": (-5.0, 5.0), - "x4": (-5.0, 5.0), - }, - "n_iter": 20, - "n_initial_points": 10, - "experiment": ackley_5d, - "random_state": 42, - } - ) - - # Test RF-8: With warm_start and n_initial_points - params.append( - { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 20, - "n_initial_points": 5, - "initialize": {"warm_start": [{"x0": 0.0, "x1": 0.0}]}, "experiment": ackley_exp, "random_state": 42, } ) - # Test RF-9: Many categorical options - params.append( - { - "param_space": { - "C": (0.1, 10.0), - "kernel": ["rbf", "linear", "poly", "sigmoid"], - "degree": [2, 3, 4, 5], - "gamma": ["scale", "auto"], - }, - "n_iter": 15, - "n_initial_points": 5, - "experiment": sklearn_exp_svc, - "random_state": 42, - } - ) - return params diff --git a/src/hyperactive/opt/smac/_random_search.py b/src/hyperactive/opt/smac/_random_search.py index e2b83917..22363049 100644 --- a/src/hyperactive/opt/smac/_random_search.py +++ b/src/hyperactive/opt/smac/_random_search.py @@ -191,42 +191,17 @@ def get_test_params(cls, parameter_set="default"): list of dict List of parameter configurations for testing. - Notes - ----- - SmacRandomSearch tests cover all parameter types since random - search works with any search space. 
Tests include: - - * Continuous parameters (float ranges) - * Integer parameters (int ranges) - * Categorical parameters (string, numeric, boolean) - * Mixed parameter types - * High-dimensional spaces - * Various iteration counts - * Reproducibility with random_state - * Warm start initialization - Examples -------- >>> params = SmacRandomSearch.get_test_params() >>> len(params) >= 1 True """ - from sklearn.datasets import load_iris, load_wine - from sklearn.ensemble import RandomForestClassifier - from sklearn.neighbors import KNeighborsClassifier - from sklearn.svm import SVC - from hyperactive.experiment.bench import Ackley - from hyperactive.experiment.integrations import SklearnCvExperiment - # Create Ackley instances with different dimensions - ackley_exp = Ackley.create_test_instance() # 2D - ackley_8d = Ackley(d=8) + ackley_exp = Ackley.create_test_instance() - X_iris, y_iris = load_iris(return_X_y=True) - X_wine, y_wine = load_wine(return_X_y=True) - - # Test RS-1: Continuous parameters (float ranges) + # Test 1: Continuous parameters params_continuous = { "param_space": { "x0": (-5.0, 5.0), @@ -237,7 +212,7 @@ def get_test_params(cls, parameter_set="default"): "random_state": 42, } - # Test RS-2: Mixed float + categorical + # Test 2: Mixed float + categorical params_mixed = { "param_space": { "x0": (-5.0, 5.0), @@ -248,234 +223,4 @@ def get_test_params(cls, parameter_set="default"): "random_state": 42, } - # Test RS-3: Pure integer ranges - params_integers = { - "param_space": { - "x0": (-10, 10), - "x1": (0, 100), - }, - "n_iter": 20, - "experiment": ackley_exp, - "random_state": 42, - } - - # Test RS-4: Pure categorical (string values) - sklearn_exp_svc = SklearnCvExperiment(estimator=SVC(), X=X_iris, y=y_iris) - params_categorical_str = { - "param_space": { - "kernel": ["rbf", "linear", "poly", "sigmoid"], - "gamma": ["scale", "auto"], - }, - "n_iter": 15, - "experiment": sklearn_exp_svc, - "random_state": 42, - } - - # Test RS-5: Pure categorical (numeric values) - params_categorical_num = { - "param_space": { - "C": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0], - "gamma": [0.0001, 0.001, 0.01, 0.1, 1.0], - }, - "n_iter": 15, - "experiment": sklearn_exp_svc, - "random_state": 42, - } - - # Test RS-6: Boolean categorical - params_boolean = { - "param_space": { - "C": (0.1, 10.0), - "shrinking": [True, False], - "probability": [True, False], - }, - "n_iter": 15, - "experiment": sklearn_exp_svc, - "random_state": 42, - } - - # Test RS-7: Mixed all types (float + int + categorical) - params_mixed_all = { - "param_space": { - "C": (0.01, 100.0), - "degree": (2, 5), - "kernel": ["rbf", "linear", "poly"], - "shrinking": [True, False], - }, - "n_iter": 20, - "experiment": sklearn_exp_svc, - "random_state": 42, - } - - # Test RS-8: High-dimensional space (random search scales well) - params_high_dim = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - "x2": (-5.0, 5.0), - "x3": (-5.0, 5.0), - "x4": (-5.0, 5.0), - "x5": (-5.0, 5.0), - "x6": (-5.0, 5.0), - "x7": (-5.0, 5.0), - }, - "n_iter": 50, - "experiment": ackley_8d, - "random_state": 42, - } - - # Test RS-9: Very high iteration count (random search is cheap) - params_many_iter = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 100, - "experiment": ackley_exp, - "random_state": 42, - } - - # Test RS-10: Low iteration count - params_few_iter = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 5, - "experiment": ackley_exp, - "random_state": 42, - } - - # Test RS-11: 
Sklearn KNN with integer params - knn_exp = SklearnCvExperiment( - estimator=KNeighborsClassifier(), X=X_iris, y=y_iris - ) - params_knn = { - "param_space": { - "n_neighbors": (1, 20), - "leaf_size": (10, 50), - "p": [1, 2], - "weights": ["uniform", "distance"], - }, - "n_iter": 20, - "experiment": knn_exp, - "random_state": 42, - } - - # Test RS-12: Sklearn RandomForest with comprehensive space - rf_exp = SklearnCvExperiment( - estimator=RandomForestClassifier(random_state=42), - X=X_wine, - y=y_wine, - cv=3, - ) - params_rf = { - "param_space": { - "n_estimators": (10, 200), - "max_depth": (1, 20), - "min_samples_split": (2, 20), - "min_samples_leaf": (1, 10), - "max_features": ["sqrt", "log2"], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"], - }, - "n_iter": 30, - "experiment": rf_exp, - "random_state": 42, - } - - # Test RS-13: With warm_start - params_warm_start = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 25, - "initialize": {"warm_start": [{"x0": 0.0, "x1": 0.0}]}, - "experiment": ackley_exp, - "random_state": 42, - } - - # Test RS-14: Multiple warm start points - params_multi_warm = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 25, - "initialize": { - "warm_start": [ - {"x0": 0.0, "x1": 0.0}, - {"x0": -2.0, "x1": 2.0}, - {"x0": 3.0, "x1": -3.0}, - ] - }, - "experiment": ackley_exp, - "random_state": 42, - } - - # Test RS-15: Non-deterministic setting - params_non_det = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 20, - "deterministic": False, - "experiment": ackley_exp, - "random_state": 42, - } - - # Test RS-16: Different random states for reproducibility - params_seed_0 = { - "param_space": { - "x0": (-5.0, 5.0), - "x1": (-5.0, 5.0), - }, - "n_iter": 15, - "random_state": 0, - "experiment": ackley_exp, - } - - # Test RS-17: Large range values - params_large_range = { - "param_space": { - "x0": (-1000.0, 1000.0), - "x1": (-1000.0, 1000.0), - }, - "n_iter": 20, - "experiment": ackley_exp, - "random_state": 42, - } - - # Test RS-18: Small range values - params_small_range = { - "param_space": { - "x0": (-0.001, 0.001), - "x1": (-0.001, 0.001), - }, - "n_iter": 20, - "experiment": ackley_exp, - "random_state": 42, - } - - return [ - params_continuous, - params_mixed, - params_integers, - params_categorical_str, - params_categorical_num, - params_boolean, - params_mixed_all, - params_high_dim, - params_many_iter, - params_few_iter, - params_knn, - params_rf, - params_warm_start, - params_multi_warm, - params_non_det, - params_seed_0, - params_large_range, - params_small_range, - ] + return [params_continuous, params_mixed] From 3f90787aa62bb8ef1ae9eafeba10816ff8ed9dfe Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 Dec 2025 19:58:41 +0100 Subject: [PATCH 08/11] add smac optimizers --- src/hyperactive/opt/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/hyperactive/opt/__init__.py b/src/hyperactive/opt/__init__.py index da303a23..155090a9 100644 --- a/src/hyperactive/opt/__init__.py +++ b/src/hyperactive/opt/__init__.py @@ -38,6 +38,11 @@ RandomOptimizer, TPEOptimizer, ) +from .smac import ( + SmacGaussianProcess, + SmacRandomForest, + SmacRandomSearch, +) __all__ = [ "GridSearchSk", @@ -71,4 +76,7 @@ "NSGAIIOptimizer", "NSGAIIIOptimizer", "QMCOptimizer", + "SmacRandomForest", + "SmacGaussianProcess", + "SmacRandomSearch", ] From e8effae72127a3c3e980e2aa7d766cdd9007d555 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 13 
Dec 2025 19:58:52 +0100 Subject: [PATCH 09/11] add smac requirements --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 098a4ac3..a32a2c3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,9 @@ sktime-integration = [ "skpro", 'sktime; python_version < "3.14"', ] +smac = [ + "smac>=2.0.0", +] build = [ "setuptools", "build", @@ -97,6 +100,7 @@ docs = [ ] all_extras = [ "hyperactive[integrations]", + "hyperactive[smac]", "optuna<5", "cmaes", # Required for CmaEsOptimizer (optuna's CMA-ES sampler) "lightning", From 667ec6f9862e16905bd422034eded4f38d411ddb Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 15 Dec 2025 07:46:37 +0100 Subject: [PATCH 10/11] add ConfigSpace to smac req. --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0e776786..852529da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,8 @@ sktime-integration = [ 'sktime; python_version < "3.14"', ] smac = [ - "smac>=2.0.0", + "smac>=2.0.0,<3.0.0", + "ConfigSpace>=0.7.0,<2.0.0", ] build = [ "setuptools", From 9d5fba46f88e91e8d57d040cf63d3843fd68f986 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 15 Dec 2025 07:51:07 +0100 Subject: [PATCH 11/11] add smac backend to docs --- .../source/_snippets/user_guide/optimizers.py | 54 ++++ .../source/api_reference/optimizers/index.rst | 5 +- docs/source/api_reference/optimizers/smac.rst | 18 ++ docs/source/examples.rst | 5 + docs/source/examples/smac_backend.rst | 232 ++++++++++++++++++ docs/source/user_guide/optimizers/index.rst | 18 +- docs/source/user_guide/optimizers/smac.rst | 57 +++++ 7 files changed, 385 insertions(+), 4 deletions(-) create mode 100644 docs/source/api_reference/optimizers/smac.rst create mode 100644 docs/source/examples/smac_backend.rst create mode 100644 docs/source/user_guide/optimizers/smac.rst diff --git a/docs/source/_snippets/user_guide/optimizers.py b/docs/source/_snippets/user_guide/optimizers.py index 463addcd..fd1bf1dc 100644 --- a/docs/source/_snippets/user_guide/optimizers.py +++ b/docs/source/_snippets/user_guide/optimizers.py @@ -225,6 +225,60 @@ def objective(params): # [end:optuna_tpe] +# ============================================================================ +# SMAC Backend +# ============================================================================ + +# [start:smac_imports] +from hyperactive.opt.smac import ( + SmacRandomForest, # Random Forest surrogate + SmacGaussianProcess, # Gaussian Process surrogate + SmacRandomSearch, # Random sampling baseline +) +# [end:smac_imports] + + +# [start:smac_random_forest] +from hyperactive.opt.smac import SmacRandomForest + +# Define search space with continuous parameters +smac_param_space = { + "x": (-5.0, 5.0), + "y": (-5.0, 5.0), +} + +optimizer = SmacRandomForest( + param_space=smac_param_space, + n_iter=50, + n_initial_points=10, + experiment=objective, +) +# [end:smac_random_forest] + + +# [start:smac_gaussian_process] +from hyperactive.opt.smac import SmacGaussianProcess + +# Best for continuous parameter spaces +optimizer = SmacGaussianProcess( + param_space=smac_param_space, + n_iter=50, + experiment=objective, +) +# [end:smac_gaussian_process] + + +# [start:smac_random_search] +from hyperactive.opt.smac import SmacRandomSearch + +optimizer = SmacRandomSearch( + param_space=smac_param_space, + n_iter=100, + experiment=objective, +) +# [end:smac_random_search] + + # 
============================================================================
 # Configuration Examples
 # ============================================================================
diff --git a/docs/source/api_reference/optimizers/index.rst b/docs/source/api_reference/optimizers/index.rst
index 75a0b879..5207f244 100644
--- a/docs/source/api_reference/optimizers/index.rst
+++ b/docs/source/api_reference/optimizers/index.rst
@@ -8,7 +8,7 @@ The :mod:`hyperactive.opt` module contains optimization algorithms for hyperpara
 All optimizers inherit from :class:`~hyperactive.base.BaseOptimizer` and share the
 same interface: the ``solve()`` method to run optimization, and configuration via
 the ``experiment`` and ``search_space`` parameters.
 
-Hyperactive provides optimizers from three backends:
+Hyperactive provides optimizers from four backends:
 
 .. list-table::
    :widths: 25 75
@@ -20,6 +20,8 @@ Hyperactive provides optimizers from three backends:
     - Native gradient-free optimization algorithms (21 optimizers)
   * - :doc:`optuna`
     - Interface to Optuna's samplers (8 optimizers)
+  * - :doc:`smac`
+    - Interface to SMAC3's Bayesian optimization (3 optimizers)
   * - :doc:`sklearn`
     - sklearn-compatible search interfaces (2 optimizers)
 
@@ -28,4 +30,5 @@
 
    gfo
    optuna
+   smac
    sklearn
diff --git a/docs/source/api_reference/optimizers/smac.rst b/docs/source/api_reference/optimizers/smac.rst
new file mode 100644
index 00000000..03221568
--- /dev/null
+++ b/docs/source/api_reference/optimizers/smac.rst
@@ -0,0 +1,18 @@
+.. _optimizers_smac_ref:
+
+SMAC
+====
+
+.. currentmodule:: hyperactive.opt
+
+The SMAC backend provides an interface to `SMAC3 <https://automl.github.io/SMAC3/>`_
+(Sequential Model-based Algorithm Configuration) optimization algorithms.
+These optimizers use Bayesian optimization with different surrogate models.
+
+.. autosummary::
+    :toctree: ../auto_generated/
+    :template: class.rst
+
+    SmacRandomForest
+    SmacGaussianProcess
+    SmacRandomSearch
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index c8b88c45..7ae785ae 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -18,6 +18,7 @@ on GitHub.
    examples/population_based
    examples/sequential_model_based
    examples/optuna_backend
+   examples/smac_backend
    examples/sklearn_backend
    examples/integrations
    examples/other
@@ -61,6 +62,10 @@ Backend Examples
    Examples using Optuna's samplers including TPE, CMA-ES, NSGA-II/III,
    and Gaussian Process optimization.
 
+:ref:`examples_smac_backend`
+   State-of-the-art Bayesian optimization using SMAC3 with Random Forest
+   and Gaussian Process surrogate models.
+
 :ref:`examples_sklearn_backend`
    Scikit-learn compatible interfaces as drop-in replacements for
    GridSearchCV and RandomizedSearchCV.
diff --git a/docs/source/examples/smac_backend.rst b/docs/source/examples/smac_backend.rst
new file mode 100644
index 00000000..5a3af8e8
--- /dev/null
+++ b/docs/source/examples/smac_backend.rst
@@ -0,0 +1,232 @@
+.. _examples_smac_backend:
+
+============
+SMAC Backend
+============
+
+Hyperactive provides wrappers for the optimization algorithms of SMAC3
+(Sequential Model-based Algorithm Configuration), enabling state-of-the-art
+Bayesian optimization with Random Forest and Gaussian Process surrogate models.
+
+.. note::
+
+    SMAC must be installed separately:
+
+    .. code-block:: bash
+
+        pip install hyperactive[smac]
+        # or
+        pip install hyperactive[all_extras]
+
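+As a quick end-to-end sketch, the example below pairs ``SmacRandomForest``
+with the two-dimensional Ackley benchmark that ships with Hyperactive
+(illustrative settings):
+
+.. code-block:: python
+
+    from hyperactive.experiment.bench import Ackley
+    from hyperactive.opt.smac import SmacRandomForest
+
+    ackley = Ackley.create_test_instance()  # 2D Ackley test function
+
+    optimizer = SmacRandomForest(
+        param_space={"x0": (-5.0, 5.0), "x1": (-5.0, 5.0)},
+        n_iter=30,
+        random_state=42,
+        experiment=ackley,
+    )
+    best_params = optimizer.solve()
+
+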
+Available Optimizers
+--------------------
+
+SMAC provides three optimization strategies with different surrogate models:
+
+.. list-table::
+    :header-rows: 1
+    :widths: 25 35 40
+
+    * - Optimizer
+      - Surrogate Model
+      - Best For
+    * - ``SmacRandomForest``
+      - Random Forest
+      - Mixed parameter spaces (continuous, categorical, integer)
+    * - ``SmacGaussianProcess``
+      - Gaussian Process
+      - Continuous parameter spaces, small to moderate budgets
+    * - ``SmacRandomSearch``
+      - None (random sampling)
+      - Baseline comparison, high-dimensional spaces
+
+
+SmacRandomForest
+----------------
+
+The flagship SMAC optimizer. Uses a Random Forest surrogate model with an
+Expected Improvement acquisition function, and handles mixed parameter types
+natively.
+
+.. code-block:: python
+
+    from hyperactive.opt.smac import SmacRandomForest
+
+    param_space = {
+        "C": (0.01, 100.0),           # Float range
+        "gamma": (0.0001, 1.0),       # Float range
+        "kernel": ["rbf", "linear"],  # Categorical
+    }
+
+    optimizer = SmacRandomForest(
+        param_space=param_space,
+        n_iter=100,
+        n_initial_points=10,  # Random points before model-based search
+        random_state=42,
+        experiment=objective,
+    )
+    best_params = optimizer.solve()
+
+
+SmacGaussianProcess
+-------------------
+
+Uses a Gaussian Process surrogate model (Matern 5/2 kernel) for sample-efficient
+optimization. Best suited for continuous parameter spaces.
+
+.. warning::
+
+    Gaussian Processes scale as O(n^3) in the number of observations. They are
+    not recommended for budgets exceeding 100 evaluations. For mixed or
+    categorical spaces, use ``SmacRandomForest`` instead.
+
+.. code-block:: python
+
+    from hyperactive.opt.smac import SmacGaussianProcess
+
+    # Continuous parameters work best with GP
+    param_space = {
+        "learning_rate": (0.0001, 0.1),
+        "weight_decay": (0.0, 0.1),
+    }
+
+    optimizer = SmacGaussianProcess(
+        param_space=param_space,
+        n_iter=50,  # GP is sample-efficient
+        random_state=42,
+        experiment=objective,
+    )
+    best_params = optimizer.solve()
+
+
+SmacRandomSearch
+----------------
+
+Pure random search without surrogate modeling. Useful as a baseline or for
+high-dimensional spaces where model-based methods struggle.
+
+.. code-block:: python
+
+    from hyperactive.opt.smac import SmacRandomSearch
+
+    optimizer = SmacRandomSearch(
+        param_space=param_space,
+        n_iter=100,
+        random_state=42,
+        experiment=objective,
+    )
+    best_params = optimizer.solve()
+
+
+Common Parameters
+-----------------
+
+All SMAC optimizers share these parameters:
+
+.. list-table::
+    :header-rows: 1
+    :widths: 25 15 60
+
+    * - Parameter
+      - Default
+      - Description
+    * - ``param_space``
+      - Required
+      - Search space dictionary with parameter ranges
+    * - ``n_iter``
+      - 100
+      - Number of optimization iterations
+    * - ``max_time``
+      - None
+      - Optional time limit in seconds
+    * - ``random_state``
+      - None
+      - Random seed for reproducibility
+    * - ``deterministic``
+      - True
+      - Whether the objective function is deterministic
+    * - ``initialize``
+      - None
+      - Warm start configuration (see below)
+
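+For example, a run can be bounded by wall-clock time in addition to the
+iteration budget (a sketch with illustrative values; the run stops early
+once the time limit is reached):
+
+.. code-block:: python
+
+    optimizer = SmacRandomSearch(
+        param_space=param_space,
+        n_iter=1000,     # iteration budget
+        max_time=10,     # optional time limit in seconds
+        random_state=42,
+        experiment=objective,
+    )
+    best_params = optimizer.solve()
+
+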
+Parameter Space Definition
+--------------------------
+
+SMAC optimizers support three parameter types:
+
+.. code-block:: python
+
+    param_space = {
+        # Float range: at least one bound must be a float
+        "learning_rate": (0.001, 0.1),
+
+        # Integer range: both bounds must be int
+        "n_estimators": (10, 500),
+
+        # Categorical: list of choices
+        "kernel": ["rbf", "linear", "poly"],
+    }
+
+.. note::
+
+    For two-element tuples, the Python types of the bounds determine the
+    parameter type: ``(1, 10)`` creates an integer range, while ``(1.0, 10.0)``
+    creates a float range.
+
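+Boolean flags can be expressed as two-element categorical lists, as in the
+test configurations used for the SMAC optimizers:
+
+.. code-block:: python
+
+    param_space = {
+        "C": (0.1, 10.0),
+        "shrinking": [True, False],  # boolean flag as a categorical choice
+    }
+
+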
+Warm Starting
+-------------
+
+Use warm starting to seed optimization with known good configurations:
+
+.. code-block:: python
+
+    optimizer = SmacRandomForest(
+        param_space=param_space,
+        n_iter=100,
+        initialize={
+            "warm_start": [
+                {"C": 1.0, "gamma": 0.1, "kernel": "rbf"},
+                {"C": 10.0, "gamma": 0.01, "kernel": "linear"},
+            ]
+        },
+        experiment=objective,
+    )
+
+
+When to Use SMAC Backend
+------------------------
+
+The SMAC backend is useful when you need:
+
+- **State-of-the-art Bayesian optimization** with proven surrogate models
+- **Native handling of mixed parameter spaces** (Random Forest handles
+  categorical parameters well)
+- **Sample-efficient optimization** for expensive function evaluations
+- **Hyperparameter optimization** following AutoML best practices
+- **Reproducible results** in scientific experiments
+
+Choose ``SmacRandomForest`` when:
+
+- Your search space has mixed parameter types
+- You have a budget of 50+ evaluations
+- Parameters interact in complex ways
+
+Choose ``SmacGaussianProcess`` when:
+
+- All parameters are continuous
+- The budget is small (10-50 evaluations)
+- You need uncertainty estimates
+
+Choose ``SmacRandomSearch`` when:
+
+- You need a baseline for comparison
+- The search space is high-dimensional (>20 parameters)
+- Evaluations are cheap and parallelizable
+
+
+References
+----------
+
+- `SMAC3 Documentation <https://automl.github.io/SMAC3/>`_
+- Lindauer, M., et al. (2022). SMAC3: A Versatile Bayesian Optimization
+  Package for Hyperparameter Optimization. JMLR.
diff --git a/docs/source/user_guide/optimizers/index.rst b/docs/source/user_guide/optimizers/index.rst
index 4babba3b..06049625 100644
--- a/docs/source/user_guide/optimizers/index.rst
+++ b/docs/source/user_guide/optimizers/index.rst
@@ -4,7 +4,7 @@
 Optimizers
 ==========
 
-Hyperactive provides 31 algorithms across 5 categories and 3 backends.
+Hyperactive provides 34 algorithms across 5 categories and 4 backends.
 
 Optimizers navigate the search space to find optimal parameters. Each implements a
 different strategy for balancing exploration (trying diverse regions) and exploitation
 (refining promising solutions). Local search methods like Hill Climbing work well for
@@ -133,6 +133,17 @@ Algorithm Categories
 
       *TPEOptimizer, CmaEsOptimizer, GPOptimizer, NSGAIIOptimizer, and more*
 
+   .. grid-item-card:: SMAC Backend
+      :link: smac
+      :link-type: doc
+      :class-card: sd-border-secondary
+
+      **3 algorithms**
+      ^^^
+      State-of-the-art Bayesian optimization from the AutoML community.
+
+      *SmacRandomForest, SmacGaussianProcess, SmacRandomSearch*
+
 ----
 
 Scenario Reference
@@ -151,7 +162,7 @@ Detailed recommendations based on problem characteristics:
      - ``HillClimbing``, ``RandomSearch``
      - Fast, simple, good for initial exploration
    * - Expensive evaluations
-     - ``BayesianOptimizer``, ``TPEOptimizer``
+     - ``BayesianOptimizer``, ``TPEOptimizer``, ``SmacRandomForest``
      - Learn from past evaluations, minimize function calls
    * - Large search space
      - ``RandomSearch``, ``ParticleSwarmOptimizer``
@@ -166,7 +177,7 @@ Detailed recommendations based on problem characteristics:
      - ``BayesianOptimizer``, ``CmaEsOptimizer``
      - Designed for smooth, continuous spaces
    * - Mixed parameter types
-     - ``TPEOptimizer``, ``RandomSearch``
+     - ``TPEOptimizer``, ``SmacRandomForest``, ``RandomSearch``
      - Handle categorical + continuous well
 
 ----
@@ -191,4 +202,5 @@ All optimizers share common parameters and configuration options.
    population_based
    sequential_model_based
    optuna
+   smac
    configuration
diff --git a/docs/source/user_guide/optimizers/smac.rst b/docs/source/user_guide/optimizers/smac.rst
new file mode 100644
index 00000000..1be294fc
--- /dev/null
+++ b/docs/source/user_guide/optimizers/smac.rst
@@ -0,0 +1,57 @@
+.. _user_guide_optimizers_smac:
+
+============
+SMAC Backend
+============
+
+Hyperactive provides wrappers for the Bayesian optimization algorithms of
+SMAC3 (Sequential Model-based Algorithm Configuration), offering
+state-of-the-art surrogate-model-based optimization.
+
+
+Available Optimizers
+--------------------
+
+.. literalinclude:: ../../_snippets/user_guide/optimizers.py
+    :language: python
+    :start-after: # [start:smac_imports]
+    :end-before: # [end:smac_imports]
+
+
+Example: SmacRandomForest
+-------------------------
+
+The flagship SMAC optimizer, using a Random Forest surrogate model. It handles
+mixed parameter types (continuous, categorical, integer) natively:
+
+.. literalinclude:: ../../_snippets/user_guide/optimizers.py
+    :language: python
+    :start-after: # [start:smac_random_forest]
+    :end-before: # [end:smac_random_forest]
+
+
+Example: SmacGaussianProcess
+----------------------------
+
+Uses a Gaussian Process surrogate model. Best for continuous parameter spaces
+with small to moderate evaluation budgets:
+
+.. literalinclude:: ../../_snippets/user_guide/optimizers.py
+    :language: python
+    :start-after: # [start:smac_gaussian_process]
+    :end-before: # [end:smac_gaussian_process]
+
+
+When to Use SMAC Backend
+------------------------
+
+The SMAC backend is useful when:
+
+- You need state-of-the-art Bayesian optimization from the AutoML community
+- Your search space has mixed parameter types (Random Forest handles these well)
+- You want sample-efficient optimization for expensive function evaluations
+- You need reproducible results following established AutoML best practices
+
+Choose ``SmacRandomForest`` for mixed parameter spaces and larger budgets (50+).
+Choose ``SmacGaussianProcess`` for purely continuous spaces with smaller budgets.
+Choose ``SmacRandomSearch`` as a baseline for comparison.
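+
+
+Example: SmacRandomSearch
+-------------------------
+
+``SmacRandomSearch`` shares the same interface and serves as a baseline
+without a surrogate model; the corresponding snippet is defined alongside
+the examples above:
+
+.. literalinclude:: ../../_snippets/user_guide/optimizers.py
+    :language: python
+    :start-after: # [start:smac_random_search]
+    :end-before: # [end:smac_random_search]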