diff --git a/DESCRIPTION b/DESCRIPTION index 0caa59272..52c8c7c5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,9 +44,12 @@ Suggests: processx, progressr, redux, + reticulate, RhpcBLASctl, rush (>= 0.4.1), testthat (>= 3.0.0) +Remotes: + mlr-org/paradox@cs Config/testthat/edition: 3 Config/testthat/parallel: false Encoding: UTF-8 @@ -95,6 +98,7 @@ Collate: 'OptimizerBatchLocalSearch.R' 'OptimizerBatchNLoptr.R' 'OptimizerBatchRandomSearch.R' + 'OptimizerBatchSmac3.R' 'Progressor.R' 'mlr_terminators.R' 'Terminator.R' diff --git a/NAMESPACE b/NAMESPACE index b3194c483..14ac9d790 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ export(OptimizerBatchIrace) export(OptimizerBatchLocalSearch) export(OptimizerBatchNLoptr) export(OptimizerBatchRandomSearch) +export(OptimizerBatchSmac3) export(Terminator) export(TerminatorClockTime) export(TerminatorCombo) diff --git a/R/OptimizerBatchSmac3.R b/R/OptimizerBatchSmac3.R new file mode 100644 index 000000000..25aebe683 --- /dev/null +++ b/R/OptimizerBatchSmac3.R @@ -0,0 +1,200 @@ +#' @title Sequential Model-Based Algorithm Configuration (SMAC3) +#' +#' @include Optimizer.R +#' @name mlr_optimizers_smac +#' +#' @description +#' Calls SMAC3 from Python via the \CRANpkg{reticulate} package. +#' +#' @note +#' All parameters of the search space must have default values. +#' +#' @section Parameters: +#' \describe{ +#' \item{`n_init`}{`integer(1)`\cr +#' Number of initial configurations to evaluate before starting the optimization. +#' Defaults to `10` times the number of hyperparameters.} +#' \item{`facade`}{`character(1)`\cr +#' Facade to use. +#' Either `"smac4bb"` (Black-Box Facade) or `"smac4hb"` (Hyperparameter Optimization Facade). +#' Default is `"smac4bb"`.} +#' \item{`output_directory`}{`character(1)`\cr +#' Directory to store the output of SMAC3. 
+#' Default is a temporary directory.} +#' } +#' +#' @template section_progress_bars +#' +#' @source +#' `r format_bib("lindauer_2022")` +#' +#' @export +#' @examples +#' \dontrun{ +#' # define the objective function +#' fun = function(xs) { +#' list(y = -(xs[[1]] - 2)^2 - (xs[[2]] + 3)^2 + 10) +#' } +#' +#' # set domain (all parameters must have defaults for ConfigSpace) +#' domain = ps( +#' x1 = p_dbl(-10, 10, default = 0), +#' x2 = p_dbl(-5, 5, default = 0) +#' ) +#' +#' # set codomain +#' codomain = ps( +#' y = p_dbl(tags = "maximize") +#' ) +#' +#' # create objective +#' objective = ObjectiveRFun$new( +#' fun = fun, +#' domain = domain, +#' codomain = codomain, +#' properties = "deterministic" +#' ) +#' +#' # initialize instance +#' instance = oi( +#' objective = objective, +#' terminator = trm("evals", n_evals = 20) +#' ) +#' +#' # load optimizer +#' optimizer = opt("smac") +#' +#' # trigger optimization +#' optimizer$optimize(instance) +#' +#' # all evaluated configurations +#' instance$archive +#' +#' # best performing configuration +#' instance$result +#' } +OptimizerBatchSmac3 = R6Class("OptimizerBatchSmac3", + inherit = OptimizerBatch, + public = list( + + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+    initialize = function() {
+      param_set = ps(
+        n_init = p_int(lower = 1L),
+        facade = p_fct(levels = c("smac4bb", "smac4hb"), init = "smac4bb"),
+        output_directory = p_uty(init = tempdir())
+      )
+      super$initialize(
+        id = "smac",
+        param_set = param_set,
+        param_classes = c("ParamDbl", "ParamInt", "ParamFct", "ParamLgl"),
+        properties = c("dependencies", "single-crit"),
+        packages = "reticulate",
+        label = "Sequential Model-Based Algorithm Configuration",
+        man = "bbotk::mlr_optimizers_smac"
+      )
+    }
+  ),
+
+  private = list(
+    # Drives SMAC3 through its ask-tell interface: ask SMAC for a configuration,
+    # evaluate it with `inst$eval_batch()` and tell SMAC the resulting cost.
+    .optimize = function(inst) {
+      assert_python_packages(c("smac", "ConfigSpace"))
+      smac = reticulate::import("smac")
+
+      pv = self$param_set$values
+      search_space = inst$search_space
+
+      # convert paradox search space to ConfigSpace
+      cs = paramset_to_configspace(search_space)
+
+      terminator = inst$terminator
+      if (inherits(terminator, "TerminatorEvals")) {
+        n_trials = terminator$param_set$values$n_evals
+      } else {
+        # use a large number for other terminators
+        n_trials = .Machine$integer.max
+      }
+
+      n_init = pv$n_init %??% (10L * search_space$length)
+
+      scenario = smac$Scenario(
+        configspace = cs,
+        deterministic = TRUE,
+        n_trials = as.integer(n_trials),
+        seed = as.integer(sample.int(.Machine$integer.max, 1L)),
+        output_directory = pv$output_directory
+      )
+
+      if (pv$facade == "smac4bb") {
+        facade_class = smac$BlackBoxFacade
+      } else {
+        facade_class = smac$HyperparameterOptimizationFacade
+      }
+
+      intensifier = facade_class$get_intensifier(scenario, max_config_calls = 1L)
+
+      # create initial design
+      initial_design = facade_class$get_initial_design(
+        scenario,
+        n_configs = as.integer(n_init)
+      )
+
+      # create smac optimizer
+      # use a dummy target function since we use the ask-tell interface
+      # SMAC validates the function signature, so we create a Python function directly
+      reticulate::py_run_string("def _dummy_target_fn(config, seed): return 0.0")
+      dummy_fn = reticulate::py$`_dummy_target_fn`
+
+      smac_optimizer = facade_class(
+        scenario = scenario,
+        target_function = dummy_fn,
+        intensifier = intensifier,
+        initial_design = initial_design,
+        overwrite = TRUE
+      )
+
+      # import TrialValue for reporting results
+      TrialValue = smac$runhistory$dataclasses$TrialValue
+
+      repeat {
+        # ask for next configuration; any error from ask() ends the loop, non-regular ones are reported
+        # NOTE(review): StopIteration is assumed to be SMAC's regular "no more trials" signal - confirm
+        trial_info = tryCatch(smac_optimizer$ask(), error = function(e) {
+          if (!inherits(e, "python.builtin.StopIteration")) {
+            warning("SMAC3 ask() failed, stopping optimization: ", conditionMessage(e), call. = FALSE)
+          }
+          NULL
+        })
+        if (is.null(trial_info)) break
+
+        # extract configuration as named list
+        config = trial_info$config
+        config_dict = reticulate::py_to_r(config$get_dictionary())
+
+        # inactive parameters are not in the config dictionary
+        # create data.table with all parameters (inactive ones are NA)
+        all_params = search_space$ids()
+        xdt = setDT(lapply(set_names(all_params), function(p) config_dict[[p]] %??% NA))
+
+        # fix logical parameters (ConfigSpace uses strings "TRUE"/"FALSE")
+        lgl_params = search_space$ids(class = "ParamLgl")
+        if (length(lgl_params)) {
+          xdt[, (lgl_params) := lapply(.SD, as.logical), .SDcols = lgl_params]
+        }
+
+        res = inst$eval_batch(xdt)
+        # NOTE(review): objective_multiplicator presumably flips maximization targets into costs - confirm
+        cost = res[[inst$archive$cols_y]] * inst$objective_multiplicator
+
+        # tell smac the result
+        trial_value = TrialValue(cost = cost, time = 0.0)
+        smac_optimizer$tell(trial_info, trial_value)
+      }
+    }
+  )
+)
+
+mlr_optimizers$add("smac", OptimizerBatchSmac3)
diff --git a/R/assertions.R b/R/assertions.R
index eb1df6949..5028bece4 100644
--- a/R/assertions.R
+++ b/R/assertions.R
@@ -180,3 +180,24 @@ assert_archive_batch = function(archive, null_ok = FALSE) {
   if (null_ok && is.null(archive)) return(NULL)
   assert_r6(archive, "ArchiveBatch")
 }
+
+#' @title Assert Python Packages
+#'
+#' @description
+#' Assert that the given Python packages are available.
+#'
+#' @param packages (`character()`)\cr
+#' Python packages to check.
+#' @param python_version (`character(1)`)\cr
+#' Python version to use. If `NULL`, the default Python version is used.
+#'
+#' @return (`character()`)\cr
+#' Invisibly returns the input `packages` vector if all requested Python packages are available; otherwise throws an error listing the missing packages.
+assert_python_packages = function(packages, python_version = NULL) {
+  # validate up front so malformed input fails here instead of inside reticulate
+  assert_character(packages, any.missing = FALSE)
+  reticulate::py_require(packages, python_version = python_version)
+  missing_packages = packages[!map_lgl(packages, reticulate::py_module_available)]
+  if (length(missing_packages)) stopf("Package %s not available.", as_short_string(missing_packages))
+  invisible(packages)
+}
diff --git a/R/bibentries.R b/R/bibentries.R
index fe4baab48..a98a8c1cf 100644
--- a/R/bibentries.R
+++ b/R/bibentries.R
@@ -61,5 +61,16 @@ bibentries = c(
     eprint = "1903.04703",
     archivePrefix = "arXiv",
     primaryClass = "cs.LG"
+  ),
+
+  lindauer_2022 = bibentry("article",
+    title = "{SMAC3}: A Versatile Bayesian Optimization Package for Hyperparameter Optimization",
+    author = "Marius Lindauer and Katharina Eggensperger and Matthias Feurer and Andre Biedenkapp and Difan Deng and Carolin Benjamins and Tim Ruhkopf and Rene Sass and Frank Hutter",
+    year = "2022",
+    journal = "Journal of Machine Learning Research",
+    volume = "23",
+    number = "54",
+    pages = "1--9",
+    url = "http://jmlr.org/papers/v23/21-0888.html"
   )
 )
diff --git a/man/assert_python_packages.Rd b/man/assert_python_packages.Rd
new file mode 100644
index 000000000..a9375ef21
--- /dev/null
+++ b/man/assert_python_packages.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/assertions.R
+\name{assert_python_packages}
+\alias{assert_python_packages}
+\title{Assert Python Packages}
+\usage{
+assert_python_packages(packages, python_version = NULL)
+}
+\arguments{
+\item{packages}{(\code{character()})\cr
+Python packages to check.}
+
+\item{python_version}{(\code{character(1)})\cr
+Python version to use.
If \code{NULL}, the default Python version is used.} +} +\value{ +(\code{character()})\cr +Invisibly returns the input \code{packages} vector if all requested Python packages are available; otherwise throws an error listing the missing packages. +} +\description{ +Assert that the given Python packages are available. +} diff --git a/man/mlr_optimizers_async_design_points.Rd b/man/mlr_optimizers_async_design_points.Rd index 9a0c790e6..8750eae7a 100644 --- a/man/mlr_optimizers_async_design_points.Rd +++ b/man/mlr_optimizers_async_design_points.Rd @@ -82,7 +82,7 @@ as.data.table(instance$archive) } } \section{Super classes}{ -\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{bbotk::OptimizerAsync} -> \code{OptimizerAsyncDesignPoints} +\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{\link[bbotk:OptimizerAsync]{bbotk::OptimizerAsync}} -> \code{OptimizerAsyncDesignPoints} } \section{Methods}{ \subsection{Public methods}{ diff --git a/man/mlr_optimizers_async_grid_search.Rd b/man/mlr_optimizers_async_grid_search.Rd index 909bd25be..86094ed0f 100644 --- a/man/mlr_optimizers_async_grid_search.Rd +++ b/man/mlr_optimizers_async_grid_search.Rd @@ -88,7 +88,7 @@ as.data.table(instance$archive) } } \section{Super classes}{ -\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{bbotk::OptimizerAsync} -> \code{OptimizerAsyncGridSearch} +\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{\link[bbotk:OptimizerAsync]{bbotk::OptimizerAsync}} -> \code{OptimizerAsyncGridSearch} } \section{Methods}{ \subsection{Public methods}{ diff --git a/man/mlr_optimizers_async_random_search.Rd b/man/mlr_optimizers_async_random_search.Rd index 3bd77dc6a..0d1193129 100644 --- a/man/mlr_optimizers_async_random_search.Rd +++ b/man/mlr_optimizers_async_random_search.Rd @@ -78,7 +78,7 @@ as.data.table(instance$archive) } } \section{Super classes}{ -\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{bbotk::OptimizerAsync} -> \code{OptimizerAsyncRandomSearch} 
+\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{\link[bbotk:OptimizerAsync]{bbotk::OptimizerAsync}} -> \code{OptimizerAsyncRandomSearch} } \section{Methods}{ \subsection{Public methods}{ diff --git a/man/mlr_optimizers_smac.Rd b/man/mlr_optimizers_smac.Rd new file mode 100644 index 000000000..df77d7c78 --- /dev/null +++ b/man/mlr_optimizers_smac.Rd @@ -0,0 +1,135 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/OptimizerBatchSmac3.R +\name{mlr_optimizers_smac} +\alias{mlr_optimizers_smac} +\alias{OptimizerBatchSmac3} +\title{Sequential Model-Based Algorithm Configuration (SMAC3)} +\source{ +Lindauer M, Eggensperger K, Feurer M, Biedenkapp A, Deng D, Benjamins C, Ruhkopf T, Sass R, Hutter F (2022). +\dQuote{SMAC3: A Versatile Bayesian Optimization Package for Hyperparameter Optimization.} +\emph{Journal of Machine Learning Research}, \bold{23}(54), 1--9. +\url{http://jmlr.org/papers/v23/21-0888.html}. +} +\description{ +Calls SMAC3 from Python via the \CRANpkg{reticulate} package. +} +\note{ +All parameters of the search space must have default values. +} +\section{Parameters}{ + +\describe{ +\item{\code{n_init}}{\code{integer(1)}\cr +Number of initial configurations to evaluate before starting the optimization. +Defaults to \code{10} times the number of hyperparameters.} +\item{\code{facade}}{\code{character(1)}\cr +Facade to use. +Either \code{"smac4bb"} (Black-Box Facade) or \code{"smac4hb"} (Hyperparameter Optimization Facade). +Default is \code{"smac4bb"}.} +\item{\code{output_directory}}{\code{character(1)}\cr +Directory to store the output of SMAC3. +Default is a temporary directory.} +} +} + +\section{Progress Bars}{ + +\verb{$optimize()} supports progress bars via the package \CRANpkg{progressr} +combined with a \link{Terminator}. Simply wrap the function in +\code{progressr::with_progress()} to enable them. 
We recommend to use package +\CRANpkg{progress} as backend; enable with \code{progressr::handlers("progress")}. +} + +\examples{ +\dontrun{ +# define the objective function +fun = function(xs) { + list(y = -(xs[[1]] - 2)^2 - (xs[[2]] + 3)^2 + 10) +} + +# set domain (all parameters must have defaults for ConfigSpace) +domain = ps( + x1 = p_dbl(-10, 10, default = 0), + x2 = p_dbl(-5, 5, default = 0) +) + +# set codomain +codomain = ps( + y = p_dbl(tags = "maximize") +) + +# create objective +objective = ObjectiveRFun$new( + fun = fun, + domain = domain, + codomain = codomain, + properties = "deterministic" +) + +# initialize instance +instance = oi( + objective = objective, + terminator = trm("evals", n_evals = 20) +) + +# load optimizer +optimizer = opt("smac") + +# trigger optimization +optimizer$optimize(instance) + +# all evaluated configurations +instance$archive + +# best performing configuration +instance$result +} +} +\section{Super classes}{ +\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{\link[bbotk:OptimizerBatch]{bbotk::OptimizerBatch}} -> \code{OptimizerBatchSmac3} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-OptimizerBatchSmac3-new}{\code{OptimizerBatchSmac3$new()}} +\item \href{#method-OptimizerBatchSmac3-clone}{\code{OptimizerBatchSmac3$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-OptimizerBatchSmac3-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{OptimizerBatchSmac3$new()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-OptimizerBatchSmac3-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{OptimizerBatchSmac3$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}}
+}
+}
+}
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index 1735017a1..f80a9db63 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -86,3 +86,4 @@ reference:
       - trafo_xs
       - terminated_error
       - choose_search_space
+      - assert_python_packages
diff --git a/tests/testthat/teardown.R b/tests/testthat/teardown.R
index 5bbca3170..d2aeb0452 100644
--- a/tests/testthat/teardown.R
+++ b/tests/testthat/teardown.R
@@ -1,3 +1,12 @@
 options(old_opts)
 lg_bbotk$set_threshold(old_threshold_bbotk)
 lg_rush$set_threshold(old_threshold_rush)
+
+# Remove "uv-setuptools-*.lock" files (presumably left behind by uv) from Python's temp directory.
+# Only do so when Python was already initialized in this session, so that the
+# teardown never starts (or fails to find) a Python interpreter on its own.
+if (requireNamespace("reticulate", quietly = TRUE) && reticulate::py_available(initialize = FALSE)) {
+  python_temp_dir = reticulate::import("tempfile")$gettempdir()
+  detritus = list.files(python_temp_dir, pattern = "^uv-setuptools-[0-9A-Fa-f]{16}\\.lock$", full.names = TRUE)
+  if (length(detritus)) unlink(detritus)
+}
diff --git a/tests/testthat/test_OptimizerBatchSmac3.R b/tests/testthat/test_OptimizerBatchSmac3.R
new file mode 100644
index 000000000..0292b3b32
--- /dev/null
+++ b/tests/testthat/test_OptimizerBatchSmac3.R
@@ -0,0 +1,137 @@
+skip_if_not_installed("reticulate")
+# the optimizer provisions Python packages through reticulate, which is not suitable for CRAN machines
+skip_on_cran()
+
+test_that("OptimizerBatchSmac3", {
+  search_space = ps(
+    x = p_dbl(lower = -1, upper = 1, default = 0)
+  )
+
+  fun = function(xs) {
+    list(y = as.numeric(xs)^2)
+  }
+
+  objective = ObjectiveRFun$new(
+    fun = fun,
+    domain = search_space,
+    properties = "single-crit"
+  )
+
+  terminator = trm("evals", n_evals = 5L)
+  instance = OptimInstanceBatchSingleCrit$new(
+    objective = objective,
+    search_space = search_space,
+    terminator = terminator
+  )
+
+  optimizer = opt("smac")
+  expect_class(optimizer, "OptimizerBatchSmac3")
+
+  optimizer$optimize(instance)
+
+  expect_data_table(instance$archive$data, min.rows = 5L)
+  expect_equal(instance$archive$n_evals, 5L)
+
+  x_opt = instance$result_x_domain
+  y_opt = instance$result_y
+  expect_list(x_opt, len = 1)
+  expect_named(x_opt, "x")
+  expect_numeric(y_opt, len = 1)
+  expect_named(y_opt, "y")
+})
+
+test_that("OptimizerBatchSmac3 with 2d search space", {
+  search_space = ps(
+    x1 = p_dbl(lower = -1, upper = 1, default = 0),
+    x2 = p_dbl(lower = -1, upper = 1, default = 0)
+  )
+
+  fun = function(xs) {
+    list(y = sum(as.numeric(xs)^2))
+  }
+
+  objective = ObjectiveRFun$new(
+    fun = fun,
+    domain = search_space,
+    properties = "single-crit"
+  )
+
+  terminator = trm("evals", n_evals = 10L)
+  instance = OptimInstanceBatchSingleCrit$new(
+    objective = objective,
+    search_space = search_space,
+    terminator = terminator
+  )
+
+  optimizer = opt("smac", n_init = 5L)
+  optimizer$optimize(instance)
+
+  expect_data_table(instance$archive$data, min.rows = 10L)
+  expect_equal(instance$archive$n_evals, 10L)
+})
+
+test_that("OptimizerBatchSmac3 with mixed parameter types", {
+  search_space = ps(
+    x1 = p_dbl(lower = -1, upper = 1, default = 0),
+    x2 = p_int(lower = 1L, upper = 10L, default = 5L),
+    x3 = p_fct(levels = c("a", "b", "c"), default = "a"),
+    x4 = p_lgl(default = TRUE)
+  )
+
+  fun = function(xs) {
+    y = xs$x1^2 + xs$x2 / 10
+    if (xs$x3 == "a") y = y + 1
+    if (xs$x4) y = y + 0.5
+    list(y = y)
+  }
+
+  objective = ObjectiveRFun$new(
+    fun = fun,
+    domain = search_space,
+    properties = "single-crit"
+  )
+
+  terminator = trm("evals", n_evals = 10L)
+  instance = OptimInstanceBatchSingleCrit$new(
+    objective = objective,
+    search_space = search_space,
+    terminator = terminator
+  )
+
+  optimizer = opt("smac", n_init = 5L)
+  optimizer$optimize(instance)
+
+  expect_data_table(instance$archive$data, min.rows = 10L)
+  expect_equal(instance$archive$n_evals, 10L)
+})
+
+test_that("OptimizerBatchSmac3 with dependencies", {
+  search_space = ps(
+    x1 = p_fct(levels = c("a", "b"), default = "a"),
+    x2 = p_dbl(lower = -1, upper = 1, default = 0, depends = x1 == "a")
+  )
+
+  fun = function(xs) {
+    y = if (xs$x1 == "a") xs$x2^2 else 1
+    list(y = y)
+  }
+
+  objective = ObjectiveRFun$new(
+    fun = fun,
+    domain = search_space,
+    properties = "single-crit"
+  )
+
+  terminator = trm("evals", n_evals = 10L)
+  instance = OptimInstanceBatchSingleCrit$new(
+    objective = objective,
+    search_space = search_space,
+    terminator = terminator
+  )
+
+  optimizer = opt("smac", n_init = 5L)
+  optimizer$optimize(instance)
+
+  expect_data_table(instance$archive$data, min.rows = 10L)
+  expect_equal(instance$archive$n_evals, 10L)
+})