From d2a3f78c385167235a8f36e59eb871a6ce035157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Sat, 13 Dec 2025 08:36:37 +0200 Subject: [PATCH 01/10] Enable progress bars in non-interactive mode for SLURM clusters Configure pbapply to display text-based progress bars when running in non-interactive mode (e.g., batch jobs, Rscript). Previously, progress tracking was disabled in non-interactive sessions, making it impossible to monitor simulation progress in SLURM log files. Changes: - R/analysis.R: Add pboptions configuration to force type="txt" in non-interactive mode while preserving timer bars in interactive sessions - R/runSimulation.R: Update progress parameter documentation to describe the new behavior Interactive users see no change. Non-interactive users (SLURM, batch jobs) now see text progress bars when monitoring logs via tail -f. Fixes #75 --- R/analysis.R | 7 +++++++ R/runSimulation.R | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/R/analysis.R b/R/analysis.R index 8b74785c..1470e9ca 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -10,6 +10,13 @@ Analysis <- function(Functions, condition, replications, fixed_objects, cl, MPI, { # This defines the work-flow for the Monte Carlo simulation given the condition (row in Design) # and number of replications desired + + # Configure pbapply for non-interactive mode (e.g., SLURM cluster logs) + if(progress && !interactive()) { + old_pboptions <- pbapply::pboptions(type = "txt", char = "=", style = 3) + on.exit(pbapply::pboptions(old_pboptions), add = TRUE) + } + used_mainsim <- if(is.finite(max_time)) mainsim_maxtime else mainsim if(useFuture){ if(!is.null(seed)) set_seed(seed[condition$ID]) diff --git a/R/runSimulation.R b/R/runSimulation.R index abcf396c..412abe3b 100644 --- a/R/runSimulation.R +++ b/R/runSimulation.R @@ -600,7 +600,9 @@ #' range 1 to 2147483647 for each condition via \code{\link{genSeeds}} #' #' @param progress logical; display a progress bar (using the \code{pbapply} package) -#' for each simulation condition? +#' for each simulation condition? In interactive sessions, shows a timer-based +#' progress bar. In non-interactive sessions (e.g., HPC cluster jobs), displays +#' text-based progress updates that are visible in log files. #' This is useful when simulations conditions take a long time to run (see also the #' \code{notification} argument). Default is \code{TRUE} #' From 58b04b517d4e146b4b5b44f2d76771fde6f75a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Sat, 13 Dec 2025 20:17:47 +0200 Subject: [PATCH 02/10] Add prepare function for condition-level object preparation Implements #74 by adding a prepare parameter to runSimulation() that modifies fixed_objects once per condition before replications run. The prepare function accepts condition and fixed_objects as arguments and returns the modified fixed_objects, which is then passed to all replications for that condition. Use case: Pre-compute expensive condition-specific objects (design matrices, lookup tables) once per condition instead of per replication, avoiding both memory issues (from pre-computing all conditions) and performance issues (from recomputing per replication). Implementation: - Added prepare parameter with validation - Calls prepare(condition, fixed_objects) in main loop per condition - Returns modified fixed_objects for use in replications - Exports prepare to parallel clusters when provided - Includes prepare globals in check.globals - Full backward compatibility (prepare defaults to NULL) Example: prepare <- function(condition, fixed_objects) { fixed_objects$design_matrix <- matrix(rnorm(condition$N * 10), ncol=10) return(fixed_objects) } --- R/analysis.R | 12 +++++++++++- R/runSimulation.R | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/R/analysis.R b/R/analysis.R index 1470e9ca..4128246b 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -1,4 +1,4 @@ -Analysis <- function(Functions, condition, replications, fixed_objects, cl, MPI, seed, save, +Analysis <- function(Functions, condition, replications, fixed_objects, prepare = NULL, cl, MPI, seed, save, save_results, save_results_out_rootdir, save_results_dirname, max_errors, boot_method, boot_draws, CI, save_seeds, save_seeds_dirname, load_seed, @@ -11,6 +11,16 @@ Analysis <- function(Functions, condition, replications, fixed_objects, cl, MPI, # This defines the work-flow for the Monte Carlo simulation given the condition (row in Design) # and number of replications desired + # Call prepare function once per condition if provided + if(!is.null(prepare)) { + prep_result <- try(prepare(condition=condition, fixed_objects=fixed_objects), silent=FALSE) + if(is(prep_result, 'try-error')){ + stop(sprintf('prepare() failed for condition %i with error: %s', + condition$ID, as.character(prep_result)), call.=FALSE) + } + fixed_objects <- prep_result + } + # Configure pbapply for non-interactive mode (e.g., SLURM cluster logs) if(progress && !interactive()) { old_pboptions <- pbapply::pboptions(type = "txt", char = "=", style = 3) diff --git a/R/runSimulation.R b/R/runSimulation.R index 412abe3b..5e7ca76e 100644 --- a/R/runSimulation.R +++ b/R/runSimulation.R @@ -201,6 +201,21 @@ #' to less than 3 (for initial testing purpose) will disable the \code{save} #' and \code{control$stop_on_fatal} flags #' +#' @param prepare (optional) a function that executes once per simulation condition +#' (i.e., once per row in \code{design}) to prepare/modify the \code{fixed_objects} +#' before replications are run. This function should accept \code{condition} and +#' \code{fixed_objects} as arguments and return the modified \code{fixed_objects}. +#' +#' This is useful for pre-computing expensive condition-specific objects that can be +#' reused across replications (e.g., design matrices, correlation matrices, lookup tables) +#' without having to either pre-compute them for all conditions (which can cause +#' memory issues) or recompute them for every replication (which causes performance issues). +#' +#' The function signature should be: +#' \code{prepare <- function(condition, fixed_objects) \{ ... return(fixed_objects) \}} +#' +#' Default is \code{NULL}, in which case no preparation step is performed +#' #' @param fixed_objects (optional) an object (usually a named \code{list}) #' containing additional user-defined objects #' that should remain fixed across conditions. This is useful when including @@ -1014,7 +1029,7 @@ #' } #' runSimulation <- function(design, replications, generate, analyse, summarise, - fixed_objects = NULL, packages = NULL, filename = NULL, + prepare = NULL, fixed_objects = NULL, packages = NULL, filename = NULL, debug = 'none', load_seed = NULL, save = any(replications > 2), store_results = TRUE, save_results = FALSE, parallel = FALSE, ncores = parallelly::availableCores(omit = 1L), @@ -1246,6 +1261,15 @@ runSimulation <- function(design, replications, generate, analyse, summarise, if(!is.null(seed)) stopifnot(nrow(design) == length(seed)) debug <- tolower(debug) + # Validate prepare function + if(!is.null(prepare)){ + if(!is.function(prepare)) + stop('prepare must be a function', call. = FALSE) + fms <- names(formals(prepare)) + truefms <- c('condition', 'fixed_objects') + if(!all(truefms %in% fms)) + stop('Function arguments for prepare are not correct. Must include: condition, fixed_objects', call. = FALSE) + } for(i in names(Functions)){ fms <- names(formals(Functions[[i]])) truefms <- switch(i, @@ -1338,14 +1362,20 @@ runSimulation <- function(design, replications, generate, analyse, summarise, parallel::clusterExport(cl=cl, export_funs, envir = parent.frame(1L)) parallel::clusterExport(cl=cl, "ANALYSE_FUNCTIONS", envir = environment()) parallel::clusterExport(cl=cl, "TRY_ALL_ANALYSE", envir = environment()) + if(!is.null(prepare)) + parallel::clusterExport(cl=cl, "prepare", envir = environment()) if(verbose) message(sprintf("\nNumber of parallel clusters in use: %i", length(cl))) } } if(check.globals){ + prepare_globals <- if(!is.null(prepare)) + codetools::findGlobals(prepare, merge=FALSE)[['variables']] + else NULL globals <- unique(c(codetools::findGlobals(generate, merge=FALSE)[['variables']], codetools::findGlobals(analyse, merge=FALSE)[['variables']], - codetools::findGlobals(summarise, merge=FALSE)[['variables']])) + codetools::findGlobals(summarise, merge=FALSE)[['variables']], + prepare_globals)) return(setdiff(globals, c(names(design), names(fixed_objects)))) } Result_list <- vector('list', nrow(design)) @@ -1484,6 +1514,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise, else design[i,], replications=replications[i], fixed_objects=fixed_objects, + prepare=prepare, cl=if(i %in% not_parallel) NULL else cl, MPI=MPI, .options.mpi=.options.mpi, seed=seed, boot_draws=boot_draws, boot_method=boot_method, CI=CI, @@ -1525,6 +1556,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise, condition=if(was_tibble) dplyr::as_tibble(design[i,]) else design[i,], replications=replications[i], fixed_objects=fixed_objects, + prepare=prepare, cl=if(i %in% not_parallel) NULL else cl, MPI=MPI, .options.mpi=.options.mpi, seed=seed, store_Random.seeds=store_Random.seeds, From a50e0dc377da63b859e0f77b2374fb5a4eecfc36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Sun, 14 Dec 2025 10:27:56 +0200 Subject: [PATCH 03/10] Add RNG state management for prepare() function This commit adds comprehensive random number generator (RNG) state management for the prepare() function, ensuring reproducibility and debugging support consistent with generate/analyse/summarise functions. Key Changes: 1. Seed Capture (R/analysis.R:15-52) - Automatically capture .Random.seed state before prepare() executes - Initialize RNG if .Random.seed doesn't exist yet - Store prepare error seed when prepare() fails for debugging 2. Seed Storage (R/analysis.R:26-37, 251-261) - Save prepare seeds to disk when save_seeds=TRUE - File path format: design-row-{ID}/prepare-seed - Store prepare_Random.seed in attributes when store_Random.seeds=TRUE - Always store prepare_error_seed for debugging (independent of flag) 3. New Parameter: load_seed_prepare (R/runSimulation.R:1033) - Dedicated parameter for debugging prepare function - Accepts character path, integer vector, or tibble/data.frame - Supports both absolute and relative file paths - Automatically detects path type and handles appropriately - Documented at R/runSimulation.R:345-352 4. Seed Extraction (R/SimExtract.R:120-123, 199-209) - SimExtract(res, 'prepare_seeds') - extract all prepare seeds - SimExtract(res, 'prepare_error_seed') - extract error seeds 5. Attribute Preservation (R/runSimulation.R:1635-1636) - Manually restore prepare seed attributes when Result_list is rebuilt as data.frame to prevent attribute loss Example Usage: # Run simulation with prepare that uses RNG res <- runSimulation(Design, replications=10, prepare=prepare, # Uses rnorm(), runif(), etc. control=list(save_seeds=TRUE, store_Random.seeds=TRUE)) # Extract prepare seeds for reproducibility prepare_seeds <- SimExtract(res, 'prepare_seeds') # Debug prepare errors by loading the error seed res2 <- runSimulation(Design[2,], replications=1, load_seed_prepare='design-row-2/prepare-seed') Design Decisions: - prepare_Random.seed only stored when store_Random.seeds=TRUE for consistency with stored_Random.seeds behavior - prepare_error_seed always stored for debugging, like error_seeds and warning_seeds - Separate attributes (prepare_Random.seed, prepare_error_seed) instead of nested list for consistency with existing codebase patterns - File path detection allows both absolute and relative paths Related: Complements PR #78 (prepare function feature) --- R/SimExtract.R | 20 ++++++++++++++++++++ R/analysis.R | 38 ++++++++++++++++++++++++++++++++++++-- R/runSimulation.R | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 102 insertions(+), 3 deletions(-) diff --git a/R/SimExtract.R b/R/SimExtract.R index c7b16cb6..6fa999e0 100644 --- a/R/SimExtract.R +++ b/R/SimExtract.R @@ -17,6 +17,10 @@ #' \code{runSimulation(..., control = list(store_Random.seeds=TRUE))}), #' \code{'error_seeds'} and \code{'warning_seeds'} #' to extract the associated \code{.Random.seed} values associated with the ERROR/WARNING messages, +#' \code{'prepare_seeds'} to extract the \code{.Random.seed} states captured before +#' \code{prepare()} was called for each condition, \code{'prepare_error_seed'} to extract the +#' \code{.Random.seed} state when \code{prepare()} encountered an error (useful for debugging with +#' \code{load_seed_prepare}), #' \code{'results'} to extract the simulation results if the option \code{store_results} was passed to #' \code{\link{runSimulation}}, \code{'filename'} and \code{'save_results_dirname'} for extracting #' the saved file/directory name information (if used), \code{'functions'} to extract the defined functions @@ -113,6 +117,10 @@ SimExtract <- function(object, what, fuzzy = TRUE, append = TRUE){ if(length(wrn) && append) cbind(Design, wrn) else wrn } else if(what == 'warning_seeds'){ extract_warning_seeds(object) + } else if(what == 'prepare_seeds'){ + extract_prepare_seeds(object) + } else if(what == 'prepare_error_seed'){ + extract_prepare_error_seed(object) } else if(what == 'save_results_dirname'){ attr(object, 'extra_info')$save_info['save_results_dirname'] } else if(what == 'filename'){ @@ -188,6 +196,18 @@ extract_functions <- function(object){ ret } +extract_prepare_seeds <- function(object){ + extra_info <- attr(object, 'extra_info') + ret <- extra_info$prepare_seeds + ret +} + +extract_prepare_error_seed <- function(object){ + extra_info <- attr(object, 'extra_info') + ret <- extra_info$prepare_error_seeds + ret +} + fuzzy_reduce <- function(df){ if(!length(df)) return(df) nms <- colnames(df) diff --git a/R/analysis.R b/R/analysis.R index 4128246b..d263c4cd 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -1,4 +1,5 @@ -Analysis <- function(Functions, condition, replications, fixed_objects, prepare = NULL, cl, MPI, seed, save, +Analysis <- function(Functions, condition, replications, fixed_objects, prepare = NULL, + load_seed_prepare = NULL, cl, MPI, seed, save, save_results, save_results_out_rootdir, save_results_dirname, max_errors, boot_method, boot_draws, CI, save_seeds, save_seeds_dirname, load_seed, @@ -12,9 +13,34 @@ Analysis <- function(Functions, condition, replications, fixed_objects, prepare # and number of replications desired # Call prepare function once per condition if provided + prepare_error_seed <- NULL + prepare_Random.seed <- NULL if(!is.null(prepare)) { + + # Restore seed if debugging prepare + if(!is.null(load_seed_prepare)) + .GlobalEnv$.Random.seed <- load_seed_prepare + + # Ensure .Random.seed exists (initialize RNG if needed) + else if(!exists(".Random.seed", envir = .GlobalEnv)) + runif(1) + + # Capture seed state before prepare (similar to mainsim line 296) + prepare_Random.seed <- .GlobalEnv$.Random.seed + + # Save seed to disk if requested + if(save_seeds){ + filename <- paste0(save_seeds_dirname, '/design-row-', condition$ID, '/prepare-seed') + dir.create(dirname(file.path(save_results_out_rootdir, filename)), + showWarnings = FALSE, recursive = TRUE) + write(prepare_Random.seed, file.path(save_results_out_rootdir, filename), sep = ' ') + } + prep_result <- try(prepare(condition=condition, fixed_objects=fixed_objects), silent=FALSE) + if(is(prep_result, 'try-error')){ + # Capture seed on error (similar to mainsim) + prepare_error_seed <- prepare_Random.seed stop(sprintf('prepare() failed for condition %i with error: %s', condition$ID, as.character(prep_result)), call.=FALSE) } @@ -222,9 +248,17 @@ Analysis <- function(Functions, condition, replications, fixed_objects, prepare attr(ret, 'error_seeds') <- try_error_seeds attr(ret, 'warning_seeds') <- warning_message_seeds attr(ret, 'summarise_list') <- summarise_list + if(!is.null(prepare_error_seed)) + attr(ret, 'prepare_error_seed') <- prepare_error_seed + if(store_results) attr(ret, 'full_results') <- tabled_results - if(store_Random.seeds) + if(store_Random.seeds){ attr(ret, 'stored_Random.seeds') <- stored_Random.seeds + # Store prepare seed information + if(!is.null(prepare)) { + attr(ret, 'prepare_Random.seed') <- prepare_Random.seed + } + } ret } diff --git a/R/runSimulation.R b/R/runSimulation.R index 5e7ca76e..69b26188 100644 --- a/R/runSimulation.R +++ b/R/runSimulation.R @@ -342,6 +342,15 @@ #' then it WILL be important to modify the \code{design} input in order to load this #' exact seed for the corresponding design row. Default is \code{NULL} #' +#' @param load_seed_prepare similar to \code{load_seed}, but specifically for +#' debugging the \code{prepare} function. Used to replicate the exact RNG state +#' when prepare is called for a given condition. Accepts the same input formats +#' as \code{load_seed}: a character string path (e.g., \code{'design-row-2/prepare-seed'}), +#' an integer vector containing the \code{.Random.seed} state, or a tibble/data.frame +#' with seed values. This is particularly useful when prepare encounters an error +#' and you need to reproduce the exact state. The prepare error seed can be +#' extracted using \code{SimExtract(res, 'prepare_error_seed')}. Default is \code{NULL} +#' #' @param filename (optional) the name of the \code{.rds} file to save the final #' simulation results to. If the extension #' \code{.rds} is not included in the file name (e.g. \code{"mysimulation"} @@ -1030,7 +1039,8 @@ #' runSimulation <- function(design, replications, generate, analyse, summarise, prepare = NULL, fixed_objects = NULL, packages = NULL, filename = NULL, - debug = 'none', load_seed = NULL, save = any(replications > 2), + debug = 'none', load_seed = NULL, load_seed_prepare = NULL, + save = any(replications > 2), store_results = TRUE, save_results = FALSE, parallel = FALSE, ncores = parallelly::availableCores(omit = 1L), cl = NULL, notification = 'none', notifier = NULL, @@ -1298,6 +1308,24 @@ runSimulation <- function(design, replications, generate, analyse, summarise, load_seed <- as.integer(as.data.frame(load_seed)[,1]) stopifnot(is.integer(load_seed)) } + # Validate load_seed_prepare (same logic as load_seed) + if(!is.null(load_seed_prepare)){ + if(length(load_seed_prepare) == 7L){ + rngkind <- RNGkind() + RNGkind("L'Ecuyer-CMRG") + on.exit(RNGkind(rngkind[1L]), add = TRUE) + } + if(is.character(load_seed_prepare)){ + # Character path to saved prepare seed file + # Only prepend save_seeds_dirname if it's a relative path + if(!file.exists(load_seed_prepare)) + load_seed_prepare <- paste0(save_seeds_dirname, '/', load_seed_prepare) + load_seed_prepare <- as.integer(scan(load_seed_prepare, sep = ' ', quiet = TRUE)) + } + if(is(load_seed_prepare, 'tbl')) + load_seed_prepare <- as.integer(as.data.frame(load_seed_prepare)[,1]) + stopifnot(is.integer(load_seed_prepare)) + } if(MPI){ parallel <- FALSE verbose <- FALSE @@ -1515,6 +1543,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise, replications=replications[i], fixed_objects=fixed_objects, prepare=prepare, + load_seed_prepare=load_seed_prepare, cl=if(i %in% not_parallel) NULL else cl, MPI=MPI, .options.mpi=.options.mpi, seed=seed, boot_draws=boot_draws, boot_method=boot_method, CI=CI, @@ -1557,6 +1586,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise, replications=replications[i], fixed_objects=fixed_objects, prepare=prepare, + load_seed_prepare=load_seed_prepare, cl=if(i %in% not_parallel) NULL else cl, MPI=MPI, .options.mpi=.options.mpi, seed=seed, store_Random.seeds=store_Random.seeds, @@ -1602,6 +1632,8 @@ runSimulation <- function(design, replications, generate, analyse, summarise, attr(Result_list[[i]], 'error_seeds') <- attr(tmp, 'error_seeds') attr(Result_list[[i]], 'warning_seeds') <- attr(tmp, 'warning_seeds') attr(Result_list[[i]], 'summarise_list') <- attr(tmp, 'summarise_list') + attr(Result_list[[i]], 'prepare_Random.seed') <- attr(tmp, 'prepare_Random.seed') + attr(Result_list[[i]], 'prepare_error_seed') <- attr(tmp, 'prepare_error_seed') Result_list[[i]]$COMPLETED <- date() time1 <- proc.time()[3L] Result_list[[i]]$SIM_TIME <- time1 - time0 @@ -1712,6 +1744,17 @@ runSimulation <- function(design, replications, generate, analyse, summarise, rownames(ret)) t(ret) }))) + # Collect prepare seeds + prepare_seeds <- lapply(1L:length(Result_list), function(x) { + attr(Result_list[[x]], "prepare_Random.seed") + }) + prepare_error_seeds <- lapply(1L:length(Result_list), function(x) { + attr(Result_list[[x]], "prepare_error_seed") + }) + # Remove NULL entries from error seeds + prepare_error_seeds <- Filter(Negate(is.null), prepare_error_seeds) + if(length(prepare_error_seeds) == 0L) prepare_error_seeds <- NULL + summarise_list <- lapply(1L:length(Result_list), function(x) attr(Result_list[[x]], "summarise_list") ) @@ -1785,6 +1828,8 @@ runSimulation <- function(design, replications, generate, analyse, summarise, date_completed = noquote(date()), total_elapsed_time = sum(SIM_TIME), error_seeds=dplyr::as_tibble(error_seeds), warning_seeds=dplyr::as_tibble(warning_seeds), + prepare_seeds=prepare_seeds, + prepare_error_seeds=prepare_error_seeds, stored_results = if(store_results) stored_Results_list else NULL, Design.ID=Design.ID, functions=list(Generate=Generate, Analyse=Analyse, Summarise=Summarise)) From ff4868f3fff967ea6089c4ed9fa8ea004239ec90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Mon, 15 Dec 2025 09:33:51 +0200 Subject: [PATCH 04/10] Moved RNG initialization to .onAttach() --- R/analysis.R | 5 ----- R/zzz.R | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/R/analysis.R b/R/analysis.R index d263c4cd..b25f1fd8 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -16,15 +16,10 @@ Analysis <- function(Functions, condition, replications, fixed_objects, prepare prepare_error_seed <- NULL prepare_Random.seed <- NULL if(!is.null(prepare)) { - # Restore seed if debugging prepare if(!is.null(load_seed_prepare)) .GlobalEnv$.Random.seed <- load_seed_prepare - # Ensure .Random.seed exists (initialize RNG if needed) - else if(!exists(".Random.seed", envir = .GlobalEnv)) - runif(1) - # Capture seed state before prepare (similar to mainsim line 296) prepare_Random.seed <- .GlobalEnv$.Random.seed diff --git a/R/zzz.R b/R/zzz.R index 71fbacca..92cf64b5 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,4 +1,8 @@ .onAttach <- function(libname, pkgname) { + # Initialize RNG to ensure .Random.seed exists + if(!exists(".Random.seed", envir = .GlobalEnv)) + runif(1) + if(interactive()) packageStartupMessage("See ?SimFunctions to get started with SimDesign") } From 98eec838fda27ddaeb6824ee0a1817eaf26d5b1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Mon, 15 Dec 2025 09:48:17 +0200 Subject: [PATCH 05/10] Added tests for the prepare function. --- tests/tests/test-04-prepare.R | 172 ++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 tests/tests/test-04-prepare.R diff --git a/tests/tests/test-04-prepare.R b/tests/tests/test-04-prepare.R new file mode 100644 index 00000000..94e15609 --- /dev/null +++ b/tests/tests/test-04-prepare.R @@ -0,0 +1,172 @@ +context('prepare') + +test_that('prepare RNG management', { + + library(SimDesign) + + # Setup common functions + Design <- createDesign(N = c(10, 20)) + + prepare <- function(condition, fixed_objects) { + fixed_objects$data <- rnorm(condition$N) + return(fixed_objects) + } + + generate <- function(condition, fixed_objects) { + data.frame(x = rnorm(5)) + } + + analyse <- function(condition, dat, fixed_objects) { + c(mean_x = mean(dat$x)) + } + + summarise <- function(condition, results, fixed_objects) { + c(mean_x = mean(results[,'mean_x'])) + } + + # Test 1: Basic prepare() seed capture + res <- runSimulation(Design, replications=2, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + control=list(store_Random.seeds=TRUE), + verbose=FALSE) + + expect_is(res, 'SimDesign') + prepare_seeds <- SimExtract(res, what='prepare_seeds') + expect_true(length(prepare_seeds) == nrow(Design)) + expect_true(all(sapply(prepare_seeds, length) == 626)) + expect_true(!is.null(prepare_seeds[[1]])) + expect_true(!is.null(prepare_seeds[[2]])) + + # Test 2: prepare() seeds NOT captured when flag is FALSE + res2 <- runSimulation(Design, replications=2, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + verbose=FALSE) + + prepare_seeds2 <- SimExtract(res2, what='prepare_seeds') + expect_true(is.null(prepare_seeds2[[1]])) + expect_true(is.null(prepare_seeds2[[2]])) + + # Test 3: Seed reproducibility with load_seed_prepare + res3a <- runSimulation(Design[1,], replications=2, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + seed=1234, + control=list(store_Random.seeds=TRUE), + verbose=FALSE) + + seed1 <- SimExtract(res3a, what='prepare_seeds')[[1]] + + res3b <- runSimulation(Design[1,], replications=2, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + load_seed_prepare=seed1, + seed=1234, + control=list(store_Random.seeds=TRUE), + verbose=FALSE) + + expect_equal(res3a$mean_x, res3b$mean_x) + + # Test 4: prepare() error seed capture + prepare_error <- function(condition, fixed_objects) { + if(condition$N > 15) stop('N too large in prepare') + fixed_objects$data <- rnorm(condition$N) + return(fixed_objects) + } + + res_fail <- tryCatch( + runSimulation(Design[2,], replications=1, + prepare=prepare_error, + generate=generate, + analyse=analyse, + summarise=summarise, + verbose=FALSE), + error = function(e) e + ) + + expect_true(inherits(res_fail, 'error')) + + # Test 5: load_seed_prepare with file path + res5 <- runSimulation(Design, replications=2, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + control=list(save_seeds=TRUE, + store_Random.seeds=TRUE), + filename='test', + verbose=FALSE) + + seed_dirs <- list.dirs('.', recursive=FALSE) + seed_dir <- seed_dirs[grep('SimDesign-seeds', seed_dirs)] + + if(length(seed_dir) > 0) { + seed_file <- file.path(seed_dir, 'design-row-1', 'prepare-seed') + + if(file.exists(seed_file)) { + res5b <- runSimulation(Design[1,], replications=2, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + load_seed_prepare=seed_file, + verbose=FALSE) + + expect_equal(res5$mean_x[1], res5b$mean_x[1]) + } + } + + SimClean(results=TRUE, temp=TRUE) + + # Test 6: Backward compatibility - no prepare() + res6 <- runSimulation(Design, replications=2, + generate=generate, + analyse=analyse, + summarise=summarise, + verbose=FALSE) + + expect_is(res6, 'SimDesign') + expect_equal(nrow(res6), nrow(Design)) + + prepare_seeds6 <- SimExtract(res6, what='prepare_seeds') + expect_true(all(sapply(prepare_seeds6, is.null))) + + # Test 7: prepare() seed isolation between conditions + res7 <- runSimulation(Design, replications=2, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + control=list(store_Random.seeds=TRUE), + verbose=FALSE) + + prepare_seeds7 <- SimExtract(res7, what='prepare_seeds') + expect_false(identical(prepare_seeds7[[1]], prepare_seeds7[[2]])) + + # Test 8: Integration with existing seed management + res8 <- runSimulation(Design, replications=10, + prepare=prepare, + generate=generate, + analyse=analyse, + summarise=summarise, + control=list(store_Random.seeds=TRUE), + verbose=FALSE) + + prepare_seeds8 <- SimExtract(res8, what='prepare_seeds') + random_seeds8 <- SimExtract(res8, what='Random.seeds') + + expect_true(length(prepare_seeds8) == nrow(Design)) + expect_is(random_seeds8, 'list') + expect_true(length(random_seeds8) == nrow(Design)) + expect_true(all(sapply(random_seeds8, function(x) nrow(x) == 10))) + +}) From 847211ed74adff39bcacce6b5072c6dd9172fe87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Mon, 15 Dec 2025 10:26:38 +0200 Subject: [PATCH 06/10] Update prepare() documentation to emphasize loading pre-computed objects - Rewrite @param prepare to prioritize loading RDS files over dynamic generation - Add RNG reproducibility warning when generating within prepare() - Note that prepare objects are not stored by runSimulation() - Add complete working example demonstrating recommended two-step workflow - Document prepare seed storage in save_seeds and store_Random.seeds sections Changes address feedback from PR #78 to position prepare() primarily as an object loader for cluster workflows, with dynamic generation as a secondary use case requiring explicit RNG state management. --- R/runSimulation.R | 120 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 8 deletions(-) diff --git a/R/runSimulation.R b/R/runSimulation.R index 69b26188..99e1b2d6 100644 --- a/R/runSimulation.R +++ b/R/runSimulation.R @@ -202,14 +202,41 @@ #' and \code{control$stop_on_fatal} flags #' #' @param prepare (optional) a function that executes once per simulation condition -#' (i.e., once per row in \code{design}) to prepare/modify the \code{fixed_objects} -#' before replications are run. This function should accept \code{condition} and -#' \code{fixed_objects} as arguments and return the modified \code{fixed_objects}. +#' (i.e., once per row in \code{design}) to load or prepare condition-specific objects +#' into \code{fixed_objects} before replications are run. This function should accept +#' \code{condition} and \code{fixed_objects} as arguments and return the modified +#' \code{fixed_objects}. +#' +#' The primary use case is to load pre-computed objects from disk that were +#' generated offline: +#' +#' \code{prepare <- function(condition, fixed_objects) \{} +#' +#' \code{ # Create filename from design parameters} +#' +#' \code{ fname <- paste0('prepare/N', condition$N, '_SD', condition$SD, '.rds')} +#' +#' \code{ fixed_objects$expensive_stuff <- readRDS(fname)} +#' +#' \code{ return(fixed_objects)} +#' +#' \code{\}} +#' +#' This approach allows you to: (1) pre-generate expensive condition-specific objects +#' prior to running the simulation , (2) save them as individual +#' RDS files, and (3) load them efficiently during the simulation. This is preferable to +#' generating objects within \code{prepare()} itself because it allows you to inspect the +#' objects, ensures reproducibility, and separates object generation from the simulation workflow. +#' +#' Note: Objects added to \code{fixed_objects} in \code{prepare()} are not stored +#' by \code{runSimulation()} to conserve memory, as they are typically large. You should +#' maintain your own records of these objects outside the simulation. +#' +#' RNG Warning: If you generate objects within \code{prepare()} using random number +#' generation (e.g., \code{rnorm()}, \code{runif()}), reproducibility requires explicit RNG +#' state management via \code{store_Random.seeds=TRUE} and \code{load_seed_prepare}. Pre-computing +#' and saving objects as RDS files is the recommended approach for reproducible simulations. #' -#' This is useful for pre-computing expensive condition-specific objects that can be -#' reused across replications (e.g., design matrices, correlation matrices, lookup tables) -#' without having to either pre-compute them for all conditions (which can cause -#' memory issues) or recompute them for every replication (which causes performance issues). #' #' The function signature should be: #' \code{prepare <- function(condition, fixed_objects) \{ ... return(fixed_objects) \}} @@ -401,6 +428,8 @@ #' analysis state (mostly useful #' for debugging). When \code{TRUE}, temporary files will also be saved #' to the working directory (in the same way as when \code{save = TRUE}). +#' Additionally, if a \code{prepare} function is provided, the RNG state before +#' \code{prepare()} execution is saved to \code{'design-row-X/prepare-seed'}. #' Default is \code{FALSE} #' #' Note, however, that this option is not typically necessary or recommended since @@ -418,7 +447,10 @@ #' take up a great deal of unnecessary RAM (see related \code{save_seeds}), #' however this may be useful #' when used with \code{\link{runArraySimulation}}. To extract use -#' \code{SimExtract(..., what = 'stored_Random.seeds')}} +#' \code{SimExtract(..., what = 'stored_Random.seeds')}. Additionally, +#' if a \code{prepare} function is provided, the RNG state before \code{prepare()} +#' execution is stored and can be extracted with +#' \code{SimExtract(..., what = 'prepare_seeds')}} #' #' \item{\code{store_warning_seeds}}{logical (default is \code{FALSE}); #' in addition to storing the \code{.Random.seed} states whenever error messages @@ -1035,6 +1067,78 @@ #' facet_grid(stats~standard_deviation_ratio) + #' theme(legend.position = 'none') #' +#' #------------------------------------------------------------------------------- +#' # Example with prepare() function - Loading pre-computed objects +#' +#' \dontrun{ +#' +#' # Step 1: Pre-generate expensive objects offline (can be parallelized) +#' Design <- createDesign(N = c(10, 20), rho = c(0.3, 0.7)) +#' +#' # Create directory for storing prepared objects +#' dir.create('prepared_objects', showWarnings = FALSE) +#' +#' # Generate and save correlation matrices for each condition +#' for(i in 1:nrow(Design)) { +#' cond <- Design[i, ] +#' +#' # Generate correlation matrix +#' corr_matrix <- matrix(cond$rho, nrow=cond$N, ncol=cond$N) +#' diag(corr_matrix) <- 1 +#' +#' # Create filename based on design parameters +#' fname <- paste0('prepared_objects/N', cond$N, '_rho', cond$rho, '.rds') +#' +#' # Save to disk +#' saveRDS(corr_matrix, file = fname) +#' } +#' +#' # Step 2: Use prepare() to load these objects during simulation +#' prepare <- function(condition, fixed_objects) { +#' # Create matching filename +#' fname <- paste0('prepared_objects/N', condition$N, +#' '_rho', condition$rho, '.rds') +#' +#' # Load the pre-computed correlation matrix +#' fixed_objects$corr_matrix <- readRDS(fname) +#' +#' return(fixed_objects) +#' } +#' +#' generate <- function(condition, fixed_objects) { +#' # Use the loaded correlation matrix to generate multivariate data +#' dat <- MASS::mvrnorm(n = 50, +#' mu = rep(0, condition$N), +#' Sigma = fixed_objects$corr_matrix) +#' data.frame(dat) +#' } +#' +#' analyse <- function(condition, dat, fixed_objects) { +#' # Calculate mean correlation in generated data +#' obs_corr <- cor(dat) +#' c(mean_corr = mean(obs_corr[lower.tri(obs_corr)])) +#' } +#' +#' summarise <- function(condition, results, fixed_objects) { +#' c(mean_corr = mean(results$mean_corr)) +#' } +#' +#' # Run simulation - prepare() loads objects efficiently +#' results <- runSimulation(design = Design, +#' replications = 2, +#' prepare = prepare, +#' generate = generate, +#' analyse = analyse, +#' summarise = summarise, +#' verbose = FALSE) +#' +#' results +#' +#' # Cleanup +#' unlink('prepared_objects', recursive = TRUE) +#' +#' } +#' #' } #' runSimulation <- function(design, replications, generate, analyse, summarise, From e84755d64e2909fa0008d95c2a9b31f11976dc91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Mon, 15 Dec 2025 10:33:45 +0200 Subject: [PATCH 07/10] Added logging changes and prepare function in a new release. --- NEWS.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/NEWS.md b/NEWS.md index d482f7c9..afe5094c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,26 @@ # NEWS file for SimDesign +## Changes in SimDesign 2.22 + +- `runSimulation()` gains a `prepare` function argument for loading or preparing + condition-specific objects before replications are executed. This function executes + once per simulation condition and is particularly useful for loading pre-computed + expensive objects (e.g., correlation matrices, design matrices) from disk in cluster + workflows. The recommended approach is to pre-generate objects offline and load them + via `prepare()` using `readRDS()`, rather than generating them dynamically. Objects + added to `fixed_objects` in `prepare()` are available to all replications within + that condition but are not stored in the final results to conserve memory. + +- Added `load_seed_prepare` parameter to `runSimulation()` for debugging the `prepare()` + function by reproducing exact RNG states. Works similarly to `load_seed` but specifically + for the prepare step. Prepare seeds can be extracted using + `SimExtract(res, 'prepare_seeds')` when `store_Random.seeds=TRUE`, and error seeds + can be extracted with `SimExtract(res, 'prepare_error_seed')`. + +- Progress reporting in non-interactive mode (e.g., SLURM clusters) has been improved. + Progress bars now display correctly when running batch jobs on cluster systems, + providing better visibility into long-running simulations. + ## Changes in SimDesign 2.21 - `createDesign()` gains a `fully.crossed = TRUE` argument. When disabled (`FALSE`) will create a From 5f4b945eb945cc970185087ad8afcbace06c1573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Mon, 15 Dec 2025 10:40:28 +0200 Subject: [PATCH 08/10] Revert "Added logging changes and prepare function in a new release." This reverts commit e84755d64e2909fa0008d95c2a9b31f11976dc91. --- NEWS.md | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/NEWS.md b/NEWS.md index afe5094c..d482f7c9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,26 +1,5 @@ # NEWS file for SimDesign -## Changes in SimDesign 2.22 - -- `runSimulation()` gains a `prepare` function argument for loading or preparing - condition-specific objects before replications are executed. This function executes - once per simulation condition and is particularly useful for loading pre-computed - expensive objects (e.g., correlation matrices, design matrices) from disk in cluster - workflows. The recommended approach is to pre-generate objects offline and load them - via `prepare()` using `readRDS()`, rather than generating them dynamically. Objects - added to `fixed_objects` in `prepare()` are available to all replications within - that condition but are not stored in the final results to conserve memory. - -- Added `load_seed_prepare` parameter to `runSimulation()` for debugging the `prepare()` - function by reproducing exact RNG states. Works similarly to `load_seed` but specifically - for the prepare step. Prepare seeds can be extracted using - `SimExtract(res, 'prepare_seeds')` when `store_Random.seeds=TRUE`, and error seeds - can be extracted with `SimExtract(res, 'prepare_error_seed')`. - -- Progress reporting in non-interactive mode (e.g., SLURM clusters) has been improved. - Progress bars now display correctly when running batch jobs on cluster systems, - providing better visibility into long-running simulations. - ## Changes in SimDesign 2.21 - `createDesign()` gains a `fully.crossed = TRUE` argument. When disabled (`FALSE`) will create a From f8c727713b9e5707c61e214fff5e2eb32ce75b13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Mon, 15 Dec 2025 10:42:29 +0200 Subject: [PATCH 09/10] Incremented release number and added description of the prepare function and improved logging on cluster. --- NEWS.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/NEWS.md b/NEWS.md index d482f7c9..2dbce029 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,23 @@ # NEWS file for SimDesign +## Changes in SimDesign 2.22 + +- `runSimulation()` gains a `prepare` function argument for loading or preparing + condition-specific objects before replications are executed. This function executes + once per simulation condition and is particularly useful for loading pre-computed + expensive objects (e.g., correlation matrices, design matrices) from disk in cluster + workflows. + +- Added `load_seed_prepare` parameter to `runSimulation()` for debugging the `prepare()` + function by reproducing exact RNG states. Works similarly to `load_seed` but specifically + for the prepare step. Prepare seeds can be extracted using + `SimExtract(res, 'prepare_seeds')` when `store_Random.seeds=TRUE`, and error seeds + can be extracted with `SimExtract(res, 'prepare_error_seed')`. + +- Progress reporting in non-interactive mode (e.g., SLURM clusters) has been improved. + Progress bars now display correctly when running batch jobs on cluster systems, + providing better visibility into long-running simulations. + ## Changes in SimDesign 2.21 - `createDesign()` gains a `fully.crossed = TRUE` argument. When disabled (`FALSE`) will create a From 3dfdccd0f2ec0e365a3b7f2cb3fdbd93cf66aa34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= Date: Mon, 15 Dec 2025 10:43:53 +0200 Subject: [PATCH 10/10] =?UTF-8?q?Fixed=20Mikko=20R=C3=B6nkk=C3=B6's=20last?= =?UTF-8?q?=20name=20to=20have=20umlauts=20and=20changed=20ctb=20->=20aut?= =?UTF-8?q?=20role.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index fedc81f7..6a10086d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Authors@R: c(person("Phil", "Chalmers", email = "rphilip.chalmers@gmail.com", ro comment = c(ORCID="0000-0001-5332-2810")), person("Matthew", "Sigal", role = c("ctb")), person("Ogreden", family="Oguzhan", role = c("ctb")), - person("Mikko ", family="Ronkko", role = c("ctb")), + person("Mikko", family="Rönkkö", role = c("aut")), person("Moritz", family="Ketzer", role = c("ctb"))) Description: Provides tools to safely and efficiently organize and execute Monte Carlo simulation experiments in R.