From d2a3f78c385167235a8f36e59eb871a6ce035157 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Sat, 13 Dec 2025 08:36:37 +0200
Subject: [PATCH 01/10] Enable progress bars in non-interactive mode for SLURM
 clusters

Configure pbapply to display text-based progress bars when running
in non-interactive mode (e.g., batch jobs, Rscript). Previously,
progress tracking was disabled in non-interactive sessions, making
it impossible to monitor simulation progress in SLURM log files.

Changes:
- R/analysis.R: Add pboptions configuration to force type="txt"
  in non-interactive mode while preserving timer bars in interactive
  sessions
- R/runSimulation.R: Update progress parameter documentation to
  describe the new behavior

Interactive users see no change. Non-interactive users (SLURM, batch
jobs) now see text progress bars when monitoring logs via tail -f.

Fixes #75
---
 R/analysis.R      | 7 +++++++
 R/runSimulation.R | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/R/analysis.R b/R/analysis.R
index 8b74785c..1470e9ca 100644
--- a/R/analysis.R
+++ b/R/analysis.R
@@ -10,6 +10,13 @@ Analysis <- function(Functions, condition, replications, fixed_objects, cl, MPI,
 {
     # This defines the work-flow for the Monte Carlo simulation given the condition (row in Design)
     #  and number of replications desired
+
+    # Configure pbapply for non-interactive mode (e.g., SLURM cluster logs)
+    if(progress && !interactive()) {
+        old_pboptions <- pbapply::pboptions(type = "txt", char = "=", style = 3)
+        on.exit(pbapply::pboptions(old_pboptions), add = TRUE)
+    }
+
     used_mainsim <- if(is.finite(max_time)) mainsim_maxtime else mainsim
     if(useFuture){
         if(!is.null(seed)) set_seed(seed[condition$ID])
diff --git a/R/runSimulation.R b/R/runSimulation.R
index abcf396c..412abe3b 100644
--- a/R/runSimulation.R
+++ b/R/runSimulation.R
@@ -600,7 +600,9 @@
 #'   range 1 to 2147483647 for each condition via \code{\link{genSeeds}}
 #'
 #' @param progress logical; display a progress bar (using the \code{pbapply} package)
-#'   for each simulation condition?
+#'   for each simulation condition? In interactive sessions, shows a timer-based
+#'   progress bar. In non-interactive sessions (e.g., HPC cluster jobs), displays
+#'   text-based progress updates that are visible in log files.
 #'   This is useful when simulations conditions take a long time to run (see also the
 #'   \code{notification} argument). Default is \code{TRUE}
 #'

From 58b04b517d4e146b4b5b44f2d76771fde6f75a6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Sat, 13 Dec 2025 20:17:47 +0200
Subject: [PATCH 02/10] Add prepare function for condition-level object
 preparation

Implements #74 by adding a prepare parameter to runSimulation() that
modifies fixed_objects once per condition before replications run.

The prepare function accepts condition and fixed_objects as arguments
and returns the modified fixed_objects, which is then passed to all
replications for that condition.

Use case: Pre-compute expensive condition-specific objects (design
matrices, lookup tables) once per condition instead of per replication,
avoiding both memory issues (from pre-computing all conditions) and
performance issues (from recomputing per replication).

Implementation:
- Added prepare parameter with validation
- Calls prepare(condition, fixed_objects) in main loop per condition
- Returns modified fixed_objects for use in replications
- Exports prepare to parallel clusters when provided
- Includes prepare globals in check.globals
- Full backward compatibility (prepare defaults to NULL)

Example:
prepare <- function(condition, fixed_objects) {
    fixed_objects$design_matrix <- matrix(rnorm(condition$N * 10), ncol=10)
    return(fixed_objects)
}
---
 R/analysis.R      | 12 +++++++++++-
 R/runSimulation.R | 36 ++++++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/R/analysis.R b/R/analysis.R
index 1470e9ca..4128246b 100644
--- a/R/analysis.R
+++ b/R/analysis.R
@@ -1,4 +1,4 @@
-Analysis <- function(Functions, condition, replications, fixed_objects, cl, MPI, seed, save,
+Analysis <- function(Functions, condition, replications, fixed_objects, prepare = NULL, cl, MPI, seed, save,
                      save_results, save_results_out_rootdir, save_results_dirname, max_errors,
                      boot_method, boot_draws, CI,
                      save_seeds, save_seeds_dirname, load_seed,
@@ -11,6 +11,16 @@ Analysis <- function(Functions, condition, replications, fixed_objects, cl, MPI,
     # This defines the work-flow for the Monte Carlo simulation given the condition (row in Design)
     #  and number of replications desired
 
+    # Call prepare function once per condition if provided
+    if(!is.null(prepare)) {
+        prep_result <- try(prepare(condition=condition, fixed_objects=fixed_objects), silent=FALSE)
+        if(is(prep_result, 'try-error')){
+            stop(sprintf('prepare() failed for condition %i with error: %s',
+                         condition$ID, as.character(prep_result)), call.=FALSE)
+        }
+        fixed_objects <- prep_result
+    }
+
     # Configure pbapply for non-interactive mode (e.g., SLURM cluster logs)
     if(progress && !interactive()) {
         old_pboptions <- pbapply::pboptions(type = "txt", char = "=", style = 3)
diff --git a/R/runSimulation.R b/R/runSimulation.R
index 412abe3b..5e7ca76e 100644
--- a/R/runSimulation.R
+++ b/R/runSimulation.R
@@ -201,6 +201,21 @@
 #'   to less than 3 (for initial testing purpose) will disable the \code{save}
 #'   and \code{control$stop_on_fatal} flags
 #'
+#' @param prepare (optional) a function that executes once per simulation condition
+#'   (i.e., once per row in \code{design}) to prepare/modify the \code{fixed_objects}
+#'   before replications are run. This function should accept \code{condition} and
+#'   \code{fixed_objects} as arguments and return the modified \code{fixed_objects}.
+#'
+#'   This is useful for pre-computing expensive condition-specific objects that can be
+#'   reused across replications (e.g., design matrices, correlation matrices, lookup tables)
+#'   without having to either pre-compute them for all conditions (which can cause
+#'   memory issues) or recompute them for every replication (which causes performance issues).
+#'
+#'   The function signature should be:
+#'   \code{prepare <- function(condition, fixed_objects) \{ ... return(fixed_objects) \}}
+#'
+#'   Default is \code{NULL}, in which case no preparation step is performed
+#'
 #' @param fixed_objects (optional) an object (usually a named \code{list})
 #'   containing additional user-defined objects
 #'   that should remain fixed across conditions. This is useful when including
@@ -1014,7 +1029,7 @@
 #' }
 #'
 runSimulation <- function(design, replications, generate, analyse, summarise,
-                          fixed_objects = NULL, packages = NULL, filename = NULL,
+                          prepare = NULL, fixed_objects = NULL, packages = NULL, filename = NULL,
                           debug = 'none', load_seed = NULL, save = any(replications > 2),
                           store_results = TRUE, save_results = FALSE,
                           parallel = FALSE, ncores = parallelly::availableCores(omit = 1L),
@@ -1246,6 +1261,15 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
     if(!is.null(seed))
         stopifnot(nrow(design) == length(seed))
     debug <- tolower(debug)
+    # Validate prepare function
+    if(!is.null(prepare)){
+        if(!is.function(prepare))
+            stop('prepare must be a function', call. = FALSE)
+        fms <- names(formals(prepare))
+        truefms <- c('condition', 'fixed_objects')
+        if(!all(truefms %in% fms))
+            stop('Function arguments for prepare are not correct. Must include: condition, fixed_objects', call. = FALSE)
+    }
     for(i in names(Functions)){
         fms <- names(formals(Functions[[i]]))
         truefms <- switch(i,
@@ -1338,14 +1362,20 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
             parallel::clusterExport(cl=cl, export_funs, envir = parent.frame(1L))
             parallel::clusterExport(cl=cl, "ANALYSE_FUNCTIONS", envir = environment())
             parallel::clusterExport(cl=cl, "TRY_ALL_ANALYSE", envir = environment())
+            if(!is.null(prepare))
+                parallel::clusterExport(cl=cl, "prepare", envir = environment())
             if(verbose)
                 message(sprintf("\nNumber of parallel clusters in use: %i", length(cl)))
         }
     }
     if(check.globals){
+        prepare_globals <- if(!is.null(prepare))
+            codetools::findGlobals(prepare, merge=FALSE)[['variables']]
+        else NULL
         globals <- unique(c(codetools::findGlobals(generate, merge=FALSE)[['variables']],
                      codetools::findGlobals(analyse, merge=FALSE)[['variables']],
-                     codetools::findGlobals(summarise, merge=FALSE)[['variables']]))
+                     codetools::findGlobals(summarise, merge=FALSE)[['variables']],
+                     prepare_globals))
         return(setdiff(globals, c(names(design), names(fixed_objects))))
     }
     Result_list <- vector('list', nrow(design))
@@ -1484,6 +1514,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
                                            else design[i,],
                                          replications=replications[i],
                                          fixed_objects=fixed_objects,
+                                         prepare=prepare,
                                          cl=if(i %in% not_parallel) NULL else cl,
                                          MPI=MPI, .options.mpi=.options.mpi, seed=seed,
                                          boot_draws=boot_draws, boot_method=boot_method, CI=CI,
@@ -1525,6 +1556,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
                             condition=if(was_tibble) dplyr::as_tibble(design[i,]) else design[i,],
                             replications=replications[i],
                             fixed_objects=fixed_objects,
+                            prepare=prepare,
                             cl=if(i %in% not_parallel) NULL else cl,
                             MPI=MPI, .options.mpi=.options.mpi, seed=seed,
                             store_Random.seeds=store_Random.seeds,

From a50e0dc377da63b859e0f77b2374fb5a4eecfc36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Sun, 14 Dec 2025 10:27:56 +0200
Subject: [PATCH 03/10] Add RNG state management for prepare() function

This commit adds comprehensive random number generator (RNG) state
management for the prepare() function, ensuring reproducibility and
debugging support consistent with generate/analyse/summarise functions.

Key Changes:

1. Seed Capture (R/analysis.R:15-52)
   - Automatically capture .Random.seed state before prepare() executes
   - Initialize RNG if .Random.seed doesn't exist yet
   - Store prepare error seed when prepare() fails for debugging

2. Seed Storage (R/analysis.R:26-37, 251-261)
   - Save prepare seeds to disk when save_seeds=TRUE
   - File path format: design-row-{ID}/prepare-seed
   - Store prepare_Random.seed in attributes when store_Random.seeds=TRUE
   - Always store prepare_error_seed for debugging (independent of flag)

3. New Parameter: load_seed_prepare (R/runSimulation.R:1033)
   - Dedicated parameter for debugging prepare function
   - Accepts character path, integer vector, or tibble/data.frame
   - Supports both absolute and relative file paths
   - Automatically detects path type and handles appropriately
   - Documented at R/runSimulation.R:345-352

4. Seed Extraction (R/SimExtract.R:120-123, 199-209)
   - SimExtract(res, 'prepare_seeds') - extract all prepare seeds
   - SimExtract(res, 'prepare_error_seed') - extract error seeds

5. Attribute Preservation (R/runSimulation.R:1635-1636)
   - Manually restore prepare seed attributes when Result_list is
     rebuilt as data.frame to prevent attribute loss

Example Usage:

# Run simulation with prepare that uses RNG
res <- runSimulation(Design, replications=10,
                     prepare=prepare,  # Uses rnorm(), runif(), etc.
                     control=list(save_seeds=TRUE,
                                  store_Random.seeds=TRUE))

# Extract prepare seeds for reproducibility
prepare_seeds <- SimExtract(res, 'prepare_seeds')

# Debug prepare errors by loading the error seed
res2 <- runSimulation(Design[2,], replications=1,
                      load_seed_prepare='design-row-2/prepare-seed')

Design Decisions:

- prepare_Random.seed only stored when store_Random.seeds=TRUE for
  consistency with stored_Random.seeds behavior
- prepare_error_seed always stored for debugging, like error_seeds
  and warning_seeds
- Separate attributes (prepare_Random.seed, prepare_error_seed)
  instead of nested list for consistency with existing codebase patterns
- File path detection allows both absolute and relative paths

Related: Complements PR #78 (prepare function feature)
---
 R/SimExtract.R    | 20 ++++++++++++++++++++
 R/analysis.R      | 38 ++++++++++++++++++++++++++++++++++++--
 R/runSimulation.R | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/R/SimExtract.R b/R/SimExtract.R
index c7b16cb6..6fa999e0 100644
--- a/R/SimExtract.R
+++ b/R/SimExtract.R
@@ -17,6 +17,10 @@
 #'   \code{runSimulation(..., control = list(store_Random.seeds=TRUE))}),
 #'   \code{'error_seeds'} and \code{'warning_seeds'}
 #'   to extract the associated \code{.Random.seed} values associated with the ERROR/WARNING messages,
+#'   \code{'prepare_seeds'} to extract the \code{.Random.seed} states captured before
+#'   \code{prepare()} was called for each condition, \code{'prepare_error_seed'} to extract the
+#'   \code{.Random.seed} state when \code{prepare()} encountered an error (useful for debugging with
+#'   \code{load_seed_prepare}),
 #'   \code{'results'} to extract the simulation results if the option \code{store_results} was passed to
 #'   \code{\link{runSimulation}}, \code{'filename'} and \code{'save_results_dirname'} for extracting
 #'   the saved file/directory name information (if used), \code{'functions'} to extract the defined functions
@@ -113,6 +117,10 @@ SimExtract <- function(object, what, fuzzy = TRUE, append = TRUE){
         if(length(wrn) && append) cbind(Design, wrn) else wrn
     } else if(what == 'warning_seeds'){
         extract_warning_seeds(object)
+    } else if(what == 'prepare_seeds'){
+        extract_prepare_seeds(object)
+    } else if(what == 'prepare_error_seed'){
+        extract_prepare_error_seed(object)
     } else if(what == 'save_results_dirname'){
         attr(object, 'extra_info')$save_info['save_results_dirname']
     } else if(what == 'filename'){
@@ -188,6 +196,18 @@ extract_functions <- function(object){
     ret
 }
 
+extract_prepare_seeds <- function(object){
+    extra_info <- attr(object, 'extra_info')
+    ret <- extra_info$prepare_seeds
+    ret
+}
+
+extract_prepare_error_seed <- function(object){
+    extra_info <- attr(object, 'extra_info')
+    ret <- extra_info$prepare_error_seeds
+    ret
+}
+
 fuzzy_reduce <- function(df){
     if(!length(df)) return(df)
     nms <- colnames(df)
diff --git a/R/analysis.R b/R/analysis.R
index 4128246b..d263c4cd 100644
--- a/R/analysis.R
+++ b/R/analysis.R
@@ -1,4 +1,5 @@
-Analysis <- function(Functions, condition, replications, fixed_objects, prepare = NULL, cl, MPI, seed, save,
+Analysis <- function(Functions, condition, replications, fixed_objects, prepare = NULL,
+                     load_seed_prepare = NULL, cl, MPI, seed, save,
                      save_results, save_results_out_rootdir, save_results_dirname, max_errors,
                      boot_method, boot_draws, CI,
                      save_seeds, save_seeds_dirname, load_seed,
@@ -12,9 +13,34 @@ Analysis <- function(Functions, condition, replications, fixed_objects, prepare
     #  and number of replications desired
 
     # Call prepare function once per condition if provided
+    prepare_error_seed <- NULL
+    prepare_Random.seed <- NULL
     if(!is.null(prepare)) {
+
+        # Restore seed if debugging prepare
+        if(!is.null(load_seed_prepare))
+            .GlobalEnv$.Random.seed <- load_seed_prepare
+
+        # Ensure .Random.seed exists (initialize RNG if needed)
+        else if(!exists(".Random.seed", envir = .GlobalEnv))
+            runif(1)
+        
+        # Capture seed state before prepare (similar to mainsim line 296)
+        prepare_Random.seed <- .GlobalEnv$.Random.seed
+
+        # Save seed to disk if requested
+        if(save_seeds){
+            filename <- paste0(save_seeds_dirname, '/design-row-', condition$ID, '/prepare-seed')
+            dir.create(dirname(file.path(save_results_out_rootdir, filename)),
+                       showWarnings = FALSE, recursive = TRUE)
+            write(prepare_Random.seed, file.path(save_results_out_rootdir, filename), sep = ' ')
+        }
+
         prep_result <- try(prepare(condition=condition, fixed_objects=fixed_objects), silent=FALSE)
+
         if(is(prep_result, 'try-error')){
+            # Capture seed on error (similar to mainsim)
+            prepare_error_seed <- prepare_Random.seed
             stop(sprintf('prepare() failed for condition %i with error: %s',
                          condition$ID, as.character(prep_result)), call.=FALSE)
         }
@@ -222,9 +248,17 @@ Analysis <- function(Functions, condition, replications, fixed_objects, prepare
     attr(ret, 'error_seeds') <- try_error_seeds
     attr(ret, 'warning_seeds') <- warning_message_seeds
     attr(ret, 'summarise_list') <- summarise_list
+    if(!is.null(prepare_error_seed))
+        attr(ret, 'prepare_error_seed') <- prepare_error_seed
+
     if(store_results)
         attr(ret, 'full_results') <- tabled_results
-    if(store_Random.seeds)
+    if(store_Random.seeds){
         attr(ret, 'stored_Random.seeds') <- stored_Random.seeds
+        # Store prepare seed information
+        if(!is.null(prepare)) {
+            attr(ret, 'prepare_Random.seed') <- prepare_Random.seed
+        }
+    }
     ret
 }
diff --git a/R/runSimulation.R b/R/runSimulation.R
index 5e7ca76e..69b26188 100644
--- a/R/runSimulation.R
+++ b/R/runSimulation.R
@@ -342,6 +342,15 @@
 #'   then it WILL be important to modify the \code{design} input in order to load this
 #'   exact seed for the corresponding design row. Default is \code{NULL}
 #'
+#' @param load_seed_prepare similar to \code{load_seed}, but specifically for
+#'   debugging the \code{prepare} function. Used to replicate the exact RNG state
+#'   when prepare is called for a given condition. Accepts the same input formats
+#'   as \code{load_seed}: a character string path (e.g., \code{'design-row-2/prepare-seed'}),
+#'   an integer vector containing the \code{.Random.seed} state, or a tibble/data.frame
+#'   with seed values. This is particularly useful when prepare encounters an error
+#'   and you need to reproduce the exact state. The prepare error seed can be
+#'   extracted using \code{SimExtract(res, 'prepare_error_seed')}. Default is \code{NULL}
+#'
 #' @param filename (optional) the name of the \code{.rds} file to save the final
 #' simulation results to. If the extension
 #'   \code{.rds} is not included in the file name (e.g. \code{"mysimulation"}
@@ -1030,7 +1039,8 @@
 #'
 runSimulation <- function(design, replications, generate, analyse, summarise,
                           prepare = NULL, fixed_objects = NULL, packages = NULL, filename = NULL,
-                          debug = 'none', load_seed = NULL, save = any(replications > 2),
+                          debug = 'none', load_seed = NULL, load_seed_prepare = NULL,
+                          save = any(replications > 2),
                           store_results = TRUE, save_results = FALSE,
                           parallel = FALSE, ncores = parallelly::availableCores(omit = 1L),
                           cl = NULL, notification = 'none', notifier = NULL,
@@ -1298,6 +1308,24 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
             load_seed <- as.integer(as.data.frame(load_seed)[,1])
         stopifnot(is.integer(load_seed))
     }
+    # Validate load_seed_prepare (same logic as load_seed)
+    if(!is.null(load_seed_prepare)){
+        if(length(load_seed_prepare) == 7L){
+            rngkind <- RNGkind()
+            RNGkind("L'Ecuyer-CMRG")
+            on.exit(RNGkind(rngkind[1L]), add = TRUE)
+        }
+        if(is.character(load_seed_prepare)){
+            # Character path to saved prepare seed file
+            # Only prepend save_seeds_dirname if it's a relative path
+            if(!file.exists(load_seed_prepare))
+                load_seed_prepare <- paste0(save_seeds_dirname, '/', load_seed_prepare)
+            load_seed_prepare <- as.integer(scan(load_seed_prepare, sep = ' ', quiet = TRUE))
+        }
+        if(is(load_seed_prepare, 'tbl'))
+            load_seed_prepare <- as.integer(as.data.frame(load_seed_prepare)[,1])
+        stopifnot(is.integer(load_seed_prepare))
+    }
     if(MPI){
         parallel <- FALSE
         verbose <- FALSE
@@ -1515,6 +1543,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
                                          replications=replications[i],
                                          fixed_objects=fixed_objects,
                                          prepare=prepare,
+                                         load_seed_prepare=load_seed_prepare,
                                          cl=if(i %in% not_parallel) NULL else cl,
                                          MPI=MPI, .options.mpi=.options.mpi, seed=seed,
                                          boot_draws=boot_draws, boot_method=boot_method, CI=CI,
@@ -1557,6 +1586,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
                             replications=replications[i],
                             fixed_objects=fixed_objects,
                             prepare=prepare,
+                            load_seed_prepare=load_seed_prepare,
                             cl=if(i %in% not_parallel) NULL else cl,
                             MPI=MPI, .options.mpi=.options.mpi, seed=seed,
                             store_Random.seeds=store_Random.seeds,
@@ -1602,6 +1632,8 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
             attr(Result_list[[i]], 'error_seeds') <- attr(tmp, 'error_seeds')
             attr(Result_list[[i]], 'warning_seeds') <- attr(tmp, 'warning_seeds')
             attr(Result_list[[i]], 'summarise_list') <- attr(tmp, 'summarise_list')
+            attr(Result_list[[i]], 'prepare_Random.seed') <- attr(tmp, 'prepare_Random.seed')
+            attr(Result_list[[i]], 'prepare_error_seed') <- attr(tmp, 'prepare_error_seed')
             Result_list[[i]]$COMPLETED <- date()
             time1 <- proc.time()[3L]
             Result_list[[i]]$SIM_TIME <- time1 - time0
@@ -1712,6 +1744,17 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
                                 rownames(ret))
         t(ret)
     })))
+    # Collect prepare seeds
+    prepare_seeds <- lapply(1L:length(Result_list), function(x) {
+        attr(Result_list[[x]], "prepare_Random.seed")
+    })
+    prepare_error_seeds <- lapply(1L:length(Result_list), function(x) {
+        attr(Result_list[[x]], "prepare_error_seed")
+    })
+    # Remove NULL entries from error seeds
+    prepare_error_seeds <- Filter(Negate(is.null), prepare_error_seeds)
+    if(length(prepare_error_seeds) == 0L) prepare_error_seeds <- NULL
+
     summarise_list <- lapply(1L:length(Result_list), function(x)
         attr(Result_list[[x]], "summarise_list")
     )
@@ -1785,6 +1828,8 @@ runSimulation <- function(design, replications, generate, analyse, summarise,
                                       date_completed = noquote(date()), total_elapsed_time = sum(SIM_TIME),
                                       error_seeds=dplyr::as_tibble(error_seeds),
                                       warning_seeds=dplyr::as_tibble(warning_seeds),
+                                      prepare_seeds=prepare_seeds,
+                                      prepare_error_seeds=prepare_error_seeds,
                                       stored_results = if(store_results) stored_Results_list else NULL,
                                       Design.ID=Design.ID,
                                       functions=list(Generate=Generate, Analyse=Analyse, Summarise=Summarise))

From ff4868f3fff967ea6089c4ed9fa8ea004239ec90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Mon, 15 Dec 2025 09:33:51 +0200
Subject: [PATCH 04/10] Moved RNG initialization to .onAttach()

---
 R/analysis.R | 5 -----
 R/zzz.R      | 4 ++++
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/R/analysis.R b/R/analysis.R
index d263c4cd..b25f1fd8 100644
--- a/R/analysis.R
+++ b/R/analysis.R
@@ -16,15 +16,10 @@ Analysis <- function(Functions, condition, replications, fixed_objects, prepare
     prepare_error_seed <- NULL
     prepare_Random.seed <- NULL
     if(!is.null(prepare)) {
-
         # Restore seed if debugging prepare
         if(!is.null(load_seed_prepare))
             .GlobalEnv$.Random.seed <- load_seed_prepare
 
-        # Ensure .Random.seed exists (initialize RNG if needed)
-        else if(!exists(".Random.seed", envir = .GlobalEnv))
-            runif(1)
-        
         # Capture seed state before prepare (similar to mainsim line 296)
         prepare_Random.seed <- .GlobalEnv$.Random.seed
 
diff --git a/R/zzz.R b/R/zzz.R
index 71fbacca..92cf64b5 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1,4 +1,8 @@
 .onAttach <- function(libname, pkgname) {
+    # Initialize RNG to ensure .Random.seed exists
+    if(!exists(".Random.seed", envir = .GlobalEnv))
+        runif(1)
+
     if(interactive())
         packageStartupMessage("See ?SimFunctions to get started with SimDesign")
 }

From 98eec838fda27ddaeb6824ee0a1817eaf26d5b1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Mon, 15 Dec 2025 09:48:17 +0200
Subject: [PATCH 05/10] Added tests for the prepare function.

---
 tests/tests/test-04-prepare.R | 172 ++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 tests/tests/test-04-prepare.R

diff --git a/tests/tests/test-04-prepare.R b/tests/tests/test-04-prepare.R
new file mode 100644
index 00000000..94e15609
--- /dev/null
+++ b/tests/tests/test-04-prepare.R
@@ -0,0 +1,172 @@
+context('prepare')
+
+test_that('prepare RNG management', {
+
+    library(SimDesign)
+
+    # Setup common functions
+    Design <- createDesign(N = c(10, 20))
+
+    prepare <- function(condition, fixed_objects) {
+        fixed_objects$data <- rnorm(condition$N)
+        return(fixed_objects)
+    }
+
+    generate <- function(condition, fixed_objects) {
+        data.frame(x = rnorm(5))
+    }
+
+    analyse <- function(condition, dat, fixed_objects) {
+        c(mean_x = mean(dat$x))
+    }
+
+    summarise <- function(condition, results, fixed_objects) {
+        c(mean_x = mean(results[,'mean_x']))
+    }
+
+    # Test 1: Basic prepare() seed capture
+    res <- runSimulation(Design, replications=2,
+                         prepare=prepare,
+                         generate=generate,
+                         analyse=analyse,
+                         summarise=summarise,
+                         control=list(store_Random.seeds=TRUE),
+                         verbose=FALSE)
+
+    expect_is(res, 'SimDesign')
+    prepare_seeds <- SimExtract(res, what='prepare_seeds')
+    expect_true(length(prepare_seeds) == nrow(Design))
+    expect_true(all(sapply(prepare_seeds, length) == 626))
+    expect_true(!is.null(prepare_seeds[[1]]))
+    expect_true(!is.null(prepare_seeds[[2]]))
+
+    # Test 2: prepare() seeds NOT captured when flag is FALSE
+    res2 <- runSimulation(Design, replications=2,
+                          prepare=prepare,
+                          generate=generate,
+                          analyse=analyse,
+                          summarise=summarise,
+                          verbose=FALSE)
+
+    prepare_seeds2 <- SimExtract(res2, what='prepare_seeds')
+    expect_true(is.null(prepare_seeds2[[1]]))
+    expect_true(is.null(prepare_seeds2[[2]]))
+
+    # Test 3: Seed reproducibility with load_seed_prepare
+    res3a <- runSimulation(Design[1,], replications=2,
+                           prepare=prepare,
+                           generate=generate,
+                           analyse=analyse,
+                           summarise=summarise,
+                           seed=1234,
+                           control=list(store_Random.seeds=TRUE),
+                           verbose=FALSE)
+
+    seed1 <- SimExtract(res3a, what='prepare_seeds')[[1]]
+
+    res3b <- runSimulation(Design[1,], replications=2,
+                           prepare=prepare,
+                           generate=generate,
+                           analyse=analyse,
+                           summarise=summarise,
+                           load_seed_prepare=seed1,
+                           seed=1234,
+                           control=list(store_Random.seeds=TRUE),
+                           verbose=FALSE)
+
+    expect_equal(res3a$mean_x, res3b$mean_x)
+
+    # Test 4: prepare() error seed capture
+    prepare_error <- function(condition, fixed_objects) {
+        if(condition$N > 15) stop('N too large in prepare')
+        fixed_objects$data <- rnorm(condition$N)
+        return(fixed_objects)
+    }
+
+    res_fail <- tryCatch(
+        runSimulation(Design[2,], replications=1,
+                      prepare=prepare_error,
+                      generate=generate,
+                      analyse=analyse,
+                      summarise=summarise,
+                      verbose=FALSE),
+        error = function(e) e
+    )
+
+    expect_true(inherits(res_fail, 'error'))
+
+    # Test 5: load_seed_prepare with file path
+    res5 <- runSimulation(Design, replications=2,
+                          prepare=prepare,
+                          generate=generate,
+                          analyse=analyse,
+                          summarise=summarise,
+                          control=list(save_seeds=TRUE,
+                                      store_Random.seeds=TRUE),
+                          filename='test',
+                          verbose=FALSE)
+
+    seed_dirs <- list.dirs('.', recursive=FALSE)
+    seed_dir <- seed_dirs[grep('SimDesign-seeds', seed_dirs)]
+
+    if(length(seed_dir) > 0) {
+        seed_file <- file.path(seed_dir, 'design-row-1', 'prepare-seed')
+
+        if(file.exists(seed_file)) {
+            res5b <- runSimulation(Design[1,], replications=2,
+                                   prepare=prepare,
+                                   generate=generate,
+                                   analyse=analyse,
+                                   summarise=summarise,
+                                   load_seed_prepare=seed_file,
+                                   verbose=FALSE)
+
+            expect_equal(res5$mean_x[1], res5b$mean_x[1])
+        }
+    }
+
+    SimClean(results=TRUE, temp=TRUE)
+
+    # Test 6: Backward compatibility - no prepare()
+    res6 <- runSimulation(Design, replications=2,
+                          generate=generate,
+                          analyse=analyse,
+                          summarise=summarise,
+                          verbose=FALSE)
+
+    expect_is(res6, 'SimDesign')
+    expect_equal(nrow(res6), nrow(Design))
+
+    prepare_seeds6 <- SimExtract(res6, what='prepare_seeds')
+    expect_true(all(sapply(prepare_seeds6, is.null)))
+
+    # Test 7: prepare() seed isolation between conditions
+    res7 <- runSimulation(Design, replications=2,
+                          prepare=prepare,
+                          generate=generate,
+                          analyse=analyse,
+                          summarise=summarise,
+                          control=list(store_Random.seeds=TRUE),
+                          verbose=FALSE)
+
+    prepare_seeds7 <- SimExtract(res7, what='prepare_seeds')
+    expect_false(identical(prepare_seeds7[[1]], prepare_seeds7[[2]]))
+
+    # Test 8: Integration with existing seed management
+    res8 <- runSimulation(Design, replications=10,
+                          prepare=prepare,
+                          generate=generate,
+                          analyse=analyse,
+                          summarise=summarise,
+                          control=list(store_Random.seeds=TRUE),
+                          verbose=FALSE)
+
+    prepare_seeds8 <- SimExtract(res8, what='prepare_seeds')
+    random_seeds8 <- SimExtract(res8, what='Random.seeds')
+
+    expect_true(length(prepare_seeds8) == nrow(Design))
+    expect_is(random_seeds8, 'list')
+    expect_true(length(random_seeds8) == nrow(Design))
+    expect_true(all(sapply(random_seeds8, function(x) nrow(x) == 10)))
+
+})

From 847211ed74adff39bcacce6b5072c6dd9172fe87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Mon, 15 Dec 2025 10:26:38 +0200
Subject: [PATCH 06/10] Update prepare() documentation to emphasize loading
 pre-computed objects

- Rewrite @param prepare to prioritize loading RDS files over dynamic generation
- Add RNG reproducibility warning when generating within prepare()
- Note that prepare objects are not stored by runSimulation()
- Add complete working example demonstrating recommended two-step workflow
- Document prepare seed storage in save_seeds and store_Random.seeds sections

Changes address feedback from PR #78 to position prepare() primarily as
an object loader for cluster workflows, with dynamic generation as a
secondary use case requiring explicit RNG state management.
---
 R/runSimulation.R | 120 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 112 insertions(+), 8 deletions(-)

diff --git a/R/runSimulation.R b/R/runSimulation.R
index 69b26188..99e1b2d6 100644
--- a/R/runSimulation.R
+++ b/R/runSimulation.R
@@ -202,14 +202,41 @@
 #'   and \code{control$stop_on_fatal} flags
 #'
 #' @param prepare (optional) a function that executes once per simulation condition
-#'   (i.e., once per row in \code{design}) to prepare/modify the \code{fixed_objects}
-#'   before replications are run. This function should accept \code{condition} and
-#'   \code{fixed_objects} as arguments and return the modified \code{fixed_objects}.
+#'   (i.e., once per row in \code{design}) to load or prepare condition-specific objects
+#'   into \code{fixed_objects} before replications are run. This function should accept
+#'   \code{condition} and \code{fixed_objects} as arguments and return the modified
+#'   \code{fixed_objects}.
+#'
+#'   The primary use case is to load pre-computed objects from disk that were
+#'   generated offline:
+#'
+#'   \code{prepare <- function(condition, fixed_objects) \{}
+#'
+#'   \code{  # Create filename from design parameters}
+#'
+#'   \code{  fname <- paste0('prepare/N', condition$N, '_SD', condition$SD, '.rds')}
+#'
+#'   \code{  fixed_objects$expensive_stuff <- readRDS(fname)}
+#'
+#'   \code{  return(fixed_objects)}
+#'
+#'   \code{\}}
+#'
+#'   This approach allows you to: (1) pre-generate expensive condition-specific objects
+#'   prior to running the simulation , (2) save them as individual
+#'   RDS files, and (3) load them efficiently during the simulation. This is preferable to
+#'   generating objects within \code{prepare()} itself because it allows you to inspect the
+#'   objects, ensures reproducibility, and separates object generation from the simulation workflow.
+#'
+#'   Note: Objects added to \code{fixed_objects} in \code{prepare()} are not stored
+#'   by \code{runSimulation()} to conserve memory, as they are typically large. You should
+#'   maintain your own records of these objects outside the simulation.
+#'
+#'   RNG Warning: If you generate objects within \code{prepare()} using random number
+#'   generation (e.g., \code{rnorm()}, \code{runif()}), reproducibility requires explicit RNG
+#'   state management via \code{store_Random.seeds=TRUE} and \code{load_seed_prepare}. Pre-computing
+#'   and saving objects as RDS files is the recommended approach for reproducible simulations.
 #'
-#'   This is useful for pre-computing expensive condition-specific objects that can be
-#'   reused across replications (e.g., design matrices, correlation matrices, lookup tables)
-#'   without having to either pre-compute them for all conditions (which can cause
-#'   memory issues) or recompute them for every replication (which causes performance issues).
 #'
 #'   The function signature should be:
 #'   \code{prepare <- function(condition, fixed_objects) \{ ... return(fixed_objects) \}}
@@ -401,6 +428,8 @@
 #'      analysis state (mostly useful
 #'      for debugging). When \code{TRUE}, temporary files will also be saved
 #'      to the working directory (in the same way as when \code{save = TRUE}).
+#'      Additionally, if a \code{prepare} function is provided, the RNG state before
+#'      \code{prepare()} execution is saved to \code{'design-row-X/prepare-seed'}.
 #'      Default is \code{FALSE}
 #'
 #'      Note, however, that this option is not typically necessary or recommended since
@@ -418,7 +447,10 @@
 #'       take up a great deal of unnecessary RAM (see related \code{save_seeds}),
 #'       however this may be useful
 #'       when used with \code{\link{runArraySimulation}}. To extract use
-#'       \code{SimExtract(..., what = 'stored_Random.seeds')}}
+#'       \code{SimExtract(..., what = 'stored_Random.seeds')}. Additionally,
+#'       if a \code{prepare} function is provided, the RNG state before \code{prepare()}
+#'       execution is stored and can be extracted with
+#'       \code{SimExtract(..., what = 'prepare_seeds')}}
 #'
 #'      \item{\code{store_warning_seeds}}{logical (default is \code{FALSE});
 #'       in addition to storing the \code{.Random.seed} states whenever error messages
@@ -1035,6 +1067,78 @@
 #'     facet_grid(stats~standard_deviation_ratio) +
 #'     theme(legend.position = 'none')
 #'
+#' #-------------------------------------------------------------------------------
+#' # Example with prepare() function - Loading pre-computed objects
+#'
+#' \dontrun{
+#'
+#' # Step 1: Pre-generate expensive objects offline (can be parallelized)
+#' Design <- createDesign(N = c(10, 20), rho = c(0.3, 0.7))
+#'
+#' # Create directory for storing prepared objects
+#' dir.create('prepared_objects', showWarnings = FALSE)
+#'
+#' # Generate and save correlation matrices for each condition
+#' for(i in 1:nrow(Design)) {
+#'   cond <- Design[i, ]
+#'
+#'   # Generate correlation matrix
+#'   corr_matrix <- matrix(cond$rho, nrow=cond$N, ncol=cond$N)
+#'   diag(corr_matrix) <- 1
+#'
+#'   # Create filename based on design parameters
+#'   fname <- paste0('prepared_objects/N', cond$N, '_rho', cond$rho, '.rds')
+#'
+#'   # Save to disk
+#'   saveRDS(corr_matrix, file = fname)
+#' }
+#'
+#' # Step 2: Use prepare() to load these objects during simulation
+#' prepare <- function(condition, fixed_objects) {
+#'   # Create matching filename
+#'   fname <- paste0('prepared_objects/N', condition$N,
+#'                   '_rho', condition$rho, '.rds')
+#'
+#'   # Load the pre-computed correlation matrix
+#'   fixed_objects$corr_matrix <- readRDS(fname)
+#'
+#'   return(fixed_objects)
+#' }
+#'
+#' generate <- function(condition, fixed_objects) {
+#'   # Use the loaded correlation matrix to generate multivariate data
+#'   dat <- MASS::mvrnorm(n = 50,
+#'                        mu = rep(0, condition$N),
+#'                        Sigma = fixed_objects$corr_matrix)
+#'   data.frame(dat)
+#' }
+#'
+#' analyse <- function(condition, dat, fixed_objects) {
+#'   # Calculate mean correlation in generated data
+#'   obs_corr <- cor(dat)
+#'   c(mean_corr = mean(obs_corr[lower.tri(obs_corr)]))
+#' }
+#'
+#' summarise <- function(condition, results, fixed_objects) {
+#'   c(mean_corr = mean(results$mean_corr))
+#' }
+#'
+#' # Run simulation - prepare() loads objects efficiently
+#' results <- runSimulation(design = Design,
+#'                          replications = 2,
+#'                          prepare = prepare,
+#'                          generate = generate,
+#'                          analyse = analyse,
+#'                          summarise = summarise,
+#'                          verbose = FALSE)
+#'
+#' results
+#'
+#' # Cleanup
+#' unlink('prepared_objects', recursive = TRUE)
+#'
+#' }
+#'
 #' }
 #'
 runSimulation <- function(design, replications, generate, analyse, summarise,

From e84755d64e2909fa0008d95c2a9b31f11976dc91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Mon, 15 Dec 2025 10:33:45 +0200
Subject: [PATCH 07/10] Added logging changes and prepare function in a new
 release.

---
 NEWS.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index d482f7c9..afe5094c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,26 @@
 # NEWS file for SimDesign
 
+## Changes in SimDesign 2.22
+
+- `runSimulation()` gains a `prepare` function argument for loading or preparing
+  condition-specific objects before replications are executed. This function executes
+  once per simulation condition and is particularly useful for loading pre-computed
+  expensive objects (e.g., correlation matrices, design matrices) from disk in cluster
+  workflows. The recommended approach is to pre-generate objects offline and load them
+  via `prepare()` using `readRDS()`, rather than generating them dynamically. Objects
+  added to `fixed_objects` in `prepare()` are available to all replications within
+  that condition but are not stored in the final results to conserve memory.
+
+- Added `load_seed_prepare` parameter to `runSimulation()` for debugging the `prepare()`
+  function by reproducing exact RNG states. Works similarly to `load_seed` but specifically
+  for the prepare step. Prepare seeds can be extracted using
+  `SimExtract(res, 'prepare_seeds')` when `store_Random.seeds=TRUE`, and error seeds
+  can be extracted with `SimExtract(res, 'prepare_error_seed')`.
+
+- Progress reporting in non-interactive mode (e.g., SLURM clusters) has been improved.
+  Progress bars now display correctly when running batch jobs on cluster systems,
+  providing better visibility into long-running simulations.
+
 ## Changes in SimDesign 2.21
 
 - `createDesign()` gains a `fully.crossed = TRUE` argument. When disabled (`FALSE`) will create a 

From 5f4b945eb945cc970185087ad8afcbace06c1573 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Mon, 15 Dec 2025 10:40:28 +0200
Subject: [PATCH 08/10] Revert "Added logging changes and prepare function in a
 new release."

This reverts commit e84755d64e2909fa0008d95c2a9b31f11976dc91.
---
 NEWS.md | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index afe5094c..d482f7c9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,26 +1,5 @@
 # NEWS file for SimDesign
 
-## Changes in SimDesign 2.22
-
-- `runSimulation()` gains a `prepare` function argument for loading or preparing
-  condition-specific objects before replications are executed. This function executes
-  once per simulation condition and is particularly useful for loading pre-computed
-  expensive objects (e.g., correlation matrices, design matrices) from disk in cluster
-  workflows. The recommended approach is to pre-generate objects offline and load them
-  via `prepare()` using `readRDS()`, rather than generating them dynamically. Objects
-  added to `fixed_objects` in `prepare()` are available to all replications within
-  that condition but are not stored in the final results to conserve memory.
-
-- Added `load_seed_prepare` parameter to `runSimulation()` for debugging the `prepare()`
-  function by reproducing exact RNG states. Works similarly to `load_seed` but specifically
-  for the prepare step. Prepare seeds can be extracted using
-  `SimExtract(res, 'prepare_seeds')` when `store_Random.seeds=TRUE`, and error seeds
-  can be extracted with `SimExtract(res, 'prepare_error_seed')`.
-
-- Progress reporting in non-interactive mode (e.g., SLURM clusters) has been improved.
-  Progress bars now display correctly when running batch jobs on cluster systems,
-  providing better visibility into long-running simulations.
-
 ## Changes in SimDesign 2.21
 
 - `createDesign()` gains a `fully.crossed = TRUE` argument. When disabled (`FALSE`) will create a 

From f8c727713b9e5707c61e214fff5e2eb32ce75b13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Mon, 15 Dec 2025 10:42:29 +0200
Subject: [PATCH 09/10] Incremented release number and added description of the
 prepare function and improved logging on cluster.

---
 NEWS.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index d482f7c9..2dbce029 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,23 @@
 # NEWS file for SimDesign
 
+## Changes in SimDesign 2.22
+
+- `runSimulation()` gains a `prepare` function argument for loading or preparing
+  condition-specific objects before replications are executed. This function executes
+  once per simulation condition and is particularly useful for loading pre-computed
+  expensive objects (e.g., correlation matrices, design matrices) from disk in cluster
+  workflows. 
+
+- Added `load_seed_prepare` parameter to `runSimulation()` for debugging the `prepare()`
+  function by reproducing exact RNG states. Works similarly to `load_seed` but specifically
+  for the prepare step. Prepare seeds can be extracted using
+  `SimExtract(res, 'prepare_seeds')` when `store_Random.seeds=TRUE`, and error seeds
+  can be extracted with `SimExtract(res, 'prepare_error_seed')`.
+
+- Progress reporting in non-interactive mode (e.g., SLURM clusters) has been improved.
+  Progress bars now display correctly when running batch jobs on cluster systems,
+  providing better visibility into long-running simulations.
+
 ## Changes in SimDesign 2.21
 
 - `createDesign()` gains a `fully.crossed = TRUE` argument. When disabled (`FALSE`) will create a 

From 3dfdccd0f2ec0e365a3b7f2cb3fdbd93cf66aa34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20R=C3=B6nkk=C3=B6?= <mikko.ronkko@iki.fi>
Date: Mon, 15 Dec 2025 10:43:53 +0200
Subject: [PATCH 10/10] =?UTF-8?q?Fixed=20Mikko=20R=C3=B6nkk=C3=B6's=20last?=
 =?UTF-8?q?=20name=20to=20have=20umlauts=20and=20changed=20ctb=20->=20aut?=
 =?UTF-8?q?=20role.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fedc81f7..6a10086d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -5,7 +5,7 @@ Authors@R: c(person("Phil", "Chalmers", email = "rphilip.chalmers@gmail.com", ro
                     comment = c(ORCID="0000-0001-5332-2810")),
              person("Matthew", "Sigal", role = c("ctb")),
              person("Ogreden", family="Oguzhan", role = c("ctb")),
-             person("Mikko ", family="Ronkko", role = c("ctb")),
+             person("Mikko", family="Rönkkö", role = c("aut")),
              person("Moritz", family="Ketzer", role = c("ctb")))
 Description: Provides tools to safely and efficiently organize and execute 
     Monte Carlo simulation experiments in R.