From e6ee751902a2df52f5dba230e211a84534cfd6d0 Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Mon, 25 Aug 2025 16:06:31 +0200 Subject: [PATCH 1/8] Add CPU-parallelization using mclapply and L'Ecuyer-CMGR --- R/GOF_model.R | 13 +++++++++---- R/GOF_model_test.R | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/R/GOF_model.R b/R/GOF_model.R index 344dc92..9f5d5e8 100644 --- a/R/GOF_model.R +++ b/R/GOF_model.R @@ -1,3 +1,5 @@ +library(parallel) + ##' @title Convenience function for creating a GOF-test for statistical models ##' ##' @description Simplifies the creation of an instance of @@ -54,7 +56,9 @@ GOF_model <- function(model, # nolint y_name, Rn1_statistic, # nolint gof_model_resample_class = GOF_model_resample, - gof_model_test_class = GOF_model_test + gof_model_test_class = GOF_model_test, + n_cores = NULL, + seed = NULL ) { checkmate::assert_subset( x = simulator_type, @@ -67,8 +71,6 @@ GOF_model <- function(model, # nolint )) } - - simulators <- list( lm = list( parametric = GOF_lm_sim_param, @@ -113,6 +115,9 @@ GOF_model <- function(model, # nolint y_name = y_name, Rn1_statistic = Rn1_statistic, gof_model_info_extractor = mie, - gof_model_resample = model_resample) + gof_model_resample = model_resample, + n_cores = n_cores, + seed = seed) + return(ret) } diff --git a/R/GOF_model_test.R b/R/GOF_model_test.R index 24a2ec2..6357fd2 100644 --- a/R/GOF_model_test.R +++ b/R/GOF_model_test.R @@ -29,7 +29,9 @@ GOF_model_test <- R6::R6Class( # nolint y_name, Rn1_statistic, # nolint gof_model_info_extractor, - gof_model_resample) { + gof_model_resample, + n_cores, + seed) { checkmate::assert_count(x = nmb_boot_samples, positive = TRUE) private$model_org <- model private$data_org <- data @@ -38,6 +40,8 @@ GOF_model_test <- R6::R6Class( # nolint private$nmb_boot_samples <- nmb_boot_samples private$model_info_extractor <- gof_model_info_extractor private$model_resample <- gof_model_resample + private$n_cores <- n_cores + private$seed <- seed private$order_beta_dot_X_org <- order( # nolint private$model_info_extractor$beta_x_covariates( model = private$model_org @@ -84,6 +88,8 @@ GOF_model_test <- R6::R6Class( # nolint nmb_boot_samples = NULL, model_info_extractor = NULL, model_resample = NULL, + n_cores = NULL, + seed = NULL, Rn1_statistic = NULL, Rn1_boot = NULL, Rn1_org = NULL, @@ -108,5 +114,34 @@ GOF_model_test <- R6::R6Class( # nolint order_beta_x_covariates = private$order_beta_dot_X_org) return(Rn1_boot) } - private$Rn1_boot <- lapply(X = 1:private$nmb_boot_samples, FUN = f) # nolint + + # Replace RNG with "L'Ecuyer-CMRG" if going parallel + replaced.RNG <- FALSE + if ((!is.null(private$n_cores)) && private$n_cores > 0) { + # save and replace current RNG state + original.state <- if (exists(".Random.seed", .GlobalEnv)) .GlobalEnv$.Random.seed else NULL + RNGkind("L'Ecuyer-CMRG") + set.seed(NULL) + replaced.RNG <- TRUE + } else { + if ((!is.null(private$n_cores)) && private$n_cores < 0) { + warning("The number of cores must not be a negative number. Defaulting to 1") + } + private$n_cores <- 1 + } + + if (!is.null(private$seed)) { + set.seed(private$seed) + } + + private$Rn1_boot <- parallel::mclapply(X = 1:private$nmb_boot_samples, FUN = f, mc.cores = private$n_cores) # nolint + + # Reset initial RNG if it has been replaced + if (replaced.RNG) { + if (!is.null(original.state)) { + .GlobalEnv$.Random.seed <- original.state + } else { + RNGkind("default") + } + } })) From de041280e8adf2c4aa63298312dd865b70b4e721 Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Sat, 18 Oct 2025 09:40:41 +0200 Subject: [PATCH 2/8] Update documentation for cpu-parallel feature and its parameters --- R/GOF_model.R | 7 +++++++ R/GOF_model_test.R | 6 ++++++ README.Rmd | 13 +++++++++++++ vignettes/New-Models.Rmd | 8 ++++++-- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/R/GOF_model.R b/R/GOF_model.R index 9f5d5e8..f581ba7 100644 --- a/R/GOF_model.R +++ b/R/GOF_model.R @@ -20,6 +20,13 @@ library(parallel) ##' class used for performing the GOF test (\link{GOF_model_test}) ##' is injected. This parameter simply makes it easier to test the ##' convenience function properly. +##' @param n_cores positive integer specifying the number of CPU cores to use +##' for parallel resampling. If bigger than 1, the L'Ecuyer-CMRG is used; +##' if 'NULL' or 1, one core is used with the current RNG. +##' Default is "NULL". +##' @param seed integer intended to seed the internally setup +##' L'Ecuyer-CMRG, but will also be applied when RNG not replaced. +##' Default is "NULL", which will not alter the seed. ##' @export ##' @return instance of \link{GOF_model_test} ##' @examples diff --git a/R/GOF_model_test.R b/R/GOF_model_test.R index 6357fd2..0bb553d 100644 --- a/R/GOF_model_test.R +++ b/R/GOF_model_test.R @@ -22,6 +22,12 @@ GOF_model_test <- R6::R6Class( # nolint ##' @param gof_model_resample an instance that implements ##' \link{GOF_model_resample} in order to apply it to ##' \code{model} + ##' @param n_cores positive integer specifying the number of CPU cores to + ##' use for parallel resampling. If bigger than 1, the L'Ecuyer-CMRG is + ##' used; if 'NULL' or 1, one core is used with the current RNG. + ##' @param seed integer intended to seed the internally setup + ##' L'Ecuyer-CMRG, but will also be applied when RNG not replaced, + ##' as long as it is not "NULL". ##' @return An instance of the Class initialize = function(model, data, diff --git a/README.Rmd b/README.Rmd index 6f8e167..bc1c359 100644 --- a/README.Rmd +++ b/README.Rmd @@ -81,6 +81,19 @@ mt$get_pvalue() ... +## Parallelization + +The bootstrapping process can be accelerated using the `n_cores` parameter of +the `GOF_model` function, that specifies the number of CPU cores to use. + +If this parameter is set to at least two cores, internally the currently used +RNG is stored and replaced by the L'Ecuyer-CMRG generator, which is safe to use +in a parallel context. + +This internal generator can be seeded using the `seed` parameter of the +`GOF_model` function, which however will also apply if the `n_cores` parameter +is not used and thus the generator is not replaced. + ## Installation You can install it from CRAN diff --git a/vignettes/New-Models.Rmd b/vignettes/New-Models.Rmd index 58f53be..1495489 100644 --- a/vignettes/New-Models.Rmd +++ b/vignettes/New-Models.Rmd @@ -240,7 +240,9 @@ mt <- GOF_model_test$new( gof_model_resample = GOF_model_resample$new( gof_model_simulator = my_simulator, gof_model_trainer = my_trainer - ) + ), + n_cores = NULL, + seed = NULL ) mt$get_pvalue() ``` @@ -304,7 +306,9 @@ mt <- GOF_model_test$new( gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_glm_sim_param$new(), gof_model_trainer = my_negbin_trainer$new() - ) + ), + n_cores = NULL, + seed = NULL ) mt$get_pvalue() ``` From 1241bb83ffe9c57e56804ad704ffdb6711f47a48 Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Wed, 22 Oct 2025 12:01:26 +0200 Subject: [PATCH 3/8] Adapt tests for changed interface in context of the cpu-parallel feature and extend tests --- inst/tinytest/test_GOF_model.R | 12 +- inst/tinytest/test_GOF_model_test.R | 186 ++++++++++++++++++++++++++-- 2 files changed, 188 insertions(+), 10 deletions(-) diff --git a/inst/tinytest/test_GOF_model.R b/inst/tinytest/test_GOF_model.R index 2a007ce..78a9bf7 100644 --- a/inst/tinytest/test_GOF_model.R +++ b/inst/tinytest/test_GOF_model.R @@ -33,7 +33,9 @@ GOF_model_test_dummy <- R6::R6Class( # nolint y_name, Rn1_statistic, # nolint gof_model_info_extractor, - gof_model_resample) { + gof_model_resample, + n_cores, + seed) { })) GOF_model_error_if_fit_class_is_not_lm_or_glm <- function() { # nolint @@ -86,7 +88,9 @@ GOF_model_uses_lm_info_extractor <- function() { # nolint y_name, Rn1_statistic, # nolint gof_model_info_extractor, - gof_model_resample) { + gof_model_resample, + n_cores, + seed) { inject_lm_info_extractor <<- inherits( x = gof_model_info_extractor, what = "GOF_lm_info_extractor") @@ -209,7 +213,9 @@ GOF_model_uses_glm_info_extractor <- function() { # nolint y_name, Rn1_statistic, # nolint gof_model_info_extractor, - gof_model_resample) { + gof_model_resample, + n_cores, + seed) { inject_glm_info_extractor <<- inherits( x = gof_model_info_extractor, what = "GOF_glm_info_extractor") diff --git a/inst/tinytest/test_GOF_model_test.R b/inst/tinytest/test_GOF_model_test.R index ea815b2..04a6f45 100644 --- a/inst/tinytest/test_GOF_model_test.R +++ b/inst/tinytest/test_GOF_model_test.R @@ -47,6 +47,27 @@ GOF_model_test_necessary_input <- function() { # nolint Rn1_statistic = "dummy", gof_model_info_extractor = "dummy"), pattern = "gof_model_resample") + expect_error( + GOF_model_test$new( + model = "dummy", + nmb_boot_samples = 1, + data = "dummy", + y_name = "dummy", + Rn1_statistic = "dummy", + gof_model_info_extractor = "dummy", + gof_model_resample = "dummy"), + pattern = "n_cores") + expect_error( + GOF_model_test$new( + model = "dummy", + nmb_boot_samples = 1, + data = "dummy", + y_name = "dummy", + Rn1_statistic = "dummy", + gof_model_info_extractor = "dummy", + gof_model_resample = "dummy", + n_cores = 1), + pattern = "seed") } GOF_model_test_necessary_input() @@ -63,7 +84,9 @@ GOF_model_test_calc_Rn1_org <- function() { # nolint Rn1_statistic = "dummy", nmb_boot_samples = 10, gof_model_info_extractor = GOF_lm_info_extractor$new(), - gof_model_resample = "dummy") + gof_model_resample = "dummy", + n_cores = NULL, + seed = NULL) expect_equal( mt$get_Rn1_org(), Rn1_fun(r = residuals(fit), o = order(X)) @@ -98,7 +121,9 @@ GOF_model_test_calc_Rn1_boot <- function() { # nolint gof_model_resample = GOF_model_resample$new( gof_model_simulator = lm_sim_para_mock, gof_model_trainer = GOF_lm_trainer$new() - ) + ), + n_cores = NULL, + seed = NULL ) d1 <- d @@ -137,7 +162,10 @@ GOF_model_test_calc_pvalue <- function() { # nolint gof_model_info_extractor = GOF_lm_info_extractor$new(), gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_lm_sim_param$new(), - gof_model_trainer = GOF_lm_trainer$new())) + gof_model_trainer = GOF_lm_trainer$new()), + n_cores = NULL, + seed = NULL + ) out <- mt$get_pvalue() expect_equal( out, { @@ -168,7 +196,10 @@ GOF_model_test_expect_small_pvalue <- function() { # nolint gof_model_info_extractor = GOF_glm_info_extractor$new(), gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_glm_sim_param$new(), - gof_model_trainer = GOF_glm_trainer$new())) + gof_model_trainer = GOF_glm_trainer$new()), + n_cores = NULL, + seed = NULL + ) expect_equal(mt$get_pvalue(), 0) @@ -189,7 +220,10 @@ GOF_model_test_expect_small_pvalue <- function() { # nolint gof_model_simulator = GOF_sim_wild_rademacher$new( gof_model_info_extractor = ie ), - gof_model_trainer = GOF_lm_trainer$new())) + gof_model_trainer = GOF_lm_trainer$new()), + n_cores = NULL, + seed = NULL + ) expect_equal(mt$get_pvalue(), 0) } @@ -211,7 +245,10 @@ GOF_model_test_expect_non_small_pvalue <- function() { # nolint gof_model_info_extractor = GOF_glm_info_extractor$new(), gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_glm_sim_param$new(), - gof_model_trainer = GOF_glm_trainer$new())) + gof_model_trainer = GOF_glm_trainer$new()), + n_cores = NULL, + seed = NULL + ) expect_equal(mt$get_pvalue(), 0.74) @@ -232,8 +269,143 @@ GOF_model_test_expect_non_small_pvalue <- function() { # nolint gof_model_simulator = GOF_sim_wild_rademacher$new( gof_model_info_extractor = ie ), - gof_model_trainer = GOF_lm_trainer$new())) + gof_model_trainer = GOF_lm_trainer$new()), + n_cores = NULL, + seed = NULL + ) expect_equal(mt$get_pvalue(), 0.8) } GOF_model_test_expect_non_small_pvalue() + + + +GOF_model_test_calc_pvalue_parallel <- function() { # nolint + set.seed(1) + X <- rnorm(10) # nolint + Y <- 5 * X + rnorm(10) # nolint + d <- data.frame(y = Y, x = X) + fit <- lm(y ~ x, data = d) + KS <- Rn1_KS$new() # nolint + mt <- GOF_model_test$new( + model = fit, + data = d, + y_name = "y", + Rn1_statistic = KS, + nmb_boot_samples = 10, + gof_model_info_extractor = GOF_lm_info_extractor$new(), + gof_model_resample = GOF_model_resample$new( + gof_model_simulator = GOF_lm_sim_param$new(), + gof_model_trainer = GOF_lm_trainer$new()), + n_cores = 4, + seed = 1 + ) + out <- mt$get_pvalue() + expect_equal( + out, { + stat_org <- KS$calc_statistic(mt$get_Rn1_org()) + stat_boot <- sapply(mt$get_Rn1_boot(), KS$calc_statistic) + mean(stat_org < stat_boot) + } + ) +} +GOF_model_test_calc_pvalue_parallel() + +GOF_model_test_expect_small_pvalue_parallel <- function() { # nolint + set.seed(1) + X1 <- rnorm(100) # nolint + X2 <- rnorm(100) # nolint + d <- data.frame( + y = rpois(n = 100, lambda = exp(4 + X1 * 2 + X2 * 6)), + x1 = X1) + fit <- glm(y~x1, data = d, family = poisson()) + mt <- GOF_model_test$new( + model = fit, + data = d, + y_name = "y", + Rn1_statistic = Rn1_KS$new(), + nmb_boot_samples = 100, + gof_model_info_extractor = GOF_glm_info_extractor$new(), + gof_model_resample = GOF_model_resample$new( + gof_model_simulator = GOF_glm_sim_param$new(), + gof_model_trainer = GOF_glm_trainer$new()), + n_cores = 4, + seed = 1 + ) + + expect_equal(mt$get_pvalue(), 0) + + X1 <- rnorm(100) # nolint + d <- data.frame( + y = rnorm(n = 100, mean = 4 + X1^2), + x1 = X1) + fit <- lm(y~x1, data = d) + ie <- GOF_lm_info_extractor$new() + mt <- GOF_model_test$new( + model = fit, + data = d, + y_name = "y", + Rn1_statistic = Rn1_KS$new(), + nmb_boot_samples = 100, + gof_model_info_extractor = ie, + gof_model_resample = GOF_model_resample$new( + gof_model_simulator = GOF_sim_wild_rademacher$new( + gof_model_info_extractor = ie + ), + gof_model_trainer = GOF_lm_trainer$new()), + n_cores = 4, + seed = 1 + ) + + expect_equal(mt$get_pvalue(), 0) +} +GOF_model_test_expect_small_pvalue_parallel() + +GOF_model_test_expect_non_small_pvalue_parallel <- function() { # nolint + set.seed(1) + X1 <- rnorm(100) # nolint + d <- data.frame( + y = rpois(n = 100, lambda = exp(4 + X1 * 2)), + x1 = X1) + fit <- glm(y~x1, data = d, family = poisson()) + mt <- GOF_model_test$new( + model = fit, + data = d, + y_name = "y", + Rn1_statistic = Rn1_KS$new(), + nmb_boot_samples = 100, + gof_model_info_extractor = GOF_glm_info_extractor$new(), + gof_model_resample = GOF_model_resample$new( + gof_model_simulator = GOF_glm_sim_param$new(), + gof_model_trainer = GOF_glm_trainer$new()), + n_cores = 4, + seed = 1 + ) + + expect_equal(mt$get_pvalue(), 0.89) + + X1 <- rnorm(100) # nolint + d <- data.frame( + y = rnorm(n = 100, mean = 4 + X1 + X1^2), + x1 = X1) + fit <- lm(y~x1 + I(x1^2), data = d) + ie <- GOF_lm_info_extractor$new() + mt <- GOF_model_test$new( + model = fit, + data = d, + y_name = "y", + Rn1_statistic = Rn1_CvM$new(), + nmb_boot_samples = 100, + gof_model_info_extractor = ie, + gof_model_resample = GOF_model_resample$new( + gof_model_simulator = GOF_sim_wild_rademacher$new( + gof_model_info_extractor = ie + ), + gof_model_trainer = GOF_lm_trainer$new()), + n_cores = 4, + seed = 1 + ) + + expect_equal(mt$get_pvalue(), 0.94) +} +GOF_model_test_expect_non_small_pvalue_parallel() From bc4a6a50cce3f155356fa7103dcf8b0baa7af451 Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Wed, 22 Oct 2025 13:47:04 +0200 Subject: [PATCH 4/8] Add assert for n_cores to improve code aestetic and test coverage --- R/GOF_model_test.R | 10 ++- inst/tinytest/test_GOF_model_test.R | 108 ++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 20 deletions(-) diff --git a/R/GOF_model_test.R b/R/GOF_model_test.R index 0bb553d..ebe89b4 100644 --- a/R/GOF_model_test.R +++ b/R/GOF_model_test.R @@ -39,6 +39,7 @@ GOF_model_test <- R6::R6Class( # nolint n_cores, seed) { checkmate::assert_count(x = nmb_boot_samples, positive = TRUE) + checkmate::assert_count(x = n_cores, positive = TRUE, null.ok = TRUE) private$model_org <- model private$data_org <- data private$y_name <- y_name @@ -123,17 +124,14 @@ GOF_model_test <- R6::R6Class( # nolint # Replace RNG with "L'Ecuyer-CMRG" if going parallel replaced.RNG <- FALSE - if ((!is.null(private$n_cores)) && private$n_cores > 0) { + if (is.null(private$n_cores)) { + private$n_cores <- 1 + } else if (private$n_cores > 1) { # save and replace current RNG state original.state <- if (exists(".Random.seed", .GlobalEnv)) .GlobalEnv$.Random.seed else NULL RNGkind("L'Ecuyer-CMRG") set.seed(NULL) replaced.RNG <- TRUE - } else { - if ((!is.null(private$n_cores)) && private$n_cores < 0) { - warning("The number of cores must not be a negative number. Defaulting to 1") - } - private$n_cores <- 1 } if (!is.null(private$seed)) { diff --git a/inst/tinytest/test_GOF_model_test.R b/inst/tinytest/test_GOF_model_test.R index 04a6f45..eaa9508 100644 --- a/inst/tinytest/test_GOF_model_test.R +++ b/inst/tinytest/test_GOF_model_test.R @@ -11,33 +11,38 @@ GOF_model_test_necessary_input <- function() { # nolint expect_error( GOF_model_test$new( nmb_boot_samples = 1), - pattern = "model") + pattern = "n_cores") expect_error( GOF_model_test$new( - model = "dummy", - nmb_boot_samples = 1), - pattern = "data") + nmb_boot_samples = 1, + n_cores = 0), + pattern = ".*n_cores.*>= 1") + expect_error( + GOF_model_test$new( + nmb_boot_samples = 1, + n_cores = 1), + pattern = "model") expect_error( GOF_model_test$new( model = "dummy", nmb_boot_samples = 1, - data = "dummy"), - pattern = "y_name") + n_cores = 1), + pattern = "data") expect_error( GOF_model_test$new( model = "dummy", nmb_boot_samples = 1, data = "dummy", - y_name = "dummy"), - pattern = "Rn1_statistic") + n_cores = 1), + pattern = "y_name") expect_error( GOF_model_test$new( model = "dummy", nmb_boot_samples = 1, data = "dummy", y_name = "dummy", - Rn1_statistic = "dummy"), - pattern = "gof_model_info_extractor") + n_cores = 1), + pattern = "Rn1_statistic") expect_error( GOF_model_test$new( model = "dummy", @@ -45,8 +50,8 @@ GOF_model_test_necessary_input <- function() { # nolint data = "dummy", y_name = "dummy", Rn1_statistic = "dummy", - gof_model_info_extractor = "dummy"), - pattern = "gof_model_resample") + n_cores = 1), + pattern = "gof_model_info_extractor") expect_error( GOF_model_test$new( model = "dummy", @@ -55,8 +60,8 @@ GOF_model_test_necessary_input <- function() { # nolint y_name = "dummy", Rn1_statistic = "dummy", gof_model_info_extractor = "dummy", - gof_model_resample = "dummy"), - pattern = "n_cores") + n_cores = 1), + pattern = "gof_model_resample") expect_error( GOF_model_test$new( model = "dummy", @@ -280,6 +285,81 @@ GOF_model_test_expect_non_small_pvalue() +GOF_model_test_no_initial_RNG <- function() { # nolint + set.seed(1) + X <- rnorm(10) # nolint + Y <- 5 * X + rnorm(10) # nolint + d <- data.frame(y = Y, x = X) + fit <- lm(y ~ x, data = d) + + remove(.Random.seed, envir = .GlobalEnv) + + mt <- GOF_model_test$new( + model = fit, + nmb_boot_samples = 1, + data = d, + y_name = "y", + Rn1_statistic = "dummy", + gof_model_info_extractor = GOF_lm_info_extractor$new(), + gof_model_resample = GOF_model_resample$new( + gof_model_simulator = GOF_lm_sim_param$new(), + gof_model_trainer = GOF_lm_trainer$new()), + n_cores = 4, + seed = 1 + ) + out <- mt$get_Rn1_boot() + + reset.state <- .GlobalEnv$.Random.seed + RNGkind("default") + expect_equal(reset.state[1:2], .GlobalEnv$.Random.seed[1:2]) +} +GOF_model_test_no_initial_RNG() + +GOF_model_test_calc_Rn1_boot_parallel <- function() { # nolint + set.seed(1) + X <- rnorm(10) # nolint + Y <- 5 * X + rnorm(10) # nolint + d <- data.frame(y = Y, x = X) + fit <- lm(y ~ x, data = d) + + model_resample_mock <- list(rnorm(10), rnorm(10), rnorm(10)) + MODEL_MOCK_NMB <- 0 # nolint + lm_sim_para_mock <- R6::R6Class( + public = list( + resample_y = function(model) { + MODEL_MOCK_NMB <<- MODEL_MOCK_NMB + 1 # nolint + model_resample_mock[[MODEL_MOCK_NMB]] + }))$new() + + mt <- GOF_model_test$new( + model = fit, + nmb_boot_samples = 3, + data = d, + y_name = "y", + Rn1_statistic = "dummy", + gof_model_info_extractor = GOF_lm_info_extractor$new(), + gof_model_resample = GOF_model_resample$new( + gof_model_simulator = lm_sim_para_mock, + gof_model_trainer = GOF_lm_trainer$new() + ), + n_cores = 4, + seed = 1 + ) + + d1 <- d + d1$y <- model_resample_mock[[1]] + fit1 <- lm(y~x, data = d1) + out <- mt$get_Rn1_boot() + expect_equal( + out, + list( + Rn1_fun(r = residuals(fit1), o = order(X)), + Rn1_fun(r = residuals(fit1), o = order(X)), + Rn1_fun(r = residuals(fit1), o = order(X)) + )) +} +GOF_model_test_calc_Rn1_boot_parallel() + GOF_model_test_calc_pvalue_parallel <- function() { # nolint set.seed(1) X <- rnorm(10) # nolint From 51e961a19888fc6a55d54f49f98c25534e79859f Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Wed, 22 Oct 2025 18:02:10 +0200 Subject: [PATCH 5/8] Rename and line-break as linter suggests --- R/GOF_model_test.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/GOF_model_test.R b/R/GOF_model_test.R index ebe89b4..a37bf30 100644 --- a/R/GOF_model_test.R +++ b/R/GOF_model_test.R @@ -123,15 +123,16 @@ GOF_model_test <- R6::R6Class( # nolint } # Replace RNG with "L'Ecuyer-CMRG" if going parallel - replaced.RNG <- FALSE + replaced_rng <- FALSE if (is.null(private$n_cores)) { private$n_cores <- 1 } else if (private$n_cores > 1) { # save and replace current RNG state - original.state <- if (exists(".Random.seed", .GlobalEnv)) .GlobalEnv$.Random.seed else NULL + original_state <- if (exists(".Random.seed", .GlobalEnv)) + .GlobalEnv$.Random.seed else NULL # nolint RNGkind("L'Ecuyer-CMRG") set.seed(NULL) - replaced.RNG <- TRUE + replaced_rng <- TRUE } if (!is.null(private$seed)) { @@ -141,9 +142,9 @@ GOF_model_test <- R6::R6Class( # nolint private$Rn1_boot <- parallel::mclapply(X = 1:private$nmb_boot_samples, FUN = f, mc.cores = private$n_cores) # nolint # Reset initial RNG if it has been replaced - if (replaced.RNG) { - if (!is.null(original.state)) { - .GlobalEnv$.Random.seed <- original.state + if (replaced_rng) { + if (!is.null(original_state)) { + .GlobalEnv$.Random.seed <- original_state } else { RNGkind("default") } From 93b66c9f489cdcb98ea34923f80b31351204942a Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Wed, 22 Oct 2025 18:06:08 +0200 Subject: [PATCH 6/8] Update Description (auto-generated) --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c80b63f..d495553 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,7 @@ Imports: R6 (>= 2.4.1) License: GPL-3 Encoding: UTF-8 -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 URL: https://github.com/MarselScheer/bootGOF BugReports: https://github.com/MarselScheer/bootGOF/issues Suggests: From 34d3578e838ab12acc1320054fc2cb9fd3ad7287 Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Thu, 23 Oct 2025 09:07:30 +0200 Subject: [PATCH 7/8] Fix parallel cases for R-cmd-check make target by only using 2 cores --- inst/tinytest/test_GOF_model_test.R | 39 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/inst/tinytest/test_GOF_model_test.R b/inst/tinytest/test_GOF_model_test.R index eaa9508..af35410 100644 --- a/inst/tinytest/test_GOF_model_test.R +++ b/inst/tinytest/test_GOF_model_test.R @@ -304,7 +304,7 @@ GOF_model_test_no_initial_RNG <- function() { # nolint gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_lm_sim_param$new(), gof_model_trainer = GOF_lm_trainer$new()), - n_cores = 4, + n_cores = 2, seed = 1 ) out <- mt$get_Rn1_boot() @@ -315,7 +315,7 @@ GOF_model_test_no_initial_RNG <- function() { # nolint } GOF_model_test_no_initial_RNG() -GOF_model_test_calc_Rn1_boot_parallel <- function() { # nolint +GOF_model_test_calc_Rn1_boot_2_cores <- function() { # nolint set.seed(1) X <- rnorm(10) # nolint Y <- 5 * X + rnorm(10) # nolint @@ -342,25 +342,28 @@ GOF_model_test_calc_Rn1_boot_parallel <- function() { # nolint gof_model_simulator = lm_sim_para_mock, gof_model_trainer = GOF_lm_trainer$new() ), - n_cores = 4, + n_cores = 2, seed = 1 ) d1 <- d d1$y <- model_resample_mock[[1]] fit1 <- lm(y~x, data = d1) + d2 <- d + d2$y <- model_resample_mock[[2]] + fit2 <- lm(y~x, data = d2) out <- mt$get_Rn1_boot() expect_equal( out, list( Rn1_fun(r = residuals(fit1), o = order(X)), Rn1_fun(r = residuals(fit1), o = order(X)), - Rn1_fun(r = residuals(fit1), o = order(X)) + Rn1_fun(r = residuals(fit2), o = order(X)) )) } -GOF_model_test_calc_Rn1_boot_parallel() +GOF_model_test_calc_Rn1_boot_2_cores() -GOF_model_test_calc_pvalue_parallel <- function() { # nolint +GOF_model_test_calc_pvalue_2_cores <- function() { # nolint set.seed(1) X <- rnorm(10) # nolint Y <- 5 * X + rnorm(10) # nolint @@ -377,7 +380,7 @@ GOF_model_test_calc_pvalue_parallel <- function() { # nolint gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_lm_sim_param$new(), gof_model_trainer = GOF_lm_trainer$new()), - n_cores = 4, + n_cores = 2, seed = 1 ) out <- mt$get_pvalue() @@ -389,9 +392,9 @@ GOF_model_test_calc_pvalue_parallel <- function() { # nolint } ) } -GOF_model_test_calc_pvalue_parallel() +GOF_model_test_calc_pvalue_2_cores() -GOF_model_test_expect_small_pvalue_parallel <- function() { # nolint +GOF_model_test_expect_small_pvalue_2_cores <- function() { # nolint set.seed(1) X1 <- rnorm(100) # nolint X2 <- rnorm(100) # nolint @@ -409,7 +412,7 @@ GOF_model_test_expect_small_pvalue_parallel <- function() { # nolint gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_glm_sim_param$new(), gof_model_trainer = GOF_glm_trainer$new()), - n_cores = 4, + n_cores = 2, seed = 1 ) @@ -433,15 +436,15 @@ GOF_model_test_expect_small_pvalue_parallel <- function() { # nolint gof_model_info_extractor = ie ), gof_model_trainer = GOF_lm_trainer$new()), - n_cores = 4, + n_cores = 2, seed = 1 ) expect_equal(mt$get_pvalue(), 0) } -GOF_model_test_expect_small_pvalue_parallel() +GOF_model_test_expect_small_pvalue_2_cores() -GOF_model_test_expect_non_small_pvalue_parallel <- function() { # nolint +GOF_model_test_expect_non_small_pvalue_2_cores <- function() { # nolint set.seed(1) X1 <- rnorm(100) # nolint d <- data.frame( @@ -458,11 +461,11 @@ GOF_model_test_expect_non_small_pvalue_parallel <- function() { # nolint gof_model_resample = GOF_model_resample$new( gof_model_simulator = GOF_glm_sim_param$new(), gof_model_trainer = GOF_glm_trainer$new()), - n_cores = 4, + n_cores = 2, seed = 1 ) - expect_equal(mt$get_pvalue(), 0.89) + expect_equal(mt$get_pvalue(), 0.8) X1 <- rnorm(100) # nolint d <- data.frame( @@ -482,10 +485,10 @@ GOF_model_test_expect_non_small_pvalue_parallel <- function() { # nolint gof_model_info_extractor = ie ), gof_model_trainer = GOF_lm_trainer$new()), - n_cores = 4, + n_cores = 2, seed = 1 ) - expect_equal(mt$get_pvalue(), 0.94) + expect_equal(mt$get_pvalue(), 0.92) } -GOF_model_test_expect_non_small_pvalue_parallel() +GOF_model_test_expect_non_small_pvalue_2_cores() From 9405160f4e84d2400a746b352bd5ef13c02224ba Mon Sep 17 00:00:00 2001 From: Yannic Kitten Date: Wed, 22 Oct 2025 18:07:16 +0200 Subject: [PATCH 8/8] Update parallelization section in READMEs to include example code --- README.Rmd | 27 ++++++++++++++++++++++++++- README.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 73 insertions(+), 9 deletions(-) diff --git a/README.Rmd b/README.Rmd index bc1c359..0b38c88 100644 --- a/README.Rmd +++ b/README.Rmd @@ -87,13 +87,38 @@ The bootstrapping process can be accelerated using the `n_cores` parameter of the `GOF_model` function, that specifies the number of CPU cores to use. If this parameter is set to at least two cores, internally the currently used -RNG is stored and replaced by the L'Ecuyer-CMRG generator, which is safe to use +RNG is replaced by the L'Ecuyer-CMRG generator, which is safe to use in a parallel context. This internal generator can be seeded using the `seed` parameter of the `GOF_model` function, which however will also apply if the `n_cores` parameter is not used and thus the generator is not replaced. +For example: +```{r} +set.seed(1) +N <- 100 +X1 <- rnorm(N) +X2 <- rnorm(N) +d <- data.frame( + y = rpois(n = N, lambda = exp(4 + X1 * 2 + X2 * 6)), + x1 = X1, + x2 = X2) + +fit <- glm(y ~ x1 + x2, data = d, family = poisson()) + +mt <- GOF_model( + model = fit, + data = d, + nmb_boot_samples = 100, + simulator_type = "parametric", + y_name = "y", + Rn1_statistic = Rn1_KS$new(), + n_cores = 2, + seed = 1) +mt$get_pvalue() +``` + ## Installation You can install it from CRAN diff --git a/README.md b/README.md index 62757e9..f85d7bb 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,45 @@ not be rejected by the GOF-test: … +## Parallelization + +The bootstrapping process can be accelerated using the `n_cores` +parameter of the `GOF_model` function, that specifies the number of CPU +cores to use. + +If this parameter is set to at least two cores, internally the currently +used RNG is replaced by the L’Ecuyer-CMRG generator, which is safe to +use in a parallel context. + +This internal generator can be seeded using the `seed` parameter of the +`GOF_model` function, which however will also apply if the `n_cores` +parameter is not used and thus the generator is not replaced. + +For example: + + set.seed(1) + N <- 100 + X1 <- rnorm(N) + X2 <- rnorm(N) + d <- data.frame( + y = rpois(n = N, lambda = exp(4 + X1 * 2 + X2 * 6)), + x1 = X1, + x2 = X2) + + fit <- glm(y ~ x1 + x2, data = d, family = poisson()) + + mt <- GOF_model( + model = fit, + data = d, + nmb_boot_samples = 100, + simulator_type = "parametric", + y_name = "y", + Rn1_statistic = Rn1_KS$new(), + n_cores = 2, + seed = 1) + mt$get_pvalue() + #> [1] 0.62 + ## Installation You can install it from CRAN @@ -89,9 +128,9 @@ package in your environment by calling: # sessionInfo sessionInfo() - #> R Under development (unstable) (2025-08-19 r88650) + #> R Under development (unstable) (2025-10-19 r88945) #> Platform: x86_64-pc-linux-gnu - #> Running under: Ubuntu 24.04.2 LTS + #> Running under: Ubuntu 24.04.3 LTS #> #> Matrix products: default #> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 @@ -109,15 +148,15 @@ package in your environment by calling: #> tzcode source: system (glibc) #> #> attached base packages: - #> [1] stats graphics grDevices datasets utils methods base + #> [1] parallel stats graphics grDevices utils datasets methods + #> [8] base #> #> other attached packages: - #> [1] bootGOF_0.1.1 + #> [1] bootGOF_0.1.1.9000 #> #> loaded via a namespace (and not attached): #> [1] digest_0.6.37 desc_1.4.3 backports_1.5.0 R6_2.6.1 #> [5] fastmap_1.2.0 xfun_0.53 knitr_1.50 htmltools_0.5.8.1 - #> [9] rmarkdown_2.29 cli_3.6.5 renv_1.1.5 withr_3.0.2 - #> [13] pkgload_1.4.0 compiler_4.6.0 rprojroot_2.1.0 tools_4.6.0 - #> [17] pkgbuild_1.4.8 checkmate_2.3.3 evaluate_1.0.4 yaml_2.3.10 - #> [21] rlang_1.1.6 + #> [9] rmarkdown_2.30 cli_3.6.5 pkgload_1.4.1 compiler_4.6.0 + #> [13] rprojroot_2.1.1 tools_4.6.0 pkgbuild_1.4.8 checkmate_2.3.3 + #> [17] evaluate_1.0.5 yaml_2.3.10 rlang_1.1.6