diff --git a/NAMESPACE b/NAMESPACE
index e0db266..9ce011d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ S3method(plot,dunnett_test_result)
 S3method(print,RSCABS)
 S3method(print,StepDownRSCABS)
 S3method(print,drcComp)
+S3method(print,dunn_test_result)
 S3method(print,dunnett_test_result)
 S3method(print,stepDownTrendBinom)
 S3method(print,tskresult)
@@ -19,11 +20,13 @@ export(ECx_rating)
 export(ED.ZG)
 export(ED.plus)
 export(RSCABK)
+export(SpearmanKarber_modified)
 export(Tarone.test)
 export(Tarone.trend.test)
 export(addECxCI)
 export(aggregate_from_individual_simple)
 export(aggregate_from_individual_tidy)
+export(analyze_SK)
 export(backCalcSE)
 export(broom_dunnett)
 export(broom_williams)
@@ -33,12 +36,14 @@ export(calcTaronesTest)
 export(calculate_noec_rstatix)
 export(cochranArmitageTrendTest)
 export(compare_to_control_fisher)
+export(compute_mdd_williams)
 export(contEndpoint)
 export(convert2Score)
 export(convert_fish_data)
 export(create_contingency_table)
 export(dose.p.glmmPQL)
 export(drcCompare)
+export(dunn_test)
 export(dunnett_test)
 export(expand_to_individual_simple)
 export(expand_to_individual_tidy)
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..61287bd
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,3 @@
+# drcHelper (development version)
+
+* Initial CRAN submission preparation.
diff --git a/R/MDD.R b/R/MDD.R
new file mode 100644
index 0000000..150467f
--- /dev/null
+++ b/R/MDD.R
@@ -0,0 +1,57 @@
+#' Calculate MDD\% for a Williams Test Result
+#'
+#' @param williams_obj The tibble result from broom_williams.
+#' @param data The original dataframe used for the test.
+#' @param formula The formula used for the test, e.g., Response ~ Dose.
+#' @return A tibble with Dose, MDD, and MDD_pct.
+#' @export
+compute_mdd_williams <- function(williams_obj, data, formula) {
+
+  if (!inherits(williams_obj, "tbl_df") || !all(c("comparison", "t'-crit") %in% names(williams_obj))) {
+    stop("williams_obj must be the result from a broom_williams call.")
+  }
+  if (nrow(williams_obj) == 0) return(tibble::tibble(Dose = numeric(), MDD = numeric(), MDD_pct = numeric()))
+
+  # 1. Extract variable names
+  resp_name <- all.vars(formula)[1]
+  dose_name <- all.vars(formula)[2] # second variable in the formula, e.g., "Dose"
+
+  # Work on a local copy of the data to avoid modifying the original
+  local_data <- data
+
+  # Ensure the dose column is a factor with control (0) as the first level.
+  # This makes the function robust and independent of the input data's column type.
+  dose_values <- local_data[[dose_name]]
+  factor_levels <- sort(unique(dose_values))
+  local_data$dose_factor_col <- factor(dose_values, levels = factor_levels)
+
+  # Define the new, reliable factor column name and update formula for aov()
+  factor_col_name <- "dose_factor_col"
+  aov_formula <- stats::as.formula(paste(resp_name, "~", factor_col_name))
+
+  # 2. Extract info from the Williams test object
+  Tcrit <- williams_obj[["t'-crit"]]
+  doses <- as.numeric(gsub(" - 0.*", "", williams_obj$comparison))
+
+  # 3. Get ANOVA stats (MSE) using the corrected formula and local data
+  aov_fit <- stats::aov(aov_formula, data = local_data)
+  mse <- summary(aov_fit)[[1]]["Residuals", "Mean Sq"]
+
+  # 4. Get control group stats using the reliable factor column
+  control_level <- levels(local_data[[factor_col_name]])[1]
+  ctrl_data <- local_data[local_data[[factor_col_name]] == control_level, ]
+  mu_c <- mean(ctrl_data[[resp_name]], na.rm = TRUE)
+  n_c <- nrow(ctrl_data)
+
+  # 5. Get treatment sample sizes
+  n_t <- sapply(doses, function(d) sum(local_data[[dose_name]] == d))
+
+  # 6. Calculate MDD and MDD%
+  SE_diff <- sqrt(mse * (1 / n_c + 1 / n_t))
+  MDD <- Tcrit * SE_diff
+  MDD_pct <- 100 * MDD / abs(mu_c)
+
+  tibble::tibble(Dose = doses, MDD = MDD, MDD_pct = MDD_pct)
+}
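+
+## Worked illustration (not part of the package code) of the MDD arithmetic
+## above, with made-up values t'-crit = 1.9, MSE = 4, n_c = n_t = 6 and a
+## control mean of 20:
+##   Tcrit <- 1.9; mse <- 4; n_c <- 6; n_t <- 6; mu_c <- 20
+##   SE_diff <- sqrt(mse * (1 / n_c + 1 / n_t))  # = 1.155
+##   Tcrit * SE_diff                             # MDD = 2.194
+##   100 * Tcrit * SE_diff / abs(mu_c)           # MDD_pct = 10.97% of the control mean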
diff --git a/R/RSCABS.R b/R/RSCABS.R
index e3011a5..f8c0f5a 100644
--- a/R/RSCABS.R
+++ b/R/RSCABS.R
@@ -7,19 +7,19 @@
 #' Run RSCABS test (DEPRECATED)
 #'
-#' @description
-#' **DEPRECATED**: Please use [step_down_RSCABS()] instead for new code.
-#'
+#' @description
+#' **DEPRECATED**: Please use [drcHelper::step_down_RSCABS()] instead for new code.
+#'
 #' Runs the Rao-Scott adjusted Cochran-Armitage trend test by slices (RSCABS)
 #' analysis. The function is adapted from the archived version of RSCABS developed by
 #' Joe Swintek et al with CC0 license. It is not updated anymore and included
-#' for validation purpose. The modern replacement is [step_down_RSCABS()].
-#'
-#' @details
+#' for validation purposes. The modern replacement is [drcHelper::step_down_RSCABS()].
+#'
+#' @details
 #' This function is deprecated. For new analyses, please use the modern
-#' implementation in [step_down_RSCABS()] which provides:
+#' implementation in [drcHelper::step_down_RSCABS()] which provides:
 #' - Better error handling and input validation
-#' - More flexible data input formats
+#' - More flexible data input formats
 #' - Improved statistical methodology
 #' - Better documentation and examples
 #'
@@ -51,11 +51,11 @@
 #' }
 runRSCABS <- function(Data,Treatment,Replicate='',Effects='',test.type='RS'){
   # Issue deprecation warning
-  .Deprecated("step_down_RSCABS",
-              msg = paste("runRSCABS() is deprecated.",
+  .Deprecated("step_down_RSCABS",
+              msg = paste("runRSCABS() is deprecated.",
                           "Please use step_down_RSCABS() for new code.",
                           "See ?step_down_RSCABS for the modern API."))
-
+
   #This function will produce a table of step-down Cochran-Armitage trend tests with possible Rao-Scott adjustment by slices
   #It will Run the test on every effect in the Effect list
 #' @export
diff --git a/R/SK_TSK_tests_wrapper.R b/R/SK_TSK_tests_wrapper.R
new file mode 100644
index 0000000..8fe1f0b
--- /dev/null
+++ b/R/SK_TSK_tests_wrapper.R
@@ -0,0 +1,553 @@
+## README
+## Functions included in this R file:
+##  * SpearmanKarber_modified
+##  * Modular extract_* functions
+
+## A cleaned-up and fixed version of Harald's SpearmanKarber_modified function.
+## It corrects indexing, handles zero-dose safely, avoids hard-coded sizes,
+## simplifies pruning/pooling when control mortality > 0, and computes CIs in a stable way.
+## The original two-branch logic (p0 ≈ 0 vs p0 > 0) and the Fieller CI for the latter are retained.
+
+#' Spearman-Karber Estimation with Modified Handling for Control Mortality
+#'
+#' Estimates the LC50 (median lethal concentration) or ED50 (median effective dose)
+#' and its confidence interval using the Spearman-Karber method. This version includes
+#' robust handling for zero-dose (control) mortality and a Fieller-like approach for
+#' confidence interval estimation when control mortality is non-zero.
+#'
+#' @details
+#' The function operates in two branches:
+#' - If control mortality (p0) is near zero, the standard Spearman-Karber method is used on the log-transformed doses, assuming the first observed response is 0\%.
+#' - If p0 is non-zero, it estimates the background rate (c) by cumulative pooling, Abbott-corrects the proportions, prunes/pools low-dose groups, and uses a modified Spearman-Karber method on the scaled proportions. Confidence intervals are calculated using an approach inspired by Fieller's theorem to account for the background correction uncertainty.
+#'
+#' All concentrations must be non-negative and in increasing order.
+#'
+#' @param conc A numeric vector of doses or concentrations (must be >= 0 and in increasing order).
+#' @param dead A numeric vector of the number of organisms that died or responded at each \code{conc} level.
+#' @param total A numeric vector of the total number of organisms (population size) at each \code{conc} level. Can be a single value if all are the same.
+#' @param conf.level A single numeric value specifying the confidence level for the interval estimation (e.g., 0.95 for a 95\% CI). Must be in (0, 1).
+#' @param retData A logical value. If \code{TRUE}, the results are returned in a list. If \code{FALSE}, \code{invisible(NULL)} is returned.
+#' @param showOutput A logical value. If \code{TRUE}, the input data table and estimation results (including log-scale values and CIs) are printed to the console.
+#' @param showPlot A logical value. If \code{TRUE}, a plot is generated showing the observed and Abbott-corrected mortalities/responses, the estimated LC50, and its confidence interval.
+#'
+#' @return A list containing the estimation results if \code{retData} is \code{TRUE}, otherwise \code{invisible(NULL)}. The list includes:
+#' \item{\code{log10LC50}}{Estimated log10 of the LC50.}
+#' \item{\code{varianceOfLog10LC50}}{Estimated variance of log10LC50.}
+#' \item{\code{StandardDeviationOfm}}{Estimated standard deviation of log10LC50.}
+#' \item{\code{confidenceIntervalLog10}}{CI for log10LC50 (vector of two values).}
+#' \item{\code{LC50}}{Estimated LC50 (10^log10LC50).}
+#' \item{\code{confidenceIntervalLC50}}{CI for LC50 (vector of two values).}
+#' \item{\code{conf.level}}{The confidence level used for the CIs.}
+#'
+#' @export
+#'
+#' @references
+#' Carter, A., Wyeth-Ayerst Research, Chazy, NY (1994): Using the Spearman-Karber Method to Estimate the ED50.
+#' Proceedings of the Nineteenth Annual SAS Users Group International Conference, Dallas, Texas, April, pp. 10-13.
+#' \url{http://www.sascommunity.org/sugi/SUGI94/Sugi-94-195\%20Carter.pdf}.
+#' +#' @seealso \code{\link[drcHelper]{tsk_auto}} +#' +#' @author Sarah Baumert, Harald Schulz, Zhenglei Gao +#' +#' @examples +#' # Example 1: Zero control mortality (standard Spearman-Karber) +#' x1 <- c(0, 0.2, 0.3, 0.375, 0.625, 2) +#' n1 <- c(30, 30, 30, 30, 30, 30) +#' r1 <- c(0, 1, 3, 16, 24, 30) +#' SpearmanKarber_modified(x1, r1, n1, showOutput = TRUE) +#' +#' # Example 2: Non-zero control mortality (modified method) +#' x2 <- c(0, 15.54, 20.47, 27.92, 35.98, 55.52) +#' n2 <- c(40, 40, 40, 40, 40, 40) +#' r2 <- c(3, 2, 6, 11, 18, 33) +#' results <- SpearmanKarber_modified(x2, r2, n2, retData = TRUE, +#' showOutput = TRUE, showPlot = TRUE) +#' print(results$LC50) +SpearmanKarber_modified <- function(conc, dead, total, + conf.level = 0.95, + retData = TRUE, showOutput = FALSE, showPlot = FALSE) { + # Basic checks + if (any(is.na(conc))) stop("conc must not contain NA") + if (any(is.na(dead))) stop("dead must not contain NA") + if (any(is.na(total))) stop("total must not contain NA") + if (!is.numeric(conc)) stop("conc must be numeric") + if (!is.numeric(dead)) stop("dead must be numeric") + if (!is.numeric(total)) stop("total must be numeric") + + N <- length(conc) + if (N != length(dead)) stop("Different numbers of concentrations and responses were given") + if (length(total) == 1) total <- rep(total, N) + if (N != length(total)) stop("Different numbers of concentrations and subject groups were given") + if (any(conc < 0)) stop("All concentrations must be >= 0. Provide concentrations, not log10(concentrations).") + if (!all(conc == conc[order(conc)])) stop("Data must be in order of increasing concentration.") + if (any(dead < 0)) stop("Dead (responses) must be >= 0") + if (any(total <= 0)) stop("Population sizes must be > 0") + if (any(dead > total)) stop("dead cannot exceed total") + if (conf.level <= 0 || conf.level >= 1) stop("conf.level must be in (0,1)") + + # Observed proportions + p <- dead / total + p0 <- p[1] + eps <- .Machine$double.eps^0.5 + z <- stats::qnorm(1 - (1 - conf.level) / 2) + + # Abbott-corrected proportions for reporting/plotting (clamped to [0,1]) + if (p0 >= 1 - 1e-12) { + stop("Control mortality is 100% (or extremely close). 
Spearman-Karber not applicable.") + } + pAbbott <- (p - p0) / (1 - p0) + pAbbott <- pmin(pmax(pAbbott, 0), 1) + adjustCounts <- pAbbott * total + + # Containers for outputs + mu <- NA_real_ + LC50 <- NA_real_ + varianceOfMu <- NA_real_ + ciMu <- c(NA_real_, NA_real_) + ciLC50 <- c(NA_real_, NA_real_) + ciHalfWidth <- NA_real_ # for reporting convenience + + # Helper for variance term with interior i + # xlog is log10(x) for positive x; t is totals for same indices; pin is proportions (not Abbott) for same indices + variance_SK <- function(xlog, pin, t) { + M <- length(xlog) + if (M < 3) return(NA_real_) + v <- 0 + for (i in 2:(M - 1)) { + v <- v + 0.25 * (xlog[i - 1] - xlog[i + 1])^2 * (pin[i] * (1 - pin[i])) / t[i] + } + v + } + + # Case A: control mortality effectively zero + if (p0 <= eps) { + # Use only positive concentrations for log10 + pos_idx <- which(conc > 0) + if (length(pos_idx) < 2L) { + stop("Need at least two positive concentrations when control mortality is zero.") + } + x <- conc[pos_idx] + p_use <- p[pos_idx] # equals pAbbott[pos_idx] since p0 ~ 0 + t_use <- total[pos_idx] + + # Build extended log-dose with ghost endpoints + xlog_ext <- numeric(length(x) + 2L) + xlog_ext[2:(length(x) + 1L)] <- log10(x) + # Low ghost (needs at least two positive x) + xlog_ext[1] <- 2 * xlog_ext[2] - xlog_ext[3] + # High ghost + xlog_ext[length(x) + 2L] <- 2 * xlog_ext[length(x) + 1L] - xlog_ext[length(x)] + + # Build extended proportions: 0 at low, observed in middle, 1 at high + p_ext <- numeric(length(p_use) + 2L) + p_ext[1] <- 0 + p_ext[2:(length(p_use) + 1L)] <- p_use + p_ext[length(p_use) + 2L] <- 1 + + # Spearman-Karber trapezoid + mids <- 0.5 * (xlog_ext[-length(xlog_ext)] + xlog_ext[-1]) + dp <- diff(p_ext) + mu <- sum(dp * mids) + LC50 <- 10^mu + + # Variance on observed internal points (interior of observed grid, not ghosts) + v <- variance_SK(xlog = xlog_ext[2:(length(xlog_ext) - 1L)], + pin = p_use, + t = t_use) + varianceOfMu <- if (is.na(v)) NA_real_ else max(v, 0) + sdMu <- if (is.na(varianceOfMu)) NA_real_ else sqrt(varianceOfMu) + if (!is.na(sdMu)) { + ciMu <- c(mu - z * sdMu, mu + z * sdMu) + ciLC50 <- 10^ciMu + ciHalfWidth <- z * sdMu + } + + } else { + # Case B: nonzero control mortality, estimate background c by cumulative pooling + cum_dead <- cumsum(dead) + cum_total <- cumsum(total) + cum_ratio <- cum_dead / cum_total + k_star <- which.min(cum_ratio)[1] + c_bg <- cum_ratio[k_star] + n_c <- cum_total[k_star] + + # Prune: keep all observations with p >= c_bg - small tolerance; pool others into the first kept + keep_mask <- p >= (c_bg - 1e-12) + if (!any(keep_mask)) stop("After background-rate screening, no observations remain.") + keep_idx <- which(keep_mask) + # Ensure order + x_keep <- conc[keep_idx] + d_keep <- dead[keep_idx] + n_keep <- total[keep_idx] + + # Pool the excluded into the first kept entry + if (any(!keep_mask)) { + d_pool <- sum(dead[!keep_mask]) + n_pool <- sum(total[!keep_mask]) + d_keep[1] <- d_keep[1] + d_pool + n_keep[1] <- n_keep[1] + n_pool + } + + # Ensure positive concentrations for integration; pool any nonpositive into first positive + pos_keep <- which(x_keep > 0) + if (length(pos_keep) < 2L) { + stop("Need at least two positive concentrations after background-rate adjustment.") + } + if (any(x_keep <= 0)) { + first_pos <- pos_keep[1] + d_keep[first_pos] <- d_keep[first_pos] + sum(d_keep[x_keep <= 0]) + n_keep[first_pos] <- n_keep[first_pos] + sum(n_keep[x_keep <= 0]) + keep_positive <- x_keep > 0 + x_keep <- x_keep[keep_positive] + d_keep 
<- d_keep[keep_positive] + n_keep <- n_keep[keep_positive] + } + + p_keep <- d_keep / n_keep + xlog <- log10(x_keep) + M <- length(xlog) + + # Rescale by background-rate c to [0,1]; force the first to be exactly 0 + p_scaled <- (p_keep - c_bg) / (1 - c_bg) + p_scaled <- pmin(pmax(p_scaled, 0), 1) + p_scaled[1] <- 0 + + # Append a high ghost endpoint at p=1 and log-dose extrapolated + xlog_ext <- c(xlog, 2 * xlog[M] - xlog[M - 1]) + p_scaled_ext <- c(p_scaled, 1) + + # Raw SK on scaled proportions + mids <- 0.5 * (xlog_ext[-length(xlog_ext)] + xlog_ext[-1]) + dp <- diff(p_scaled_ext) + mu_raw <- sum(dp * mids) + + # Variance on original (unscaled) proportions, interior points only + v_raw <- variance_SK(xlog = xlog, pin = p_keep, t = n_keep) + varianceOfMu <- if (is.na(v_raw)) NA_real_ else max(v_raw, 0) + + # Fieller-based CI on log-scale (following your structure) + a <- 0.5 * (xlog[1] + xlog[2]) # first-segment midpoint + V12 <- (a^2) * c_bg * (1 - c_bg) / n_c + V22 <- c_bg * (1 - c_bg) / n_c + V11 <- varianceOfMu + V12 + B <- 1 - c_bg + g <- (z^2) * V22 / (B^2) + # Guard against edge cases + discr <- V11 - 2 * mu_raw * V12 + (mu_raw^2) * V22 - g * (V11 - (V12^2) / V22) + discr <- max(discr, 0) + denom <- (1 - g) + if (abs(denom) < 1e-12) { + sigma <- NA_real_ + } else { + sigma <- (z / B) * sqrt(discr) / denom + } + # Adjust mu for background c + mu <- (mu_raw - c_bg * a) / (1 - c_bg) + LC50 <- 10^mu + + if (is.finite(sigma)) { + ciMu <- sort(c(mu - sigma, mu + sigma)) + ciLC50 <- 10^ciMu + # Back out a variance proxy from sigma (approximate) + varianceOfMu <- max((sigma / z)^2, varianceOfMu) + ciHalfWidth <- sigma + } else { + ciMu <- c(NA_real_, NA_real_) + ciLC50 <- c(NA_real_, NA_real_) + ciHalfWidth <- NA_real_ + } + } + + # Plot if requested + if (isTRUE(showPlot)) { + xlimMax <- max(conc, na.rm = TRUE) + plot(conc, p * 100, type = "p", pch = 4, xlim = c(0, xlimMax), + ylim = c(0, 100), ylab = "Response, %", xlab = "Concentration", cex = 0.7) + lines(conc, pAbbott * 100, lty = 2) + legend("topleft", + c("Observed Mortality %", "Abbott-corrected %"), + lty = c(0, 2), pch = c(4, NA), cex = 0.8, bty = "n") + points(LC50, 50, pch = 16, col = "blue") + # horizontal error bar for LC50 CI (if available) + if (all(is.finite(ciLC50))) { + segments(x0 = ciLC50[1], y0 = 50, x1 = ciLC50[2], y1 = 50, col = "blue") + segments(x0 = ciLC50[1], y0 = 50 - 2, x1 = ciLC50[1], y1 = 50 + 2, col = "blue") + segments(x0 = ciLC50[2], y0 = 50 - 2, x1 = ciLC50[2], y1 = 50 + 2, col = "blue") + } + } + + # Console output if requested + if (isTRUE(showOutput)) { + out <- data.frame( + idx = seq_len(N), + concentration = conc, + total = total, + dead = dead, + obs_prop = round(p, 6), + abbott_prop = round(pAbbott, 6), + abbott_count = round(adjustCounts, 3) + ) + print(out, row.names = FALSE) + cat("log10(LC50) =", mu, "\n") + cat("estimated variance of log10(LC50) =", varianceOfMu, "\n") + cat("approx. 
half-width on log10-scale =", ciHalfWidth, "\n")
+    cat(paste0(round(100 * conf.level, 1), "% CI for log10(LC50) = [",
+               paste(ciMu, collapse = ", "), "]\n"))
+    cat("estimated LC50 =", LC50, "\n")
+    cat(paste0("estimated ", round(100 * conf.level, 1), "% CI for LC50 = [",
+               paste(ciLC50, collapse = ", "), "]\n"))
+  }
+
+  if (isTRUE(retData)) {
+    return(list(
+      log10LC50 = mu,
+      varianceOfLog10LC50 = varianceOfMu,
+      StandardDeviationOfm = if (is.na(varianceOfMu)) NA_real_ else sqrt(varianceOfMu),
+      confidenceIntervalLog10 = ciMu,
+      LC50 = LC50,
+      confidenceIntervalLC50 = ciLC50,
+      conf.level = conf.level
+    ))
+  } else {
+    invisible(NULL)
+  }
+}
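+
+## Worked illustration (not part of the package code): the core Spearman-Karber
+## step above is a trapezoid sum of log10-dose interval midpoints, weighted by
+## the jump in response between adjacent doses. With responses spanning 0 to 1,
+## no ghost endpoints are needed:
+##   x <- c(1, 2, 4, 8); p <- c(0, 0.25, 0.75, 1)
+##   xlog <- log10(x)
+##   mids <- 0.5 * (xlog[-length(xlog)] + xlog[-1])  # interval midpoints
+##   dp <- diff(p)                                   # response increments
+##   mu <- sum(dp * mids)                            # log10(LC50) = 0.4515
+##   10^mu                                           # LC50 = 2.83 (geometric mean of 2 and 4)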
+
+
+##* Modular extract_* functions:
+##* A set of individual extract_* functions, one for each SK method.
+##* Each function runs its respective model, safely captures key results (LC50, CI, etc.),
+##* and returns them in a standardized, single-row data frame,
+##* so the outputs can easily be combined into a final comparison table.
+
+# Helper to create a standardized output row with explicit SD columns
+standard_row <- function(method, scale, trim_or_A = NA, est = NA, lcl = NA, ucl = NA,
+                         log10est = NA, sd_lc50 = NA, sd_log10 = NA, gsd = NA,
+                         notes = "") {
+  data.frame(
+    method = as.character(method),
+    scale = as.character(scale),
+    trim_or_A = as.numeric(trim_or_A),
+    LC50 = as.numeric(est),
+    LCL = as.numeric(lcl),
+    UCL = as.numeric(ucl),
+    log10LC50 = as.numeric(log10est),
+    SD_LC50 = as.numeric(sd_lc50),   # SD on the same scale as LC50
+    SD_log10 = as.numeric(sd_log10), # SD on the log10 scale
+    GSD = as.numeric(gsd),
+    notes = as.character(notes),
+    stringsAsFactors = FALSE
+  )
+}
+
+# Extractor for drcHelper::tsk_auto
+extract_tsk_auto <- function(x, n, r, use.log.doses = TRUE, conf.level = 0.95) {
+  method_name <- "tsk_auto"
+  scale_name <- if (use.log.doses) "log-dose" else "linear-dose"
+
+  obj <- try(
+    drcHelper::tsk_auto(data.frame(x = x, n = n, r = r), use.log.doses = use.log.doses, conf.level = conf.level),
+    silent = TRUE
+  )
+
+  if (inherits(obj, "try-error")) {
+    return(standard_row(method_name, scale_name, notes = as.character(obj)))
+  }
+
+  `%||%` <- function(a, b) if (!is.null(a)) a else b
+
+  # obj$mu is included among the possible estimate names
+  est <- obj$LD50 %||% obj$ED50 %||% obj$mu %||% NA
+
+  ci <- obj$conf.int %||% obj$ci %||% c(NA, NA)
+  sd_val <- obj$sd %||% obj$SD %||% NA
+  gsd_val <- obj$GSD %||% obj$gsd %||% NA
+  trim_val <- obj$trim %||% attr(obj, "trim") %||% NA
+
+  standard_row(
+    method = method_name,
+    scale = scale_name,
+    trim_or_A = trim_val,
+    est = est,
+    lcl = ci[1],
+    ucl = ci[2],
+    log10est = if (is.finite(est) && est > 0) log10(est) else NA,
+    sd_lc50 = if (!use.log.doses) sd_val else NA,  # SD(LC50) is returned for linear scale
+    sd_log10 = if (use.log.doses && is.finite(gsd_val)) log(gsd_val) else NA,  # SD(log10) is ~log(GSD) for log scale
+    gsd = gsd_val
+  )
+}
+
+# Extractor for SpearmanKarber_modified, matching standard_row
+extract_SpearmanKarber_modified <- function(x, n, r, conf.level = 0.95) {
+  skm <- try(
+    SpearmanKarber_modified(x, r, n, conf.level = conf.level, retData = TRUE),
+    silent = TRUE
+  )
+
+  if (inherits(skm, "try-error")) {
+    return(standard_row("SpearmanKarber_modified", "log-dose", notes = as.character(skm)))
+  }
+
+  standard_row(
+    method = "SpearmanKarber_modified",
+    scale = "log-dose",
+    est = skm$LC50,
+    lcl = skm$confidenceIntervalLC50[1],
+    ucl = skm$confidenceIntervalLC50[2],
+    log10est = skm$log10LC50,
+    sd_log10 = if (!is.null(skm$StandardDeviationOfm)) skm$StandardDeviationOfm else NA
+  )
+}
+
+# Extractor for ecotoxicology::SpearmanKarber, matching standard_row
+extract_ecotox_SpearmanKarber <- function(x, n, r) {
+  if (length(unique(n)) != 1) {
+    return(standard_row("ecotox::SpearmanKarber", "log-dose", notes = "Requires constant N"))
+  }
+
+  use_idx <- which(x > 0)
+  if (length(use_idx) < 2) {
+    return(standard_row("ecotox::SpearmanKarber", "log-dose", notes = "Requires at least 2 positive doses"))
+  }
+
+  toxData <- cbind(x[use_idx], r[use_idx], r[use_idx] / n[use_idx])
+
+  obj <- try(
+    ecotoxicology::SpearmanKarber(toxData, N = unique(n)[1], retData = TRUE, showOutput = FALSE, showPlot = FALSE),
+    silent = TRUE
+  )
+
+  if (inherits(obj, "try-error")) {
+    return(standard_row("ecotox::SpearmanKarber", "log-dose", notes = as.character(obj)))
+  }
+
+  standard_row(
+    method = "ecotox::SpearmanKarber",
+    scale = "log-dose",
+    est = obj$LC50,
+    lcl = obj$confidenceInterval95LC50[1],
+    ucl = obj$confidenceInterval95LC50[2],
+    log10est = obj$log10LC50,
+    sd_log10 = if (!is.null(obj$varianceOfm)) sqrt(obj$varianceOfm) else NA
+  )
+}
+
+
+
+#' Unified Spearman-Karber Analysis Function
+#'
+#' This function serves as a wrapper to run various Spearman-Karber (SK) and
+#' Trimmed Spearman-Karber (TSK) analyses using a single interface.
+#' It returns a standardized one-row data frame for easy comparison.
+#'
+#' @param x Numeric vector of concentrations.
+#' @param n Numeric vector of total subjects per concentration.
+#' @param r Numeric vector of responses (e.g., dead) per concentration.
+#' @param method The analysis method to use. Must be one of:
+#'   - "SK_modified" (this package's SpearmanKarber_modified function)
+#'   - "tsk_auto_log" (drcHelper::tsk_auto with log-doses)
+#'   - "tsk_auto_linear" (drcHelper::tsk_auto with linear doses)
+#'   - "ecotox_SK" (ecotoxicology::SpearmanKarber)
+#' @param conf.level The confidence level for intervals (default 0.95).
+#' @param ... Additional arguments passed to the underlying functions (e.g., max.trim for tsk_auto).
+#'
+#' @return A single-row data frame containing the standardized analysis results.
+#' @export
+#'
+#' @examples
+#' x <- c(0, 0.2, 0.3, 0.375, 0.625, 2)
+#' n <- c(30, 30, 30, 30, 30, 30)
+#' r <- c(0, 1, 3, 16, 24, 30)
+#'
+#' # Run a single analysis
+#' analyze_SK(x, n, r, method = "SK_modified")
+#'
+#' # Run multiple analyses and combine into a single table
+#' methods_to_run <- c("SK_modified", "tsk_auto_log", "tsk_auto_linear", "ecotox_SK")
+#' all_results <- lapply(methods_to_run, function(m) {
+#'   analyze_SK(x, n, r, method = m)
+#' })
+#' comparison_table <- do.call(rbind, all_results)
+#' print(comparison_table)
analyze_SK <- function(x, n, r, method, conf.level = 0.95, ...) {
+  # --- Input Validation ---
+  available_methods <- c("SK_modified", "tsk_auto_log", "tsk_auto_linear", "ecotox_SK")
+  if (!method %in% available_methods) {
+    stop(paste("Unknown method specified. Please choose from:", paste(available_methods, collapse = ", ")))
+  }
+
+  dots <- list(...)
+  `%||%` <- function(a, b) if (!is.null(a)) a else b  # null-coalescing helper used in the dispatch below
+ + # --- Method Dispatch using switch() --- + switch( + method, + + "SK_modified" = { + obj <- try(SpearmanKarber_modified(x, r, n, conf.level = conf.level, retData = TRUE), silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row(method, "log-dose", notes = as.character(obj))) + } + standard_row(method, "log-dose", + est = obj$LC50, lcl = obj$confidenceIntervalLC50[1], ucl = obj$confidenceIntervalLC50[2], + log10est = obj$log10LC50, sd_log10 = obj$StandardDeviationOfm) + }, + + "tsk_auto_log" = { + call_args <- c(list(data.frame(x = x, n = n, r = r), use.log.doses = TRUE, conf.level = conf.level), dots) + obj <- try(do.call(drcHelper::tsk_auto, call_args), silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row(method, "log-dose", notes = as.character(obj))) + } + est <- obj$LD50 %||% obj$ED50 %||% obj$mu %||% NA + ci <- obj$conf.int %||% obj$ci %||% c(NA, NA) + gsd <- obj$GSD %||% obj$gsd %||% NA + standard_row(method, "log-dose", trim_or_A = obj$trim %||% attr(obj, "trim"), + est = est, lcl = ci[1], ucl = ci[2], + log10est = if (is.finite(est) && est > 0) log10(est) else NA, + sd_log10 = if (is.finite(gsd)) log(gsd) else NA, gsd = gsd) + }, + + "tsk_auto_linear" = { + call_args <- c(list(data.frame(x = x, n = n, r = r), use.log.doses = FALSE, conf.level = conf.level), dots) + obj <- try(do.call(drcHelper::tsk_auto, call_args), silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row(method, "linear-dose", notes = as.character(obj))) + } + est <- obj$LD50 %||% obj$ED50 %||% obj$mu %||% NA + ci <- obj$conf.int %||% obj$ci %||% c(NA, NA) + sd_val <- obj$sd %||% obj$SD %||% NA + standard_row(method, "linear-dose", trim_or_A = obj$trim %||% attr(obj, "trim"), + est = est, lcl = ci[1], ucl = ci[2], + log10est = if (is.finite(est) && est > 0) log10(est) else NA, + sd_lc50 = sd_val) + }, + + "ecotox_SK" = { + if (length(unique(n)) != 1) { + return(standard_row(method, "log-dose", notes = "Requires constant N")) + } + use_idx <- which(x > 0) + if (length(use_idx) < 2) { + return(standard_row(method, "log-dose", notes = "Requires at least 2 positive doses")) + } + toxData <- cbind(x[use_idx], r[use_idx], r[use_idx] / n[use_idx]) + obj <- try(ecotoxicology::SpearmanKarber(toxData, N = unique(n)[1], retData = TRUE, showOutput = FALSE, showPlot = FALSE), silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row(method, "log-dose", notes = as.character(obj))) + } + standard_row(method, "log-dose", + est = obj$LC50, lcl = obj$confidenceInterval95LC50[1], ucl = obj$confidenceInterval95LC50[2], + log10est = obj$log10LC50, sd_log10 = if (!is.null(obj$varianceOfm)) sqrt(obj$varianceOfm) else NA) + } + ) +} diff --git a/R/brsr_tsk.R b/R/brsr_tsk.R index a253e0b..c1002df 100644 --- a/R/brsr_tsk.R +++ b/R/brsr_tsk.R @@ -29,6 +29,19 @@ #' tsk <- function(...) UseMethod("tsk") +## wrapper for extracting output. 
not used anymore, see @extract_tsk_auto +extract_tsk_like <- function(obj) { + # Try common fields; fall back to NA + est <- obj$LD50 %||% obj$ED50 %||% obj$estimate %||% NA_real_ + lcl <- (obj$conf.int %||% obj$ci %||% c(NA_real_, NA_real_))[1] + ucl <- (obj$conf.int %||% obj$ci %||% c(NA_real_, NA_real_))[2] + sd <- obj$sd %||% obj$SE %||% NA_real_ + gsd <- obj$GSD %||% obj$gsd %||% NA_real_ + list(est = as.numeric(est), lcl = as.numeric(lcl), ucl = as.numeric(ucl), + sd = as.numeric(sd), gsd = as.numeric(gsd)) +} + + #' Auto-trimmed TSK Analysis #' #' This function automatically determines the appropriate trim level for TSK analysis @@ -40,15 +53,15 @@ tsk <- function(...) UseMethod("tsk") #' from the trim level to 1-trim level, which typically occurs when responses are #' too close to 0% or 100% at the extreme doses. #' -#' @param x A numeric vector of doses (for numeric method) or a data frame +#' @param x A numeric vector of doses (for numeric method) or a data frame #' containing columns 'x', 'n', and 'r' (for data.frame method). #' @param n A numeric vector of total counts (for numeric method only). #' @param r A numeric vector of response counts (for numeric method only). #' @param control A numeric value indicating the control dose (default is 0). #' @param conf.level A numeric value indicating the confidence level (default is 0.95). -#' @param use.log.doses A logical value indicating whether to use log-transformed +#' @param use.log.doses A logical value indicating whether to use log-transformed #' doses (default is TRUE). -#' @param max.trim A numeric value indicating the maximum allowed trim level +#' @param max.trim A numeric value indicating the maximum allowed trim level #' (default is 0.45, must be < 0.5). #' @param ... Additional arguments passed to the tsk function. #' @return The result of the TSK analysis with automatic trimming applied. @@ -68,7 +81,7 @@ tsk <- function(...) UseMethod("tsk") #' r = c(2, 5, 8, 12, 15, 17) #' ) #' result <- tsk_auto(data) -#' +#' #' # Using hamilton dataset (if available) #' if (exists("hamilton")) { #' # Try with one of the hamilton datasets @@ -82,7 +95,7 @@ tsk_auto <- function(x, ...) { #' @rdname tsk_auto #' @method tsk_auto numeric #' @export -tsk_auto.numeric <- function(x, n, r, control = 0, conf.level = 0.95, +tsk_auto.numeric <- function(x, n, r, control = 0, conf.level = 0.95, use.log.doses = TRUE, max.trim = 0.45, ...) { input <- data.frame(x = x, n = n, r = r) tsk_auto.data.frame(input, control = control, conf.level = conf.level, @@ -92,15 +105,48 @@ tsk_auto.numeric <- function(x, n, r, control = 0, conf.level = 0.95, #' @rdname tsk_auto #' @method tsk_auto data.frame #' @export -tsk_auto.data.frame <- function(x, control = 0, conf.level = 0.95, +tsk_auto.data.frame <- function(x, control = 0, conf.level = 0.95, use.log.doses = TRUE, max.trim = 0.45, ...) { input <- x - + # Optional: strip 'trim' from ... to avoid duplicate matching if user passes it to tsk_auto + dots <- list(...) + if (!is.null(dots$trim)) { + warning("tsk_auto ignores 'trim'. Use drcHelper::tsk(...) for explicit trims. 
Removing 'trim' from ...") + dots$trim <- NULL + } + # Validate max.trim if (max.trim <= 0 || max.trim >= 0.5) { stop("max.trim must be between 0 and 0.5 (exclusive).") } - + # ADD THIS BLOCK: preemptively trim if log-doses requested and zero (or negative) doses exist + if (isTRUE(use.log.doses) && any(input$x <= 0, na.rm = TRUE)) { + # Minimal safe trim: roughly 1/min(total per dose), capped by max.trim + # Falls back to 0.001 if totals are missing + base_trim <- tryCatch(1 / max(input$n, na.rm = TRUE), error = function(e) NA_real_) + if (!is.finite(base_trim) || base_trim <= 0) base_trim <- 0.001 + auto_trim <- min(base_trim, max.trim) + + message(sprintf("tsk_auto: zero dose with log transform detected; applying auto-trim = %.4f", auto_trim)) + # Try trimmed call; if it fails with a “suggested trim” message, upgrade trim slightly (capped) + return(tryCatch( + do.call(tsk, c(list(input, control = control, trim = auto_trim, conf.level = conf.level, + use.log.doses = use.log.doses), dots)), + error = function(e) { + if (grepl("consider using this trim:", e$message)) { + suggested_trim_match <- regmatches(e$message, regexpr("consider using this trim: [0-9.]+", e$message)) + if (length(suggested_trim_match) > 0) { + suggested_trim <- as.numeric(sub("consider using this trim: ", "", suggested_trim_match)) + auto_trim2 <- min(suggested_trim + 0.001, max.trim) + message(sprintf("tsk_auto: increasing auto-trim to %.4f due to suggestion", auto_trim2)) + return(do.call(tsk, c(list(input, control = control, trim = auto_trim2, conf.level = conf.level, + use.log.doses = use.log.doses), dots))) + } + } + stop(e) + } + )) + } # First try with no trimming result <- tryCatch({ tsk(input, control = control, trim = 0, conf.level = conf.level, @@ -109,18 +155,18 @@ tsk_auto.data.frame <- function(x, control = 0, conf.level = 0.95, # Only apply auto-trimming for specific trim-related errors if (grepl("responses do not increase from trim to 1-trim", e$message)) { # Extract suggested trim from error message - suggested_trim_match <- regmatches(e$message, + suggested_trim_match <- regmatches(e$message, regexpr("consider using this trim: [0-9.]+", e$message)) - + if (length(suggested_trim_match) > 0) { suggested_trim <- as.numeric(sub("consider using this trim: ", "", suggested_trim_match)) - + # Apply a small buffer to ensure success, but cap at max.trim auto_trim <- min(suggested_trim + 0.001, max.trim) - + message(paste("Auto-trimming applied: trim =", round(auto_trim, 4))) message(paste("Reason: Responses don't span the full range from 0 to 1")) - + # Try again with calculated trim tsk(input, control = control, trim = auto_trim, conf.level = conf.level, use.log.doses = use.log.doses, ...) @@ -133,7 +179,7 @@ tsk_auto.data.frame <- function(x, control = 0, conf.level = 0.95, stop(e) } }) - + return(result) } diff --git a/R/drc_Helper.R b/R/drc_Helper.R index e1a7dcf..85e2fa3 100644 --- a/R/drc_Helper.R +++ b/R/drc_Helper.R @@ -302,7 +302,7 @@ getModelName <- function(fname = NULL) { #' @rdname ED.plus #' @details #' Due to old ECxHelper development where ED.plus is defined as ED.ZG -#' @seealso [ED.plus()] for the public version of usage +#' @seealso [drcHelper::ED.plus()] for the public version of usage #' @export ED.ZG #' @keywords Deprecated ED.ZG <- function(...) 
{ diff --git a/README.md b/README.md index 227380b..c4f6756 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,7 @@ - - - -[![R-CMD-check (dev)](https://github.com/Bayer-Group/drcHelper/actions/workflows/R-CMD-check.yaml/badge.svg?branch=dev)](https://github.com/Bayer-Group/drcHelper/actions/workflows/R-CMD-check.yaml?query=branch%3Adev) +[![R-CMD-check](https://github.com/Bayer-Group/drcHelper/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Bayer-Group/drcHelper/actions/workflows/R-CMD-check.yaml) The goal of **drcHelper** is to assist with routine dose-response @@ -107,19 +104,19 @@ res <- mselect.plus(mod,fctList = fctList ) modList <- res$modList res$Comparison #> logLik IC Lack of fit Res var -#> LN.4 -15.45496 40.90992 5.893537e-01 0.2547068 -#> LL.4 -15.69685 41.39370 5.180082e-01 0.2598931 +#> LN.4 -14.65361 39.30722 6.118094e-01 0.2382532 +#> LL.4 -14.94568 39.89136 5.241523e-01 0.2441232 #> LL.3 -19.24379 46.48759 6.848925e-02 0.3326394 -#> W1.3 -20.55410 49.10820 4.800972e-02 0.3710183 -#> LL2.2 -70.79793 147.59586 8.398391e-17 23.3118491 +#> W1.3 -20.46060 48.92121 3.233853e-02 0.3681387 +#> LL2.2 -70.78500 147.57000 5.059273e-17 23.2867452 drcCompare(modRes=res) #> logLik IC Lack of fit Res var Certainty_Protection -#> LN.4 -15.45496 40.90992 5.893537e-01 0.2547068 High -#> LL.4 -15.69685 41.39370 5.180082e-01 0.2598931 High +#> LN.4 -14.65361 39.30722 6.118094e-01 0.2382532 High +#> LL.4 -14.94568 39.89136 5.241523e-01 0.2441232 High #> LL.3 -19.24379 46.48759 6.848925e-02 0.3326394 High -#> W1.3 -20.55410 49.10820 4.800972e-02 0.3710183 Medium -#> LL2.2 -70.79793 147.59586 8.398391e-17 23.3118491 Low +#> W1.3 -20.46060 48.92121 3.233853e-02 0.3681387 Medium +#> LL2.2 -70.78500 147.57000 5.059273e-17 23.2867452 Low #> Steepness No Effect p-val #> LN.4 Medium 0 #> LL.4 Medium 0 @@ -133,18 +130,18 @@ library(purrr) edResTab <- mselect.ED(modList = modList,respLev = c(10,20,50),trend="Decrease",CI="inv") edResTab #> .id Estimate Std. 
Error Lower Upper NW Rating EC -#> 1 LN.4 1.699273 NA 1.464617 1.990240 0.3093219 Good EC 10 -#> 2 LN.4 2.067034 NA 1.817202 2.321445 0.2439457 Good EC 20 -#> 3 LN.4 3.034117 NA 2.785528 3.283618 0.1641632 Excellent EC 50 -#> 4 LL.4 1.680896 NA 1.421435 2.018155 0.3550014 Good EC 10 -#> 5 LL.4 2.084252 NA 1.812372 2.371154 0.2680974 Good EC 20 -#> 6 LL.4 3.040373 NA 2.770313 3.299156 0.1739402 Excellent EC 50 -#> 7 LL.3 1.577783 NA 1.284085 1.961887 0.4295911 Good EC 10 -#> 8 LL.3 2.019241 NA 1.705807 2.342361 0.3152440 Good EC 20 -#> 9 LL.3 3.078550 NA 2.783875 3.366535 0.1892644 Excellent EC 50 -#> 10 W1.3 1.588627 NA 1.207649 2.091723 0.5565024 Fair EC 10 -#> 11 W1.3 2.092288 NA 1.686784 2.491398 0.3845617 Good EC 20 -#> 12 W1.3 3.171479 NA 2.861093 3.436843 0.1815399 Excellent EC 50 +#> 1 LN.4 1.700983 NA 1.473332 1.981769 0.2989080 Good EC 10 +#> 2 LN.4 2.067640 NA 1.826100 2.313691 0.2358199 Good EC 20 +#> 3 LN.4 3.032171 NA 2.791669 3.273468 0.1588958 Excellent EC 50 +#> 4 LL.4 1.684436 NA 1.432457 2.010475 0.3431522 Good EC 10 +#> 5 LL.4 2.085759 NA 1.822344 2.363961 0.2596737 Good EC 20 +#> 6 LL.4 3.037362 NA 2.775132 3.288824 0.1691243 Excellent EC 50 +#> 7 LL.3 1.577779 NA 1.284085 1.961887 0.4295925 Good EC 10 +#> 8 LL.3 2.019269 NA 1.705807 2.342361 0.3152395 Good EC 20 +#> 9 LL.3 3.078551 NA 2.783875 3.366535 0.1892643 Excellent EC 50 +#> 10 W1.3 1.588647 NA 1.208777 2.089897 0.5546351 Fair EC 10 +#> 11 W1.3 2.092302 NA 1.688186 2.490045 0.3832427 Good EC 20 +#> 12 W1.3 3.171499 NA 2.862468 3.435822 0.1807832 Excellent EC 50 #> 13 LL2.2 NA NA NA NA NA Not defined EC 10 #> 14 LL2.2 NA NA NA NA NA Not defined EC 20 #> 15 LL2.2 NA NA NA NA NA Not defined EC 50 @@ -183,10 +180,10 @@ knitr::kable(resED,caption = "Response Variable at day N",digits = 3) | | EC 10 | EC 20 | EC 50 | |:---------|------:|------:|------:| -| Estimate | 1.699 | 2.067 | 3.034 | -| Lower | 1.465 | 1.817 | 2.786 | -| Upper | 1.990 | 2.321 | 3.284 | -| NW | 0.309 | 0.244 | 0.164 | +| Estimate | 1.701 | 2.068 | 3.032 | +| Lower | 1.473 | 1.826 | 2.792 | +| Upper | 1.982 | 2.314 | 3.273 | +| NW | 0.299 | 0.236 | 0.159 | Response Variable at day N @@ -200,10 +197,10 @@ edres%>%knitr::kable(.,digits = 3) | | Estimate | Std. Error | Lower | Upper | |:------|---------:|-----------:|------:|------:| -| EC 5 | 1.447 | 0.163 | 1.107 | 1.787 | -| EC 10 | 1.699 | 0.159 | 1.367 | 2.032 | -| EC 20 | 2.067 | 0.151 | 1.753 | 2.382 | -| EC 50 | 3.034 | 0.152 | 2.716 | 3.352 | +| EC 5 | 1.449 | 0.157 | 1.122 | 1.777 | +| EC 10 | 1.701 | 0.154 | 1.380 | 2.022 | +| EC 20 | 2.068 | 0.146 | 1.764 | 2.371 | +| EC 50 | 3.032 | 0.147 | 2.725 | 3.340 | ## Model Output @@ -214,10 +211,33 @@ knitr::kable(coef(modsum),digits = 3) | | Estimate | Std. Error | t-value | p-value | |:--------------|---------:|-----------:|--------:|--------:| -| b:(Intercept) | -2.300 | 0.309 | -7.441 | 0.000 | -| c:(Intercept) | 0.532 | 0.177 | 3.005 | 0.007 | -| d:(Intercept) | 7.719 | 0.174 | 44.474 | 0.000 | -| e:(Intercept) | 2.914 | 0.148 | 19.750 | 0.000 | +| b:(Intercept) | -2.311 | 0.299 | -7.719 | 0.000 | +| c:(Intercept) | 0.556 | 0.171 | 3.256 | 0.004 | +| d:(Intercept) | 7.719 | 0.168 | 46.004 | 0.000 | +| e:(Intercept) | 2.907 | 0.143 | 20.382 | 0.000 | + +## GitHub Actions + +1. R-CMD-check.yaml: This triggers when: + +- A pull request is opened that targets any branch matching the pattern + releases/\*\* +- This includes branches like releases/v1.0, releases/beta, + releases/hotfix, etc. 
+- It will NOT trigger for PRs targeting main or master +- workflow_dispatch: This allows manual triggering of the workflow from + the GitHub Actions tab. + +This workflow will only run when working with release branches, not +during normal development on main. If you want it to run on regular +development, you’ll need to change the branch patterns. + +2. pkgdown.yaml: This triggers when + +- whenever a pull request event occurs. +- when a GitHub release event occurs, but only for the specific type + published. +- when pushed to dev. ## ToDo @@ -227,6 +247,9 @@ knitr::kable(coef(modsum),digits = 3) ## Contribution Notes +- If a code space is used, Use ‘postCreateCommand’ to run commands after + the container is created. It is rather fast. + `"postCreateCommand": "R -q -e 'install.packages("tidyverse")'"`, - Please create a pull request to contribute to the development of packages. Note that source branch is the branch you are currently working on when you run the `gh pr create` command. @@ -251,4 +274,6 @@ The work is supported by Bayer Environment Effects team members, especially by Andreas Solga and Daniela Jans. The Mesocosm colleagues Sarah Baumert and Harald Schulz have supported the verification and validation with extensive examples and scripts and SAS / VB validated -calculations. +calculations. Discussions with the Bayer RS-stats group, ecotox stats +core group and members of the CLE stats group regarding current +practices and statistical principles have been extremely helpful. diff --git a/REDUNDANCY_ANALYSIS.md b/REDUNDANCY_ANALYSIS.md index beb003d..8075b5b 100644 --- a/REDUNDANCY_ANALYSIS.md +++ b/REDUNDANCY_ANALYSIS.md @@ -14,7 +14,7 @@ This document identifies structural redundancy issues in the drcHelper package a - `stepDownRSCABS()` - Internal step-down procedure - `stepKRSCABS()` - Internal helper - Documented as "for validation purpose" - - Uses older API design + - Uses older implementation design - **Current Implementation** (`R/RSCABS_AO.R`, 934 lines): - `step_down_RSCABS()` - Modern step-down procedure @@ -24,13 +24,13 @@ This document identifies structural redundancy issues in the drcHelper package a - By Allen Olmstead, more modern design **Impact**: -- API confusion with similar function names +- implementation confusion with similar function names - Maintenance burden of two codebases - Test suite covers both implementations **Recommendation**: - Phase out legacy implementation gradually -- Update tests to use modern API +- Update tests to use modern implementation - Add deprecation warnings to legacy functions ### 2. 
### 2. Large Monolithic Files (MEDIUM PRIORITY)

@@ -61,7 +61,7 @@ This document identifies structural redundancy issues in the drcHelper package a
 - Cochran-Armitage: `stepdown_binom.R`, `RSCABS_AO.R` (different purposes)

 **Note**: Some apparent "duplication" serves different purposes:
-- `cochranArmitageTrendTest()` - General-purpose public API
+- `cochranArmitageTrendTest()` - General-purpose public implementation
 - `get_CA_Z()` - Internal utility for clustered data
 - These are NOT redundant

@@ -70,9 +70,9 @@ This document identifies structural redundancy issues in the drcHelper package a
 ### Phase 1: Documentation and Planning
 - [x] Create this analysis document
 - [ ] Add deprecation warnings to legacy functions
-- [ ] Document migration path from old to new API
+- [ ] Document migration path from old to new implementation

-### Phase 2: API Consolidation
+### Phase 2: Implementation Consolidation
 - [ ] Update tests to prefer modern RSCA implementation
 - [ ] Add wrapper functions for backward compatibility
 - [ ] Mark legacy functions as deprecated in documentation
@@ -92,13 +92,13 @@ This document identifies structural redundancy issues in the drcHelper package a
 ### Completed in This Analysis
 - [x] **Removed empty placeholder file** (`R/MQJT.R`)
 - [x] **Added deprecation warnings** to `runRSCABS()`
-- [x] **Updated documentation** to clearly distinguish legacy vs modern APIs
+- [x] **Updated documentation** to clearly distinguish legacy vs modern implementations
 - [x] **Created comprehensive analysis** of all redundancy issues

 ### Deferred for Future Work
 - [ ] **Breaking up large files**: Requires careful dependency analysis
 - [ ] **Removing deprecated functions**: Should wait for major version bump
-- [ ] **API consolidation**: Needs broader team discussion on backward compatibility
+- [ ] **Implementation consolidation**: Needs broader team discussion on backward compatibility

 ## Impact Assessment

@@ -119,4 +119,4 @@ This document identifies structural redundancy issues in the drcHelper package a
 - **User Impact**: Existing code works unchanged, users get helpful deprecation guidance

 ## Backward Compatibility
-All changes maintain full backward compatibility. Legacy functions remain available and functional but issue deprecation warnings to guide users toward modern implementations. No existing code needs to be changed immediately.
\ No newline at end of file
+All changes maintain full backward compatibility. Legacy functions remain available and functional but issue deprecation warnings to guide users toward modern implementations. No existing code needs to be changed immediately.
diff --git a/_pkgdown.yml b/_pkgdown.yml index 07b689b..d999a71 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -76,6 +76,8 @@ navbar: href: articles/Trend-Testing.html - text: NOEC ECx and BMD href: articles/NOEC_ECx_BMD.html + - text: MDD in Regulatory Context + href: articles/MDD-in-Regulatory-Context.html articles: text: Articles menu: @@ -143,7 +145,7 @@ articles: - drcHelper - Dunnetts_Test_for_Data_with_Hierarchical_Structure - Example_Analysis_Workflow - + - title: "Core Statistical Methods" desc: > These articles cover the foundational statistical methods implemented in drcHelper @@ -153,7 +155,7 @@ articles: - articles/Ordinal-Data - articles/Count_Data - articles/LMM-GLMM-and-GAMM - + - title: "Advanced Topics" desc: > These articles cover more advanced statistical concepts and techniques @@ -162,7 +164,7 @@ articles: - articles/Normality-Check - articles/Binomial_Extra_Variance - articles/Equivalence-Testing - + - title: "Alternative Methods" desc: > These articles explore alternative statistical methods and approaches @@ -173,7 +175,7 @@ articles: - articles/TSK_method - articles/MQJT - articles/Advanced_Fitting-a-biphasic-dose-reponse-model - + - title: "Regulatory Statistics" desc: > Statistical methods for regulatory assessments @@ -186,7 +188,8 @@ articles: - articles/NOEC_ECx_BMD - articles/OECD_ED_Assays - articles/Test-Guidelines - + - articles/MDD-in-Regulatory-Context + - title: "Validation" desc: > Validation of statistical methods in the package @@ -197,10 +200,11 @@ articles: - articles/Verification_which_JT - articles/Validation_MCP_tests - articles/Verification_CA_test - + - articles/Verification-of-Williams-Test + - title: "Examples" desc: > Example applications of the package contents: - articles/Examples_drc - - articles/Examples_oecd201 \ No newline at end of file + - articles/Examples_oecd201 diff --git a/analyze_differences.R b/dev/analyze_differences.R similarity index 100% rename from analyze_differences.R rename to dev/analyze_differences.R diff --git a/complete_fix_summary.R b/dev/complete_fix_summary.R similarity index 100% rename from complete_fix_summary.R rename to dev/complete_fix_summary.R diff --git a/fix_summary.R b/dev/fix_summary.R similarity index 100% rename from fix_summary.R rename to dev/fix_summary.R diff --git a/dev/new_function_dev/SK_TSK.qmd b/dev/new_function_dev/SK_TSK.qmd new file mode 100644 index 0000000..91c4d7f --- /dev/null +++ b/dev/new_function_dev/SK_TSK.qmd @@ -0,0 +1,459 @@ +--- +title: "SK_TSK" +format: html +--- + +Great idea. Below are two deliverables: + +1) A minimal patch for drcHelper:::tsk_auto to auto-trim when use.log.doses = TRUE and there is a zero dose, so you avoid degenerate results from log10(0). I show exactly where to add it. + +2) A helper function to tidy outputs from multiple SK/TSK functions and return a single comparison table. + +PATCH: drcHelper:::tsk_auto.data.frame +Where to add: +- Open the source for tsk_auto.data.frame (the one you pasted). +- Right after input <- x and the max.trim validation, insert the block labeled ADD THIS BLOCK. +- Optional but recommended: sanitize ... to drop a user-supplied trim in tsk_auto (prevents “formal argument matched by multiple actual arguments”). + +Patched function body excerpt (only the changed/added lines shown) + +```{r} +tsk_auto.data.frame <- function(x, control = 0, conf.level = 0.95, + use.log.doses = TRUE, max.trim = 0.45, ...) { + input <- x + + # Optional: strip 'trim' from ... 
to avoid duplicate matching if user passes it to tsk_auto + dots <- list(...) + if (!is.null(dots$trim)) { + warning("tsk_auto ignores 'trim'. Use drcHelper::tsk(...) for explicit trims. Removing 'trim' from ...") + dots$trim <- NULL + } + + # Validate max.trim + if (max.trim <= 0 || max.trim >= 0.5) { + stop("max.trim must be between 0 and 0.5 (exclusive).") + } + + # ADD THIS BLOCK: preemptively trim if log-doses requested and zero (or negative) doses exist + if (isTRUE(use.log.doses) && any(input$x <= 0, na.rm = TRUE)) { + # Minimal safe trim: roughly 1/min(total per dose), capped by max.trim + # Falls back to 0.001 if totals are missing + base_trim <- tryCatch(1 / max(input$n, na.rm = TRUE), error = function(e) NA_real_) + if (!is.finite(base_trim) || base_trim <= 0) base_trim <- 0.001 + auto_trim <- min(base_trim, max.trim) + + message(sprintf("tsk_auto: zero dose with log transform detected; applying auto-trim = %.4f", auto_trim)) + # Try trimmed call; if it fails with a “suggested trim” message, upgrade trim slightly (capped) + return(tryCatch( + do.call(tsk, c(list(input, control = control, trim = auto_trim, conf.level = conf.level, + use.log.doses = use.log.doses), dots)), + error = function(e) { + if (grepl("consider using this trim:", e$message)) { + suggested_trim_match <- regmatches(e$message, regexpr("consider using this trim: [0-9.]+", e$message)) + if (length(suggested_trim_match) > 0) { + suggested_trim <- as.numeric(sub("consider using this trim: ", "", suggested_trim_match)) + auto_trim2 <- min(suggested_trim + 0.001, max.trim) + message(sprintf("tsk_auto: increasing auto-trim to %.4f due to suggestion", auto_trim2)) + return(do.call(tsk, c(list(input, control = control, trim = auto_trim2, conf.level = conf.level, + use.log.doses = use.log.doses), dots))) + } + } + stop(e) + } + )) + } + + # ORIGINAL try with trim=0 (unchanged), but now use 'dots' to avoid duplicate arg names + result <- tryCatch({ + do.call(tsk, c(list(input, control = control, trim = 0, conf.level = conf.level, + use.log.doses = use.log.doses), dots)) + }, error = function(e) { + # Only apply auto-trimming for specific trim-related errors + if (grepl("responses do not increase from trim to 1-trim", e$message)) { + suggested_trim_match <- regmatches(e$message, + regexpr("consider using this trim: [0-9.]+", e$message)) + if (length(suggested_trim_match) > 0) { + suggested_trim <- as.numeric(sub("consider using this trim: ", "", suggested_trim_match)) + auto_trim <- min(suggested_trim + 0.001, max.trim) + message(paste("Auto-trimming applied: trim =", round(auto_trim, 4))) + message(paste("Reason: Responses don't span the full range from 0 to 1")) + do.call(tsk, c(list(input, control = control, trim = auto_trim, conf.level = conf.level, + use.log.doses = use.log.doses), dots)) + } else { + stop(e) + } + } else { + stop(e) + } + }) + + return(result) +} + +``` + +Notes: +- This patch: + - Detects use.log.doses = TRUE with x <= 0, and applies a minimal trim automatically: trim = min(1 / max(n), max.trim), with a lower safeguard of 0.001. + - If tsk() still suggests a larger trim, it bumps it to the suggested value + 0.001 (capped by max.trim). + - Prevents the “formal argument 'trim' matched by multiple actual arguments” error by stripping trim from ... at the start of tsk_auto. 
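+
+For example, with a zero (control) dose and log-doses requested, the patched
+function now trims automatically instead of failing on log10(0). A minimal
+check, assuming the patched tsk_auto above and drcHelper::tsk are available
+in the session:
+
+```{r}
+dat <- data.frame(
+  x = c(0, 0.2, 0.3, 0.375, 0.625, 2),  # includes a zero control dose
+  n = rep(30, 6),
+  r = c(0, 1, 3, 16, 24, 30)
+)
+# Expected message: "tsk_auto: zero dose with log transform detected;
+# applying auto-trim = 0.0333" (i.e. 1/max(n), capped at max.trim)
+res <- tsk_auto(dat, use.log.doses = TRUE)
+res
+```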
+ +Comparison table helper (tidy) +Below is a function that: +- Runs your SpearmanKarber_modified +- Runs drcHelper::tsk_auto twice (log and linear) +- Optionally runs drcHelper::tsk with trim = 1/min(n) on the log scale +- Optionally runs ecotoxicology::TSK with A rounded to integral percent to avoid the formatting bug +- Optionally runs ecotoxicology::SpearmanKarber if totals are constant (N) + +It returns a data.frame with comparable columns. + +```{r} +tidy_sk_compare <- function(x, n, r, conf.level = 0.95, + run_tsk_auto_log = TRUE, + run_tsk_auto_linear = TRUE, + run_tsk_log_mintrim = TRUE, + run_ecotox_TSK = TRUE, + run_ecotox_SK = TRUE) { + stopifnot(length(x) == length(n), length(x) == length(r)) + out_rows <- list() + add_row <- function(method, scale, trim_or_A, est, lcl, ucl, log10est = NA_real_, sd = NA_real_, + gsd = NA_real_, notes = NA_character_) { + out_rows[[length(out_rows) + 1]] <<- data.frame( + method = method, + scale = scale, + trim_or_A = trim_or_A, + log10LC50 = log10est, + LC50 = est, + LCL = lcl, + UCL = ucl, + SD = sd, + GSD = gsd, + notes = notes, + stringsAsFactors = FALSE + ) + } + # 1) Your modified + skm <- try(SpearmanKarber_modified(x, r, n, conf.level = conf.level, retData = TRUE), silent = TRUE) + if (!inherits(skm, "try-error")) { + add_row("SpearmanKarber_modified", "log-dose", NA, + est = skm$LC50, + lcl = skm$confidenceIntervalLC50[1], + ucl = skm$confidenceIntervalLC50[2], + log10est = skm$log10LC50, + sd = if (!is.null(skm$StandardDeviationOfm)) skm$StandardDeviationOfm else NA_real_, + notes = "") + } else { + add_row("SpearmanKarber_modified", "log-dose", NA, NA, NA, NA, NA, NA, NA, as.character(skm)) + } + # Helper to extract from drcHelper::tsk/tsk_auto objects + extract_tsk_like <- function(obj) { + # Try common fields; fall back to NA + est <- obj$LD50 %||% obj$ED50 %||% obj$estimate %||% NA_real_ + lcl <- (obj$conf.int %||% obj$ci %||% c(NA_real_, NA_real_))[1] + ucl <- (obj$conf.int %||% obj$ci %||% c(NA_real_, NA_real_))[2] + sd <- obj$sd %||% obj$SE %||% NA_real_ + gsd <- obj$GSD %||% obj$gsd %||% NA_real_ + list(est = as.numeric(est), lcl = as.numeric(lcl), ucl = as.numeric(ucl), + sd = as.numeric(sd), gsd = as.numeric(gsd)) + } + `%||%` <- function(a, b) if (!is.null(a)) a else b + + # 2) tsk_auto (log-dose) - uses your patched behavior + if (isTRUE(run_tsk_auto_log)) { + o <- try(drcHelper::tsk_auto(data.frame(x = x, n = n, r = r), + control = 0, conf.level = conf.level, + use.log.doses = TRUE), silent = TRUE) + if (!inherits(o, "try-error")) { + ex <- extract_tsk_like(o) + add_row("tsk_auto", "log-dose", attr(o, "trim") %||% NA, + est = ex$est, lcl = ex$lcl, ucl = ex$ucl, + log10est = if (is.finite(ex$est) && ex$est > 0) log10(ex$est) else NA_real_, + sd = ex$sd, gsd = ex$gsd, + notes = "") + } else { + add_row("tsk_auto", "log-dose", NA, NA, NA, NA, NA, NA, NA, as.character(o)) + } + } + # 3) tsk_auto (linear-dose) + if (isTRUE(run_tsk_auto_linear)) { + o <- try(drcHelper::tsk_auto(data.frame(x = x, n = n, r = r), + control = 0, conf.level = conf.level, + use.log.doses = FALSE), silent = TRUE) + if (!inherits(o, "try-error")) { + ex <- extract_tsk_like(o) + add_row("tsk_auto", "linear-dose", attr(o, "trim") %||% NA, + est = ex$est, lcl = ex$lcl, ucl = ex$ucl, + log10est = if (is.finite(ex$est) && ex$est > 0) log10(ex$est) else NA_real_, + sd = ex$sd, gsd = ex$gsd, + notes = "") + } else { + add_row("tsk_auto", "linear-dose", NA, NA, NA, NA, NA, NA, NA, as.character(o)) + } + } + # 4) tsk with minimal trim on log-dose + if 
(isTRUE(run_tsk_log_mintrim)) { + trim_min <- 1 / max(n, na.rm = TRUE) + o <- try(drcHelper::tsk(x, n, r, trim = trim_min, use.log.doses = TRUE), silent = TRUE) + if (!inherits(o, "try-error")) { + ex <- extract_tsk_like(o) + add_row("tsk (min trim)", "log-dose", trim_min, + est = ex$est, lcl = ex$lcl, ucl = ex$ucl, + log10est = if (is.finite(ex$est) && ex$est > 0) log10(ex$est) else NA_real_, + sd = ex$sd, gsd = ex$gsd, + notes = "") + } else { + add_row("tsk (min trim)", "log-dose", trim_min, NA, NA, NA, NA, NA, NA, as.character(o)) + } + } + # 5) ecotoxicology::TSK with A rounded to integer percent + if (isTRUE(run_ecotox_TSK)) { + A_intpct <- round(100 * (1 / max(n, na.rm = TRUE))) + A <- A_intpct / 100 + o <- try(ecotoxicology::TSK(x, r, n, A = A, conf = conf.level), silent = TRUE) + if (!inherits(o, "try-error")) { + # Try common names used by ecotoxicology::TSK + est <- o$LD50 %||% o$ED50 %||% o$estimate %||% NA_real_ + ci <- o$conf.int %||% o$ci %||% c(NA_real_, NA_real_) + add_row("ecotox::TSK", "log-dose", A, + est = as.numeric(est), + lcl = as.numeric(ci[1]), + ucl = as.numeric(ci[2]), + log10est = if (is.finite(est) && est > 0) log10(as.numeric(est)) else NA_real_, + sd = o$sd %||% o$SE %||% NA_real_, + notes = "") + } else { + add_row("ecotox::TSK", "log-dose", A, NA, NA, NA, NA, NA, NA, as.character(o)) + } + } + # 6) ecotoxicology::SpearmanKarber (only if totals constant) + if (isTRUE(run_ecotox_SK) && length(unique(n)) == 1L) { + # Use positive x to avoid log10(0) inside; provide third column as r/N (observed prop) + use <- which(x > 0) + toxData <- cbind(x[use], r[use], r[use] / n[use]) + o <- try(ecotoxicology::SpearmanKarber(toxData, N = unique(n)[1], retData = TRUE, + showOutput = FALSE, showPlot = FALSE), silent = TRUE) + if (!inherits(o, "try-error")) { + add_row("ecotox::SpearmanKarber", "log-dose", NA, + est = o$LC50, + lcl = o$confidenceInterval95LC50[1], + ucl = o$confidenceInterval95LC50[2], + log10est = o$log10LC50, + sd = if (!is.null(o$varianceOfm)) sqrt(o$varianceOfm) else NA_real_, + notes = "") + } else { + add_row("ecotox::SpearmanKarber", "log-dose", NA, NA, NA, NA, NA, NA, NA, as.character(o)) + } + } + # Bind rows + do.call(rbind, out_rows) +} +``` + + +How to use: +- After adding the tsk_auto patch, source both functions, then: + res <- tidy_sk_compare(x, n, r, conf.level = 0.95) + print(res) + +This will give you a single table with method, scale, trim/A, LC50, CI, and log10(LC50) where available, enabling quick side-by-side comparison. + +If you want me to extend the tidier to include the raw adjusted mortality vectors or more diagnostics (e.g., whether smoothing/trimming occurred), I can add those columns too. + + +Thanks. I’ll give you two things: + +1) A corrected add_row plus a set of separate extract_* functions that return one standardized row each. You can rbind these rows to build your comparison table. + +2) A concise patch for drcHelper:::tsk_auto.data.frame to auto-trim when use.log.doses = TRUE and x includes 0 (or negative), with the exact spot to add it. + +Part 1: add_row and extract functions +Use this add_row that returns the updated out_rows list (no <<-). Then use the extract_* functions below. Each returns a single-row data.frame with columns: +method, scale, trim_or_A, log10LC50, LC50, LCL, UCL, SD, GSD, notes. 
+ +```{r} +add_row <- function(out_rows, method, scale, trim_or_A, est, lcl, ucl, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = NA_character_) { + out_rows[[length(out_rows) + 1]] <- data.frame( + method = method, + scale = scale, + trim_or_A = trim_or_A, + log10LC50 = log10est, + LC50 = est, + LCL = lcl, + UCL = ucl, + SD = sd, + GSD = gsd, + notes = notes, + stringsAsFactors = FALSE + ) + out_rows +} + +# Single-row constructor (useful if you prefer not to manage an out_rows list) +standard_row <- function(method, scale, trim_or_A, est, lcl, ucl, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = NA_character_) { + data.frame( + method = method, + scale = scale, + trim_or_A = trim_or_A, + log10LC50 = log10est, + LC50 = est, + LCL = lcl, + UCL = ucl, + SD = sd, + GSD = gsd, + notes = notes, + stringsAsFactors = FALSE + ) +} + +extract_SpearmanKarber_modified <- function(skm) { + if (inherits(skm, "try-error")) { + return(standard_row("SpearmanKarber_modified", "log-dose", NA, + est = NA_real_, lcl = NA_real_, ucl = NA_real_, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = as.character(skm))) + } + standard_row("SpearmanKarber_modified", "log-dose", NA, + est = skm$LC50, + lcl = skm$confidenceIntervalLC50[1], + ucl = skm$confidenceIntervalLC50[2], + log10est = skm$log10LC50, + sd = if (!is.null(skm$StandardDeviationOfm)) skm$StandardDeviationOfm else NA_real_, + notes = "") +} + +extract_tsk_auto <- function(x, n, r, control = 0, conf.level = 0.95, + use.log.doses = TRUE, max.trim = 0.45) { + obj <- try(drcHelper::tsk_auto(data.frame(x = x, n = n, r = r), + control = control, conf.level = conf.level, + use.log.doses = use.log.doses, max.trim = max.trim), + silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row("tsk_auto", if (use.log.doses) "log-dose" else "linear-dose", NA, + est = NA_real_, lcl = NA_real_, ucl = NA_real_, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = as.character(obj))) + } + est <- if (!is.null(obj$LD50)) obj$LD50 else if (!is.null(obj$ED50)) obj$ED50 else NA_real_ + ci <- if (!is.null(obj$conf.int)) obj$conf.int else if (!is.null(obj$ci)) obj$ci else c(NA_real_, NA_real_) + sd <- if (!is.null(obj$sd)) obj$sd else if (!is.null(obj$SD)) obj$SD else NA_real_ + gsd <- if (!is.null(obj$GSD)) obj$GSD else if (!is.null(obj$gsd)) obj$gsd else NA_real_ + trim_val <- if (!is.null(obj$trim)) obj$trim else if (!is.null(attr(obj, "trim"))) attr(obj, "trim") else NA_real_ + standard_row("tsk_auto", if (use.log.doses) "log-dose" else "linear-dose", trim_val, + est = est, + lcl = ci[1], + ucl = ci[2], + log10est = if (is.finite(est) && est > 0) log10(est) else NA_real_, + sd = sd, + gsd = gsd, + notes = "") +} + +extract_tsk <- function(x, n, r, trim = 0, use.log.doses = TRUE) { + obj <- try(drcHelper::tsk(x, n, r, trim = trim, use.log.doses = use.log.doses), silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row("tsk", if (use.log.doses) "log-dose" else "linear-dose", trim, + est = NA_real_, lcl = NA_real_, ucl = NA_real_, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = as.character(obj))) + } + est <- if (!is.null(obj$LD50)) obj$LD50 else if (!is.null(obj$ED50)) obj$ED50 else NA_real_ + ci <- if (!is.null(obj$conf.int)) obj$conf.int else if (!is.null(obj$ci)) obj$ci else c(NA_real_, NA_real_) + sd <- if (!is.null(obj$sd)) obj$sd else if (!is.null(obj$SD)) obj$SD else NA_real_ + gsd <- if (!is.null(obj$GSD)) obj$GSD else if (!is.null(obj$gsd)) obj$gsd else NA_real_ + 
standard_row("tsk", if (use.log.doses) "log-dose" else "linear-dose", trim, + est = est, + lcl = ci[1], + ucl = ci[2], + log10est = if (is.finite(est) && est > 0) log10(est) else NA_real_, + sd = sd, + gsd = gsd, + notes = "") +} + +extract_ecotox_TSK <- function(x, n, r, A = NULL, conf = 0.95) { + if (is.null(A)) { + A_intpct <- round(100 * (1 / max(n, na.rm = TRUE))) + A <- A_intpct / 100 + } + obj <- try(ecotoxicology::TSK(x, r, n, A = A, conf = conf), silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row("ecotox::TSK", "log-dose", A, + est = NA_real_, lcl = NA_real_, ucl = NA_real_, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = as.character(obj))) + } + est <- if (!is.null(obj$LD50)) obj$LD50 else if (!is.null(obj$ED50)) obj$ED50 else NA_real_ + ci <- if (!is.null(obj$conf.int)) obj$conf.int else if (!is.null(obj$ci)) obj$ci else c(NA_real_, NA_real_) + sd <- if (!is.null(obj$sd)) obj$sd else if (!is.null(obj$SE)) obj$SE else NA_real_ + standard_row("ecotox::TSK", "log-dose", A, + est = est, + lcl = ci[1], + ucl = ci[2], + log10est = if (is.finite(est) && est > 0) log10(est) else NA_real_, + sd = sd, + gsd = NA_real_, + notes = "") +} + +extract_ecotox_SpearmanKarber <- function(x, n, r) { + if (length(unique(n)) != 1L) { + return(standard_row("ecotox::SpearmanKarber", "log-dose", NA, + est = NA_real_, lcl = NA_real_, ucl = NA_real_, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = "Requires constant N")) + } + use <- which(x > 0) + toxData <- cbind(x[use], r[use], r[use] / n[use]) + obj <- try(ecotoxicology::SpearmanKarber(toxData, N = unique(n)[1], + retData = TRUE, showOutput = FALSE, showPlot = FALSE), + silent = TRUE) + if (inherits(obj, "try-error")) { + return(standard_row("ecotox::SpearmanKarber", "log-dose", NA, + est = NA_real_, lcl = NA_real_, ucl = NA_real_, + log10est = NA_real_, sd = NA_real_, gsd = NA_real_, + notes = as.character(obj))) + } + sd_log <- if (!is.null(obj$varianceOfm)) sqrt(obj$varianceOfm) else NA_real_ + standard_row("ecotox::SpearmanKarber", "log-dose", NA, + est = obj$LC50, + lcl = obj$confidenceInterval95LC50[1], + ucl = obj$confidenceInterval95LC50[2], + log10est = obj$log10LC50, + sd = sd_log, + gsd = NA_real_, + notes = "") +} + +# Example combining rows +tidy_compare <- function(x, n, r, conf.level = 0.95) { + rows <- list() + # your modified (object creation) + skm <- try(SpearmanKarber_modified(x, r, n, conf.level = conf.level, retData = TRUE), silent = TRUE) + rows[[length(rows) + 1]] <- extract_SpearmanKarber_modified(skm) + rows[[length(rows) + 1]] <- extract_tsk_auto(x, n, r, control = 0, conf.level = conf.level, use.log.doses = TRUE) + rows[[length(rows) + 1]] <- extract_tsk_auto(x, n, r, control = 0, conf.level = conf.level, use.log.doses = FALSE) + rows[[length(rows) + 1]] <- extract_tsk(x, n, r, trim = 1 / max(n), use.log.doses = TRUE) + rows[[length(rows) + 1]] <- extract_ecotox_TSK(x, n, r, A = 1 / max(n), conf = conf.level) + rows[[length(rows) + 1]] <- extract_ecotox_SpearmanKarber(x, n, r) + do.call(rbind, rows) +} +``` + + + + +And in the original tryCatch that calls tsk with trim = 0, replace direct tsk(...) with do.call(tsk, c(list(...), dots)) so that any other arguments in ... are preserved without conflicting names. + +Where exactly to add: +- In the body of tsk_auto.data.frame, after input <- x and the max.trim check, insert the patch block. +- Also add the optional dots-handling lines at the same location (above the patch block). +- In the existing result <- tryCatch({ ... 
}, ...), move from direct tsk(...) to do.call(tsk, c(list(...), dots)).
+
+This ensures:
+- tsk_auto automatically trims when log doses include zero, preventing degenerate estimates.
+- Users won’t hit the “formal argument 'trim' matched by multiple actual arguments” error in tsk_auto.
diff --git a/inst/SystemTesting/config/parse_metrics.R b/inst/SystemTesting/config/parse_metrics.R
new file mode 100644
index 0000000..b69ba84
--- /dev/null
+++ b/inst/SystemTesting/config/parse_metrics.R
@@ -0,0 +1,49 @@
+
+
+
+parse_expected_metrics <- function(test_type, exp_tbl) {
+  # Dose as numeric: from column or from description
+  Dose_from_label <- ifelse(is.na(convert_dose(exp_tbl$Dose)),
+                            dose_from_description(exp_tbl$`Brief description`),
+                            convert_dose(exp_tbl$Dose))
+  exp_tbl <- dplyr::mutate(exp_tbl,
+                           Dose = Dose_from_label,
+                           Expected_Value = convert_numeric(`expected result value`))
+
+  # perl = TRUE is required because some patterns below use lookaheads ("(?!...)"),
+  # which base R's default (TRE) regex engine rejects.
+  pick <- function(pattern) exp_tbl[grepl(pattern, exp_tbl$`Brief description`, ignore.case = TRUE, perl = TRUE), c("Dose", "Expected_Value")]
+
+  out <- list()
+  if (test_type %in% c("dunnett", "williams", "dunn", "wilcoxon", "welch", "student_t")) {
+    out$mean <- pick("\\bMean\\b"); names(out$mean)[2] <- "Expected_Mean"
+  }
+  if (test_type %in% c("dunnett", "welch", "student_t")) {
+    out$t <- pick("T-value(?!\\s*\\(adjusted\\))|t-value(?!\\s*\\(adjusted\\))"); names(out$t)[2] <- "Expected_T"
+    out$p <- pick("p-value"); names(out$p)[2] <- "Expected_P"
+  }
+  if (test_type == "williams") {
+    out$mean <- pick("\\bMean\\b"); names(out$mean)[2] <- "Expected_Mean"
+    out$pinhib <- pick("%Inhibition"); names(out$pinhib)[2] <- "Expected_PercentInhibition"
+    out$tadj <- pick("T-value\\s*\\(adjusted\\)"); names(out$tadj)[2] <- "Expected_Tadj"
+    out$tcrit <- pick("\\bTcrit\\b"); names(out$tcrit)[2] <- "Expected_Tcrit"
+    out$signif <- pick("\\bsignificance\\b|\\bpsignificance\\b"); names(out$signif)[2] <- "Expected_Significance"
+    out$df <- pick("\\bdf\\b"); names(out$df)[2] <- "Expected_df"
+    out$mdd <- pick("\\bMDD%\\b"); names(out$mdd)[2] <- "Expected_MDDpct"
+  }
+  if (test_type == "dunn") {
+    out$z <- pick("\\bz-value\\b"); names(out$z)[2] <- "Expected_z"
+    out$p <- pick("p-value"); names(out$p)[2] <- "Expected_P"
+    out$pinhib <- pick("%Inhibition"); names(out$pinhib)[2] <- "Expected_PercentInhibition"
+  }
+  if (test_type == "wilcoxon") {
+    out$w <- pick("\\bW-Value\\b|\\bW-value\\b"); names(out$w)[2] <- "Expected_W"
+    out$p <- pick("p-value"); names(out$p)[2] <- "Expected_P"
+    out$pinhib <- pick("%Inhibition"); names(out$pinhib)[2] <- "Expected_PercentInhibition"
+  }
+  if (test_type == "fisher") {
+    out$uncorr <- pick("\\bUncorrected\\b"); names(out$uncorr)[2] <- "Expected_Uncorrected"
+    out$corr <- pick("\\bCorrected\\b"); names(out$corr)[2] <- "Expected_Corrected"
+    out$p <- pick("p-value"); names(out$p)[2] <- "Expected_P"
+    out$signif <- pick("\\bsignificance\\b|\\bpsignificance\\b"); names(out$signif)[2] <- "Expected_Significance"
+  }
+  out
+}
diff --git a/inst/SystemTesting/config/test_registry.R b/inst/SystemTesting/config/test_registry.R
new file mode 100644
index 0000000..b45c2b8
--- /dev/null
+++ b/inst/SystemTesting/config/test_registry.R
@@ -0,0 +1,70 @@
+test_registry <- list()
+
+test_registry$williams <- list(
+  keyword = "Williams",
+  discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Williams"),
+  run_actual = function(endpoint_data, alternative) {
+    ed <- endpoint_data
+    ed$Dose_numeric <- convert_dose(ed$Dose)
+    ed <- ed[!is.na(ed$Dose_numeric), ]
+    # Control first (lowest dose)
dose_levels <- sort(unique(ed$Dose_numeric))
+    ed$Dose_factor <- factor(ed$Dose_numeric, levels = dose_levels)
+
+    direction <- if (tolower(alternative) %in% c("smaller", "less")) "decreasing" else "increasing"
+
+    # Primary: PMCMRplus
+    bw <- try(drcHelper::broom_williams(Response ~ Dose_factor, data = ed,
+                                        method = "Williams_PMCMRplus",
+                                        direction = direction), silent = TRUE)
+
+    used_method <- "PMCMRplus"
+    if (inherits(bw, "try-error") || nrow(bw) == 0) {
+      # Fallback: JG
+      bw <- drcHelper::broom_williams(Response ~ Dose_factor, data = ed,
+                                      method = "Williams_JG",
+                                      direction = direction)
+      used_method <- "JG"
+    }
+
+    actual_df <- as.data.frame(bw)
+
+    if (used_method == "PMCMRplus") {
+      # comparison like "0.0448 - 0 <= 0" or "... >= 0"
+      comp_clean <- gsub("\\s*(<=|>=)\\s*0\\s*$", "", as.character(actual_df$comparison))
+      actual_df$Dose <- dose_from_comparison(comp_clean)
+    } else {
+      # JG comparisons can miss doses; assign from treatment doses excluding control
+      trt_doses <- dose_levels[-1]
+      if (length(trt_doses) == nrow(actual_df)) {
+        # PMCMRplus tends to list in ascending dose; JG may differ. Use ascending as default.
+        actual_df$Dose <- trt_doses
+      } else {
+        actual_df$Dose <- NA_real_
+        warning("Williams_JG: could not safely map Dose to rows; leaving Dose as NA.")
+      }
+    }
+
+    # Normalize column names (broom_williams returns columns "t'-stat" and "t'-crit")
+    cn <- names(actual_df)
+    names(actual_df)[cn == "t'-stat"] <- "Actual_Tadj"
+    names(actual_df)[cn == "t'-crit"] <- "Actual_Tcrit"
+    names(actual_df)[cn == "estimate"] <- "Actual_Diff"
+    names(actual_df)[cn == "decision"] <- "Actual_Significance" # "accept"/"reject"
+    rownames(actual_df) <- NULL
+
+    # Group means and %Inhibition
+    group_means <- ed %>%
+      dplyr::group_by(Dose = Dose_numeric) %>%
+      dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
+
+    ctrl_mean <- group_means$Actual_Mean[group_means$Dose == min(group_means$Dose)]
+    group_means$Actual_PercentInhibition <- if (!is.na(ctrl_mean) && ctrl_mean != 0) {
+      100 * (1 - group_means$Actual_Mean / ctrl_mean) # %Inhibition = 100 * (1 - mean_treatment / mean_control)
+    } else NA_real_
+
+    list(actual_df = actual_df, group_means = group_means)
+  },
+  metrics = c("Mean", "%Inhibition", "T-value (adjusted)", "Tcrit", "significance", "df", "MDD%"),
+  comparator = "trend"
+)
diff --git a/man/SpearmanKarber_modified.Rd b/man/SpearmanKarber_modified.Rd
new file mode 100644
index 0000000..4c7f168
--- /dev/null
+++ b/man/SpearmanKarber_modified.Rd
@@ -0,0 +1,82 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SK_TSK_tests_wrapper.R
+\name{SpearmanKarber_modified}
+\alias{SpearmanKarber_modified}
+\title{Spearman-Karber Estimation with Modified Handling for Control Mortality}
+\usage{
+SpearmanKarber_modified(
+  conc,
+  dead,
+  total,
+  conf.level = 0.95,
+  retData = TRUE,
+  showOutput = FALSE,
+  showPlot = FALSE
+)
+}
+\arguments{
+\item{conc}{A numeric vector of doses or concentrations (must be >= 0 and in increasing order).}
+
+\item{dead}{A numeric vector of the number of organisms that died or responded at each \code{conc} level.}
+
+\item{total}{A numeric vector of the total number of organisms (population size) at each \code{conc} level. Can be a single value if all are the same.}
+
+\item{conf.level}{A single numeric value specifying the confidence level for the interval estimation (e.g., 0.95 for a 95\% CI). Must be in (0, 1).}
+
+\item{retData}{A logical value. If \code{TRUE}, the results are returned in a list.
If \code{FALSE}, \code{invisible(NULL)} is returned.} + +\item{showOutput}{A logical value. If \code{TRUE}, the input data table and estimation results (including log-scale values and CIs) are printed to the console.} + +\item{showPlot}{A logical value. If \code{TRUE}, a plot is generated showing the observed and Abbott-corrected mortalities/responses, the estimated LC50, and its confidence interval.} +} +\value{ +A list containing the estimation results if \code{retData} is \code{TRUE}, otherwise \code{invisible(NULL)}. The list includes: +\item{\code{log10LC50}}{Estimated log10 of the LC50.} +\item{\code{varianceOfLog10LC50}}{Estimated variance of log10LC50.} +\item{\code{StandardDeviationOfm}}{Estimated standard deviation of log10LC50.} +\item{\code{confidenceIntervalLog10}}{CI for log10LC50 (vector of two values).} +\item{\code{LC50}}{Estimated LC50 (10^log10LC50).} +\item{\code{confidenceIntervalLC50}}{CI for LC50 (vector of two values).} +\item{\code{conf.level}}{The confidence level used for the CIs.} +} +\description{ +Estimates the LC50 (median lethal concentration) or ED50 (median effective dose) +and its confidence interval using the Spearman-Karber method. This version includes +robust handling for zero-dose (control) mortality and a Fieller-like approach for +confidence interval estimation when control mortality is non-zero. +} +\details{ +The function operates in two branches: +\itemize{ +\item If control mortality (p0) is near zero, the standard Spearman-Karber method is used on the log-transformed doses, assuming the first observed response is 0\%. +\item If p0 is non-zero, it estimates the background rate (c) by cumulative pooling, Abbott-corrects the proportions, prunes/pools low-dose groups, and uses a modified Spearman-Karber method on the scaled proportions. Confidence intervals are calculated using an approach inspired by Fieller's theorem to account for the background correction uncertainty. +} + +All concentrations must be non-negative and in increasing order. +} +\examples{ +# Example 1: Zero control mortality (standard Spearman-Karber) +x1 <- c(0, 0.2, 0.3, 0.375, 0.625, 2) +n1 <- c(30, 30, 30, 30, 30, 30) +r1 <- c(0, 1, 3, 16, 24, 30) +SpearmanKarber_modified(x1, r1, n1, showOutput = TRUE) + +# Example 2: Non-zero control mortality (modified method) +x2 <- c(0, 15.54, 20.47, 27.92, 35.98, 55.52) +n2 <- c(40, 40, 40, 40, 40, 40) +r2 <- c(3, 2, 6, 11, 18, 33) +results <- SpearmanKarber_modified(x2, r2, n2, retData = TRUE, + showOutput = TRUE, showPlot = TRUE) +print(results$LC50) +} +\references{ +ART CARTER, WYETH-AYERST RESEARCH, CHAZY, NY (1994): Using the Spearman-Karber Method to Estimate the ED50. +Proceedings of the Nineteenth Annual SAS Users Group International Conference, Dallas, Texas, April, pp. 10-13. +\url{http://www.sascommunity.org/sugi/SUGI94/Sugi-94-195\%20Carter.pdf}. +} +\seealso{ +\code{\link[drcHelper]{tsk_auto}} +} +\author{ +Sarah Baumert, Harald Schulz, Zhenglei Gao +} diff --git a/man/analyze_SK.Rd b/man/analyze_SK.Rd new file mode 100644 index 0000000..4841a28 --- /dev/null +++ b/man/analyze_SK.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SK_TSK_tests_wrapper.R +\name{analyze_SK} +\alias{analyze_SK} +\title{Unified Spearman-Karber Analysis Function} +\usage{ +analyze_SK(x, n, r, method, conf.level = 0.95, ...) 
+} +\arguments{ +\item{x}{Numeric vector of concentrations.} + +\item{n}{Numeric vector of total subjects per concentration.} + +\item{r}{Numeric vector of responses (e.g., dead) per concentration.} + +\item{method}{The analysis method to use. Must be one of: +\itemize{ +\item "SK_modified" (your custom SpearmanKarber_modified function) +\item "tsk_auto_log" (drcHelper::tsk_auto with log-doses) +\item "tsk_auto_linear" (drcHelper::tsk_auto with linear doses) +\item "ecotox_SK" (ecotoxicology::SpearmanKarber) +}} + +\item{conf.level}{The confidence level for intervals (default 0.95).} + +\item{...}{Additional arguments passed to the underlying functions (e.g., max.trim for tsk_auto).} +} +\value{ +A single-row data frame containing the standardized analysis results. +} +\description{ +This function serves as a wrapper to run various Spearman-Karber (SK) and +Trimmed Spearman-Karber (TSK) analyses using a single interface. +It returns a standardized one-row data frame for easy comparison. +} +\examples{ +x <- c(0, 0.2, 0.3, 0.375, 0.625, 2) +n <- c(30, 30, 30, 30, 30, 30) +r <- c(0, 1, 3, 16, 24, 30) + +# Run a single analysis +analyze_SK(x, n, r, method = "SK_modified") + +# Run multiple analyses and combine into a single table +methods_to_run <- c("SK_modified", "tsk_auto_log", "tsk_auto_linear", "ecotox_SK") +all_results <- lapply(methods_to_run, function(m) { + analyze_SK(x, n, r, method = m) +}) +comparison_table <- do.call(rbind, all_results) +print(comparison_table) +} diff --git a/man/compute_mdd_williams.Rd b/man/compute_mdd_williams.Rd new file mode 100644 index 0000000..7c95561 --- /dev/null +++ b/man/compute_mdd_williams.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MDD.R +\name{compute_mdd_williams} +\alias{compute_mdd_williams} +\title{Calculate MDD\% for a Williams Test Result} +\usage{ +compute_mdd_williams(williams_obj, data, formula) +} +\arguments{ +\item{williams_obj}{The tibble result from broom_williams.} + +\item{data}{The original dataframe used for the test.} + +\item{formula}{The formula used for the test, e.g., Response ~ Dose.} +} +\value{ +A tibble with Dose, MDD, and MDD_pct. +} +\description{ +Calculate MDD\% for a Williams Test Result +} diff --git a/man/dunn_test.Rd b/man/dunn_test.Rd new file mode 100644 index 0000000..44757fd --- /dev/null +++ b/man/dunn_test.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dunn_test.R +\name{dunn_test} +\alias{dunn_test} +\title{Dunn's Multiple Comparison Test} +\usage{ +dunn_test( + data, + response_var, + dose_var, + control_level = 0, + alternative = "less", + p_adjust_method = "holm", + alpha = 0.05, + include_kruskal = TRUE +) +} +\arguments{ +\item{data}{A data frame containing the response and grouping variables} + +\item{response_var}{Character string specifying the name of the response variable} + +\item{dose_var}{Character string specifying the name of the dose/treatment variable} + +\item{control_level}{The control level (default: 0)} + +\item{alternative}{Character string specifying the alternative hypothesis. +Must be one of "less", "greater", or "two.sided" (default: "less")} + +\item{p_adjust_method}{Character string specifying the p-value adjustment method +(default: "holm"). 
See p.adjust.methods for available methods} + +\item{alpha}{Significance level (default: 0.05)} + +\item{include_kruskal}{Logical indicating whether to include Kruskal-Wallis test results +(default: TRUE)} +} +\value{ +A list of class "dunn_test_result" containing: +\describe{ +\item{results_table}{Data frame with comparison results including z-values and p-values} +\item{kruskal_wallis}{Kruskal-Wallis test results (if include_kruskal = TRUE)} +\item{noec}{No Observed Effect Concentration} +\item{noec_message}{Description of NOEC determination} +\item{model_type}{Description of the statistical method used} +\item{control_level}{The control level used} +\item{alpha}{Significance level used} +\item{alternative}{Alternative hypothesis tested} +\item{p_adjust_method}{P-value adjustment method used} +} +} +\description{ +Performs Dunn's multiple comparison test for comparing treatment groups against a control +after a significant Kruskal-Wallis test. This is a wrapper around PMCMRplus::kwManyOneDunnTest +that provides consistent output structure with other drcHelper test functions. +} +\note{ +This function uses PMCMRplus::kwManyOneDunnTest which produces equivalent results +to DescTools::DunnTest. Both implementations use the same underlying statistical +methodology for Dunn's post-hoc test following Kruskal-Wallis. +} +\examples{ +\dontrun{ +# Example data +Rate <- c(0,0,0,0,0,0, + 0.0448,0.0448,0.0448,0.0448, + 0.132,0.132,0.132,0.132) +y <- c(0.131,0.117,0.130,0.122,0.127,0.128, + 0.122,0.126,0.128,0.116, + 0.090,0.102,0.107,0.099) +test_data <- data.frame(Rate = Rate, Response = y) + +# Run Dunn's test +result <- dunn_test(test_data, response_var = "Response", + dose_var = "Rate", control_level = 0, + alternative = "less") +} + +} diff --git a/man/figures/README-unnamed-chunk-3-1.png b/man/figures/README-unnamed-chunk-3-1.png index 26f3c17..05b6e50 100644 Binary files a/man/figures/README-unnamed-chunk-3-1.png and b/man/figures/README-unnamed-chunk-3-1.png differ diff --git a/man/figures/README-unnamed-chunk-6-1.png b/man/figures/README-unnamed-chunk-6-1.png index f0272a4..2dc0b75 100644 Binary files a/man/figures/README-unnamed-chunk-6-1.png and b/man/figures/README-unnamed-chunk-6-1.png differ diff --git a/man/figures/README-unnamed-chunk-7-1.png b/man/figures/README-unnamed-chunk-7-1.png index 24c2184..f97af9a 100644 Binary files a/man/figures/README-unnamed-chunk-7-1.png and b/man/figures/README-unnamed-chunk-7-1.png differ diff --git a/man/print.dunn_test_result.Rd b/man/print.dunn_test_result.Rd new file mode 100644 index 0000000..6fe2238 --- /dev/null +++ b/man/print.dunn_test_result.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dunn_test.R +\name{print.dunn_test_result} +\alias{print.dunn_test_result} +\title{Print method for dunn_test_result} +\usage{ +\method{print}{dunn_test_result}(x, ...) 
+}
+\arguments{
+\item{x}{A dunn_test_result object}
+
+\item{...}{Additional arguments (not used)}
+}
+\description{
+Print method for dunn_test_result
+}
diff --git a/vignettes/assets/ECx_monotonic.png b/pkgdown/assets/ECx_monotonic.png
similarity index 100%
rename from vignettes/assets/ECx_monotonic.png
rename to pkgdown/assets/ECx_monotonic.png
diff --git a/vignettes/assets/NOEC_Monotonic.png b/pkgdown/assets/NOEC_Monotonic.png
similarity index 100%
rename from vignettes/assets/NOEC_Monotonic.png
rename to pkgdown/assets/NOEC_Monotonic.png
diff --git a/vignettes/assets/NOEC_nonMonotonic.png b/pkgdown/assets/NOEC_nonMonotonic.png
similarity index 100%
rename from vignettes/assets/NOEC_nonMonotonic.png
rename to pkgdown/assets/NOEC_nonMonotonic.png
diff --git a/vignettes/assets/binomial_tank_effects_visualization.png b/pkgdown/assets/binomial_tank_effects_visualization.png
similarity index 100%
rename from vignettes/assets/binomial_tank_effects_visualization.png
rename to pkgdown/assets/binomial_tank_effects_visualization.png
diff --git a/vignettes/assets/quantal_NOEC.png b/pkgdown/assets/quantal_NOEC.png
similarity index 100%
rename from vignettes/assets/quantal_NOEC.png
rename to pkgdown/assets/quantal_NOEC.png
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
index 47018d6..835a4e7 100644
--- a/vignettes/.gitignore
+++ b/vignettes/.gitignore
@@ -1,5 +1,5 @@
 *.html
-*.R
 /.quarto/
 **/*.quarto_ipynb
+*.R
diff --git a/vignettes/articles/MDD-in-Regulatory-Context.Rmd b/vignettes/articles/MDD-in-Regulatory-Context.Rmd
new file mode 100644
index 0000000..ce9b4ae
--- /dev/null
+++ b/vignettes/articles/MDD-in-Regulatory-Context.Rmd
@@ -0,0 +1,419 @@
+---
+title: "MDD in Regulatory Context"
+editor_options:
+  chunk_output_type: console
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(drcHelper)
+source("../knitr-setup.R")
+```
+
+
+In the regulatory context, MDD% stands for the minimum percent change (relative to the control mean) that would be deemed statistically significant, given the test's critical value, the residual variability, and the replication. There are two common ways to define it; the one most often used in ecotoxicology is the significance-threshold version (no power term).
+
+## Key definitions
+
+- Absolute minimum detectable difference (MDD, on the response scale):
+  - For a control vs. dose comparison with pooled residual variance MSE and per-group sample sizes n_c (control) and n_t (treatment), the standard error of a difference is:
+    - $$SE_{diff} = \sqrt{MSE \cdot \left(\frac{1}{n_c} + \frac{1}{n_t}\right)}$$
+  - Given a one-sided critical value (e.g., Williams' Tcrit), the absolute MDD is:
+    - $$MDD = T_{crit} \cdot SE_{diff}$$
+- Percent MDD relative to control mean:
+  - If the control mean is μ_c, then
+    - $$MDD\% = 100 \cdot \frac{MDD}{\mu_c} = 100 \cdot \frac{T_{crit} \cdot SE_{diff}}{\mu_c}$$
+
+Notes
+
+- Significance-threshold vs. power-based:
+  - The above uses only Tcrit (the significance threshold at alpha). Some literature calls this MSD (Minimum Significant Difference).
+  - A power-based MDD (to achieve power 1−β) adds a second quantile:
+    - $$MDD_{power} = \left(t_{1-\alpha,\ df} + t_{1-\beta,\ df}\right) \cdot SE_{diff}$$
+    - and $$MDD\%_{power} = 100 \cdot \frac{MDD_{power}}{\mu_c}$$
+  - Your Williams expected results include "Tcrit" per dose; this aligns with the significance-threshold version (no power term).
+- Which Tcrit to use: + - For Williams: use the Tcrit reported per dose/step from broom_williams(method = "Williams_PMCMRplus"), which you have available. + - For Dunnett: a true Dunnett critical value depends on the number of groups (multivariate t). If you don't have a reported Tcrit, computing an exact Dunnett Tcrit is more involved; for Williams, you already have it, so we can compute MDD% reliably. + + +**What the function does** + +- Accepts a formula x of the form Response ~ Dose (Dose can be numeric or factor). +- Ensures the control (reference) group is the first factor level (by default the lowest dose). +- Computes the per-contrast standard error SE_diff using the appropriate method: + - Williams/Dunnett: pooled ANOVA MSE (or directly from glht sigma for Dunnett). + - Student’s t: pooled-variance SE. + - Welch: group-specific-variance SE and Welch–Satterthwaite df. +- Determines the critical value Tcrit: + - Williams: from broom_williams t’-crit per contrast. + - Dunnett: from multcomp summary.glht test$qfunction at your alpha. + - Student’s t/Welch: qt with per-contrast df. +- Computes MDD = Tcrit × SE_diff and MDD% = 100 × MDD / control_mean. +- Returns a tidy tibble with dose, n_c, n_t, df (when applicable), SE_diff, Tcrit, MDD, MDD_pct, method, alpha, alternative, and control_mean. + +## Code: compute_mdd_generic + +```{r} +compute_mdd_generic <- function(x, data, + test = c("Williams_PMCMRplus", "Williams_JG", "Dunnett", "t", "Welch"), + alpha = 0.05, + alternative = c("smaller", "greater", "two-sided")) { + test <- match.arg(test) + alternative <- match.arg(alternative) + + # Parse formula + if (!inherits(x, "formula")) stop("x must be a formula, e.g., Response ~ Dose") + terms_obj <- terms(x) + resp_name <- rownames(attr(terms_obj, "factors"))[1] + factor_name <- attr(terms_obj, "term.labels")[1] + + df <- data + if (!is.data.frame(df)) stop("data must be a data.frame") + + # Prepare dose factor with control first (lowest dose) + dose_raw <- df[[factor_name]] + dose_num <- suppressWarnings(as.numeric(gsub(",", ".", as.character(dose_raw)))) + # If numeric conversion fails everywhere, treat as factor and try to sort levels by numeric content + if (all(is.na(dose_num))) { + # Attempt to parse numeric part from strings + dose_num <- suppressWarnings(as.numeric(gsub(",", ".", gsub("[^0-9.,-]", "", as.character(dose_raw))))) + } + # Build factor levels: sort unique numeric doses; fallback to existing factor levels + if (!all(is.na(dose_num))) { + levels_sorted <- sort(unique(dose_num)) + df$Dose_factor <- factor(dose_num, levels = levels_sorted) + } else { + df$Dose_factor <- if (is.factor(dose_raw)) factor(dose_raw) else factor(dose_raw) + warning("Dose values not numeric; using existing factor levels. 
Ensure control is the first level.")
+  }
+
+  # Basic stats per group
+  resp <- df[[resp_name]]
+  ctrl_level <- levels(df$Dose_factor)[1]
+  ctrl_idx <- df$Dose_factor == ctrl_level
+  mu_c <- mean(resp[ctrl_idx], na.rm = TRUE)
+  n_c <- sum(ctrl_idx)
+
+  # One-way ANOVA for pooled MSE (used by Williams/Dunnett/t pooled)
+  aov_fit <- stats::aov(resp ~ df$Dose_factor)
+  mse <- tryCatch(summary(aov_fit)[[1]]["Residuals", "Mean Sq"], error = function(e) NA_real_)
+  df_resid <- tryCatch(summary(aov_fit)[[1]]["Residuals", "Df"], error = function(e) NA_real_)
+
+  # Per-dose counts and SDs
+  group_n <- aggregate(resp ~ df$Dose_factor, FUN = function(z) sum(!is.na(z)))
+  group_sd <- aggregate(resp ~ df$Dose_factor, FUN = stats::sd)
+  names(group_n) <- c("Dose_factor", "n_t")
+  names(group_sd) <- c("Dose_factor", "sd_t")
+
+  # Helper to compute one-/two-sided quantile tail
+  qtail <- function(df, alt, alpha) {
+    if (alt == "two-sided") stats::qt(1 - alpha / 2, df = df) else stats::qt(1 - alpha, df = df)
+  }
+
+  # Initialize result container
+  res_list <- list()
+
+  if (test %in% c("Williams_PMCMRplus", "Williams_JG")) {
+    # Direction for Williams
+    direction <- if (alternative == "smaller") "decreasing" else "increasing"
+    bw <- try(drcHelper::broom_williams(x, data = df, method = if (test == "Williams_PMCMRplus") "Williams_PMCMRplus" else "Williams_JG", direction = direction), silent = TRUE)
+    if (inherits(bw, "try-error") || nrow(bw) == 0) stop("broom_williams failed to produce results.")
+
+    bw <- as.data.frame(bw)
+    # Map Dose from comparison ("a - 0 <= 0" or "a - 0 >= 0")
+    comp_clean <- gsub("\\s*(<=|>=)\\s*0\\s*$", "", as.character(bw$comparison))
+    dose_contrast <- suppressWarnings(as.numeric(gsub(",", ".", sub(" - 0$", "", comp_clean))))
+    if (all(is.na(dose_contrast)) && test == "Williams_JG") {
+      # Fallback: assign ascending treatment doses excluding control
+      trt_levels <- levels(df$Dose_factor)[-1]
+      dose_contrast <- suppressWarnings(as.numeric(trt_levels))
+      dose_contrast <- dose_contrast[seq_len(min(length(dose_contrast), nrow(bw)))]
+      # If lengths mismatch, pad with NA
+      if (length(dose_contrast) < nrow(bw)) {
+        dose_contrast <- c(dose_contrast, rep(NA_real_, nrow(bw) - length(dose_contrast)))
+      }
+    }
+    names(bw)[names(bw) == "t'-crit"] <- "Tcrit"
+    # Merge per-dose counts
+    trt_n <- merge(data.frame(Dose_factor = factor(dose_contrast, levels = levels(df$Dose_factor))),
+                   group_n, by = "Dose_factor", all.x = TRUE)
+    n_t <- trt_n$n_t
+    # SE_diff via pooled MSE
+    if (is.na(mse)) stop("Cannot compute pooled MSE from ANOVA; MDD cannot be computed.")
+    SE_diff <- sqrt(mse * (1 / n_c + 1 / n_t)) # $$SE_{diff} = \sqrt{MSE \cdot (1/n_c + 1/n_t)}$$
+    MDD <- bw$Tcrit * SE_diff # $$MDD = T_{crit} \cdot SE_{diff}$$
+    MDD_pct <- if (!is.na(mu_c) && mu_c != 0) 100 * MDD / mu_c else NA_real_ # $$MDD\% = 100 \cdot MDD / \mu_c$$
+    res_list <- list(
+      dose = dose_contrast,
+      n_c = rep(n_c, length(n_t)),
+      n_t = n_t,
+      df = rep(df_resid, length(n_t)),
+      SE_diff = SE_diff,
+      Tcrit = bw$Tcrit,
+      MDD = MDD,
+      MDD_pct = MDD_pct,
+      method = test
+    )
+  } else if (test == "Dunnett") {
+    # Need multcomp
+    if (!requireNamespace("multcomp", quietly = TRUE)) stop("Package 'multcomp' is required for Dunnett MDD.")
+    # Build Dunnett contrasts programmatically; calling multcomp::mcp via do.call
+    # injects the factor name without requiring multcomp to be attached.
+    dunnett_contrasts <- do.call(multcomp::mcp, stats::setNames(list("Dunnett"), factor_name))
+    # Construct aov from formula x
+    aov_model <- stats::aov(x, data = df)
+    gl <-
multcomp::glht(aov_model, linfct = dunnett_contrasts) + sm <- summary(gl) + # Dose from rownames of coefficients like "0.0448 - 0" + comp <- names(sm$test$coefficients) + dose_contrast <- suppressWarnings(as.numeric(gsub(",", ".", sub(" - 0$", "", comp)))) + # Standard errors per contrast come from sigma in summary (vector) + SE_diff <- as.numeric(sm$test$sigma) + # Critical value from qfunction + qfun <- sm$test$qfunction + Tcrit <- tryCatch(as.numeric(qfun(conf.level = 1 - alpha, adjusted = TRUE)), error = function(e) NA_real_) + if (is.na(Tcrit)) { + warning("Could not obtain Dunnett adjusted critical value; using unadjusted qt with residual df.") + Tcrit <- qtail(df_resid, alternative, alpha) + } + MDD <- Tcrit * SE_diff + MDD_pct <- if (!is.na(mu_c) && mu_c != 0) 100 * MDD / mu_c else NA_real_ + # n_t per contrast + trt_n <- merge(data.frame(Dose_factor = factor(dose_contrast, levels = levels(df$Dose_factor))), + group_n, by = "Dose_factor", all.x = TRUE) + n_t <- trt_n$n_t + res_list <- list( + dose = dose_contrast, + n_c = rep(n_c, length(n_t)), + n_t = n_t, + df = rep(df_resid, length(SE_diff)), + SE_diff = SE_diff, + Tcrit = rep(Tcrit, length(SE_diff)), + MDD = MDD, + MDD_pct = MDD_pct, + method = "Dunnett_multcomp" + ) + } else if (test == "t") { + # Pooled-variance per contrast (control vs each treatment) + # Compute control SD + sd_c <- stats::sd(resp[ctrl_idx]) + trt_levels <- levels(df$Dose_factor)[-1] + out_rows <- lapply(trt_levels, function(lv) { + trt_idx <- df$Dose_factor == lv + n_t <- sum(trt_idx) + sd_t <- stats::sd(resp[trt_idx]) + # pooled variance + sp2 <- ((n_c - 1) * sd_c^2 + (n_t - 1) * sd_t^2) / (n_c + n_t - 2) # $$s_p^2 = ((n_c-1)s_c^2 + (n_t-1)s_t^2) / (n_c+n_t-2)$$ + SE_diff <- sqrt(sp2 * (1 / n_c + 1 / n_t)) # $$SE_{diff} = \sqrt{s_p^2 (1/n_c + 1/n_t)}$$ + df_ct <- n_c + n_t - 2 + Tcrit <- qtail(df_ct, alternative, alpha) + MDD <- Tcrit * SE_diff + MDD_pct <- if (!is.na(mu_c) && mu_c != 0) 100 * MDD / mu_c else NA_real_ + data.frame(dose = suppressWarnings(as.numeric(lv)), n_c = n_c, n_t = n_t, + df = df_ct, SE_diff = SE_diff, Tcrit = Tcrit, MDD = MDD, MDD_pct = MDD_pct, + method = "Student_t") + }) + res_df <- do.call(rbind, out_rows) + return(res_df) + } else if (test == "Welch") { + # Welch SE and df per contrast + sd_c <- stats::sd(resp[ctrl_idx]) + trt_levels <- levels(df$Dose_factor)[-1] + out_rows <- lapply(trt_levels, function(lv) { + trt_idx <- df$Dose_factor == lv + n_t <- sum(trt_idx) + sd_t <- stats::sd(resp[trt_idx]) + SE_diff <- sqrt(sd_c^2 / n_c + sd_t^2 / n_t) # $$SE_{diff} = \sqrt{s_c^2/n_c + s_t^2/n_t}$$ + df_w <- (sd_c^2 / n_c + sd_t^2 / n_t)^2 / + ((sd_c^2 / n_c)^2 / (n_c - 1) + (sd_t^2 / n_t)^2 / (n_t - 1)) # $$df_{Welch} = \frac{(s_c^2/n_c + s_t^2/n_t)^2}{(s_c^2/n_c)^2/(n_c-1) + (s_t^2/n_t)^2/(n_t-1)}$$ + Tcrit <- qtail(df_w, alternative, alpha) + MDD <- Tcrit * SE_diff + MDD_pct <- if (!is.na(mu_c) && mu_c != 0) 100 * MDD / mu_c else NA_real_ + data.frame(dose = suppressWarnings(as.numeric(lv)), n_c = n_c, n_t = n_t, + df = df_w, SE_diff = SE_diff, Tcrit = Tcrit, MDD = MDD, MDD_pct = MDD_pct, + method = "Welch_t") + }) + res_df <- do.call(rbind, out_rows) + return(res_df) + } + + # Assemble and return tibble for Williams/Dunnett branches + res_df <- data.frame( + Dose = res_list$dose, + n_c = res_list$n_c, + n_t = res_list$n_t, + df = res_list$df, + SE_diff = res_list$SE_diff, + Tcrit = res_list$Tcrit, + MDD = res_list$MDD, + MDD_pct = res_list$MDD_pct, + method = res_list$method, + alpha = alpha, + alternative = alternative, + 
control_mean = mu_c
+  )
+  # Order by Dose ascending
+  res_df <- res_df[order(res_df$Dose), ]
+  res_df
+}
+
+```
+
+Usage examples
+
+- Williams PMCMRplus (your MOCK0065 Growth Rate):
+
+```{r eval=FALSE}
+# Helper to convert dose strings to numeric, handling various formats
+convert_dose <- function(x) {
+  if (length(x) == 0) return(numeric(0))
+  xc <- as.character(x)
+  xc <- trimws(xc)
+  xc[xc %in% c("", "n/a", "NA")] <- NA_character_
+  # Normalize decimal commas and keep scientific notation (e.g., "4,48E-2" -> "4.48E-2")
+  xc <- gsub(",", ".", xc, fixed = TRUE)
+  out <- suppressWarnings(as.numeric(xc))
+  return(out)
+}
+
+convert_numeric <- function(x) {
+  if (length(x) == 0) return(numeric(0))
+  xc <- as.character(x)
+  xc <- trimws(xc)
+  xc[xc %in% c("", "n/a", "NA")] <- NA_character_
+  xc <- gsub(",", ".", xc, fixed = TRUE)
+  suppressWarnings(as.numeric(xc))
+}
+
+dose_from_comparison <- function(comp_vec) {
+  if (length(comp_vec) == 0) return(numeric(0))
+  vapply(comp_vec, function(s) {
+    if (is.na(s)) return(NA_real_)
+    parts <- strsplit(s, " - ", fixed = TRUE)[[1]]
+    convert_dose(parts[1])
+  }, FUN.VALUE = numeric(1))
+}
+w_ep <- test_cases_data %>% dplyr::filter(`Study ID` == "MOCK0065", Endpoint == "Growth Rate")
+w_ep$Dose_numeric <- convert_dose(w_ep$Dose)
+w_ep$Dose_factor <- factor(w_ep$Dose_numeric, levels = sort(unique(w_ep$Dose_numeric)))
+res_william <- broom_williams(Response ~ Dose_factor, data = w_ep,
+                              test = "Williams_PMCMRplus", alpha = 0.05, alternative = "less")
+mdd_w_pm <- compute_mdd_generic(Response ~ Dose_factor, data = w_ep,
+                                test = "Williams_PMCMRplus", alpha = 0.05, alternative = "smaller")
+print(mdd_w_pm)
+```
+
+
+- Dunnett:
+```{r eval=FALSE}
+mdd_dunn <- compute_mdd_generic(Response ~ Dose_factor, data = w_ep,
+                                test = "Dunnett", alpha = 0.05, alternative = "smaller")
+print(mdd_dunn)
+```
+- Student’s t (pooled, control vs each dose):
+```{r eval=FALSE}
+mdd_t <- compute_mdd_generic(Response ~ Dose_factor, data = w_ep,
+                             test = "t", alpha = 0.05, alternative = "smaller")
+print(mdd_t)
+```
+- Welch:
+```{r eval=FALSE}
+mdd_welch <- compute_mdd_generic(Response ~ Dose_factor, data = w_ep,
+                                 test = "Welch", alpha = 0.05, alternative = "two-sided")
+print(mdd_welch)
+```
+Notes and options
+- Alternative handling: MDD uses a critical value magnitude; the choice of one-sided vs two-sided changes the quantile. If your expected MDD% was computed with two-sided thresholds, set alternative = "two-sided".
+- For Dunnett, the adjusted Tcrit is taken from `summary(glht)$test$qfunction`. If unavailable, we fall back to an unadjusted qt with the residual df.
+- For `Williams_JG`, comparison strings might not carry doses; we assign doses in ascending order excluding the control. If you need exact mapping, we can inspect the internal `williamsTest_JG` output via drcHelper to align rows to doses.
+- You can extend the test argument to include other tests (e.g., “Dunn”) once you define the appropriate Tcrit. Dunn typically uses z critical values; for MDD you would use zcrit × SE_diff on a rank-based scale, which is a different construct; unless your expected MDD% is defined explicitly for Dunn, I recommend sticking to the parametric tests above.
+- If you’d like me to integrate this into your validation report, I can add an MDD% metric branch that uses compute_mdd_generic under the hood and compares to expected “MDD%” rows for Williams.
+
+# Implementation in `drcHelper`
+
+Reusing existing test objects and building a modular system around them is far more efficient and robust.
+ +```{r} +library(drcHelper) +Results <- broom_williams(Response ~ factor(Dose), data = dat_medium, + test = "Williams_JG", alpha = 0.05, alternative = "less") + +compute_mdd_williams(Results,data=dat_medium,formula = Response ~ factor(Dose)) +``` + + + + +# Implementation in the Validation for Williams + +Here's a self-contained helper to compute per-dose MDD% for Williams and return a tidy table. It uses your existing helpers and broom_williams output. + + +# How to integrate into the validation workflow + +- In the Williams run_actual handler, compute the prelim summary as before (for Mean, SD, %Inhibition, CV), and also compute MDD%: + +```r +prelim <- compute_prelim_summary(endpoint_data) +mdd_tbl <- compute_mdd_williams(endpoint_data, alternative = alternative) + +# Return both; the generic runner can join MDD% when expected rows exist +list(actual_df = actual_df, group_means = prelim, mdd = mdd_tbl) +``` + +- In your expected parsing (parse_expected_metrics for Williams), you already capture "MDD%": + +```r +out$mdd <- pick("\\bMDD%\\b"); +names(out$mdd)[2] <- "Expected_MDDpct" +``` + + +- In the generic runner's join stage for Williams, add: + +```{r eval=FALSE} +if (!is.null(exp_splits$mdd) && nrow(exp_splits$mdd)) { + mdd_join <- exp_splits$mdd %>% + dplyr::left_join(actual$mdd[, c("Dose", "Actual_MDDpct")], by = "Dose") %>% + dplyr::mutate( + Endpoint = endpoint, + Diff = abs(Actual_MDDpct - Expected_MDDpct), + Status = dplyr::case_when( + is.na(Expected_MDDpct) | is.na(Actual_MDDpct) ~ "MISSING", + Diff <= tolerance ~ "PASS", TRUE ~ "FAIL") + ) %>% + dplyr::transmute(Endpoint, Dose, metric = "MDD%", Actual = Actual_MDDpct, Expected = Expected_MDDpct, Status) + pieces <- c(pieces, list(mdd_join)) +} +``` + + +### Handling "greater" vs "smaller" + +- The formula for MDD% doesn't change with direction; it's the minimum absolute difference required for significance. You typically compare magnitudes (positive values). If you want to encode a "directional" MDD% (signed), you can attach sign = ifelse(alternative=="greater", +1, -1) and report MDD%*sign, but most reports use a positive threshold. + +### Unequal sample sizes and heteroscedasticity + +- The SE_diff above already handles unequal n via 1/n_c + 1/n_t. +- If variances are heterogeneous and a Welch-type approach is used, you would estimate SE_diff via group-specific variances. Williams is based on a pooled-variance ANOVA, so the pooled MSE is appropriate here. + +## Optional: power-based MDD% + +- If we later need to compute a power-based MDD% (target power 1−β), use: + - $$MDD_{power} = \left(t_{1-\alpha,\ df} + t_{1-\beta,\ df}\right) \cdot SE_{diff}$$ + - Then $$MDD\%_{power} = 100 \cdot MDD_{power} / \mu_c$$ +- df can be taken from the residual df of the ANOVA model. You can parameterize β (e.g., 0.2 for 80% power) and compute the quantiles using stats::qt. + +## Notes + +We probably need to adjust if the expected MDD% uses a different SE (e.g., contrast-specific SE in Williams instead of the simple (1/n_c + 1/n_t) form). 
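+
+For reference, the core calculation is small enough to check by hand. A quick numeric sanity check of the MDD% formulas above, using hypothetical inputs (not taken from any study):
+
+```{r}
+# Hypothetical inputs: pooled MSE from the ANOVA, group sizes, control mean,
+# and a one-sided critical value (e.g., a Williams t'-crit for that step).
+mse   <- 0.0004   # pooled residual mean square
+n_c   <- 6        # control replicates
+n_t   <- 4        # treatment replicates
+mu_c  <- 0.127    # control mean
+Tcrit <- 2.20     # critical value at alpha (significance-threshold version)
+
+SE_diff <- sqrt(mse * (1 / n_c + 1 / n_t))  # SE of the control-treatment difference
+MDD     <- Tcrit * SE_diff                  # absolute MDD on the response scale
+MDD_pct <- 100 * MDD / abs(mu_c)            # as a percentage of the control mean
+MDD_pct                                     # about 22.4% for these inputs
+```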
diff --git a/vignettes/articles/Quantal-Data.Rmd b/vignettes/articles/Quantal-Data.Rmd
index e0b2e27..1e42e7d 100644
--- a/vignettes/articles/Quantal-Data.Rmd
+++ b/vignettes/articles/Quantal-Data.Rmd
@@ -1,8 +1,5 @@
 ---
 title: "Quantal Data"
-resource_files:
-  - article_assets/quantal_NOEC.png
-  - article_assets/binomial_tank_effects_visualization.png
 editor_options:
   chunk_output_type: console
 ---
@@ -16,6 +13,7 @@ knitr::opts_chunk$set(
 )
 source("../knitr-setup.R")
 library(tidyverse)
+library(here)
 ```
 
 ```{r}
@@ -29,7 +27,7 @@ Analysis of quantal data typically involves the following steps as shown in the
 ```{r echo=FALSE,out.width="90%"}
 #| fig.alt: >
 #|   Quantal NOEC Flowchart
-knitr::include_graphics("article_assets/quantal_NOEC.png")
+knitr::include_graphics("../../pkgdown/assets/quantal_NOEC.png")
 ```
 
 ## An example dataset
@@ -441,7 +439,7 @@ ggsave("binomial_tank_effects_visualization.png", combined_plot, width = 12, hei
 ```
 
 ```{r echo=FALSE,eval=FALSE,include=FALSE}
-knitr::include_graphics("./../assets/binomial_tank_effects_visualization.png")
+knitr::include_graphics(here::here("vignettes", "assets", "binomial_tank_effects_visualization.png"))
 ```
 
 The plot on the upper left shows the survival proportions for each tank (colored points) at each dose level. The black diamonds represent the mean survival proportion at each dose level, and the dashed line shows the theoretical dose-response relationship without tank effects. The variation among tanks at the same dose level illustrates the tank effect.
diff --git a/vignettes/articles/TSK_method.Rmd b/vignettes/articles/TSK_method.Rmd
index 27804a1..689d6fc 100644
--- a/vignettes/articles/TSK_method.Rmd
+++ b/vignettes/articles/TSK_method.Rmd
@@ -1,5 +1,7 @@
 ---
 title: " Spearman-Karber method"
+editor_options:
+  chunk_output_type: console
 ---
 
 ```{r, include = FALSE}
@@ -8,13 +10,303 @@ knitr::opts_chunk$set(
   comment = "#>"
 )
 source("../knitr-setup.R")
+library(drcHelper)
 ```
 
 ## Backaground
 
-Spearman-Karber method is still in use in regulatory Ecotox endpoints derivation. A more common approach would be derive a EC50 from interpolation by a smoothing line.
+The Spearman-Karber method is still in use in regulatory ecotox endpoint derivation under certain scenarios. A more common approach would be to derive an EC50 by interpolation from a smoothing line.
+
+# Various Implementations
+
+Here is a comparison between `drcHelper::SpearmanKarber_modified`, the original `ecotoxicology::SpearmanKarber`, and `drcHelper::tsk_auto` (an adaptive "trimmed Spearman–Karber").
+
+## `drcHelper::SpearmanKarber_modified` vs. `ecotoxicology::SpearmanKarber`
+
+`drcHelper::SpearmanKarber_modified` is based on a function written by Sarah and Harold. I provided a fixed and refactored version. This revision corrected several subtle bugs related to loop indexing, unsafe handling of log10(0), and hard-coded data sizes, making the function more robust and reliable for generic data.
+
+In summary, the key conceptual differences are:
+
+* **Monotonicity Handling:** The original `ecotoxicology::SpearmanKarber` uses smoothing, while `drcHelper::SpearmanKarber_modified` uses a more complex method of pruning and pooling data based on an estimated background mortality rate (`c`) when control mortality is positive.
+* **Confidence Intervals:** `drcHelper::SpearmanKarber_modified` incorporates Fieller's Theorem for more accurate confidence intervals when control mortality is present, a feature the original lacks.
+* **Flexibility:** `drcHelper::SpearmanKarber_modified` was designed to handle variable totals per dose (`n_i`), while the original assumes a constant `N`.
+
+
+## TSK
+
+The first version of the wrapper function `tsk_auto` only auto-trims if `tsk()` throws a specific trim error. When dose 0 is included in the data, `tsk()` did not error but returned a degenerate estimate (LD50 = 0, NaN CI); turning off log doses avoids that. During the comparison I also wrote a small code patch for `drcHelper:::tsk_auto.data.frame` that automatically detects when a log-transform is requested on data with a zero dose and applies a minimal trim to prevent errors.
+
+`drcHelper:::tsk_auto` prioritizes a monotone, well-behaved response profile by trimming tails rather than smoothing (like the original) or pruning/pooling by a background-rate threshold (like `drcHelper::SpearmanKarber_modified`).
+
+## Core estimator (all three are Spearman–Karber variants)
+- Base SK trapezoid on log-dose:
+  - $m \equiv \sum_{i} \Delta p_i \cdot \frac{\log_{10} x_i + \log_{10} x_{i+1}}{2}$, where $\Delta p_i = p_{i+1}-p_i$.
+  - $\mathrm{LC50} = 10^{m}$.
+- The differences lie in how p is preprocessed (Abbott correction, trimming/pruning, smoothing) and in how the variance/CI are computed.
+
+### How each method preprocesses p (monotonicity and control mortality)
+- ecotoxicology::SpearmanKarber (original):
+  - If proportions aren't monotone, it smooths them to a non-decreasing sequence, then applies Abbott's correction.
+  - A single N is assumed and used in the variance.
+- SpearmanKarber_modified (drcHelper):
+  - Always does an explicit Abbott-like correction from the control:
+    - $p_i^{A} = \frac{p_i - p_0}{1 - p_0}$, with $p_i = r_i/n_i$.
+  - If p0=0: add "ghost" endpoints with p=0 and p=1 at extrapolated log-doses; integrate over the full [0,1].
+  - If p0>0: estimate background mortality c by pooled cumulative minimum:
+    - $c = \min_k \frac{\sum_{i=1}^k r_i}{\sum_{i=1}^k n_i}$, with $n_c$ the pooled denominator at the argmin.
+  - Pool/prune doses with $p_i < c$; then, still in the p0>0 case:
+    - After pruning and rescaling to $\tilde p$, compute the raw trapezoid:
+    - $\mu^{\mathrm{raw}} = \sum_{i=1}^{m} (\tilde{p}_{i+1}-\tilde{p}_i)\cdot \frac{\log_{10}x_i+\log_{10}x_{i+1}}{2}$.
+    - Adjust back for c using the first-segment midpoint $a = \frac{\log_{10}x_0+\log_{10}x_1}{2}$:
+    - $\mu = \frac{\mu^{\mathrm{raw}} - c\,a}{1 - c}$, $\mathrm{LC50}=10^\mu$.
+- tsk_auto (trimmed SK):
+  - With indices L..U after trim and renormalized $\tilde p$:
+    - $\mu_{TSK} = \sum_{i=L}^{U-1} (\tilde{p}_{i+1}-\tilde{p}_i)\cdot \frac{\log_{10}x_i+\log_{10}x_{i+1}}{2}$,
+    - $\mathrm{LC50}=10^{\mu_{TSK}}$.
+
+#### Variance and confidence intervals
+
+- Original `ecotoxicology::SpearmanKarber`:
+  - $V_m = \sum_{i=2}^{n-1} \Delta p_i(1-\Delta p_i)\cdot \frac{(\log_{10}x_{i+1}-\log_{10}x_{i-1})^2}{4N-4}$ (constant N).
+  - 95% CI: $m \pm 2\sqrt{V_m}$; back-transform to LC50.
+- Modified `drcHelper::SpearmanKarber_modified`, p0=0:
+  - Per-dose n_i: $\operatorname{Var}(\mu) = \sum_{i=2}^{n-1} 0.25(\log_{10}x_{i-1}-\log_{10}x_{i+1})^2 \frac{p_i(1-p_i)}{n_i}$.
+  - CI: $\mu \pm z_{1-\alpha/2}\sqrt{\operatorname{Var}(\mu)}$.
+- Modified `drcHelper::SpearmanKarber_modified`, p0>0 (Fieller on the log scale):
+  - Define $V_{12} = \left(\frac{\log_{10} x_0 + \log_{10} x_1}{2}\right)^2 \frac{c(1-c)}{n_c}$, $V_{22} = \frac{c(1-c)}{n_c}$, $V_{11} = \operatorname{Var}(\mu)+V_{12}$.
+  - With $z=z_{1-\alpha/2}$, $B=1-c$, $g=\frac{z^2V_{22}}{B^2}$:
+    - $\sigma = \frac{z}{B} \cdot \frac{\sqrt{V_{11}-2\mu V_{12}+\mu^2V_{22}-g\left(V_{11}-\frac{V_{12}^2}{V_{22}}\right)}}{1-g}$,
+    - CI: $\mu - \frac{gV_{12}}{V_{22}} \pm \sigma$; back-transform.
+- tsk_auto (`drcHelper::tsk_auto`, typical TSK):
+  - Uses the same normal-approximation SK variance as the p0=0 branch above, but on the trimmed-and-renormalized grid:
+    - $\operatorname{Var}(\mu_{TSK}) = \sum_{i=L+1}^{U-1} 0.25(\log_{10}x_{i-1}-\log_{10}x_{i+1})^2 \frac{\tilde p_i(1-\tilde p_i)}{n_i}$,
+    - CI: $\mu_{TSK} \pm z_{1-\alpha/2}\sqrt{\operatorname{Var}(\mu_{TSK})}$.
+  - Fieller is generally not used in trimmed SK; CIs are normal-approximation based on the trimmed segment.
+
+### Key contrasts: tsk_auto vs modified vs original
+- Handling non-monotonicity:
+  - tsk_auto: trims tails and renormalizes; the trim is chosen automatically from the data.
+  - Modified: either uses ghost endpoints (p0=0) or prunes doses below a data-driven c and rescales (p0>0).
+  - Original: smooths to enforce a monotonic increase, then applies Abbott.
+- Control mortality:
+  - tsk_auto: supplies the control dose (control=0 by default). The underlying tsk typically corrects via the control or excludes it when trimming. It does not estimate a background c via cumulative pooling.
+  - Modified: explicit Abbott correction and, if p0>0, a pooled minimum estimator of background c, plus Fieller CIs.
+  - Original: Abbott after smoothing (single N).
+- Log transformation:
+  - tsk_auto: can turn log-doses on/off via use.log.doses; practical when x includes a 0 dose (control).
+  - Modified: always uses log10; explicitly creates ghost endpoints to avoid depending on log10(0).
+  - Original: uses log10 internally; relies on na.rm=TRUE to avoid log10(0) breaks if present.
+- Variance/CI:
+  - tsk_auto: normal approximation on the trimmed/renormalized grid, with per-dose n_i.
+  - Modified: normal approximation for p0=0; Fieller when p0>0.
+  - Original: constant-N variance and a fixed "2" multiplier for the 95% CI.
+
+### Practical guidance
+
+- If you want robust, minimal-tuning monotone behavior with potentially messy tails: tsk_auto is convenient; it chooses a trim that makes the estimation feasible and stable, avoids over-influence of outlying tails, and respects variable n_i.
+- If nonzero control mortality is a concern and you want a background-rate-aware estimate with Fieller CIs: the modified function targets that scenario directly.
+- If your totals are constant and you prefer smoothing to trimming or pruning: the original function is simplest.
+
+
+### How results will typically differ from tsk_auto
+- When control mortality > 0:
+  - `drcHelper::SpearmanKarber_modified` estimates c by pooled cumulative minima and applies Fieller CIs; tsk_auto trims instead and then uses normal-approximation CIs. Estimates and CIs will differ, especially if early-dose mortalities fluctuate around the background.
+- When tails are noisy:
+  - tsk_auto removes tail influence via the trim; the modified function (p0=0) keeps the full range but adds ghost endpoints; the original smooths instead. Trimming usually reduces variance and sensitivity to extreme tail spacing.
+- With variable totals n_i:
+  - Both tsk_auto and `drcHelper::SpearmanKarber_modified` use per-dose n_i in the variance; `ecotoxicology::SpearmanKarber` assumes a constant N and may misstate precision when n_i vary.
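+
+To make the shared core concrete, below is a minimal base-R sketch of the plain (untrimmed) Spearman-Karber trapezoid, implementing the $m$ and $\operatorname{Var}(\mu)$ formulas above. It assumes the proportions are already monotone, control mortality is zero, and the zero dose has been dropped; the function name `sk_trapezoid` is purely illustrative and not part of any package.
+
+```{r}
+# Minimal Spearman-Karber trapezoid on log10 doses (illustrative sketch only;
+# no smoothing, trimming, or Abbott correction).
+sk_trapezoid <- function(x, n, r, conf.level = 0.95) {
+  stopifnot(all(x > 0), all(diff(x) > 0), length(x) == length(n), length(x) == length(r))
+  p  <- r / n                                          # observed proportions (assumed monotone)
+  lx <- log10(x)
+  m  <- sum(diff(p) * (lx[-length(lx)] + lx[-1]) / 2)  # m = sum dp_i * log-midpoint_i
+  i  <- 2:(length(x) - 1)                              # interior doses only
+  Vm <- sum(0.25 * (lx[i + 1] - lx[i - 1])^2 * p[i] * (1 - p[i]) / n[i])
+  z  <- qnorm(1 - (1 - conf.level) / 2)
+  ci <- m + c(-1, 1) * z * sqrt(Vm)
+  list(log10LC50 = m, LC50 = 10^m, CI_LC50 = 10^ci)
+}
+
+# Same data as below, with the control (dose 0) removed:
+sk_trapezoid(x = c(0.2, 0.3, 0.375, 0.625, 2), n = rep(30, 5), r = c(1, 3, 16, 24, 30))
+```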
+ + + +```{r} +## install.packages("isotone") +## devtools::install_github(repo="brsr/tsk") + +library(drcHelper) +## library(tsk) +tsk(c(1, 10, 100, 1000), 20, c(0, 3, 17, 20)) + +data(hamilton) + + +``` + + +```{r} +x<-c(0,0.2,0.3,0.375,0.625,2) +n<-c(30,30,30,30,30,30) +r<-c(0,1,3,16,24,30) +skm <- SpearmanKarber_modified(x,r,n) +skm +tsk_auto(x,n,r, use.log.doses = FALSE) +tsk_auto(x,n,r, use.log.doses = TRUE) + +drcHelper::tsk(x, n, r, trim = 1/30, use.log.doses = FALSE) +drcHelper::tsk(x, n, r, trim = 1/30, use.log.doses = TRUE) + + +## ecotoxicology::TSK errored for A=0, suggesting A ≥ 1/30 ≈ 0.033333… to make the response span usable for trimming. +## A = 0.0333 failed due to a formatting bug in that version; use A that makes A*100 an integer (e.g., 0.04). +ecotoxicology::TSK(x*100, r, n, A = 0.04, conf = 0.95) +p <- r/n +x2 <- x +x2[1] <- 1e-1 ## replace it with very small values. +ecotoxicology::SpearmanKarber(cbind(x2,r,p),N=30, retData = FALSE, showOutput = TRUE,showPlot = FALSE) +ecotoxicology::SpearmanKarber(cbind(x2,r,n),N=30, retData = FALSE, showOutput = TRUE,showPlot = FALSE) + +## remove 0 dose +ecotoxicology::SpearmanKarber(cbind(x[-1],r[-1],p[-1]),N=30, retData = TRUE, showOutput = TRUE,showPlot = FALSE) + + +x=c(0,15.54,20.47,27.92,35.98,55.52) +n=c(40,40,40,40,40,40) +r=c(3,2,6,11,18,33) + +ecotoxicology::SpearmanKarber(cbind(x,r,n),N=30, retData = FALSE, showOutput = TRUE,showPlot = FALSE) + +``` + + + +```{r} +# Your data +x <- c(0, 0.2, 0.3, 0.375, 0.625, 2) +n <- c(30, 30, 30, 30, 30, 30) +r <- c(0, 1, 3, 16, 24, 30) + +# Create a master comparison function +tidy_compare <- function(x, n, r, conf.level = 0.95) { + + # A list to hold the output rows + all_rows <- list() + + # Call each extract function and add the result to the list + all_rows[[length(all_rows) + 1]] <- drcHelper:::extract_SpearmanKarber_modified(x, n, r, conf.level) + all_rows[[length(all_rows) + 1]] <- drcHelper:::extract_tsk_auto(x, n, r, use.log.doses = TRUE, conf.level) + all_rows[[length(all_rows) + 1]] <- drcHelper:::extract_tsk_auto(x, n, r, use.log.doses = FALSE, conf.level) + all_rows[[length(all_rows) + 1]] <- drcHelper:::extract_ecotox_SpearmanKarber(x, n, r) + + # You can easily add more functions here, e.g.: + # all_rows[[length(all_rows) + 1]] <- extract_ecotox_TSK(x, n, r, conf = conf.level) + + # Combine all the single-row data frames into one table + do.call(rbind, all_rows) +} + +# Run the comparison +comparison_table <- tidy_compare(x, n, r) + +# Print the final result +## print(comparison_table) +comparison_table %>% knitr::kable(.,digits = 3) +``` + + +#### Using the wrapper `analyze_SK` function + +```{r} +# Your data +x <- c(0, 0.2, 0.3, 0.375, 0.625, 2) +n <- c(30, 30, 30, 30, 30, 30) +r <- c(0, 1, 3, 16, 24, 30) + +# Define the methods you want to run +methods_to_run <- c("SK_modified", "tsk_auto_log", "tsk_auto_linear", "ecotox_SK") + +# Use lapply to run the analysis for each method +all_results <- lapply(methods_to_run, function(m) { + # The ... 
+  # The ... argument lets you pass method-specific options here if needed,
+  # e.g., analyze_SK(x, n, r, method = m, max.trim = 0.4)
+  analyze_SK(x, n, r, method = m)
+})
+
+# Combine the list of single-row data frames into one final table
+comparison_table <- do.call(rbind, all_results)
+
+# Print the result
+print(comparison_table)
+```
+
+
+
+- All three valid SK variants (your modified function, the trimmed SK with trim = 1/30 ≈ 0.0333 and use.log.doses = FALSE, and the original SpearmanKarber with N = 30) should give very similar LC50 estimates near 0.45, because the responses increase cleanly from 0 to 1 across the doses.
+- Minor differences can arise from:
+  - whether ghost endpoints or trimming are used,
+  - the variance/CI formulas (Fieller vs normal approximation),
+  - whether logs are used with a zero-dose control.
+
+## Notes on GSD and SD_log10
-## Method

+This is related to how we describe uncertainty for log-normal data, and it is a frequent point of confusion.
+
+In short: **they both measure the exact same amount of uncertainty**, but they express it on different scales.
+
+* `SD_log10` is **additive** on the **log10 scale**.
+* `GSD` is **multiplicative** on the **original concentration scale**.
+
+### SD_log10: The Standard Deviation of the log10(LC50)
+
+This is the more straightforward statistical measure. When we perform a Spearman-Karber analysis, we are actually estimating the mean of the distribution of the *logarithms* of the toxic thresholds.
+
+* **What it is:** The standard deviation of our `log10LC50` estimate. If our estimate is $m = \log_{10}(\text{LC50})$, then `SD_log10` is the standard deviation of $m$.
+* **How it's used:** It is used to construct a confidence interval *on the log10 scale*. You add and subtract this value (multiplied by a z-score like 1.96) to get your confidence limits:
+  $\text{95% CI for } \log_{10}(\text{LC50}) = m \pm 1.96 \times \text{SD}_{\log_{10}}$
+
+### GSD: The Geometric Standard Deviation
+
+The GSD is a more intuitive way to express this same uncertainty back on the original concentration scale. Since the underlying distribution is log-normal, the spread is multiplicative, not additive.
+
+* **What it is:** A unitless factor that describes how spread out the data are on the original scale.
+* **How it's derived:** It is the exponentiated standard deviation from the log scale. The relationship depends on the base of the logarithm used.
+  * If using the natural log (`ln`), then $\text{GSD} = e^{\text{SD}_{\ln}}$. The `drcHelper` package uses this convention.
+  * To be consistent with our `SD_log10`, we can define a base-10 GSD as
+    $\text{GSD} = 10^{\text{SD}_{\log_{10}}}$
+    and conversely
+    $\text{SD}_{\log_{10}} = \log_{10}(\text{GSD}).$
+* **How it's used:** It is used to construct a confidence interval by **multiplying and dividing** the LC50 estimate:
+  $\text{95% CI for LC50} = \text{LC50} \times/\div (\text{GSD})^{1.96}$
+  That is, the lower bound is $\text{LC50} / (\text{GSD})^{1.96}$ and the upper bound is $\text{LC50} \times (\text{GSD})^{1.96}$.
+* **Analogy:** Instead of saying "the true value is plus or minus 10 mg/L," you would say "the true value is likely within a **factor of 2** of our estimate." The GSD is exactly this kind of multiplicative factor; for a 95% interval the factor is $(\text{GSD})^{1.96}$, so a factor of 2 corresponds to a GSD of about 1.4.
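+
+The conversion between the two is a one-liner. A minimal sketch with illustrative numbers (not output from any of the functions above) shows that the additive and the multiplicative notation give the identical interval:
+
+```{r}
+LC50     <- 0.45
+SD_log10 <- 0.03
+z        <- qnorm(0.975)
+
+GSD <- 10^SD_log10  # base-10 convention defined above
+
+## Additive CI on the log10 scale, back-transformed
+ci_log <- 10^(log10(LC50) + c(-1, 1) * z * SD_log10)
+
+## Multiplicative CI on the original scale
+ci_gsd <- c(LC50 / GSD^z, LC50 * GSD^z)
+
+all.equal(ci_log, ci_gsd)  # TRUE: same interval, two notations
+```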
+
+
+### Summary Table
+
+| Feature | `SD_log10` | `GSD` (Geometric Standard Deviation) |
+| :--- | :--- | :--- |
+| **Scale** | Logarithmic | Original (concentration) |
+| **Operation** | **Additive** (±) | **Multiplicative** (×/÷) |
+| **Units** | log10(concentration units) | None (it's a factor) |
+| **CI on Log Scale** | `log10(LC50) ± z * SD_log10` | Not used directly |
+| **CI on Original Scale** | Back-transform the log CI | `LC50 ×/÷ (GSD)^z` |
+| **Interpretation** | "The uncertainty is ±0.03 units on the log10 scale." | "The uncertainty is a factor of about 1.07 on the original scale ($10^{0.03} \approx 1.07$)." |
+
+The reason both columns exist in our summary table is that different functions naturally report one or the other. `SpearmanKarber_modified` calculates `SD_log10`, while `drcHelper::tsk_auto` reports the `GSD`. By calculating the corresponding value for each, our table allows a direct, apples-to-apples comparison of the uncertainty calculated by each method.
+
+
+
+## Some Notes
+
+The `tsk` function comes from the `tsk` R package (https://github.com/brsr/tsk). Since it is a single function without dependencies, it is bundled into this helper package for the purpose of validation and verification.
+
+The original goal of that R package was to replicate the results of a DOS program that used to be provided by the EPA to perform the trimmed Spearman-Karber method. The list "expected" contains the results of that DOS program run on the data from Hamilton et al. Some of these are NA because the EPA program didn't return a result. The EPA program uses a confidence of 2*pnorm(2)-1 = 0.9544997 (that is, exactly two sigmas on both sides).
+
 
 ## Other Methods
 
 Two commonly used methods for calculating 50% endpoint using serial dilutions are Spearman-Karber method and Reed and Muench method.
@@ -24,7 +316,7 @@
 The original paper written by Kärber was published in 1931 and is in German [K
 
 log10 50% end point dilution = log10 of dilution showing a mortality next above 50% - (difference of logarithms × logarithm of dilution factor).
 
-Generally, the following formula is used to calculate “difference of logarithms” (difference of logarithms is also known as “proportionate distance” or “interpolated value”): Difference of logarithms = [(mortality at dilution next above 50%)-50%]/[(mortality next above 50%)-(mortality next below 50%)].
+Generally, the following formula is used to calculate "difference of logarithms" (difference of logarithms is also known as "proportionate distance" or "interpolated value"): Difference of logarithms = [(mortality at dilution next above 50%)-50%]/[(mortality next above 50%)-(mortality next below 50%)].
 
 ### Spearman-Karber method
 
@@ -50,25 +342,6 @@
 Formula 2 (if any accidental death occurred):
 
 log10 50% end point dilution = -(total death score + 0.5) × log dilution factor.
 
-## Some Notes
-
-`tsk` function comes from the R package. Since it is a single function without dependencies, it is bundled into this helper package for the purpose of validation and verification.
-
-
-The original goal of this R package was to replicate the results of a DOS program that used to be provided by the EPA to perform the trimmed Spearman-Karber method. The list "expected" contains the results of that DOS program run on the data from Hamilton et al. Some of these are NA because the EPA program didn't return a result. The EPA program uses a confidence of 2*pnorm(2)-1=0.9544997 (that is, exactly two sigmas on both sides).
-
-```{r}
-## install.packages("isotone")
-## devtools::install_github(repo="brsr/tsk")
-
-library(drcHelper)
-## library(tsk)
-tsk(c(1, 10, 100, 1000), 20, c(0, 3, 17, 20))
-
-data(hamilton)
-
-
-```
 
 ## References
diff --git a/vignettes/articles/Verification-of-Williams-Test.Rmd b/vignettes/articles/Verification-of-Williams-Test.Rmd
new file mode 100644
index 0000000..aa4bff1
--- /dev/null
+++ b/vignettes/articles/Verification-of-Williams-Test.Rmd
@@ -0,0 +1,79 @@
+---
+title: "Verification of Williams' Test"
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(drcHelper)
+```
+
+
+
+Can `broom_williams` run Williams' test one-sided in a chosen direction? Short answer: yes, for the PMCMRplus method. broom_williams doesn't have a formal alternative argument, but it forwards any extra arguments via ... to PMCMRplus::williamsTest, so you can pass alternative = "less" or alternative = "greater". For the Williams_JG method, you should use direction = "decreasing" or direction = "increasing" instead of alternative.
+
+## How to verify with your dat_medium
+
+- Ensure the control is the first level of the dose factor (lowest dose first).
+- Call broom_williams with method = "Williams_PMCMRplus" and pass alternative via ...
+- Observe the "comparison" column: for alternative = "greater" you'll see "<= 0"; for alternative = "less" you'll see ">= 0".
+
+
+
+#### Prepare the data: control as the first level
+```{r}
+dm <- dat_medium
+dm$Dose_factor <- factor(dm$Dose, levels = sort(unique(dm$Dose)))
+```
+
+
+#### PMCMRplus with alternative = "less" (one-sided, decreasing)
+```{r}
+bw_less <- drcHelper::broom_williams(Response ~ Dose_factor,
+                                     data = dm,
+                                     method = "Williams_PMCMRplus",
+                                     alternative = "less")
+print(bw_less)
+
+```
+
+#### PMCMRplus with alternative = "greater" (one-sided, increasing)
+```{r}
+bw_greater <- drcHelper::broom_williams(Response ~ Dose_factor,
+                                        data = dm,
+                                        method = "Williams_PMCMRplus",
+                                        alternative = "greater")
+print(bw_greater)
+```
+
+
+#### Williams_JG uses 'direction' instead of 'alternative'
+```{r}
+bw_dec <- drcHelper::broom_williams(Response ~ Dose_factor,
+                                    data = dm,
+                                    method = "Williams_JG",
+                                    direction = "decreasing")
+print(bw_dec)
+
+bw_inc <- drcHelper::broom_williams(Response ~ Dose_factor,
+                                    data = dm,
+                                    method = "Williams_JG",
+                                    direction = "increasing")
+print(bw_inc)
+```
+
+
+## What to look for
+
+- In the `PMCMRplus` outputs, the comparison strings will end with "<= 0" when alternative = "greater" and ">= 0" when alternative = "less". That reflects the one-sided hypothesis direction used inside `PMCMRplus::williamsTest`.
+- The JG outputs will reflect ">= 0" for direction = "decreasing" and "<= 0" for direction = "increasing".
+
+## Notes
+
+- If you try alternative = "two.sided" with PMCMRplus, it may work (depending on the PMCMRplus::williamsTest implementation), but Williams' test is commonly used as a one-sided trend test.
+- Always ensure the control group is the first factor level; the Williams comparisons and Tcrit depend on the factor ordering.
diff --git a/vignettes/knitr-setup.R b/vignettes/knitr-setup.R
new file mode 100644
index 0000000..66ed9f2
--- /dev/null
+++ b/vignettes/knitr-setup.R
@@ -0,0 +1,15 @@
+# This file sets global knitr options for all vignettes in the project.
+# It ensures that figures are saved with unique names to prevent overwrites
+# during the pkgdown build process.
+
+knitr::opts_chunk$set(
+  # (Currently disabled) Create a unique prefix for each figure based on the Rmd filename.
+  # e.g., for 'My-Vignette.Rmd', figures will be named 'My-Vignette-chunk-label-1.png'
+  # and placed in the 'articles' directory during the pkgdown build.
+  ## fig.path = paste0(tools::file_path_sans_ext(basename(knitr::current_input())), "-"),
+
+  # Provide a default, non-descriptive alt text.
+  # This is better than no alt text, but it's highly recommended to
+  # provide specific, descriptive alt text for each plot using fig.alt in the chunk options.
+  fig.alt = "A plot generated from an R code chunk."
+)
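+
+# Example (hypothetical chunk label and alt text, shown for illustration only):
+# an individual vignette chunk can override the generic default above by
+# setting fig.alt in its own chunk header, e.g.
+#   ```{r lc50-plot, fig.alt = "Dose-response curve with the LC50 marked"}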