Description
We can generalize my Dunnett framework into a modular validator that supports multiple test types (Williams, Welch/Student t, Wilcoxon, Kruskal–Wallis + Dunn, Fisher, Shapiro/Levene), each with its own discovery, execution, and metric mapping. Below are concise, practical code suggestions and targeted R commands I need you to run so I can tailor the handlers precisely.
What we’ll reuse
- Discovery: build_dunnett_fgs pattern, generalized to build_generic_fgs(pattern).
- Helpers: convert_dose, convert_numeric, dose_from_comparison, normalize_alternative.
- Long-format builder and display filters (Actual/Expected/Status with metadata).
Registry design (extensible)
- Register each test type with:
- keyword/pattern(s) to discover expected rows
- discover_fgs function (FG/Study pairs + alternatives; see the sketch just after this list)
- run_actual function to compute real results for one endpoint
- metrics list and tolerances
- comparator type (many_to_one, two_sample, trend, per_group)
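Before the registry code, here is a minimal sketch of build_generic_fgs. It assumes the expected-results table carries `Function group ID`, `Study ID`, and a `Brief description` whose label matches the pattern and encodes the alternative; the normalize_alternative call and the returned columns are assumptions to be aligned with build_dunnett_fgs.
# Hedged sketch: generalize the build_dunnett_fgs discovery to any pattern.
# `data` is kept only for signature parity with build_dunnett_fgs.
build_generic_fgs <- function(res, data, pattern) {
res %>%
dplyr::filter(grepl(pattern, `Brief description`, ignore.case = TRUE)) %>%
dplyr::mutate(alternative = normalize_alternative(`Brief description`)) %>%
dplyr::distinct(`Function group ID`, `Study ID`, alternative)
}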
Code: extend the registry with stubs you can fill in
# Existing 'dunnett' entry stays the same
test_registry <- list(
dunnett = list(
keyword = "Dunnett",
discover_fgs = function(res, data) build_dunnett_fgs(res, data),
run_actual = function(endpoint_data, alternative) {
out <- drcHelper::dunnett_test(
data = endpoint_data,
response_var = "Response",
dose_var = "Dose_numeric",
include_random_effect = FALSE,
alternative = alternative
)
actual_df <- as.data.frame(out$results_table)
actual_df$Dose <- dose_from_comparison(as.character(actual_df$comparison))
names(actual_df)[names(actual_df) == "statistic"] <- "Actual_T"
names(actual_df)[names(actual_df) == "p.value"] <- "Actual_P"
names(actual_df)[names(actual_df) == "estimate"] <- "Actual_Diff"
rownames(actual_df) <- NULL
group_means <- endpoint_data %>%
dplyr::mutate(Dose = convert_dose(Dose)) %>%
dplyr::filter(!is.na(Dose)) %>%
dplyr::group_by(Dose) %>%
dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "T-value", "P-value"),
comparator = "many_to_one"
),
# Williams trend test (uses tukeytrend; drcHelper has broom_williams/getwilliamRes)
williams = list(
keyword = "Williams",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Williams"),
run_actual = function(endpoint_data, alternative) {
# Placeholder: will wire once we see the output shapes
# Likely via drcHelper::broom_williams or drcHelper::getwilliamRes
stop("Williams handler: please provide structure of broom_williams/getwilliamRes outputs (see commands below).")
},
metrics = c("Mean", "T-value", "P-value", "Tcrit", "df", "significance"),
comparator = "trend"
),
# Student t-test (var.equal = TRUE) many-to-one (dose vs control)
student_t = list(
keyword = "Student's t-test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Student's t-test"),
run_actual = function(endpoint_data, alternative) {
# Compute per-dose vs control t-tests (unadjusted)
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ctrl <- ed %>% dplyr::filter(Dose == min(Dose, na.rm = TRUE)) %>% dplyr::pull(Response)
by_dose <- ed %>% dplyr::filter(Dose != min(Dose, na.rm = TRUE)) %>% split(.$Dose)
rows <- lapply(names(by_dose), function(d) {
trt <- by_dose[[d]]$Response
tt <- stats::t.test(trt, ctrl, alternative = switch(tolower(alternative),
"smaller" = "less", "greater" = "greater", "two-sided" = "two.sided"),
var.equal = TRUE)
data.frame(Dose = as.numeric(d), Actual_T = unname(tt$statistic), Actual_P = tt$p.value)
})
actual_df <- dplyr::bind_rows(rows)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "T-value", "P-value", "df"),
comparator = "many_to_one"
),
# Welch t-test (var.equal = FALSE)
welch = list(
keyword = "Welch",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Welch"),
run_actual = function(endpoint_data, alternative) {
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ctrl <- ed %>% dplyr::filter(Dose == min(Dose, na.rm = TRUE)) %>% dplyr::pull(Response)
by_dose <- ed %>% dplyr::filter(Dose != min(Dose, na.rm = TRUE)) %>% split(.$Dose)
rows <- lapply(names(by_dose), function(d) {
trt <- by_dose[[d]]$Response
tt <- stats::t.test(trt, ctrl, alternative = switch(tolower(alternative),
"smaller" = "less", "greater" = "greater", "two-sided" = "two.sided"),
var.equal = FALSE)
data.frame(Dose = as.numeric(d), Actual_T = unname(tt$statistic), Actual_P = tt$p.value)
})
actual_df <- dplyr::bind_rows(rows)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "T-value", "P-value", "df"),
comparator = "many_to_one"
),
# Wilcoxon (Mann–Whitney) per dose vs control
wilcoxon = list(
keyword = "Wilcoxon test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Wilcoxon test"),
run_actual = function(endpoint_data, alternative) {
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ctrl <- ed %>% dplyr::filter(Dose == min(Dose, na.rm = TRUE)) %>% dplyr::pull(Response)
by_dose <- ed %>% dplyr::filter(Dose != min(Dose, na.rm = TRUE)) %>% split(.$Dose)
rows <- lapply(names(by_dose), function(d) {
trt <- by_dose[[d]]$Response
wt <- stats::wilcox.test(trt, ctrl, alternative = switch(tolower(alternative),
"smaller" = "less", "greater" = "greater", "two-sided" = "two.sided"))
data.frame(Dose = as.numeric(d), Actual_W = unname(wt$statistic), Actual_P = wt$p.value)
})
actual_df <- dplyr::bind_rows(rows) # Note: metric names differ (W-value, P-value)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means)
},
metrics = c("Mean", "W-value", "P-value"),
comparator = "many_to_one"
),
# Kruskal–Wallis + Dunn’s post-hoc
dunn = list(
keyword = "Dunn's test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Dunn's test"),
run_actual = function(endpoint_data, alternative) {
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose))
ed <- ed %>% dplyr::filter(!is.na(Dose))
# KW omnibus
kw <- stats::kruskal.test(Response ~ as.factor(Dose), data = ed)
# Dunn post-hoc many-to-one vs control
if (!requireNamespace("rstatix", quietly = TRUE)) stop("Please install rstatix for Dunn's test")
ed <- ed %>% dplyr::mutate(Dose_factor = factor(Dose))
dunn_res <- rstatix::dunn_test(ed, Response ~ Dose_factor, p.adjust.method = "bonferroni") # adjust if needed
# Extract many-to-one contrasts vs lowest Dose (control); rstatix::dunn_test reports group1/group2, not a single comparison string
ctrl_label <- as.character(min(ed$Dose, na.rm = TRUE))
dunn_vs_ctrl <- dunn_res %>%
dplyr::filter(group1 == ctrl_label | group2 == ctrl_label) %>%
dplyr::mutate(Dose = as.numeric(ifelse(group1 == ctrl_label, group2, group1))) %>%
dplyr::rename(Actual_z = statistic, Actual_P = p.adj)
actual_df <- dunn_vs_ctrl %>% dplyr::select(Dose, Actual_z, Actual_P)
group_means <- ed %>% dplyr::group_by(Dose) %>% dplyr::summarise(Actual_Mean = mean(Response, na.rm = TRUE), .groups = "drop")
list(actual_df = actual_df, group_means = group_means, kw = kw)
},
metrics = c("Mean", "z-value", "P-value"), # plus optionally H-statistic for KW
comparator = "many_to_one"
),
# Fisher’s exact (binary endpoints; drcHelper provides many_to_one_fisher_test)
fisher = list(
keyword = "Fishers' test",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Fishers' test"),
run_actual = function(endpoint_data, alternative) {
# Structure depends on Alive/Dead counts; we’ll wire after seeing a concrete example
stop("Fisher handler: please provide example endpoint_data (Alive/Dead/Total) and expected rows.")
},
metrics = c("P-value", "significance"),
comparator = "many_to_one"
),
# Model tests (Shapiro/Levene) on residuals
shapiro = list(
keyword = "Shapiro-Wilk",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Shapiro-Wilk"),
run_actual = function(endpoint_data, alternative) {
# Are these computed on residuals from Response ~ Dose? Please confirm
stop("Shapiro handler: confirm target vector (residuals vs raw group).")
},
metrics = c("W-value", "P-value"),
comparator = "per_group"
),
levene = list(
keyword = "Levene",
discover_fgs = function(res, data) build_generic_fgs(res, data, pattern = "Levene"),
run_actual = function(endpoint_data, alternative) {
stop("Levene handler: confirm target (raw groups vs residuals).")
},
metrics = c("F-value", "P-value"),
comparator = "per_group"
)
)
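For orientation, here is an illustrative dispatch sketch showing how a caller would use one registry entry; run_consolidated_validation_generic already encapsulates this, and the fgs column names (`Study ID`, alternative) are assumptions.
# Illustrative only: look up a registry entry, discover FG/Study pairs, run actuals per study
run_one_test_type <- function(test_key, res, data) {
  reg <- test_registry[[test_key]]
  fgs <- reg$discover_fgs(res, data)
  lapply(seq_len(nrow(fgs)), function(i) {
    endpoint_data <- data %>% dplyr::filter(`Study ID` == fgs$`Study ID`[i])
    reg$run_actual(endpoint_data, fgs$alternative[i]) # returns actual_df + group_means
  })
}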
Generic runner
- You can use the run_consolidated_validation_generic function I provided earlier (it already handles Mean/T/P joins and long-format output).
- For tests with different metric names, adjust the “metric splits” in that function based on reg$metrics and on Brief description labels:
- Wilcoxon: “W-Value”
- Dunn: “z-value”
- Kruskal–Wallis: “H-statistic”
- Significance: compute Actual_Significance as (Actual_P < alpha) if the expected result value is encoded as 0/1; please confirm the expected encoding. A minimal sketch follows this list.
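A minimal sketch of that significance mapping, assuming a 0/1 encoding and alpha = 0.05 (both to be confirmed):
# Assumption: expected significance is encoded 0/1 and alpha = 0.05
alpha <- 0.05
actual_df <- actual_df %>%
  dplyr::mutate(Actual_Significance = as.integer(Actual_P < alpha))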
Dose embedded in Brief description
- Some rows encode the dose inside Brief description, e.g., “Dunn's test, smaller, 0,41, z-value”.
- Add a parser that falls back to description when Dose is “n/a”:
dose_from_description <- function(desc) {
# capture the first numeric token, allowing a comma decimal (scalar helper; use vapply/sapply when applying over a column)
m <- regmatches(desc, regexpr("[0-9]+(?:,[0-9]+)?", desc))
if (length(m) > 0) convert_dose(m[1]) else NA_real_
}
When preparing expected tables, set Dose = coalesce(convert_dose(Dose), dose_from_description(`Brief description`)).
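A minimal sketch of that preparation step (here `expected` is a placeholder name for the prepared expected-rows table):
# Fall back to the dose parsed from Brief description when the Dose column is "n/a"
expected <- expected %>%
  dplyr::mutate(Dose = dplyr::coalesce(
    convert_dose(Dose),
    vapply(`Brief description`, dose_from_description, numeric(1))
  ))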
Key formulas you’ll likely need
- Percent inhibition/reduction per dose relative to control (please confirm your definition):
$$\%\text{Inhibition} = 100 \times \left(1 - \frac{\text{mean}_{\text{treatment}}}{\text{mean}_{\text{control}}}\right)$$
$$\%\text{Reduction} = 100 \times \left(1 - \frac{\text{mean}_{\text{treatment}}}{\text{mean}_{\text{control}}}\right)$$
If your expected results use a different baseline or direction, tell me and I’ll adapt.
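Assuming that definition, with "control" taken as the lowest dose group (as in the handlers above), a hedged helper might be:
# Hedged sketch: percent inhibition/reduction per dose relative to the control mean
add_pct_inhibition <- function(group_means) {
  ctrl_mean <- group_means$Actual_Mean[which.min(group_means$Dose)]
  group_means %>%
    dplyr::mutate(Actual_PctInhibition = 100 * (1 - Actual_Mean / ctrl_mean))
}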
What I need you to run so I can finalize handlers and metric mapping
Please run these on one Williams FG/study and paste the outputs:
- Show structure of Williams outputs from drcHelper:
- Pick an endpoint_data for a Williams case (FG00120 looks promising). Then:
w_ep <- test_cases_data %>% dplyr::filter(`Study ID` == "MOCK0065") # replace with the actual Williams study
w_ep$Dose_numeric <- convert_dose(w_ep$Dose)
w_out <- try(drcHelper::williamsTest_JG(data = w_ep, response_var = "Response", dose_var = "Dose_numeric"), silent = TRUE)
str(w_out)
bw_out <- try(drcHelper::broom_williams(data = w_ep, response_var = "Response", dose_var = "Dose_numeric"), silent = TRUE)
str(bw_out)
- Provide a small slice of expected Williams rows (10–20 rows):
test_cases_res %>%
dplyr::filter(grepl("Williams", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
- For Dunn’s test (Kruskal–Wallis post-hoc), show a slice of expected rows:
test_cases_res %>%
dplyr::filter(grepl("Dunn's test", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
- For Wilcoxon, show expected rows and a sample endpoint_data:
- Expected:
test_cases_res %>%
dplyr::filter(grepl("Wilcoxon test", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
- Data groups:
test_cases_data %>%
dplyr::filter(`Study ID` %in% unique(test_cases_res$`Study ID`[grepl("Wilcoxon test", test_cases_res$`Brief description`, ignore.case = TRUE)])) %>%
dplyr::count(`Study ID`, `Test group`) %>% print(n = 50)
- For Fisher’s test, show endpoint_data columns (Alive/Dead/Total?) and expected rows:
- Data:
test_cases_data %>%
dplyr::filter(!is.na(Alive) | !is.na(Dead) | !is.na(Total)) %>%
dplyr::slice(1:20) %>% print(n = 20)
- Expected:
test_cases_res %>%
dplyr::filter(grepl("Fishers' test", `Brief description`, ignore.case = TRUE)) %>%
dplyr::select(`Function group ID`, `Study ID`, `Endpoint`, `Brief description`, Dose, `expected result value`) %>%
dplyr::slice(1:20) %>% print(n = 20)
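Pending those slices, a hedged sketch of what the Fisher handler might compute per dose vs control; the Alive/Dead column names, one aggregated row per dose, and the 2x2 layout are all assumptions, and drcHelper::many_to_one_fisher_test may replace this entirely.
# Hedged sketch only: per-dose Fisher's exact test vs control from Alive/Dead counts
ed <- endpoint_data %>% dplyr::mutate(Dose = convert_dose(Dose)) %>% dplyr::filter(!is.na(Dose))
ctrl <- ed %>% dplyr::filter(Dose == min(Dose))
trts <- ed %>% dplyr::filter(Dose != min(Dose))
rows <- lapply(seq_len(nrow(trts)), function(i) {
  tab <- rbind(c(ctrl$Alive, ctrl$Dead), c(trts$Alive[i], trts$Dead[i]))
  ft <- stats::fisher.test(tab) # set `alternative` once the expected direction is confirmed
  data.frame(Dose = trts$Dose[i], Actual_P = ft$p.value)
})
actual_df <- dplyr::bind_rows(rows)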
- For Shapiro/Levene "model test": confirm the target vector:
- Are these computed on residuals from lm(Response ~ Dose_numeric)? If yes, we'll fit the same lm, then use stats::shapiro.test on the residuals and car::leveneTest on the grouped raw data, or another agreed approach. Please confirm.
- Answer: no, the residuals are from lm(Response ~ Dose_factor).
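Given that answer, a minimal sketch of what I'd wire for the Shapiro handler (the Levene target, residuals vs raw groups, still needs confirming; car::leveneTest is one option and requires the car package):
# Sketch, assuming residuals come from a one-way lm on Dose as a factor
ed <- endpoint_data %>%
  dplyr::mutate(Dose_factor = factor(convert_dose(Dose))) %>%
  dplyr::filter(!is.na(Dose_factor), !is.na(Response))
fit <- stats::lm(Response ~ Dose_factor, data = ed)
sw <- stats::shapiro.test(stats::residuals(fit)) # Shapiro-Wilk on model residuals
lv <- car::leveneTest(stats::residuals(fit) ~ ed$Dose_factor) # Levene; confirm residuals vs raw groups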
Once I see these structures, I’ll wire the run_actual handlers to return the same shape as your Dunnett handler (actual_df + group_means), extend the expected metric parser for labels like “Tcrit”, “df”, “z-value”, “W-value”, “%Inhibition/%Reduction”, and finalize tolerances by metric/test.