From 99fc7d8689731070cc4a7425cf2f1c02e4ea1023 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 6 Jul 2021 13:54:21 +0930 Subject: [PATCH 01/18] Update cv_oem.R --- R/cv_oem.R | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index 92f33ac..24d058b 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -61,7 +61,7 @@ cv.oem <- function (x, y, penalty = c("elastic.net", "grp.mcp.net", "grp.scad.net", "sparse.grp.lasso"), weights = numeric(0), lambda = NULL, - type.measure = c("mse", "deviance", "class", "auc", "mae"), nfolds = 10, foldid = NULL, + type.measure = c("mse", "deviance", "class", "auc", "auprc", "mae"), nfolds = 10, foldid = NULL, grouped = TRUE, keep = FALSE, parallel = FALSE, ncores = -1, ...) { ## code modified from "glmnet" package @@ -209,7 +209,7 @@ cv.oem <- function (x, y, penalty = c("elastic.net", nzero = nz, name = cvname, oem.fit = oem.object) if (keep) out = c(out, list(fit.preval = cvstuff$fit.preval, foldid = foldid)) - lamin <- if(cvname == "AUC") getmin(lambda, lapply(cvm, function(ccvvmm) -ccvvmm), cvsd) + lamin <- if(cvname == "AUC" | cvname == "AUPRC") getmin(lambda, lapply(cvm, function(ccvvmm) -ccvvmm), cvsd) else getmin(lambda, cvm, cvsd) obj <- c(out, as.list(lamin)) obj$best.model <- penalty[obj$model.min] @@ -223,13 +223,13 @@ cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.mea { ## code modified from "glmnet" package typenames = c(mse = "Mean-Squared Error", mae = "Mean Absolute Error", - deviance = "Binomial Deviance", auc = "AUC", class = "Misclassification Error") + deviance = "Binomial Deviance", auc = "AUC", auprc = "AUPRC", class = "Misclassification Error") if (type.measure == "default") type.measure = "deviance" - if (!match(type.measure, c("mse", "mae", "deviance", "auc", + if (!match(type.measure, c("mse", "mae", "deviance", "auc", "auprc", "class"), FALSE)) { - warning("Only 'deviance', 'class', 'auc', 'mse' or 'mae' available for binomial models; 'deviance' used") + warning("Only 'deviance', 'class', 'auc', 'auprc', 'mse' or 'mae' available for binomial models; 'deviance' used") type.measure = "deviance" } prob_min = 1e-05 @@ -303,7 +303,28 @@ cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.mea } weights = tapply(weights, foldid, sum) weights = rep(list(weights), nmodels) - } else + } + + if (type.measure == "auprc") { + cvraw <- rep(list(matrix(NA, nfolds, length(lambda[[1]]))), + nmodels) + N <- vector(mode = "list", length = nmodels) + for (m in 1:nmodels) { + good <- matrix(0, nfolds, length(lambda[[1]])) + for (i in seq(nfolds)) { + good[i, seq(nlams[i])] = 1 + which <- foldid == i + for (j in seq(nlams[i])) { + cvraw[[m]][i, j] = precrec::auc(evalmod(scores = predlist[[m]][which,j] , labels = y[which,2]))[4][2,] + } + } + N[[m]] = apply(good, 2, sum) + } + weights = tapply(weights, foldid, sum) + weights = rep(list(weights), nmodels) + } + + else { ywt <- apply(y, 1, sum) y <- y / ywt @@ -418,4 +439,4 @@ cv.oemfit_gaussian <- function (outlist, lambda, x, y, weights, foldid, type.mea if (keep) out$fit.preval <- predlist out -} \ No newline at end of file +} From e7ad6b9422e6d698f50b75d62f349cf8b14a8859 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 6 Jul 2021 15:38:08 +0930 Subject: [PATCH 02/18] Update cv_oem.R --- R/cv_oem.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index 24d058b..24d0503 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -315,7 +315,7 @@ cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.mea good[i, seq(nlams[i])] = 1 which <- foldid == i for (j in seq(nlams[i])) { - cvraw[[m]][i, j] = precrec::auc(evalmod(scores = predlist[[m]][which,j] , labels = y[which,2]))[4][2,] + cvraw[[m]][i, j] = precrec::auc(precrec::evalmod(scores = predlist[[m]][which,j] , labels = y[which,2]))[4][2,] } } N[[m]] = apply(good, 2, sum) From b2b044c0ed9f33cf2fc479c31e613612a2200e0c Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 6 Jul 2021 15:47:59 +0930 Subject: [PATCH 03/18] Update cv_oem.R --- R/cv_oem.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index 24d0503..24d058b 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -315,7 +315,7 @@ cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.mea good[i, seq(nlams[i])] = 1 which <- foldid == i for (j in seq(nlams[i])) { - cvraw[[m]][i, j] = precrec::auc(precrec::evalmod(scores = predlist[[m]][which,j] , labels = y[which,2]))[4][2,] + cvraw[[m]][i, j] = precrec::auc(evalmod(scores = predlist[[m]][which,j] , labels = y[which,2]))[4][2,] } } N[[m]] = apply(good, 2, sum) From d139154c4bcc3a78b1211c82cfb0ac71d16d688b Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 6 Jul 2021 16:01:54 +0930 Subject: [PATCH 04/18] Update cv_oem.R --- R/cv_oem.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/cv_oem.R b/R/cv_oem.R index 24d058b..b83da5e 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -84,6 +84,8 @@ cv.oem <- function (x, y, penalty = c("elastic.net", else type.measure = match.arg(type.measure) if (!is.null(lambda) && length(lambda) < 2) stop("Need more than one value of lambda for cv.oem") + if (length(weights)!=0 & type.measure=="auprc") + stop("Cross-validation based on AUPRC is not available with sampling weights") N = nrow(x) if (length(weights)) weights = as.double(weights) From 29d93ad3241b50d4ac1189cb5b21dda9937f79b5 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 6 Jul 2021 16:16:40 +0930 Subject: [PATCH 05/18] Update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 45f8a59..91f1538 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Imports: Rcpp (>= 0.11.0), Matrix, foreach, - methods + methods, + precrec LinkingTo: Rcpp, RcppEigen, BH, From a9c59cb8a38ec8a94907ab28d7fdb132da6bb496 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 6 Jul 2021 16:32:21 +0930 Subject: [PATCH 06/18] Update NAMESPACE --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 329409b..af5723b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(oem.xtx) export(oemfit) export(xval.oem) import(Matrix) +import(precrec) import(Rcpp) import(bigmemory) import(foreach) From e80c08a65f7f97dba6ebba76b4aed4dab1c5d3be Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Wed, 7 Jul 2021 08:50:19 +0930 Subject: [PATCH 07/18] Update cv_oem.R --- R/cv_oem.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index b83da5e..9dbd890 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -221,7 +221,7 @@ cv.oem <- function (x, y, penalty = c("elastic.net", } -cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.measure, grouped, keep = FALSE) + cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.measure, grouped, keep = FALSE) { ## code modified from "glmnet" package typenames = c(mse = "Mean-Squared Error", mae = "Mean Absolute Error", From f58daf2c616f29823dd4dc1bb1c023a2058674ed Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Wed, 7 Jul 2021 08:51:47 +0930 Subject: [PATCH 08/18] Update cv_oem.R --- R/cv_oem.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index 9dbd890..b83da5e 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -221,7 +221,7 @@ cv.oem <- function (x, y, penalty = c("elastic.net", } - cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.measure, grouped, keep = FALSE) +cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.measure, grouped, keep = FALSE) { ## code modified from "glmnet" package typenames = c(mse = "Mean-Squared Error", mae = "Mean Absolute Error", From db98d67808e1075b7cd8543699b3cf669af7d479 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Wed, 7 Jul 2021 08:53:10 +0930 Subject: [PATCH 09/18] Update cv_oem.R --- R/cv_oem.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index b83da5e..ab13655 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -85,7 +85,7 @@ cv.oem <- function (x, y, penalty = c("elastic.net", if (!is.null(lambda) && length(lambda) < 2) stop("Need more than one value of lambda for cv.oem") if (length(weights)!=0 & type.measure=="auprc") - stop("Cross-validation based on AUPRC is not available with sampling weights") + stop("Cross-validation based on AUPRC is not yet available with sampling weights") N = nrow(x) if (length(weights)) weights = as.double(weights) From 911e93ba59607b0ab40c0bc112089eb0521087e1 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Wed, 7 Jul 2021 10:27:42 +0930 Subject: [PATCH 10/18] Update cv_oem.R --- R/cv_oem.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index ab13655..e2c6d6a 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -305,9 +305,7 @@ cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.mea } weights = tapply(weights, foldid, sum) weights = rep(list(weights), nmodels) - } - - if (type.measure == "auprc") { + } else if (type.measure == "auprc") { cvraw <- rep(list(matrix(NA, nfolds, length(lambda[[1]]))), nmodels) N <- vector(mode = "list", length = nmodels) @@ -324,9 +322,7 @@ cv.oemfit_binomial <- function (outlist, lambda, x, y, weights, foldid, type.mea } weights = tapply(weights, foldid, sum) weights = rep(list(weights), nmodels) - } - - else + } else { ywt <- apply(y, 1, sum) y <- y / ywt From ddb3b4ee30bc9c7132652727ed90fd487429721a Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Thu, 8 Jul 2021 16:54:03 +0930 Subject: [PATCH 11/18] Update cv_oem.R --- R/cv_oem.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index e2c6d6a..36f4c24 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -17,7 +17,7 @@ #' a value of lambda overrides this. #' @param type.measure measure to evaluate for cross-validation. The default is \code{type.measure = "deviance"}, #' which uses squared-error for gaussian models (a.k.a \code{type.measure = "mse"} there), deviance for logistic -#' regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} is for two-class logistic +#' regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} or \code{type.measure = "auprc"} are for two-class logistic #' regression only. \code{type.measure = "mse"} or \code{type.measure = "mae"} (mean absolute error) can be used by all models; #' they measure the deviation from the fitted mean to the response. #' @param nfolds number of folds for cross-validation. default is 10. 3 is smallest value allowed. From daa6af1fc661c2ff03ba60caa2d1c957bd51e846 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:08:38 +0930 Subject: [PATCH 12/18] Update cv.oem.Rd --- man/cv.oem.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/cv.oem.Rd b/man/cv.oem.Rd index eb8e9ed..20798e2 100644 --- a/man/cv.oem.Rd +++ b/man/cv.oem.Rd @@ -12,7 +12,7 @@ cv.oem( "sparse.grp.lasso"), weights = numeric(0), lambda = NULL, - type.measure = c("mse", "deviance", "class", "auc", "mae"), + type.measure = c("mse", "deviance", "class", "auc", "auprc", "mae"), nfolds = 10, foldid = NULL, grouped = TRUE, @@ -42,7 +42,7 @@ a value of lambda overrides this.} \item{type.measure}{measure to evaluate for cross-validation. The default is \code{type.measure = "deviance"}, which uses squared-error for gaussian models (a.k.a \code{type.measure = "mse"} there), deviance for logistic -regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} is for two-class logistic +regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} and \code{type.measure = "auprc"} are for two-class logistic regression only. \code{type.measure = "mse"} or \code{type.measure = "mae"} (mean absolute error) can be used by all models; they measure the deviation from the fitted mean to the response.} From 4815129db9075ec811a90e2f5450a3ff3470c496 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Fri, 16 Jul 2021 17:18:20 +0930 Subject: [PATCH 13/18] Update cv_oem.R --- R/cv_oem.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cv_oem.R b/R/cv_oem.R index 36f4c24..b72b1bf 100644 --- a/R/cv_oem.R +++ b/R/cv_oem.R @@ -89,7 +89,7 @@ cv.oem <- function (x, y, penalty = c("elastic.net", N = nrow(x) if (length(weights)) weights = as.double(weights) - y = drop(y) + y = drop(y) if (parallel & ncores != 1) { From 8537820ec678d2088796c34337d36019bf382ff0 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 20 Jul 2021 09:18:39 +0930 Subject: [PATCH 14/18] Update DESCRIPTION --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 91f1538..45f8a59 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,8 +32,7 @@ Imports: Rcpp (>= 0.11.0), Matrix, foreach, - methods, - precrec + methods LinkingTo: Rcpp, RcppEigen, BH, From 5ab3c4888e5f4c6381bcc42e5733fdf1f83773c5 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 20 Jul 2021 10:40:37 +0930 Subject: [PATCH 15/18] Update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 45f8a59..91f1538 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Imports: Rcpp (>= 0.11.0), Matrix, foreach, - methods + methods, + precrec LinkingTo: Rcpp, RcppEigen, BH, From 2948decf63fcb8a077318287b551002c591a717e Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 20 Jul 2021 10:40:53 +0930 Subject: [PATCH 16/18] Update NAMESPACE --- NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index af5723b..329409b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,7 +27,6 @@ export(oem.xtx) export(oemfit) export(xval.oem) import(Matrix) -import(precrec) import(Rcpp) import(bigmemory) import(foreach) From e325bfc91a74530f05e1749d81d34a324bfc2677 Mon Sep 17 00:00:00 2001 From: pedro-ribeirosantiago <45943434+pedro-ribeirosantiago@users.noreply.github.com> Date: Tue, 20 Jul 2021 11:42:45 +0930 Subject: [PATCH 17/18] Update NAMESPACE --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 329409b..dd51f4e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(oemfit) export(xval.oem) import(Matrix) import(Rcpp) +import(precrec) import(bigmemory) import(foreach) importFrom(bigmemory,is.big.matrix) From c354581fb068d5c7d46cf5d6376528d7199df472 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Ribeiro Santiago Date: Mon, 17 Jul 2023 12:28:58 +0930 Subject: [PATCH 18/18] AUPRC available --- DESCRIPTION | 2 +- NAMESPACE | 1 - man/cv.oem.Rd | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f5d71a5..786abd6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,7 +40,7 @@ LinkingTo: Rcpp, BH, bigmemory, RcppArmadillo -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Suggests: knitr, rmarkdown diff --git a/NAMESPACE b/NAMESPACE index dd51f4e..329409b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,7 +28,6 @@ export(oemfit) export(xval.oem) import(Matrix) import(Rcpp) -import(precrec) import(bigmemory) import(foreach) importFrom(bigmemory,is.big.matrix) diff --git a/man/cv.oem.Rd b/man/cv.oem.Rd index 9b9deec..4f0ec00 100644 --- a/man/cv.oem.Rd +++ b/man/cv.oem.Rd @@ -42,7 +42,7 @@ a value of lambda overrides this.} \item{type.measure}{measure to evaluate for cross-validation. The default is \code{type.measure = "deviance"}, which uses squared-error for gaussian models (a.k.a \code{type.measure = "mse"} there), deviance for logistic -regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} and \code{type.measure = "auprc"} are for two-class logistic +regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} or \code{type.measure = "auprc"} are for two-class logistic regression only. \code{type.measure = "mse"} or \code{type.measure = "mae"} (mean absolute error) can be used by all models; they measure the deviation from the fitted mean to the response.}