From b7a3d7054cc1436d9fd987317a7a529c6f81561b Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Tue, 3 Dec 2024 20:48:13 +0100 Subject: [PATCH 1/6] Update documentation add more tests --- R/ena2ncbi.R | 2 +- R/ncbi2ena.R | 4 ++-- man/ena2ncbi.Rd | 2 +- man/ncbi2ena.Rd | 4 ++-- tests/testthat/test-ena2ncbi.R | 7 +++++++ 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/R/ena2ncbi.R b/R/ena2ncbi.R index bba66ca..771b99e 100644 --- a/R/ena2ncbi.R +++ b/R/ena2ncbi.R @@ -2,7 +2,7 @@ #' #' Take a vector of ENA accessions and convert them to NCBI accessions. #' @param accessions character; a vector or ENA accessions. -#' @param type character; type of accessions. Supported types: `sample`, +#' @param type character; type of ENA accessions. Supported types: `sample`, #' `study`. #' @return A tibble with two columns, `ena` and `ncbi`. #' @examples diff --git a/R/ncbi2ena.R b/R/ncbi2ena.R index e35a2dd..e476f9b 100644 --- a/R/ncbi2ena.R +++ b/R/ncbi2ena.R @@ -2,8 +2,8 @@ #' #' Take a vector of NCBI accessions and convert them to ENA accessions. #' @param accessions character; a vector or ENA accessions. -#' @param type character; type of accessions. Supported types: `biosample`, -#' `bioproject`. +#' @param type character; type of NCBI accessions. Supported types: +#' `biosample`, `bioproject`. #' @return A tibble with two columns, `ncbi` and `ena`. #' @examples #' ncbi2ena("SAMEA111452506", type = "biosample") diff --git a/man/ena2ncbi.Rd b/man/ena2ncbi.Rd index 093a05d..d85ad6d 100644 --- a/man/ena2ncbi.Rd +++ b/man/ena2ncbi.Rd @@ -9,7 +9,7 @@ ena2ncbi(accessions, type) \arguments{ \item{accessions}{character; a vector or ENA accessions.} -\item{type}{character; type of accessions. Supported types: `sample`, +\item{type}{character; type of ENA accessions. 
Supported types: `sample`, `study`.} } \value{ diff --git a/man/ncbi2ena.Rd b/man/ncbi2ena.Rd index 0b024d6..217d8a6 100644 --- a/man/ncbi2ena.Rd +++ b/man/ncbi2ena.Rd @@ -9,8 +9,8 @@ ncbi2ena(accessions, type) \arguments{ \item{accessions}{character; a vector or ENA accessions.} -\item{type}{character; type of accessions. Supported types: `biosample`, -`bioproject`.} +\item{type}{character; type of NCBI accessions. Supported types: +`biosample`, `bioproject`.} } \value{ A tibble with two columns, `ncbi` and `ena`. diff --git a/tests/testthat/test-ena2ncbi.R b/tests/testthat/test-ena2ncbi.R index c56db62..1dc6866 100644 --- a/tests/testthat/test-ena2ncbi.R +++ b/tests/testthat/test-ena2ncbi.R @@ -34,3 +34,10 @@ test_that("ena2ncbi() removes invalid queries and returns the rest", { ) expect_equal(dim(E), c(2,2)) }) + +test_that("ena2ncbi() works with studies", { + G <- ena2ncbi("ERP161024", type = "study") + expect_equal(dim(G), c(1,2)) + expect_equal(names(G), c("ena", "ncbi")) + expect_equal(G$ncbi, "PRJEB76496") +}) From 48011c65a8f62099c946c0175d8a29e07b43ee97 Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Tue, 3 Dec 2024 20:39:50 +0100 Subject: [PATCH 2/6] Return integer UIDs --- R/ncbi_link_uid.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ncbi_link_uid.R b/R/ncbi_link_uid.R index df86f98..1536afa 100644 --- a/R/ncbi_link_uid.R +++ b/R/ncbi_link_uid.R @@ -92,7 +92,7 @@ ncbi_link_uid <- function( foo_from_ids <- function(x, from, to) { if (length(x) == 1 && is.na(x)) { if (verbose) message("No valid UIDs.") - return(tibble::tibble(query = x, uid = NA_real_)) + return(tibble::tibble(query = x, uid = NA_integer_)) } id_hit <- suppressWarnings(wrap( "entrez_link", From 8519771f4a0f2d0c8267a9f78f2ae4e5bbcb71cd Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sun, 22 Dec 2024 22:26:15 +0100 Subject: [PATCH 3/6] Add function for coercing uids to numeric --- R/utils.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git 
a/R/utils.R b/R/utils.R index 7f26a74..6a2aedb 100644 --- a/R/utils.R +++ b/R/utils.R @@ -204,4 +204,16 @@ get_mc_cores <- function(mc_cores, verbose = getOption("verbose")) { mc_cores <- n_cores } return(mc_cores) +} + +#' Convert a vector to numeric +#' +#' @param x vector; Vector to convert. +#' @noRd +as_numeric <- function(x) { + numeric_x <- suppressWarnings(as.numeric(x)) + if (any(is.na(numeric_x) & !is.na(x))) { + stop("Query must be either an ncbi_uid object or a vector of UIDs.") + } + return(numeric_x) } \ No newline at end of file From 2e8b5f029badeb235550ee6468ac68c805674a18 Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sun, 22 Dec 2024 22:27:08 +0100 Subject: [PATCH 4/6] Do not require numeric uids but require that they are coercible --- R/ncbi_get_meta.R | 4 +--- R/ncbi_get_summary.R | 4 +--- R/ncbi_link_uid.R | 4 +--- R/ncbi_recover_id.R | 4 +++- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/R/ncbi_get_meta.R b/R/ncbi_get_meta.R index 27f708b..37e6f25 100644 --- a/R/ncbi_get_meta.R +++ b/R/ncbi_get_meta.R @@ -150,9 +150,7 @@ ncbi_get_meta <- function( if ("ncbi_uid" %in% class(query)) { query <- query$uid } - if (!is.numeric(query)) { - stop("Query must be an ncbi_uid object or a numeric vector or UIDs.") - } + query <- as_numeric(query) idlist <- get_idlist(query, batch_size, verbose) res <- sapply(idlist, function(x) { foo_from_ids(x, db = db) diff --git a/R/ncbi_get_summary.R b/R/ncbi_get_summary.R index 45138ee..9978ef2 100644 --- a/R/ncbi_get_summary.R +++ b/R/ncbi_get_summary.R @@ -81,9 +81,7 @@ ncbi_get_summary <- function( } return(res) } - if (!is.numeric(query)) { - stop("Query must be an ncbi_uid object or a numeric vector or UIDs.") - } + query <- as_numeric(query) idlist <- get_idlist(query, batch_size = batch_size, verbose = verbose) res <- lapply(idlist, function(x) { foo_from_ids(x, db = db, verbose = verbose) diff --git a/R/ncbi_link_uid.R b/R/ncbi_link_uid.R index 1536afa..6674976 100644 --- 
a/R/ncbi_link_uid.R +++ b/R/ncbi_link_uid.R @@ -131,9 +131,7 @@ ncbi_link_uid <- function( } else { query_vector <- query } - if (!is.numeric(query_vector)) { - stop("Query must be an ncbi_uid object or a numeric vector or UIDs.") - } + query_vector <- as_numeric(query_vector) idlist <- get_idlist(query_vector, batch_size, verbose) res <- lapply(idlist, function(x) { foo_from_ids( diff --git a/R/ncbi_recover_id.R b/R/ncbi_recover_id.R index 0f161bb..c15e30d 100644 --- a/R/ncbi_recover_id.R +++ b/R/ncbi_recover_id.R @@ -44,8 +44,8 @@ ncbi_recover_id <- function( msg <- "Specify a 'db' argument or query an object of class 'ncbi_uid'." stop(msg) } + query <- as_numeric(query) } - db <- match.arg(db, ncbi_dbs()) summaries <- ncbi_get_summary( query, db = db, @@ -64,6 +64,8 @@ ncbi_recover_id <- function( id <- unname(sapply(summaries, function(x) x$accessionversion)) } else if (db == "protein") { id <- unname(sapply(summaries, function(x) x$accessionversion)) + } else if (db == "pubmed") { + id <- unname(sapply(summaries, function(x) x$uid)) } else { stop("Not supported.") } From ce3f924460990ad32794f8782b858f0603a598b2 Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sun, 22 Dec 2024 22:27:21 +0100 Subject: [PATCH 5/6] Add pubmed examples --- data/examples.rda | Bin 376 -> 398 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/examples.rda b/data/examples.rda index 9b2477bfe80ad09c32771ce0f239e39044a6543b..f01be08de1cff92a466338ab07f59914b121e336 100644 GIT binary patch literal 398 zcmV;90df8xiwFP!0000017(uyYZE~bz}GyC4ZcA9Gb}srosXwU1woOhkZ&c?Ln=)g zdKL7a7dOdW&7KF#&HQF|UOS7~lBUaP2w@mTqd^#svlzx-K3}{_A!HE3gD?tb`TwZ? 
zwc6~i>n4Q3i>y!bd49Zjus?hI=HfE$KV*_~>|u0gkaPHltILQ$$-(#d7_3V+{kV8H ze=Qu9TFcAU^7vEr{zF6qM#2*|%Q?gl;M9-MSV+BPPcxfW0IcN3Sa>Fu$R@proYP3* zXC{E+1yd4bizQP}RI(V=5<)7OWhSS!%qt==2X5JuW7Ql%>@gB(nHU^W zx)0cnP#s;$c#0TBwH#qCCdJqrA&_gL!V{R1Ny6S!K6FlbKK%!-stQe_{rB`|v&Awh z09tDpbPGsMgbZ}AtBG5lUAd^^?P{~%(!=FR=e%l~db7U%-Q{0j-Ry2}e$-#@y1bb* shYfGR=zG2GrMSFOMmv~FWF0yu~7p60CO$FivR!s literal 376 zcmV-;0f+t{iwFP!0000017(uiN&_(vhEuz(ws->ZF)VXPGRfs?TR~8yD)g?jvPh*T zx?bqZi)p*7&AJeh`SS0~WM&54A-9KZrBq!dNln!o;iwuf&;2W>RB{lEno3kF>_^$> zc)prtxl#ug!ET0izWZ@|e|CG*AHn7$&89;YOx|lW9sI-T2ngAE^c%c$Rt9T74(@u_ zMuX+z=CZgseoF5jfCM!$?Xhl%VT2I(bxfiSTVJr#zv7n=H|qx~Mj9;}0{{TqO}MH6 From 343d71ca9ab6e16c50009ad4834809b34e5a910d Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sun, 22 Dec 2024 22:27:37 +0100 Subject: [PATCH 6/6] Test recovering pubmed ids --- tests/testthat/test-ncbi_recover_id.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-ncbi_recover_id.R b/tests/testthat/test-ncbi_recover_id.R index 07e7a5f..38e39f8 100644 --- a/tests/testthat/test-ncbi_recover_id.R +++ b/tests/testthat/test-ncbi_recover_id.R @@ -27,3 +27,11 @@ test_that("ncbi_recover_id() works with nuccore", { id <- ncbi_recover_id(uid) expect_equal(id,"OP617744.1") }) + +test_that("ncbi_recover_id() works with pubmed", { + uid <- ncbi_get_uid(examples$pubmed, db = "pubmed") + id1 <- ncbi_recover_id(uid) + id2 <- ncbi_recover_id(uid$uid, db = "pubmed") + expect_equal(id1, c("39332413", "36759752")) + expect_equal(id2, c("39332413", "36759752")) +})