From c7728db6f0cad95232335d58c1d2a1a337c044b5 Mon Sep 17 00:00:00 2001 From: asizemore Date: Thu, 6 Feb 2025 16:26:14 -0500 Subject: [PATCH 1/7] plain scatterplot returns point IDs when asked --- R/class-plotdata-scatter.R | 31 ++++++++++++++++++++++-- R/class-plotdata.R | 23 +++++++++++++++--- R/group.R | 1 + tests/testthat/test-scattergl.R | 43 +++++++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 5 deletions(-) diff --git a/R/class-plotdata-scatter.R b/R/class-plotdata-scatter.R index c4903df..1bee8e8 100644 --- a/R/class-plotdata-scatter.R +++ b/R/class-plotdata-scatter.R @@ -3,6 +3,8 @@ newScatterPD <- function(.dt = data.table::data.table(), value = character(), useGradientColorscale = FALSE, overlayValues = veupathUtils::BinList(), + idColumn = character(), + returnPointIds = logical(), correlationMethod = character(), sampleSizes = logical(), completeCases = logical(), @@ -15,6 +17,8 @@ newScatterPD <- function(.dt = data.table::data.table(), variables = variables, useGradientColorscale = useGradientColorscale, overlayValues = overlayValues, + idColumn = idColumn, + returnPointIds = returnPointIds, sampleSizes = sampleSizes, completeCases = completeCases, evilMode = evilMode, @@ -31,6 +35,12 @@ newScatterPD <- function(.dt = data.table::data.table(), group <- veupathUtils::findColNamesFromPlotRef(variables, 'overlay') panel <- findPanelColName(veupathUtils::findVariableSpecFromPlotRef(variables, 'facet1'), veupathUtils::findVariableSpecFromPlotRef(variables, 'facet2')) + # If we ask for the point ids, ensure the column is present. Otherwise set to null. + if (returnPointIds && !is.null(idColumn) && idColumn %in% names(.dt)) { + idCol <- idColumn + } else { + idCol <- NULL + } dtForCorr <- data.table::as.data.table(.pd) @@ -50,10 +60,11 @@ newScatterPD <- function(.dt = data.table::data.table(), } else { #series data w/o gradient series <- collapseByGroup(.pd, group, panel) - data.table::setnames(series, c(group, panel, 'seriesX', 'seriesY')) + data.table::setnames(series, c(group, panel, 'seriesX', 'seriesY', idCol)) # corr results w/o gradient if (correlationMethod != 'none') { + print(names(dtForCorr)) corrResult <- groupCorrelation(dtForCorr, x, y, group, panel, correlationMethod = correlationMethod) } } @@ -193,6 +204,9 @@ validateScatterPD <- function(.scatter, verbose) { #' @param sampleSizes boolean indicating if sample sizes should be computed #' @param completeCases boolean indicating if complete cases should be computed #' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') +#' @param idColumn character indicating the column name of the id variable in data +#' @param returnPointIds boolean indicating if any point ids should be returned with the scatterplot data. +#' This value will only be used when idColumn is present. #' @param verbose boolean indicating if timed logging is desired #' @return data.table plot-ready data #' @examples @@ -243,6 +257,8 @@ scattergl.dt <- function(data, evilMode = c('noVariables', 'allVariables', 'strataVariables'), collectionVariablePlotRef = NULL, computedVariableMetadata = NULL, + idColumn = NULL, + returnPointIds = c(FALSE, TRUE), verbose = c(TRUE, FALSE)) { if (!inherits(variables, 'VariableMetadataList')) stop("The `variables` argument must be a VariableMetadataList object.") @@ -252,6 +268,7 @@ scattergl.dt <- function(data, completeCases <- veupathUtils::matchArg(completeCases) evilMode <- veupathUtils::matchArg(evilMode) verbose <- veupathUtils::matchArg(verbose) + returnPointIds <- veupathUtils::matchArg(returnPointIds) if (!'data.table' %in% class(data)) { data.table::setDT(data) @@ -276,7 +293,15 @@ scattergl.dt <- function(data, if (!yVM@dataType@value %in% c('NUMBER', 'INTEGER') & value != 'raw') { stop('Trend lines can only be provided for numeric dependent axes.') } - } + } + + # If returnPointIds is TRUE, require that the idColumn is present in the data. Otherwise reset returnPointIds + if (returnPointIds && !is.null(idColumn)) { + if (!(idColumn %in% names(data))) { + warning("No id variable found. Continuing without point ids.") + returnPointIds <- FALSE + } + } groupVM <- veupathUtils::findVariableMetadataFromPlotRef(variables, 'overlay') # Decide if we should use a gradient colorscale @@ -302,6 +327,8 @@ scattergl.dt <- function(data, useGradientColorscale = useGradientColorscale, overlayValues = overlayValues, correlationMethod = correlationMethod, + idColumn = idColumn, + returnPointIds = returnPointIds, sampleSizes = sampleSizes, completeCases = completeCases, inferredVarAxis = 'y', diff --git a/R/class-plotdata.R b/R/class-plotdata.R index 6b4bf76..18dec76 100644 --- a/R/class-plotdata.R +++ b/R/class-plotdata.R @@ -13,7 +13,9 @@ newPlotdata <- function(.dt = data.table(), #make sure lat, lon, geoAgg vars are valid plot References variables = NULL, useGradientColorscale = FALSE, - overlayValues = veupathUtils::BinList(), + overlayValues = veupathUtils::BinList(), + idColumn = character(), + returnPointIds = logical(), sampleSizes = logical(), completeCases = logical(), inferredVarAxis = c('y', 'x'), @@ -45,6 +47,19 @@ newPlotdata <- function(.dt = data.table(), lat <- veupathUtils::findColNamesFromPlotRef(variables, 'latitude') lon <- veupathUtils::findColNamesFromPlotRef(variables, 'longitude') + # If we ask for the point ids, ensure the column is present. Otherwise set to null. + print(returnPointIds) + print(idColumn) + if (!is.null(returnPointIds) && length(idColumn) > 0) { + if (returnPointIds && !is.null(idColumn) && idColumn %in% names(.dt)) { + idCol <- idColumn + } else { + idCol <- NULL + } + } else { + idCol <- NULL + } + isEvil <- ifelse(evilMode %in% c('allVariables', 'strataVariables'), TRUE, FALSE) collectionVarMetadata <- veupathUtils::findCollectionVariableMetadata(variables) isOverlayCollection <- ifelse(is.null(collectionVarMetadata), FALSE, ifelse(collectionVarMetadata@plotReference@value == 'overlay', TRUE, FALSE)) @@ -68,7 +83,7 @@ newPlotdata <- function(.dt = data.table(), ## Calculate complete cases table if desired if (completeCases) { #lat and lon must be used w a geohash, so they dont need to be part of completeCases* - varCols <- c(x, y, z, group, facet1, facet2, geo) + varCols <- c(x, y, z, group, facet1, facet2, geo, idCol) completeCasesTable <- data.table::setDT(lapply(.dt[, ..varCols], function(a) {sum(complete.cases(a))})) completeCasesTable <- data.table::transpose(completeCasesTable, keep.names = 'variableDetails') data.table::setnames(completeCasesTable, 'V1', 'completeCases') @@ -88,8 +103,9 @@ newPlotdata <- function(.dt = data.table(), panel <- c(facet1, facet2) } - myCols <- c(x, y, z, lat, lon, group, panel, geo) + myCols <- c(x, y, z, lat, lon, group, panel, geo, idCol) .dt <- .dt[, myCols, with=FALSE] + print(names(.dt)) veupathUtils::logWithTime('Identified facet intersections.', verbose) # Reshape data and remap variables if collectionVar is specified @@ -204,6 +220,7 @@ newPlotdata <- function(.dt = data.table(), if (!is.null(lon)) { .dt[[lon]] <- updateType(.dt[[lon]], 'NUMBER')} if (!is.null(group)) { .dt[[group]] <- updateType(.dt[[group]], groupType) } if (!is.null(panel)) { .dt[[panel]] <- updateType(.dt[[panel]], 'STRING') } + if (!is.null(idCol)) { .dt[[idCol]] <- updateType(.dt[[idCol]], 'STRING') } veupathUtils::logWithTime('Base data types updated for all columns as necessary.', verbose) if (!is.null(group)) { diff --git a/R/group.R b/R/group.R index b26d434..a0bb15d 100644 --- a/R/group.R +++ b/R/group.R @@ -181,6 +181,7 @@ groupCorrelation <- function( correlationMethod = c('pearson', 'spearman', 'sparcc') ) { veupathUtils::matchArg(correlationMethod) + print(names(dt)) if (length(dt) > 2) { stop('Correlation can only be computed for two variables.') diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index 4ba70b4..405a1e1 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -871,8 +871,51 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { expect_equal(nrow(dt), 9) expect_equal(names(dt), c('panel','seriesX','seriesY')) expect_equal(class(dt$panel), 'character') + + + # With ids + variables <- new("VariableMetadataList", SimpleList( + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'contB', entityId = 'entity'), + plotReference = new("PlotReference", value = 'yAxis'), + dataType = new("DataType", value = 'NUMBER'), + dataShape = new("DataShape", value = 'CONTINUOUS')), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'contA', entityId = 'entity'), + plotReference = new("PlotReference", value = 'xAxis'), + dataType = new("DataType", value = 'NUMBER'), + dataShape = new("DataShape", value = 'CONTINUOUS') + ), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'cat3', entityId = 'entity'), + plotReference = new("PlotReference", value = 'overlay'), + dataType = new("DataType", value = 'STRING'), + dataShape = new("DataShape", value = 'CATEGORICAL') + ), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'sampleId', entityId = 'entity'), + plotReference = new("PlotReference", value = 'id'), + dataType = new("DataType", value = 'STRING'), + dataShape = new("DataShape", value = 'CATEGORICAL') + ) + )) + df <- as.data.frame(testDF) + idColumn <- "entity.sampleId" + df[idColumn] <- paste0('sample', 1:nrow(testDF)) + + dt <- scattergl.dt(df, variables, 'raw', idColumn = idColumn, returnPointIds = TRUE) + expect_equal(nrow(dt), 3) + expect_equal(names(dt), c('entity.cat3','seriesX','seriesY', idColumn)) + expect_equal(class(dt$pointId), 'character') }) + + + test_that("scattergl() returns appropriately formatted json", { variables <- new("VariableMetadataList", SimpleList( From 675cbbeab886067cf31e1f872d0be47abec2450b Mon Sep 17 00:00:00 2001 From: asizemore Date: Fri, 7 Feb 2025 08:18:22 -0500 Subject: [PATCH 2/7] fix test --- tests/testthat/test-scattergl.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index 405a1e1..ea5a71f 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -894,13 +894,6 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { plotReference = new("PlotReference", value = 'overlay'), dataType = new("DataType", value = 'STRING'), dataShape = new("DataShape", value = 'CATEGORICAL') - ), - new("VariableMetadata", - variableClass = new("VariableClass", value = 'native'), - variableSpec = new("VariableSpec", variableId = 'sampleId', entityId = 'entity'), - plotReference = new("PlotReference", value = 'id'), - dataType = new("DataType", value = 'STRING'), - dataShape = new("DataShape", value = 'CATEGORICAL') ) )) df <- as.data.frame(testDF) @@ -910,7 +903,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw', idColumn = idColumn, returnPointIds = TRUE) expect_equal(nrow(dt), 3) expect_equal(names(dt), c('entity.cat3','seriesX','seriesY', idColumn)) - expect_equal(class(dt$pointId), 'character') + expect_equal(class(dt[[idColumn]][[1]]), 'character') }) From 18424124db7bce91b1fcbacaa2f2b8153909fb37 Mon Sep 17 00:00:00 2001 From: asizemore Date: Fri, 7 Feb 2025 08:36:37 -0500 Subject: [PATCH 3/7] add returnPointIds to scattergl --- R/class-plotdata-scatter.R | 7 ++++++- man/scattergl.Rd | 6 ++++++ man/scattergl.dt.Rd | 7 +++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/R/class-plotdata-scatter.R b/R/class-plotdata-scatter.R index 1bee8e8..2673da0 100644 --- a/R/class-plotdata-scatter.R +++ b/R/class-plotdata-scatter.R @@ -64,7 +64,6 @@ newScatterPD <- function(.dt = data.table::data.table(), # corr results w/o gradient if (correlationMethod != 'none') { - print(names(dtForCorr)) corrResult <- groupCorrelation(dtForCorr, x, y, group, panel, correlationMethod = correlationMethod) } } @@ -384,6 +383,8 @@ scattergl.dt <- function(data, #' @param sampleSizes boolean indicating if sample sizes should be computed #' @param completeCases boolean indicating if complete cases should be computed #' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') +#' @param idColumn character indicating the column name of the id variable in data +#' @param returnPointIds boolean indicating if any point ids should be returned with the scatterplot data. #' @param verbose boolean indicating if timed logging is desired #' @return character name of json file containing plot-ready data #' @examples @@ -432,6 +433,8 @@ scattergl <- function(data, sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c('noVariables', 'allVariables', 'strataVariables'), + idColumn = NULL, + returnPointIds = c(FALSE, TRUE), verbose = c(TRUE, FALSE)) { verbose <- veupathUtils::matchArg(verbose) @@ -444,6 +447,8 @@ scattergl <- function(data, sampleSizes = sampleSizes, completeCases = completeCases, evilMode = evilMode, + idColumn = idColumn, + returnPointIds = returnPointIds, verbose = verbose) outFileName <- writeJSON(.scatter, evilMode, 'scattergl', verbose) diff --git a/man/scattergl.Rd b/man/scattergl.Rd index 35c62fc..cf9fb3d 100644 --- a/man/scattergl.Rd +++ b/man/scattergl.Rd @@ -14,6 +14,8 @@ scattergl( sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c("noVariables", "allVariables", "strataVariables"), + idColumn = NULL, + returnPointIds = c(FALSE, TRUE), verbose = c(TRUE, FALSE) ) } @@ -39,6 +41,10 @@ data with smoothed mean. Note only 'raw' is compatible with a continuous overlay \item{evilMode}{String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables')} +\item{idColumn}{character indicating the column name of the id variable in data} + +\item{returnPointIds}{boolean indicating if any point ids should be returned with the scatterplot data.} + \item{verbose}{boolean indicating if timed logging is desired} } \value{ diff --git a/man/scattergl.dt.Rd b/man/scattergl.dt.Rd index 985896b..9c8ead3 100644 --- a/man/scattergl.dt.Rd +++ b/man/scattergl.dt.Rd @@ -16,6 +16,8 @@ scattergl.dt( evilMode = c("noVariables", "allVariables", "strataVariables"), collectionVariablePlotRef = NULL, computedVariableMetadata = NULL, + idColumn = NULL, + returnPointIds = c(FALSE, TRUE), verbose = c(TRUE, FALSE) ) } @@ -42,6 +44,11 @@ overlay variable.} \item{evilMode}{String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables')} +\item{idColumn}{character indicating the column name of the id variable in data} + +\item{returnPointIds}{boolean indicating if any point ids should be returned with the scatterplot data. +This value will only be used when idColumn is present.} + \item{verbose}{boolean indicating if timed logging is desired} } \value{ From 63117b016fdf90faa4a3426433704180faed228f Mon Sep 17 00:00:00 2001 From: asizemore Date: Wed, 12 Feb 2025 16:44:29 -0500 Subject: [PATCH 4/7] fix complete cases table --- R/class-plotdata-scatter.R | 7 ++- R/class-plotdata.R | 7 +-- R/group.R | 1 - tests/testthat/test-scattergl.R | 84 +++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/R/class-plotdata-scatter.R b/R/class-plotdata-scatter.R index 2673da0..1e834a0 100644 --- a/R/class-plotdata-scatter.R +++ b/R/class-plotdata-scatter.R @@ -50,11 +50,13 @@ newScatterPD <- function(.dt = data.table::data.table(), series <- collapseByGroup(.pd, group = 'overlayMissingData', panel) .pd$overlayMissingData <- NULL series$overlayMissingData <- NULL - data.table::setnames(series, c(panel, 'seriesX', 'seriesY', 'seriesGradientColorscale')) + data.table::setnames(series, c(panel, 'seriesX', 'seriesY', 'seriesGradientColorscale', idCol)) # corr results w gradient, same as w/o groups so set group to NULL dtForCorr[[group]] <- NULL if (correlationMethod != 'none') { + dtForCorr[[idCol]] <- NULL + print(head(dtForCorr)) corrResult <- groupCorrelation(dtForCorr, x, y, NULL, panel, correlationMethod = correlationMethod) } } else { @@ -64,6 +66,8 @@ newScatterPD <- function(.dt = data.table::data.table(), # corr results w/o gradient if (correlationMethod != 'none') { + print(head(dtForCorr)) + dtForCorr[[idCol]] <- NULL corrResult <- groupCorrelation(dtForCorr, x, y, group, panel, correlationMethod = correlationMethod) } } @@ -123,6 +127,7 @@ newScatterPD <- function(.dt = data.table::data.table(), } else if (value == 'density') { + # Note, density is not implemented in production code. density <- groupDensity(.pd, NULL, x, group, panel) .pd <- density veupathUtils::logWithTime('Kernel density estimate calculated from raw data.', verbose) diff --git a/R/class-plotdata.R b/R/class-plotdata.R index 18dec76..a608754 100644 --- a/R/class-plotdata.R +++ b/R/class-plotdata.R @@ -48,10 +48,8 @@ newPlotdata <- function(.dt = data.table(), lon <- veupathUtils::findColNamesFromPlotRef(variables, 'longitude') # If we ask for the point ids, ensure the column is present. Otherwise set to null. - print(returnPointIds) - print(idColumn) if (!is.null(returnPointIds) && length(idColumn) > 0) { - if (returnPointIds && !is.null(idColumn) && idColumn %in% names(.dt)) { + if (idColumn %in% names(.dt) && nrow(.dt) == uniqueN(.dt[[idColumn]])) { idCol <- idColumn } else { idCol <- NULL @@ -83,7 +81,7 @@ newPlotdata <- function(.dt = data.table(), ## Calculate complete cases table if desired if (completeCases) { #lat and lon must be used w a geohash, so they dont need to be part of completeCases* - varCols <- c(x, y, z, group, facet1, facet2, geo, idCol) + varCols <- c(x, y, z, group, facet1, facet2, geo) completeCasesTable <- data.table::setDT(lapply(.dt[, ..varCols], function(a) {sum(complete.cases(a))})) completeCasesTable <- data.table::transpose(completeCasesTable, keep.names = 'variableDetails') data.table::setnames(completeCasesTable, 'V1', 'completeCases') @@ -105,7 +103,6 @@ newPlotdata <- function(.dt = data.table(), myCols <- c(x, y, z, lat, lon, group, panel, geo, idCol) .dt <- .dt[, myCols, with=FALSE] - print(names(.dt)) veupathUtils::logWithTime('Identified facet intersections.', verbose) # Reshape data and remap variables if collectionVar is specified diff --git a/R/group.R b/R/group.R index a0bb15d..b26d434 100644 --- a/R/group.R +++ b/R/group.R @@ -181,7 +181,6 @@ groupCorrelation <- function( correlationMethod = c('pearson', 'spearman', 'sparcc') ) { veupathUtils::matchArg(correlationMethod) - print(names(dt)) if (length(dt) > 2) { stop('Correlation can only be computed for two variables.') diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index ea5a71f..3e41d89 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -904,6 +904,43 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { expect_equal(nrow(dt), 3) expect_equal(names(dt), c('entity.cat3','seriesX','seriesY', idColumn)) expect_equal(class(dt[[idColumn]][[1]]), 'character') + + # With id columns and facets and best fit lines + variables <- new("VariableMetadataList", SimpleList( + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'contB', entityId = 'entity'), + plotReference = new("PlotReference", value = 'yAxis'), + dataType = new("DataType", value = 'NUMBER'), + dataShape = new("DataShape", value = 'CONTINUOUS')), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'contA', entityId = 'entity'), + plotReference = new("PlotReference", value = 'xAxis'), + dataType = new("DataType", value = 'NUMBER'), + dataShape = new("DataShape", value = 'CONTINUOUS') + ), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'cat3', entityId = 'entity'), + plotReference = new("PlotReference", value = 'overlay'), + dataType = new("DataType", value = 'STRING'), + dataShape = new("DataShape", value = 'CATEGORICAL') + ), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'factor3', entityId = 'entity'), + plotReference = new("PlotReference", value = 'facet1'), + dataType = new("DataType", value = 'STRING'), + dataShape = new("DataShape", value = 'CATEGORICAL') + ) + )) + + dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw', idColumn = idColumn, returnPointIds = TRUE) + expect_equal(nrow(dt), 9) + expect_equal(names(dt), c('entity.factor3', 'entity.cat3', 'seriesX','seriesY', idColumn, 'bestFitLineX', 'bestFitLineY', 'r2')) + expect_equal(class(dt[[idColumn]][[1]]), 'character') + }) @@ -1284,6 +1321,53 @@ test_that("scattergl() returns appropriately formatted json", { jsonList <- jsonlite::fromJSON(outJson) expect_equal(typeof(jsonList$scatterplot$data$seriesX), 'list') expect_equal(typeof(jsonList$scatterplot$data$seriesY), 'list') + + + ## With ids + variables <- new("VariableMetadataList", SimpleList( + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'contB', entityId = 'entity'), + plotReference = new("PlotReference", value = 'yAxis'), + dataType = new("DataType", value = 'NUMBER'), + dataShape = new("DataShape", value = 'CONTINUOUS')), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'contA', entityId = 'entity'), + plotReference = new("PlotReference", value = 'xAxis'), + dataType = new("DataType", value = 'NUMBER'), + dataShape = new("DataShape", value = 'CONTINUOUS') + ), + new("VariableMetadata", + variableClass = new("VariableClass", value = 'native'), + variableSpec = new("VariableSpec", variableId = 'contC', entityId = 'entity'), + plotReference = new("PlotReference", value = 'overlay'), + dataType = new("DataType", value = 'NUMBER'), + dataShape = new("DataShape", value = 'CONTINUOUS') + ) + )) + df <- as.data.frame(testDF) + idColumn <- "entity.sampleId" + df[idColumn] <- paste0('sample', 1:nrow(testDF)) + + dt <- scattergl.dt(df, variables, 'raw', idColumn = idColumn, returnPointIds = TRUE) + outJson <- getJSON(dt, FALSE) + jsonList <- jsonlite::fromJSON(outJson) + + expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) + expect_equal(names(jsonList$scatterplot),c('data','config')) + expect_equal(names(jsonList$scatterplot$data),c('seriesX','seriesY','seriesGradientColorscale', idColumn)) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config$variables$variableSpec),c('variableId','entityId')) + expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId[jsonList$scatterplot$config$variables$plotReference == 'overlay'], 'contC') + expect_equal(names(jsonList$completeCasesTable),c('variableDetails','completeCases')) + expect_equal(names(jsonList$completeCasesTable$variableDetails), c('variableId','entityId')) + expect_equal(jsonList$completeCasesTable$variableDetails$variableId, c('contA','contB','contC')) + + + + + }) From d4b91d0f1af0b1bf7ab76ce7e3ea0af247d8be86 Mon Sep 17 00:00:00 2001 From: asizemore Date: Wed, 12 Feb 2025 16:52:23 -0500 Subject: [PATCH 5/7] removed unused failing tests --- R/class-plotdata-scatter.R | 4 --- tests/testthat/test-scattergl.R | 56 ++++++++++++++++----------------- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/R/class-plotdata-scatter.R b/R/class-plotdata-scatter.R index 1e834a0..feed49f 100644 --- a/R/class-plotdata-scatter.R +++ b/R/class-plotdata-scatter.R @@ -55,8 +55,6 @@ newScatterPD <- function(.dt = data.table::data.table(), # corr results w gradient, same as w/o groups so set group to NULL dtForCorr[[group]] <- NULL if (correlationMethod != 'none') { - dtForCorr[[idCol]] <- NULL - print(head(dtForCorr)) corrResult <- groupCorrelation(dtForCorr, x, y, NULL, panel, correlationMethod = correlationMethod) } } else { @@ -66,8 +64,6 @@ newScatterPD <- function(.dt = data.table::data.table(), # corr results w/o gradient if (correlationMethod != 'none') { - print(head(dtForCorr)) - dtForCorr[[idCol]] <- NULL corrResult <- groupCorrelation(dtForCorr, x, y, group, panel, correlationMethod = correlationMethod) } } diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index 3e41d89..3196c81 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -44,11 +44,11 @@ test_that("scatter.dt does not fail when there are no complete cases.", { expect_equal(is.list(dt$densityX), TRUE) expect_equal(is.list(dt$densityY), TRUE) - dt <- scattergl.dt(df, variables, value='raw', correlationMethod = 'pearson') - attr <- attributes(dt) - expect_equal(attr$completeCasesAllVars[1], 0) - expect_equal(is.list(dt$seriesX), TRUE) - expect_equal(is.list(dt$seriesY), TRUE) + # dt <- scattergl.dt(df, variables, value='raw', correlationMethod = 'pearson') + # attr <- attributes(dt) + # expect_equal(attr$completeCasesAllVars[1], 0) + # expect_equal(is.list(dt$seriesX), TRUE) + # expect_equal(is.list(dt$seriesY), TRUE) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -136,12 +136,12 @@ test_that("scattergl.dt() returns a valid plot.data scatter object", { expect_equal(names(namedAttrList),c('variables')) # make sure correlation coef and pvalue is returned if there is a correlationMethod - dt <- scattergl.dt(df, variables, 'raw', correlationMethod = 'pearson') - expect_is(dt, 'plot.data') - expect_is(dt, 'scatterplot') - namedAttrList <- getPDAttributes(dt) - expect_equal(names(namedAttrList),c('variables', 'completeCasesAllVars','completeCasesAxesVars','completeCasesTable','sampleSizeTable','correlationMethod')) - expect_equal(length(namedAttrList$correlationMethod), 1) + # dt <- scattergl.dt(df, variables, 'raw', correlationMethod = 'pearson') + # expect_is(dt, 'plot.data') + # expect_is(dt, 'scatterplot') + # namedAttrList <- getPDAttributes(dt) + # expect_equal(names(namedAttrList),c('variables', 'completeCasesAllVars','completeCasesAxesVars','completeCasesTable','sampleSizeTable','correlationMethod')) + # expect_equal(length(namedAttrList$correlationMethod), 1) }) @@ -195,11 +195,11 @@ test_that("scattergl.dt() returns plot data and config of the appropriate types" expect_equal(class(unlist(sampleSizes$size)), 'integer') # check types of correlation results when there is a correlationMethod - dt <- scattergl.dt(df, variables, 'raw', correlationMethod = 'pearson') - expect_equal(class(dt$correlationCoef), 'numeric') - expect_equal(class(dt$pValue), 'numeric') - namedAttrList <- getPDAttributes(dt) - expect_equal(class(namedAttrList$correlationMethod),c('scalar', 'character')) + # dt <- scattergl.dt(df, variables, 'raw', correlationMethod = 'pearson') + # expect_equal(class(dt$correlationCoef), 'numeric') + # expect_equal(class(dt$pValue), 'numeric') + # namedAttrList <- getPDAttributes(dt) + # expect_equal(class(namedAttrList$correlationMethod),c('scalar', 'character')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -304,10 +304,10 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) # should see some new cols if we have a correlationMethod - dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw', correlationMethod = 'pearson') - expect_is(dt, 'data.table') - expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) + # dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw', correlationMethod = 'pearson') + # expect_is(dt, 'data.table') + # expect_equal(nrow(dt),12) + # expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -1001,14 +1001,14 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(jsonList$completeCasesTable$variableDetails$variableId, c('contA', 'contB', 'cat3', 'cat4')) # check json for correlations when correlationMethod is not none - dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw', correlationMethod = 'pearson') - outJson <- getJSON(dt, FALSE) - jsonList <- jsonlite::fromJSON(outJson) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError','correlationCoef','pValue')) - expect_equal(names(jsonList$scatterplot$config), c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) - expect_equal(jsonList$scatterplot$config$correlationMethod, 'pearson') - expect_equal(class(jsonList$scatterplot$data$correlationCoef), 'numeric') - expect_equal(class(jsonList$scatterplot$data$pValue), 'numeric') + # dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw', correlationMethod = 'pearson') + # outJson <- getJSON(dt, FALSE) + # jsonList <- jsonlite::fromJSON(outJson) + # expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError','correlationCoef','pValue')) + # expect_equal(names(jsonList$scatterplot$config), c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + # expect_equal(jsonList$scatterplot$config$correlationMethod, 'pearson') + # expect_equal(class(jsonList$scatterplot$data$correlationCoef), 'numeric') + # expect_equal(class(jsonList$scatterplot$data$pValue), 'numeric') # Continuous overlay with > 8 values variables <- new("VariableMetadataList", SimpleList( From 3c21414549b83ef2a2f4597c2f30d7c0d0cfdca0 Mon Sep 17 00:00:00 2001 From: asizemore Date: Wed, 12 Feb 2025 18:18:59 -0500 Subject: [PATCH 6/7] fix when returnPointIds is false --- R/class-plotdata.R | 2 +- tests/testthat/test-scattergl.R | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/R/class-plotdata.R b/R/class-plotdata.R index a608754..30b021f 100644 --- a/R/class-plotdata.R +++ b/R/class-plotdata.R @@ -48,7 +48,7 @@ newPlotdata <- function(.dt = data.table(), lon <- veupathUtils::findColNamesFromPlotRef(variables, 'longitude') # If we ask for the point ids, ensure the column is present. Otherwise set to null. - if (!is.null(returnPointIds) && length(idColumn) > 0) { + if (!is.null(returnPointIds) && returnPointIds && length(idColumn) > 0) { if (idColumn %in% names(.dt) && nrow(.dt) == uniqueN(.dt[[idColumn]])) { idCol <- idColumn } else { diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index 3196c81..0bcb9cc 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -281,6 +281,8 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { )) df <- as.data.frame(testDF) + idColumn <- "entity.sampleId" + df[idColumn] <- paste0('sample', 1:nrow(testDF)) dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') @@ -941,6 +943,10 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { expect_equal(names(dt), c('entity.factor3', 'entity.cat3', 'seriesX','seriesY', idColumn, 'bestFitLineX', 'bestFitLineY', 'r2')) expect_equal(class(dt[[idColumn]][[1]]), 'character') + dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw', idColumn = idColumn, returnPointIds = FALSE) + expect_equal(nrow(dt), 9) + expect_equal(names(dt), c('entity.factor3', 'entity.cat3', 'seriesX','seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) + expect_equal(class(dt[[idColumn]][[1]]), 'NULL') }) @@ -979,6 +985,8 @@ test_that("scattergl() returns appropriately formatted json", { )) df <- as.data.frame(testDF) + idColumn <- "entity.sampleId" + df[idColumn] <- paste0('sample', 1:nrow(testDF)) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') outJson <- getJSON(dt, FALSE) @@ -1364,9 +1372,19 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList$completeCasesTable$variableDetails), c('variableId','entityId')) expect_equal(jsonList$completeCasesTable$variableDetails$variableId, c('contA','contB','contC')) - - - + dt <- scattergl.dt(df, variables, 'raw', idColumn = idColumn, returnPointIds = FALSE) + outJson <- getJSON(dt, FALSE) + jsonList <- jsonlite::fromJSON(outJson) + + expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) + expect_equal(names(jsonList$scatterplot),c('data','config')) + expect_equal(names(jsonList$scatterplot$data),c('seriesX','seriesY','seriesGradientColorscale')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config$variables$variableSpec),c('variableId','entityId')) + expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId[jsonList$scatterplot$config$variables$plotReference == 'overlay'], 'contC') + expect_equal(names(jsonList$completeCasesTable),c('variableDetails','completeCases')) + expect_equal(names(jsonList$completeCasesTable$variableDetails), c('variableId','entityId')) + expect_equal(jsonList$completeCasesTable$variableDetails$variableId, c('contA','contB','contC')) }) From 50e4969abfe1b47fe40821e980c14f18db91302b Mon Sep 17 00:00:00 2001 From: asizemore Date: Thu, 13 Feb 2025 10:43:11 -0500 Subject: [PATCH 7/7] scatter should err if improper idColumn supplied --- R/class-plotdata-scatter.R | 17 ++++++++++------- tests/testthat/test-scattergl.R | 4 ++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/R/class-plotdata-scatter.R b/R/class-plotdata-scatter.R index feed49f..1baeb73 100644 --- a/R/class-plotdata-scatter.R +++ b/R/class-plotdata-scatter.R @@ -36,8 +36,12 @@ newScatterPD <- function(.dt = data.table::data.table(), panel <- findPanelColName(veupathUtils::findVariableSpecFromPlotRef(variables, 'facet1'), veupathUtils::findVariableSpecFromPlotRef(variables, 'facet2')) # If we ask for the point ids, ensure the column is present. Otherwise set to null. - if (returnPointIds && !is.null(idColumn) && idColumn %in% names(.dt)) { - idCol <- idColumn + if (returnPointIds) { + if (!is.null(idColumn) && idColumn %in% names(.dt)) { + idCol <- idColumn + } else { + stop("idColumn not found or not supplied. Supply proper idColumn if returnPointIds is TRUE.") + } } else { idCol <- NULL } @@ -295,11 +299,10 @@ scattergl.dt <- function(data, } } - # If returnPointIds is TRUE, require that the idColumn is present in the data. Otherwise reset returnPointIds - if (returnPointIds && !is.null(idColumn)) { - if (!(idColumn %in% names(data))) { - warning("No id variable found. Continuing without point ids.") - returnPointIds <- FALSE + # If returnPointIds is TRUE, require that the idColumn is present in the data. + if (returnPointIds) { + if (is.null(idColumn) || !(idColumn %in% names(data))) { + stop("idColumn not found or not supplied. Supply proper idColumn if returnPointIds is TRUE.") } } diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index 0bcb9cc..551afd7 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -947,6 +947,10 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { expect_equal(nrow(dt), 9) expect_equal(names(dt), c('entity.factor3', 'entity.cat3', 'seriesX','seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) expect_equal(class(dt[[idColumn]][[1]]), 'NULL') + + ## Should err if the id column is provided but doesn't exist + expect_error(scattergl.dt(df, variables, 'bestFitLineWithRaw', idColumn = 'fake', returnPointIds = TRUE)) + expect_error(scattergl.dt(df, variables, 'bestFitLineWithRaw', returnPointIds = TRUE)) })