OHDSI · jreps · Jul 8, 2022 · Jul 8, 2022 · Jul 8, 2022 · Jul 21, 2022
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,20 +1,20 @@
 Package: SkeletonPredictionStudy
 Type: Package
 Title: A Skeleton for Prediction Development Studies that use the PatientLevelPrediction Framework
-Version: 1.0.5
+Version: 1.0.7
 Author: Jenna Reps [aut, cre]
 Maintainer: Jenna M Reps <reps@ohdsi.org>
 Description: A skeleton package to be used as a starting point when implementing patient-level prediction development studies.
 Depends:
     R(>= 3.6.0)
 Imports:
-    CohortGenerator (>= 0.3.0),
-    DatabaseConnector (>= 5.0.0),
+    CohortGenerator (>= 0.6.0),
+    DatabaseConnector (>= 5.0.4),
     dplyr,
     jsonlite,
     knitr,
     OhdsiSharing,
-    PatientLevelPrediction (>= 5.0.2),
+    PatientLevelPrediction (>= 5.0.6),
     ParallelLogger,
     rlang,
     rmarkdown,
@@ -26,10 +26,10 @@ Suggests:
     Hydra
 Remotes:
     ohdsi/CirceR,
-    ohdsi/CohortGenerator@from_package,
+    ohdsi/CohortGenerator,
     ohdsi/Hydra,
     ohdsi/OhdsiSharing,
-    ohdsi/PatientLevelPrediction@issue242
+    ohdsi/PatientLevelPrediction
 License: Apache License 2.0
 RoxygenNote: 7.1.1
 SkeletonNote: 1.0.0
diff --git a/HydraConfig.json b/HydraConfig.json
@@ -13,9 +13,9 @@
   },{
     "type": "jsonArrayToCsv",
     "input": "cohortDefinitions",
-    "mapping": [{"source": "id", "target": "webApiCohortId"},
-                {"source": "id", "target": "cohortId"},
-                {"source": "name", "target": "cohortName"}],
+    "mapping": [{"source": "id", "target": "web_api_cohort_id"},
+                {"source": "id", "target": "cohort_id"},
+                {"source": "name", "target": "cohort_name"}],
     "output": "inst/Cohorts.csv"
   },{
     "type": "jsonArrayToJson",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+SkeletonPredictionStudy 1.0.7
+======================
+  - Pulled in bug fixes from Henrik
+  - Renamed analysis to analyses to be consistent with PLP
+
 SkeletonPredictionStudy 1.0.2
 ======================
   - Cleaned r package check 

diff --git a/R/BackwardsComp.R b/R/BackwardsComp.R
@@ -15,6 +15,20 @@ backwards <- function(predictionAnalysisListFile){
     )
   }
 
+  #create split setting
+  if(json$runPlpArgs$testSplit %in% c('subject', 'time', 'stratified')){
+    splitType <- json$runPlpArgs$testSplit
+  }else{
+    splitType <- 'subject'
+  }
+
+  splitSettings <- PatientLevelPrediction::createDefaultSplitSetting(
+    testFraction = json$runPlpArgs$testFraction, 
+    splitSeed = json$runPlpArgs$splitSeed, 
+    nfold = json$runPlpArgs$nfold, 
+    type = splitType
+  )
+
   # this can be multiple?
   ##covariateSettingList <- lapply(json$covariateSettings, function(x) do.call(FeatureExtraction::createCovariateSettings, x))
   covariateSettingList <- json$covariateSettings
@@ -66,6 +80,7 @@ backwards <- function(predictionAnalysisListFile){
               featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(),
               sampleSettings = PatientLevelPrediction::createSampleSettings(), 
               preprocessSettings = preprocessSettings, 
+              splitSettings =splitSettings,
               modelSettings = mod, 
               runCovariateSummary = T
               )
@@ -81,20 +96,6 @@ backwards <- function(predictionAnalysisListFile){
   # create modelDesigns:
   json$analyses <- modelDesign
 
-  #create split setting
-  if(json$runPlpArgs$testSplit %in% c('subject', 'time', 'stratified')){
-    splitType <- json$runPlpArgs$testSplit
-  }else{
-    splitType <- 'subject'
-  }
-
-  json$splitSettings <- PatientLevelPrediction::createDefaultSplitSetting(
-    testFraction = json$runPlpArgs$testFraction, 
-    splitSeed = json$runPlpArgs$splitSeed, 
-    nfold = json$runPlpArgs$nfold, 
-    type = splitType
-    )
-
   return(json)
 }
 

diff --git a/R/CreateAllCohorts.R b/R/CreateAllCohorts.R
@@ -54,19 +54,14 @@ createCohorts <- function(
   utils::write.csv(counts, file.path(outputFolder, "CohortCounts.csv"), row.names = FALSE)
 }
 
-addCohortNames <- function(data, IdColumnName = "cohortId", nameColumnName = "cohortName") {
+addCohortNames <- function(data) {
   pathToCsv <- system.file("Cohorts.csv", package = "SkeletonPredictionStudy")
-
+  
   idToName <- utils::read.csv(pathToCsv)
   idToName <- idToName[order(idToName$cohortId), ]
   idToName <- idToName[!duplicated(idToName$cohortId), ]
-  names(idToName)[1] <- IdColumnName
-  names(idToName)[2] <- nameColumnName
+
   data <- merge(data, idToName, all.x = TRUE)
-  # Change order of columns:
-  idCol <- which(colnames(data) == IdColumnName)
-  if (idCol < ncol(data) - 1) {
-    data <- data[, c(1:idCol, ncol(data) , (idCol + 1):(ncol(data) - 1))]
-  }
+
   return(data)
 }
diff --git a/R/Main.R b/R/Main.R
@@ -25,9 +25,6 @@
 #'                             performance.
 #' @param createProtocol       Creates a protocol based on the analyses specification                             
 #' @param createCohorts        Create the cohortTable table with the target population and outcome cohorts?
-#' @param runDiagnostic        Runs a diagnostic of the T, O and tar settings for the cdmDatabaseSchema - can be used to check whether to change 
-#'                             settings or whether the prediction may not do well.  
-#' @param viewDiagnostic       Opens a shiny app with the diagnostic results (run after runDiagnostic completes)                              
 #' @param runAnalyses          Run the model development
 #' @param createValidationPackage  Create a package for sharing the models 
 #' @param useHydra             Whether to use Hydra to create the validation package (requires hydra to be installed) or download the master version of the skeleton (requires internet access)
@@ -70,8 +67,6 @@
 #'         outputFolder = "c:/temp/study_results", 
 #'         createProtocol = T,
 #'         createCohorts = T,
-#'         runDiagnostic = F,
-#'         viewDiagnostic = F,
 #'         runAnalyses = T,
 #'         createValidationPackage = T,
 #'         useHydra = F,
@@ -90,8 +85,6 @@ execute <- function(
   outputFolder,
   createProtocol = F,
   createCohorts = F,
-  runDiagnostic = F,
-  viewDiagnostic = F,
   runAnalyses = F,
   createValidationPackage = F,
   useHydra = F,
@@ -126,8 +119,8 @@ execute <- function(
     )
   }
 
-  if(runAnalyses || onlyFetchData || runDiagnostic){
-    if(onlyFetchData || (runDiagnostic && !runAnalyses)) {
+  if(runAnalyses || onlyFetchData){
+    if(onlyFetchData) {
       ParallelLogger::logInfo("Only fetching data and not running predictions")
     } else {
       ParallelLogger::logInfo("Running predictions")
@@ -175,89 +168,14 @@ execute <- function(
       list(
         databaseDetails = databaseDetails,
         modelDesignList = predictionAnalysisList$analyses,
-        onlyFetchData =  onlyFetchData || (runDiagnostic && !runAnalyses),
-        splitSettings = predictionAnalysisList$splitSettings,
+        onlyFetchData =  onlyFetchData,
         cohortDefinitions = predictionAnalysisList$cohortDefinitions,
         logSettings = logSettings,
         saveDirectory = outputFolder
       )
     )
   }
 
-  if(runDiagnostic){
-    ParallelLogger::logInfo(paste0("Creating diagnostic results for ",databaseDetails$cdmDatabaseName))
-
-    settings <- utils::read.csv(file.path(outputFolder, 'settings.csv'))
-
-    settings <- settings %>% 
-      dplyr::select(.data$targetName, .data$outcomeName, .data$dataLocation) %>% 
-      dplyr::mutate(target = paste0(.data$targetName, '-' , .data$dataLocation))
-
-    length(unique(settings$target))
-
-    # run diagnostic
-    for(i in 1:length(unique(settings$target))){
-
-      setOfInt <- settings %>% dplyr::filter(.data$target  == unique(settings$target)[i])
-
-      ParallelLogger::logInfo(paste0("Target Cohort: ", unique(setOfInt$targetName), ' generating'))
-
-      diagnosticData <- PatientLevelPrediction::loadPlpData(file.path(outputFolder, setOfInt$dataLocation[1]))
-      diagnosticData$cohorts$cohortId <- i*100000+diagnosticData$cohorts$cohortId
-
-      diag <- tryCatch(
-        {
-          PatientLevelPrediction::diagnostic( 
-            plpData = diagnosticData,
-            cdmDatabaseName = databaseDetails$cdmDatabaseName,
-            cohortName = unique(setOfInt$target), 
-            outcomeNames = unique(setOfInt$outcomeName), 
-            databaseDetails = NULL,
-            populationSettings = PatientLevelPrediction::createStudyPopulationSettings(
-              includeAllOutcomes = F, 
-              firstExposureOnly = F, 
-              washoutPeriod = 0, 
-              requireTimeAtRisk = F, 
-              removeSubjectsWithPriorOutcome = F, 
-              riskWindowStart = 0, 
-              riskWindowEnd = 9999
-            ),
-            outputFolder = file.path(outputFolder, 'diagnostics'), 
-            minCellCount = minCellCount
-          )
-        },
-        error = function(err) {
-          # error handler picks up where error was generated
-          ParallelLogger::logError(paste("Diagnostic error:  ",err))
-          return(NULL)
-
-        }
-      )
-    }
-
-
-  }
-
-  if(viewDiagnostic){
-    ParallelLogger::logInfo(paste0("Loading diagnostic shiny app"))
-
-    checkDiagnosticResults <- dir.exists(file.path(outputFolder, 'diagnostics'))
-    checkShinyViewer <- dir.exists(system.file("shiny", "DiagnosticsExplorer", package = "PatientLevelPrediction"))
-    if(!checkDiagnosticResults){
-      warning('No diagnosstic results found, please execute with runDiagnostic first')
-    } else if(!checkShinyViewer){
-      warning('No DiagnosticsExplorer shiny app found in your PatientLevelPrediction library - try updating PatientLevelPrediction')
-    } else{
-      shinyDirectory <- system.file("shiny", "DiagnosticsExplorer", package = "PatientLevelPrediction")
-      shinySettings <- list(dataFolder = file.path(outputFolder, 'diagnostics'))
-      .GlobalEnv$shinySettings <- shinySettings
-      on.exit(rm(shinySettings, envir = .GlobalEnv))
-      shiny::runApp(shinyDirectory)
-    }
-
-  }
-
-
   if (packageResults) {
     ensure_installed("OhdsiSharing")
     ParallelLogger::logInfo("Packaging results")

diff --git a/extras/PackageMaintenance.R b/extras/PackageMaintenance.R
@@ -44,3 +44,8 @@ rmarkdown::render("vignettes/CreatingStudyPackageInR.Rmd",
 # Store environment in which the study was executed -----------------------
 OhdsiRTools::insertEnvironmentSnapshotInPackage("SkeletonPredictionStudy")
 
+reticulate::conda_export(
+  envname = 'r-reticulate', 
+  file = "environment.yml", 
+  json = F
+)
diff --git a/extras/createDevelopmentPackageExample.R b/extras/createDevelopmentPackageExample.R
@@ -1,5 +1,5 @@
 source(file.path(getwd(),'extras/createDevelopmentPackageFunctions.R'))
-#devtools::source_url("https://raw.github.com/ohdsi/SkeletonPredictionStudy/issue242/extras/createDevelopmentPackageFunctions.R")
+#devtools::source_url("https://raw.github.com/ohdsi/SkeletonPredictionStudy/main/extras/createDevelopmentPackageFunctions.R")
 
 packageName <- 'ExamplePrediction'
 baseUrl <- 'https://api.ohdsi.org/WebAPI/'
@@ -26,6 +26,7 @@ modelDesign1 <- PatientLevelPrediction::createModelDesign(
     minFraction = 1/10000, 
     normalize = T
     ), 
+  splitSettings = PatientLevelPrediction::createDefaultSplitSetting(),
   modelSettings = PatientLevelPrediction::setLassoLogisticRegression(), 
   runCovariateSummary = T
   )
@@ -74,6 +75,7 @@ modelDesign2 <- PatientLevelPrediction::createModelDesign(
     minFraction = 1/10000, 
     normalize = T
   ), 
+  splitSettings = PatientLevelPrediction::createDefaultSplitSetting(),
   modelSettings = PatientLevelPrediction::setLassoLogisticRegression(), 
   runCovariateSummary = T
 )
@@ -87,17 +89,15 @@ jsonList <- createDevelopmentSkeletonSettings(
   packageName = packageName,
   organizationName = "OHDSI",
   modelDesignList = modelDesignList,
-  splitSettings = PatientLevelPrediction::createDefaultSplitSetting(),
   baseUrl = baseUrl,
   saveDirectory = NULL
   )
 
 
 createDevelopmentPackage(
   jsonList = jsonList, 
-  baseUrl = baseUrl,
   #skeletonLocation = 'D:/GitHub/SkeletonPredictionStudy', 
-  skeletonUrl = "https://github.com/ohdsi/SkeletonPredictionStudy/archive/master.zip",
+  skeletonUrl = "https://github.com/ohdsi/SkeletonPredictionStudy/archive/main.zip",
   outputLocation = '/Users/jreps/Documents/testing2',
   packageName = packageName
   )