-
Notifications
You must be signed in to change notification settings - Fork 5
Description
Describe the bug
I'm not sure this is a bug, but it seems like it might be. Suppose a person receives two drugs on the same day. The duration of each drug event is one day, since it is chemotherapy given in a hospital setting. I would expect a single combination therapy in our TreatmentPatterns (TP) results (cytarabine+daunorubicin), since both drugs are given together at the same time. Instead we see cytarabine (alone) followed by cytarabine+daunorubicin.
To Reproduce
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.5.2
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(CDMConnector)
json <- c('
{
"person": [
{
"person_id": 2,
"gender_concept_id": 8532,
"year_of_birth": 1970,
"month_of_birth": 4,
"day_of_birth": 6,
"birth_datetime": "1970-04-06",
"race_concept_id": 0,
"ethnicity_concept_id": 0,
"location_id": 0,
"provider_id": 0,
"care_site_id": 0
}
],
"observation_period": [
{
"observation_period_id": 2,
"person_id": 2,
"observation_period_start_date": "2014-01-01",
"observation_period_end_date": "2024-01-01",
"period_type_concept_id": 32817
}
],
"visit_occurrence": [],
"visit_detail": [],
"drug_exposure": [
{
"drug_exposure_id": 1,
"person_id": 2,
"drug_concept_id": 1311078,
"drug_exposure_start_date": "2017-11-08",
"drug_exposure_start_datetime": "2017-11-08",
"drug_exposure_end_date": "2017-11-09",
"drug_exposure_end_datetime": "2017-11-09",
"drug_type_concept_id": 38000177,
"drug_source_value": "cytarabine"
},
{
"drug_exposure_id": 1,
"person_id": 2,
"drug_concept_id": 1311799,
"drug_exposure_start_date": "2017-11-08",
"drug_exposure_start_datetime": "2017-11-08",
"drug_exposure_end_date": "2017-11-09",
"drug_exposure_end_datetime": "2017-11-09",
"drug_type_concept_id": 38000177,
"drug_source_value": "daunorubicin"
}
],
"condition_occurrence": [
{
"condition_occurrence_id": 1,
"person_id": 2,
"condition_concept_id": 140352,
"condition_start_date": "2016-05-05",
"condition_start_datetime": "2016-05-05",
"condition_end_date": "2023-07-20",
"condition_end_datetime": "2023-07-20",
"condition_type_concept_id": 32879,
"provider_id": 0,
"condition_source_value": "Acute myeloid leukemia, disease"
}
],
"procedure_occurrence": [],
"measurement": []
}
')
readr::write_file(json, file.path(tempdir(), "aml_tp.json"))
cdm <- TestGenerator::patientsCDM(tempdir(), "aml_tp", "5.4", "TP_cdm")
#> ✖ Standard table(s) in test data: person, observation_period, visit_occurrence, visit_detail, drug_exposure, condition_occurrence, procedure_occurrence and measurement
#> ✔ Patients pushed to blank CDM successfully
cdm$drug_exposure
#> # Source: table<drug_exposure> [?? x 23]
#> # Database: DuckDB 1.4.4 [root@Darwin 25.2.0:R 4.5.1//private/var/folders/2j/8z0yfn1j69q8sxjc7vj9yhz40000gp/T/Rtmpxw1XxX/file105af7632e0ed.duckdb]
#> drug_exposure_id person_id drug_concept_id drug_exposure_start_date
#> <int> <int> <int> <date>
#> 1 1 2 1311078 2017-11-08
#> 2 1 2 1311799 2017-11-08
#> # ℹ 19 more variables: drug_exposure_start_datetime <dttm>,
#> # drug_exposure_end_date <date>, drug_exposure_end_datetime <dttm>,
#> # verbatim_end_date <date>, drug_type_concept_id <int>, stop_reason <chr>,
#> # refills <int>, quantity <dbl>, days_supply <int>, sig <chr>,
#> # route_concept_id <int>, lot_number <chr>, provider_id <int>,
#> # visit_occurrence_id <int>, visit_detail_id <int>, drug_source_value <chr>,
#> # drug_source_concept_id <int>, route_source_value <chr>, …
cdm <- generateConceptCohortSet(cdm, conceptSet = list(aml = 140352, cytarabine = 1311078, daunorubicin = 1311799),
name = "cohort",
end = "event_end_date")
cohortCount(cdm$cohort) %>%
left_join(settings(cdm$cohort))
#> Joining with `by = join_by(cohort_definition_id)`
#> # A tibble: 3 × 8
#> cohort_definition_id number_records number_subjects cohort_name limit
#> <int> <int> <int> <chr> <chr>
#> 1 1 1 1 aml first
#> 2 2 1 1 cytarabine first
#> 3 3 1 1 daunorubicin first
#> # ℹ 3 more variables: prior_observation <dbl>, future_observation <dbl>,
#> # end <chr>
cdm$cohort %>%
PatientProfiles::addCohortName()
#> # Source: SQL [?? x 5]
#> # Database: DuckDB 1.4.4 [root@Darwin 25.2.0:R 4.5.1//private/var/folders/2j/8z0yfn1j69q8sxjc7vj9yhz40000gp/T/Rtmpxw1XxX/file105af7632e0ed.duckdb]
#> cohort_definition_id subject_id cohort_start_date cohort_end_date cohort_name
#> <int> <int> <date> <date> <chr>
#> 1 1 2 2016-05-05 2023-07-20 aml
#> 2 2 2 2017-11-08 2017-11-09 cytarabine
#> 3 3 2 2017-11-08 2017-11-09 daunorubicin
cohortSet <- settings(cdm$cohort) |>
select(
cohortId = "cohort_definition_id",
cohortName = "cohort_name"
) |>
mutate(type = case_when(
stringr::str_detect(cohortName, "aml") ~ "target",
TRUE ~ "event"
))
andr <- TreatmentPatterns::computePathways(
cohorts = cohortSet,
cohortTableName = "cohort",
cdm = cdm,
analysisId = 1,
description = "",
tempEmulationSchema = NULL,
startAnchor = "startDate",
windowStart = 0,
endAnchor = "endDate",
windowEnd = 0,
minEraDuration = 0,
splitEventCohorts = NULL,
splitTime = NULL,
eraCollapseSize = 30,
combinationWindow = 7,
minPostCombinationDuration = 0,
filterTreatments = "First",
maxPathLength = 10,
overlapMethod = "truncate",
concatTargets = TRUE
)
#> -- Qualifying records for cohort definitions: 1, 2, 3
#> Records: 3
#> Subjects: 1
#> -- Removing records < minEraDuration (0)
#> Records: 3
#> Subjects: 1
#> >> Starting on target: 1 (aml)
#> -- Removing events outside window (startDate: 0 | endDate: 0)
#> Records: 2
#> Subjects: 1
#> -- splitEventCohorts
#> Records: 2
#> Subjects: 1
#> -- No eras needed Collapsing, eraCollapse (30)
#> Records: 2
#> Subjects: 1
#> -- Iteration 1: minPostCombinationDuration (0), combinatinoWindow (7)
#> Records: 3
#> Subjects: 1
#> -- After Combination
#> Records: 3
#> Subjects: 1
#> -- filterTreatments (First)
#> Records: 2
#> Subjects: 1
#> -- Max path length (10)
#> Records: 2
#> Subjects: 1
#> -- treatment construction done
#> Records: 2
#> Subjects: 1
andr$treatmentHistory %>%
dplyr::collect() %>%
select(-indexYear, -age, -sex) %>%
t()
#> [,1] [,2]
#> eventCohortId "2" "2+3"
#> personId "1" "1"
#> targetCohortId "1" "1"
#> eventStartDate "17478" "17478"
#> eventEndDate "17478" "17479"
#> durationEra "0" "1"
#> n_target "1" "1"
#> sortOrder "17478.01" "17478.01"
#> eventSeq "1" "2"
#> eventCohortName "cytarabine" "cytarabine+daunorubicin"
cdmDisconnect(cdm)
Andromeda::close(andr)
Created on 2026-02-17 with reprex v2.1.1
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.5.1 (2025-06-13)
#> os macOS Tahoe 26.2
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz UTC
#> date 2026-02-17
#> pandoc 3.6.3 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#> quarto 1.8.27 @ /usr/local/bin/quarto
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> Andromeda 0.6.3 2023-03-26 [1] CRAN (R 4.5.1)
#> arrow 22.0.0.1 2025-12-23 [1] CRAN (R 4.5.2)
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.5.0)
#> backports 1.5.0 2024-05-23 [1] CRAN (R 4.5.0)
#> bit 4.6.0 2025-03-06 [1] CRAN (R 4.5.0)
#> bit64 4.6.0-1 2025-01-16 [1] CRAN (R 4.5.0)
#> blob 1.3.0 2026-01-14 [1] CRAN (R 4.5.2)
#> brio 1.1.5 2024-04-24 [1] CRAN (R 4.5.0)
#> cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.0)
#> CDMConnector * 2.4.0 2026-02-16 [1] local
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.5.0)
#> checkmate 2.3.4 2026-02-03 [1] CRAN (R 4.5.2)
#> cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
#> DBI 1.2.3 2024-06-02 [1] CRAN (R 4.5.0)
#> dbplyr 2.5.2 2026-02-13 [1] CRAN (R 4.5.2)
#> digest 0.6.39 2025-11-19 [1] CRAN (R 4.5.2)
#> dplyr * 1.2.0 2026-02-03 [1] CRAN (R 4.5.2)
#> duckdb 1.4.4 2026-01-28 [1] CRAN (R 4.5.2)
#> evaluate 1.0.5 2025-08-27 [1] CRAN (R 4.5.0)
#> farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.0)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.0)
#> fs 1.6.6 2025-04-12 [1] CRAN (R 4.5.0)
#> generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.0)
#> ggplot2 4.0.2 2026-02-03 [1] CRAN (R 4.5.2)
#> glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
#> gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.0)
#> hms 1.1.4 2025-10-17 [1] CRAN (R 4.5.0)
#> htmltools 0.5.9 2025-12-04 [1] CRAN (R 4.5.2)
#> jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.0)
#> knitr 1.51 2025-12-20 [1] CRAN (R 4.5.2)
#> lifecycle 1.0.5 2026-01-08 [1] CRAN (R 4.5.2)
#> magrittr 2.0.4 2025-09-12 [1] CRAN (R 4.5.0)
#> memoise 2.0.1 2021-11-26 [1] CRAN (R 4.5.0)
#> omopgenerics 1.3.6 2026-01-28 [1] CRAN (R 4.5.2)
#> openxlsx 4.2.8.1 2025-10-31 [1] CRAN (R 4.5.0)
#> otel 0.2.0 2025-08-29 [1] CRAN (R 4.5.0)
#> PatientProfiles 1.4.5 2026-01-21 [1] CRAN (R 4.5.2)
#> pillar 1.11.1 2025-09-17 [1] CRAN (R 4.5.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
#> purrr 1.2.1 2026-01-09 [1] CRAN (R 4.5.2)
#> R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
#> RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.0)
#> Rcpp 1.1.1 2026-01-10 [1] CRAN (R 4.5.2)
#> readr 2.1.6 2025-11-14 [1] CRAN (R 4.5.2)
#> readxl 1.4.5 2025-03-07 [1] CRAN (R 4.5.0)
#> reprex 2.1.1 2024-07-06 [1] CRAN (R 4.5.0)
#> rJava 1.0-11 2024-01-26 [1] CRAN (R 4.5.0)
#> rlang 1.1.7 2026-01-09 [1] CRAN (R 4.5.2)
#> rmarkdown 2.30 2025-09-28 [1] CRAN (R 4.5.0)
#> RSQLite 2.4.4 2025-11-10 [1] CRAN (R 4.5.0)
#> rstudioapi 0.18.0 2026-01-16 [1] CRAN (R 4.5.2)
#> S7 0.2.1 2025-11-14 [1] CRAN (R 4.5.2)
#> scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.0)
#> sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.0)
#> snakecase 0.11.1 2023-08-27 [1] CRAN (R 4.5.0)
#> stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.0)
#> stringr 1.6.0 2025-11-04 [1] CRAN (R 4.5.0)
#> TestGenerator 0.5.0 2026-01-14 [1] CRAN (R 4.5.2)
#> testthat 3.3.2 2026-01-11 [1] CRAN (R 4.5.2)
#> tibble 3.3.1 2026-01-11 [1] CRAN (R 4.5.2)
#> tidyr 1.3.2 2025-12-19 [1] CRAN (R 4.5.2)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.0)
#> TreatmentPatterns 3.1.2 2026-02-17 [1] Github (darwin-eu/TreatmentPatterns@fb4437f)
#> tzdb 0.5.0 2025-03-15 [1] CRAN (R 4.5.0)
#> utf8 1.2.6 2025-06-08 [1] CRAN (R 4.5.0)
#> vctrs 0.7.1 2026-01-23 [1] CRAN (R 4.5.2)
#> withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.0)
#> xfun 0.56 2026-01-18 [1] CRAN (R 4.5.2)
#> yaml 2.3.12 2025-12-10 [1] CRAN (R 4.5.2)
#> zip 2.3.3 2025-05-13 [1] CRAN (R 4.5.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
#> * ── Packages attached to the search path.
#>
#> ──────────────────────────────────────────────────────────────────────────────
Expected behavior
A single combination treatment: cytarabine+daunorubicin.
Additional context
It is possible we are misunderstanding the parameters. Setting minPostCombinationDuration = 1 seems to fix this and give us a single combination as expected.
tagging @antonbarchuk