Skip to content

Commit 331e382

Browse files
authored
Merge pull request #486 from OHDSI/additional
Additional columns handling
2 parents a20ce9c + 3a174fc commit 331e382

7 files changed

+47
-141
lines changed

R/requireCohortIntersect.R

+5-23
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,6 @@ requireCohortIntersect <- function(cohort,
8282
upper_limit[is.infinite(upper_limit)] <- 999999L
8383
upper_limit <- as.integer(upper_limit)
8484

85-
cols <- unique(
86-
c(
87-
"cohort_definition_id",
88-
"subject_id",
89-
"cohort_start_date",
90-
"cohort_end_date",
91-
indexDate
92-
)
93-
)
94-
9585
window_start <- window[[1]][1]
9686
window_end <- window[[1]][2]
9787

@@ -121,8 +111,8 @@ requireCohortIntersect <- function(cohort,
121111
newCohort <- cdm[[tmpNewCohort]]
122112

123113
# requirement
114+
intersectCol <- uniqueColumnName(newCohort)
124115
newCohort <- newCohort |>
125-
dplyr::select(dplyr::all_of(.env$cols)) |>
126116
PatientProfiles::addCohortIntersectCount(
127117
targetCohortTable = targetCohortTable,
128118
targetCohortId = targetCohortId,
@@ -131,16 +121,16 @@ requireCohortIntersect <- function(cohort,
131121
targetEndDate = targetEndDate,
132122
window = window,
133123
censorDate = censorDate,
134-
nameStyle = "intersect_cohort",
124+
nameStyle = intersectCol,
135125
name = tmpNewCohort
136126
)
137127

138128
newCohort <- newCohort |>
139129
dplyr::filter(
140-
.data$intersect_cohort >= .env$lower_limit &
141-
.data$intersect_cohort <= .env$upper_limit
130+
.data[[intersectCol]] >= .env$lower_limit &
131+
.data[[intersectCol]] <= .env$upper_limit
142132
) |>
143-
dplyr::select(dplyr::all_of(cols)) |>
133+
dplyr::select(!dplyr::all_of(intersectCol)) |>
144134
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
145135
logPrefix = "CohortConstructor_requireCohortIntersect_subset_")
146136

@@ -186,14 +176,6 @@ requireCohortIntersect <- function(cohort,
186176
logPrefix = "CohortConstructor_requireCohortIntersect_union_")
187177
}
188178

189-
# add additional columns
190-
if (any(!colnames(cohort) %in% colnames(newCohort))) {
191-
newCohort <- newCohort |>
192-
dplyr::inner_join(cohort, by = c(cols)) |>
193-
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
194-
logPrefix = "CohortConstructor_requireCohortIntersect_additional_")
195-
}
196-
197179
newCohort <- newCohort |>
198180
dplyr::compute(name = name, temporary = FALSE,
199181
logPrefix = "CohortConstructor_requireCohortIntersect_name_") |>

R/requireConceptIntersect.R

+5-24
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,6 @@ requireConceptIntersect <- function(cohort,
6969
upper_limit[is.infinite(upper_limit)] <- 999999L
7070
upper_limit <- as.integer(upper_limit)
7171

72-
cols <- unique(
73-
c(
74-
"cohort_definition_id",
75-
"subject_id",
76-
"cohort_start_date",
77-
"cohort_end_date",
78-
indexDate
79-
)
80-
)
81-
8272
window_start <- window[[1]][1]
8373
window_end <- window[[1]][2]
8474

@@ -104,8 +94,8 @@ requireConceptIntersect <- function(cohort,
10494
cdm <- filterCohortInternal(cdm, cohort, cohortId, tmpNewCohort, tmpUnchanged)
10595
newCohort <- cdm[[tmpNewCohort]]
10696

97+
intersectCol <- uniqueColumnName(newCohort)
10798
newCohort <- newCohort |>
108-
dplyr::select(dplyr::all_of(.env$cols)) |>
10999
PatientProfiles::addConceptIntersectCount(
110100
conceptSet = conceptSet,
111101
indexDate = indexDate,
@@ -114,16 +104,16 @@ requireConceptIntersect <- function(cohort,
114104
window = window,
115105
censorDate = censorDate,
116106
inObservation = inObservation,
117-
nameStyle = "intersect_concept",
107+
nameStyle = intersectCol,
118108
name = tmpNewCohort
119109
)
120110

121111
newCohort <- newCohort |>
122112
dplyr::filter(
123-
.data$intersect_concept >= .env$lower_limit &
124-
.data$intersect_concept <= .env$upper_limit
113+
.data[[intersectCol]] >= .env$lower_limit &
114+
.data[[intersectCol]] <= .env$upper_limit
125115
) |>
126-
dplyr::select(dplyr::all_of(cols)) |>
116+
dplyr::select(!dplyr::all_of(intersectCol)) |>
127117
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
128118
logPrefix = "CohortConstructor_requireConceptIntersect_subset_")
129119

@@ -166,14 +156,6 @@ requireConceptIntersect <- function(cohort,
166156
logPrefix = "CohortConstructor_requireConceptIntersect_union_")
167157
}
168158

169-
# add additional columns
170-
if (any(!colnames(cohort) %in% colnames(newCohort))) {
171-
newCohort <- newCohort |>
172-
dplyr::inner_join(cohort, by = c(cols)) |>
173-
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
174-
logPrefix = "CohortConstructor_requireConceptIntersect_additional_")
175-
}
176-
177159
# cohort
178160
newCohort <- newCohort |>
179161
dplyr::compute(
@@ -187,7 +169,6 @@ requireConceptIntersect <- function(cohort,
187169

188170
omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(tablePrefix))
189171

190-
191172
useIndexes <- getOption("CohortConstructor.use_indexes")
192173
if (!isFALSE(useIndexes)) {
193174
addIndex(

R/requireDeathFlag.R

+7-32
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,6 @@ requireDeathFlag <- function(cohort,
5353
return(cdm[[name]])
5454
}
5555

56-
cols <- unique(
57-
c(
58-
"cohort_definition_id",
59-
"subject_id",
60-
"cohort_start_date",
61-
"cohort_end_date",
62-
indexDate
63-
)
64-
)
65-
6656
window_start <- window[[1]][1]
6757
window_end <- window[[1]][2]
6858

@@ -73,21 +63,20 @@ requireDeathFlag <- function(cohort,
7363
cdm <- filterCohortInternal(cdm, cohort, cohortId, tmpNewCohort, tmpUnchanged)
7464
newCohort <- cdm[[tmpNewCohort]]
7565

66+
intersectCol <- uniqueColumnName(newCohort)
7667
newCohort <- newCohort |>
77-
dplyr::select(dplyr::all_of(.env$cols)) |>
7868
PatientProfiles::addDeathFlag(
7969
indexDate = indexDate,
8070
censorDate = censorDate,
8171
window = window,
82-
deathFlagName = "death",
72+
deathFlagName = intersectCol,
8373
name = tmpNewCohort
8474
)
8575

8676
if (isFALSE(negate)) {
8777
newCohort <- newCohort |>
88-
dplyr::filter(.data$death == 1 |
89-
(!.data$cohort_definition_id %in% cohortId)) |>
90-
dplyr::select(!"death") |>
78+
dplyr::filter(.data[[intersectCol]] == 1) |>
79+
dplyr::select(!dplyr::all_of(intersectCol)) |>
9180
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
9281
logPrefix = "CohortConstructor_requireDeathFlag_negateFalse_")
9382
# attrition reason
@@ -96,9 +85,8 @@ requireDeathFlag <- function(cohort,
9685
} else {
9786
# ie require absence instead of presence
9887
newCohort <- newCohort |>
99-
dplyr::filter(.data$death != 1 |
100-
(!.data$cohort_definition_id %in% cohortId)) |>
101-
dplyr::select(!"death") |>
88+
dplyr::filter(.data[[intersectCol]] != 1) |>
89+
dplyr::select(!dplyr::all_of(intersectCol)) |>
10290
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
10391
logPrefix = "CohortConstructor_requireDeathFlag_negateTrue_")
10492
# attrition reason
@@ -113,26 +101,13 @@ requireDeathFlag <- function(cohort,
113101
if (isTRUE(needsIdFilter(cohort, cohortId))) {
114102
newCohort <- newCohort |>
115103
# join non modified cohorts
116-
dplyr::union_all(
117-
cdm[[tmpUnchanged]] |>
118-
dplyr::select(dplyr::all_of(colnames(newCohort)))
119-
) |>
104+
dplyr::union_all(cdm[[tmpUnchanged]]) |>
120105
dplyr::compute(
121106
name = tmpNewCohort, temporary = FALSE,
122107
logPrefix = "CohortConstructor_requireDeathFlag_union_"
123108
)
124109
}
125110

126-
# add additional columns
127-
if (any(!colnames(cohort) %in% colnames(newCohort))) {
128-
newCohort <- newCohort |>
129-
dplyr::inner_join(cohort, by = c(cols)) |>
130-
dplyr::compute(
131-
name = tmpNewCohort, temporary = FALSE,
132-
logPrefix = "CohortConstructor_requireDeathFlag_additional_"
133-
)
134-
}
135-
136111
newCohort <- newCohort |>
137112
dplyr::compute(
138113
name = name, temporary = FALSE,

R/requireDemographics.R

+11-32
Original file line numberDiff line numberDiff line change
@@ -287,17 +287,19 @@ demographicsFilter <- function(cohort,
287287
# because the cohort table passed to the function might have extra columns
288288
# that would conflict with ones we'll add, we'll take the core table first
289289
# join later
290-
cols <- c("cohort_definition_id", "subject_id", "cohort_start_date",
291-
"cohort_end_date", indexDate) |> unique()
292290

291+
newCols <- uniqueColumnName(newCohort, n = 4)
293292
newCohort <- newCohort |>
294-
dplyr::select(dplyr::all_of(cols)) |>
295293
PatientProfiles::addDemographics(
296294
indexDate = indexDate,
297295
age = reqAge,
296+
ageName = newCols[1],
298297
sex = reqSex,
298+
sexName = newCols[2],
299299
priorObservation = reqPriorObservation,
300+
priorObservationName = newCols[3],
300301
futureObservation = reqFutureObservation,
302+
futureObservationName = newCols[4],
301303
name = tmpNewCohort
302304
)
303305

@@ -310,7 +312,7 @@ demographicsFilter <- function(cohort,
310312
if (is.infinite(max_age)) max_age <- 200
311313
# filter
312314
newCohort <- newCohort |>
313-
dplyr::filter(.data$age >= .env$min_age & .data$age <= .env$max_age) |>
315+
dplyr::filter(.data[[newCols[1]]] >= .env$min_age & .data[[newCols[1]]] <= .env$max_age) |>
314316
dplyr::compute(
315317
name = tmpNewCohort, temporary = FALSE,
316318
logPrefix = "CohortConstructor_demographicsFilter_reqAge_"
@@ -323,7 +325,7 @@ demographicsFilter <- function(cohort,
323325
# sex
324326
if (reqSex) {
325327
newCohort <- newCohort |>
326-
dplyr::filter(.data$sex == .env$sex) |>
328+
dplyr::filter(.data[[newCols[2]]] == .env$sex) |>
327329
dplyr::compute(
328330
name = tmpNewCohort, temporary = FALSE,
329331
logPrefix = "CohortConstructor_demographicsFilter_reqSex_"
@@ -336,7 +338,7 @@ demographicsFilter <- function(cohort,
336338
# prior observation
337339
if (reqPriorObservation) {
338340
newCohort <- newCohort |>
339-
dplyr::filter(.data$prior_observation >= .env$minPriorObservation) |>
341+
dplyr::filter(.data[[newCols[3]]] >= .env$minPriorObservation) |>
340342
dplyr::compute(
341343
name = tmpNewCohort, temporary = FALSE,
342344
logPrefix = "CohortConstructor_demographicsFilter_reqPriorObservation_"
@@ -349,7 +351,7 @@ demographicsFilter <- function(cohort,
349351
# future observation
350352
if (reqFutureObservation) {
351353
newCohort <- newCohort |>
352-
dplyr::filter(.data$future_observation >= .env$minFutureObservation) |>
354+
dplyr::filter(.data[[newCols[4]]] >= .env$minFutureObservation) |>
353355
dplyr::compute(
354356
name = tmpNewCohort, temporary = FALSE,
355357
logPrefix = "CohortConstructor_demographicsFilter_reqFutureObservation_"
@@ -361,40 +363,18 @@ demographicsFilter <- function(cohort,
361363
}
362364

363365
newCohort <- newCohort |>
364-
dplyr::select(dplyr::all_of(cols)) |>
366+
dplyr::select(!dplyr::any_of(newCols)) |>
365367
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
366368
logPrefix = "CohortConstructor_demographicsFilter_select_")
367369

368370
if (isTRUE(needsIdFilter(cohort, cohortId))) {
369371
newCohort <- newCohort |>
370372
# join non modified cohorts
371-
dplyr::union_all(
372-
cdm[[tmpUnchanged]] |>
373-
dplyr::select(dplyr::all_of(cols))
374-
) |>
373+
dplyr::union_all(cdm[[tmpUnchanged]]) |>
375374
dplyr::compute(name = tmpNewCohort, temporary = FALSE,
376375
logPrefix = "CohortConstructor_demographicsFilter_union_")
377376
}
378377

379-
# add additional columns
380-
if (any(!colnames(cohort) %in% colnames(newCohort))) {
381-
if (dplyr::pull(dplyr::tally(dplyr::ungroup(newCohort))) == 0) {
382-
additionalCols <- colnames(cohort)[!colnames(cohort) %in% colnames(newCohort)]
383-
additionalCols <- rep(NA_character_, length(additionalCols)) |>
384-
rlang::parse_exprs() |>
385-
rlang::set_names(additionalCols)
386-
newCohort <- newCohort |>
387-
dplyr::mutate(!!!additionalCols)
388-
} else {
389-
newCohort <- newCohort |>
390-
dplyr::inner_join(cohort, by = c(cols)) |>
391-
dplyr::compute(
392-
name = tmpNewCohort, temporary = FALSE,
393-
logPrefix = "CohortConstructor_demographicsFilter_additional_"
394-
)
395-
}
396-
}
397-
398378
newCohort <- newCohort |>
399379
dplyr::compute(name = name, temporary = FALSE,
400380
logPrefix = "CohortConstructor_demographicsFilter_name_") |>
@@ -404,7 +384,6 @@ demographicsFilter <- function(cohort,
404384

405385
omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(tablePrefix))
406386

407-
408387
useIndexes <- getOption("CohortConstructor.use_indexes")
409388
if (!isFALSE(useIndexes)) {
410389
addIndex(

0 commit comments

Comments
 (0)