Skip to content

Commit 5ce8bbe

Browse files
authored
Merge pull request #24 from dewittpe/tidyverse-support
Tidyverse support
2 parents d121d07 + 32deb4d commit 5ce8bbe

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+10972
-10425
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ data-raw/icd/desc_start_stop.rds
1111
data-raw/icd/icd10/.download_stamp
1212
data-raw/icd/icd10/icd10.rds
1313
data-raw/icd/icd9/.download_stamp
14-
data-raw/icd/icd9/icd9_cm_pcs.rds
14+
data-raw/icd/icd9/icd9.rds
1515
data-raw/icd/icd_chapters.rds
1616
data-raw/icd/icd_chapters_subchapters.rds
1717
data-raw/icd/icd_codes.rds

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ BugReports: https://github.com/dewittpe/medicalcoder/issues
2121
LazyData: true
2222
Suggests:
2323
data.table,
24+
dplyr,
2425
kableExtra,
2526
knitr,
2627
R.utils,
27-
rmarkdown,
28-
tibble (>= 2.0.0)
28+
rmarkdown
2929
RoxygenNote: 7.3.3
3030
VignetteBuilder: knitr
3131
Roxygen: list(markdown = TRUE)

NEWS.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
# medicalcoder 0.7.0.9000
2+
3+
## New Features
4+
5+
* If a `tibble` is passed to `comorbidities()` and the `dplyr` namespace is
6+
available, then `dplyr` methods will be used for data manipulation. This
7+
change will generally result in less computation time than base R
8+
`data.frames` (`data.tables` require even less time).
9+
10+
## Other Changes
11+
12+
* Extend and improve the internal ICD-9 database to distinguish between CDC and
13+
CMS sources.
14+
15+
* Fix documentation of the `mdcr` and `mdcr_longitudinal` datasets.
16+
17+
* Clarified internal data.frame/data.table helpers: documented that `mdcr_select()`
18+
deep-copies data.table subsets to avoid aliasing, noted the selfref fix in
19+
`mdcr_set()`, and added inline guidance in the longitudinal section of
20+
`comorbidities()` to explain the first-occurrence logic.
21+
122
# medicalcoder 0.7.0
223

324
## Bug Fixes

R/charlson.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
cci <- as.integer(as.vector(X %*% cci_wt))
6363

6464
# build the return object
65-
rtn <- cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE))
65+
rtn <- mdcr_cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE))
6666
rtn <- mdcr_set(rtn, j = "num_cmrb", value = num_cmrb)
6767
rtn <- mdcr_set(rtn, j = "cmrb_flag", value = cmrb_flag)
6868
rtn <- mdcr_set(rtn, j = "cci", value = cci)

R/comorbidities.R

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -360,13 +360,13 @@ comorbidities.data.frame <- function(data,
360360
# Determine the lookup table and the columns for the lookup table to keep
361361
lookup_to_keep <- c("condition")
362362
if (startsWith(method, "pccc")) {
363-
lookup <- get_pccc_codes()
363+
lookup <- get(x = "pccc_codes", envir = ..mdcr_data_env.., inherits = FALSE)
364364
lookup_to_keep <- c(lookup_to_keep, "subcondition", "transplant_flag", "tech_dep_flag")
365365
} else if (startsWith(method, "charlson")) {
366-
lookup <- get_charlson_codes()
366+
lookup <- get("charlson_codes", envir = ..mdcr_data_env.., inherits = FALSE)
367367
lookup_to_keep <- c(lookup_to_keep)
368368
} else if (startsWith(method, "elixhauser")) {
369-
lookup <- get_elixhauser_codes()
369+
lookup <- get("elixhauser_codes", envir = ..mdcr_data_env.., inherits = FALSE)
370370
lookup_to_keep <- c(lookup_to_keep, "poaexempt")
371371
}
372372

@@ -385,25 +385,21 @@ comorbidities.data.frame <- function(data,
385385
##############################################################################
386386
# inner join the data with the lookup table
387387
on_full <-
388-
merge(
388+
mdcr_inner_join(
389389
x = if (full.codes) {data} else {data[0, ]},
390390
y = lookup,
391-
all = FALSE,
392391
by.x = by_x,
393392
by.y = c("full_code", by_y),
394-
suffixes = c("", ".y"),
395-
sort = FALSE
393+
suffixes = c("", ".y")
396394
)
397395

398396
on_comp <-
399-
merge(
397+
mdcr_inner_join(
400398
x = if (compact.codes) {data} else {data[0, ]},
401399
y = lookup,
402-
all = FALSE,
403400
by.x = by_x,
404401
by.y = c("code", by_y),
405-
suffixes = c("", ".y"),
406-
sort = FALSE
402+
suffixes = c("", ".y")
407403
)
408404

409405
##############################################################################
@@ -519,6 +515,7 @@ comorbidities.data.frame <- function(data,
519515
grps <- c(grps, "subcondition")
520516
byconditions <- c(byconditions, "subcondition")
521517
}
518+
# identify first occurrence per id/condition then retain encounters on/after it
522519
tmp <- mdcr_select(cmrb, c(grps, encid))
523520
tmp <- mdcr_setorder(tmp, c(grps, encid))
524521
keep <- !mdcr_duplicated(tmp, by = grps)
@@ -527,12 +524,11 @@ comorbidities.data.frame <- function(data,
527524

528525
# merge on the poa.var
529526
foc <-
530-
merge(x = foc,
531-
y = cmrb,
532-
all = TRUE,
533-
by.x = c(id.vars2, "first_occurrance", byconditions),
534-
by.y = c(id.vars2, encid, byconditions),
535-
sort = FALSE
527+
mdcr_full_outer_join(
528+
x = foc,
529+
y = cmrb,
530+
by.x = c(id.vars2, "first_occurrance", byconditions),
531+
by.y = c(id.vars2, encid, byconditions)
536532
)
537533

538534
if (startsWith(method, "pccc")) {
@@ -546,7 +542,7 @@ comorbidities.data.frame <- function(data,
546542
foc <-
547543
lapply(foc,
548544
function(y) {
549-
rtn <- merge(x = iddf, y = y, all.x = TRUE, by = c(id.vars2), allow.cartesian = TRUE, sort = FALSE)
545+
rtn <- mdcr_left_join(x = iddf, y = y, by = c(id.vars2))
550546
rtn <- mdcr_subset(rtn, i = !is.na(rtn[["condition"]]))
551547
i <- rtn[[encid]] >= rtn[["first_occurrance"]]
552548
mdcr_subset(rtn, i = i)
@@ -619,17 +615,6 @@ comorbidities.data.frame <- function(data,
619615

620616
##############################################################################
621617
# set attributes and return
622-
if (requireNamespace("tibble", quietly = TRUE) && inherits(data, "tbl_df")) {
623-
if (subconditions) {
624-
ccc[["conditions"]] <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc[["conditions"]])
625-
for (i in seq_len(length(ccc[["subconditions"]]))) {
626-
ccc[["subconditions"]][[i]] <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc[["subconditions"]][[i]])
627-
}
628-
} else {
629-
ccc <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc)
630-
}
631-
}
632-
633618
attr(ccc, "method") <- method
634619
attr(ccc, "id.vars") <- id.vars
635620
attr(ccc, "flag.method") <- flag.method

R/datasets.R

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
#' Synthetic Data
22
#'
33
#' @format
4-
#' `mdcr` is a `data.frame` with 4 columns, one for a patient id and 41 for
5-
#' diagnostic codes and 41 possible procedure codes. Each row is for one
6-
#' patient id.
4+
#' `mdcr` is a `data.frame` with 4 columns. Each row is for one ICD id.
75
#'
86
#' * `patid`: patient identifier, integer values
97
#' * `icdv`: ICD version; integer values, 9 or 10
@@ -17,15 +15,15 @@
1715
#' Synthetic Longitudinal Data
1816
#'
1917
#' @format
20-
#' `mdcr_longitudinal` is a `data.frame` with four columns. The codes are
21-
#' expected to be treated as diagnostic codes but there are a few ICD-9 codes
22-
#' which could match to procedure codes as well.
18+
#' `mdcr_longitudinal` is a `data.frame` with 4 columns. The codes are
19+
#' expected to be treated as diagnostic codes. Warning: there are a few ICD-9
20+
#' codes which could match to procedure codes.
2321
#'
2422
#' * `patid`: patient identifier, integer values
2523
#' * `date`: date the diagnostic code was recorded
2624
#' * `icdv`: ICD version 9 or 10, integer valued
2725
#' * `code`: ICD codes; character values
28-
#"
26+
#'
2927
#' @family datasets
3028
#'
3129
"mdcr_longitudinal"

R/elixhauser.R

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
}
3737

3838
# build the return object
39-
rtn <- cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE))
39+
rtn <- mdcr_cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE))
4040
rtn <- mdcr_set(rtn, j = "num_cmrb", value = results$num_cmrb)
4141
rtn <- mdcr_set(rtn, j = "cmrb_flag", value = results$cmrb_flag)
4242
rtn <- mdcr_set(rtn, j = "mortality_index", value = results$mortality_index)
@@ -47,7 +47,6 @@
4747
}
4848

4949
.elixhauser_post2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) {
50-
5150
conditions <- ..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)]
5251
conditions <- sort(unique(conditions))
5352

@@ -207,7 +206,6 @@
207206
}
208207

209208
.elixhauser_pre2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) {
210-
211209
# what are the relevant conditions
212210
conditions <-
213211
unique(..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)])

R/get_icd_codes.R

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,16 @@
2828
#'
2929
#' `known_start` is the first fiscal or calendar year (depending on source) that
3030
#' the medicalcoder package has definitive source data for. ICD-9-CM started in
31-
#' the United States in fiscal year 1980. Source information that could be
32-
#' downloaded from the CDC and CMS and added to the source code for the
33-
#' medicalcoder package goes back to 1997. As such 1997 is the "known start"
31+
#' the United States in fiscal year 1980. The CDC extracts included in
32+
#' medicalcoder span fiscal years 1997--2012; the CMS ICD-9-CM/PCS extracts
33+
#' start in fiscal year 2006 and run through fiscal year 2015. As such 1997 is
34+
#' the earliest "known start" for ICD-9 within medicalcoder.
3435
#'
3536
#' `known_end` is the last fiscal or calendar year (depending on source)
36-
#' for which we have definitive source data for. For ICD-9-CM and ICD-9-PCS
37-
#' that is 2015. For ICD-10-CM and ICD-10-PCS, which are active, it is just the
38-
#' last year of known data. ICD-10 from the WHO ends in 2019.
37+
#' for which we have definitive source data. For ICD-9-CM and ICD-9-PCS,
38+
#' CMS provides data through fiscal year 2015, while the CDC extracts stop at
39+
#' fiscal year 2012. For ICD-10-CM and ICD-10-PCS, which are active, it is just
40+
#' the last year of known data. ICD-10 from the WHO ends in 2019.
3941
#'
4042
#' ## Header and Assignable Codes
4143
#'

R/is_icd.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ is_icd <- function(x, icdv = c(9L, 10L), dx = c(1L, 0L),
6767
}
6868

6969
# get the known icd codes and filter to relevant codes
70-
codes <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = FALSE)
70+
codes <- get("icd_codes", envir = ..mdcr_data_env.., inherits = FALSE)
7171

7272
# keep based on icdv, dx, and src
7373
keep <- (codes[["icdv"]] %in% icdv) & (codes[["dx"]] %in% dx) & (codes[["src"]] %in% src)

R/lookup_icd_codes.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ lookup_icd_codes <- function(x, regex = FALSE, full.codes = TRUE, compact.codes
4343
assert_scalar_logical(compact.codes)
4444
stopifnot(isTRUE(full.codes | compact.codes))
4545

46-
ICDCODES <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = FALSE)
46+
ICDCODES <- get("icd_codes", envir = ..mdcr_data_env.., inherits = FALSE)
4747

4848
if (regex) {
4949
if(full.codes) {

0 commit comments

Comments
 (0)