dewittpe
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions b/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎NEWS.md‎
Lines changed: 21 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎R/charlson.R‎
Lines changed: 1 addition & 1 deletion b/‎R/charlson.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/comorbidities.R‎
Lines changed: 14 additions & 29 deletions b/‎R/comorbidities.R‎
Lines changed: 14 additions & 29 deletions
diff --git a/‎R/datasets.R‎
Lines changed: 5 additions & 7 deletions b/‎R/datasets.R‎
Lines changed: 5 additions & 7 deletions
diff --git a/‎R/elixhauser.R‎
Lines changed: 1 addition & 3 deletions b/‎R/elixhauser.R‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎R/get_icd_codes.R‎
Lines changed: 8 additions & 6 deletions b/‎R/get_icd_codes.R‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎R/is_icd.R‎
Lines changed: 1 addition & 1 deletion b/‎R/is_icd.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/lookup_icd_codes.R‎
Lines changed: 1 addition & 1 deletion b/‎R/lookup_icd_codes.R‎
Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ data-raw/icd/desc_start_stop.rds
 data-raw/icd/icd10/.download_stamp
 data-raw/icd/icd10/icd10.rds
 data-raw/icd/icd9/.download_stamp
-data-raw/icd/icd9/icd9_cm_pcs.rds
+data-raw/icd/icd9/icd9.rds
 data-raw/icd/icd_chapters.rds
 data-raw/icd/icd_chapters_subchapters.rds
 data-raw/icd/icd_codes.rds
 
@@ -21,11 +21,11 @@ BugReports: https://github.com/dewittpe/medicalcoder/issues
 LazyData: true
 Suggests:
     data.table,
+    dplyr,
     kableExtra,
     knitr,
     R.utils,
-    rmarkdown,
-    tibble (>= 2.0.0)
+    rmarkdown
 RoxygenNote: 7.3.3
 VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE)
 
@@ -1,3 +1,24 @@
+# medicalcoder 0.7.0.9000
+
+## New Features
+
+* If a `tibble` is passed to `comorbidities()` and the `dplyr` namespace is
+  available, then `dplyr` methods will be used for data manipulation.  This
+  change will generally result in less computation time than the base R
+  `data.frame` methods (`data.table`s require even less time).
+
+## Other Changes
+
+* Extend and improve the internal ICD-9 database to distinguish between CDC and
+  CMS sources.
+
+* Fix documentation of the `mdcr` and `mdcr_longitudinal` datasets.
+
+* Clarified internal data.frame/data.table helpers: documented that `mdcr_select()`
+  deep-copies data.table subsets to avoid aliasing, noted the selfref fix in
+  `mdcr_set()`, and added inline guidance in the longitudinal section of
+  `comorbidities()` to explain the first-occurrence logic.
+
 # medicalcoder 0.7.0
 
 ## Bug Fixes
 
@@ -62,7 +62,7 @@
   cci      <- as.integer(as.vector(X %*% cci_wt))
 
   # build the return object
-  rtn <- cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE))
+  rtn <- mdcr_cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE))
   rtn <- mdcr_set(rtn, j = "num_cmrb",  value = num_cmrb)
   rtn <- mdcr_set(rtn, j = "cmrb_flag", value = cmrb_flag)
   rtn <- mdcr_set(rtn, j = "cci",      value = cci)
 
@@ -360,13 +360,13 @@ comorbidities.data.frame <- function(data,
   # Determine the lookup table and the columns for the lookup table to keep
   lookup_to_keep <- c("condition")
   if (startsWith(method, "pccc")) {
-    lookup <- get_pccc_codes()
+    lookup <- get(x = "pccc_codes", envir = ..mdcr_data_env.., inherits = FALSE)
     lookup_to_keep <- c(lookup_to_keep, "subcondition", "transplant_flag", "tech_dep_flag")
   } else if (startsWith(method, "charlson")) {
-    lookup <- get_charlson_codes()
+    lookup <- get("charlson_codes", envir = ..mdcr_data_env.., inherits = FALSE)
     lookup_to_keep <- c(lookup_to_keep)
   } else if (startsWith(method, "elixhauser")) {
-    lookup <- get_elixhauser_codes()
+    lookup <- get("elixhauser_codes", envir = ..mdcr_data_env.., inherits = FALSE)
     lookup_to_keep <- c(lookup_to_keep, "poaexempt")
   }
 
@@ -385,25 +385,21 @@ comorbidities.data.frame <- function(data,
   ##############################################################################
   # inner join the data with the lookup table
   on_full <-
-    merge(
+    mdcr_inner_join(
       x = if (full.codes) {data} else {data[0, ]},
       y = lookup,
-      all = FALSE,
       by.x = by_x,
       by.y = c("full_code", by_y),
-      suffixes = c("", ".y"),
-      sort = FALSE
+      suffixes = c("", ".y")
     )
 
   on_comp <-
-    merge(
+    mdcr_inner_join(
       x = if (compact.codes) {data} else {data[0, ]},
       y = lookup,
-      all = FALSE,
       by.x = by_x,
       by.y = c("code", by_y),
-      suffixes = c("", ".y"),
-      sort = FALSE
+      suffixes = c("", ".y")
     )
 
   ##############################################################################
@@ -519,6 +515,7 @@ comorbidities.data.frame <- function(data,
       grps <- c(grps, "subcondition")
       byconditions <- c(byconditions, "subcondition")
     }
+    # identify first occurrence per id/condition then retain encounters on/after it
     tmp <- mdcr_select(cmrb, c(grps, encid))
     tmp <- mdcr_setorder(tmp, c(grps, encid))
     keep <- !mdcr_duplicated(tmp, by = grps)
@@ -527,12 +524,11 @@ comorbidities.data.frame <- function(data,
 
     # merge on the poa.var
     foc <-
-      merge(x = foc,
-            y = cmrb,
-            all = TRUE,
-            by.x = c(id.vars2, "first_occurrance", byconditions),
-            by.y = c(id.vars2, encid, byconditions),
-            sort = FALSE
+      mdcr_full_outer_join(
+        x = foc,
+        y = cmrb,
+        by.x = c(id.vars2, "first_occurrance", byconditions),
+        by.y = c(id.vars2, encid, byconditions)
       )
 
     if (startsWith(method, "pccc")) {
@@ -546,7 +542,7 @@ comorbidities.data.frame <- function(data,
     foc <-
       lapply(foc,
              function(y) {
-               rtn <- merge(x = iddf, y = y, all.x = TRUE, by = c(id.vars2), allow.cartesian = TRUE, sort = FALSE)
+               rtn <- mdcr_left_join(x = iddf, y = y, by = c(id.vars2))
                rtn <- mdcr_subset(rtn, i = !is.na(rtn[["condition"]]))
                i <- rtn[[encid]] >= rtn[["first_occurrance"]]
                mdcr_subset(rtn, i = i)
@@ -619,17 +615,6 @@ comorbidities.data.frame <- function(data,
 
   ##############################################################################
   # set attributes and return
-  if (requireNamespace("tibble", quietly = TRUE) && inherits(data, "tbl_df")) {
-    if (subconditions) {
-      ccc[["conditions"]] <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc[["conditions"]])
-      for (i in seq_len(length(ccc[["subconditions"]]))) {
-        ccc[["subconditions"]][[i]] <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc[["subconditions"]][[i]])
-      }
-    } else {
-      ccc <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc)
-    }
-  }
-
   attr(ccc, "method") <- method
   attr(ccc, "id.vars") <- id.vars
   attr(ccc, "flag.method") <- flag.method
 
@@ -1,9 +1,7 @@
 #' Synthetic Data
 #'
 #' @format
-#' `mdcr` is a `data.frame` with 4 columns, one for a patient id and 41 for
-#' diagnostic codes and 41 possible procedure codes.  Each row is for one
-#' patient id.
+#' `mdcr` is a `data.frame` with 4 columns.  Each row is for one ICD code.
 #'
 #' * `patid`: patient identifier, integer values
 #' * `icdv`: ICD version; integer values, 9 or 10
@@ -17,15 +15,15 @@
 #' Synthetic Longitudinal Data
 #'
 #' @format
-#' `mdcr_longitudinal` is a `data.frame` with four columns.  The codes are
-#' expected to be treated as diagnostic codes but there are a few ICD-9 codes
-#' which could match to procedure codes as well.
+#' `mdcr_longitudinal` is a `data.frame` with 4 columns.  The codes are
+#' expected to be treated as diagnostic codes. Warning: there are a few ICD-9
+#' codes which could match to procedure codes.
 #'
 #' * `patid`: patient identifier, integer values
 #' * `date`: date the diagnostic code was recorded
 #' * `icdv`: ICD version 9 or 10, integer valued
 #' * `code`: ICD codes; character values
-#"
+#'
 #' @family datasets
 #'
 "mdcr_longitudinal"
@@ -36,7 +36,7 @@
   }
 
   # build the return object
-  rtn <- cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE))
+  rtn <- mdcr_cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE))
   rtn <- mdcr_set(rtn, j = "num_cmrb", value = results$num_cmrb)
   rtn <- mdcr_set(rtn, j = "cmrb_flag", value = results$cmrb_flag)
   rtn <- mdcr_set(rtn, j = "mortality_index", value = results$mortality_index)
@@ -47,7 +47,6 @@
 }
 
 .elixhauser_post2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) {
-
   conditions <- ..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)]
   conditions <- sort(unique(conditions))
 
@@ -207,7 +206,6 @@
 }
 
 .elixhauser_pre2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) {
-
   # what are the relevant conditions
   conditions <-
     unique(..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)])
 
@@ -28,14 +28,16 @@
 #'
 #' `known_start` is the first fiscal or calendar year (depending on source) that
 #' the medicalcoder package has definitive source data for.  ICD-9-CM started in
-#' the United States in fiscal year 1980.  Source information that could be
-#' downloaded from the CDC and CMS and added to the source code for the
-#' medicalcoder package goes back to 1997.  As such 1997 is the "known start"
+#' the United States in fiscal year 1980.  The CDC extracts included in
+#' medicalcoder span fiscal years 1997--2012; the CMS ICD-9-CM/PCS extracts
+#' start in fiscal year 2006 and run through fiscal year 2015.  As such 1997 is
+#' the earliest "known start" for ICD-9 within medicalcoder.
 #'
 #' `known_end` is the last fiscal or calendar year (depending on source)
-#' for which we have definitive source data for.  For ICD-9-CM and ICD-9-PCS
-#' that is 2015.  For ICD-10-CM and ICD-10-PCS, which are active, it is just the
-#' last year of known data.  ICD-10 from the WHO ends in 2019.
+#' for which we have definitive source data.  For ICD-9-CM and ICD-9-PCS,
+#' CMS provides data through fiscal year 2015, while the CDC extracts stop at
+#' fiscal year 2012.  For ICD-10-CM and ICD-10-PCS, which are active, it is just
+#' the last year of known data.  ICD-10 from the WHO ends in 2019.
 #'
 #' ## Header and Assignable Codes
 #'
 
@@ -67,7 +67,7 @@ is_icd <- function(x, icdv = c(9L, 10L), dx = c(1L, 0L),
   }
 
   # get the known icd codes and filter to relevant codes
-  codes <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = FALSE)
+  codes <- get("icd_codes", envir = ..mdcr_data_env.., inherits = FALSE)
 
   # keep based on icdv, dx, and src
   keep <- (codes[["icdv"]] %in% icdv) & (codes[["dx"]] %in% dx) & (codes[["src"]] %in% src)
 
@@ -43,7 +43,7 @@ lookup_icd_codes <- function(x, regex = FALSE, full.codes = TRUE, compact.codes
   assert_scalar_logical(compact.codes)
   stopifnot(isTRUE(full.codes | compact.codes))
 
-  ICDCODES <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = FALSE)
+  ICDCODES <- get("icd_codes", envir = ..mdcr_data_env.., inherits = FALSE)
 
   if (regex) {
     if(full.codes) {
Original file line number	Diff line number	Diff line change
`@@ -36,7 +36,7 @@`
`36`	`36`	`}`
`37`	`37`
`38`	`38`	`# build the return object`
`39`		`- rtn <- cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE))`
	`39`	`+ rtn <- mdcr_cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE))`
`40`	`40`	`rtn <- mdcr_set(rtn, j = "num_cmrb", value = results$num_cmrb)`
`41`	`41`	`rtn <- mdcr_set(rtn, j = "cmrb_flag", value = results$cmrb_flag)`
`42`	`42`	`rtn <- mdcr_set(rtn, j = "mortality_index", value = results$mortality_index)`
`@@ -47,7 +47,6 @@`
`47`	`47`	`}`
`48`	`48`
`49`	`49`	`.elixhauser_post2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) {`
`50`		`-`
`51`	`50`	`conditions <- ..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)]`
`52`	`51`	`conditions <- sort(unique(conditions))`
`53`	`52`
`@@ -207,7 +206,6 @@`
`207`	`206`	`}`
`208`	`207`
`209`	`208`	`.elixhauser_pre2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) {`
`210`		`-`
`211`	`209`	`# what are the relevant conditions`
`212`	`210`	`conditions <-`
`213`	`211`	`unique(..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)])`
Original file line number	Diff line number	Diff line change
`@@ -67,7 +67,7 @@ is_icd <- function(x, icdv = c(9L, 10L), dx = c(1L, 0L),`
`67`	`67`	`}`
`68`	`68`
`69`	`69`	`# get the known icd codes and filter to relevant codes`
`70`		`- codes <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = FALSE)`
	`70`	`+ codes <- get("icd_codes", envir = ..mdcr_data_env.., inherits = FALSE)`
`71`	`71`
`72`	`72`	`# keep based on icdv, dx, and src`
`73`	`73`	`keep <- (codes[["icdv"]] %in% icdv) & (codes[["dx"]] %in% dx) & (codes[["src"]] %in% src)`