diff --git a/NEWS.md b/NEWS.md index cfbb185d..022700d4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,10 @@ * exclusions for less severer conditions when more severer conditions are flagged * Improved POA, NPOA, and EXEMPTPOA. This came about from #20. +* Make internal data.frame tool `mdcr_duplicated` data.table aware. + +* Elixhauser (Quan 2005) - added missing ICD-10 codes to the mappings + ## New Features * `comorbidities()` will return a `tibble` with the input data is a `tbl_df` diff --git a/R/sysdata.rda b/R/sysdata.rda index 84ef243e..c8fa0b23 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/utilities.R b/R/utilities.R index 42c14aec..c92fa74e 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -132,7 +132,10 @@ mdcr_setnames <- function(x, old, new, ...) { mdcr_duplicated <- function(x, by = seq_along(x), ...) { stopifnot(is.data.frame(x)) if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { - rtn <- duplicated(x, by = by, ...) + # Flag this frame as data.table-aware so duplicated.data.table uses its + # optimized path instead of falling back to duplicated.data.frame. + .datatable.aware <- TRUE + rtn <- utils::getFromNamespace(x = 'duplicated.data.table', ns = "data.table")(x, by = by, ...) } else { rtn <- duplicated(x[, by, drop = FALSE], ...) } diff --git a/data-raw/build_sysdata.R b/data-raw/build_sysdata.R index a016a529..8a0d4815 100644 --- a/data-raw/build_sysdata.R +++ b/data-raw/build_sysdata.R @@ -52,6 +52,9 @@ ..mdcr_internal_elixhauser_poa.. <- readRDS("./elixhauser/elixhauser_poa.rds") ..mdcr_internal_elixhauser_codes.. <- readRDS("./elixhauser/elixhauser_codes.rds") + +# All internl data sets need to have the ..mdcr_internal_ prefix and .. suffix. +# This is expected in tests and in use within the package. usethis::use_data( ..mdcr_internal_pccc_codes.. , ..mdcr_internal_pccc_conditions.. diff --git a/data-raw/elixhauser/regex_based_on_quan2005.txt b/data-raw/elixhauser/regex_based_on_quan2005.txt index 84105d95..32f5065a 100644 --- a/data-raw/elixhauser/regex_based_on_quan2005.txt +++ b/data-raw/elixhauser/regex_based_on_quan2005.txt @@ -152,6 +152,11 @@ DEPRESS | 9 | 1 | quan2005 | ^296\.[2,3,5] DEPRESS | 9 | 1 | quan2005 | ^300\.4 DEPRESS | 9 | 1 | quan2005 | ^309 DEPRESS | 9 | 1 | quan2005 | ^311 +DEPRESS | 10 | 1 | quan2005 | ^F20\.4 +DEPRESS | 10 | 1 | quan2005 | ^F31\.[3-5] +DEPRESS | 10 | 1 | quan2005 | ^F3[2-3] +DEPRESS | 10 | 1 | quan2005 | ^F34\.1 +DEPRESS | 10 | 1 | quan2005 | ^F4[1-3]\.2 DM | 10 | 1 | quan2005 | ^E1[0-4]\.[0,1,9] DM | 9 | 1 | ahrq_web | ^250\.[0-3] DM | 9 | 1 | ahrq_web | ^648\.0 @@ -191,6 +196,7 @@ HTN_UNCX | 9 | 1 | ahrq_web | ^642\.0 HTN_UNCX | 9 | 1 | elixhauser1988 | ^401\.[1,9] HTN_UNCX | 9 | 1 | quan2005 | ^401 HYPOTHY | 10 | 1 | quan2005 | ^E0[0-3] +HYPOTHY | 10 | 1 | quan2005 | ^E89\.0 HYPOTHY | 9 | 1 | ahrq_web | ^243 HYPOTHY | 9 | 1 | ahrq_web | ^244\.[0-2,8,9] HYPOTHY | 9 | 1 | elixhauser1988 | ^243 diff --git a/tests/test-asserts.R b/tests/test-asserts.R index d2fe0400..9347c154 100644 --- a/tests/test-asserts.R +++ b/tests/test-asserts.R @@ -30,8 +30,6 @@ stopifnot( t01b = !("assert_scalar_logical" %in% getNamespaceExports("medicalcoder")) ) -# the method should only be used within methods and the error messages are -# controlled: common_args <- list(data = mdcr, method = "pccc_v3.1", icd.codes = "code", poa = 1L) t02a <- tryCatchError(do.call(comorbidities, args = c(common_args, list(full.codes = TRUE)))) diff --git a/tests/test-charlson-codes.R b/tests/test-charlson-codes.R deleted file mode 100644 index 345a877b..00000000 --- a/tests/test-charlson-codes.R +++ /dev/null @@ -1,64 +0,0 @@ -library(medicalcoder) - -# verify that the internal data set is a data.frame, and only a data.frame, not -# a data.table, not a tibble -stopifnot( inherits(medicalcoder:::..mdcr_internal_charlson_codes.., "data.frame")) -stopifnot(!inherits(medicalcoder:::..mdcr_internal_charlson_codes.., "data.table")) -stopifnot(!inherits(medicalcoder:::..mdcr_internal_charlson_codes.., "tbl_df")) - -stopifnot( - identical( - names(medicalcoder:::..mdcr_internal_charlson_codes..), - c("code_id", "condition", "charlson_cdmf2019", "charlson_deyo1992", "charlson_quan2005", "charlson_quan2011") - ) -) - -stopifnot( - identical( - sapply(medicalcoder:::..mdcr_internal_charlson_codes.., class), - c(code_id = "integer", - condition = "character", - charlson_cdmf2019 = "integer", - charlson_deyo1992 = "integer", - charlson_quan2005 = "integer", - charlson_quan2011 = "integer" - ) - ) -) - -stopifnot( inherits(get_charlson_codes(), "data.frame")) -stopifnot(!inherits(get_charlson_codes(), "data.table")) -stopifnot(!inherits(get_charlson_codes(), "tbl_df")) - -stopifnot( - identical( - names(get_charlson_codes()), - c("icdv", "dx", "full_code", "code", "condition", "charlson_cdmf2019", "charlson_deyo1992", "charlson_quan2005", "charlson_quan2011") - ) -) - -stopifnot( - identical( - sapply(get_charlson_codes(), class), - c(icdv = "integer", - dx = "integer", - full_code = "character", - code = "character", - condition = "character", - charlson_cdmf2019 = "integer", - charlson_deyo1992 = "integer", - charlson_quan2005 = "integer", - charlson_quan2011 = "integer" - ) - ) -) - -# check that the row names are just sequential integers -cc <- get_charlson_codes() -ci <- get_charlson_index_scores() -stopifnot(identical(rownames(cc), as.character(seq_len(nrow(cc))))) -stopifnot(identical(rownames(ci), as.character(seq_len(nrow(ci))))) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-charlson-index-scores.R b/tests/test-charlson-index-scores.R deleted file mode 100644 index da672be9..00000000 --- a/tests/test-charlson-index-scores.R +++ /dev/null @@ -1,35 +0,0 @@ -library(medicalcoder) -source('utilities.R') - -stopifnot(is.data.frame(medicalcoder:::..mdcr_internal_charlson_index_scores..)) - -stopifnot( - identical( - names(medicalcoder:::..mdcr_internal_charlson_index_scores..), - c("condition_description", "condition", "charlson_cdmf2019", "charlson_quan2011", "charlson_quan2005", "charlson_deyo1992") - ) -) - -stopifnot( - identical( - sapply(medicalcoder:::..mdcr_internal_charlson_index_scores.., class), - c(condition_description = "character", - condition = "character", - charlson_cdmf2019 = "integer", - charlson_quan2011 = "integer", - charlson_quan2005 = "integer", - charlson_deyo1992 = "integer" - ) - ) -) - - -stopifnot(is.data.frame(get_charlson_index_scores())) - -stopifnot( - identical(get_charlson_index_scores(), medicalcoder:::..mdcr_internal_charlson_index_scores..) -) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-data-frame-tools.R b/tests/test-data-frame-tools.R new file mode 100644 index 00000000..97c88555 --- /dev/null +++ b/tests/test-data-frame-tools.R @@ -0,0 +1,310 @@ +# No need to load and attach the namespace, everything in this test script is +# non-exported +# library(medicalcoder) +source('utilities.R') + +dataframetools <- + c("mdcr_set", + "mdcr_select", + "mdcr_subset", + "mdcr_setorder", + "mdcr_setnames", + "mdcr_duplicated" + ) + +mdcr <- getNamespace("medicalcoder") + +# are all the dataframetools in the namespcae +stopifnot(all(dataframetools %in% names(mdcr))) + +# check that there are not unaccounted for data sets. the ..mdcr_internal_ +# prefix and .. suffix is expected. noted in the data-raw/build_sysdata.R +stopifnot( + all( + grep("^mdcr_", names(mdcr), value = TRUE) %in% dataframetools + ) +) + +################################################################################ +# Set up data for testing +DF <- data.frame(A = 1:10, C = NA_integer_, B = LETTERS[1:10]) +if (requireNamespace("tibble", quietly = TRUE)) { + TBL <- getExportedValue(name = "as_tibble", ns = "tibble")(DF) +} else { + TBL <- DF +} +if (requireNamespace("data.table", quietly = TRUE)) { + DT <- getExportedValue(name = "copy", ns = "data.table")(DF) + getExportedValue(name = "setDT", ns = "data.table")(DT) +} else { + DT <- DF +} + +################################################################################ +# set the value of column C in row 5 +DF <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DF, i = 5L, j = "C", value = 3L) +TBL <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(TBL, i = 5L, j = "C", value = 3L) +DT <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DT, i = 5L, j = "C", value = 3L) + +stopifnot( + identical(DF[["C"]], c(rep(NA_integer_, 4L), 3L, rep(NA_integer_, 5L))), + identical(TBL[["C"]], c(rep(NA_integer_, 4L), 3L, rep(NA_integer_, 5L))), + identical(DT[["C"]], c(rep(NA_integer_, 4L), 3L, rep(NA_integer_, 5L))) +) + +# set the value in two rows at a time with one value +DF <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DF, i = c(1L, 10L), j = "C", value = 8L) +TBL <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(TBL, i = c(1L, 10L), j = "C", value = 8L) +DT <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DT, i = c(1L, 10L), j = "C", value = 8L) + +stopifnot( + identical(DF[["C"]], c(8L, rep(NA_integer_, 3L), 3L, rep(NA_integer_, 4L), 8L)), + identical(TBL[["C"]], c(8L, rep(NA_integer_, 3L), 3L, rep(NA_integer_, 4L), 8L)), + identical(DT[["C"]], c(8L, rep(NA_integer_, 3L), 3L, rep(NA_integer_, 4L), 8L)) +) + +# set the value in three rows at a time with three values +DF <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DF, i = c(2L, 3L, 4L), j = "C", value = c(21L, 22L, 23L)) +TBL <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(TBL, i = c(2L, 3L, 4L), j = "C", value = c(21L, 22L, 23L)) +DT <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DT, i = c(2L, 3L, 4L), j = "C", value = c(21L, 22L, 23L)) + +stopifnot( + identical(DF[["C"]], c(8L, 21L, 22L, 23L, 3L, rep(NA_integer_, 4L), 8L)), + identical(TBL[["C"]], c(8L, 21L, 22L, 23L, 3L, rep(NA_integer_, 4L), 8L)), + identical(DT[["C"]], c(8L, 21L, 22L, 23L, 3L, rep(NA_integer_, 4L), 8L)) +) + +# set a full column +DF <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DF, j = "A", value = as.integer(11:20)) +TBL <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(TBL, j = "A", value = as.integer(11:20)) +DT <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DT, j = "A", value = as.integer(11:20)) + +stopifnot( + identical(DF[["A"]], as.integer(11:20)), + identical(TBL[["A"]], as.integer(11:20)), + identical(DT[["A"]], as.integer(11:20)) +) + +# create a new column +x <- paste("v", c(0:5, 5, 6:8)) +DF <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DF, j = "D", value = x) +TBL <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(TBL, j = "D", value = x) +DT <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DT, j = "D", value = x) + +stopifnot( + identical(DF[["D"]], x), + identical(TBL[["D"]], x), + identical(DT[["D"]], x) +) + +################################################################################ +# testing mdcr_select +# set colummns - change the order of the columns +DF <- getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DF, col = c("D", "B", "C", "A")) +TBL <- getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(TBL, col = c("D", "B", "C", "A")) +DT <- getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DT, col = c("D", "B", "C", "A")) + +stopifnot( + identical(names(DF), c("D", "B", "C", "A")), + identical(names(TBL), c("D", "B", "C", "A")), + identical(names(DT), c("D", "B", "C", "A")) +) + +# retun the object if col is missing +stopifnot( + identical(getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DF), DF), + identical(getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(TBL), TBL), + identical(getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DT), DT) +) + +################################################################################ +# testing mdcr_subset + +# if arg i and cols are missing then the object is retruned +stopifnot( + identical(getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF), DF), + identical(getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL), TBL), + identical(getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT), DT) +) + +# with no row specfied, it is the same as calling mdcr_select +stopifnot( + identical( + getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF, col = c("D", "B", "C", "A")), + getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DF, col = c("D", "B", "C", "A")) + ), + identical( + getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL, col = c("D", "B", "C", "A")), + getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(TBL, col = c("D", "B", "C", "A")) + ), + identical( + getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT, col = c("D", "B", "C", "A")), + getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DT, col = c("D", "B", "C", "A")) + ) +) + +# select one column and a subset of rows +DF0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF, i = seq(1, 10, by = 2), cols = c("B")) +DF1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF, i = seq(2, 10, by = 2), cols = c("B")) +TBL0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL, i = seq(1, 10, by = 2), cols = c("B")) +TBL1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL, i = seq(2, 10, by = 2), cols = c("B")) +DT0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT, i = seq(1, 10, by = 2), cols = c("B")) +DT1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT, i = seq(2, 10, by = 2), cols = c("B")) + +stopifnot( + inherits(DF0, "data.frame"), + inherits(DF1, "data.frame"), + inherits(TBL0, "data.frame"), + inherits(TBL1, "data.frame"), + inherits(DT0, "data.frame"), + inherits(DT1, "data.frame"), + identical(names(DF0), "B"), + identical(names(DF1), "B"), + identical(names(TBL0), "B"), + identical(names(TBL1), "B"), + identical(names(DT0), "B"), + identical(names(DT1), "B") +) + +if (requireNamespace("data.table", quietly = TRUE)) { + stopifnot(inherits(DT0, "data.table"), inherits(DT1, "data.table")) +} + +if (requireNamespace("tibble", quietly = TRUE)) { + stopifnot(inherits(TBL0, "tbl_df"), inherits(TBL1, "tbl_df")) +} + +# select two columns +DF0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF, i = seq(1, 10, by = 2), cols = c("B", "A")) +DF1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF, i = seq(2, 10, by = 2), cols = c("B", "A")) +TBL0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL, i = seq(1, 10, by = 2), cols = c("B", "A")) +TBL1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL, i = seq(2, 10, by = 2), cols = c("B", "A")) +DT0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT, i = seq(1, 10, by = 2), cols = c("B", "A")) +DT1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT, i = seq(2, 10, by = 2), cols = c("B", "A")) + +stopifnot( + inherits(DF0, "data.frame"), + inherits(DF1, "data.frame"), + inherits(TBL0, "data.frame"), + inherits(TBL1, "data.frame"), + inherits(DT0, "data.frame"), + inherits(DT1, "data.frame"), + identical(names(DF0), c("B", "A")), + identical(names(DF1), c("B", "A")), + identical(names(TBL0), c("B", "A")), + identical(names(TBL1), c("B", "A")), + identical(names(DT0), c("B", "A")), + identical(names(DT1), c("B", "A")) +) + +if (requireNamespace("data.table", quietly = TRUE)) { + stopifnot(inherits(DT0, "data.table"), inherits(DT1, "data.table")) +} + +if (requireNamespace("tibble", quietly = TRUE)) { + stopifnot(inherits(TBL0, "tbl_df"), inherits(TBL1, "tbl_df")) +} + +# if only i is supplied, all the columns are returned +DF0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF, i = seq(1, 10, by = 2)) +DF1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DF, i = seq(2, 10, by = 2)) +TBL0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL, i = seq(1, 10, by = 2)) +TBL1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(TBL, i = seq(2, 10, by = 2)) +DT0 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT, i = seq(1, 10, by = 2)) +DT1 <- getFromNamespace(x = "mdcr_subset", ns = "medicalcoder")(DT, i = seq(2, 10, by = 2)) + +stopifnot( + inherits(DF0, "data.frame"), + inherits(DF1, "data.frame"), + inherits(TBL0, "data.frame"), + inherits(TBL1, "data.frame"), + inherits(DT0, "data.frame"), + inherits(DT1, "data.frame"), + identical(names(DF0), c("D", "B", "C", "A")), + identical(names(DF1), c("D", "B", "C", "A")), + identical(names(TBL0), c("D", "B", "C", "A")), + identical(names(TBL1), c("D", "B", "C", "A")), + identical(names(DT0), c("D", "B", "C", "A")), + identical(names(DT1), c("D", "B", "C", "A")) +) + +if (requireNamespace("data.table", quietly = TRUE)) { + stopifnot(inherits(DT0, "data.table"), inherits(DT1, "data.table")) +} + +if (requireNamespace("tibble", quietly = TRUE)) { + stopifnot(inherits(TBL0, "tbl_df"), inherits(TBL1, "tbl_df")) +} + + +################################################################################ +# testing mdcr_setorder +DF <- rbind(DF0, DF1) +TBL <- rbind(TBL0, TBL1) +DT <- rbind(DT0, DT1) + +# verify that the data.frames are not currently ordered by "A" + +stopifnot( + identical(DF[["A"]], as.integer(c(11, 13, 15, 17, 19, 12, 14, 16, 18, 20))), + identical(TBL[["A"]], as.integer(c(11, 13, 15, 17, 19, 12, 14, 16, 18, 20))), + identical(DT[["A"]], as.integer(c(11, 13, 15, 17, 19, 12, 14, 16, 18, 20))), + identical(DF[["D"]], paste("v", c(0, 2, 4, 5, 7, 1, 3, 5, 6, 8))), + identical(TBL[["D"]], paste("v", c(0, 2, 4, 5, 7, 1, 3, 5, 6, 8))), + identical(DT[["D"]], paste("v", c(0, 2, 4, 5, 7, 1, 3, 5, 6, 8))) +) + +DF <- getFromNamespace(x = "mdcr_setorder", ns = "medicalcoder")(DF, by = c("A")) +TBL <- getFromNamespace(x = "mdcr_setorder", ns = "medicalcoder")(TBL, by = c("A")) +DT <- getFromNamespace(x = "mdcr_setorder", ns = "medicalcoder")(DT, by = c("A")) + +stopifnot( + identical(DF[["A"]], as.integer(11:20)), + identical(TBL[["A"]], as.integer(11:20)), + identical(DT[["A"]], as.integer(11:20)), + identical(DF[["D"]], paste("v", c(0:5, 5, 6:8))), + identical(TBL[["D"]], paste("v", c(0:5, 5, 6:8))), + identical(DT[["D"]], paste("v", c(0:5, 5, 6:8))) +) + +################################################################################ +# testing mdcr_setnames +DF <- getFromNamespace(x = "mdcr_setnames", ns = "medicalcoder")(DF, old = "A", new = "Column A") +TBL <- getFromNamespace(x = "mdcr_setnames", ns = "medicalcoder")(TBL, old = "A", new = "Column A") +DT <- getFromNamespace(x = "mdcr_setnames", ns = "medicalcoder")(DT, old = "A", new = "Column A") + +stopifnot( + identical(names(DF), c("D", "B", "C", "Column A")), + identical(names(TBL), c("D", "B", "C", "Column A")), + identical(names(DT), c("D", "B", "C", "Column A")) +) + +################################################################################ +# testing mdcr_duplicated + +stopifnot( + !any(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(DF)), + !any(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(TBL)), + !any(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(DT)) +) + +expected <- rep(FALSE, 10) +expected[7] <- TRUE +stopifnot( + identical(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(DF, by = "D"), expected), + identical(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(TBL, by = "D"), expected), + identical(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(DT, by = "D"), expected) +) + +# check fromLast +expected[6:7] <- !expected[6:7] +stopifnot( + identical(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(DF, by = "D", fromLast = TRUE), expected), + identical(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(TBL, by = "D", fromLast = TRUE), expected), + identical(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(DT, by = "D", fromLast = TRUE), expected) +) + + +################################################################################ +# End of File # +################################################################################ diff --git a/tests/test-elixhauser-codes.R b/tests/test-elixhauser-codes.R deleted file mode 100644 index 693bf210..00000000 --- a/tests/test-elixhauser-codes.R +++ /dev/null @@ -1,96 +0,0 @@ -source('utilities.R') -library(medicalcoder) - -################################################################################ -# test the structure of the internal object - -stopifnot(is.data.frame(medicalcoder:::..mdcr_internal_elixhauser_codes..)) - -stopifnot( - identical( - names(medicalcoder:::..mdcr_internal_elixhauser_codes..), - c("code_id", - "poaexempt", - "condition", - "elixhauser_ahrq_web", - "elixhauser_elixhauser1988", - "elixhauser_quan2005", - "elixhauser_ahrq2022", - "elixhauser_ahrq2023", - "elixhauser_ahrq2024", - "elixhauser_ahrq2025", - "elixhauser_ahrq_icd10" - ) - ) -) - -stopifnot( - identical( - sapply(medicalcoder:::..mdcr_internal_elixhauser_codes.., class), - c(code_id = "integer", - poaexempt = "integer", - condition = "character", - elixhauser_ahrq_web = "integer", - elixhauser_elixhauser1988 = "integer", - elixhauser_quan2005 = "integer", - elixhauser_ahrq2022 = "integer", - elixhauser_ahrq2023 = "integer", - elixhauser_ahrq2024 = "integer", - elixhauser_ahrq2025 = "integer", - elixhauser_ahrq_icd10 = "integer" - ) - ) -) - -################################################################################ -# Test the structure of the object returned by get_elixhauser_codes() - -stopifnot(is.data.frame(get_elixhauser_codes())) - -stopifnot( - identical( - names(get_elixhauser_codes()), - c("icdv", "dx", "full_code", "code", - "poaexempt", - "condition", - "elixhauser_ahrq_web", - "elixhauser_elixhauser1988", - "elixhauser_quan2005", - "elixhauser_ahrq2022", - "elixhauser_ahrq2023", - "elixhauser_ahrq2024", - "elixhauser_ahrq2025", - "elixhauser_ahrq_icd10" - ) - ) -) - - -stopifnot( - identical( - sapply(get_elixhauser_codes(), class), - c(icdv = "integer", - dx = "integer", - full_code = "character", - code = "character", - poaexempt = "integer", - condition = "character", - elixhauser_ahrq_web = "integer", - elixhauser_elixhauser1988 = "integer", - elixhauser_quan2005 = "integer", - elixhauser_ahrq2022 = "integer", - elixhauser_ahrq2023 = "integer", - elixhauser_ahrq2024 = "integer", - elixhauser_ahrq2025 = "integer", - elixhauser_ahrq_icd10 = "integer" - ) - ) -) - -# check that the row names are just sequential integers -ec <- get_elixhauser_codes() -stopifnot(identical(rownames(ec), as.character(seq_len(nrow(ec))))) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-elixhauser-example.R b/tests/test-elixhauser-example.R index dedcc897..866ac4aa 100644 --- a/tests/test-elixhauser-example.R +++ b/tests/test-elixhauser-example.R @@ -3,8 +3,7 @@ library(medicalcoder) ################################################################################ # Regression test: selected ICD-10 codes should map to specific Elixhauser -# comorbidities under the Quan (2005) implementation. The codes were drawn from -# the internal lookup table (get_elixhauser_codes()). +# comorbidities under the Quan (2005) implementation. ################################################################################ example_codes <- diff --git a/tests/test-elixhauser-index-scores.R b/tests/test-elixhauser-index-scores.R deleted file mode 100644 index 4903de4a..00000000 --- a/tests/test-elixhauser-index-scores.R +++ /dev/null @@ -1,31 +0,0 @@ -source('utilities.R') -library(medicalcoder) - -stopifnot(is.data.frame(medicalcoder:::..mdcr_internal_elixhauser_index_scores..)) - -stopifnot(identical(get_elixhauser_index_scores(), medicalcoder:::..mdcr_internal_elixhauser_index_scores..)) - -stopifnot( - identical( - sapply(get_elixhauser_index_scores(), class), - c( - condition = "character", - index = "character", - elixhauser_ahrq_web = "integer", - elixhauser_elixhauser1988 = "integer", - elixhauser_quan2005 = "integer", - elixhauser_ahrq2022 = "integer", - elixhauser_ahrq2023 = "integer", - elixhauser_ahrq2024 = "integer", - elixhauser_ahrq2025 = "integer", - elixhauser_ahrq_icd10 = "integer" - ) - ) -) - -eis <- get_elixhauser_index_scores() -stopifnot(identical(rownames(eis), as.character(seq_len(nrow(eis))))) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-elixhauser-poa.R b/tests/test-elixhauser-poa.R deleted file mode 100644 index 0699a0f9..00000000 --- a/tests/test-elixhauser-poa.R +++ /dev/null @@ -1,29 +0,0 @@ -source('utilities.R') -library(medicalcoder) - -stopifnot(is.data.frame(medicalcoder:::..mdcr_internal_elixhauser_poa..)) - -stopifnot(identical(get_elixhauser_poa(), medicalcoder:::..mdcr_internal_elixhauser_poa..)) - -stopifnot( - identical( - sapply(get_elixhauser_poa(), class), - c(condition = "character", - poa_required = "integer", - elixhauser_ahrq2022 = "integer", - elixhauser_ahrq2023 = "integer", - elixhauser_ahrq2024 = "integer", - elixhauser_ahrq2025 = "integer", - elixhauser_ahrq_icd10 = "integer" - ) - ) -) - - -# check that the row names are just sequential integers -ep <- get_elixhauser_poa() -stopifnot(identical(rownames(ep), as.character(seq_len(nrow(ep))))) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-get_icd.R b/tests/test-get_icd.R deleted file mode 100644 index 670f0890..00000000 --- a/tests/test-get_icd.R +++ /dev/null @@ -1,120 +0,0 @@ -source('utilities.R') -library(medicalcoder) - -ICDCODES <- get_icd_codes() -ICDDESCS <- get_icd_codes(with.descriptions = TRUE, with.hierarchy = FALSE) -ICDHIER <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = TRUE) -ICDDH <- get_icd_codes(with.descriptions = TRUE, with.hierarchy = TRUE) - -# The way these functions should work is to build the needed user friendly data -# set from the internal look up tables and save the result in the a package -# envrionment. This is done so the initial call may take some time to build the -# data, subsequent calls will retrive the cached data. The next four lines -# verify that the return from the subsequent call is identical to the initial -# call. -ICDCODES2 <- get_icd_codes() -ICDDESCS2 <- get_icd_codes(with.descriptions = TRUE, with.hierarchy = FALSE) -ICDHIER2 <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = TRUE) -ICDDH2 <- get_icd_codes(with.descriptions = TRUE, with.hierarchy = TRUE) - -stopifnot(isTRUE(identical(ICDCODES, ICDCODES2))) -stopifnot(isTRUE(identical(ICDDESCS, ICDDESCS2))) -stopifnot(isTRUE(identical(ICDHIER, ICDHIER2))) -stopifnot(isTRUE(identical(ICDDH, ICDDH2))) - -# verify that the ICD codes are data.frames, nothing more. -stopifnot(class(ICDCODES) == "data.frame") -stopifnot(class(ICDDESCS) == "data.frame") -stopifnot(class(ICDHIER) == "data.frame") -stopifnot(class(ICDDH) == "data.frame") - -# verify that the row names are just integers -stopifnot(identical(rownames(ICDCODES), as.character(seq_len(nrow(ICDCODES))))) -stopifnot(identical(rownames(ICDDESCS), as.character(seq_len(nrow(ICDDESCS))))) -stopifnot(identical(rownames(ICDHIER), as.character(seq_len(nrow(ICDHIER))))) -stopifnot(identical(rownames(ICDDH), as.character(seq_len(nrow(ICDDH))))) - -# verify the names are as expected, the order shouldn't matter, but this test -# will verify the order is as expected too. -expected_icd_code_columns <- c("icdv", "dx", "full_code", "code", "src", "known_start", "known_end", "assignable_start", "assignable_end") -expected_icd_desc_columns <- c("desc", "desc_start", "desc_end") -expected_icd_hier_columns <- c("chapter", "subchapter", "category", "subcategory", "subclassification", "subsubclassification", "extension") -stopifnot(isTRUE(identical(names(ICDCODES), expected_icd_code_columns))) -stopifnot(isTRUE(identical(names(ICDDESCS), c(expected_icd_code_columns, expected_icd_desc_columns)))) -stopifnot(isTRUE(identical(names(ICDHIER), c(expected_icd_code_columns, expected_icd_hier_columns)))) -stopifnot(isTRUE(identical(names(ICDDH), c(expected_icd_code_columns, expected_icd_desc_columns, expected_icd_hier_columns)))) - -# Verify the column classes are as expected -nc <- - c( - "icdv" = "integer", - "dx" = "integer", - "full_code" = "character", - "code" = "character", - "src" = "character", - "known_start" = "integer", - "known_end" = "integer", - "assignable_start" = "integer", - "assignable_end" = "integer", - "desc" = "character", - "desc_start" = "integer", - "desc_end" = "integer", - "chapter" = "character", - "subchapter" = "character", - "category" = "character", - "subcategory" = "character", - "subclassification" = "character", - "subsubclassification" = "character", - "extension" = "character" - ) - -for(j in names(ICDCODES)) { - stopifnot(isTRUE(inherits(ICDCODES[[j]], nc[j]))) -} - -for(j in names(ICDDESCS)) { - stopifnot(isTRUE(inherits(ICDDESCS[[j]], nc[j]))) -} - -for(j in names(ICDHIER)) { - stopifnot(isTRUE(inherits(ICDHIER[[j]], nc[j]))) -} - -for(j in names(ICDDH)) { - stopifnot(isTRUE(inherits(ICDDH[[j]], nc[j]))) -} - -# if assignable_start is NA then assignable_end should be NA -idx <- which(is.na(ICDCODES[["assignable_start"]])) -stopifnot(all(is.na(ICDCODES[["assignable_end"]][idx]))) - -idx <- which(is.na(ICDHIER[["assignable_start"]])) -stopifnot(all(is.na(ICDHIER[["assignable_end"]][idx]))) - -# Verify that columns are populated -stopifnot(all(Negate(is.na)(ICDCODES[["icdv"]]))) -stopifnot(all(Negate(is.na)(ICDCODES[["dx"]]))) -stopifnot(all(Negate(is.na)(ICDCODES[["full_code"]]))) -stopifnot(all(Negate(is.na)(ICDCODES[["code"]]))) -stopifnot(all(Negate(is.na)(ICDCODES[["known_start"]]))) -stopifnot(all(Negate(is.na)(ICDCODES[["known_end"]]))) - -stopifnot(all(Negate(is.na)(ICDHIER[["icdv"]]))) -stopifnot(all(Negate(is.na)(ICDHIER[["dx"]]))) -stopifnot(all(Negate(is.na)(ICDHIER[["full_code"]]))) -stopifnot(all(Negate(is.na)(ICDHIER[["code"]]))) -stopifnot(all(Negate(is.na)(ICDHIER[["chapter"]]))) -stopifnot(all(Negate(is.na)(ICDHIER[["known_start"]]))) -stopifnot(all(Negate(is.na)(ICDHIER[["known_end"]]))) - -# Verify that the src column is as expected -stopifnot( - identical(c("cdc", "cms", "who"), sort(unique(ICDCODES[["src"]]))), - identical(c("cdc", "cms", "who"), sort(unique(ICDDESCS[["src"]]))), - identical(c("cdc", "cms", "who"), sort(unique(ICDHIER[["src"]]))), - identical(c("cdc", "cms", "who"), sort(unique(ICDDH[["src"]]))) -) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-icd-codes.R b/tests/test-icd-codes.R deleted file mode 100644 index 1d7a2ff2..00000000 --- a/tests/test-icd-codes.R +++ /dev/null @@ -1,33 +0,0 @@ -source('utilities.R') -library(medicalcoder) - -################################################################################ -# check the internal data set -stopifnot(is.data.frame(medicalcoder:::..mdcr_internal_icd_codes..)) - -stopifnot( - identical( - names(medicalcoder:::..mdcr_internal_icd_codes..), - c("icdv", "dx", "full_code", "code", "code_id", "chap_id", "subchap_id") - ) -) - -stopifnot( - identical( - sapply(medicalcoder:::..mdcr_internal_icd_codes.., class), - c(icdv = "integer", - dx = "integer", - full_code = "character", - code = "character", - code_id = "integer", - chap_id = "integer", - subchap_id = "integer") - ) -) - -# see test-get_icd.R - - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-internal-data.R b/tests/test-internal-data.R new file mode 100644 index 00000000..43393233 --- /dev/null +++ b/tests/test-internal-data.R @@ -0,0 +1,250 @@ +library(medicalcoder) +source("utilities.R") +################################################################################ +# testing the internal data sets - both the internal sets and the user visible +# versions. + +# The internal sets can to look for: this cannot be done dynamically +# e <- new.env() +# load("../R/sysdata.rda", envir = e) +# dput(sort(ls(all.names = TRUE, envir = e))) + +internal_data_sets <- + c( + "..mdcr_internal_charlson_codes..", + "..mdcr_internal_charlson_index_scores..", + "..mdcr_internal_desc_start_stop..", + "..mdcr_internal_elixhauser_codes..", + "..mdcr_internal_elixhauser_index_scores..", + "..mdcr_internal_elixhauser_poa..", + "..mdcr_internal_icd_chapters..", + "..mdcr_internal_icd_codes..", + "..mdcr_internal_icd_descs..", + "..mdcr_internal_icd_subchapters..", + "..mdcr_internal_known_and_assignable_start_stop..", + "..mdcr_internal_pccc_codes..", + "..mdcr_internal_pccc_conditions.." + ) + +mdcr <- getNamespace("medicalcoder") + +# are all the expected sets in the medicalcoder namespace? +stopifnot(all(internal_data_sets %in% names(mdcr))) + +# check that there are not unaccounted for data sets. the ..mdcr_internal_ +# prefix and .. suffix is expected. noted in the data-raw/build_sysdata.R +stopifnot( + all( + grep("\\.\\.mdcr_internal_.+\\.\\.$", names(mdcr), value = TRUE) %in% internal_data_sets + ) +) + +################################################################################ +# verify all the sets are data.frames and only data.frames +t01 <- all(sapply(sapply(internal_data_sets, get, envir = mdcr), inherits, "data.frame")) +t02 <- !any(sapply(sapply(internal_data_sets, get, envir = mdcr), inherits, "data.table")) +t03 <- !any(sapply(sapply(internal_data_sets, get, envir = mdcr), inherits, "tbl_df")) + +stopifnot(t01, t02, t03) + +################################################################################ +# Verify the names and classes of the internal data sets +expected_internal_names_and_classes <- + list( + "..mdcr_internal_charlson_codes.." = c(code_id = "integer", condition = "character", charlson_cdmf2019 = "integer", charlson_deyo1992 = "integer", charlson_quan2005 = "integer", charlson_quan2011 = "integer"), + "..mdcr_internal_charlson_index_scores.." = c(condition_description = "character", condition = "character", charlson_cdmf2019 = "integer", charlson_quan2011 = "integer", charlson_quan2005 = "integer", charlson_deyo1992 = "integer"), + "..mdcr_internal_desc_start_stop.." = c(code_id = "integer", desc_id = "integer", src = "factor", desc_start = "integer", desc_end = "integer"), + "..mdcr_internal_elixhauser_codes.." = c(code_id = "integer", poaexempt = "integer", condition = "character", elixhauser_ahrq_web = "integer", elixhauser_elixhauser1988 = "integer", elixhauser_quan2005 = "integer", elixhauser_ahrq2022 = "integer", elixhauser_ahrq2023 = "integer", elixhauser_ahrq2024 = "integer", elixhauser_ahrq2025 = "integer", elixhauser_ahrq_icd10 = "integer"), + "..mdcr_internal_elixhauser_index_scores.." = c(condition = "character", index = "character", elixhauser_ahrq_web = "integer", elixhauser_elixhauser1988 = "integer", elixhauser_quan2005 = "integer", elixhauser_ahrq2022 = "integer", elixhauser_ahrq2023 = "integer", elixhauser_ahrq2024 = "integer", elixhauser_ahrq2025 = "integer", elixhauser_ahrq_icd10 = "integer"), + "..mdcr_internal_elixhauser_poa.." = c(condition = "character", poa_required = "integer", elixhauser_ahrq2022 = "integer", elixhauser_ahrq2023 = "integer", elixhauser_ahrq2024 = "integer", elixhauser_ahrq2025 = "integer", elixhauser_ahrq_icd10 = "integer"), + "..mdcr_internal_icd_chapters.." = c(chapter = "character", chap_id = "integer"), + "..mdcr_internal_icd_codes.." = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", code_id = "integer", chap_id = "integer", subchap_id = "integer"), + "..mdcr_internal_icd_descs.." = c(desc = "character", desc_id = "integer"), + "..mdcr_internal_icd_subchapters.." = c(subchapter = "character", subchap_id = "integer"), + "..mdcr_internal_known_and_assignable_start_stop.." = c(code_id = "integer", src = "factor", known_start = "integer", known_end = "integer", assignable_start = "integer", assignable_end = "integer"), + "..mdcr_internal_pccc_codes.." = c(code_id = "integer", condition = "character", subcondition = "character", transplant_flag = "integer", tech_dep_flag = "integer", pccc_v3.1 = "integer", pccc_v3.0 = "integer", pccc_v2.1 = "integer", pccc_v2.0 = "integer"), + "..mdcr_internal_pccc_conditions.." = c(condition = "character", subcondition = "character", condition_label = "character", subcondition_label = "character") + ) + +current_names_and_classes <- sapply(sapply(internal_data_sets, get, envir = mdcr), sapply, class) + +stopifnot(identical(length(current_names_and_classes), length(expected_internal_names_and_classes))) + +for(n in names(current_names_and_classes)) { + z <- identical(current_names_and_classes[[n]], expected_internal_names_and_classes[[n]]) + if (!z) { + stop(sprintf("user visible %s does not have the expected structure", n)) + } +} + +#for (n in names(current_names)) { +# if (!identical(names(get(n, envir = mdcr)), expected_names[[n]])) { +# stop(sprintf("names(%s) are not as expected", n)) +# } +#} + + +################################################################################ +# get the data sets via get_ methods + +# expected methods +user_visible_get_methods <- + c( + "get_charlson_codes", + "get_charlson_index_scores", + "get_elixhauser_codes", + "get_elixhauser_index_scores", + "get_elixhauser_poa", + "get_icd_codes", + "get_pccc_codes", + "get_pccc_conditions" + ) + +# check that the expected methods are accounted for +stopifnot(all(user_visible_get_methods %in% names(mdcr))) +stopifnot(all(grep("get_.+", names(mdcr), value = TRUE) %in% user_visible_get_methods)) + +# all the get_* methods have zero arguments except for get_icd_codes +for (m in user_visible_get_methods) { + f <- getExportedValue(name = m, ns = "medicalcoder") + x <- formals(f) + if (m != "get_icd_codes") { + if (!is.null(x)) { + stop(sprintf("Unexpected function argument in %s()", m)) + } + } else { + if (!identical(x, pairlist(with.descriptions = FALSE, with.hierarchy = FALSE))) { + stop(sprintf("Unexpected function argument in %s()", m)) + } + } +} + +# get the datasets +user_visible <- + list( + charlson_codes = get_charlson_codes(), + charlson_index_scores = get_charlson_index_scores(), + elixhauser_codes = get_elixhauser_codes(), + elixhauser_index_scores = get_elixhauser_index_scores(), + elixhauser_poa = get_elixhauser_poa(), + icd_codes = get_icd_codes(), + icd_desc = get_icd_codes(with.descriptions = TRUE), + icd_hier = get_icd_codes(with.hierarchy = TRUE), + icd_dh = get_icd_codes(with.descriptions = TRUE, with.hierarchy = TRUE), + pccc_codes = get_pccc_codes(), + pccc_conditions = get_pccc_conditions() + ) + +# verify all the sets are data.frames and only data.frames +t04 <- all(sapply(user_visible, inherits, "data.frame")) +t05 <- !any(sapply(user_visible, inherits, "data.table")) +t06 <- !any(sapply(user_visible, inherits, "tbl_df")) + +stopifnot(t04, t05, t06) + +# define the expected structure +user_visible_expected_structures <- + list( + charlson_codes = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", condition = "character", charlson_cdmf2019 = "integer", charlson_deyo1992 = "integer", charlson_quan2005 = "integer", charlson_quan2011 = "integer"), + charlson_index_scores = expected_internal_names_and_classes$..mdcr_internal_charlson_index_scores.., + elixhauser_codes = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", poaexempt = "integer", condition = "character", elixhauser_ahrq_web = "integer", elixhauser_elixhauser1988 = "integer", elixhauser_quan2005 = "integer", elixhauser_ahrq2022 = "integer", elixhauser_ahrq2023 = "integer", elixhauser_ahrq2024 = "integer", elixhauser_ahrq2025 = "integer", elixhauser_ahrq_icd10 = "integer"), + elixhauser_index_scores = expected_internal_names_and_classes$..mdcr_internal_elixhauser_index_scores.., + elixhauser_poa = expected_internal_names_and_classes$..mdcr_internal_elixhauser_poa.., + icd_codes = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", src = "character", known_start = "integer", known_end = "integer", assignable_start = "integer", assignable_end = "integer"), + icd_desc = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", src = "character", known_start = "integer", known_end = "integer", assignable_start = "integer", assignable_end = "integer", desc = "character", desc_start = "integer", desc_end = "integer"), + icd_hier = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", src = "character", known_start = "integer", known_end = "integer", assignable_start = "integer", assignable_end = "integer", chapter = "character", subchapter = "character", category = "character", subcategory = "character", subclassification = "character", subsubclassification = "character", extension = "character"), + icd_dh = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", src = "character", known_start = "integer", known_end = "integer", assignable_start = "integer", assignable_end = "integer", desc = "character", desc_start = "integer", desc_end = "integer", chapter = "character", subchapter = "character", category = "character", subcategory = "character", subclassification = "character", subsubclassification = "character", extension = "character"), + pccc_codes = c(icdv = "integer", dx = "integer", full_code = "character", code = "character", condition = "character", subcondition = "character", transplant_flag = "integer", tech_dep_flag = "integer", pccc_v3.1 = "integer", pccc_v3.0 = "integer", pccc_v2.1 = "integer", pccc_v2.0 = "integer"), + pccc_conditions = expected_internal_names_and_classes$..mdcr_internal_pccc_conditions.. + ) + +user_visible_current_structures <- lapply(user_visible, sapply, class) + +for(n in names(user_visible_current_structures)) { + z <- identical(user_visible_current_structures[[n]], user_visible_expected_structures[[n]]) + if (!z) { + stop(sprintf("user visible %s does not have the expected structure", n)) + } +} + +# check that the row names are just sequential integers +for (n in names(user_visible)) { + expected <- as.character(seq_len(nrow(user_visible[[n]]))) + current <- rownames(user_visible[[n]]) + if (!identical(expected, current)) { + stop(sprintf("rownames(user_visible[[%s]]) are not sequential integers", n)) + } +} + +################################################################################ +# verify *_codes are all valid codes +for (n in grep("_codes$", names(user_visible), value = TRUE)) { + z <- is_icd(x = user_visible[[n]][["code"]], + icdv = user_visible[[n]][["icdv"]], + dx = user_visible[[n]][["dx"]], + headerok = TRUE, + ever.assignable = TRUE, + warn.ambiguous = FALSE + ) + if (!all(z)) { + stop(sprintf("not all user_visible[['%s']][['code']] are valid ever.assignable ICD codes", n)) + } + z <- is_icd(x = user_visible[[n]][["full_code"]], + icdv = user_visible[[n]][["icdv"]], + dx = user_visible[[n]][["dx"]], + headerok = TRUE, + ever.assignable = TRUE, + warn.ambiguous = FALSE + ) + if (!all(z)) { + stop(sprintf("not all user_visible[['%s']][['full_code']] are valid ever.assignable ICD codes", n)) + } +} +################################################################################ +# PCCC specific checks +# +# * verify no more than two rows for a code by pccc variant +variants <- grep("^pccc_", names(user_visible[["pccc_codes"]]), value = TRUE) +ns <- + stats::aggregate( + x = user_visible[["pccc_codes"]][variants], + by = user_visible[["pccc_codes"]][c("icdv", "dx", "code")], + FUN = sum + ) +stopifnot(unlist(ns[variants]) %in% c(0L, 1L, 2L)) + +################################################################################ +# ICD code specific checks + +# assignable_start is NA then assignable_end should be NA +for (n in grep("^icd_", names(user_visible), value = TRUE)) { + x <- is.na(user_visible[[n]][["assignable_start"]]) + y <- is.na(user_visible[[n]][["assignable_end"]]) + z <- identical(x, y) + if (!z) { + stop(sprintf("is.na status of assignable_start and assignable_end are not identical for %s", n)) + } +} + +# verify that columns which are expected to be fully populated are +for (n in grep("^icd_", names(user_visible), value = TRUE)) { + for (j in c("icdv", "dx", "full_code", "code", "known_start", "known_end", "src")) { + z <- all(Negate(is.na)(user_visible[[n]][[j]])) + if (!z) { + stop(sprintf("user_visible[['%s']][['%s']] has missing values", n, j)) + } + } +} + +# Verify that the src column is as expected +for (n in grep("^icd_", names(user_visible), value = TRUE)) { + stopifnot( + identical(c("cdc", "cms", "who"), sort(unique(user_visible[[n]][["src"]]))) + ) +} + + + +################################################################################ +# End of File # +################################################################################ diff --git a/tests/test-longitudinal-pccc-v3.1.R b/tests/test-longitudinal-pccc-v3.1.R index f12a3d0f..4bb65da7 100644 --- a/tests/test-longitudinal-pccc-v3.1.R +++ b/tests/test-longitudinal-pccc-v3.1.R @@ -8,8 +8,12 @@ codes <- "J84.111", # respiratory (chronic respiratory disease) "Z96.41" # metabolic (device and technology use) ) -subset(get_pccc_codes(), full_code %in% codes) +# verify that these codes are as expected +x <- subset(get_pccc_codes(), full_code %in% codes) +rownames(x) <- NULL +expected <- structure(list(icdv = c(10L, 10L, 10L), dx = c(1L, 1L, 1L), full_code = c("H49.811", "J84.111", "Z96.41"), code = c("H49811", "J84111", "Z9641"), condition = c("metabolic", "respiratory", "metabolic"), subcondition = c("other_metabolic_disorders", "chronic_respiratory_diseases", "device_and_technology_use"), transplant_flag = c(0L, 0L, 0L), tech_dep_flag = c(0L, 0L, 1L), pccc_v3.1 = c(1L, 1L, 1L), pccc_v3.0 = c(1L, 1L, 1L), pccc_v2.1 = c(1L, 0L, 1L), pccc_v2.0 = c(1L, 0L, 1L)), row.names = c(NA, -3L), class = "data.frame") +stopifnot(identical(x, expected)) # build six permutations of these codes. The codes will be considered poa on # encounters 2, 4, and 6. There will be seven encounters for each permutation. diff --git a/tests/test-lookup_icd_codes.R b/tests/test-lookup_icd_codes.R index 08d4ba9b..a74a6a20 100644 --- a/tests/test-lookup_icd_codes.R +++ b/tests/test-lookup_icd_codes.R @@ -3,11 +3,15 @@ library(medicalcoder) # lookup exact vs regex consistency lx <- lookup_icd_codes(c("E11.9","I50.9"), regex = FALSE) +lf <- lookup_icd_codes(factor(c("E11.9","I50.9")), regex = FALSE) lr <- lookup_icd_codes(c("^E11\\.9$","^I50\\.9$"), regex = TRUE) -stopifnot(all(lx$full_code %in% lr$full_code), - all(lr$full_code %in% lx$full_code)) +stopifnot( + identical(lx, lf), + all(lx$full_code %in% lr$full_code), + all(lr$full_code %in% lx$full_code) +) -# the following used to give an error: +# the following gives an error: # Error in merge.data.frame(x = input, y = matches, all.x = TRUE, by = c(ifelse(regex, : # negative length vectors are not allowed m <- tryCatchError(lookup_icd_codes(x = "", regex = TRUE)) @@ -31,6 +35,49 @@ stopifnot( identical(names(m4), expected_regex_true_names) ) +################################################################################ +# tests for matching by full and/or compact codes + +# without regex +x00 <- tryCatchError(lookup_icd_codes(x = c("E000", "E000.0"), full.codes = FALSE, compact.codes = FALSE)) +stopifnot(inherits(x00, "error")) + +# The fallowing tests may fail when ICD codes are updated. As of 17 Nov 2025, +# there are codes with known end and assignable_end of 2026 +x01 <- lookup_icd_codes(x = c("E000", "E000.0"), full.codes = FALSE, compact.codes = TRUE) +x10 <- lookup_icd_codes(x = c("E000", "E000.0"), full.codes = TRUE, compact.codes = FALSE) +x11 <- lookup_icd_codes(x = c("E000", "E000.0"), full.codes = TRUE, compact.codes = TRUE) + +e01 <- structure(list(input_code = c("E000", "E000", "E000", "E000", "E000.0"), match_type = c("compact_code", "compact_code", "compact_code", "compact_code", NA), icdv = c(10L, 9L, 10L, 10L, NA), dx = c(1L, 1L, 1L, 1L, NA), full_code = c("E00.0", "E000", "E00.0", "E00.0", NA), code = c("E000", "E000", "E000", "E000", NA), src = c("cms", "cms", "who", "cdc", NA), known_start = c(2014L, 2010L, 2008L, 2001L, NA), known_end = c(2026L, 2015L, 2019L, 2025L, NA), assignable_start = c(2014L, NA, 2008L, 2001L, NA), assignable_end = c(2026L, NA, 2019L, 2025L, NA)), row.names = c(NA, 5L), class = "data.frame") +e10 <- structure(list(input_code = c("E000", "E000.0"), match_type = c("full_code", "full_code"), icdv = c(9L, 9L), dx = c(1L, 1L), full_code = c("E000", "E000.0"), code = c("E000", "E0000"), src = c("cms", "cms"), known_start = c(2010L, 2010L), known_end = c(2015L, 2015L), assignable_start = c(NA, 2010L), assignable_end = c(NA, 2015L)), row.names = 1:2, class = "data.frame") +e11 <- structure(list(input_code = c("E000", "E000", "E000", "E000", "E000.0"), match_type = c("full_code", "compact_code", "compact_code", "compact_code", "full_code"), icdv = c(9L, 10L, 10L, 10L, 9L), dx = c(1L, 1L, 1L, 1L, 1L), full_code = c("E000", "E00.0", "E00.0", "E00.0", "E000.0"), code = c("E000", "E000", "E000", "E000", "E0000"), src = c("cms", "cms", "who", "cdc", "cms"), known_start = c(2010L, 2014L, 2008L, 2001L, 2010L), known_end = c(2015L, 2026L, 2019L, 2025L, 2015L), assignable_start = c(NA, 2014L, 2008L, 2001L, 2010L), assignable_end = c(NA, 2026L, 2019L, 2025L, 2015L)), row.names = c(NA, 5L), class = "data.frame") + +stopifnot( + identical(x01, e01), + identical(x10, e10), + identical(x11, e11) +) + +# with regex +x00 <- tryCatchError(lookup_icd_codes(x = c("E000", "E000\\.0"), regex = TRUE, full.codes = FALSE, compact.codes = FALSE)) +stopifnot(inherits(x00, "error")) + +# The fallowing tests may fail when ICD codes are updated. As of 17 Nov 2025, +# there are codes with known end and assignable_end of 2026 +x01 <- lookup_icd_codes(x = c("E000", "E000\\.0"), regex = TRUE, full.codes = FALSE, compact.codes = TRUE) +x10 <- lookup_icd_codes(x = c("E000", "E000\\.0"), regex = TRUE, full.codes = TRUE, compact.codes = FALSE) +x11 <- lookup_icd_codes(x = c("E000", "E000\\.0"), regex = TRUE, full.codes = TRUE, compact.codes = TRUE) + +e01 <- structure(list(input_regex = c("E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000"), match_type = c("compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code"), icdv = c(10L, 10L, 9L, 10L, 9L, 9L, 9L, 9L, 9L), dx = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), full_code = c("E00.0", "E00.0", "E000", "E00.0", "E000.0", "E000.1", "E000.2", "E000.8", "E000.9"), code = c("E000", "E000", "E000", "E000", "E0000", "E0001", "E0002", "E0008", "E0009"), src = c("who", "cms", "cms", "cdc", "cms", "cms", "cms", "cms", "cms"), known_start = c(2008L, 2014L, 2010L, 2001L, 2010L, 2010L, 2011L, 2010L, 2010L), known_end = c(2019L, 2026L, 2015L, 2025L, 2015L, 2015L, 2015L, 2015L, 2015L), assignable_start = c(2008L, 2014L, NA, 2001L, 2010L, 2010L, 2011L, 2010L, 2010L), assignable_end = c(2019L, 2026L, NA, 2025L, 2015L, 2015L, 2015L, 2015L, 2015L)), row.names = c(1L, 2L, 3L, 4L, 17L, 18L, 19L, 20L, 21L), class = "data.frame") +e10 <- structure(list(input_regex = c("E000", "E000", "E000", "E000", "E000", "E000", "E000\\.0"), match_type = c("full_code", "full_code", "full_code", "full_code", "full_code", "full_code", "full_code"), icdv = c(9L, 9L, 9L, 9L, 9L, 9L, 9L), dx = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), full_code = c("E000", "E000.0", "E000.1", "E000.2", "E000.8", "E000.9", "E000.0"), code = c("E000", "E0000", "E0001", "E0002", "E0008", "E0009", "E0000"), src = c("cms", "cms", "cms", "cms", "cms", "cms", "cms"), known_start = c(2010L, 2010L, 2010L, 2011L, 2010L, 2010L, 2010L), known_end = c(2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L), assignable_start = c(NA, 2010L, 2010L, 2011L, 2010L, 2010L, 2010L), assignable_end = c(NA, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 37L), class = "data.frame") +e11 <- structure(list(input_regex = c("E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000", "E000\\.0"), match_type = c("full_code", "full_code", "full_code", "full_code", "full_code", "full_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "compact_code", "full_code"), icdv = c(9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L), dx = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), full_code = c("E000", "E000.0", "E000.9", "E000.1", "E000.2", "E000.8", "E00.0", "E00.0", "E00.0", "E000.0", "E000.9", "E000.1", "E000.2", "E000.8", "E000.0"), code = c("E000", "E0000", "E0009", "E0001", "E0002", "E0008", "E000", "E000", "E000", "E0000", "E0009", "E0001", "E0002", "E0008", "E0000"), src = c("cms", "cms", "cms", "cms", "cms", "cms", "who", "cms", "cdc", "cms", "cms", "cms", "cms", "cms", "cms"), known_start = c(2010L, 2010L, 2010L, 2010L, 2011L, 2010L, 2008L, 2014L, 2001L, 2010L, 2010L, 2010L, 2011L, 2010L, 2010L), known_end = c(2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2019L, 2026L, 2025L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L), assignable_start = c(NA, 2010L, 2010L, 2010L, 2011L, 2010L, 2008L, 2014L, 2001L, 2010L, 2010L, 2010L, 2011L, 2010L, 2010L), assignable_end = c(NA, 2015L, 2015L, 2015L, 2015L, 2015L, 2019L, 2026L, 2025L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L)), row.names = c(1L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 23L, 27L, 28L, 29L, 30L, 451L), class = "data.frame") + +stopifnot( + identical(x01, e01), + identical(x10, e10), + identical(x11, e11) +) + ################################################################################ # End of File # ################################################################################ diff --git a/tests/test-pccc-codes.R b/tests/test-pccc-codes.R deleted file mode 100644 index 18dfc5f0..00000000 --- a/tests/test-pccc-codes.R +++ /dev/null @@ -1,87 +0,0 @@ -source('utilities.R') -library(medicalcoder) -################################################################################ -# check the internal data set -stopifnot(is.data.frame(medicalcoder:::..mdcr_internal_pccc_codes..)) - -stopifnot( - identical( - names(medicalcoder:::..mdcr_internal_pccc_codes..), - c("code_id", "condition", "subcondition", "transplant_flag", - "tech_dep_flag", "pccc_v3.1", "pccc_v3.0", "pccc_v2.1", "pccc_v2.0") - ) -) - -stopifnot( - identical( - sapply(medicalcoder:::..mdcr_internal_pccc_codes.., class), - c(code_id = "integer", - condition = "character", - subcondition = "character", - transplant_flag = "integer", - tech_dep_flag = "integer", - pccc_v3.1 = "integer", - pccc_v3.0 = "integer", - pccc_v2.1 = "integer", - pccc_v2.0 = "integer" - ) - ) -) - -################################################################################ -# Verify that the pccc codes are retrievable as a data.frame -pccc_codes <- get_pccc_codes() - -stopifnot("pccc_codes are a data.frame" = identical(class(pccc_codes), "data.frame")) -stopifnot( - identical( - names(pccc_codes), - c("icdv", "dx", "full_code", "code", "condition", "subcondition", "transplant_flag", "tech_dep_flag", "pccc_v3.1", "pccc_v3.0", "pccc_v2.1", "pccc_v2.0") - ) -) - -################################################################################ -# Verify that all the ICD codes in the data set are valid codes -x <- is_icd(pccc_codes$code, headerok = TRUE, ever.assignable = TRUE, warn.ambiguous = FALSE) -stopifnot("all compact pccc_codes are valid ever.assignable icd codes" = all(x)) - -x <- is_icd(pccc_codes$full_code, headerok = TRUE, ever.assignable = TRUE, warn.ambiguous = FALSE) -stopifnot("all full pccc_codes are valid ever.assignable icd codes" = all(x)) - -x <- pccc_codes[pccc_codes$icdv == 9 & pccc_codes$dx == 1, "full_code"] -x <- is_icd(x = x, icdv = 9, dx = 1, headerok = TRUE, ever.assignable = TRUE, warn.ambiguous = FALSE) -stopifnot("all icdv 9 dx full codes are valid ever.assignable icd codes" = all(x)) - -x <- pccc_codes[pccc_codes$icdv == 9 & pccc_codes$dx == 0, "full_code"] -x <- is_icd(x = x, icdv = 9, dx = 0, headerok = TRUE, ever.assignable = TRUE, warn.ambiguous = FALSE) -stopifnot("all icdv 9 pr full codes are valid ever.assignable icd codes" = all(x)) - -x <- pccc_codes[pccc_codes$icdv == 10 & pccc_codes$dx == 1, "full_code"] -x <- is_icd(x = x, icdv = 10, dx = 1L, headerok = TRUE, ever.assignable = TRUE, warn.ambiguous = FALSE) -stopifnot("all icdv 10 dx full codes are valid ever.assignable icd codes" = all(x)) - -x <- pccc_codes[pccc_codes$icdv == 10 & pccc_codes$dx == 0L, "full_code"] -x <- is_icd(x = x, icdv = 10, dx = 0L, headerok = TRUE, ever.assignable = TRUE) -stopifnot("all icdv 10 pr full codes are valid ever.assignable icd codes" = all(x)) - -################################################################################ -# verify no more than two rows for a code by pccc variant -variants <- grep("^pccc_", names(pccc_codes), value = TRUE) -codes <- - aggregate( - pccc_codes[variants], - by = pccc_codes[c("icdv", "dx", "code")], - FUN = sum - ) -stopifnot( - max(unlist(codes[variants])) == 2 -) - -################################################################################ -# check that the row names are just sequential integers -pc <- get_pccc_codes() -stopifnot(identical(rownames(pc), as.character(seq_len(nrow(pc))))) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-pccc-conditions.R b/tests/test-pccc-conditions.R deleted file mode 100644 index 5ae0cf4a..00000000 --- a/tests/test-pccc-conditions.R +++ /dev/null @@ -1,28 +0,0 @@ -source('utilities.R') -library(medicalcoder) - -stopifnot(is.data.frame(medicalcoder:::..mdcr_internal_pccc_conditions..)) - -stopifnot( - identical( - names(medicalcoder:::..mdcr_internal_pccc_conditions..), - c("condition", "subcondition", "condition_label", "subcondition_label") - ) -) - -stopifnot( - identical( - sapply(medicalcoder:::..mdcr_internal_pccc_conditions.., class), - c(condition = "character", - subcondition = "character", - condition_label = "character", - subcondition_label = "character" - ) - ) -) - -stopifnot(identical(get_pccc_conditions(), medicalcoder:::..mdcr_internal_pccc_conditions..)) - -################################################################################ -# End of File # -################################################################################ diff --git a/tests/test-pccc.R b/tests/test-pccc.R index 3a7399a9..039fff87 100644 --- a/tests/test-pccc.R +++ b/tests/test-pccc.R @@ -1,17 +1,6 @@ source('utilities.R') library(medicalcoder) -################################################################################ -# Verify that the pccc codes are retrievable as a data.frame -pccc_codes <- get_pccc_codes() -stopifnot("pccc_codes are a data.frame" = identical(class(pccc_codes), "data.frame")) -stopifnot( - identical( - names(pccc_codes), - c("icdv", "dx", "full_code", "code", "condition", "subcondition", "transplant_flag", "tech_dep_flag", "pccc_v3.1", "pccc_v3.0", "pccc_v2.1", "pccc_v2.0") - ) -) - ################################################################################ # verify that there is not going to be an error if no matches are found dat <- data.frame(patid = 1:26, @@ -220,6 +209,31 @@ stopifnot( !identical(out00$num_cmrb, out07$num_cmrb) ) +################################################################################ +# looking for the case in v3.1 where the code is a tech and transplant +techtrans <- + subset(get_pccc_codes(), pccc_v3.1 == 1 & transplant_flag == 1 & tech_dep_flag == 1) +techtrans[["rid"]] <- seq_len(nrow(techtrans)) + +techtrans_results <- + comorbidities( + data = techtrans, + icd.codes = "code", + id.vars = "rid", + method = "pccc_v3.1", + poa = 1 + ) + +for (j in colnames(techtrans_results)) { + if (j == "rid") { + stopifnot(techtrans_results[[j]] == 1:6) + } else if (j %in% c("cvd_tech_only", "cvd_dxpr_or_tech", "any_tech_dep", "any_transplant", "num_cmrb", "cmrb_flag")) { + stopifnot(techtrans_results[[j]] == rep(1L, 6)) + } else { + stopifnot(techtrans_results[[j]] == rep(0L, 6)) + } +} + ################################################################################ # End of File # ################################################################################ diff --git a/tests/test-summary-pccc.R b/tests/test-summary-pccc.R index 0aa3baa0..0d93d854 100644 --- a/tests/test-summary-pccc.R +++ b/tests/test-summary-pccc.R @@ -121,6 +121,26 @@ stopifnot( is.numeric(rtn[["percent"]]) ) +################################################################################ +# PCCC Version 2.1 with flag.method = "cumulative" + +cmb <- comorbidities(data = mdcr_longitudinal, id.vars = c("patid", "date"), icd.codes = "code", poa = 1, flag.method = 'cumulative', method = "pccc_v2.1") +rtn <- tryCatchWarning(summary(cmb)) +stopifnot(inherits(rtn, "warning")) +rtn <- suppressWarnings(summary(cmb)) + +stopifnot( + inherits(rtn, "data.frame"), + identical( + names(rtn), + c("condition", "label", "count", "percent") + ), + is.character(rtn[["condition"]]), + is.character(rtn[["label"]]), + is.integer(rtn[["count"]]), + is.numeric(rtn[["percent"]]) +) + ################################################################################ # End of File # ################################################################################ diff --git a/tests/test-tibble-datatable.R b/tests/test-tibble-datatable.R index 747cc49a..81d7cf7c 100644 --- a/tests/test-tibble-datatable.R +++ b/tests/test-tibble-datatable.R @@ -113,6 +113,25 @@ for (obj in ls(envir = DFS, all.names = TRUE)) { stopifnot(all.equal(xDF, xDT, check.attributes = FALSE)) } +# check that the print method returns the input object +for (obj in ls(envir = DFS, all.names = TRUE)) { + x <- print(DFS[[obj]]) + z <- identical(x, DFS[[obj]]) + if (!z) { + stop(sprintf("print(DFS[['%s']]) does not return identical %s", obj, obj)) + } + x <- print(TBLS[[obj]]) + z <- identical(x, TBLS[[obj]]) + if (!z) { + stop(sprintf("print(TBLS[['%s']]) does not return identical %s", obj, obj)) + } + x <- print(DTS[[obj]]) + z <- identical(x, DTS[[obj]]) + if (!z) { + stop(sprintf("print(DTS[['%s']]) does not return identical %s", obj, obj)) + } +} + ################################################################################ # Now, if the data.table namespace is available, then test that the elements in # DTS are data.tables and after setting to data.frames, then the objects are @@ -179,6 +198,8 @@ for (obj in ls(envir = DFS, all.names = TRUE)) { stopifnot(identical(xTBL, xDF)) } + + ################################################################################ # End of File # ################################################################################ diff --git a/tests/test-vs-pccc_1.0.6.R b/tests/test-vs-pccc_1.0.6.R index 8b584571..d27dda54 100644 --- a/tests/test-vs-pccc_1.0.6.R +++ b/tests/test-vs-pccc_1.0.6.R @@ -1,3 +1,4 @@ +library(medicalcoder) source('utilities.R') ################################################################################ # objective - the pccc_v2.0 in medicalcoder should reproduce the results from @@ -7,7 +8,6 @@ source('utilities.R') # pccc package only mapped to conditions and did not map subconditions. this # creates a problem with a few codes that were not documented but in the # software. Those will be noted in this script. -library(medicalcoder) # get the ICD codes from the medicalcoder package and add a code_id column icd_codes <- get_icd_codes()