diff --git a/.gitignore b/.gitignore index 271f1c27..1bd8080b 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ data-raw/icd/desc_start_stop.rds data-raw/icd/icd10/.download_stamp data-raw/icd/icd10/icd10.rds data-raw/icd/icd9/.download_stamp -data-raw/icd/icd9/icd9_cm_pcs.rds +data-raw/icd/icd9/icd9.rds data-raw/icd/icd_chapters.rds data-raw/icd/icd_chapters_subchapters.rds data-raw/icd/icd_codes.rds diff --git a/DESCRIPTION b/DESCRIPTION index fb1f61a5..1face083 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,11 +21,11 @@ BugReports: https://github.com/dewittpe/medicalcoder/issues LazyData: true Suggests: data.table, + dplyr, kableExtra, knitr, R.utils, - rmarkdown, - tibble (>= 2.0.0) + rmarkdown RoxygenNote: 7.3.3 VignetteBuilder: knitr Roxygen: list(markdown = TRUE) diff --git a/NEWS.md b/NEWS.md index a74a3c4f..269f7a2b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,24 @@ +# medicalcoder 0.7.0.9000 + +## New Features + +* If a `tibble` is passed to `comorbidities()` and the `dplyr` namespace is + available, then `dplyr` methods will be used for data manipulation. This + change will generally result in less computation time than base R + `data.frames` (`data.tables` require even less time). + +## Other Changes + +* Extend and improve the internal ICD-9 database to distinguish between CDC and + CMS source. + +* Fix documentation of the `mdcr` and `mdcr_longitudinal` datasets. + +* Clarified internal data.frame/data.table helpers: documented that `mdcr_select()` + deep-copies data.table subsets to avoid aliasing, noted the selfref fix in + `mdcr_set()`, and added inline guidance in the longitudinal section of + `comorbidities()` to explain the first-occurrence logic. + # medicalcoder 0.7.0 ## Bug Fixes diff --git a/R/charlson.R b/R/charlson.R index 073757c0..1250e2fc 100644 --- a/R/charlson.R +++ b/R/charlson.R @@ -62,7 +62,7 @@ cci <- as.integer(as.vector(X %*% cci_wt)) # build the return object - rtn <- cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) + rtn <- mdcr_cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) rtn <- mdcr_set(rtn, j = "num_cmrb", value = num_cmrb) rtn <- mdcr_set(rtn, j = "cmrb_flag", value = cmrb_flag) rtn <- mdcr_set(rtn, j = "cci", value = cci) diff --git a/R/comorbidities.R b/R/comorbidities.R index c914788a..856c1720 100644 --- a/R/comorbidities.R +++ b/R/comorbidities.R @@ -360,13 +360,13 @@ comorbidities.data.frame <- function(data, # Determine the lookup table and the columns for the lookup table to keep lookup_to_keep <- c("condition") if (startsWith(method, "pccc")) { - lookup <- get_pccc_codes() + lookup <- get(x = "pccc_codes", envir = ..mdcr_data_env.., inherits = FALSE) lookup_to_keep <- c(lookup_to_keep, "subcondition", "transplant_flag", "tech_dep_flag") } else if (startsWith(method, "charlson")) { - lookup <- get_charlson_codes() + lookup <- get("charlson_codes", envir = ..mdcr_data_env.., inherits = FALSE) lookup_to_keep <- c(lookup_to_keep) } else if (startsWith(method, "elixhauser")) { - lookup <- get_elixhauser_codes() + lookup <- get("elixhauser_codes", envir = ..mdcr_data_env.., inherits = FALSE) lookup_to_keep <- c(lookup_to_keep, "poaexempt") } @@ -385,25 +385,21 @@ comorbidities.data.frame <- function(data, ############################################################################## # inner join the data with the lookup table on_full <- - merge( + mdcr_inner_join( x = if (full.codes) {data} else {data[0, ]}, y = lookup, - all = FALSE, by.x = by_x, by.y = c("full_code", by_y), - suffixes = c("", ".y"), - sort = FALSE + suffixes = c("", ".y") ) on_comp <- - merge( + mdcr_inner_join( x = if (compact.codes) {data} else {data[0, ]}, y = lookup, - all = FALSE, by.x = by_x, by.y = c("code", by_y), - suffixes = c("", ".y"), - sort = FALSE + suffixes = c("", ".y") ) ############################################################################## @@ -519,6 +515,7 @@ comorbidities.data.frame <- function(data, grps <- c(grps, "subcondition") byconditions <- c(byconditions, "subcondition") } + # identify first occurrence per id/condition then retain encounters on/after it tmp <- mdcr_select(cmrb, c(grps, encid)) tmp <- mdcr_setorder(tmp, c(grps, encid)) keep <- !mdcr_duplicated(tmp, by = grps) @@ -527,12 +524,11 @@ comorbidities.data.frame <- function(data, # merge on the poa.var foc <- - merge(x = foc, - y = cmrb, - all = TRUE, - by.x = c(id.vars2, "first_occurrance", byconditions), - by.y = c(id.vars2, encid, byconditions), - sort = FALSE + mdcr_full_outer_join( + x = foc, + y = cmrb, + by.x = c(id.vars2, "first_occurrance", byconditions), + by.y = c(id.vars2, encid, byconditions) ) if (startsWith(method, "pccc")) { @@ -546,7 +542,7 @@ comorbidities.data.frame <- function(data, foc <- lapply(foc, function(y) { - rtn <- merge(x = iddf, y = y, all.x = TRUE, by = c(id.vars2), allow.cartesian = TRUE, sort = FALSE) + rtn <- mdcr_left_join(x = iddf, y = y, by = c(id.vars2)) rtn <- mdcr_subset(rtn, i = !is.na(rtn[["condition"]])) i <- rtn[[encid]] >= rtn[["first_occurrance"]] mdcr_subset(rtn, i = i) @@ -619,17 +615,6 @@ comorbidities.data.frame <- function(data, ############################################################################## # set attributes and return - if (requireNamespace("tibble", quietly = TRUE) && inherits(data, "tbl_df")) { - if (subconditions) { - ccc[["conditions"]] <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc[["conditions"]]) - for (i in seq_len(length(ccc[["subconditions"]]))) { - ccc[["subconditions"]][[i]] <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc[["subconditions"]][[i]]) - } - } else { - ccc <- getExportedValue(name = "as_tibble", ns = "tibble")(x = ccc) - } - } - attr(ccc, "method") <- method attr(ccc, "id.vars") <- id.vars attr(ccc, "flag.method") <- flag.method diff --git a/R/datasets.R b/R/datasets.R index 94d6ba08..5b98581a 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -1,9 +1,7 @@ #' Synthetic Data #' #' @format -#' `mdcr` is a `data.frame` with 4 columns, one for a patient id and 41 for -#' diagnostic codes and 41 possible procedure codes. Each row is for one -#' patient id. +#' `mdcr` is a `data.frame` with 4 columns, Each row is for one ICD id. #' #' * `patid`: patient identifier, integer values #' * `icdv`: ICD version; integer values, 9 or 10 @@ -17,15 +15,15 @@ #' Synthetic Longitudinal Data #' #' @format -#' `mdcr_longitudinal` is a `data.frame` with four columns. The codes are -#' expected to be treated as diagnostic codes but there are a few ICD-9 codes -#' which could match to procedure codes as well. +#' `mdcr_longitudinal` is a `data.frame` with 4 columns. The codes are +#' expected to be treated as diagnostic codes. Warning: there are a few ICD-9 +#' codes which could match to procedure codes. #' #' * `patid`: patient identifier, integer values #' * `date`: date the diagnostic code was recorded #' * `icdv`: ICD version 9 or 10, integer valued #' * `code`: ICD codes; character values -#" +#' #' @family datasets #' "mdcr_longitudinal" diff --git a/R/elixhauser.R b/R/elixhauser.R index 6eec657a..428fed2f 100644 --- a/R/elixhauser.R +++ b/R/elixhauser.R @@ -36,7 +36,7 @@ } # build the return object - rtn <- cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE)) + rtn <- mdcr_cbind(iddf, as.data.frame(results$X, check.names = FALSE, stringsAsFactors = FALSE)) rtn <- mdcr_set(rtn, j = "num_cmrb", value = results$num_cmrb) rtn <- mdcr_set(rtn, j = "cmrb_flag", value = results$cmrb_flag) rtn <- mdcr_set(rtn, j = "mortality_index", value = results$mortality_index) @@ -47,7 +47,6 @@ } .elixhauser_post2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) { - conditions <- ..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)] conditions <- sort(unique(conditions)) @@ -207,7 +206,6 @@ } .elixhauser_pre2022 <- function(ccc, id.vars, iddf, cmrb, poa.var, primarydx.var, method) { - # what are the relevent coniditions conditions <- unique(..mdcr_internal_elixhauser_codes..[["condition"]][which(..mdcr_internal_elixhauser_codes..[[method]] == 1L)]) diff --git a/R/get_icd_codes.R b/R/get_icd_codes.R index b28803fa..86ebf0fa 100644 --- a/R/get_icd_codes.R +++ b/R/get_icd_codes.R @@ -28,14 +28,16 @@ #' #' `known_start` is the first fiscal or calendar year (depending on source) that #' the medicalcoder package as definitive source data for. ICD-9-CM started in -#' the United States in fiscal year 1980. Source information that could be -#' downloaded from the CDC and CMS and added to the source code for the -#' medicalcoder package goes back to 1997. As such 1997 is the "known start" +#' the United States in fiscal year 1980. The CDC extracts included in +#' medicalcoder span fiscal years 1997--2012; the CMS ICD-9-CM/PCS extracts +#' start in fiscal year 2006 and run through fiscal year 2015. As such 1997 is +#' the earliest "known start" for ICD-9 within medicalcoder. #' #' `known_end` is the last fiscal or calendar year (depending on source) -#' for which we have definitive source data for. For ICD-9-CM and ICD-9-PCS -#' that is 2015. For ICD-10-CM and ICD-10-PCS, which are active, it is just the -#' last year of known data. ICD-10 from the WHO ends in 2019. +#' for which we have definitive source data for. For ICD-9-CM and ICD-9-PCS, +#' CMS provides data through fiscal year 2015, while the CDC extracts stop at +#' fiscal year 2012. For ICD-10-CM and ICD-10-PCS, which are active, it is just +#' the last year of known data. ICD-10 from the WHO ends in 2019. #' #' ## Header and Assignable Codes #' diff --git a/R/is_icd.R b/R/is_icd.R index a0aefe2c..57535021 100644 --- a/R/is_icd.R +++ b/R/is_icd.R @@ -67,7 +67,7 @@ is_icd <- function(x, icdv = c(9L, 10L), dx = c(1L, 0L), } # get the known icd codes and filter to relevent codes - codes <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = FALSE) + codes <- get("icd_codes", envir = ..mdcr_data_env.., inherits = FALSE) # keep based on icdv, dx, and src keep <- (codes[["icdv"]] %in% icdv) & (codes[["dx"]] %in% dx) & (codes[["src"]] %in% src) diff --git a/R/lookup_icd_codes.R b/R/lookup_icd_codes.R index 4e9cda19..d70038bf 100644 --- a/R/lookup_icd_codes.R +++ b/R/lookup_icd_codes.R @@ -43,7 +43,7 @@ lookup_icd_codes <- function(x, regex = FALSE, full.codes = TRUE, compact.codes assert_scalar_logical(compact.codes) stopifnot(isTRUE(full.codes | compact.codes)) - ICDCODES <- get_icd_codes(with.descriptions = FALSE, with.hierarchy = FALSE) + ICDCODES <- get("icd_codes", envir = ..mdcr_data_env.., inherits = FALSE) if (regex) { if(full.codes) { diff --git a/R/pccc.R b/R/pccc.R index 74ef0d6d..dedf179b 100644 --- a/R/pccc.R +++ b/R/pccc.R @@ -139,7 +139,7 @@ X <- X[, colorder, drop = FALSE] - rtn <- cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) + rtn <- mdcr_cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) if (subconditions) { rtn <- list(conditions = rtn, subconditions = list()) @@ -166,7 +166,7 @@ X[cbind(ri[keep], ci[keep])] <- 1L } - rtn[["subconditions"]][[cnd]] <- cbind(uiddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) + rtn[["subconditions"]][[cnd]] <- mdcr_cbind(uiddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) } } @@ -209,7 +209,7 @@ X[match(key_tran, key_iddf), "any_transplant"] <- 1L X <- cbind(X, num_cmrb, cmrb_flag) - rtn <- cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) + rtn <- mdcr_cbind(iddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) if (subconditions) { rtn <- list(conditions = rtn, subconditions = list()) @@ -235,7 +235,7 @@ X[cbind(ri[keep], ci[keep])] <- 1L } - rtn[["subconditions"]][[cnd]] <- cbind(uiddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) + rtn[["subconditions"]][[cnd]] <- mdcr_cbind(uiddf, as.data.frame(X, check.names = FALSE, stringsAsFactors = FALSE)) } } diff --git a/R/summary.R b/R/summary.R index efcf7240..e6297501 100644 --- a/R/summary.R +++ b/R/summary.R @@ -104,7 +104,7 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { N <- nrow(object[["conditions"]]) - conditions <- get_pccc_conditions()[c("condition", "condition_label")] + conditions <- ..mdcr_internal_pccc_conditions..[c("condition", "condition_label")] conditions <- unique(conditions) conditions <- conditions[order(conditions[["condition"]]), ] @@ -159,7 +159,7 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { warning(sprintf("Logic for pccc_summary_table has been implemented for flag.method = 'current'. Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method"))) } - conditions <- get_pccc_conditions()[c("condition", "condition_label")] + conditions <- ..mdcr_internal_pccc_conditions..[c("condition", "condition_label")] conditions <- unique(conditions) conditions <- conditions[order(conditions[["condition"]]), ] @@ -189,7 +189,7 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { warning(sprintf("Logic for pccc_summary_table has been implemented for flag.method = 'current'. Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method"))) } - conditions <- get_pccc_conditions()[c("condition", "condition_label")] + conditions <- ..mdcr_internal_pccc_conditions..[c("condition", "condition_label")] conditions <- unique(conditions) conditions <- conditions[order(conditions[["condition"]]), ] @@ -254,7 +254,7 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { warning(sprintf("Logic for charlson summary table has been implemented for flag.method = 'current'. Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method"))) } - cmrbs <- get_charlson_index_scores()[!is.na( get_charlson_index_scores()[[attr(object, "method")]]), c("condition_description", "condition")] + cmrbs <- ..mdcr_internal_charlson_index_scores..[!is.na( ..mdcr_internal_charlson_index_scores..[[attr(object, "method")]]), c("condition_description", "condition")] cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]]) cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]]) @@ -305,7 +305,7 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { warning(sprintf("Logic for Elixhauser summary has been implemented for flag.method = 'current'. Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method"))) } - cmrbs <- get_elixhauser_index_scores()[!is.na( get_elixhauser_index_scores()[[attr(object, "method")]]), "condition", drop = FALSE] + cmrbs <- ..mdcr_internal_elixhauser_index_scores..[!is.na( ..mdcr_internal_elixhauser_index_scores..[[attr(object, "method")]]), "condition", drop = FALSE] cmrbs <- unique(cmrbs) cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]]) diff --git a/R/utilities.R b/R/utilities.R index c92fa74e..c39b193c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -6,6 +6,10 @@ #' subsetting needs to be specific to data.tables. These internal (non-exported) #' functions provide the needed method by data.table or data.frame. #' +#' `mdcr_select()` deep-copies data.table subsets (via data.table::copy()) to +#' avoid aliasing when downstream code mutates; this intentionally trades some +#' performance for isolation. +#' #' @param x a data.frame or data.table #' @param i Optional. Indicates the rows on which the values must be updated. If #' not `NULL`, implies all rows. @@ -25,13 +29,44 @@ NULL #' @keywords internal mdcr_set <- function(x, i = NULL, j, value) { stopifnot(is.data.frame(x)) - if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { + if (requireNamespace(package = "data.table", quietly = TRUE) && inherits(x, "data.table")) { + # calling data.table::setDT to make sure that the object can be modified by + # reference. Without data.table::setDT here we see an error if the table was + # read from disk (e.g., readRDS()) or hand-constructed: + # + # This data.table has either been loaded from disk (e.g. using + # readRDS()/load()) or constructed manually (e.g. using structure()). + # Please run setDT() or setalloccol() on it first (to pre-allocate space + # for new columns) before assigning by reference to it. + # + # data.tables read via readRDS()/load() or hand‑constructed can have an + # invalid .internal.selfref, so the first by‑reference op (set()/:= ) errors. + # data.table::setDT() reinitializes the selfref so data.table::set() can work + # by reference. This wrapper keeps by-ref semantics consistent across backends. + getExportedValue(name = "setDT", ns = "data.table")(x = x) getExportedValue(name = "set", ns = "data.table")(x = x, i = i, j = j, value = value) + } else if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + mutate <- getExportedValue(name = "mutate", ns = "dplyr") + if (is.null(i)) { + newcol <- if (nrow(x) == 0L) value[0] else value + } else { + newcol <- x[[j]] + if (is.null(newcol)) { + newcol <- rep(NA, length = nrow(x)) + storage.mode(newcol) <- typeof(value) + } + newcol[i] <- value + } + x <- do.call(mutate, c(list(.data = x), stats::setNames(list(newcol), j))) } else { if (is.null(i)) { x[[j]] <- value } else { + if (is.null(x[[j]])) { + x[[j]] <- rep(NA, nrow(x)) + storage.mode(x[[j]]) <- typeof(value) + } x[[j]][i] <- value } } @@ -49,8 +84,25 @@ mdcr_select <- function(x, cols) { return(x) } - if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { + # # By makeing sure that cols is a character vector the `with = FALSE` is not + # # needed for data.tables which will allow for a simple call. This is + # # important because `[.data.frame` will error if `with = FALSE` is passed. + # # `[.data.table` does not need `with = FALSE` if `j` is a character vector. + # stopifnot(inherits(cols, "character")) + # #x[, cols, drop = FALSE, with = FALSE] + # x[, cols, drop = FALSE] + + if (requireNamespace(package = "data.table", quietly = TRUE) && inherits(x, "data.table")) { + # note: the data.table::copy() is needed here because x[, cols] returns a + # shallow copy of the columns. The use of mdcr_select in the package + # implicitly assumes deep copies. Downstream setorder()/setnames() mutate in + # place, so copying here preserves the original. This pays a copy cost to + # protect callers who expect an isolated subset. return(getExportedValue(name = "copy", ns = "data.table")(x[, cols, drop = FALSE, with = FALSE])) + } else if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + select <- getExportedValue(name = "select", ns = "dplyr") + all_of <- getExportedValue(name = "all_of", ns = "dplyr") + return(select(x, all_of(cols))) } else { return(x[, cols, drop = FALSE]) } @@ -71,17 +123,27 @@ mdcr_subset <- function(x, i, cols) { return(mdcr_select(x, cols = cols)) } } else { + # match base/data.table semantics: logical i is converted to positions rows <- if (is.logical(i)) which(i) else i if (missing(cols)) { - if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { + if (requireNamespace(package = "data.table", quietly = TRUE) && inherits(x, "data.table")) { return(x[rows, , drop = FALSE, with = FALSE]) + } else if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + slice <- getExportedValue(name = "slice", ns = "dplyr") + return(slice(x, rows)) } else { return(x[rows, , drop = FALSE]) } } else { - if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { + if (requireNamespace(package = "data.table", quietly = TRUE) && inherits(x, "data.table")) { return(x[rows, cols, drop = FALSE, with = FALSE]) + } else if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + slice <- getExportedValue(name = "slice", ns = "dplyr") + select <- getExportedValue(name = "select", ns = "dplyr") + all_of <- getExportedValue(name = "all_of", ns = "dplyr") + x <- slice(x, rows) + return(select(x, all_of(cols))) } else { cols_idx <- match(cols, names(x)) return(x[rows, cols_idx, drop = FALSE]) @@ -97,8 +159,11 @@ mdcr_subset <- function(x, i, cols) { #' @keywords internal mdcr_setorder <- function(x, by) { stopifnot(is.data.frame(x)) - if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { + if (requireNamespace(package = "data.table", quietly = TRUE) && inherits(x, "data.table")) { getExportedValue(name = "setorderv", ns = "data.table")(x, by) + } else if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + arrange <- getExportedValue(name = "arrange", ns = "dplyr") + x <- do.call(arrange, c(list(.data = x), lapply(by, as.name))) } else { x <- x[do.call(order, x[by]), , drop = FALSE] } @@ -112,11 +177,15 @@ mdcr_setorder <- function(x, by) { #' @keywords internal mdcr_setnames <- function(x, old, new, ...) { stopifnot(is.data.frame(x)) - if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { + stopifnot(is.character(old), is.character(new)) + stopifnot(length(old) == length(new)) + if (requireNamespace(package = "data.table", quietly = TRUE) && inherits(x, "data.table")) { getExportedValue(name = "setnames", ns = "data.table")(x, old, new, ...) + } else if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + rename <- getExportedValue(name = "rename", ns = "dplyr") + args <- c(list(.data = x), stats::setNames(lapply(old, as.name), new)) + x <- do.call(rename, args) } else { - stopifnot(is.character(old), is.character(new)) - stopifnot(length(old) == length(new)) for (i in seq_len(length(old))) { names(x)[names(x) == old[i]] <- new[i] } @@ -131,17 +200,150 @@ mdcr_setnames <- function(x, old, new, ...) { #' @keywords internal mdcr_duplicated <- function(x, by = seq_along(x), ...) { stopifnot(is.data.frame(x)) - if (requireNamespace("data.table", quietly = TRUE) && inherits(x, "data.table")) { + if (requireNamespace(package = "data.table", quietly = TRUE) && inherits(x, "data.table")) { # Flag this frame as data.table-aware so duplicated.data.table uses its # optimized path instead of falling back to duplicated.data.frame. .datatable.aware <- TRUE rtn <- utils::getFromNamespace(x = 'duplicated.data.table', ns = "data.table")(x, by = by, ...) - } else { + } else { # this is for base R data.frames and tidyverse tbl_df rtn <- duplicated(x[, by, drop = FALSE], ...) } rtn } +#' +#' @rdname mdcr_data_frame_tools +#' @family data.frame tools +#' @noRd +#' @keywords internal +mdcr_left_join <- function(x, y, ...) { + stopifnot(is.data.frame(x), is.data.frame(y)) + + if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + lj <- getExportedValue(name = "left_join", ns = "dplyr") + dots <- list(...) + if (!is.null(dots$by.x) & !is.null(dots$by.y)) { + by <- stats::setNames(dots$by.y, dots$by.x) + dots$by.x <- NULL + dots$by.y <- NULL + } else if (!is.null(dots$by)) { + by <- dots$by + dots$by <- NULL + } else { + by <- NULL + } + if (!is.null(dots$suffixes)) { + suffix <- dots$suffixes + dots$suffixes <- NULL + } else { + suffix <- c(".x", ".y") + } + # normalize to dplyr's by/suffix arguments to mirror base/data.table defaults + rtn <- do.call(what = lj, args = c(list(x = x, y = y, by = by, suffix = suffix), dots)) + } else { + # if x is a data.table and the data.table namespace is available then the + # data.table:::merge.data.table method will be called and a specific block + # for data.table is not needed here + rtn <- merge(x = x, y = y, all.x = TRUE, all.y = FALSE, sort = FALSE, allow.cartesian = TRUE, ...) + } + rtn +} + +#' +#' @rdname mdcr_data_frame_tools +#' @family data.frame tools +#' @noRd +#' @keywords internal +mdcr_full_outer_join <- function(x, y, ...) { + stopifnot(is.data.frame(x), is.data.frame(y)) + + if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + fj <- getExportedValue(name = "full_join", ns = "dplyr") + dots <- list(...) + if (!is.null(dots$by.x) & !is.null(dots$by.y)) { + by <- stats::setNames(dots$by.y, dots$by.x) + dots$by.x <- NULL + dots$by.y <- NULL + } else if (!is.null(dots$by)) { + by <- dots$by + dots$by <- NULL + } else { + by <- NULL + } + if (!is.null(dots$suffixes)) { + suffix <- dots$suffixes + dots$suffixes <- NULL + } else { + suffix <- c(".x", ".y") + } + # normalize to dplyr's by/suffix arguments to mirror base/data.table defaults + rtn <- do.call(what = fj, args = c(list(x = x, y = y, by = by, suffix = suffix), dots)) + } else { + # if x is a data.table and the data.table namespace is available then the + # data.table:::merge.data.table method will be called and a specific block + # for data.table is not needed here + rtn <- merge(x = x, y = y, all.x = TRUE, all.y = TRUE, sort = FALSE, allow.cartesian = TRUE, ...) + } + rtn +} + +#' +#' @rdname mdcr_data_frame_tools +#' @family data.frame tools +#' @noRd +#' @keywords internal +mdcr_inner_join <- function(x, y, ...) { + stopifnot(is.data.frame(x), is.data.frame(y)) + + if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + ij <- getExportedValue(name = "inner_join", ns = "dplyr") + dots <- list(...) + if (!is.null(dots$by.x) & !is.null(dots$by.y)) { + by <- stats::setNames(dots$by.y, dots$by.x) + dots$by.x <- NULL + dots$by.y <- NULL + } else if (!is.null(dots$by)) { + by <- dots$by + dots$by <- NULL + } else { + by <- NULL + } + if (!is.null(dots$suffixes)) { + suffix <- dots$suffixes + dots$suffixes <- NULL + } else { + suffix <- c(".x", ".y") + } + # normalize to dplyr's by/suffix arguments to mirror base/data.table defaults + rtn <- do.call(what = ij, args = c(list(x = x, y = y, by = by, suffix = suffix), dots)) + } else { + # if x is a data.table and the data.table namespace is available then the + # data.table:::merge.data.table method will be called and a specific block + # for data.table is not needed here + rtn <- merge(x = x, y = y, all.x = FALSE, all.y = FALSE, sort = FALSE, ...) + } + rtn +} + +#' +#' @rdname mdcr_data_frame_tools +#' @family data.frame tools +#' @noRd +#' @keywords internal +mdcr_cbind <- function(x, ...) { + stopifnot(is.data.frame(x)) + if (requireNamespace(package = "dplyr", quietly = TRUE) && inherits(x, "tbl_df")) { + cb <- getExportedValue(name = "bind_cols", ns = "dplyr") + rtn <- cb(x, ...) + } else { + # if x is a data.table and the data.table namespace is available then the + # data.table:::cbind.data.table method will be called and a specific block + # for data.table is not needed here + rtn <- cbind(x, ...) + } + rtn +} + ################################################################################ diff --git a/README.Rmd b/README.Rmd index 957a606a..024c6238 100644 --- a/README.Rmd +++ b/README.Rmd @@ -56,11 +56,11 @@ The primary objectives of medicalcoder are: ease of maintenance and usability. - Suggested packages are needed only for development work and building vignettes. They are not required for installation or use. - - That said, there are non-trivial performance gains when passing a - [`data.table`](https://cran.r-project.org/package=data.table) to the - `comorbidities()` function compared to passing a base `data.frame` or a - `tibble` from the tidyverse. - (See [benchmarking](https://github.com/dewittpe/medicalcoder/tree/main/benchmarking)). + - That said, there are non-trivial performance gains when passing a + [`data.table`](https://cran.r-project.org/package=data.table) to the + `comorbidities()` function. Passing a `tibble` is typically faster than a + base `data.frame` but slower than a `data.table`. + (See [benchmarking](https://github.com/dewittpe/medicalcoder/tree/main/benchmarking)). - Internal lookup tables - All required data are included in the package. If you have the `.tar.gz` @@ -388,10 +388,12 @@ comorbidity algorithm to a data set are: 1. Data size: number of subjects/encounters. 2. Data storage class: medicalcoder has been built such that no imports of - other namespaces is required. That said, when a `data.table` is passed to + other namespaces is required. That said, when a `data.table` is passed to `comorbidities()` and the `data.table` namespace is available, then S3 dispatch for `merge` is used, along with some other methods, to reduce memory - use and reduce computation time. + use and reduce computation time. When a `tibble` is passed and the tidyverse + namespaces are available, the tibble-aware paths improve performance over a + base `data.frame`, but `data.table` remains fastest. 3. `flag.method`: "current" will take less time than the "cumulative" method. Details on the benchmarking method, summary graphics, and tables, can be found diff --git a/README.md b/README.md index 6af06abc..1abc3280 100644 --- a/README.md +++ b/README.md @@ -48,11 +48,11 @@ The primary objectives of medicalcoder are: ease of maintenance and usability. - Suggested packages are needed only for development work and building vignettes. They are not required for installation or use. - - That said, there are non-trivial performance gains when passing a - [`data.table`](https://cran.r-project.org/package=data.table) to the - `comorbidities()` function compared to passing a base `data.frame` or a - `tibble` from the tidyverse. - (See [benchmarking](https://github.com/dewittpe/medicalcoder/tree/main/benchmarking)). + - That said, there are non-trivial performance gains when passing a + [`data.table`](https://cran.r-project.org/package=data.table) to the + `comorbidities()` function. Passing a `tibble` is typically faster than a + base `data.frame` but slower than a `data.table`. + (See [benchmarking](https://github.com/dewittpe/medicalcoder/tree/main/benchmarking)). - Internal lookup tables - All required data are included in the package. If you have the `.tar.gz` @@ -459,10 +459,12 @@ comorbidity algorithm to a data set are: 1. Data size: number of subjects/encounters. 2. Data storage class: medicalcoder has been built such that no imports of - other namespaces is required. That said, when a `data.table` is passed to + other namespaces is required. That said, when a `data.table` is passed to `comorbidities()` and the `data.table` namespace is available, then S3 dispatch for `merge` is used, along with some other methods, to reduce memory - use and reduce computation time. + use and reduce computation time. When a `tibble` is passed and the tidyverse + namespaces are available, the tibble-aware paths improve performance over a + base `data.frame`, but `data.table` remains fastest. 3. `flag.method`: "current" will take less time than the "cumulative" method. Details on the benchmarking method, summary graphics, and tables, can be found diff --git a/benchmarking/Makefile b/benchmarking/Makefile index 58db8f16..df5e910e 100644 --- a/benchmarking/Makefile +++ b/benchmarking/Makefile @@ -1,19 +1,19 @@ include ../Makevars -.NOPARALLEL: .bench1 .bench2 +.NOPARALLEL: .bench .PHONY: all clean -all: benchmark2.svg README.md +all: README.md -grid%.tsv: make_grid%.sh +grid.tsv: make_grid.sh ./$< -.bench%: run_parallel%.sh grid%.tsv +.bench: run_parallel.sh grid.tsv ./$< @touch $@ -benchmark%.svg: benchmark%-summary.R .bench% +outtable.rds: benchmark-summary.R .bench $(RSCRIPT) $< README.md: README.Rmd outtable.rds @@ -21,11 +21,10 @@ README.md: README.Rmd outtable.rds clean: $(RM) .bench* - $(RM) benchmark*.svg + $(RM) *.svg $(RM) *.pdf - $(RM) grid1.tsv grid2.tsv - $(RM) -r logs1 - $(RM) -r logs2 - $(RM) -r bench1_results - $(RM) -r bench2_results + $(RM) grid.tsv + $(RM) output.rds + $(RM) -r logs + $(RM) -r bench_results diff --git a/benchmarking/README.Rmd b/benchmarking/README.Rmd index b81315bd..c17a6341 100644 --- a/benchmarking/README.Rmd +++ b/benchmarking/README.Rmd @@ -17,10 +17,12 @@ comorbidity algorithm to a data set are: 1. Data size: number of subjects/encounters. 2. Data storage class: `medicalcoder` has been built such that no imports of - other namespaces is required. That said, when a `data.table` is passed to + other namespaces is required. That said, when a `data.table` is passed to `comorbidities()` and the `data.table` namespace is available, then S3 dispatch for `merge` is used, along with some other methods, to reduce memory - use and reduce computation time. + use and reduce computation time. When a `tibble` is passed and tidyverse + namespaces are available, the tibble-aware path reduces time relative to a + base `data.frame`, though `data.table` remains the fastest option. 3. flag.method: "current" will take less time than the "cumulative" method. @@ -61,12 +63,11 @@ outtable[, method := fifelse(subconditions, paste(method, "(with subconditions)" outtable[, subconditions := NULL] ``` -In general, the expected time to apply a comorbidity method is the same between -`data.frame`s and `tibble`s. There is a notable decrease in time required when -`data.table`s are passed to `comorbidities()`. Best observed case: a -`data.table` took +In general, the expected time to apply a comorbidity method is lower for +`tibble`s than for base `data.frame`s, and lower still for `data.table`s. Best +observed case: a `data.table` took `r outtable_orig[data_class == "data.table", min(relative_time)]` -the time of a `data.frame`. +the time of a `data.frame`; `tibble`s sit between the two. ```{r show-outtable, echo = FALSE, results = "asis"} foo <- function(x) { diff --git a/benchmarking/README.md b/benchmarking/README.md index d483ed1d..b708c1f9 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -23,7 +23,7 @@ In general, the expected time to apply a comorbidity method is the same between `data.frame`s and `tibble`s. There is a notable decrease in time required when `data.table`s are passed to `comorbidities()`. Best observed case: a `data.table` took -0.3042897 +0.3106419 the time of a `data.frame`. @@ -53,302 +53,302 @@ the time of a `data.frame`. 1,000 data.frame - 0.10 + 0.19 1.00 - 0.30 - 0.37 + 0.31 + 0.48 1.00 - 0.28 + 0.29 data.table - 0.08 - 0.87 - 0.30 - 0.26 - 0.72 - 0.28 + 0.17 + 0.89 + 0.31 + 0.39 + 0.81 + 0.29 tibble - 0.10 - 1.09 - 0.30 - 0.38 - 1.04 - 0.28 + 0.23 + 1.20 + 0.31 + 0.54 + 1.20 + 0.29 2,000 data.frame - 0.17 + 0.28 1.00 - 0.30 - 0.72 + 0.31 + 0.85 1.00 - 0.29 + 0.31 data.table - 0.14 - 0.80 + 0.23 + 0.83 + 0.31 + 0.60 + 0.72 0.30 - 0.45 - 0.64 - 0.29 tibble - 0.18 - 1.08 0.30 - 0.73 - 1.02 + 1.07 + 0.31 + 0.79 + 0.98 0.30 5,000 data.frame - 0.41 + 0.53 1.00 0.32 - 1.82 + 1.93 1.00 - 0.35 + 0.38 data.table - 0.29 - 0.72 + 0.39 + 0.75 0.32 - 1.02 - 0.57 - 0.35 + 1.20 + 0.62 + 0.36 tibble - 0.42 - 1.05 - 0.31 - 1.82 - 1.01 + 0.49 + 0.93 + 0.32 + 1.47 + 0.77 0.36 10,000 data.frame - 0.88 + 0.99 1.00 - 0.33 - 3.88 + 0.34 + 4.00 1.00 - 0.44 + 0.48 data.table - 0.58 0.66 - 0.33 - 2.08 - 0.54 + 0.68 + 0.34 + 2.26 + 0.57 0.44 tibble - 0.89 - 1.02 - 0.33 - 3.89 - 1.01 - 0.45 + 0.83 + 0.84 + 0.34 + 2.70 + 0.68 + 0.44 20,000 data.frame - 1.88 + 1.93 1.00 - 0.37 - 7.90 + 0.39 + 8.17 1.00 - 0.64 + 0.68 data.table - 1.13 + 1.18 + 0.62 + 0.39 + 4.26 + 0.53 0.61 - 0.38 - 4.06 - 0.52 - 0.64 tibble - 1.86 - 0.99 - 0.37 - 7.94 - 1.02 - 0.65 + 1.50 + 0.78 + 0.39 + 5.13 + 0.63 + 0.62 50,000 data.frame - 4.87 + 4.72 1.00 - 0.49 - 19.52 + 0.54 + 20.42 1.00 - 1.23 + 1.26 data.table 2.61 - 0.54 - 0.52 - 9.48 - 0.49 - 1.23 + 0.56 + 0.53 + 9.66 + 0.48 + 1.12 tibble - 4.69 - 0.97 - 0.51 - 19.75 - 1.03 - 1.23 + 3.33 + 0.72 + 0.53 + 11.96 + 0.59 + 1.16 100,000 data.frame - 8.93 + 8.76 1.00 - 0.75 - 37.98 + 0.80 + 38.94 1.00 - 2.26 + 2.33 data.table - 4.35 - 0.50 - 0.78 - 17.19 - 0.46 - 2.19 + 4.50 + 0.52 + 0.79 + 17.37 + 0.45 + 2.02 tibble - 8.68 - 0.98 - 0.78 - 38.34 - 1.02 - 2.26 + 5.60 + 0.66 + 0.80 + 21.50 + 0.56 + 2.20 200,000 data.frame - 16.59 + 16.55 1.00 - 1.26 - 74.58 + 1.32 + 75.52 1.00 - 4.29 + 4.43 data.table - 7.51 - 0.46 - 1.29 - 31.77 + 8.00 + 0.49 + 1.27 + 32.04 0.43 - 4.05 + 3.84 tibble - 16.28 - 1.00 + 9.75 + 0.60 1.29 - 75.20 - 1.02 - 4.27 + 39.68 + 0.53 + 4.21 500,000 data.frame - 39.81 + 40.19 1.00 - 2.84 - 186.55 + 2.89 + 189.54 1.00 - 10.24 + 10.35 data.table - 17.33 - 0.44 - 2.87 - 75.38 + 18.37 + 0.46 + 2.72 + 76.28 0.41 - 9.37 + 9.39 tibble - 39.58 - 1.01 - 2.86 - 188.54 - 1.02 - 10.13 + 22.18 + 0.56 + 2.71 + 94.83 + 0.51 + 9.75 1,000,000 data.frame - 78.88 + 80.76 1.00 - 5.79 - 379.24 + 5.71 + 389.11 1.00 - 20.63 + 20.00 data.table - 34.63 + 35.76 0.45 - 5.81 - 148.65 - 0.40 - 18.32 + 5.33 + 150.93 + 0.39 + 19.53 tibble - 79.85 - 1.03 - 5.69 - 383.84 - 1.02 - 20.22 + 43.54 + 0.55 + 5.15 + 189.79 + 0.49 + 18.45 @@ -380,302 +380,302 @@ the time of a `data.frame`. 1,000 data.frame - 0.11 + 0.20 1.00 - 0.30 - 0.53 + 0.31 + 0.67 1.00 - 0.28 + 0.29 data.table - 0.09 - 0.84 + 0.18 + 0.89 0.30 - 0.39 - 0.76 - 0.28 + 0.54 + 0.81 + 0.29 tibble - 0.12 - 1.03 - 0.30 - 0.55 - 1.02 - 0.28 + 0.24 + 1.22 + 0.31 + 0.74 + 1.15 + 0.29 2,000 data.frame - 0.20 + 0.29 1.00 - 0.30 - 1.05 + 0.31 + 1.19 1.00 - 0.30 + 0.31 data.table - 0.15 - 0.77 + 0.25 + 0.84 0.30 - 0.69 - 0.67 - 0.31 + 0.85 + 0.73 + 0.32 tibble - 0.20 - 1.02 - 0.30 - 1.05 - 1.01 + 0.32 + 1.11 + 0.31 + 1.12 + 0.97 0.31 5,000 data.frame - 0.45 + 0.56 1.00 - 0.31 - 2.62 + 0.33 + 2.72 1.00 - 0.40 + 0.39 data.table - 0.31 - 0.69 + 0.42 + 0.76 0.32 - 1.56 - 0.60 - 0.40 + 1.72 + 0.64 + 0.41 tibble - 0.45 - 1.01 - 0.31 - 2.61 - 1.00 + 0.54 + 0.97 + 0.32 + 2.15 + 0.79 0.40 10,000 data.frame - 0.96 + 1.06 1.00 - 0.33 - 5.50 + 0.35 + 5.55 1.00 - 0.55 + 0.52 data.table - 0.60 - 0.63 + 0.72 + 0.69 0.34 - 3.11 + 3.23 + 0.58 0.56 - 0.55 tibble - 0.95 - 1.00 - 0.33 - 5.46 - 0.99 - 0.56 + 0.92 + 0.88 + 0.34 + 3.97 + 0.72 + 0.54 20,000 data.frame - 2.04 + 2.10 1.00 0.39 - 10.97 + 11.14 1.00 - 0.89 + 0.81 data.table - 1.17 - 0.58 + 1.31 + 0.63 0.39 - 6.01 - 0.55 - 0.87 + 6.04 + 0.54 + 0.86 tibble - 2.00 - 0.98 - 0.38 - 10.96 - 1.01 - 0.90 + 1.66 + 0.80 + 0.39 + 7.40 + 0.66 + 0.82 50,000 data.frame - 5.29 + 5.19 1.00 - 0.56 - 26.37 + 0.54 + 27.39 1.00 - 1.89 + 1.69 data.table - 2.78 - 0.53 + 2.92 + 0.57 0.55 - 13.89 - 0.53 - 1.77 + 13.76 + 0.51 + 1.72 tibble - 5.15 - 0.98 + 3.66 + 0.71 0.55 - 26.73 - 1.02 - 1.91 + 16.85 + 0.62 + 1.62 100,000 data.frame - 9.82 + 9.49 1.00 - 0.88 - 50.73 + 0.81 + 52.73 1.00 - 3.47 + 3.17 data.table - 4.86 - 0.50 - 0.84 - 25.19 - 0.50 - 3.12 + 5.01 + 0.54 + 0.85 + 25.57 + 0.49 + 3.07 tibble - 9.59 - 0.98 - 0.87 - 51.58 - 1.03 - 3.44 + 6.10 + 0.66 + 0.82 + 30.70 + 0.59 + 2.94 200,000 data.frame - 18.38 + 17.66 1.00 - 1.49 - 99.30 + 1.33 + 102.68 1.00 - 6.50 + 6.15 data.table - 8.69 - 0.48 - 1.39 - 46.50 + 8.78 + 0.51 + 1.40 + 48.30 0.48 - 5.66 + 5.73 tibble - 17.97 - 0.99 - 1.47 - 100.52 - 1.03 - 6.41 + 10.56 + 0.62 + 1.35 + 57.25 + 0.57 + 5.59 500,000 data.frame - 44.21 + 42.42 1.00 - 3.23 - 250.26 + 2.92 + 255.09 1.00 - 15.30 + 15.10 data.table - 20.05 + 19.67 + 0.47 + 2.99 + 115.50 0.46 - 3.09 - 109.65 - 0.44 - 12.90 + 13.88 tibble - 43.14 - 0.99 - 3.23 - 249.23 - 1.01 - 15.37 + 24.13 + 0.58 + 2.83 + 137.08 + 0.54 + 13.88 1,000,000 data.frame - 87.64 + 84.97 1.00 - 6.23 - 513.48 + 5.89 + 517.47 1.00 - 30.19 + 30.59 data.table - 39.25 + 37.75 0.45 - 6.26 - 215.76 - 0.42 - 25.19 + 5.64 + 228.19 + 0.45 + 28.42 tibble - 85.84 - 0.99 - 6.25 - 504.87 - 0.99 - 31.42 + 47.85 + 0.58 + 5.32 + 273.80 + 0.53 + 28.96 @@ -707,302 +707,302 @@ the time of a `data.frame`. 1,000 data.frame - 0.15 + 0.25 1.00 - 0.29 - 1.27 + 0.30 + 1.39 1.00 - 0.32 + 0.34 data.table - 0.10 - 0.68 - 0.29 - 0.87 - 0.70 - 0.32 + 0.19 + 0.75 + 0.30 + 1.05 + 0.78 + 0.33 tibble - 0.16 - 1.07 0.29 - 1.31 - 1.04 + 1.15 + 0.30 + 1.52 + 1.15 0.32 2,000 data.frame - 0.28 + 0.38 1.00 - 0.29 - 2.50 + 0.31 + 2.60 1.00 - 0.38 + 0.41 data.table - 0.16 - 0.59 - 0.30 - 1.52 - 0.62 - 0.38 + 0.26 + 0.68 + 0.31 + 1.73 + 0.68 + 0.40 tibble - 0.29 - 1.05 - 0.29 - 2.52 - 1.01 + 0.39 + 1.03 + 0.31 + 2.40 + 0.96 0.39 5,000 data.frame - 0.68 + 0.77 1.00 - 0.31 - 6.23 + 0.33 + 6.18 1.00 - 0.59 + 0.63 data.table - 0.32 - 0.49 - 0.32 - 3.30 - 0.53 + 0.44 0.58 + 0.33 + 3.56 + 0.58 + 0.59 tibble - 0.70 - 1.03 - 0.31 - 6.17 - 0.99 - 0.61 + 0.68 + 0.89 + 0.34 + 4.76 + 0.77 + 0.58 10,000 data.frame - 1.46 + 1.51 1.00 - 0.34 - 12.75 + 0.37 + 12.50 1.00 - 0.98 + 1.02 data.table - 0.63 - 0.43 - 0.34 - 6.17 - 0.49 + 0.77 + 0.51 + 0.35 + 6.61 + 0.53 0.92 tibble - 1.49 - 1.02 - 0.33 - 12.56 - 0.99 - 1.01 + 1.20 + 0.79 + 0.38 + 8.65 + 0.69 + 0.94 20,000 data.frame - 3.05 + 3.02 1.00 - 0.40 - 25.49 + 0.44 + 24.77 1.00 - 1.79 + 1.81 data.table - 1.25 - 0.41 - 0.41 - 11.48 - 0.45 - 1.61 + 1.39 + 0.46 + 0.42 + 12.24 + 0.49 + 1.58 tibble - 3.08 - 1.02 - 0.40 - 25.15 - 0.99 - 1.85 + 2.18 + 0.73 + 0.47 + 15.96 + 0.65 + 1.65 50,000 data.frame - 7.64 + 7.51 1.00 - 0.62 - 62.74 + 0.67 + 60.78 1.00 - 4.26 + 4.11 data.table - 3.03 - 0.40 - 0.60 - 26.50 + 3.11 0.42 - 3.66 + 0.60 + 27.76 + 0.46 + 3.59 tibble - 7.66 - 1.01 - 0.62 - 62.45 - 1.00 - 4.38 + 4.84 + 0.65 + 0.73 + 36.71 + 0.61 + 3.78 100,000 data.frame - 14.41 + 14.21 1.00 - 1.02 - 121.75 + 1.08 + 118.87 1.00 - 8.32 + 7.83 data.table - 5.33 + 5.30 0.38 - 0.93 - 50.89 - 0.42 - 6.99 + 0.91 + 51.09 + 0.44 + 7.09 tibble - 14.23 - 1.00 - 1.01 - 122.85 - 1.01 - 8.30 + 8.45 + 0.61 + 1.16 + 69.71 + 0.59 + 7.16 200,000 data.frame - 27.66 + 27.31 1.00 - 1.76 - 239.68 + 1.87 + 235.78 1.00 - 16.37 + 15.24 data.table - 9.52 + 9.34 0.35 - 1.55 - 99.35 + 1.49 + 97.30 0.42 - 13.63 + 14.05 tibble - 26.95 - 0.99 - 1.74 - 243.83 - 1.02 - 16.01 + 15.28 + 0.57 + 1.98 + 135.19 + 0.58 + 13.80 500,000 data.frame - 69.05 + 67.70 1.00 - 3.83 - 607.95 + 4.14 + 601.65 1.00 - 40.08 + 37.74 data.table - 21.85 + 21.48 0.32 - 3.28 - 246.75 + 3.20 + 243.94 0.41 - 33.83 + 34.18 tibble - 66.26 - 0.97 - 3.79 - 616.73 - 1.02 - 39.18 + 36.19 + 0.54 + 4.31 + 336.73 + 0.56 + 33.70 1,000,000 data.frame - 140.69 + 137.87 1.00 - 7.26 - 1256.20 + 8.04 + 1247.81 1.00 - 79.21 + 76.08 data.table - 42.29 - 0.30 - 6.21 - 502.29 - 0.40 - 68.99 + 42.50 + 0.31 + 6.19 + 510.88 + 0.41 + 66.85 tibble - 133.94 - 0.96 - 7.23 - 1262.62 - 1.01 - 78.35 + 72.45 + 0.53 + 8.44 + 690.52 + 0.56 + 67.83 @@ -1034,302 +1034,302 @@ the time of a `data.frame`. 1,000 data.frame - 0.18 + 0.27 1.00 - 0.29 - 1.38 + 0.30 + 1.51 1.00 0.34 data.table - 0.14 + 0.22 + 0.83 + 0.31 + 1.17 0.80 - 0.29 - 0.98 - 0.73 - 0.33 + 0.34 tibble - 0.18 - 1.05 - 0.29 - 1.38 - 1.01 + 0.40 + 1.53 + 0.31 + 1.74 + 1.24 0.33 2,000 data.frame - 0.32 + 0.42 1.00 - 0.29 - 2.69 + 0.30 + 2.84 1.00 0.41 data.table - 0.21 - 0.69 0.30 - 1.71 - 0.65 - 0.39 + 0.73 + 0.31 + 1.93 + 0.70 + 0.40 tibble - 0.32 - 1.02 - 0.29 - 2.66 - 0.99 - 0.41 + 0.53 + 1.32 + 0.31 + 2.73 + 1.01 + 0.40 5,000 data.frame - 0.75 + 0.84 1.00 - 0.32 - 6.58 + 0.33 + 6.77 1.00 - 0.63 + 0.64 data.table - 0.42 - 0.56 - 0.32 - 3.69 - 0.56 + 0.51 + 0.62 + 0.33 + 3.97 + 0.59 0.60 tibble - 0.75 - 1.01 - 0.31 - 6.51 - 0.99 - 0.63 + 0.89 + 1.07 + 0.34 + 5.38 + 0.79 + 0.61 10,000 data.frame - 1.59 + 1.62 1.00 - 0.35 - 13.16 + 0.36 + 13.61 1.00 - 1.03 + 1.06 data.table - 0.80 - 0.50 - 0.34 - 6.87 - 0.53 - 0.96 + 0.88 + 0.55 + 0.36 + 7.31 + 0.54 + 0.95 tibble - 1.59 - 1.00 - 0.34 - 13.20 - 1.01 - 1.03 + 1.50 + 0.92 + 0.37 + 9.79 + 0.71 + 0.97 20,000 data.frame - 3.31 + 3.24 1.00 - 0.41 - 25.84 + 0.44 + 26.82 1.00 - 1.80 + 1.87 data.table - 1.55 - 0.47 - 0.41 - 12.74 + 1.60 0.50 - 1.69 + 0.42 + 13.54 + 0.51 + 1.65 tibble - 3.27 - 1.00 - 0.41 - 26.55 - 1.04 - 1.88 + 2.69 + 0.83 + 0.45 + 17.90 + 0.67 + 1.71 50,000 data.frame - 8.29 + 8.09 1.00 - 0.62 - 63.01 + 0.67 + 65.35 1.00 - 4.09 + 4.26 data.table - 3.66 + 3.59 0.45 0.60 - 29.36 - 0.47 - 3.91 + 31.18 + 0.48 + 3.78 tibble - 8.18 - 1.00 - 0.61 - 66.69 - 1.07 - 4.44 + 5.95 + 0.75 + 0.67 + 40.48 + 0.63 + 3.93 100,000 data.frame - 15.53 + 15.56 1.00 - 1.01 - 125.06 + 1.07 + 127.35 1.00 - 7.96 + 8.20 data.table - 6.46 - 0.42 - 0.94 - 56.70 - 0.46 - 7.77 + 6.26 + 0.41 + 0.92 + 59.44 + 0.47 + 7.40 tibble - 15.49 - 1.02 - 1.00 - 132.57 - 1.07 - 8.50 + 10.21 + 0.67 + 1.04 + 75.43 + 0.60 + 7.63 200,000 data.frame - 29.64 + 30.28 1.00 - 1.75 - 251.28 + 1.84 + 252.49 1.00 - 15.81 + 16.10 data.table - 11.57 - 0.40 - 1.58 - 111.75 - 0.45 - 15.42 + 11.32 + 0.38 + 1.53 + 115.89 + 0.46 + 14.68 tibble - 29.67 - 1.02 + 18.18 + 0.61 1.74 - 264.40 - 1.06 - 16.49 + 145.13 + 0.58 + 14.86 500,000 data.frame - 73.79 + 75.12 1.00 - 3.79 - 646.17 + 4.06 + 647.57 1.00 - 39.75 + 40.13 data.table - 26.48 - 0.37 - 3.46 - 282.87 - 0.44 - 37.05 + 26.77 + 0.36 + 3.39 + 291.30 + 0.45 + 36.34 tibble - 72.85 - 1.00 - 3.76 - 664.81 - 1.03 - 40.60 + 42.37 + 0.57 + 3.82 + 365.95 + 0.57 + 35.56 1,000,000 data.frame - 150.63 + 151.87 1.00 - 7.05 - 1341.64 + 7.90 + 1349.61 1.00 - 80.41 + 81.79 data.table - 51.26 - 0.35 - 6.73 - 585.30 - 0.44 - 71.25 + 53.90 + 0.36 + 6.74 + 602.01 + 0.45 + 72.67 tibble - 146.26 - 0.98 - 7.01 - 1347.54 - 1.01 - 82.00 + 84.35 + 0.56 + 7.57 + 764.11 + 0.57 + 69.49 diff --git a/benchmarking/benchmark-composite.pdf b/benchmarking/benchmark-composite.pdf new file mode 100644 index 00000000..db8d8798 Binary files /dev/null and b/benchmarking/benchmark-composite.pdf differ diff --git a/benchmarking/benchmark-composite.svg b/benchmarking/benchmark-composite.svg new file mode 100644 index 00000000..d6e6055f --- /dev/null +++ b/benchmarking/benchmark-composite.svg @@ -0,0 +1,2418 @@ + + + + + + + + + + + + + + +Data Class + + + + + + + + + + + + +data.frame +data.table +tibble + +flag.method + + + + +cumulative +current + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +charlson_quan2005 + + + + + + + + + + +elixhauser_quan2005 + + + + + + + + + + +pccc_v3.1 + + + + + + + + + + +pccc_v3.1 (with subconditions) + + + + + + + + + + + + + + + + + + + + + + +0.1 +1.0 +10.0 +100.0 +1,000.0 + + + + + +Time (seconds) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.5 +1.0 +1.5 +2.0 +2.5 + + + + + +Relative expected run time +(vs data.frame) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100 +1 k +10 k +100 k +1 M + + + + + +100 +1 k +10 k +100 k +1 M + + + + + +100 +1 k +10 k +100 k +1 M + + + + + +100 +1 k +10 k +100 k +1 M +1 +10 +100 + + + +Encounters +Memory (GiB) + + + diff --git a/benchmarking/benchmark-relative.pdf b/benchmarking/benchmark-relative.pdf new file mode 100644 index 00000000..e81d0307 Binary files /dev/null and b/benchmarking/benchmark-relative.pdf differ diff --git a/benchmarking/benchmark-relative.svg b/benchmarking/benchmark-relative.svg new file mode 100644 index 00000000..ead5d4d5 --- /dev/null +++ b/benchmarking/benchmark-relative.svg @@ -0,0 +1,924 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +pccc_v3.1 + + + + + + + + + + +cumulative + + + + + + + + + + +pccc_v3.1 + + + + + + + + + + +current + + + + + + + + + + +pccc_v3.1 (with subconditions) + + + + + + + + + + +cumulative + + + + + + + + + + +pccc_v3.1 (with subconditions) + + + + + + + + + + +current + + + + + + + + + + +charlson_quan2005 + + + + + + + + + + +cumulative + + + + + + + + + + +charlson_quan2005 + + + + + + + + + + +current + + + + + + + + + + +elixhauser_quan2005 + + + + + + + + + + +cumulative + + + + + + + + + + +elixhauser_quan2005 + + + + + + + + + + +current + + + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 +0.5 +1.0 +1.5 +2.0 +2.5 + + + + + +0.5 +1.0 +1.5 +2.0 +2.5 + + + + + +Encounters +Relative expected run time (vs data.frame) + +Data Class + + + + + + + + + +data.frame +data.table +tibble + + + diff --git a/benchmarking/benchmark2-summary.R b/benchmarking/benchmark-summary.R similarity index 74% rename from benchmarking/benchmark2-summary.R rename to benchmarking/benchmark-summary.R index 8ba41ce0..49e946fe 100644 --- a/benchmarking/benchmark2-summary.R +++ b/benchmarking/benchmark-summary.R @@ -3,38 +3,38 @@ library(ggplot2) ################################################################################ # data import -bench2 <- - list.files("bench2_results", full.names = TRUE) |> +bench <- + list.files("bench_results", full.names = TRUE) |> lapply(readRDS) |> lapply(setDT) |> rbindlist() mem <- - list.files("./logs2/mem", pattern = "\\.tsv$", full.names = TRUE, recursive = TRUE) |> + list.files("./logs/mem", pattern = "\\.tsv$", full.names = TRUE, recursive = TRUE) |> lapply(fread) |> rbindlist() mem[, subconditions := grepl("pccc_v3.1s", method)] mem[, method := sub("s$", "", method)] setnames(mem, "flag_method", "flag.method") -bench2_summary <- - bench2[, .(median_time_seconds = median(time_seconds)) , by = .(data_class, subjects, encounters, seed, method, subconditions, flag.method) ] +bench_summary <- + bench[, .(median_time_seconds = median(time_seconds)) , by = .(data_class, subjects, encounters, seed, method, subconditions, flag.method) ] mem_summary <- mem[, .(median_rss_kib = median(max_rss_kib)), by = .(data_class, subjects, seed, method, subconditions, flag.method)] -bench2_summary <- - merge(bench2_summary, mem_summary, all = TRUE) +bench_summary <- + merge(bench_summary, mem_summary, all = TRUE) -bench2_summary[!is.na(median_time_seconds) & !is.na(median_rss_kib)] +bench_summary[!is.na(median_time_seconds) & !is.na(median_rss_kib)] -bench2_summary[, data_class := fcase(data_class == "DF", "data.frame", +bench_summary[, data_class := fcase(data_class == "DF", "data.frame", data_class == "DT", "data.table", data_class == "TBL", "tibble")] # relative time -bench2_summary[!is.na(median_time_seconds), df_median := median_time_seconds[data_class == "data.frame"], by = .(subjects, encounters, method, subconditions, flag.method)] -bench2_summary[, relative_time := (median_time_seconds / df_median)] -bench2_summary[, df_median := NULL] +bench_summary[!is.na(median_time_seconds), df_median := median_time_seconds[data_class == "data.frame"], by = .(subjects, encounters, method, subconditions, flag.method)] +bench_summary[, relative_time := (median_time_seconds / df_median)] +bench_summary[, df_median := NULL] ################################################################################ # Plotting helpers @@ -48,7 +48,7 @@ facet_spec <- . ~ fifelse(subconditions, paste(method, "(with subconditions)"), method) + flag.method g <- - ggplot(bench2_summary) + + ggplot(bench_summary) + theme_bw() + aes(x = encounters, y = median_time_seconds, color = data_class, @@ -74,11 +74,11 @@ g <- axis.text.x = element_text(hjust = 0.75) ) -ggsave(file = "benchmark2.pdf", plot = g, width = 12, height = 7) -ggsave(file = "benchmark2.svg", plot = g, width = 12, height = 7) +ggsave(file = "benchmark.pdf", plot = g, width = 12, height = 7) +ggsave(file = "benchmark.svg", plot = g, width = 12, height = 7) gr <- - ggplot(bench2_summary) + + ggplot(bench_summary) + theme_bw() + aes(x = encounters, y = relative_time, color = data_class, fill = data_class, linetype = data_class) + stat_smooth(method = "loess", formula = y ~ x) + @@ -97,11 +97,11 @@ gr <- axis.text.x = element_text(hjust = 0.75) ) -ggsave(file = "benchmark2-relative.svg", plot = gr, width = 12, height = 7) -ggsave(file = "benchmark2-relative.pdf", plot = gr, width = 12, height = 7) +ggsave(file = "benchmark-relative.svg", plot = gr, width = 12, height = 7) +ggsave(file = "benchmark-relative.pdf", plot = gr, width = 12, height = 7) g <- - ggplot(bench2_summary) + + ggplot(bench_summary) + theme_bw() + aes(x = encounters, y = median_rss_kib / (1024^2), color = data_class, @@ -135,11 +135,11 @@ facet_spec <- . ~ fifelse(subconditions, paste(method, "(with subconditions)"), outtable <- list() -for (mt in unique(bench2_summary$method)) { - for (sc in unique(bench2_summary$subconditions)) { - for (dc in unique(bench2_summary$data_class)) { - for (fm in unique(bench2_summary$flag.method)) { - thisdt <- subset(bench2_summary, method == mt & subconditions == sc & data_class == dc & flag.method == fm) +for (mt in unique(bench_summary$method)) { + for (sc in unique(bench_summary$subconditions)) { + for (dc in unique(bench_summary$data_class)) { + for (fm in unique(bench_summary$flag.method)) { + thisdt <- subset(bench_summary, method == mt & subconditions == sc & data_class == dc & flag.method == fm) if (nrow(thisdt)) { ats_loess <- loess(log10(median_time_seconds) ~ log10(encounters), data = thisdt) ats <- predict(ats_loess, se = TRUE) @@ -147,14 +147,14 @@ for (mt in unique(bench2_summary$method)) { mem_loess <- loess(log10(median_rss_kib) ~ log10(encounters), data = thisdt) mem <- predict(mem_loess, se = TRUE) - bench2_summary[method == mt & subconditions == sc & data_class == dc & flag.method == fm & !is.na(median_time_seconds) & !is.na(encounters), + bench_summary[method == mt & subconditions == sc & data_class == dc & flag.method == fm & !is.na(median_time_seconds) & !is.na(encounters), `:=`( time_smoothed_y = 10^(ats$fit), time_smoothed_lwr = 10^(ats$fit - 1.96 * ats$se.fit), time_smoothed_upr = 10^(ats$fit + 1.96 * ats$se.fit) )] - bench2_summary[method == mt & subconditions == sc & data_class == dc & flag.method == fm & !is.na(median_rss_kib) & !is.na(encounters), + bench_summary[method == mt & subconditions == sc & data_class == dc & flag.method == fm & !is.na(median_rss_kib) & !is.na(encounters), `:=`( mem_smoothed_y = 10^(mem$fit), mem_smoothed_lwr = 10^(mem$fit - 1.96 * mem$se.fit), @@ -163,7 +163,7 @@ for (mt in unique(bench2_summary$method)) { if (dc != "data.frame") { rts_loess <- loess(relative_time ~ log10(encounters), data = thisdt) rts <- predict(rts_loess, se = TRUE) - bench2_summary[method == mt & subconditions == sc & data_class == dc & flag.method == fm & !is.na(relative_time) & !is.na(encounters), + bench_summary[method == mt & subconditions == sc & data_class == dc & flag.method == fm & !is.na(relative_time) & !is.na(encounters), `:=`( rel_time_smoothed_y = rts$fit, rel_time_smoothed_lwr = rts$fit - 1.96 * rts$se.fit, @@ -190,10 +190,8 @@ for (mt in unique(bench2_summary$method)) { } } -outtable <- rbindlist(outtable) -saveRDS(outtable, file = "outtable.rds") -bench2_summary[data_class == "data.frame", +bench_summary[data_class == "data.frame", `:=`( rel_time_smoothed_y = 1, rel_time_smoothed_lwr = 1, @@ -202,15 +200,15 @@ bench2_summary[data_class == "data.frame", # use this data set to identify the flag.method -setkey(bench2_summary, +setkey(bench_summary, method, data_class, subconditions, flag.method, subjects) fmpt <- - bench2_summary[, .(encounters = max(encounters, na.rm = TRUE)), keyby = .(method, data_class, subconditions, flag.method, subjects)] -fmpt <- bench2_summary[fmpt, on = c(key(fmpt), "encounters")] + bench_summary[, .(encounters = max(encounters, na.rm = TRUE)), keyby = .(method, data_class, subconditions, flag.method, subjects)] +fmpt <- bench_summary[fmpt, on = c(key(fmpt), "encounters")] fmpt <- unique(fmpt) g1 <- - ggplot(bench2_summary) + + ggplot(bench_summary) + theme_bw() + aes(x = encounters, y = time_smoothed_y, @@ -223,7 +221,6 @@ g1 <- ) + geom_line() + geom_ribbon(alpha = 0.2, mapping = aes(color = NULL)) + - #geom_point(mapping = aes(y = median)) + geom_point(data = fmpt, mapping = aes(shape = flag.method), size = 2) + scale_x_log10(labels = scales::label_number(scale_cut = scales::cut_si(""))) + scale_y_log10(labels = scales::label_comma()) + @@ -242,7 +239,7 @@ g1 <- ) g2 <- - ggplot(bench2_summary) + + ggplot(bench_summary) + theme_bw() + aes(x = encounters, y = rel_time_smoothed_y, @@ -256,7 +253,7 @@ g2 <- geom_line() + geom_ribbon(alpha = 0.2, mapping = aes(color = NULL)) + geom_point(data = fmpt[data_class != "data.frame"], mapping = aes(shape = flag.method), size = 2) + - scale_y_continuous(breaks = seq(0.4, 1.4, by = 0.2)) + + scale_y_continuous() + #breaks = seq(0.4, 1.4, by = 0.2)) + scale_x_log10(labels = scales::label_number(scale_cut = scales::cut_si(""))) + annotation_logticks(sides = "b") + scale_fill_manual(name = "Data Class", values = cclr) + @@ -272,7 +269,7 @@ g2 <- ) g3 <- - ggplot(bench2_summary) + + ggplot(bench_summary) + theme_bw() + aes(x = encounters, y = mem_smoothed_y / (1024^2), @@ -302,7 +299,7 @@ g3 <- axis.text.x = element_text(hjust = 0.75) ) -svglite::svglite(filename = "benchmark2-composite.svg", width = 9, height = 7) +svglite::svglite(filename = "benchmark-composite.svg", width = 9, height = 7) ggpubr::ggarrange(g1 + theme(axis.title.x = element_blank(), axis.text.x = element_blank()), g2 + theme(axis.title.x = element_blank(), axis.text.x = element_blank(), strip.text = element_blank(), strip.background = element_blank()), @@ -311,9 +308,27 @@ svglite::svglite(filename = "benchmark2-composite.svg", width = 9, height = 7) dev.off() -pdf(file = "benchmark2-composite.pdf", width = 12, height = 9) +png(filename = "benchmark-composite.png", width = 9, height = 7) + + ggpubr::ggarrange(g1 + theme(axis.title.x = element_blank(), axis.text.x = element_blank()), + g2 + theme(axis.title.x = element_blank(), axis.text.x = element_blank(), strip.text = element_blank(), strip.background = element_blank()), + g3 + theme(strip.text = element_blank(), strip.background = element_blank()), + ncol = 1, align = "v", common.legend = TRUE) + +dev.off() + +pdf(file = "benchmark-composite.pdf", width = 12, height = 9) ggpubr::ggarrange(g1 + theme(axis.title.x = element_blank(), axis.text.x = element_blank()), g2 + theme(axis.title.x = element_blank(), axis.text.x = element_blank()), g3, ncol = 1, align = "v", common.legend = TRUE) dev.off() + +################################################################################ +# final step - save the outtable.rds file, this is tracked in the Makefile +outtable <- rbindlist(outtable) +saveRDS(outtable, file = "outtable.rds") + +################################################################################ +# End of File # +################################################################################ diff --git a/benchmarking/benchmark-utilities.R b/benchmarking/benchmark-utilities.R index 252558c4..a29777ab 100644 --- a/benchmarking/benchmark-utilities.R +++ b/benchmarking/benchmark-utilities.R @@ -45,63 +45,7 @@ icd_codes <- medicalcoder::get_icd_codes()[, c("icdv", "dx", "full_code")] icd_dx_codes <- icd_codes[icd_codes$dx == 1, ] icd_pr_codes <- icd_codes[icd_codes$dx == 0, ] -build_set1 <- function(data_class = c("DF", "DT", "TBL") , subjects = 10 , seed = 1) { - subjects <- as.integer(subjects) - stopifnot(subjects > 0) - seed <- as.integer(seed) - set.seed(seed) - - data_class <- match.arg(data_class) - - dxs <- as.list(sample(dx_distribution$ncodes, size = subjects, prob = dx_distribution$d, replace = TRUE)) - prs <- as.list(sample(pr_distribution$ncodes, size = subjects, prob = pr_distribution$d, replace = TRUE)) - - # How many dx and pr codes from pccc? - pdxs <- lapply(lapply(dxs, runif), function(x) sum(x < 0.3)) - pprs <- lapply(lapply(prs, runif), function(x) sum(x < 0.3)) - - # any icd codes - dxs <- mapply(function(x, y) {x - y}, x = dxs, y = pdxs) - prs <- mapply(function(x, y) {x - y}, x = prs, y = pprs) - - set <- - mapply(function(sid, pdx, ppr, adx, apr) { - x <- - rbind( - pccc_dx_codes[sample(seq_len(nrow(pccc_dx_codes)), size = pdx), ], - pccc_pr_codes[sample(seq_len(nrow(pccc_pr_codes)), size = ppr), ], - icd_dx_codes[sample(seq_len(nrow(icd_dx_codes)), size = adx), ], - icd_pr_codes[sample(seq_len(nrow(icd_pr_codes)), size = apr), ] - ) - if (nrow(x)) { - x$subject_id <- sid - } else { - x <- data.frame(icdv = NA_integer_, dx = NA_integer_, full_code = NA_character_, subject_id = sid) - } - x - }, - pdx = pdxs, ppr = pprs, adx = dxs, apr = prs, sid = seq_along(pdxs), - SIMPLIFY = FALSE - ) - set <- do.call(rbind, set) - rownames(set) <- NULL - - if (data_class == "DT") { - require(data.table) - data.table::setDT(set) - } else if (data_class == "TBL") { - require(tibble) - set <- as_tibble(set) - } - - attr(set, "data_class") <- data_class - attr(set, "nsubjects") <- subjects - attr(set, "nencounters") <- subjects - - set -} - -build_set2 <- function(data_class = c("DF", "DT", "TBL") , subjects = 10 , seed = 1) { +build_set <- function(data_class = c("DF", "DT", "TBL") , subjects = 10 , seed = 1) { subjects <- as.integer(subjects) stopifnot(subjects > 0) seed <- as.integer(seed) @@ -164,35 +108,7 @@ build_set2 <- function(data_class = c("DF", "DT", "TBL") , subjects = 10 , seed set } -benchmark1 <- function(data, method, subconditions) { - tic <- Sys.time() - comorbidities( - data = data, - icd.codes = "full_code", - id.vars = "subject_id", - icdv.var = "icdv", - dx.var = "dx", - poa = 1, - primarydx = 0, - flag.method = "current", - method = method, - subconditions = subconditions - ) - toc <- Sys.time() - - data.frame( - data_class = attr(data, "data_class"), - subjects = attr(data, "nsubjects"), - encounters = attr(data, "nencounters"), - method = method, - subconditions = subconditions, - flag.method = "current", - seed = 1, - time_seconds = as.numeric(difftime(toc, tic, units = "secs")) - ) -} - -benchmark2 <- function(data, method, subconditions, flag.method) { +benchmark <- function(data, method, subconditions, flag.method) { tic <- Sys.time() comorbidities( data = data, diff --git a/benchmarking/benchmark2.R b/benchmarking/benchmark.R similarity index 87% rename from benchmarking/benchmark2.R rename to benchmarking/benchmark.R index c13a4cfd..033557de 100644 --- a/benchmarking/benchmark2.R +++ b/benchmarking/benchmark.R @@ -9,7 +9,7 @@ if (interactive()) { args <- as.list(strsplit(outfile, split = "__")[[1]]) this_data_set <- - build_set2( + build_set( data_class = args[[1]], subjects = as.integer(args[[2]]), seed = as.integer(args[[5]]) @@ -24,12 +24,12 @@ if (subconditions) { } output <- - benchmark2( + benchmark( data = this_data_set, method = method, subconditions = subconditions, flag.method = flag_method ) -saveRDS(output, file = file.path("bench2_results", outfile)) +saveRDS(output, file = file.path("bench_results", outfile)) diff --git a/benchmarking/benchmark.pdf b/benchmarking/benchmark.pdf new file mode 100644 index 00000000..052393f4 Binary files /dev/null and b/benchmarking/benchmark.pdf differ diff --git a/benchmarking/benchmark.svg b/benchmarking/benchmark.svg new file mode 100644 index 00000000..5eb0607a --- /dev/null +++ b/benchmarking/benchmark.svg @@ -0,0 +1,5301 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +pccc_v3.1 + + + + + + + + + + +cumulative + + + + + + + + + + +pccc_v3.1 + + + + + + + + + + +current + + + + + + + + + + +pccc_v3.1 (with subconditions) + + + + + + + + + + +cumulative + + + + + + + + + + +pccc_v3.1 (with subconditions) + + + + + + + + + + +current + + + + + + + + + + +charlson_quan2005 + + + + + + + + + + +cumulative + + + + + + + + + + +charlson_quan2005 + + + + + + + + + + +current + + + + + + + + + + +elixhauser_quan2005 + + + + + + + + + + +cumulative + + + + + + + + + + +elixhauser_quan2005 + + + + + + + + + + +current + + + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 + + + + + +100 +1,000 +10,000 +100,000 +1,000,000 +0.1 +1.0 +10.0 +100.0 +1,000.0 + + + + + +0.1 +1.0 +10.0 +100.0 +1,000.0 + + + + + +Encounters +Time (seconds) + +Data Class + + + + + + + + + + +data.frame +data.table +tibble + + + diff --git a/benchmarking/benchmark1-summary.R b/benchmarking/benchmark1-summary.R deleted file mode 100644 index 6ba53690..00000000 --- a/benchmarking/benchmark1-summary.R +++ /dev/null @@ -1,77 +0,0 @@ -library(data.table) -library(ggplot2) - -bench1 <- - list.files("bench1_results", full.names = TRUE) |> - lapply(readRDS) |> - lapply(setDT) |> - rbindlist() - -bench1[, data_class := fcase(data_class == "DF", "data.frame", - data_class == "DT", "data.table", - data_class == "TBL", "tibble")] - -cclr <- c("data.table" = "#8da0cb", "tibble" = "#fc8d62", "data.frame" = "#66c2a5") -ctyp <- c("data.frame" = 2, "data.table" = 1, "tibble" = 3) - -bench1_summary <- - bench1[, - .(mean = mean(time_seconds), median = median(time_seconds), q3 = quantile(time_seconds, prob = 0.75), q1 = quantile(time_seconds, prob = 0.25)) - , by = .(data_class, subjects, encounters, method, subconditions, flag.method) - ] - -# relative time -bench1_summary[, df_mean := mean[data_class == "data.frame"], by = .(subjects, encounters, method, subconditions, flag.method)] -bench1_summary[, rt := (mean / df_mean)] - -ggplot(bench1_summary) + - theme_bw() + - aes(x = subjects, - ymin = q1, - y = median, - ymax = q3, - color = data_class, - fill = data_class, - linetype = data_class, - shape = data_class - ) + - geom_point() + - geom_line() + - scale_x_log10(labels = scales::label_comma()) + - scale_y_log10(labels = scales::label_comma()) + - scale_fill_manual(name = "Data Class", values = cclr) + - scale_color_manual(name = "Data Class", values = cclr) + - scale_linetype_manual(name = "Data Class", values = ctyp) + - scale_shape_manual(name = "Data Class", values = ctyp) + - annotation_logticks() + - xlab("Encounters") + - ylab("Time (seconds)") + - facet_wrap(. ~ fifelse(subconditions, paste(method, "(with subconditions)"), method)) + - theme( - panel.grid.minor = element_blank(), - legend.position = "bottom", - axis.text.x = element_text(hjust = 0.75) - ) - -ggsave(file = "benchmark1.svg", width = 7, height = 7) - -ggplot(bench1_summary) + - theme_bw() + - aes(x = encounters, y = rt, color = data_class, fill = data_class, linetype = data_class) + - stat_smooth(method = "loess", formula = y ~ x) + - scale_y_continuous() + - scale_x_log10(labels = scales::label_comma()) + - annotation_logticks(sides = "b") + - scale_color_manual(name = "Data Class", values = cclr) + - scale_fill_manual(name = "Data Class", values = cclr) + - scale_linetype_manual(name = "Data Class", values = ctyp) + - xlab("Encounters") + - ylab("Relative expected run time (vs data.frame)") + - facet_wrap(. ~ fifelse(subconditions, paste(method, "(with subconditions)"), method)) + - theme( - panel.grid.minor.x = element_blank(), - legend.position = "bottom", - axis.text.x = element_text(hjust = 0.75) - ) - -ggsave(file = "benchmark1-relative.svg", width = 7, height = 7) diff --git a/benchmarking/benchmark1.R b/benchmarking/benchmark1.R deleted file mode 100644 index d8ac6cf8..00000000 --- a/benchmarking/benchmark1.R +++ /dev/null @@ -1,33 +0,0 @@ -source("benchmark-utilities.R") - -if (interactive()) { - outfile <- "DT__1e3__pccc_v3.1__1__1.rds" -} else { - outfile <- commandArgs(trailingOnly = TRUE) -} - -args <- as.list(strsplit(outfile, split = "__")[[1]]) - -this_data_set <- - build_set1( - data_class = args[[1]], - subjects = as.integer(args[[2]]), - seed = as.integer(args[[4]]) - ) - -method <- args[[3]] -subconditions <- endsWith(method, "s") - -if (subconditions) { - method <- sub("s$", "", method) -} - -output <- - benchmark1( - data = this_data_set, - method = method, - subconditions = subconditions - ) - -saveRDS(output, file = file.path("bench1_results", outfile)) - diff --git a/benchmarking/benchmark2-composite.png b/benchmarking/benchmark2-composite.png deleted file mode 100644 index 8d34a22c..00000000 Binary files a/benchmarking/benchmark2-composite.png and /dev/null differ diff --git a/benchmarking/benchmark2-composite.svg b/benchmarking/benchmark2-composite.svg deleted file mode 100644 index 9e4ce247..00000000 --- a/benchmarking/benchmark2-composite.svg +++ /dev/null @@ -1,2464 +0,0 @@ - - - - - - - - - - - - - - -Data Class - - - - - - - - - - - - -data.frame -data.table -tibble - -flag.method - - - - -cumulative -current - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -charlson_quan2005 - - - - - - - - - - -elixhauser_quan2005 - - - - - - - - - - -pccc_v3.1 - - - - - - - - - - -pccc_v3.1 (with subconditions) - - - - - - - - - - - - - - - - - - - - - - -0.1 -1.0 -10.0 -100.0 -1,000.0 - - - - - -Time (seconds) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.4 -0.6 -0.8 -1.0 -1.2 -1.4 - - - - - - -Relative expected run time -(vs data.frame) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -100 -1 k -10 k -100 k -1 M - - - - - -100 -1 k -10 k -100 k -1 M - - - - - -100 -1 k -10 k -100 k -1 M - - - - - -100 -1 k -10 k -100 k -1 M -1 -10 -100 - - - -Encounters -Memory (GiB) - - - diff --git a/benchmarking/benchmark2-relative.svg b/benchmarking/benchmark2-relative.svg deleted file mode 100644 index 99c43cb2..00000000 --- a/benchmarking/benchmark2-relative.svg +++ /dev/null @@ -1,884 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -pccc_v3.1 - - - - - - - - - - -cumulative - - - - - - - - - - -pccc_v3.1 - - - - - - - - - - -current - - - - - - - - - - -pccc_v3.1 (with subconditions) - - - - - - - - - - -cumulative - - - - - - - - - - -pccc_v3.1 (with subconditions) - - - - - - - - - - -current - - - - - - - - - - -charlson_quan2005 - - - - - - - - - - -cumulative - - - - - - - - - - -charlson_quan2005 - - - - - - - - - - -current - - - - - - - - - - -elixhauser_quan2005 - - - - - - - - - - -cumulative - - - - - - - - - - -elixhauser_quan2005 - - - - - - - - - - -current - - - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 -0.5 -1.0 -1.5 - - - -0.5 -1.0 -1.5 - - - -Encounters -Relative expected run time (vs data.frame) - -Data Class - - - - - - - - - -data.frame -data.table -tibble - - - diff --git a/benchmarking/benchmark2.svg b/benchmarking/benchmark2.svg deleted file mode 100644 index 2d0f9fe9..00000000 --- a/benchmarking/benchmark2.svg +++ /dev/null @@ -1,5333 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -pccc_v3.1 - - - - - - - - - - -cumulative - - - - - - - - - - -pccc_v3.1 - - - - - - - - - - -current - - - - - - - - - - -pccc_v3.1 (with subconditions) - - - - - - - - - - -cumulative - - - - - - - - - - -pccc_v3.1 (with subconditions) - - - - - - - - - - -current - - - - - - - - - - -charlson_quan2005 - - - - - - - - - - -cumulative - - - - - - - - - - -charlson_quan2005 - - - - - - - - - - -current - - - - - - - - - - -elixhauser_quan2005 - - - - - - - - - - -cumulative - - - - - - - - - - -elixhauser_quan2005 - - - - - - - - - - -current - - - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 - - - - - -100 -1,000 -10,000 -100,000 -1,000,000 -0.1 -1.0 -10.0 -100.0 -1,000.0 - - - - - -0.1 -1.0 -10.0 -100.0 -1,000.0 - - - - - -Encounters -Time (seconds) - -Data Class - - - - - - - - - - -data.frame -data.table -tibble - - - diff --git a/benchmarking/make_grid2.sh b/benchmarking/make_grid.sh similarity index 97% rename from benchmarking/make_grid2.sh rename to benchmarking/make_grid.sh index 75d4a823..973c8af7 100755 --- a/benchmarking/make_grid2.sh +++ b/benchmarking/make_grid.sh @@ -22,4 +22,4 @@ ITERS=$(seq 1 5) done done done -} > grid2.tsv +} > grid.tsv diff --git a/benchmarking/make_grid1.sh b/benchmarking/make_grid1.sh deleted file mode 100755 index efca7ffb..00000000 --- a/benchmarking/make_grid1.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -DATA_CLASSES=(DF DT TBL) -SUBJECTS=(1e1 1e2 1e3 5e3 1e4 5e4 1e5 5e5 1e6) -METHODS=(pccc_v3.1 pccc_v3.1s charlson_quan2005 elixhauser_quan2005) -SEEDS=$(seq 1 10) -ITERS=$(seq 1 10) - -{ - echo -e "data_class\tsubjects\tmethod\tseed\titer" - for dc in "${DATA_CLASSES[@]}"; do - for n in "${SUBJECTS[@]}"; do - for m in "${METHODS[@]}"; do - for s in ${SEEDS[@]}; do - for i in ${ITERS}; do - echo -e "${dc}\t${n}\t${m}\t${s}\t${i}" - done - done - done - done - done -} > grid1.tsv diff --git a/benchmarking/outtable.rds b/benchmarking/outtable.rds index c3d04d72..5f6976cc 100644 Binary files a/benchmarking/outtable.rds and b/benchmarking/outtable.rds differ diff --git a/benchmarking/run_parallel2.sh b/benchmarking/run_parallel.sh similarity index 72% rename from benchmarking/run_parallel2.sh rename to benchmarking/run_parallel.sh index 2645fc6b..636b159b 100755 --- a/benchmarking/run_parallel2.sh +++ b/benchmarking/run_parallel.sh @@ -1,7 +1,7 @@ #!/bin/bash set -euo pipefail -mkdir -p bench2_results logs2 logs2/mem +mkdir -p bench_results logs logs/mem # Detect cores (Linux + macOS) if command -v nproc >/dev/null 2>&1; then @@ -34,12 +34,12 @@ parallel \ --shuf \ --bar \ --eta \ - --joblog logs2/joblog1.tsv \ - --results logs2/out \ + --joblog logs/joblog1.tsv \ + --results logs/out \ ' - OUT="bench2_results/{data_class}__{subjects}__{method}__{flag_method}__{seed}__{iter}.rds" - MEMRAW="logs2/mem/{data_class}__{subjects}__{method}__{flag_method}__{seed}__{iter}.raw" - MEMTSV="logs2/mem/{data_class}__{subjects}__{method}__{flag_method}__{seed}__{iter}.tsv" + OUT="bench_results/{data_class}__{subjects}__{method}__{flag_method}__{seed}__{iter}.rds" + MEMRAW="logs/mem/{data_class}__{subjects}__{method}__{flag_method}__{seed}__{iter}.raw" + MEMTSV="logs/mem/{data_class}__{subjects}__{method}__{flag_method}__{seed}__{iter}.tsv" # Idempotent: skip if result already exists and non-empty test -s "$OUT" && exit 0 @@ -47,11 +47,11 @@ parallel \ # Run and capture peak RSS cross-platform if /usr/bin/time -v true >/dev/null 2>&1; then # Linux/GNU time: write verbose stats to MEMRAW - /usr/bin/time -v -o "$MEMRAW" Rscript benchmark2.R "$(basename "$OUT")" + /usr/bin/time -v -o "$MEMRAW" Rscript benchmark.R "$(basename "$OUT")" PEAK_KIB=$(awk -F: "/Maximum resident set size/ {gsub(/^[[:space:]]+|[[:space:]]+$/,\"\",\$2); print \$2}" "$MEMRAW") else # macOS/BSD time: -l prints "maximum resident set size" in bytes to stderr - /usr/bin/time -l Rscript benchmark2.R "$(basename "$OUT")" 2> "$MEMRAW" + /usr/bin/time -l Rscript benchmark.R "$(basename "$OUT")" 2> "$MEMRAW" # Convert bytes -> KiB (rounded) PEAK_KIB=$(awk "/maximum resident set size/ {kb=int(\$1/1024+0.5); print kb}" "$MEMRAW") fi @@ -60,11 +60,11 @@ parallel \ printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \ "{data_class}" "{subjects}" "{method}" "{flag_method}" "{seed}" "{iter}" "$OUT" "$PEAK_KIB" >> "$MEMTSV" ' \ - :::: grid2.tsv + :::: grid.tsv # Aggregate per-job mem into one table (overwrite each run) { echo -e "data_class\tsubjects\tmethod\tflag_method\tseed\titer\tout\tmax_rss_kib" - awk 'FNR==1 && NR!=1 { next } { print }' logs2/mem/*.tsv 2>/dev/null || true -} > logs2/peak_mem.tsv + awk 'FNR==1 && NR!=1 { next } { print }' logs/mem/*.tsv 2>/dev/null || true +} > logs/peak_mem.tsv diff --git a/benchmarking/run_parallel1.sh b/benchmarking/run_parallel1.sh deleted file mode 100755 index c689b822..00000000 --- a/benchmarking/run_parallel1.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -euo pipefail - -mkdir -p bench1_results logs1 bench2_results logs2 - -# Detect cores (Linux + macOS) -if command -v nproc >/dev/null 2>&1; then - CORES=$(nproc) -else - CORES=$(sysctl -n hw.ncpu) -fi - -# Default concurrency: on dragontail 80% of machine cores (>=1) -JOBS=$(( 4 * CORES / 5 )) -if [ "$JOBS" -lt 1 ]; then JOBS=1; fi - -# Memory safeguard: require 2 GiB free before starting another job. -# Tweak this per box; you can also use 4G/8G on small/large machines. -MEMFREE="128G" - -# Export per-job env to force single-threaded math/libs: -export OPENBLAS_NUM_THREADS=1 -export OMP_NUM_THREADS=1 -export MKL_NUM_THREADS=1 -export VECLIB_MAXIMUM_THREADS=1 -export GOTO_NUM_THREADS=1 -export R_DATATABLE_NUM_THREADS=1 - -# Run! -parallel \ - --colsep '\t' \ - --header : \ - --jobs "$JOBS" \ - --memfree "$MEMFREE" \ - --shuf \ - --bar \ - --eta \ - --joblog logs1/joblog1.tsv \ - --results logs1/out \ - ' - OUT="bench1_results/{data_class}__{subjects}__{method}__{seed}__{iter}.rds" - # Idempotent: skip if file exists and is non-empty - test -s "$OUT" && exit 0 - - Rscript benchmark1.R "$(basename "$OUT")" - ' \ - :::: grid1.tsv diff --git a/man-roxygen/details-header-and-assignable-codes.R b/man-roxygen/details-header-and-assignable-codes.R index 22d8f5a8..3bdabbbf 100644 --- a/man-roxygen/details-header-and-assignable-codes.R +++ b/man-roxygen/details-header-and-assignable-codes.R @@ -10,4 +10,5 @@ #' Code 055.7 is a header because 055.71 and 055.72 exist. #' #' Some codes change status across years. For example, ICD-9-CM 516.3 was -#' assignable in fiscal years 2006–2011, then became a header in 2012–2015. +#' assignable in fiscal years 1997–2011 for the CDC extracts (2006–2011 for CMS) +#' and became a header in 2012–2015. diff --git a/man-roxygen/params-icd-year.R b/man-roxygen/params-icd-year.R index 7efd8535..4055f1b8 100644 --- a/man-roxygen/params-icd-year.R +++ b/man-roxygen/params-icd-year.R @@ -1,4 +1,5 @@ #' @param year Numeric scalar. Calendar or fiscal year to reference. Default -#' is the most current year available per source. For ICD-9, the latest year -#' is 2015; ICD-10 source are updated annually. Calendar year for WHO and CDC -#' mortality. Fiscal year for CMS. +#' is the most current year available per source. For ICD-9, CMS data run +#' through fiscal year 2015 and CDC extracts through 2012; ICD-10 sources are +#' updated annually. Calendar year for WHO and CDC mortality. Fiscal year for +#' CMS. diff --git a/man/get_icd_codes.Rd b/man/get_icd_codes.Rd index 8d0bf6df..1d7db691 100644 --- a/man/get_icd_codes.Rd +++ b/man/get_icd_codes.Rd @@ -79,14 +79,16 @@ calendar years. \code{known_start} is the first fiscal or calendar year (depending on source) that the medicalcoder package as definitive source data for. ICD-9-CM started in -the United States in fiscal year 1980. Source information that could be -downloaded from the CDC and CMS and added to the source code for the -medicalcoder package goes back to 1997. As such 1997 is the "known start" +the United States in fiscal year 1980. The CDC extracts included in +medicalcoder span fiscal years 1997--2012; the CMS ICD-9-CM/PCS extracts +start in fiscal year 2006 and run through fiscal year 2015. As such 1997 is +the earliest "known start" for ICD-9 within medicalcoder. \code{known_end} is the last fiscal or calendar year (depending on source) -for which we have definitive source data for. For ICD-9-CM and ICD-9-PCS -that is 2015. For ICD-10-CM and ICD-10-PCS, which are active, it is just the -last year of known data. ICD-10 from the WHO ends in 2019. +for which we have definitive source data for. For ICD-9-CM and ICD-9-PCS, +CMS provides data through fiscal year 2015, while the CDC extracts stop at +fiscal year 2012. For ICD-10-CM and ICD-10-PCS, which are active, it is just +the last year of known data. ICD-10 from the WHO ends in 2019. } \subsection{Header and Assignable Codes}{ diff --git a/man/is_icd.Rd b/man/is_icd.Rd index fd2987c1..62a8e8d9 100644 --- a/man/is_icd.Rd +++ b/man/is_icd.Rd @@ -31,9 +31,10 @@ diagnostic (ICD-9-CM, ICD-10-CM, CDC mortality, WHO), \code{0L} for procedural \code{"cdc"}. Defaults to all.} \item{year}{Numeric scalar. Calendar or fiscal year to reference. Default -is the most current year available per source. For ICD-9, the latest year -is 2015; ICD-10 source are updated annually. Calendar year for WHO and CDC -mortality. Fiscal year for CMS.} +is the most current year available per source. For ICD-9, CMS data run +through fiscal year 2015 and CDC extracts through 2012; ICD-10 sources are +updated annually. Calendar year for WHO and CDC mortality. Fiscal year for +CMS.} \item{headerok}{Logical scalar. If \code{FALSE} (default), only assignable codes are considered valid; if \code{TRUE}, header codes are also accepted.} @@ -75,7 +76,8 @@ Similarly for ICD-9-CM: "055" is a header for measles; 055.0, 055.1, Code 055.7 is a header because 055.71 and 055.72 exist. Some codes change status across years. For example, ICD-9-CM 516.3 was -assignable in fiscal years 2006–2011, then became a header in 2012–2015. +assignable in fiscal years 1997–2011 for the CDC extracts (2006–2011 for CMS) +and became a header in 2012–2015. } \examples{ ################################################################################ diff --git a/man/lookup_icd_codes.Rd b/man/lookup_icd_codes.Rd index 50acc26c..0f490682 100644 --- a/man/lookup_icd_codes.Rd +++ b/man/lookup_icd_codes.Rd @@ -61,7 +61,8 @@ Similarly for ICD-9-CM: "055" is a header for measles; 055.0, 055.1, Code 055.7 is a header because 055.71 and 055.72 exist. Some codes change status across years. For example, ICD-9-CM 516.3 was -assignable in fiscal years 2006–2011, then became a header in 2012–2015. +assignable in fiscal years 1997–2011 for the CDC extracts (2006–2011 for CMS) +and became a header in 2012–2015. } \seealso{ \itemize{ diff --git a/man/mdcr.Rd b/man/mdcr.Rd index b213245d..2b9e9482 100644 --- a/man/mdcr.Rd +++ b/man/mdcr.Rd @@ -5,9 +5,7 @@ \alias{mdcr} \title{Synthetic Data} \format{ -\code{mdcr} is a \code{data.frame} with 4 columns, one for a patient id and 41 for -diagnostic codes and 41 possible procedure codes. Each row is for one -patient id. +\code{mdcr} is a \code{data.frame} with 4 columns, Each row is for one ICD id. \itemize{ \item \code{patid}: patient identifier, integer values \item \code{icdv}: ICD version; integer values, 9 or 10 diff --git a/man/mdcr_longitudinal.Rd b/man/mdcr_longitudinal.Rd index 07dfcda0..65aa6f9a 100644 --- a/man/mdcr_longitudinal.Rd +++ b/man/mdcr_longitudinal.Rd @@ -5,9 +5,9 @@ \alias{mdcr_longitudinal} \title{Synthetic Longitudinal Data} \format{ -\code{mdcr_longitudinal} is a \code{data.frame} with four columns. The codes are -expected to be treated as diagnostic codes but there are a few ICD-9 codes -which could match to procedure codes as well. +\code{mdcr_longitudinal} is a \code{data.frame} with 4 columns. The codes are +expected to be treated as diagnostic codes. Warning: there are a few ICD-9 +codes which could match to procedure codes. \itemize{ \item \code{patid}: patient identifier, integer values \item \code{date}: date the diagnostic code was recorded diff --git a/testing/Makefile b/testing/Makefile index b7223d90..2ea8d5de 100644 --- a/testing/Makefile +++ b/testing/Makefile @@ -20,40 +20,6 @@ WITHOUTSUGGESTS := $(addsuffix -without-suggests,$(VERSIONS)) # Limit with-suggests to R versions whose base OS is still supported (roughly Debian >= bullseye/R 4.1+). WITHSUGGESTS := $(addsuffix -with-suggests,$(V41) $(V42) $(V43) $(V44) $(V45) $(LATEST)) -# CRAN snapshots: use the date the *next* R release occurred. Empty means "current". -SNAP_4.4.2 := -SNAP_4.4.1 := 2024-10-31 -SNAP_4.4.0 := 2024-06-14 -SNAP_4.3.3 := 2024-04-24 -SNAP_4.3.2 := 2024-02-29 -SNAP_4.3.1 := 2023-10-31 -SNAP_4.3.0 := 2023-06-16 -SNAP_4.2.3 := 2023-04-21 -SNAP_4.2.2 := 2023-03-15 -SNAP_4.2.1 := 2022-10-31 -SNAP_4.2.0 := 2022-06-23 -SNAP_4.1.3 := 2022-04-22 -SNAP_4.1.2 := 2022-03-10 -SNAP_4.1.1 := 2021-11-01 -SNAP_4.1.0 := 2021-08-10 -SNAP_4.0.5 := 2021-05-18 -SNAP_4.0.4 := 2021-03-31 -SNAP_4.0.3 := 2021-02-15 -SNAP_4.0.2 := 2020-10-10 -SNAP_4.0.1 := 2020-06-22 -SNAP_4.0.0 := 2020-06-06 -SNAP_3.6.3 := 2020-04-24 -SNAP_3.6.2 := 2020-02-29 -SNAP_3.6.1 := 2019-12-12 -SNAP_3.6.0 := 2019-07-05 -SNAP_3.5.3 := 2019-04-26 -SNAP_3.5.2 := 2019-03-11 -SNAP_3.5.1 := 2018-12-20 -SNAP_3.5.0 := 2018-07-02 -# 4.5.x snapshots default to "current" until next releases occur. - -snapshot_arg = $(if $(SNAP_$1),--build-arg SNAPSHOT=$(SNAP_$1),) - PKG_ROOT := .. PKG_VERSION := $(shell awk '/^Version:/{print $$2}' $(PKG_ROOT)/DESCRIPTION) PKG_NAME := $(shell awk '/^Package:/{print $$2}' $(PKG_ROOT)/DESCRIPTION) @@ -74,12 +40,14 @@ $(TARBALL): $(PKG_ROOT)/$(TARBALL) README.md: README.Rmd $(CHECKS) $(R) -q -e "knitr::knit('$<', output = '$@')" -R-%-without-suggests/$(PKG_NAME).Rcheck/00check.log: .images/R-%-without-suggests.stamp $(TARBALL) +R-%-without-suggests/$(PKG_NAME).Rcheck/00check.log: $(TARBALL) $(RM) -r R-$*-without-suggests mkdir -p R-$*-without-suggests $(DOCKER) run \ -v "$$(PWD)":/work \ - --rm mdcr-$*-without-suggests \ + -w /work \ + -e _R_CHECK_FORCE_SUGGESTS_=false \ + --rm r-base:$* \ R CMD check --no-build-vignettes --ignore-vignettes --no-manual -o R-$*-without-suggests $(TARBALL) > R-$*-without-suggests/docker.log 2>&1 R-%-with-suggests/$(PKG_NAME).Rcheck/00check.log: .images/R-%-with-suggests.stamp $(TARBALL) @@ -90,14 +58,9 @@ R-%-with-suggests/$(PKG_NAME).Rcheck/00check.log: .images/R-%-with-suggests.stam --rm mdcr-$*-with-suggests \ R CMD check -o R-$*-with-suggests $(TARBALL) > R-$*-with-suggests/docker.log 2>&1 -.images/R-%-without-suggests.stamp: docker/without-suggests/Dockerfile $(TARBALL) | .images - $(DOCKER) info >/dev/null 2>&1 || { echo "$(DOCKER) is unavailable or the daemon cannot be reached"; exit 1; } - $(DOCKER) build --build-arg R_TAG=$* -t mdcr-$*-without-suggests -f $< . - @touch $@ - .images/R-%-with-suggests.stamp: docker/with-suggests/Dockerfile $(TARBALL) | .images $(DOCKER) info >/dev/null 2>&1 || { echo "$(DOCKER) is unavailable or the daemon cannot be reached"; exit 1; } - $(DOCKER) build --build-arg R_TAG=$* $(call snapshot_arg,$*) -t mdcr-$*-with-suggests -f $< . + $(DOCKER) build --build-arg R_TAG=$* -t mdcr-$*-with-suggests -f $< . @touch $@ .images: diff --git a/testing/README.Rmd b/testing/README.Rmd index 99b16ed0..79eddd30 100644 --- a/testing/README.Rmd +++ b/testing/README.Rmd @@ -29,6 +29,11 @@ To run the tests you need Just run `make` from this directory. +**NOTE:** When something goes wrong and you need to dig into a specific image +run from this directory. + + docker run -v .:/work/ -it + # Last Testing Results ```{r} diff --git a/testing/README.md b/testing/README.md index 7d1e5b80..50564a8c 100644 --- a/testing/README.md +++ b/testing/README.md @@ -18,6 +18,11 @@ To run the tests you need Just run `make` from this directory. +**NOTE:** When something goes wrong and you need to dig into a specific image +run from this directory. + + docker run -v .:/work/ -it + # Last Testing Results @@ -33,27 +38,111 @@ Just run `make` from this directory. - With Suggested Packages + With Suggested Packages + 4.1.0 + 0 + + + + + + 4.1.1 + 0 + + + + + + 4.1.2 + 0 + + + + + + 4.1.3 + 0 + + + + + + 4.2.0 + 0 + + + + + + 4.2.1 + 0 + + + + + + 4.2.2 + 0 + + + + + + 4.2.3 + 0 + + + + + + 4.3.0 + 0 + + + + + + 4.3.1 + 0 + + + + + + 4.3.2 + 0 + + + + + + 4.3.3 + 0 + + + + + 4.4.0 0 - Warning 1 - Note 4 + + 4.4.1 0 - Note 1 + 4.4.2 0 - Note 1 + 4.5.0 @@ -82,196 +171,196 @@ Just run `make` from this directory. 0 - Note 2 + Note 1 3.5.1 0 - Note 2 + Note 1 3.5.2 0 - Note 2 + Note 1 3.5.3 0 - Note 2 + Note 1 3.6.0 0 - Note 3 + Note 2 3.6.1 0 - Note 3 + Note 2 3.6.3 0 - Note 3 + Note 2 4.0.0 0 - Note 3 + Note 2 4.0.1 0 - Note 3 + Note 2 4.0.2 0 - Note 3 + Note 2 4.0.3 0 - Note 3 + Note 2 4.0.4 0 - Note 3 + Note 2 4.0.5 0 - Note 3 + Note 2 4.1.0 0 - Note 3 + Note 2 4.1.1 0 - Note 3 + Note 2 4.1.2 0 - Note 3 + Note 2 4.1.3 0 - Note 3 + Note 2 4.2.0 0 - Note 3 + Note 2 4.2.1 0 - Note 3 + Note 2 4.2.2 0 - Note 3 + Note 2 4.2.3 0 - Note 3 + Note 2 4.3.0 0 - Note 3 + Note 2 4.3.1 0 - Note 3 + Note 2 4.3.2 0 - Note 3 + Note 2 4.3.3 0 - Note 3 + Note 2 4.4.0 0 - Note 3 + Note 2 4.4.1 0 - Note 3 + Note 2 4.4.2 0 - Note 3 + Note 2 4.5.0 @@ -303,12 +392,9 @@ Just run `make` from this directory. **Warnings:** -1. checking package dependencies ... WARNING Skipping vignette re-building Packages suggested but not available for checking: 'data.table', 'kableExtra', 'knitr', 'R.utils', 'rmarkdown', 'tibble' VignetteBuilder package required for checking but not installed: ‘knitr’ **Notes:** -1. checking package dependencies ... NOTE Package suggested but not available for checking: ‘R.utils’ -2. checking package dependencies ... NOTE Packages suggested but not available for checking: ‘data.table’ ‘kableExtra’ ‘knitr’ ‘R.utils’ ‘rmarkdown’ ‘tibble’ -3. checking package dependencies ... NOTE Packages suggested but not available for checking: 'data.table', 'kableExtra', 'knitr', 'R.utils', 'rmarkdown', 'tibble' -4. checking package vignettes ... NOTE Package has ‘vignettes’ subdirectory but apparently no vignettes. Perhaps the ‘VignetteBuilder’ information is missing from the DESCRIPTION file? +1. checking package dependencies ... NOTE Packages suggested but not available for checking: ‘data.table’ ‘dplyr’ ‘kableExtra’ ‘knitr’ ‘R.utils’ ‘rmarkdown’ ‘tibble’ +2. checking package dependencies ... NOTE Packages suggested but not available for checking: 'data.table', 'dplyr', 'kableExtra', 'knitr', 'R.utils', 'rmarkdown', 'tibble' diff --git a/testing/docker/with-suggests/Dockerfile b/testing/docker/with-suggests/Dockerfile index 2d6f6d35..cdab07a6 100644 --- a/testing/docker/with-suggests/Dockerfile +++ b/testing/docker/with-suggests/Dockerfile @@ -1,24 +1,65 @@ ARG R_TAG=latest -ARG SNAPSHOT="" FROM rocker/r-ver:${R_TAG} ARG DEBIAN_FRONTEND=noninteractive ENV _R_CHECK_FORCE_SUGGESTS_=false \ - CRAN_SNAPSHOT=${SNAPSHOT:-} \ TZ=Etc/UTC +# system libs, but no pandoc / pandoc-citeproc here RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates curl git make g++ pkg-config \ - pandoc texinfo \ - texlive-latex-base texlive-latex-recommended texlive-latex-extra \ - texlive-fonts-recommended texlive-fonts-extra lmodern \ - libxml2-dev libcurl4-openssl-dev libssl-dev libgit2-dev \ - libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev \ - libpng-dev libjpeg-dev libtiff5-dev libwebp-dev \ - zlib1g-dev libicu-dev libbz2-dev liblzma-dev \ - && rm -rf /var/lib/apt/lists/* + ca-certificates \ + libxml2-dev \ + libcurl4-openssl-dev \ + libssl-dev \ + libgit2-dev \ + libfontconfig1-dev \ + libharfbuzz-dev \ + libfribidi-dev \ + libfreetype6-dev \ + libpng-dev \ + libjpeg-dev \ + libtiff5-dev \ + libwebp-dev \ + zlib1g-dev \ + libicu-dev \ + libbz2-dev \ + liblzma-dev \ + curl \ + git \ + make \ + g++ \ + pkg-config \ + texinfo \ + texlive-latex-base \ + texlive-latex-recommended \ + texlive-latex-extra \ + texlive-fonts-recommended \ + texlive-fonts-extra \ + lmodern \ + && rm -rf /var/lib/apt/lists/* -COPY docker/with-suggests/install-suggests.R /opt/install-suggests.R -RUN R -q -e "source('/opt/install-suggests.R')" +# one Pandoc version for all R/OS combos +ARG PANDOC_VERSION=3.1.12.2 + +RUN set -eux; \ + arch="$(dpkg --print-architecture)"; \ + case "$arch" in \ + amd64|arm64) ;; \ + *) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \ + esac; \ + deb="pandoc-${PANDOC_VERSION}-1-${arch}.deb"; \ + curl -L "https://github.com/jgm/pandoc/releases/download/${PANDOC_VERSION}/${deb}" \ + -o /tmp/pandoc.deb; \ + dpkg -i /tmp/pandoc.deb; \ + rm /tmp/pandoc.deb + +RUN R -q -e " \ + cores <- parallel::detectCores(logical = FALSE); \ + cores <- if (is.na(cores) || cores < 1) 1L else cores; \ + install.packages( \ + pkgs = c('data.table', 'dplyr','kableExtra', 'knitr', 'rmarkdown', 'R.utils'), \ + repos = 'https://cran.rstudio.com', \ + Ncpus = cores \ + )" WORKDIR /work diff --git a/testing/docker/with-suggests/install-suggests.R b/testing/docker/with-suggests/install-suggests.R deleted file mode 100644 index 328dbf7e..00000000 --- a/testing/docker/with-suggests/install-suggests.R +++ /dev/null @@ -1,57 +0,0 @@ -# Install Suggests with version pins, honoring an optional CRAN snapshot. - -snap <- Sys.getenv("CRAN_SNAPSHOT", "") -cran <- if (nzchar(snap)) { - sprintf("https://cran.microsoft.com/snapshot/%s", snap) -} else { - "https://cran.r-project.org" -} -Ncpus <- max(1L, parallel::detectCores() - 1L) - -install.packages("remotes", repos = cran, Ncpus = Ncpus) - -rver <- getRversion() - -pkgs <- list( - knitr = NA_character_, - kableExtra = NA_character_, - data.table = NA_character_, - tibble = NA_character_, - rmarkdown = NA_character_, - R.utils = NA_character_ -) - -if (rver < "4.0.0") { - pkgs$knitr <- "1.40" - pkgs$kableExtra <- "1.3.4" - pkgs$data.table <- "1.14.10" - pkgs$tibble <- "3.1.8" - pkgs$rmarkdown <- "2.21" - pkgs$R.utils <- "2.12.2" -} else if (rver < "4.2.0") { - pkgs$knitr <- "1.41" - pkgs$kableExtra <- "1.4.0" - pkgs$data.table <- "1.14.10" - pkgs$tibble <- "3.2.1" - pkgs$rmarkdown <- "2.24" - pkgs$R.utils <- "2.12.2" -} else if (rver < "4.4.0") { - pkgs$knitr <- "1.45" - pkgs$kableExtra <- "1.4.0" - pkgs$data.table <- "1.15.0" - pkgs$tibble <- "3.2.1" - pkgs$rmarkdown <- "2.26" - pkgs$R.utils <- "2.12.2" -} else { - # Current CRAN versions for latest R releases - pkgs[] <- NA_character_ -} - -for (nm in names(pkgs)) { - ver <- pkgs[[nm]] - if (is.na(ver)) { - install.packages(nm, repos = cran, Ncpus = Ncpus) - } else { - remotes::install_version(nm, version = ver, repos = cran, Ncpus = Ncpus, upgrade = "never") - } -} diff --git a/tests/test-data-frame-tools.R b/tests/test-data-frame-tools.R index 97c88555..d4dce171 100644 --- a/tests/test-data-frame-tools.R +++ b/tests/test-data-frame-tools.R @@ -9,7 +9,11 @@ dataframetools <- "mdcr_subset", "mdcr_setorder", "mdcr_setnames", - "mdcr_duplicated" + "mdcr_duplicated", + "mdcr_inner_join", + "mdcr_full_outer_join", + "mdcr_left_join", + "mdcr_cbind" ) mdcr <- getNamespace("medicalcoder") @@ -27,9 +31,9 @@ stopifnot( ################################################################################ # Set up data for testing -DF <- data.frame(A = 1:10, C = NA_integer_, B = LETTERS[1:10]) -if (requireNamespace("tibble", quietly = TRUE)) { - TBL <- getExportedValue(name = "as_tibble", ns = "tibble")(DF) +DF <- data.frame(A = 1:10, C = NA_integer_, B = LETTERS[1:10], stringsAsFactors = FALSE) +if (requireNamespace("dplyr", quietly = TRUE)) { + TBL <- getExportedValue(name = "as_tibble", ns = "dplyr")(DF) } else { TBL <- DF } @@ -97,6 +101,18 @@ stopifnot( identical(DT[["D"]], x) ) +# create a new column and only add a value to rows 2, 5, and 7 +x <- c(NA_character_, "first", NA_character_, NA_character_, "second", NA_character_, "third", rep(NA_character_, nrow(DF) - 7L)) +DF <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DF, i = c(2L, 5L, 7L), j = "newC", value = c("first", "second", "third")) +TBL <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(TBL, i = c(2L, 5L, 7L), j = "newC", value = c("first", "second", "third")) +DT <- getFromNamespace(x = "mdcr_set", ns = "medicalcoder")(DT, i = c(2L, 5L, 7L), j = "newC", value = c("first", "second", "third")) + +stopifnot( + identical(DF[["newC"]], x), + identical(TBL[["newC"]], x), + identical(DT[["newC"]], x) +) + ################################################################################ # testing mdcr_select # set colummns - change the order of the columns @@ -117,6 +133,23 @@ stopifnot( identical(getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DT), DT) ) +# check for one column +DFD <- getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DF, col = c("D")) +TBLD <- getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(TBL, col = c("D")) +DTD <- getFromNamespace(x = "mdcr_select", ns = "medicalcoder")(DT, col = c("D")) + +stopifnot( + identical(names(DFD), c("D")), + identical(names(TBLD), c("D")), + identical(names(DTD), c("D")), + identical(DFD[["D"]], paste("v", c(0:5, 5:8))), + identical(TBLD[["D"]], paste("v", c(0:5, 5:8))), + identical(DTD[["D"]], paste("v", c(0:5, 5:8))), + identical(class(DFD), class(DF)), + identical(class(TBLD), class(TBL)), + identical(class(DTD), class(DT)) +) + ################################################################################ # testing mdcr_subset @@ -170,7 +203,7 @@ if (requireNamespace("data.table", quietly = TRUE)) { stopifnot(inherits(DT0, "data.table"), inherits(DT1, "data.table")) } -if (requireNamespace("tibble", quietly = TRUE)) { +if (requireNamespace("dplyr", quietly = TRUE)) { stopifnot(inherits(TBL0, "tbl_df"), inherits(TBL1, "tbl_df")) } @@ -201,7 +234,7 @@ if (requireNamespace("data.table", quietly = TRUE)) { stopifnot(inherits(DT0, "data.table"), inherits(DT1, "data.table")) } -if (requireNamespace("tibble", quietly = TRUE)) { +if (requireNamespace("dplyr", quietly = TRUE)) { stopifnot(inherits(TBL0, "tbl_df"), inherits(TBL1, "tbl_df")) } @@ -232,7 +265,7 @@ if (requireNamespace("data.table", quietly = TRUE)) { stopifnot(inherits(DT0, "data.table"), inherits(DT1, "data.table")) } -if (requireNamespace("tibble", quietly = TRUE)) { +if (requireNamespace("dplyr", quietly = TRUE)) { stopifnot(inherits(TBL0, "tbl_df"), inherits(TBL1, "tbl_df")) } @@ -304,6 +337,361 @@ stopifnot( identical(getFromNamespace(x = "mdcr_duplicated", ns = "medicalcoder")(DT, by = "D", fromLast = TRUE), expected) ) +################################################################################ +# testing mdcr_inner_join +# +# without specifying the by +t0 <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(DF, DF[2, ]) +t1 <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(DT, DT[2, ]) +t2 <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(TBL, TBL[2, ]) + +stopifnot( + isTRUE( all.equal(t0, DF[2, ], check.attributes = FALSE)), + isTRUE( all.equal(t1, DT[2, ], check.attributes = FALSE)), + isTRUE( all.equal(t2, TBL[2, ], check.attributes = FALSE)) +) + +t3 <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(DF, DF[c(2, 5, 1), ]) +t4 <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(DT, DT[c(2, 5, 1), ]) +t5 <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(TBL, TBL[c(2, 5, 1), ]) + +stopifnot( + isTRUE( all.equal( t3, DF[c(1, 2, 5), ], check.attributes = FALSE)), + isTRUE( all.equal( t4, DT[c(1, 2, 5), ], check.attributes = FALSE)), + isTRUE( all.equal( t5, TBL[c(1, 2, 5), ], check.attributes = FALSE)) + ) + +# test with a by statement and suffixes statement +expected_df <- + data.frame( + x1 = c(1L, 2L, 8L), + x2.right = c("A", "B", "C"), + x2.left = c("a", "b", "c"), + stringsAsFactors = FALSE + ) +r <- data.frame( + x1 = as.integer(1:10), + x2 = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + stringsAsFactors = FALSE +) +l <- data.frame( + x1 = as.integer(c(1, 2, 33, 44, 55, 66, 77, 8, 99, 1100)), + x2 = c("a", "b", "d", "e", "f", "t", "a", "c", "9", "TEN"), + stringsAsFactors = FALSE +) +outDF <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +stopifnot(identical(outDF, expected_df)) + +if (requireNamespace("data.table", quietly = TRUE)) { + data.table::setDT(r) + data.table::setDT(l) + expected_dt <- data.table::copy(expected_df) + data.table::setDT(expected_dt) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_dt <- expected_df +} +outDT <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +stopifnot(identical(outDT, expected_dt)) + +if (requireNamespace("dplyr", quietly = TRUE)) { + r <- dplyr::as_tibble(r) + l <- dplyr::as_tibble(l) + expected_tb <- dplyr::as_tibble(expected_df) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_tb <- expected_df +} +outTBL <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +stopifnot(identical(outTBL, expected_tb)) + +# test with by.x and by.y statements +r <- data.frame( + x1 = as.integer(1:10), + x2 = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + stringsAsFactors = FALSE +) +l <- data.frame( + z = as.integer(c(1, 2, 33, 44, 55, 66, 77, 8, 99, 1100)), + x2 = c("a", "b", "d", "e", "f", "t", "a", "c", "9", "TEN"), + stringsAsFactors = FALSE +) +outDF <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(r, l, by.x = "x1", by.y = "z", suffixes = c(".right", ".left")) +stopifnot(identical(outDF, expected_df)) + +if (requireNamespace("data.table", quietly = TRUE)) { + data.table::setDT(r) + data.table::setDT(l) + expected_dt <- data.table::copy(expected_df) + data.table::setDT(expected_dt) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_dt <- expected_df +} +outDT <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(r, l, by.x = "x1", by.y = "z", suffixes = c(".right", ".left")) +stopifnot(identical(outDT, expected_dt)) + +if (requireNamespace("dplyr", quietly = TRUE)) { + r <- dplyr::as_tibble(r) + l <- dplyr::as_tibble(l) + expected_tb <- dplyr::as_tibble(expected_df) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_tb <- expected_df +} +outTBL <- getFromNamespace(x = "mdcr_inner_join", ns = "medicalcoder")(r, l, by.x = "x1", by.y = "z", suffixes = c(".right", ".left")) +stopifnot(identical(outTBL, expected_tb)) + +################################################################################ +# testing mdcr_left_join + +# These wrappers around merge have sort = FALSE and dplyr::left_join doesn't +# sort the return by default. So, build the merge, sort the result, and then +# test for the outcome. The first set of tests for a single + +t0 <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(DF, DF[2, ]) +t0 <- t0[do.call(order, t0), ] + +t1 <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(DF, DF[c(2, 5, 2), ]) +t1 <- t1[do.call(order, t1), ] + +t2 <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(DT, DT[2, ]) +t2 <- t2[do.call(order, t2), ] + +t3 <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(DT, DT[c(2, 5, 2), ]) +t3 <- t3[do.call(order, t3), ] + +t4 <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(TBL, TBL[2, ]) +t4 <- t4[do.call(order, t4), ] + +t5 <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(TBL, TBL[c(2, 5, 2), ]) +t5 <- t5[do.call(order, t5), ] + +# without specifying the by +stopifnot( + isTRUE( all.equal( t0, DF, check.attributes = FALSE)), + isTRUE( all.equal( t1, DF[c(1, 2, 2, 3:nrow(DF)), ], check.attributes = FALSE)), + isTRUE( all.equal( t2, DT, check.attributes = FALSE)), + isTRUE( all.equal( t3, DT[c(1, 2, 2, 3:nrow(DF)), ], check.attributes = FALSE)), + isTRUE( all.equal( t4, TBL, check.attributes = FALSE)), + isTRUE( all.equal( t5, TBL[c(1, 2, 2, 3:nrow(DF)), ], check.attributes = FALSE)) +) + +# tests with by statements and suffixes +expected_df <- + data.frame( + x1 = as.integer(1:10), + x2.right = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + x2.left = c("a", "b", rep(NA_character_, 5), "c", rep(NA_character_, 2)), + stringsAsFactors = FALSE + ) +r <- data.frame( + x1 = as.integer(1:10), + x2 = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + stringsAsFactors = FALSE +) +l <- data.frame( + x1 = as.integer(c(1, 2, 33, 44, 55, 66, 77, 8, 99, 1100)), + x2 = c("a", "b", "d", "e", "f", "t", "a", "c", "9", "TEN"), + stringsAsFactors = FALSE +) +outDF <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +outDF <- outDF[order(outDF$x1), ] +rownames(outDF) <- NULL +stopifnot(identical(outDF, expected_df)) + +if (requireNamespace("data.table", quietly = TRUE)) { + data.table::setDT(r) + data.table::setDT(l) + expected_dt <- data.table::copy(expected_df) + data.table::setDT(expected_dt) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_dt <- expected_df +} +outDT <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +outDT <- outDT[order(outDT$x1), ] +rownames(outDT) <- NULL +stopifnot(identical(outDT, expected_dt)) + +if (requireNamespace("dplyr", quietly = TRUE)) { + r <- dplyr::as_tibble(r) + l <- dplyr::as_tibble(l) + expected_tb <- dplyr::as_tibble(expected_df) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_tb <- expected_df +} +outTBL <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +outTBL <- outTBL[order(outTBL$x1), ] +rownames(outTBL) <- NULL +stopifnot(identical(outTBL, expected_tb)) + +# tests with by.x and by.x statements and suffixes +expected_df <- + data.frame( + x1 = as.integer(1:10), + x2.right = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + x2.left = c("a", "b", rep(NA_character_, 5), "c", rep(NA_character_, 2)), + stringsAsFactors = FALSE + ) +r <- data.frame( + x1 = as.integer(1:10), + x2 = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + stringsAsFactors = FALSE +) +l <- data.frame( + l1 = as.integer(c(1, 2, 33, 44, 55, 66, 77, 8, 99, 1100)), + x2 = c("a", "b", "d", "e", "f", "t", "a", "c", "9", "TEN"), + stringsAsFactors = FALSE +) +outDF <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(r, l, by.x = "x1", by.y = "l1", suffixes = c(".right", ".left")) +outDF <- outDF[order(outDF$x1), ] +rownames(outDF) <- NULL +stopifnot(identical(outDF, expected_df)) + +if (requireNamespace("data.table", quietly = TRUE)) { + data.table::setDT(r) + data.table::setDT(l) + expected_dt <- data.table::copy(expected_df) + data.table::setDT(expected_dt) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_dt <- expected_df +} +outDT <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(r, l, by.x = "x1", by.y = "l1", suffixes = c(".right", ".left")) +outDT <- outDT[order(outDT$x1), ] +rownames(outDT) <- NULL +stopifnot(identical(outDT, expected_dt)) + +if (requireNamespace("dplyr", quietly = TRUE)) { + r <- dplyr::as_tibble(r) + l <- dplyr::as_tibble(l) + expected_tb <- dplyr::as_tibble(expected_df) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_tb <- expected_df +} +outTBL <- getFromNamespace(x = "mdcr_left_join", ns = "medicalcoder")(r, l, by.x = "x1", by.y = "l1", suffixes = c(".right", ".left")) +outTBL <- outTBL[order(outTBL$x1), ] +rownames(outTBL) <- NULL +stopifnot(identical(outTBL, expected_tb)) + +################################################################################ +# testing mdcr_full_outer_join + +expected_df <- + data.frame( + x1 = c(1L, 1L, 1L, 2L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 8L, 9L, 10L, 33L, 44L, 55L, 66L, 77L, 99L, 1100L), + x2 = c("A", "a", "a1", "B", "b", "D", "E", "F", "T", "A", "C", "c", "9", "ten", "d", "e", "f", "t", "a", "9", "TEN"), + stringsAsFactors = FALSE + ) +r <- data.frame( + x1 = as.integer(1:10), + x2 = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + stringsAsFactors = FALSE +) +l <- data.frame( + x1 = as.integer(c(1, 1, 2, 33, 44, 55, 66, 77, 8, 99, 1100)), + x2 = c("a", "a1", "b", "d", "e", "f", "t", "a", "c", "9", "TEN"), + stringsAsFactors = FALSE +) +outDF <- getFromNamespace(x = "mdcr_full_outer_join", ns = "medicalcoder")(r, l) +outDF <- outDF[order(outDF$x1), ] +rownames(outDF) <- NULL +stopifnot(identical(outDF, expected_df)) + +if (requireNamespace("data.table", quietly = TRUE)) { + data.table::setDT(r) + data.table::setDT(l) + expected_dt <- data.table::copy(expected_df) + data.table::setDT(expected_dt) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_dt <- expected_df +} +outDT <- getFromNamespace(x = "mdcr_full_outer_join", ns = "medicalcoder")(r, l) +outDT <- outDT[order(outDT$x1), ] +rownames(outDT) <- NULL +stopifnot(identical(outDT, expected_dt)) + +if (requireNamespace("dplyr", quietly = TRUE)) { + r <- dplyr::as_tibble(r) + l <- dplyr::as_tibble(l) + expected_tb <- dplyr::as_tibble(expected_df) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_tb <- expected_df +} +outTBL <- getFromNamespace(x = "mdcr_full_outer_join", ns = "medicalcoder")(r, l) +outTBL <- outTBL[order(outTBL$x1), ] +rownames(outTBL) <- NULL +stopifnot(identical(outTBL, expected_tb)) + +# tests with by statements and suffixes +expected_df <- + data.frame( + x1 = as.integer(c(1, 1:10, 33, 44, 55, 66, 77, 99, 1100)), + x2.right = c("A", "A", "B", "D", "E", "F", "T", "A", "C", "9", "ten", rep(NA_character_, 7)), + x2.left = c("a", "a1", "b", rep(NA_character_, 5), "c", rep(NA_character_, 2), "d", "e", "f", "t", "a", "9", "TEN"), + stringsAsFactors = FALSE + ) +r <- data.frame( + x1 = as.integer(1:10), + x2 = c("A", "B", "D", "E", "F", "T", "A", "C", "9", "ten"), + stringsAsFactors = FALSE +) +l <- data.frame( + x1 = as.integer(c(1, 1, 2, 33, 44, 55, 66, 77, 8, 99, 1100)), + x2 = c("a", "a1", "b", "d", "e", "f", "t", "a", "c", "9", "TEN"), + stringsAsFactors = FALSE +) +outDF <- getFromNamespace(x = "mdcr_full_outer_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +outDF <- outDF[order(outDF$x1), ] +rownames(outDF) <- NULL +stopifnot(identical(outDF, expected_df)) + +if (requireNamespace("data.table", quietly = TRUE)) { + data.table::setDT(r) + data.table::setDT(l) + expected_dt <- data.table::copy(expected_df) + data.table::setDT(expected_dt) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_dt <- expected_df +} +outDT <- getFromNamespace(x = "mdcr_full_outer_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +outDT <- outDT[order(outDT$x1), ] +rownames(outDT) <- NULL +stopifnot(identical(outDT, expected_dt)) + +if (requireNamespace("dplyr", quietly = TRUE)) { + r <- dplyr::as_tibble(r) + l <- dplyr::as_tibble(l) + expected_tb <- dplyr::as_tibble(expected_df) +} else { + r <- as.data.frame(r, stringsAsFactors = FALSE) + l <- as.data.frame(l, stringsAsFactors = FALSE) + expected_tb <- expected_df +} +outTBL <- getFromNamespace(x = "mdcr_full_outer_join", ns = "medicalcoder")(r, l, by = "x1", suffixes = c(".right", ".left")) +outTBL <- outTBL[order(outTBL$x1), ] +rownames(outTBL) <- NULL +stopifnot(identical(outTBL, expected_tb)) + +################################################################################ +# testing mdcr_cbind ################################################################################ # End of File # diff --git a/tests/test-longitudinal-comorbidities.R b/tests/test-longitudinal-comorbidities.R index 14c41ccd..aaa72667 100644 --- a/tests/test-longitudinal-comorbidities.R +++ b/tests/test-longitudinal-comorbidities.R @@ -55,6 +55,7 @@ record <- row.names = c(NA, -16L) ) + # set the data in an unsorted order to verify that the output will be sorted and # as expected. set.seed(42) @@ -63,6 +64,12 @@ rws <- c(sample(seq_len(nrow(record))), sample(seq_len(nrow(record)))) record <- record[rws, ] +recordDT <- record +recordTBL <- record + +class(recordDT) <- c("data.table", class(recordDT)) +class(recordTBL) <- c("tbl_df", "tbl", class(recordTBL)) + ################################################################################ # Expected results @@ -118,6 +125,80 @@ CMRBS <- elixhauser_cumulative_v = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "cumulative", primarydx = 0L, poa.var = "poa"))) ) +args[["data"]] <- recordDT +CMRBSDT <- + list( + charlson_current_0 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "current", primarydx = 0L, poa = 0))), + charlson_current_1 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "current", primarydx = 0L, poa = 1))), + charlson_current_v = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "current", primarydx = 0L, poa.var = "poa"))), + + charlson_cumulative_0 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "cumulative", primarydx = 0L, poa = 0))), + charlson_cumulative_1 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "cumulative", primarydx = 0L, poa = 1))), + charlson_cumulative_v = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "cumulative", primarydx = 0L, poa.var = "poa"))), + + pccc_current_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 0))), + pccc_current_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 1))), + pccc_current_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa.var = "poa"))), + + pccc_cumulative_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 0))), + pccc_cumulative_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 1))), + pccc_cumulative_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa.var = "poa"))), + + spccc_current_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 0, subconditions = TRUE))), + spccc_current_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 1, subconditions = TRUE))), + spccc_current_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa.var = "poa", subconditions = TRUE))), + + spccc_cumulative_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 0, subconditions = TRUE))), + spccc_cumulative_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 1, subconditions = TRUE))), + spccc_cumulative_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa.var = "poa", subconditions = TRUE))), + + elixhauser_current_0 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "current", primarydx = 0L, poa = 0))), + elixhauser_current_1 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "current", primarydx = 0L, poa = 1))), + elixhauser_current_v = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "current", primarydx = 0L, poa.var = "poa"))), + + elixhauser_cumulative_0 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "cumulative", primarydx = 0L, poa = 0))), + elixhauser_cumulative_1 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "cumulative", primarydx = 0L, poa = 1))), + elixhauser_cumulative_v = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "cumulative", primarydx = 0L, poa.var = "poa"))) +) + +args[["data"]] <- recordTBL +CMRBSTBL <- + list( + charlson_current_0 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "current", primarydx = 0L, poa = 0))), + charlson_current_1 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "current", primarydx = 0L, poa = 1))), + charlson_current_v = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "current", primarydx = 0L, poa.var = "poa"))), + + charlson_cumulative_0 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "cumulative", primarydx = 0L, poa = 0))), + charlson_cumulative_1 = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "cumulative", primarydx = 0L, poa = 1))), + charlson_cumulative_v = do.call(comorbidities, c(args, list(method = "charlson_quan2005", flag.method = "cumulative", primarydx = 0L, poa.var = "poa"))), + + pccc_current_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 0))), + pccc_current_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 1))), + pccc_current_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa.var = "poa"))), + + pccc_cumulative_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 0))), + pccc_cumulative_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 1))), + pccc_cumulative_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa.var = "poa"))), + + spccc_current_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 0, subconditions = TRUE))), + spccc_current_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa = 1, subconditions = TRUE))), + spccc_current_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "current", poa.var = "poa", subconditions = TRUE))), + + spccc_cumulative_0 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 0, subconditions = TRUE))), + spccc_cumulative_1 = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa = 1, subconditions = TRUE))), + spccc_cumulative_v = do.call(comorbidities, c(args, list(method = "pccc_v3.1", flag.method = "cumulative", poa.var = "poa", subconditions = TRUE))), + + elixhauser_current_0 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "current", primarydx = 0L, poa = 0))), + elixhauser_current_1 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "current", primarydx = 0L, poa = 1))), + elixhauser_current_v = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "current", primarydx = 0L, poa.var = "poa"))), + + elixhauser_cumulative_0 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "cumulative", primarydx = 0L, poa = 0))), + elixhauser_cumulative_1 = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "cumulative", primarydx = 0L, poa = 1))), + elixhauser_cumulative_v = do.call(comorbidities, c(args, list(method = "elixhauser_ahrq2025", flag.method = "cumulative", primarydx = 0L, poa.var = "poa"))) +) + +CMRBS <- list(DF = CMRBS, DT = CMRBSDT, TBL = CMRBSTBL) + ################################################################################ # for the testing - check for the expected results and then remove the object # from the CMRBS until it is empty. @@ -126,626 +207,735 @@ CMRBS <- # # The PCCC with subconditions should have the same comorbidities as the objects # without subconditions. -for (x in grep("^pccc_", names(CMRBS), value = TRUE)) { - check <- - all.equal( - target = CMRBS[[x]], - current = CMRBS[[paste0("s", x)]][["conditions"]], - check.attributes = FALSE - ) - if (check) { - CMRBS[[paste0("s", x)]][["conditions"]] <- NULL - } else { - stop(sprintf('CMRBS[[s%s]][["conditions"]] is not all.equal to CMRBS[[%s]]', x, x)) +for (i in seq_len(length(CMRBS))) { + for (x in grep("^pccc_", names(CMRBS[[i]]), value = TRUE)) { + check <- + all.equal( + target = CMRBS[[i]][[x]], + current = CMRBS[[i]][[paste0("s", x)]][["conditions"]], + check.attributes = FALSE + ) + if (check) { + CMRBS[[i]][[paste0("s", x)]][["conditions"]] <- NULL + } else { + stop(sprintf('CMRBS[[%d]][[s%s]][["conditions"]] is not all.equal to CMRBS[[%s]]', i, x, x)) + } } } - # Check each of the subconditions. For the following conditions, all of which # should not be flagged, the number of rows in the output should be zero. -for (cnd in c("respiratory", "neuromusc", "neonatal", "misc", "metabolic", "hemato_immu", "gi", "congeni_genetic")) { - stopifnot( - nrow(CMRBS$spccc_current_0$subconditions[[cnd]]) == 0L, - nrow(CMRBS$spccc_current_1$subconditions[[cnd]]) == 0L, - nrow(CMRBS$spccc_current_v$subconditions[[cnd]]) == 0L, - nrow(CMRBS$spccc_cumulative_0$subconditions[[cnd]]) == 0L, - nrow(CMRBS$spccc_cumulative_1$subconditions[[cnd]]) == 0L, - nrow(CMRBS$spccc_cumulative_v$subconditions[[cnd]]) == 0L - ) - CMRBS$spccc_current_0$subconditions[[cnd]] <- NULL - CMRBS$spccc_current_1$subconditions[[cnd]] <- NULL - CMRBS$spccc_current_v$subconditions[[cnd]] <- NULL - CMRBS$spccc_cumulative_0$subconditions[[cnd]] <- NULL - CMRBS$spccc_cumulative_1$subconditions[[cnd]] <- NULL - CMRBS$spccc_cumulative_v$subconditions[[cnd]] <- NULL +for (i in seq_len(length(CMRBS))) { + for (cnd in c("respiratory", "neuromusc", "neonatal", "misc", "metabolic", "hemato_immu", "gi", "congeni_genetic")) { + stopifnot( + nrow(CMRBS[[i]][["spccc_current_0"]][["subconditions"]][[cnd]]) == 0L, + nrow(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][[cnd]]) == 0L, + nrow(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][[cnd]]) == 0L, + nrow(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][[cnd]]) == 0L, + nrow(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][[cnd]]) == 0L, + nrow(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][[cnd]]) == 0L + ) + CMRBS[[i]][["spccc_current_0"]][["subconditions"]][[cnd]] <- NULL + CMRBS[[i]][["spccc_current_1"]][["subconditions"]][[cnd]] <- NULL + CMRBS[[i]][["spccc_current_v"]][["subconditions"]][[cnd]] <- NULL + CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][[cnd]] <- NULL + CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][[cnd]] <- NULL + CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][[cnd]] <- NULL + } } # Specific checks for cvd -stopifnot(identical(nrow(CMRBS$spccc_current_0$subconditions[["cvd"]]), 0L)) -CMRBS$spccc_current_0$subconditions[["cvd"]] <- NULL - -stopifnot( - identical(CMRBS$spccc_current_1$subconditions[["cvd"]][["patid"]], c("A", "A")), - identical(CMRBS$spccc_current_1$subconditions[["cvd"]][["encid"]], c(3L, 5L)), - identical(CMRBS$spccc_current_1$subconditions[["cvd"]][["other"]], c(1L, 1L)), - all(CMRBS$spccc_current_1$subconditions[["cvd"]][, !(names(CMRBS$spccc_current_1$subconditions[["cvd"]]) %in% c("patid", "encid", "other"))] == 0) -) -CMRBS$spccc_current_1$subconditions[["cvd"]] <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(nrow(CMRBS[[i]][["spccc_current_0"]][["subconditions"]][["cvd"]]), 0L)) + CMRBS[[i]][["spccc_current_0"]][["subconditions"]][["cvd"]] <- NULL -stopifnot( - identical(CMRBS$spccc_current_v$subconditions[["cvd"]][["patid"]], c("A")), - identical(CMRBS$spccc_current_v$subconditions[["cvd"]][["encid"]], c(3L)), - identical(CMRBS$spccc_current_v$subconditions[["cvd"]][["other"]], c(1L)), - all(CMRBS$spccc_current_v$subconditions[["cvd"]][, !(names(CMRBS$spccc_current_v$subconditions[["cvd"]]) %in% c("patid", "encid", "other"))] == 0) -) -CMRBS$spccc_current_v$subconditions[["cvd"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["patid"]], c("A", "A")), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["encid"]], c(3L, 5L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["cardiomyopathies"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["conduction_disorder"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["device_and_technology_use"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["dysrhythmias"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["endocardium_diseases"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["heart_and_great_vessel_malformations"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["transplantation"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]][["other"]], c(1L, 1L)) + ) + CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["cvd"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_0$subconditions[["cvd"]][["patid"]], rep("A", 4)), - identical(CMRBS$spccc_cumulative_0$subconditions[["cvd"]][["encid"]], 4:7), - identical(CMRBS$spccc_cumulative_0$subconditions[["cvd"]][["other"]], rep(1L, 4)), - all(CMRBS$spccc_cumulative_0$subconditions[["cvd"]][, !(names(CMRBS$spccc_current_0$subconditions[["cvd"]]) %in% c("patid", "encid", "other"))] == 0) -) -CMRBS$spccc_cumulative_0$subconditions[["cvd"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["patid"]], c("A")), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["encid"]], c(3L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["cardiomyopathies"]], c(0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["conduction_disorder"]], c(0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["device_and_technology_use"]], c(0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["dysrhythmias"]], c(0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["endocardium_diseases"]], c(0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["heart_and_great_vessel_malformations"]], c(0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["transplantation"]], c(0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]][["other"]], c(1L)) + ) + CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["cvd"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_1$subconditions[["cvd"]][["patid"]], rep("A", 5)), - identical(CMRBS$spccc_cumulative_1$subconditions[["cvd"]][["encid"]], 3:7), - identical(CMRBS$spccc_cumulative_1$subconditions[["cvd"]][["other"]], rep(1L, 5)), - all(CMRBS$spccc_cumulative_1$subconditions[["cvd"]][, !(names(CMRBS$spccc_current_1$subconditions[["cdv"]]) %in% c("patid", "encid", "other"))] == 0) -) -CMRBS$spccc_cumulative_1$subconditions[["cvd"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["patid"]], rep("A", 4)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["encid"]], 4:7), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["other"]], rep(c(1L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["cardiomyopathies"]], rep(c(0L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["conduction_disorder"]], rep(c(0L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["device_and_technology_use"]], rep(c(0L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["dysrhythmias"]], rep(c(0L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["endocardium_diseases"]], rep(c(0L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["heart_and_great_vessel_malformations"]], rep(c(0L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["transplantation"]], rep(c(0L), 4L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]][["other"]], rep(c(1L), 4L)) + ) + CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["cvd"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_v$subconditions[["cvd"]][["patid"]], rep("A", 5)), - identical(CMRBS$spccc_cumulative_v$subconditions[["cvd"]][["encid"]], 3:7), - identical(CMRBS$spccc_cumulative_v$subconditions[["cvd"]][["other"]], rep(1L, 5)), - all(CMRBS$spccc_cumulative_v$subconditions[["cvd"]][, !(names(CMRBS$spccc_cumulative_v$subconditions[["cvd"]]) %in% c("patid", "encid", "other"))] == 0) -) -CMRBS$spccc_cumulative_v$subconditions[["cvd"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["patid"]], rep("A", 5)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["encid"]], 3:7), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["cardiomyopathies"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["conduction_disorder"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["device_and_technology_use"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["dysrhythmias"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["endocardium_diseases"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["heart_and_great_vessel_malformations"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["transplantation"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]][["other"]], rep(c(1L), 5L)) + ) + CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["cvd"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["patid"]], rep("A", 5)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["encid"]], 3:7), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["cardiomyopathies"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["conduction_disorder"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["device_and_technology_use"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["dysrhythmias"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["endocardium_diseases"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["heart_and_great_vessel_malformations"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["transplantation"]], rep(c(0L), 5L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]][["other"]], rep(c(1L), 5L)) + ) + CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["cvd"]] <- NULL +} + +################################################################################ # for cancer/malignancy -stopifnot(identical(nrow(CMRBS$spccc_current_0$subconditions[["malignancy"]]), 0L)) -CMRBS$spccc_current_0$subconditions[["malignancy"]] <- NULL - -stopifnot( - identical(CMRBS$spccc_current_1$subconditions[["malignancy"]][["patid"]], c("A", "A")), - identical(CMRBS$spccc_current_1$subconditions[["malignancy"]][["encid"]], c(2L, 5L)), - identical(CMRBS$spccc_current_1$subconditions[["malignancy"]][["neoplasms"]], c(1L, 1L)), - all(CMRBS$spccc_current_1$subconditions[["malignancy"]][, !(names(CMRBS$spccc_current_1$subconditions[["malignancy"]]) %in% c("patid", "encid", "neoplasms"))] == 0) -) -CMRBS$spccc_current_1$subconditions[["malignancy"]] <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(nrow(CMRBS[[i]][["spccc_current_0"]][["subconditions"]][["malignancy"]]), 0L)) + CMRBS[[i]][["spccc_current_0"]][["subconditions"]][["malignancy"]] <- NULL -stopifnot( - identical(CMRBS$spccc_current_v$subconditions[["malignancy"]][["patid"]], c("A")), - identical(CMRBS$spccc_current_v$subconditions[["malignancy"]][["encid"]], c(5L)), - identical(CMRBS$spccc_current_v$subconditions[["malignancy"]][["neoplasms"]], c(1L)), - all(CMRBS$spccc_current_v$subconditions[["malignancy"]][, !(names(CMRBS$spccc_current_v$subconditions[["malignancy"]]) %in% c("patid", "encid", "neoplasms"))] == 0) -) -CMRBS$spccc_current_v$subconditions[["malignancy"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["malignancy"]][["patid"]], c("A", "A")), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["malignancy"]][["encid"]], c(2L, 5L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["malignancy"]][["neoplasms"]], c(1L, 1L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["malignancy"]][["transplantation"]], c(0L, 0L)) + ) + CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["malignancy"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_0$subconditions[["malignancy"]][["patid"]], rep("A", 5)), - identical(CMRBS$spccc_cumulative_0$subconditions[["malignancy"]][["encid"]], 3:7), - identical(CMRBS$spccc_cumulative_0$subconditions[["malignancy"]][["neoplasms"]], rep(1L, 5)), - all(CMRBS$spccc_cumulative_0$subconditions[["malignancy"]][, !(names(CMRBS$spccc_cumulative_0$subconditions[["malignancy"]]) %in% c("patid", "encid", "neoplasms"))] == 0) -) -CMRBS$spccc_cumulative_0$subconditions[["malignancy"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["malignancy"]][["patid"]], c("A")), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["malignancy"]][["encid"]], c(5L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["malignancy"]][["neoplasms"]], c(1L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["malignancy"]][["transplantation"]], c(0L)) + ) + CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["malignancy"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_1$subconditions[["malignancy"]][["patid"]], rep("A", 6)), - identical(CMRBS$spccc_cumulative_1$subconditions[["malignancy"]][["encid"]], 2:7), - identical(CMRBS$spccc_cumulative_1$subconditions[["malignancy"]][["neoplasms"]], rep(1L, 6)), - all(CMRBS$spccc_cumulative_1$subconditions[["malignancy"]][, !(names(CMRBS$spccc_cumulative_1$subconditions[["malignancy"]]) %in% c("patid", "encid", "neoplasms"))] == 0) -) -CMRBS$spccc_cumulative_1$subconditions[["malignancy"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["malignancy"]][["patid"]], rep("A", 5)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["malignancy"]][["encid"]], 3:7), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["malignancy"]][["neoplasms"]], rep(1L, 5)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["malignancy"]][["transplantation"]], rep(0L, 5)) + ) + CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["malignancy"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_v$subconditions[["malignancy"]][["patid"]], rep("A", 5)), - identical(CMRBS$spccc_cumulative_v$subconditions[["malignancy"]][["encid"]], 3:7), - identical(CMRBS$spccc_cumulative_v$subconditions[["malignancy"]][["neoplasms"]], rep(1L, 5)), - all(CMRBS$spccc_cumulative_v$subconditions[["malignancy"]][, !(names(CMRBS$spccc_cumulative_v$subconditions[["malignancy"]]) %in% c("patid", "encid", "neoplasms"))] == 0) -) -CMRBS$spccc_cumulative_v$subconditions[["malignancy"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["malignancy"]][["patid"]], rep("A", 6)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["malignancy"]][["encid"]], 2:7), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["malignancy"]][["neoplasms"]], rep(1L, 6)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["malignancy"]][["transplantation"]], rep(0L, 6)) + ) + CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["malignancy"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["malignancy"]][["patid"]], rep("A", 5)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["malignancy"]][["encid"]], 3:7), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["malignancy"]][["neoplasms"]], rep(1L, 5)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["malignancy"]][["transplantation"]], rep(0L, 5)) + ) + CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["malignancy"]] <- NULL +} + +################################################################################ # for renal -stopifnot(identical(nrow(CMRBS$spccc_current_0$subconditions[["renal"]]), 0L)) -CMRBS$spccc_current_0$subconditions[["renal"]] <- NULL - -stopifnot( - identical(CMRBS$spccc_current_1$subconditions[["renal"]][["patid"]], c("A", "B", "B")), - identical(CMRBS$spccc_current_1$subconditions[["renal"]][["encid"]], c(4L, 1L, 4L)), - identical(CMRBS$spccc_current_1$subconditions[["renal"]][["chronic_renal_failure"]], c(1L, 1L, 1L)), - all(CMRBS$spccc_current_1$subconditions[["renal"]][, !(names(CMRBS$spccc_current_1$subconditions[["renal"]]) %in% c("patid", "encid", "chronic_renal_failure"))] == 0) -) -CMRBS$spccc_current_1$subconditions[["renal"]] <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(nrow(CMRBS[[i]][["spccc_current_0"]][["subconditions"]][["renal"]]), 0L)) + CMRBS[[i]][["spccc_current_0"]][["subconditions"]][["renal"]] <- NULL -stopifnot( - identical(CMRBS$spccc_current_v$subconditions[["renal"]][["patid"]], c("A", "B")), - identical(CMRBS$spccc_current_v$subconditions[["renal"]][["encid"]], c(4L, 1L)), - identical(CMRBS$spccc_current_v$subconditions[["renal"]][["chronic_renal_failure"]], c(1L, 1L)), - all(CMRBS$spccc_current_v$subconditions[["renal"]][, !(names(CMRBS$spccc_current_v$subconditions[["renal"]]) %in% c("patid", "encid", "chronic_renal_failure"))] == 0) -) -CMRBS$spccc_current_v$subconditions[["renal"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["patid"]], c("A", "B", "B")), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["encid"]], c(4L, 1L, 4L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["chronic_renal_failure"]], c(1L, 1L, 1L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["chronic_bladder_diseases"]], c(0L, 0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["congenital_anomalies"]], c(0L, 0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["device_and_technology_use"]], c(0L, 0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["other"]], c(0L, 0L, 0L)), + identical(CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]][["transplantation"]], c(0L, 0L, 0L)) + ) + CMRBS[[i]][["spccc_current_1"]][["subconditions"]][["renal"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_0$subconditions[["renal"]][["patid"]], rep(c("A", "B"), times = c(3, 4))), - identical(CMRBS$spccc_cumulative_0$subconditions[["renal"]][["encid"]], c(5:7, 2:5)), - identical(CMRBS$spccc_cumulative_0$subconditions[["renal"]][["chronic_renal_failure"]], rep(1L, 7)), - all(CMRBS$spccc_cumulative_0$subconditions[["renal"]][, !(names(CMRBS$spccc_cumulative_0$subconditions[["renal"]]) %in% c("patid", "encid", "chronic_renal_failure"))] == 0) -) -CMRBS$spccc_cumulative_0$subconditions[["renal"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["patid"]], c("A", "B")), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["encid"]], c(4L, 1L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["chronic_renal_failure"]], c(1L, 1L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["chronic_bladder_diseases"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["congenital_anomalies"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["device_and_technology_use"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["other"]], c(0L, 0L)), + identical(CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]][["transplantation"]], c(0L, 0L)) + ) + CMRBS[[i]][["spccc_current_v"]][["subconditions"]][["renal"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_1$subconditions[["renal"]][["patid"]], rep(c("A", "B"), times = c(4, 5))), - identical(CMRBS$spccc_cumulative_1$subconditions[["renal"]][["encid"]], c(4:7, 1:5)), - identical(CMRBS$spccc_cumulative_1$subconditions[["renal"]][["chronic_renal_failure"]], rep(1L, 9)), - all(CMRBS$spccc_cumulative_1$subconditions[["renal"]][, !(names(CMRBS$spccc_cumulative_1$subconditions[["renal"]]) %in% c("patid", "encid", "chronic_renal_failure"))] == 0) -) -CMRBS$spccc_cumulative_1$subconditions[["renal"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["patid"]], rep(c("A", "B"), times = c(3, 4))), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["encid"]], c(5:7, 2:5)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["chronic_renal_failure"]], rep(1L, 7)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["chronic_bladder_diseases"]], rep(0L, 7L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["congenital_anomalies"]], rep(0L, 7L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["device_and_technology_use"]], rep(0L, 7L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["other"]], rep(0L, 7L)), + identical(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]][["transplantation"]], rep(0L, 7L)) + ) + CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]][["renal"]] <- NULL -stopifnot( - identical(CMRBS$spccc_cumulative_v$subconditions[["renal"]][["patid"]], rep(c("A", "B"), times = c(4, 5))), - identical(CMRBS$spccc_cumulative_v$subconditions[["renal"]][["encid"]], c(4:7, 1:5)), - identical(CMRBS$spccc_cumulative_v$subconditions[["renal"]][["chronic_renal_failure"]], rep(1L, 9)), - all(CMRBS$spccc_cumulative_v$subconditions[["renal"]][, !(names(CMRBS$spccc_cumulative_v$subconditions[["renal"]]) %in% c("patid", "encid", "chronic_renal_failure"))] == 0) -) -CMRBS$spccc_cumulative_v$subconditions[["renal"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["patid"]], rep(c("A", "B"), times = c(4, 5))), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["encid"]], c(4:7, 1:5)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["chronic_renal_failure"]], rep(1L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["chronic_bladder_diseases"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["congenital_anomalies"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["device_and_technology_use"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["other"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]][["transplantation"]], rep(0L, 9L)) + ) + CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]][["renal"]] <- NULL + stopifnot( + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["patid"]], rep(c("A", "B"), times = c(4, 5))), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["encid"]], c(4:7, 1:5)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["chronic_renal_failure"]], rep(1L, 9)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["chronic_bladder_diseases"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["congenital_anomalies"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["device_and_technology_use"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["other"]], rep(0L, 9L)), + identical(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]][["transplantation"]], rep(0L, 9L)) + ) + CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]][["renal"]] <- NULL +} + +################################################################################ # remove the subcondtions after verifying they are all empty -stopifnot(identical(length(CMRBS$spccc_current_0$subconditions), 0L)) -CMRBS$spccc_current_0$subconditions <- NULL -stopifnot(identical(length(CMRBS$spccc_current_0), 0L)) -CMRBS$spccc_current_0 <- NULL - -stopifnot(identical(length(CMRBS$spccc_current_1$subconditions), 0L)) -CMRBS$spccc_current_1$subconditions <- NULL -stopifnot(identical(length(CMRBS$spccc_current_1), 0L)) -CMRBS$spccc_current_1 <- NULL - -stopifnot(identical(length(CMRBS$spccc_current_v$subconditions), 0L)) -CMRBS$spccc_current_v$subconditions <- NULL -stopifnot(identical(length(CMRBS$spccc_current_v), 0L)) -CMRBS$spccc_current_v <- NULL - -stopifnot(identical(length(CMRBS$spccc_cumulative_0$subconditions), 0L)) -CMRBS$spccc_cumulative_0$subconditions <- NULL -stopifnot(identical(length(CMRBS$spccc_cumulative_0), 0L)) -CMRBS$spccc_cumulative_0 <- NULL - -stopifnot(identical(length(CMRBS$spccc_cumulative_1$subconditions), 0L)) -CMRBS$spccc_cumulative_1$subconditions <- NULL -stopifnot(identical(length(CMRBS$spccc_cumulative_1), 0L)) -CMRBS$spccc_cumulative_1 <- NULL - -stopifnot(identical(length(CMRBS$spccc_cumulative_v$subconditions), 0L)) -CMRBS$spccc_cumulative_v$subconditions <- NULL -stopifnot(identical(length(CMRBS$spccc_cumulative_v), 0L)) -CMRBS$spccc_cumulative_v <- NULL +for( i in seq_len(length(CMRBS)) ) { + stopifnot(identical(length(CMRBS[[i]][["spccc_current_0"]][["subconditions"]]), 0L)) + CMRBS[[i]][["spccc_current_0"]][["subconditions"]] <- NULL + stopifnot(identical(length(CMRBS[[i]][["spccc_current_0"]]), 0L)) + CMRBS[[i]][["spccc_current_0"]] <- NULL + + stopifnot(identical(length(CMRBS[[i]][["spccc_current_1"]][["subconditions"]]), 0L)) + CMRBS[[i]][["spccc_current_1"]][["subconditions"]] <- NULL + stopifnot(identical(length(CMRBS[[i]][["spccc_current_1"]]), 0L)) + CMRBS[[i]][["spccc_current_1"]] <- NULL + + stopifnot(identical(length(CMRBS[[i]][["spccc_current_v"]][["subconditions"]]), 0L)) + CMRBS[[i]][["spccc_current_v"]][["subconditions"]] <- NULL + stopifnot(identical(length(CMRBS[[i]][["spccc_current_v"]]), 0L)) + CMRBS[[i]][["spccc_current_v"]] <- NULL + + stopifnot(identical(length(CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]]), 0L)) + CMRBS[[i]][["spccc_cumulative_0"]][["subconditions"]] <- NULL + stopifnot(identical(length(CMRBS[[i]][["spccc_cumulative_0"]]), 0L)) + CMRBS[[i]][["spccc_cumulative_0"]] <- NULL + + stopifnot(identical(length(CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]]), 0L)) + CMRBS[[i]][["spccc_cumulative_1"]][["subconditions"]] <- NULL + stopifnot(identical(length(CMRBS[[i]][["spccc_cumulative_1"]]), 0L)) + CMRBS[[i]][["spccc_cumulative_1"]] <- NULL + + stopifnot(identical(length(CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]]), 0L)) + CMRBS[[i]][["spccc_cumulative_v"]][["subconditions"]] <- NULL + stopifnot(identical(length(CMRBS[[i]][["spccc_cumulative_v"]]), 0L)) + CMRBS[[i]][["spccc_cumulative_v"]] <- NULL +} ################################################################################ # Check the conditions for charlson current poa 0 -stopifnot(identical(CMRBS$charlson_current_0[["age_score"]], rep(NA_integer_, 12))) -CMRBS$charlson_current_0[["age_score"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_0[["patid"]], expected_patid)) -CMRBS$charlson_current_0[["patid"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_0[["encid"]], expected_encid)) -CMRBS$charlson_current_0[["encid"]] <- NULL -stopifnot(all(CMRBS$charlson_current_0 == 0)) -CMRBS$charlson_current_0 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["charlson_current_0"]][["age_score"]], rep(NA_integer_, 12))) + CMRBS[[i]][["charlson_current_0"]][["age_score"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_0"]][["patid"]], expected_patid)) + CMRBS[[i]][["charlson_current_0"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_0"]][["encid"]], expected_encid)) + CMRBS[[i]][["charlson_current_0"]][["encid"]] <- NULL + stopifnot(all(CMRBS[[i]][["charlson_current_0"]] == 0)) + CMRBS[[i]][["charlson_current_0"]] <- NULL +} ################################################################################ # Check the conditions for pccc current poa 0 -stopifnot(identical(CMRBS$pccc_current_0[["patid"]], expected_patid)) -CMRBS$pccc_current_0[["patid"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_0[["encid"]], expected_encid)) -CMRBS$pccc_current_0[["encid"]] <- NULL -stopifnot(all(CMRBS$pccc_current_0 == 0)) -CMRBS$pccc_current_0 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["pccc_current_0"]][["patid"]], expected_patid)) + CMRBS[[i]][["pccc_current_0"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_0"]][["encid"]], expected_encid)) + CMRBS[[i]][["pccc_current_0"]][["encid"]] <- NULL + stopifnot(all(CMRBS[[i]][["pccc_current_0"]] == 0)) + CMRBS[[i]][["pccc_current_0"]] <- NULL +} ################################################################################ # Check the conditions for elixhauser current poa 0 -stopifnot(identical(CMRBS$elixhauser_current_0[["patid"]], expected_patid)) -CMRBS$elixhauser_current_0[["patid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_0[["encid"]], expected_encid)) -CMRBS$elixhauser_current_0[["encid"]] <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["elixhauser_current_0"]][["patid"]], expected_patid)) + CMRBS[[i]][["elixhauser_current_0"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_0"]][["encid"]], expected_encid)) + CMRBS[[i]][["elixhauser_current_0"]][["encid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_0[["CANCER_METS"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) -CMRBS$elixhauser_current_0[["CANCER_METS"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_0"]][["CANCER_METS"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) + CMRBS[[i]][["elixhauser_current_0"]][["CANCER_METS"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_0[["mortality_index"]], 22L * c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) -CMRBS$elixhauser_current_0[["mortality_index"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_0"]][["mortality_index"]], 22L * c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) + CMRBS[[i]][["elixhauser_current_0"]][["mortality_index"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_0[["readmission_index"]], 11L * c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) -CMRBS$elixhauser_current_0[["readmission_index"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_0"]][["readmission_index"]], 11L * c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) + CMRBS[[i]][["elixhauser_current_0"]][["readmission_index"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_0[["cmrb_flag"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) -CMRBS$elixhauser_current_0[["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_0"]][["cmrb_flag"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) + CMRBS[[i]][["elixhauser_current_0"]][["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_0[["num_cmrb"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) -CMRBS$elixhauser_current_0[["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_0"]][["num_cmrb"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7)))) + CMRBS[[i]][["elixhauser_current_0"]][["num_cmrb"]] <- NULL -stopifnot(all(CMRBS$elixhauser_current_0 == 0)) -CMRBS$elixhauser_current_0 <- NULL + stopifnot(all(CMRBS[[i]][["elixhauser_current_0"]] == 0)) + CMRBS[[i]][["elixhauser_current_0"]] <- NULL +} ################################################################################ # Charlson, current, poa 1 -stopifnot(identical(CMRBS$charlson_current_1[["patid"]], expected_patid)) -CMRBS$charlson_current_1[["patid"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["encid"]], expected_encid)) -CMRBS$charlson_current_1[["encid"]] <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["patid"]], expected_patid)) + CMRBS[[i]][["charlson_current_1"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["encid"]], expected_encid)) + CMRBS[[i]][["charlson_current_1"]][["encid"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["age_score"]], rep(NA_integer_, 12))) -CMRBS$charlson_current_1[["age_score"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["age_score"]], rep(NA_integer_, 12))) + CMRBS[[i]][["charlson_current_1"]][["age_score"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["chf"]], c(0L, 0L, 1L, 0L, 1L, rep(0L, 7L)))) -CMRBS$charlson_current_1[["chf"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["chf"]], c(0L, 0L, 1L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["charlson_current_1"]][["chf"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["mst"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7L)))) -CMRBS$charlson_current_1[["mst"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["mst"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["charlson_current_1"]][["mst"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["rnd"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$charlson_current_1[["rnd"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["rnd"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["charlson_current_1"]][["rnd"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$charlson_current_1[["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["charlson_current_1"]][["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["num_cmrb"]], c(0L, 1L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$charlson_current_1[["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["num_cmrb"]], c(0L, 1L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["charlson_current_1"]][["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_1[["cci"]], c(0L, 6L, 1L, 2L, 7L, 0L, 0L, 2L, 0L, 0L, 2L, 0L))) -CMRBS$charlson_current_1[["cci"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_1"]][["cci"]], c(0L, 6L, 1L, 2L, 7L, 0L, 0L, 2L, 0L, 0L, 2L, 0L))) + CMRBS[[i]][["charlson_current_1"]][["cci"]] <- NULL -stopifnot(all(CMRBS$charlson_current_1 == 0)) -CMRBS$charlson_current_1 <- NULL + stopifnot(all(CMRBS[[i]][["charlson_current_1"]] == 0)) + CMRBS[[i]][["charlson_current_1"]] <- NULL +} ################################################################################ # charlson_current_v -stopifnot(identical(CMRBS$charlson_current_v[["patid"]], expected_patid)) -CMRBS$charlson_current_v[["patid"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["encid"]], expected_encid)) -CMRBS$charlson_current_v[["encid"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["age_score"]], rep(NA_integer_, 12L))) -CMRBS$charlson_current_v[["age_score"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["chf"]], c(rep(0L, 2L), 1L, rep(0L, 9L)))) -CMRBS$charlson_current_v[["chf"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["rnd"]], c(rep(0L, 3L), 1L, rep(0L, 3L), 1L, rep(0L, 4L)))) -CMRBS$charlson_current_v[["rnd"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["mst"]], c(rep(0L, 4L), 1L, rep(0L, 7L)))) -CMRBS$charlson_current_v[["mst"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["num_cmrb"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$charlson_current_v[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$charlson_current_v[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$charlson_current_v[["cci"]], c(0L, 0L, 1L, 2L, 6L, 0L, 0L, 2L, 0L, 0L, 0L, 0L))) -CMRBS$charlson_current_v[["cci"]] <- NULL -stopifnot(all(CMRBS$charlson_current_v == 0)) -CMRBS$charlson_current_v <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["patid"]], expected_patid)) + CMRBS[[i]][["charlson_current_v"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["encid"]], expected_encid)) + CMRBS[[i]][["charlson_current_v"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["age_score"]], rep(NA_integer_, 12L))) + CMRBS[[i]][["charlson_current_v"]][["age_score"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["chf"]], c(rep(0L, 2L), 1L, rep(0L, 9L)))) + CMRBS[[i]][["charlson_current_v"]][["chf"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["rnd"]], c(rep(0L, 3L), 1L, rep(0L, 3L), 1L, rep(0L, 4L)))) + CMRBS[[i]][["charlson_current_v"]][["rnd"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["mst"]], c(rep(0L, 4L), 1L, rep(0L, 7L)))) + CMRBS[[i]][["charlson_current_v"]][["mst"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["num_cmrb"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["charlson_current_v"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["charlson_current_v"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_current_v"]][["cci"]], c(0L, 0L, 1L, 2L, 6L, 0L, 0L, 2L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["charlson_current_v"]][["cci"]] <- NULL + stopifnot(all(CMRBS[[i]][["charlson_current_v"]] == 0)) + CMRBS[[i]][["charlson_current_v"]] <- NULL +} ################################################################################ # charlson_cumulative_0 -stopifnot(identical(CMRBS$charlson_cumulative_0[["patid"]], expected_patid)) -CMRBS$charlson_cumulative_0[["patid"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["encid"]], expected_encid)) -CMRBS$charlson_cumulative_0[["encid"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["age_score"]], rep(NA_integer_, 12L))) -CMRBS$charlson_cumulative_0[["age_score"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["chf"]], c(0L, 0L, 0L, rep(1L, 4L), rep(0L, 5L)))) -CMRBS$charlson_cumulative_0[["chf"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["rnd"]], c(rep(0L, 4L), rep(1L, 3L), 0L, rep(1L, 4L)))) -CMRBS$charlson_cumulative_0[["rnd"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["mst"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) -CMRBS$charlson_cumulative_0[["mst"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["num_cmrb"]], c(0L, 0L, 1L, 2L, 3L, 3L, 3L, 0L, 1L, 1L, 1L, 1L))) -CMRBS$charlson_cumulative_0[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) -CMRBS$charlson_cumulative_0[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_0[["cci"]], c(0L, 0L, 6L, 7L, 9L, 9L, 9L, 0L, 2L, 2L, 2L, 2L))) -CMRBS$charlson_cumulative_0[["cci"]] <- NULL - -stopifnot(all(CMRBS$charlson_cumulative_0 == 0)) -CMRBS$charlson_cumulative_0 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["patid"]], expected_patid)) + CMRBS[[i]][["charlson_cumulative_0"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["encid"]], expected_encid)) + CMRBS[[i]][["charlson_cumulative_0"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["age_score"]], rep(NA_integer_, 12L))) + CMRBS[[i]][["charlson_cumulative_0"]][["age_score"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["chf"]], c(0L, 0L, 0L, rep(1L, 4L), rep(0L, 5L)))) + CMRBS[[i]][["charlson_cumulative_0"]][["chf"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["rnd"]], c(rep(0L, 4L), rep(1L, 3L), 0L, rep(1L, 4L)))) + CMRBS[[i]][["charlson_cumulative_0"]][["rnd"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["mst"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) + CMRBS[[i]][["charlson_cumulative_0"]][["mst"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["num_cmrb"]], c(0L, 0L, 1L, 2L, 3L, 3L, 3L, 0L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["charlson_cumulative_0"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["charlson_cumulative_0"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_0"]][["cci"]], c(0L, 0L, 6L, 7L, 9L, 9L, 9L, 0L, 2L, 2L, 2L, 2L))) + CMRBS[[i]][["charlson_cumulative_0"]][["cci"]] <- NULL + + stopifnot(all(CMRBS[[i]][["charlson_cumulative_0"]] == 0)) + CMRBS[[i]][["charlson_cumulative_0"]] <- NULL +} ################################################################################ # charlson_cumulative_1 -stopifnot(identical(CMRBS$charlson_cumulative_1[["patid"]], expected_patid)) -CMRBS$charlson_cumulative_1[["patid"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["encid"]], expected_encid)) -CMRBS$charlson_cumulative_1[["encid"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["age_score"]], rep(NA_integer_, 12L))) -CMRBS$charlson_cumulative_1[["age_score"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["chf"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) -CMRBS$charlson_cumulative_1[["chf"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["rnd"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) -CMRBS$charlson_cumulative_1[["rnd"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["mst"]], c(0L, rep(1L, 6L), rep(0L, 5)))) -CMRBS$charlson_cumulative_1[["mst"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) -CMRBS$charlson_cumulative_1[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["cmrb_flag"]], c(0L, rep(1L, 11L)))) -CMRBS$charlson_cumulative_1[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_1[["cci"]], c(0L, 6L, 7L, 9L, 9L, 9L, 9L, 2L, 2L, 2L, 2L, 2L))) -CMRBS$charlson_cumulative_1[["cci"]] <- NULL - -stopifnot(all(CMRBS$charlson_cumulative_1 == 0)) -CMRBS$charlson_cumulative_1 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["patid"]], expected_patid)) + CMRBS[[i]][["charlson_cumulative_1"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["encid"]], expected_encid)) + CMRBS[[i]][["charlson_cumulative_1"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["age_score"]], rep(NA_integer_, 12L))) + CMRBS[[i]][["charlson_cumulative_1"]][["age_score"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["chf"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) + CMRBS[[i]][["charlson_cumulative_1"]][["chf"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["rnd"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) + CMRBS[[i]][["charlson_cumulative_1"]][["rnd"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["mst"]], c(0L, rep(1L, 6L), rep(0L, 5)))) + CMRBS[[i]][["charlson_cumulative_1"]][["mst"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) + CMRBS[[i]][["charlson_cumulative_1"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["cmrb_flag"]], c(0L, rep(1L, 11L)))) + CMRBS[[i]][["charlson_cumulative_1"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_1"]][["cci"]], c(0L, 6L, 7L, 9L, 9L, 9L, 9L, 2L, 2L, 2L, 2L, 2L))) + CMRBS[[i]][["charlson_cumulative_1"]][["cci"]] <- NULL + + stopifnot(all(CMRBS[[i]][["charlson_cumulative_1"]] == 0)) + CMRBS[[i]][["charlson_cumulative_1"]] <- NULL +} ################################################################################ # charlson_cumulative_v -stopifnot(identical(CMRBS$charlson_cumulative_v[["patid"]], expected_patid)) -CMRBS$charlson_cumulative_v[["patid"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["encid"]], expected_encid)) -CMRBS$charlson_cumulative_v[["encid"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["age_score"]], rep(NA_integer_, 12L))) -CMRBS$charlson_cumulative_v[["age_score"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["chf"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) -CMRBS$charlson_cumulative_v[["chf"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["rnd"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) -CMRBS$charlson_cumulative_v[["rnd"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["mst"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) -CMRBS$charlson_cumulative_v[["mst"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["num_cmrb"]], c(0L, 0L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) -CMRBS$charlson_cumulative_v[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["cmrb_flag"]], c(0L, 0L, rep(1L, 10L)))) -CMRBS$charlson_cumulative_v[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$charlson_cumulative_v[["cci"]], c(0L, 0L, 7L, 9L, 9L, 9L, 9L, 2L, 2L, 2L, 2L, 2L))) -CMRBS$charlson_cumulative_v[["cci"]] <- NULL - -stopifnot(all(CMRBS$charlson_cumulative_v == 0)) -CMRBS$charlson_cumulative_v <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["patid"]], expected_patid)) + CMRBS[[i]][["charlson_cumulative_v"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["encid"]], expected_encid)) + CMRBS[[i]][["charlson_cumulative_v"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["age_score"]], rep(NA_integer_, 12L))) + CMRBS[[i]][["charlson_cumulative_v"]][["age_score"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["chf"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) + CMRBS[[i]][["charlson_cumulative_v"]][["chf"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["rnd"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) + CMRBS[[i]][["charlson_cumulative_v"]][["rnd"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["mst"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) + CMRBS[[i]][["charlson_cumulative_v"]][["mst"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["num_cmrb"]], c(0L, 0L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) + CMRBS[[i]][["charlson_cumulative_v"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["cmrb_flag"]], c(0L, 0L, rep(1L, 10L)))) + CMRBS[[i]][["charlson_cumulative_v"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["charlson_cumulative_v"]][["cci"]], c(0L, 0L, 7L, 9L, 9L, 9L, 9L, 2L, 2L, 2L, 2L, 2L))) + CMRBS[[i]][["charlson_cumulative_v"]][["cci"]] <- NULL + + stopifnot(all(CMRBS[[i]][["charlson_cumulative_v"]] == 0)) + CMRBS[[i]][["charlson_cumulative_v"]] <- NULL +} ################################################################################ # pccc_current_1 -stopifnot(identical(CMRBS$pccc_current_1[["patid"]], expected_patid)) -CMRBS$pccc_current_1[["patid"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["encid"]], expected_encid)) -CMRBS$pccc_current_1[["encid"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["cvd_dxpr_or_tech"]], c(0L, 0L, 1L, 0L, 1L, rep(0L, 7L)))) -CMRBS$pccc_current_1[["cvd_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["cvd_dxpr_only"]], c(0L, 0L, 1L, 0L, 1L, rep(0L, 7L)))) -CMRBS$pccc_current_1[["cvd_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["malignancy_dxpr_or_tech"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7L)))) -CMRBS$pccc_current_1[["malignancy_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["malignancy_dxpr_only"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7L)))) -CMRBS$pccc_current_1[["malignancy_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["renal_dxpr_or_tech"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$pccc_current_1[["renal_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["renal_dxpr_only"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$pccc_current_1[["renal_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$pccc_current_1[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_1[["num_cmrb"]], c(0L, 1L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$pccc_current_1[["num_cmrb"]] <- NULL - -stopifnot(all(CMRBS$pccc_current_1 == 0)) -CMRBS$pccc_current_1 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["patid"]], expected_patid)) + CMRBS[[i]][["pccc_current_1"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["encid"]], expected_encid)) + CMRBS[[i]][["pccc_current_1"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["cvd_dxpr_or_tech"]], c(0L, 0L, 1L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_1"]][["cvd_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["cvd_dxpr_only"]], c(0L, 0L, 1L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_1"]][["cvd_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["malignancy_dxpr_or_tech"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_1"]][["malignancy_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["malignancy_dxpr_only"]], c(0L, 1L, 0L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_1"]][["malignancy_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["renal_dxpr_or_tech"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["pccc_current_1"]][["renal_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["renal_dxpr_only"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["pccc_current_1"]][["renal_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["pccc_current_1"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_1"]][["num_cmrb"]], c(0L, 1L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["pccc_current_1"]][["num_cmrb"]] <- NULL + + stopifnot(all(CMRBS[[i]][["pccc_current_1"]] == 0)) + CMRBS[[i]][["pccc_current_1"]] <- NULL +} ################################################################################ # pccc_current_v -stopifnot(identical(CMRBS$pccc_current_v[["patid"]], expected_patid)) -CMRBS$pccc_current_v[["patid"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["encid"]], expected_encid)) -CMRBS$pccc_current_v[["encid"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["cvd_dxpr_or_tech"]], c(0L, 0L, 1L, 0L, 0L, rep(0L, 7L)))) -CMRBS$pccc_current_v[["cvd_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["cvd_dxpr_only"]], c(0L, 0L, 1L, 0L, 0L, rep(0L, 7L)))) -CMRBS$pccc_current_v[["cvd_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["malignancy_dxpr_or_tech"]], c(0L, 0L, 0L, 0L, 1L, rep(0L, 7L)))) -CMRBS$pccc_current_v[["malignancy_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["malignancy_dxpr_only"]], c(0L, 0L, 0L, 0L, 1L, rep(0L, 7L)))) -CMRBS$pccc_current_v[["malignancy_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["renal_dxpr_or_tech"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$pccc_current_v[["renal_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["renal_dxpr_only"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$pccc_current_v[["renal_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$pccc_current_v[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$pccc_current_v[["num_cmrb"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$pccc_current_v[["num_cmrb"]] <- NULL - -stopifnot(all(CMRBS$pccc_current_v == 0)) -CMRBS$pccc_current_v <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["patid"]], expected_patid)) + CMRBS[[i]][["pccc_current_v"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["encid"]], expected_encid)) + CMRBS[[i]][["pccc_current_v"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["cvd_dxpr_or_tech"]], c(0L, 0L, 1L, 0L, 0L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_v"]][["cvd_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["cvd_dxpr_only"]], c(0L, 0L, 1L, 0L, 0L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_v"]][["cvd_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["malignancy_dxpr_or_tech"]], c(0L, 0L, 0L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_v"]][["malignancy_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["malignancy_dxpr_only"]], c(0L, 0L, 0L, 0L, 1L, rep(0L, 7L)))) + CMRBS[[i]][["pccc_current_v"]][["malignancy_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["renal_dxpr_or_tech"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["pccc_current_v"]][["renal_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["renal_dxpr_only"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["pccc_current_v"]][["renal_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["pccc_current_v"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_current_v"]][["num_cmrb"]], c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["pccc_current_v"]][["num_cmrb"]] <- NULL + + stopifnot(all(CMRBS[[i]][["pccc_current_v"]] == 0)) + CMRBS[[i]][["pccc_current_v"]] <- NULL +} ################################################################################ # pccc_cumulative_0 -stopifnot(identical(CMRBS$pccc_cumulative_0[["patid"]], expected_patid)) -CMRBS$pccc_cumulative_0[["patid"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["encid"]], expected_encid)) -CMRBS$pccc_cumulative_0[["encid"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["cvd_dxpr_or_tech"]], c(0L, 0L, 0L, rep(1L, 4L), rep(0L, 5L)))) -CMRBS$pccc_cumulative_0[["cvd_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["cvd_dxpr_only"]], c(0L, 0L, 0L, rep(1L, 4L), rep(0L, 5L)))) -CMRBS$pccc_cumulative_0[["cvd_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["renal_dxpr_or_tech"]], c(rep(0L, 4L), rep(1L, 3L), 0L, rep(1L, 4L)))) -CMRBS$pccc_cumulative_0[["renal_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["renal_dxpr_only"]], c(rep(0L, 4L), rep(1L, 3L), 0L, rep(1L, 4L)))) -CMRBS$pccc_cumulative_0[["renal_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["malignancy_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) -CMRBS$pccc_cumulative_0[["malignancy_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["malignancy_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) -CMRBS$pccc_cumulative_0[["malignancy_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["num_cmrb"]], c(0L, 0L, 1L, 2L, 3L, 3L, 3L, 0L, 1L, 1L, 1L, 1L))) -CMRBS$pccc_cumulative_0[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_0[["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) -CMRBS$pccc_cumulative_0[["cmrb_flag"]] <- NULL - -stopifnot(all(CMRBS$pccc_cumulative_0 == 0)) -CMRBS$pccc_cumulative_0 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["patid"]], expected_patid)) + CMRBS[[i]][["pccc_cumulative_0"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["encid"]], expected_encid)) + CMRBS[[i]][["pccc_cumulative_0"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["cvd_dxpr_or_tech"]], c(0L, 0L, 0L, rep(1L, 4L), rep(0L, 5L)))) + CMRBS[[i]][["pccc_cumulative_0"]][["cvd_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["cvd_dxpr_only"]], c(0L, 0L, 0L, rep(1L, 4L), rep(0L, 5L)))) + CMRBS[[i]][["pccc_cumulative_0"]][["cvd_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["renal_dxpr_or_tech"]], c(rep(0L, 4L), rep(1L, 3L), 0L, rep(1L, 4L)))) + CMRBS[[i]][["pccc_cumulative_0"]][["renal_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["renal_dxpr_only"]], c(rep(0L, 4L), rep(1L, 3L), 0L, rep(1L, 4L)))) + CMRBS[[i]][["pccc_cumulative_0"]][["renal_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["malignancy_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) + CMRBS[[i]][["pccc_cumulative_0"]][["malignancy_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["malignancy_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) + CMRBS[[i]][["pccc_cumulative_0"]][["malignancy_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["num_cmrb"]], c(0L, 0L, 1L, 2L, 3L, 3L, 3L, 0L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["pccc_cumulative_0"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_0"]][["cmrb_flag"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["pccc_cumulative_0"]][["cmrb_flag"]] <- NULL + + stopifnot(all(CMRBS[[i]][["pccc_cumulative_0"]] == 0)) + CMRBS[[i]][["pccc_cumulative_0"]] <- NULL +} ################################################################################ # pccc_cumulative_1 -stopifnot(identical(CMRBS$pccc_cumulative_1[["patid"]], expected_patid)) -CMRBS$pccc_cumulative_1[["patid"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["encid"]], expected_encid)) -CMRBS$pccc_cumulative_1[["encid"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["cvd_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) -CMRBS$pccc_cumulative_1[["cvd_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["renal_dxpr_or_tech"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) -CMRBS$pccc_cumulative_1[["renal_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["malignancy_dxpr_or_tech"]], c(0L, rep(1L, 6L), rep(0L, 5)))) -CMRBS$pccc_cumulative_1[["malignancy_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["cvd_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) -CMRBS$pccc_cumulative_1[["cvd_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["renal_dxpr_only"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) -CMRBS$pccc_cumulative_1[["renal_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["malignancy_dxpr_only"]], c(0L, rep(1L, 6L), rep(0L, 5)))) -CMRBS$pccc_cumulative_1[["malignancy_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) -CMRBS$pccc_cumulative_1[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_1[["cmrb_flag"]], c(0L, rep(1L, 11L)))) -CMRBS$pccc_cumulative_1[["cmrb_flag"]] <- NULL - -stopifnot(all(CMRBS$pccc_cumulative_1 == 0)) -CMRBS$pccc_cumulative_1 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["patid"]], expected_patid)) + CMRBS[[i]][["pccc_cumulative_1"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["encid"]], expected_encid)) + CMRBS[[i]][["pccc_cumulative_1"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["cvd_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) + CMRBS[[i]][["pccc_cumulative_1"]][["cvd_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["renal_dxpr_or_tech"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) + CMRBS[[i]][["pccc_cumulative_1"]][["renal_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["malignancy_dxpr_or_tech"]], c(0L, rep(1L, 6L), rep(0L, 5)))) + CMRBS[[i]][["pccc_cumulative_1"]][["malignancy_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["cvd_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) + CMRBS[[i]][["pccc_cumulative_1"]][["cvd_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["renal_dxpr_only"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) + CMRBS[[i]][["pccc_cumulative_1"]][["renal_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["malignancy_dxpr_only"]], c(0L, rep(1L, 6L), rep(0L, 5)))) + CMRBS[[i]][["pccc_cumulative_1"]][["malignancy_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) + CMRBS[[i]][["pccc_cumulative_1"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_1"]][["cmrb_flag"]], c(0L, rep(1L, 11L)))) + CMRBS[[i]][["pccc_cumulative_1"]][["cmrb_flag"]] <- NULL + + stopifnot(all(CMRBS[[i]][["pccc_cumulative_1"]] == 0)) + CMRBS[[i]][["pccc_cumulative_1"]] <- NULL +} ################################################################################ # pccc_cumulative_v -stopifnot(identical(CMRBS$pccc_cumulative_v[["patid"]], expected_patid)) -CMRBS$pccc_cumulative_v[["patid"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["encid"]], expected_encid)) -CMRBS$pccc_cumulative_v[["encid"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["cvd_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) -CMRBS$pccc_cumulative_v[["cvd_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["renal_dxpr_or_tech"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) -CMRBS$pccc_cumulative_v[["renal_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["malignancy_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) -CMRBS$pccc_cumulative_v[["malignancy_dxpr_or_tech"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["cvd_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) -CMRBS$pccc_cumulative_v[["cvd_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["renal_dxpr_only"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) -CMRBS$pccc_cumulative_v[["renal_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["malignancy_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) -CMRBS$pccc_cumulative_v[["malignancy_dxpr_only"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["num_cmrb"]], c(0L, 0L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) -CMRBS$pccc_cumulative_v[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$pccc_cumulative_v[["cmrb_flag"]], c(0L, 0L, rep(1L, 10L)))) -CMRBS$pccc_cumulative_v[["cmrb_flag"]] <- NULL - -stopifnot(all(CMRBS$pccc_cumulative_v == 0)) -CMRBS$pccc_cumulative_v <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["patid"]], expected_patid)) + CMRBS[[i]][["pccc_cumulative_v"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["encid"]], expected_encid)) + CMRBS[[i]][["pccc_cumulative_v"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["cvd_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) + CMRBS[[i]][["pccc_cumulative_v"]][["cvd_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["renal_dxpr_or_tech"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) + CMRBS[[i]][["pccc_cumulative_v"]][["renal_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["malignancy_dxpr_or_tech"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) + CMRBS[[i]][["pccc_cumulative_v"]][["malignancy_dxpr_or_tech"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["cvd_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5L)))) + CMRBS[[i]][["pccc_cumulative_v"]][["cvd_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["renal_dxpr_only"]], c(rep(0L, 3L), rep(1L, 4L), rep(1L, 5L)))) + CMRBS[[i]][["pccc_cumulative_v"]][["renal_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["malignancy_dxpr_only"]], c(0L, 0L, rep(1L, 5L), rep(0L, 5)))) + CMRBS[[i]][["pccc_cumulative_v"]][["malignancy_dxpr_only"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["num_cmrb"]], c(0L, 0L, 2L, 3L, 3L, 3L, 3L, rep(1L, 5L)))) + CMRBS[[i]][["pccc_cumulative_v"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["pccc_cumulative_v"]][["cmrb_flag"]], c(0L, 0L, rep(1L, 10L)))) + CMRBS[[i]][["pccc_cumulative_v"]][["cmrb_flag"]] <- NULL + + stopifnot(all(CMRBS[[i]][["pccc_cumulative_v"]] == 0)) + CMRBS[[i]][["pccc_cumulative_v"]] <- NULL +} ################################################################################ # elixhauser_current_1 -stopifnot(identical(CMRBS$elixhauser_current_1[["patid"]], expected_patid)) -CMRBS$elixhauser_current_1[["patid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["encid"]], expected_encid)) -CMRBS$elixhauser_current_1[["encid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$elixhauser_current_1[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["num_cmrb"]], c(0L, 1L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$elixhauser_current_1[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) -CMRBS$elixhauser_current_1[["RENLFL_SEV"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["HF"]], c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_1[["HF"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["CANCER_METS"]], c(0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_1[["CANCER_METS"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["mortality_index"]], c(0L, 22L, 14L, 7L, 36L, 0L, 0L, 7L, 0L, 0L, 7L, 0L))) -CMRBS$elixhauser_current_1[["mortality_index"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_1[["readmission_index"]], c(0L, 11L, 7L, 8L, 18L, 0L, 0L, 8L, 0L, 0L, 8L, 0L))) -CMRBS$elixhauser_current_1[["readmission_index"]] <- NULL - -stopifnot(all(CMRBS$elixhauser_current_1 == 0)) -CMRBS$elixhauser_current_1 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["patid"]], expected_patid)) + CMRBS[[i]][["elixhauser_current_1"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["encid"]], expected_encid)) + CMRBS[[i]][["elixhauser_current_1"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["elixhauser_current_1"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["num_cmrb"]], c(0L, 1L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["elixhauser_current_1"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L))) + CMRBS[[i]][["elixhauser_current_1"]][["RENLFL_SEV"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["HF"]], c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_1"]][["HF"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["CANCER_METS"]], c(0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_1"]][["CANCER_METS"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["mortality_index"]], c(0L, 22L, 14L, 7L, 36L, 0L, 0L, 7L, 0L, 0L, 7L, 0L))) + CMRBS[[i]][["elixhauser_current_1"]][["mortality_index"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_1"]][["readmission_index"]], c(0L, 11L, 7L, 8L, 18L, 0L, 0L, 8L, 0L, 0L, 8L, 0L))) + CMRBS[[i]][["elixhauser_current_1"]][["readmission_index"]] <- NULL + + stopifnot(all(CMRBS[[i]][["elixhauser_current_1"]] == 0)) + CMRBS[[i]][["elixhauser_current_1"]] <- NULL +} ################################################################################ # elixhauser_current_v -stopifnot(identical(CMRBS$elixhauser_current_v[["patid"]], expected_patid)) -CMRBS$elixhauser_current_v[["patid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["encid"]], expected_encid)) -CMRBS$elixhauser_current_v[["encid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_v[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["num_cmrb"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_v[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_v[["RENLFL_SEV"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["HF"]], c(0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_v[["HF"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["CANCER_METS"]], c(0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_v[["CANCER_METS"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["mortality_index"]], c(0L, 22L, 14L, 7L, 22L, 0L, 0L, 7L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_v[["mortality_index"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_current_v[["readmission_index"]], c(0L, 11L, 7L, 8L, 11L, 0L, 0L, 8L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_current_v[["readmission_index"]] <- NULL - -stopifnot(all(CMRBS$elixhauser_current_v == 0)) -CMRBS$elixhauser_current_v <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["patid"]], expected_patid)) + CMRBS[[i]][["elixhauser_current_v"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["encid"]], expected_encid)) + CMRBS[[i]][["elixhauser_current_v"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_v"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["num_cmrb"]], c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_v"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_v"]][["RENLFL_SEV"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["HF"]], c(0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_v"]][["HF"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["CANCER_METS"]], c(0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_v"]][["CANCER_METS"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["mortality_index"]], c(0L, 22L, 14L, 7L, 22L, 0L, 0L, 7L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_v"]][["mortality_index"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_current_v"]][["readmission_index"]], c(0L, 11L, 7L, 8L, 11L, 0L, 0L, 8L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_current_v"]][["readmission_index"]] <- NULL + + stopifnot(all(CMRBS[[i]][["elixhauser_current_v"]] == 0)) + CMRBS[[i]][["elixhauser_current_v"]] <- NULL +} ################################################################################ # elixhauser_cumulative_0 -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["patid"]], expected_patid)) -CMRBS$elixhauser_cumulative_0[["patid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["encid"]], expected_encid)) -CMRBS$elixhauser_cumulative_0[["encid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_0[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["num_cmrb"]], c(0L, 1L, 1L, 2L, 3L, 3L, 3L, 0L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_0[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["RENLFL_SEV"]], c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_0[["RENLFL_SEV"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["HF"]], c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_cumulative_0[["HF"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["CANCER_METS"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_cumulative_0[["CANCER_METS"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["mortality_index"]], c(0L, 22L, 22L, 36L, 43L, 43L, 43L, 0L, 7L, 7L, 7L, 7L))) -CMRBS$elixhauser_cumulative_0[["mortality_index"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_0[["readmission_index"]], c(0L, 11L, 11L, 18L, 26L, 26L, 26L, 0L, 8L, 8L, 8L, 8L))) -CMRBS$elixhauser_cumulative_0[["readmission_index"]] <- NULL - -stopifnot(all(CMRBS$elixhauser_cumulative_0 == 0)) -CMRBS$elixhauser_cumulative_0 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["patid"]], expected_patid)) + CMRBS[[i]][["elixhauser_cumulative_0"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["encid"]], expected_encid)) + CMRBS[[i]][["elixhauser_cumulative_0"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_0"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["num_cmrb"]], c(0L, 1L, 1L, 2L, 3L, 3L, 3L, 0L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_0"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["RENLFL_SEV"]], c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_0"]][["RENLFL_SEV"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["HF"]], c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_cumulative_0"]][["HF"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["CANCER_METS"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_cumulative_0"]][["CANCER_METS"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["mortality_index"]], c(0L, 22L, 22L, 36L, 43L, 43L, 43L, 0L, 7L, 7L, 7L, 7L))) + CMRBS[[i]][["elixhauser_cumulative_0"]][["mortality_index"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_0"]][["readmission_index"]], c(0L, 11L, 11L, 18L, 26L, 26L, 26L, 0L, 8L, 8L, 8L, 8L))) + CMRBS[[i]][["elixhauser_cumulative_0"]][["readmission_index"]] <- NULL + + stopifnot(all(CMRBS[[i]][["elixhauser_cumulative_0"]] == 0)) + CMRBS[[i]][["elixhauser_cumulative_0"]] <- NULL +} ################################################################################ # elixhauser_cumulative_1 -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["patid"]], expected_patid)) -CMRBS$elixhauser_cumulative_1[["patid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["encid"]], expected_encid)) -CMRBS$elixhauser_cumulative_1[["encid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_1[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_1[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_1[["RENLFL_SEV"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["HF"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_cumulative_1[["HF"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["CANCER_METS"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_cumulative_1[["CANCER_METS"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["mortality_index"]], c(0L, 22L, 36L, 43L, 43L, 43L, 43L, 7L, 7L, 7L, 7L, 7L))) -CMRBS$elixhauser_cumulative_1[["mortality_index"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_1[["readmission_index"]], c(0L, 11L, 18L, 26L, 26L, 26L, 26L, 8L, 8L, 8L, 8L, 8L))) -CMRBS$elixhauser_cumulative_1[["readmission_index"]] <- NULL - -stopifnot(all(CMRBS$elixhauser_cumulative_1 == 0)) -CMRBS$elixhauser_cumulative_1 <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["patid"]], expected_patid)) + CMRBS[[i]][["elixhauser_cumulative_1"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["encid"]], expected_encid)) + CMRBS[[i]][["elixhauser_cumulative_1"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_1"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_1"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_1"]][["RENLFL_SEV"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["HF"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_cumulative_1"]][["HF"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["CANCER_METS"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_cumulative_1"]][["CANCER_METS"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["mortality_index"]], c(0L, 22L, 36L, 43L, 43L, 43L, 43L, 7L, 7L, 7L, 7L, 7L))) + CMRBS[[i]][["elixhauser_cumulative_1"]][["mortality_index"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_1"]][["readmission_index"]], c(0L, 11L, 18L, 26L, 26L, 26L, 26L, 8L, 8L, 8L, 8L, 8L))) + CMRBS[[i]][["elixhauser_cumulative_1"]][["readmission_index"]] <- NULL + + stopifnot(all(CMRBS[[i]][["elixhauser_cumulative_1"]] == 0)) + CMRBS[[i]][["elixhauser_cumulative_1"]] <- NULL +} ################################################################################ # elixhauser_cumulative_v -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["patid"]], expected_patid)) -CMRBS$elixhauser_cumulative_v[["patid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["encid"]], expected_encid)) -CMRBS$elixhauser_cumulative_v[["encid"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_v[["cmrb_flag"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_v[["num_cmrb"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) -CMRBS$elixhauser_cumulative_v[["RENLFL_SEV"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["HF"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_cumulative_v[["HF"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["CANCER_METS"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) -CMRBS$elixhauser_cumulative_v[["CANCER_METS"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["mortality_index"]], c(0L, 22L, 36L, 43L, 43L, 43L, 43L, 7L, 7L, 7L, 7L, 7L))) -CMRBS$elixhauser_cumulative_v[["mortality_index"]] <- NULL -stopifnot(identical(CMRBS$elixhauser_cumulative_v[["readmission_index"]], c(0L, 11L, 18L, 26L, 26L, 26L, 26L, 8L, 8L, 8L, 8L, 8L))) -CMRBS$elixhauser_cumulative_v[["readmission_index"]] <- NULL - -stopifnot(all(CMRBS$elixhauser_cumulative_v == 0)) -CMRBS$elixhauser_cumulative_v <- NULL +for (i in seq_len(length(CMRBS))) { + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["patid"]], expected_patid)) + CMRBS[[i]][["elixhauser_cumulative_v"]][["patid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["encid"]], expected_encid)) + CMRBS[[i]][["elixhauser_cumulative_v"]][["encid"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["cmrb_flag"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_v"]][["cmrb_flag"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["num_cmrb"]], c(0L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_v"]][["num_cmrb"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["RENLFL_SEV"]], c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L))) + CMRBS[[i]][["elixhauser_cumulative_v"]][["RENLFL_SEV"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["HF"]], c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_cumulative_v"]][["HF"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["CANCER_METS"]], c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L))) + CMRBS[[i]][["elixhauser_cumulative_v"]][["CANCER_METS"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["mortality_index"]], c(0L, 22L, 36L, 43L, 43L, 43L, 43L, 7L, 7L, 7L, 7L, 7L))) + CMRBS[[i]][["elixhauser_cumulative_v"]][["mortality_index"]] <- NULL + stopifnot(identical(CMRBS[[i]][["elixhauser_cumulative_v"]][["readmission_index"]], c(0L, 11L, 18L, 26L, 26L, 26L, 26L, 8L, 8L, 8L, 8L, 8L))) + CMRBS[[i]][["elixhauser_cumulative_v"]][["readmission_index"]] <- NULL + + stopifnot(all(CMRBS[[i]][["elixhauser_cumulative_v"]] == 0)) + CMRBS[[i]][["elixhauser_cumulative_v"]] <- NULL +} ################################################################################ #summary(CMRBS) +stopifnot(identical(length(CMRBS[["DF"]]), 0L)) +CMRBS[["DF"]] <- NULL + +stopifnot(identical(length(CMRBS[["DT"]]), 0L)) +CMRBS[["DT"]] <- NULL + +stopifnot(identical(length(CMRBS[["TBL"]]), 0L)) +CMRBS[["TBL"]] <- NULL stopifnot(identical(length(CMRBS), 0L)) diff --git a/tests/test-tibble-datatable.R b/tests/test-tibble-datatable.R index 4ed84bef..daef1338 100644 --- a/tests/test-tibble-datatable.R +++ b/tests/test-tibble-datatable.R @@ -2,10 +2,10 @@ source('utilities.R') library(medicalcoder) ################################################################################ -# Prep data I want to test the output with out loading or attaching the -# data.table or the tibble namespaces. So, the following commented out code is +# Prep data I want to test the output without loading or attaching the +# data.table or the dplyr namespaces. So, the following commented out code is # run once, and only run when needed to update the data, so that a data.frame, -# data.table, and tibble are all available. +# data.table, and dplyr are all available. # # To keep the disk space use down, use only a subset of the mdcr data set @@ -34,7 +34,7 @@ library(medicalcoder) ### x <- mdcr[mdcr$patid %in% c1[c(foo(0), foo(1), foo(2), foo(3), foo(4)), "patid"], ] ### ### saveRDS(x, file = "mdcr_subset_DF.rds", compress = "xz") -### saveRDS(tibble::as_tibble(x), file = "mdcr_subset_TBL.rds", compress = "xz") +### saveRDS(dplyr::as_tibble(x), file = "mdcr_subset_TBL.rds", compress = "xz") ### data.table::setDT(x) ### saveRDS(x, file = "mdcr_subset_DT.rds", compress = "xz") @@ -172,20 +172,20 @@ for (obj in ls(envir = DFS, all.names = TRUE)) { ################################################################################ # For tibbles -if (requireNamespace("tibble", quietly = TRUE)) { +if (requireNamespace("dplyr", quietly = TRUE)) { stopifnot(is.data.frame(mdcrDF)) stopifnot(is.data.frame(mdcrTBL)) - stopifnot(tibble::is_tibble(mdcrTBL)) + stopifnot(inherits(mdcrTBL, "tbl_df")) for (obj in ls(envir = TBLS, all.names = TRUE)) { if (grepl("_with_subconditions", obj)) { - stopifnot(tibble::is_tibble(TBLS[[obj]][["conditions"]])) + stopifnot(inherits(TBLS[[obj]][["conditions"]], "tbl_df")) TBLS[[obj]][["conditions"]] <- as.data.frame(TBLS[[obj]][["conditions"]]) for (sc in names(TBLS[[obj]][["subconditions"]])) { - stopifnot(tibble::is_tibble(TBLS[[obj]][["subconditions"]][[sc]])) + stopifnot(inherits(TBLS[[obj]][["subconditions"]][[sc]], "tbl_df")) TBLS[[obj]][["subconditions"]][[sc]] <- as.data.frame(TBLS[[obj]][["subconditions"]][[sc]]) } } else { - stopifnot(tibble::is_tibble(TBLS[[obj]])) + stopifnot(inherits(TBLS[[obj]], "tbl_df")) TBLS[[obj]] <- as.data.frame(TBLS[[obj]]) class(TBLS[[obj]]) <- c("medicalcoder_comorbidities", class(TBLS[[obj]])) } diff --git a/vignettes/articles/medicalcoder-vs-comorbidity.Rmd b/vignettes/articles/medicalcoder-vs-comorbidity.Rmd index 37cacb4c..b2cb27fd 100644 --- a/vignettes/articles/medicalcoder-vs-comorbidity.Rmd +++ b/vignettes/articles/medicalcoder-vs-comorbidity.Rmd @@ -535,7 +535,11 @@ The medicalcoder package was built to use base R methods and has zero imports. That said, if the input data set to `medicalcoder::comorbidities()` is a [`data.table`](https://cran.r-project.org/package=data.table) and the `data.table` namespace is available, the S3 methods for `data.table` will be used -and there will be a performance improvement. +and there will be a performance improvement. When given a +[`tibble`](https://tibble.tidyverse.org/) and the +[`dplyr`](https://dplyr.tidyverse.org/) namespace is available, the tibble-aware +path improves on base `data.frame` performance but remains slower than +`data.table`; see the project benchmarking results for details. comorbidity imports several namespaces, including `data.table`, and uses `data.table` for efficiency. diff --git a/vignettes/comorbidities.Rmd b/vignettes/comorbidities.Rmd index f84bb2bb..0c3180f0 100644 --- a/vignettes/comorbidities.Rmd +++ b/vignettes/comorbidities.Rmd @@ -116,8 +116,8 @@ a default value to be applied to the entirety of `data`. We will see some examples where this is useful. The `data` element is expected to be a `data.frame`, or at least something that -inherits the `data.frame` class. The format is expected to be a 'long' format. -One row per code. Two example data sets in the package show the general +inherits the `data.frame` class. The format is expected to be a 'long' format: +one ICD code per row. Two example data sets in the package show the general expected form of the `data`. ```{r label = "example-data"} @@ -125,10 +125,6 @@ head(mdcr) head(mdcr_longitudinal) ``` -```{r} -``` - - # When are conditions flagged? Whether or not the code is present on admission (POA) is useful when applying diff --git a/vignettes/icd.Rmd b/vignettes/icd.Rmd index 7006612e..11da6c07 100644 --- a/vignettes/icd.Rmd +++ b/vignettes/icd.Rmd @@ -30,11 +30,11 @@ with International Classification of Diseases (ICD) codes. # `get_icd_codes()` -A lookup table for the ICD codes has been built as internal data sets within -the medicalcoder package. The sources for these lookup tables come from the -Centers for Disease Control (CDC) and from the Centers for Medicare & Medicaid -Services (CMS). The specific links to the source data sets can be found in the -source code for the medicalcoder package on +A lookup table for the ICD codes has been built as internal data sets within the +medicalcoder package. The sources for these lookup tables come from the Centers +for Disease Control (CDC) and from the Centers for Medicare & Medicaid Services +(CMS) and World Health Organization (WHO). The specific links to the source +data sets can be found in the source code for the medicalcoder package on [GitHub](https://github.com/dewittpe/medicalcoder). End users can get a `data.frame` with @@ -67,10 +67,12 @@ The columns of this data.frame are: * For codes from the World Health Organization (WHO) and the Centers for Disease Control and Prevention (CDC) Mortality coding, the year is _calendar year_. + * For ICD-9, CDC extracts in medicalcoder span fiscal years 1997--2012 and + CMS extracts span fiscal years 2006--2015. * `known_end`: the last year the code was part of the standard, or that the medicalcoder package has data for. - * ICD-9 last year of active use was FY 2015. + * ICD-9 CMS extracts run through FY 2015; CDC ICD-9 extracts end at FY 2012. * ICD-10 is active. The current version of medicalcoder has details on ICD-10 codes through FY `r max(icd_codes[["known_end"]][icd_codes[["icdv"]] == 10L])`. @@ -276,13 +278,13 @@ tab <- data.frame( code = x, default = is_icd(x, icdv = 9, dx = 1), - assignable_1997 = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 1997), - assignable_2010 = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2010), - assignable_2011 = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2011), - assignable_2012 = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2012), - assignable_2013 = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2013), - assignable_2016 = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2016), - assignable_ever = is_icd(x, src = "cms", icdv = 9, dx = 1, ever.assignable = TRUE) + assignable_1997_cdc = is_icd(x, src = "cdc", icdv = 9, dx = 1, year = 1997), + assignable_2010_cms = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2010), + assignable_2011_cms = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2011), + assignable_2012_cdc = is_icd(x, src = "cdc", icdv = 9, dx = 1, year = 2012), + assignable_2012_cms = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2012), + assignable_2015_cms = is_icd(x, src = "cms", icdv = 9, dx = 1, year = 2015), + assignable_ever_cdc = is_icd(x, src = "cdc", icdv = 9, dx = 1, ever.assignable = TRUE) ) knitr::kable(tab) ```