diff --git a/NEWS.md b/NEWS.md index b4cc72b..6467317 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,12 @@ change will generally result in less computation time than base R `data.frames` (`data.tables` require even less time). +## Bug Fixes + +* `summary.medicalcoder_comorbidites()` no longer crashes when a zero row input + is passed in. Consisently return `NA` instead of `NaN` when counts are zeros. + (#26, #27) + ## Other Changes * Extend and improve the internal ICD-9 database to distinguish between CDC and diff --git a/R/summary.R b/R/summary.R index e629750..ae6af09 100644 --- a/R/summary.R +++ b/R/summary.R @@ -130,7 +130,7 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { condition = cnd, subcondition = NA_character_, count = counts[cnd], - percent_of_cohort = 100 * counts[cnd] / N, + percent_of_cohort = if (N > 0) {100 * counts[cnd] / N} else {NA_real_}, percent_of_those_with_condition = NA_real_, stringsAsFactors = FALSE ) @@ -138,8 +138,8 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { condition = cnd, subcondition = names(scounts[[cnd]]), count = scounts[[cnd]], - percent_of_cohort = 100 * scounts[[cnd]] / N, - percent_of_those_with_condition = 100 * scounts[[cnd]] / counts[cnd], + percent_of_cohort = if (N > 0) {100 * scounts[[cnd]] / N} else {NA_real_}, + percent_of_those_with_condition = ifelse(scounts[[cnd]] > 0, 100 * scounts[[cnd]] / counts[cnd], NA_real_), stringsAsFactors = FALSE ) rbind(x1, x2) @@ -167,15 +167,19 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { # Track running counts for patients meeting at least N conditions # so the summary can report distribution thresholds (>=2, >=3, ...). - tlts <- sapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)}) + tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)}) + tlts <- do.call(cbind, tlts) colnames(tlts) <- paste(">=", 2:11, "conditions") + p <- 100 * c(colMeans(cnds), colMeans(tlts)) + p <- ifelse(is.nan(p), NA_real_, p) + rtn <- data.frame( condition = c(names(cnds), rep("num_cmrb", ncol(tlts))), label = c(conditions[["condition_label"]], "Any Technology Dependence", "Any Transplantation", "Any Condition", colnames(tlts)), count = as.integer(c(colSums(cnds), colSums(tlts))), - percent = 100 * c(colMeans(cnds), colMeans(tlts)), + percent = p, stringsAsFactors = FALSE ) @@ -195,7 +199,8 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { # Track running counts for patients meeting at least N conditions # so the summary can report distribution thresholds (>=2, >=3, ...). - tlts <- sapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)}) + tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)}) + tlts <- do.call(cbind, tlts) colnames(tlts) <- paste(">=", 2:11, "conditions") sets <- @@ -210,7 +215,11 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { counts <- lapply(sets, colSums, na.rm = TRUE) N <- nrow(object) - percents <- lapply(counts, function(x) 100 * x / N) + percents <- lapply(counts, + function(x) { + y <- 100 * x / N + ifelse(is.nan(y), NA_real_, y) + }) rtn <- data.frame( @@ -256,11 +265,11 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { cmrbs <- ..mdcr_internal_charlson_index_scores..[!is.na( ..mdcr_internal_charlson_index_scores..[[attr(object, "method")]]), c("condition_description", "condition")] - cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]]) + cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]]) cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]]) num_cmrbs <- - lapply(seq_len(max(object[["num_cmrb"]])), + lapply(seq_len(max(c(1L, object[["num_cmrb"]]))), function(x) { y <- object[["num_cmrb"]] >= x data.frame( @@ -275,16 +284,31 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { cmrbs <- rbind(cmrbs, num_cmrbs) rownames(cmrbs) <- NULL + # set percent to NA instead of NaN + cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_ + index_summary <- - data.frame( - min = min(object[["cci"]]), - q1 = stats::quantile(object[["cci"]], prob = 0.25), - median = stats::median(object[["cci"]]), - q3 = stats::quantile(object[["cci"]], prob = 0.75), - max = max(object[["cci"]]), - row.names = NULL, - stringsAsFactors = FALSE - ) + if (length(object[["cci"]]) > 0L) { + data.frame( + min = min(object[["cci"]]), + q1 = stats::quantile(object[["cci"]], prob = 0.25), + median = stats::median(object[["cci"]]), + q3 = stats::quantile(object[["cci"]], prob = 0.75), + max = max(object[["cci"]]), + row.names = NULL, + stringsAsFactors = FALSE + ) + } else { + data.frame( + min = NA_integer_, + q1 = NA_real_, + median = NA_real_, + q3 = NA_real_, + max = NA_integer_, + row.names = NULL, + stringsAsFactors = FALSE + ) + } age_summary <- merge( @@ -293,6 +317,9 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { by = c("age_score") ) + # set NA instead of NaN + age_summary[["percent"]][is.nan(age_summary[["percent"]])] <- NA_real_ + list( conditions = cmrbs, age_summary = age_summary, @@ -308,11 +335,11 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { cmrbs <- ..mdcr_internal_elixhauser_index_scores..[!is.na( ..mdcr_internal_elixhauser_index_scores..[[attr(object, "method")]]), "condition", drop = FALSE] cmrbs <- unique(cmrbs) - cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]]) + cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]]) cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]]) num_cmrbs <- - lapply(seq_len(max(object[["num_cmrb"]])), + lapply(seq_len(max(c(1L, object[["num_cmrb"]]))), function(x) { y <- object[["num_cmrb"]] >= x data.frame(condition = paste(">=", x), count = sum(y), percent = 100 * mean(y), stringsAsFactors = FALSE) @@ -320,23 +347,38 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) { num_cmrbs <- do.call(rbind, num_cmrbs) cmrbs <- rbind(cmrbs, num_cmrbs) rownames(cmrbs) <- NULL + # set percent to NA instead of NaN + cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_ index_summary <- - data.frame( - index = c("readmission", "mortality"), - min = c(min(object[["readmission_index"]]), - min(object[["mortality_index"]])), - q1 = c(stats::quantile(object[["readmission_index"]], prob = 0.25), - stats::quantile(object[["mortality_index"]], prob = 0.25)), - median = c(stats::median(object[["readmission_index"]]), - stats::median(object[["mortality_index"]])), - q3 = c(stats::quantile(object[["readmission_index"]], prob = 0.75), - stats::quantile(object[["mortality_index"]], prob = 0.75)), - max = c(max(object[["readmission_index"]]), - max(object[["mortality_index"]])), - row.names = NULL, - stringsAsFactors = FALSE - ) + if (length(object[["readmission_index"]]) > 0L) { + data.frame( + index = c("readmission", "mortality"), + min = c(min(object[["readmission_index"]]), + min(object[["mortality_index"]])), + q1 = c(stats::quantile(object[["readmission_index"]], prob = 0.25), + stats::quantile(object[["mortality_index"]], prob = 0.25)), + median = c(stats::median(object[["readmission_index"]]), + stats::median(object[["mortality_index"]])), + q3 = c(stats::quantile(object[["readmission_index"]], prob = 0.75), + stats::quantile(object[["mortality_index"]], prob = 0.75)), + max = c(max(object[["readmission_index"]]), + max(object[["mortality_index"]])), + row.names = NULL, + stringsAsFactors = FALSE + ) + } else { + data.frame( + index = c("readmission", "mortality"), + min = c(NA_integer_, NA_integer_), + q1 = c(NA_real_, NA_real_), + median = c(NA_real_, NA_real_), + q3 = c(NA_real_, NA_real_), + max = c(NA_integer_, NA_integer_), + row.names = NULL, + stringsAsFactors = FALSE + ) + } list( conditions = cmrbs, diff --git a/tests/test-summary-charlson.R b/tests/test-summary-charlson.R index a92f174..6ca8b0b 100644 --- a/tests/test-summary-charlson.R +++ b/tests/test-summary-charlson.R @@ -119,6 +119,41 @@ stopifnot( identical(summary_cumulative$index_summary, summary_current$index_summary) ) +################################################################################ +# Zero-row input should summarize without NaN/Inf +df0 <- data.frame( + patid = integer(), + icdv = integer(), + dx = integer(), + code = character(), + age = integer(), + stringsAsFactors = FALSE +) + +charlson_zero <- comorbidities( + data = df0, + id.vars = "patid", + icdv.var = "icdv", + icd.codes = "code", + dx.var = "dx", + method = "charlson_quan2011", + flag.method = "current", + poa = 1L, + primarydx = 0L, + age.var = "age" +) + +summary_zero <- summary(charlson_zero) + +stopifnot( + is.list(summary_zero), + identical(names(summary_zero), c("conditions", "age_summary", "index_summary")), + all(summary_zero$conditions$count == 0L), + !any(is.nan(summary_zero$conditions$percent)), + all(is.na(summary_zero$conditions$percent)), + all(summary_zero$index_summary == 0 | is.na(summary_zero$index_summary)) +) + ################################################################################ # End of File # ################################################################################ diff --git a/tests/test-summary-elixhauser.R b/tests/test-summary-elixhauser.R index b9324f5..8a5a4e1 100644 --- a/tests/test-summary-elixhauser.R +++ b/tests/test-summary-elixhauser.R @@ -122,6 +122,43 @@ stopifnot( identical(summary_cumulative$index_summary, summary_current$index_summary) ) +################################################################################ +# Zero-row input should summarize without NaN/Inf +df0 <- data.frame( + patid = integer(), + icdv = integer(), + dx = integer(), + code = character(), + stringsAsFactors = FALSE +) + +elixhauser_zero <- comorbidities( + data = df0, + id.vars = "patid", + icdv.var = "icdv", + icd.codes = "code", + dx.var = "dx", + method = "elixhauser_ahrq2025", + flag.method = "current", + poa = 1L, + primarydx = 0L +) + +summary_zero <- summary(elixhauser_zero) + +stopifnot( + is.list(summary_zero), + identical(names(summary_zero), c("conditions", "index_summary")), + all(summary_zero$conditions$count == 0L), + !any(is.nan(summary_zero$conditions$percent)), + all(is.na(summary_zero$conditions$percent)), + all(is.na(summary_zero$index_summary$min)), + all(is.na(summary_zero$index_summary$q1)), + all(is.na(summary_zero$index_summary$median)), + all(is.na(summary_zero$index_summary$q3)), + all(is.na(summary_zero$index_summary$max)) +) + ################################################################################ # End of File # ################################################################################ diff --git a/tests/test-summary-pccc-subconditions.R b/tests/test-summary-pccc-subconditions.R index 9b4dd1e..d863b27 100644 --- a/tests/test-summary-pccc-subconditions.R +++ b/tests/test-summary-pccc-subconditions.R @@ -38,8 +38,8 @@ stopifnot( all(summary_current[["percent_of_cohort"]] >= 0), all(summary_current[["percent_of_cohort"]] <= 100), all(is.na(summary_current[["percent_of_those_with_condition"]][is.na(summary_current[["subcondition"]])])), - all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]])] >= 0), - all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]])] <= 100) + all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]]) & summary_current[["count"]] > 0] >= 0), + all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]]) & summary_current[["count"]] > 0] <= 100) ) ################################################################################ @@ -125,6 +125,39 @@ summary_cumulative <- suppressWarnings(summary(pccc_sub_cumulative)) stopifnot(identical(summary_cumulative, summary_current)) +################################################################################ +# No conditions flagged -> percentages should be 0/NA, not NaN/Inf +df_none <- data.frame( + patid = 1:3, + icdv = 10L, + dx = 1L, + code = c("XXX1", "XXX2", "XXX3"), + stringsAsFactors = FALSE +) + +pccc_empty <- comorbidities( + data = df_none, + id.vars = "patid", + icd.codes = "code", + icdv.var = "icdv", + dx.var = "dx", + method = "pccc_v3.1", + flag.method = "current", + poa = 1, + subconditions = TRUE +) + +summary_empty <- summary(pccc_empty) + +stopifnot( + inherits(summary_empty, "data.frame"), + all(summary_empty$count == 0), + all(summary_empty$percent_of_cohort == 0), + !any(is.nan(summary_empty$percent_of_cohort)), + all(is.na(summary_empty$percent_of_those_with_condition) | summary_empty$percent_of_those_with_condition == 0), + !any(is.nan(summary_empty$percent_of_those_with_condition)) +) + ################################################################################ # End of File # ################################################################################ diff --git a/tests/test-summary-pccc.R b/tests/test-summary-pccc.R index 0d93d85..645df5f 100644 --- a/tests/test-summary-pccc.R +++ b/tests/test-summary-pccc.R @@ -141,6 +141,53 @@ stopifnot( is.numeric(rtn[["percent"]]) ) +################################################################################ +# Zero-row input should summarize without NaN/Inf (v2.1) + +pccc_zero <- comorbidities( + data = mdcr[0, ], + id.vars = "patid", + icd.codes = "code", + poa = 1, + flag.method = "current", + method = "pccc_v2.1" +) + +summary_zero <- summary(pccc_zero) + +stopifnot( + inherits(summary_zero, "data.frame"), + all(identical(summary_zero$count, c(rep(0L, 24)))), + !any(is.nan(summary_zero$percent)), + all(is.na(summary_zero$percent)) +) + +################################################################################ +# Zero-row input should summarize without NaN/Inf (v3.1) + +pccc_zero <- comorbidities( + data = mdcr[0, ], + id.vars = "patid", + icd.codes = "code", + poa = 1, + flag.method = "current", + method = "pccc_v3.1" +) + +summary_zero <- summary(pccc_zero) + +stopifnot( + inherits(summary_zero, "data.frame"), + all(identical(summary_zero$dxpr_or_tech_count, c(rep(0L, 24)))), + all(identical(summary_zero$dxpr_only_count, c(rep(0L, 11), rep(NA_integer_, 13)))), + all(identical(summary_zero$tech_only_count, c(rep(0L, 11), rep(NA_integer_, 13)))), + all(identical(summary_zero$dxpr_and_tech_count, c(rep(0L, 11), rep(NA_integer_, 13)))), + !any(is.nan(summary_zero$dxpr_or_tech_percent)), + !any(is.nan(summary_zero$dxpr_only_percent)), + !any(is.nan(summary_zero$tech_only_percent)), + !any(is.nan(summary_zero$dxpr_and_tech_percent)) +) + ################################################################################ # End of File # ################################################################################