Merge pull request #30 from dewittpe/fix-26-and-27

dewittpe · web-flow · commit 6089d2b669eb · 2025-12-17T17:46:44.000-07:00
Fix 26 and 27
diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,12 @@
   change will generally result in less computation time than base R
   `data.frames` (`data.tables` require even less time).
 
+## Bug Fixes
+
+* `summary.medicalcoder_comorbidites()` no longer crashes when a zero row input
+  is passed in. Consisently return `NA` instead of `NaN` when counts are zeros.
+  (#26, #27)
+
 ## Other Changes
 
 * Extend and improve the internal ICD-9 database to distinguish between CDC and
diff --git a/R/summary.R b/R/summary.R
@@ -130,16 +130,16 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
                condition = cnd,
                subcondition = NA_character_,
                count = counts[cnd],
-               percent_of_cohort = 100 * counts[cnd] / N,
+               percent_of_cohort = if (N > 0) {100 * counts[cnd] / N} else {NA_real_},
                percent_of_those_with_condition = NA_real_,
                stringsAsFactors = FALSE
              )
              x2 <- data.frame(
                condition = cnd,
                subcondition = names(scounts[[cnd]]),
                count = scounts[[cnd]],
-               percent_of_cohort = 100 * scounts[[cnd]] / N,
-               percent_of_those_with_condition = 100 * scounts[[cnd]] / counts[cnd],
+               percent_of_cohort = if (N > 0) {100 * scounts[[cnd]] / N} else {NA_real_},
+               percent_of_those_with_condition = ifelse(scounts[[cnd]] > 0, 100 * scounts[[cnd]] / counts[cnd], NA_real_),
                stringsAsFactors = FALSE
              )
              rbind(x1, x2)
@@ -167,15 +167,19 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
 
   # Track running counts for patients meeting at least N conditions
   # so the summary can report distribution thresholds (>=2, >=3, ...).
-  tlts <- sapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
+  tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
+  tlts <- do.call(cbind, tlts)
   colnames(tlts) <- paste(">=", 2:11, "conditions")
 
+  p <- 100 * c(colMeans(cnds), colMeans(tlts))
+  p <- ifelse(is.nan(p), NA_real_, p)
+
   rtn <-
     data.frame(
       condition = c(names(cnds), rep("num_cmrb", ncol(tlts))),
       label     = c(conditions[["condition_label"]], "Any Technology Dependence", "Any Transplantation", "Any Condition", colnames(tlts)),
       count     = as.integer(c(colSums(cnds), colSums(tlts))),
-      percent   = 100 * c(colMeans(cnds), colMeans(tlts)),
+      percent   = p,
       stringsAsFactors = FALSE
     )
 
@@ -195,7 +199,8 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
 
   # Track running counts for patients meeting at least N conditions
   # so the summary can report distribution thresholds (>=2, >=3, ...).
-  tlts <- sapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
+  tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
+  tlts <- do.call(cbind, tlts)
   colnames(tlts) <- paste(">=", 2:11, "conditions")
 
   sets <-
@@ -210,7 +215,11 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
 
   counts <- lapply(sets, colSums, na.rm = TRUE)
   N <- nrow(object)
-  percents <- lapply(counts, function(x) 100 * x / N)
+  percents <- lapply(counts,
+    function(x) {
+      y <- 100 * x / N
+      ifelse(is.nan(y), NA_real_, y)
+    })
 
   rtn <-
     data.frame(
@@ -256,11 +265,11 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
 
   cmrbs <- ..mdcr_internal_charlson_index_scores..[!is.na( ..mdcr_internal_charlson_index_scores..[[attr(object, "method")]]), c("condition_description", "condition")]
 
-  cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]])
+  cmrbs[["count"]]   <- colSums(object[cmrbs[["condition"]]])
   cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]])
 
   num_cmrbs <-
-    lapply(seq_len(max(object[["num_cmrb"]])),
+    lapply(seq_len(max(c(1L, object[["num_cmrb"]]))),
       function(x) {
         y <- object[["num_cmrb"]] >= x
         data.frame(
@@ -275,16 +284,31 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
   cmrbs <- rbind(cmrbs, num_cmrbs)
   rownames(cmrbs) <- NULL
 
+  # set percent to NA instead of NaN
+  cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_
+
   index_summary <-
-    data.frame(
-      min       = min(object[["cci"]]),
-      q1        = stats::quantile(object[["cci"]], prob = 0.25),
-      median    = stats::median(object[["cci"]]),
-      q3        = stats::quantile(object[["cci"]], prob = 0.75),
-      max       = max(object[["cci"]]),
-      row.names = NULL,
-      stringsAsFactors = FALSE
-    )
+    if (length(object[["cci"]]) > 0L) {
+      data.frame(
+        min       = min(object[["cci"]]),
+        q1        = stats::quantile(object[["cci"]], prob = 0.25),
+        median    = stats::median(object[["cci"]]),
+        q3        = stats::quantile(object[["cci"]], prob = 0.75),
+        max       = max(object[["cci"]]),
+        row.names = NULL,
+        stringsAsFactors = FALSE
+      )
+    } else {
+      data.frame(
+        min       = NA_integer_,
+        q1        = NA_real_,
+        median    = NA_real_,
+        q3        = NA_real_,
+        max       = NA_integer_,
+        row.names = NULL,
+        stringsAsFactors = FALSE
+      )
+    }
 
   age_summary <-
     merge(
@@ -293,6 +317,9 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
       by = c("age_score")
     )
 
+  # set NA instead of NaN
+  age_summary[["percent"]][is.nan(age_summary[["percent"]])] <- NA_real_
+
   list(
     conditions = cmrbs,
     age_summary = age_summary,
@@ -308,35 +335,50 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
   cmrbs <- ..mdcr_internal_elixhauser_index_scores..[!is.na( ..mdcr_internal_elixhauser_index_scores..[[attr(object, "method")]]), "condition", drop = FALSE]
   cmrbs <- unique(cmrbs)
 
-  cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]])
+  cmrbs[["count"]]   <- colSums(object[cmrbs[["condition"]]])
   cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]])
 
   num_cmrbs <-
-    lapply(seq_len(max(object[["num_cmrb"]])),
+    lapply(seq_len(max(c(1L, object[["num_cmrb"]]))),
       function(x) {
         y <- object[["num_cmrb"]] >= x
         data.frame(condition = paste(">=", x), count = sum(y), percent = 100 * mean(y), stringsAsFactors = FALSE)
       })
   num_cmrbs <- do.call(rbind, num_cmrbs)
   cmrbs <- rbind(cmrbs, num_cmrbs)
   rownames(cmrbs) <- NULL
+  # set percent to NA instead of NaN
+  cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_
 
   index_summary <-
-    data.frame(
-      index  = c("readmission", "mortality"),
-      min    = c(min(object[["readmission_index"]]),
-                 min(object[["mortality_index"]])),
-      q1     = c(stats::quantile(object[["readmission_index"]], prob = 0.25),
-                 stats::quantile(object[["mortality_index"]], prob = 0.25)),
-      median = c(stats::median(object[["readmission_index"]]),
-                 stats::median(object[["mortality_index"]])),
-      q3     = c(stats::quantile(object[["readmission_index"]], prob = 0.75),
-                 stats::quantile(object[["mortality_index"]], prob = 0.75)),
-      max    = c(max(object[["readmission_index"]]),
-                 max(object[["mortality_index"]])),
-      row.names = NULL,
-      stringsAsFactors = FALSE
-    )
+    if (length(object[["readmission_index"]]) > 0L) {
+      data.frame(
+        index  = c("readmission", "mortality"),
+        min    = c(min(object[["readmission_index"]]),
+          min(object[["mortality_index"]])),
+        q1     = c(stats::quantile(object[["readmission_index"]], prob = 0.25),
+          stats::quantile(object[["mortality_index"]], prob = 0.25)),
+        median = c(stats::median(object[["readmission_index"]]),
+          stats::median(object[["mortality_index"]])),
+        q3     = c(stats::quantile(object[["readmission_index"]], prob = 0.75),
+          stats::quantile(object[["mortality_index"]], prob = 0.75)),
+        max    = c(max(object[["readmission_index"]]),
+          max(object[["mortality_index"]])),
+        row.names = NULL,
+        stringsAsFactors = FALSE
+      )
+    } else {
+      data.frame(
+        index  = c("readmission", "mortality"),
+        min    = c(NA_integer_, NA_integer_),
+        q1     = c(NA_real_, NA_real_),
+        median = c(NA_real_, NA_real_),
+        q3     = c(NA_real_, NA_real_),
+        max    = c(NA_integer_, NA_integer_),
+        row.names = NULL,
+        stringsAsFactors = FALSE
+      )
+    }
 
   list(
     conditions = cmrbs,
diff --git a/tests/test-summary-charlson.R b/tests/test-summary-charlson.R
@@ -119,6 +119,41 @@ stopifnot(
   identical(summary_cumulative$index_summary, summary_current$index_summary)
 )
 
+################################################################################
+# Zero-row input should summarize without NaN/Inf
+df0 <- data.frame(
+  patid = integer(),
+  icdv  = integer(),
+  dx    = integer(),
+  code  = character(),
+  age   = integer(),
+  stringsAsFactors = FALSE
+)
+
+charlson_zero <- comorbidities(
+  data        = df0,
+  id.vars     = "patid",
+  icdv.var    = "icdv",
+  icd.codes   = "code",
+  dx.var      = "dx",
+  method      = "charlson_quan2011",
+  flag.method = "current",
+  poa         = 1L,
+  primarydx   = 0L,
+  age.var     = "age"
+)
+
+summary_zero <- summary(charlson_zero)
+
+stopifnot(
+  is.list(summary_zero),
+  identical(names(summary_zero), c("conditions", "age_summary", "index_summary")),
+  all(summary_zero$conditions$count == 0L),
+  !any(is.nan(summary_zero$conditions$percent)),
+  all(is.na(summary_zero$conditions$percent)),
+  all(summary_zero$index_summary == 0 | is.na(summary_zero$index_summary))
+)
+
 ################################################################################
 #                                 End of File                                  #
 ################################################################################
diff --git a/tests/test-summary-elixhauser.R b/tests/test-summary-elixhauser.R
@@ -122,6 +122,43 @@ stopifnot(
   identical(summary_cumulative$index_summary, summary_current$index_summary)
 )
 
+################################################################################
+# Zero-row input should summarize without NaN/Inf
+df0 <- data.frame(
+  patid = integer(),
+  icdv  = integer(),
+  dx    = integer(),
+  code  = character(),
+  stringsAsFactors = FALSE
+)
+
+elixhauser_zero <- comorbidities(
+  data        = df0,
+  id.vars     = "patid",
+  icdv.var    = "icdv",
+  icd.codes   = "code",
+  dx.var      = "dx",
+  method      = "elixhauser_ahrq2025",
+  flag.method = "current",
+  poa         = 1L,
+  primarydx   = 0L
+)
+
+summary_zero <- summary(elixhauser_zero)
+
+stopifnot(
+  is.list(summary_zero),
+  identical(names(summary_zero), c("conditions", "index_summary")),
+  all(summary_zero$conditions$count == 0L),
+  !any(is.nan(summary_zero$conditions$percent)),
+  all(is.na(summary_zero$conditions$percent)),
+  all(is.na(summary_zero$index_summary$min)),
+  all(is.na(summary_zero$index_summary$q1)),
+  all(is.na(summary_zero$index_summary$median)),
+  all(is.na(summary_zero$index_summary$q3)),
+  all(is.na(summary_zero$index_summary$max))
+)
+
 ################################################################################
 #                                 End of File                                  #
 ################################################################################
diff --git a/tests/test-summary-pccc-subconditions.R b/tests/test-summary-pccc-subconditions.R
@@ -38,8 +38,8 @@ stopifnot(
   all(summary_current[["percent_of_cohort"]] >= 0),
   all(summary_current[["percent_of_cohort"]] <= 100),
   all(is.na(summary_current[["percent_of_those_with_condition"]][is.na(summary_current[["subcondition"]])])),
-  all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]])] >= 0),
-  all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]])] <= 100)
+  all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]]) & summary_current[["count"]] > 0] >= 0),
+  all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]]) & summary_current[["count"]] > 0] <= 100)
 )
 
 ################################################################################
@@ -125,6 +125,39 @@ summary_cumulative <- suppressWarnings(summary(pccc_sub_cumulative))
 
 stopifnot(identical(summary_cumulative, summary_current))
 
+################################################################################
+# No conditions flagged -> percentages should be 0/NA, not NaN/Inf
+df_none <- data.frame(
+  patid = 1:3,
+  icdv  = 10L,
+  dx    = 1L,
+  code  = c("XXX1", "XXX2", "XXX3"),
+  stringsAsFactors = FALSE
+)
+
+pccc_empty <- comorbidities(
+  data          = df_none,
+  id.vars       = "patid",
+  icd.codes     = "code",
+  icdv.var      = "icdv",
+  dx.var        = "dx",
+  method        = "pccc_v3.1",
+  flag.method   = "current",
+  poa           = 1,
+  subconditions = TRUE
+)
+
+summary_empty <- summary(pccc_empty)
+
+stopifnot(
+  inherits(summary_empty, "data.frame"),
+  all(summary_empty$count == 0),
+  all(summary_empty$percent_of_cohort == 0),
+  !any(is.nan(summary_empty$percent_of_cohort)),
+  all(is.na(summary_empty$percent_of_those_with_condition) | summary_empty$percent_of_those_with_condition == 0),
+  !any(is.nan(summary_empty$percent_of_those_with_condition))
+)
+
 ################################################################################
 #                                 End of File                                  #
 ################################################################################
diff --git a/tests/test-summary-pccc.R b/tests/test-summary-pccc.R