Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
change will generally result in less computation time than base R
`data.frames` (`data.tables` require even less time).

## Bug Fixes

* `summary.medicalcoder_comorbidites()` no longer crashes when a zero row input
is passed in. Consisently return `NA` instead of `NaN` when counts are zeros.
(#26, #27)

## Other Changes

* Extend and improve the internal ICD-9 database to distinguish between CDC and
Expand Down
112 changes: 77 additions & 35 deletions R/summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,16 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
condition = cnd,
subcondition = NA_character_,
count = counts[cnd],
percent_of_cohort = 100 * counts[cnd] / N,
percent_of_cohort = if (N > 0) {100 * counts[cnd] / N} else {NA_real_},
percent_of_those_with_condition = NA_real_,
stringsAsFactors = FALSE
)
x2 <- data.frame(
condition = cnd,
subcondition = names(scounts[[cnd]]),
count = scounts[[cnd]],
percent_of_cohort = 100 * scounts[[cnd]] / N,
percent_of_those_with_condition = 100 * scounts[[cnd]] / counts[cnd],
percent_of_cohort = if (N > 0) {100 * scounts[[cnd]] / N} else {NA_real_},
percent_of_those_with_condition = ifelse(scounts[[cnd]] > 0, 100 * scounts[[cnd]] / counts[cnd], NA_real_),
stringsAsFactors = FALSE
)
rbind(x1, x2)
Expand Down Expand Up @@ -167,15 +167,19 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {

# Track running counts for patients meeting at least N conditions
# so the summary can report distribution thresholds (>=2, >=3, ...).
tlts <- sapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
tlts <- do.call(cbind, tlts)
colnames(tlts) <- paste(">=", 2:11, "conditions")

p <- 100 * c(colMeans(cnds), colMeans(tlts))
p <- ifelse(is.nan(p), NA_real_, p)

rtn <-
data.frame(
condition = c(names(cnds), rep("num_cmrb", ncol(tlts))),
label = c(conditions[["condition_label"]], "Any Technology Dependence", "Any Transplantation", "Any Condition", colnames(tlts)),
count = as.integer(c(colSums(cnds), colSums(tlts))),
percent = 100 * c(colMeans(cnds), colMeans(tlts)),
percent = p,
stringsAsFactors = FALSE
)

Expand All @@ -195,7 +199,8 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {

# Track running counts for patients meeting at least N conditions
# so the summary can report distribution thresholds (>=2, >=3, ...).
tlts <- sapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
tlts <- do.call(cbind, tlts)
colnames(tlts) <- paste(">=", 2:11, "conditions")

sets <-
Expand All @@ -210,7 +215,11 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {

counts <- lapply(sets, colSums, na.rm = TRUE)
N <- nrow(object)
percents <- lapply(counts, function(x) 100 * x / N)
percents <- lapply(counts,
function(x) {
y <- 100 * x / N
ifelse(is.nan(y), NA_real_, y)
})

rtn <-
data.frame(
Expand Down Expand Up @@ -256,11 +265,11 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {

cmrbs <- ..mdcr_internal_charlson_index_scores..[!is.na( ..mdcr_internal_charlson_index_scores..[[attr(object, "method")]]), c("condition_description", "condition")]

cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]])
cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]])
cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]])

num_cmrbs <-
lapply(seq_len(max(object[["num_cmrb"]])),
lapply(seq_len(max(c(1L, object[["num_cmrb"]]))),
function(x) {
y <- object[["num_cmrb"]] >= x
data.frame(
Expand All @@ -275,16 +284,31 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
cmrbs <- rbind(cmrbs, num_cmrbs)
rownames(cmrbs) <- NULL

# set percent to NA instead of NaN
cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_

index_summary <-
data.frame(
min = min(object[["cci"]]),
q1 = stats::quantile(object[["cci"]], prob = 0.25),
median = stats::median(object[["cci"]]),
q3 = stats::quantile(object[["cci"]], prob = 0.75),
max = max(object[["cci"]]),
row.names = NULL,
stringsAsFactors = FALSE
)
if (length(object[["cci"]]) > 0L) {
data.frame(
min = min(object[["cci"]]),
q1 = stats::quantile(object[["cci"]], prob = 0.25),
median = stats::median(object[["cci"]]),
q3 = stats::quantile(object[["cci"]], prob = 0.75),
max = max(object[["cci"]]),
row.names = NULL,
stringsAsFactors = FALSE
)
} else {
data.frame(
min = NA_integer_,
q1 = NA_real_,
median = NA_real_,
q3 = NA_real_,
max = NA_integer_,
row.names = NULL,
stringsAsFactors = FALSE
)
}

age_summary <-
merge(
Expand All @@ -293,6 +317,9 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
by = c("age_score")
)

# set NA instead of NaN
age_summary[["percent"]][is.nan(age_summary[["percent"]])] <- NA_real_

list(
conditions = cmrbs,
age_summary = age_summary,
Expand All @@ -308,35 +335,50 @@ summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
cmrbs <- ..mdcr_internal_elixhauser_index_scores..[!is.na( ..mdcr_internal_elixhauser_index_scores..[[attr(object, "method")]]), "condition", drop = FALSE]
cmrbs <- unique(cmrbs)

cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]])
cmrbs[["count"]] <- colSums(object[cmrbs[["condition"]]])
cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]])

num_cmrbs <-
lapply(seq_len(max(object[["num_cmrb"]])),
lapply(seq_len(max(c(1L, object[["num_cmrb"]]))),
function(x) {
y <- object[["num_cmrb"]] >= x
data.frame(condition = paste(">=", x), count = sum(y), percent = 100 * mean(y), stringsAsFactors = FALSE)
})
num_cmrbs <- do.call(rbind, num_cmrbs)
cmrbs <- rbind(cmrbs, num_cmrbs)
rownames(cmrbs) <- NULL
# set percent to NA instead of NaN
cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_

index_summary <-
data.frame(
index = c("readmission", "mortality"),
min = c(min(object[["readmission_index"]]),
min(object[["mortality_index"]])),
q1 = c(stats::quantile(object[["readmission_index"]], prob = 0.25),
stats::quantile(object[["mortality_index"]], prob = 0.25)),
median = c(stats::median(object[["readmission_index"]]),
stats::median(object[["mortality_index"]])),
q3 = c(stats::quantile(object[["readmission_index"]], prob = 0.75),
stats::quantile(object[["mortality_index"]], prob = 0.75)),
max = c(max(object[["readmission_index"]]),
max(object[["mortality_index"]])),
row.names = NULL,
stringsAsFactors = FALSE
)
if (length(object[["readmission_index"]]) > 0L) {
data.frame(
index = c("readmission", "mortality"),
min = c(min(object[["readmission_index"]]),
min(object[["mortality_index"]])),
q1 = c(stats::quantile(object[["readmission_index"]], prob = 0.25),
stats::quantile(object[["mortality_index"]], prob = 0.25)),
median = c(stats::median(object[["readmission_index"]]),
stats::median(object[["mortality_index"]])),
q3 = c(stats::quantile(object[["readmission_index"]], prob = 0.75),
stats::quantile(object[["mortality_index"]], prob = 0.75)),
max = c(max(object[["readmission_index"]]),
max(object[["mortality_index"]])),
row.names = NULL,
stringsAsFactors = FALSE
)
} else {
data.frame(
index = c("readmission", "mortality"),
min = c(NA_integer_, NA_integer_),
q1 = c(NA_real_, NA_real_),
median = c(NA_real_, NA_real_),
q3 = c(NA_real_, NA_real_),
max = c(NA_integer_, NA_integer_),
row.names = NULL,
stringsAsFactors = FALSE
)
}

list(
conditions = cmrbs,
Expand Down
35 changes: 35 additions & 0 deletions tests/test-summary-charlson.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,41 @@ stopifnot(
identical(summary_cumulative$index_summary, summary_current$index_summary)
)

################################################################################
# Zero-row input should summarize without NaN/Inf
df0 <- data.frame(
patid = integer(),
icdv = integer(),
dx = integer(),
code = character(),
age = integer(),
stringsAsFactors = FALSE
)

charlson_zero <- comorbidities(
data = df0,
id.vars = "patid",
icdv.var = "icdv",
icd.codes = "code",
dx.var = "dx",
method = "charlson_quan2011",
flag.method = "current",
poa = 1L,
primarydx = 0L,
age.var = "age"
)

summary_zero <- summary(charlson_zero)

stopifnot(
is.list(summary_zero),
identical(names(summary_zero), c("conditions", "age_summary", "index_summary")),
all(summary_zero$conditions$count == 0L),
!any(is.nan(summary_zero$conditions$percent)),
all(is.na(summary_zero$conditions$percent)),
all(summary_zero$index_summary == 0 | is.na(summary_zero$index_summary))
)

################################################################################
# End of File #
################################################################################
37 changes: 37 additions & 0 deletions tests/test-summary-elixhauser.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,43 @@ stopifnot(
identical(summary_cumulative$index_summary, summary_current$index_summary)
)

################################################################################
# Zero-row input should summarize without NaN/Inf
df0 <- data.frame(
patid = integer(),
icdv = integer(),
dx = integer(),
code = character(),
stringsAsFactors = FALSE
)

elixhauser_zero <- comorbidities(
data = df0,
id.vars = "patid",
icdv.var = "icdv",
icd.codes = "code",
dx.var = "dx",
method = "elixhauser_ahrq2025",
flag.method = "current",
poa = 1L,
primarydx = 0L
)

summary_zero <- summary(elixhauser_zero)

stopifnot(
is.list(summary_zero),
identical(names(summary_zero), c("conditions", "index_summary")),
all(summary_zero$conditions$count == 0L),
!any(is.nan(summary_zero$conditions$percent)),
all(is.na(summary_zero$conditions$percent)),
all(is.na(summary_zero$index_summary$min)),
all(is.na(summary_zero$index_summary$q1)),
all(is.na(summary_zero$index_summary$median)),
all(is.na(summary_zero$index_summary$q3)),
all(is.na(summary_zero$index_summary$max))
)

################################################################################
# End of File #
################################################################################
37 changes: 35 additions & 2 deletions tests/test-summary-pccc-subconditions.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ stopifnot(
all(summary_current[["percent_of_cohort"]] >= 0),
all(summary_current[["percent_of_cohort"]] <= 100),
all(is.na(summary_current[["percent_of_those_with_condition"]][is.na(summary_current[["subcondition"]])])),
all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]])] >= 0),
all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]])] <= 100)
all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]]) & summary_current[["count"]] > 0] >= 0),
all(summary_current[["percent_of_those_with_condition"]][!is.na(summary_current[["subcondition"]]) & summary_current[["count"]] > 0] <= 100)
)

################################################################################
Expand Down Expand Up @@ -125,6 +125,39 @@ summary_cumulative <- suppressWarnings(summary(pccc_sub_cumulative))

stopifnot(identical(summary_cumulative, summary_current))

################################################################################
# No conditions flagged -> percentages should be 0/NA, not NaN/Inf
df_none <- data.frame(
patid = 1:3,
icdv = 10L,
dx = 1L,
code = c("XXX1", "XXX2", "XXX3"),
stringsAsFactors = FALSE
)

pccc_empty <- comorbidities(
data = df_none,
id.vars = "patid",
icd.codes = "code",
icdv.var = "icdv",
dx.var = "dx",
method = "pccc_v3.1",
flag.method = "current",
poa = 1,
subconditions = TRUE
)

summary_empty <- summary(pccc_empty)

stopifnot(
inherits(summary_empty, "data.frame"),
all(summary_empty$count == 0),
all(summary_empty$percent_of_cohort == 0),
!any(is.nan(summary_empty$percent_of_cohort)),
all(is.na(summary_empty$percent_of_those_with_condition) | summary_empty$percent_of_those_with_condition == 0),
!any(is.nan(summary_empty$percent_of_those_with_condition))
)

################################################################################
# End of File #
################################################################################
Loading