Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: rdhs
Type: Package
Title: API Client and Dataset Management for the Demographic and Health Survey (DHS) Data
Version: 0.8.2
Version: 0.8.3
Authors@R:
c(person(given = "OJ",
family = "Watson",
Expand Down Expand Up @@ -43,10 +43,10 @@ Imports:
qdapRegex,
getPass,
haven,
iotools,
sf,
cli,
rlang
rlang,
vroom
Suggests:
testthat,
knitr,
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
# rdhs (development version)

## rdhs 0.8.3

* Internal change to `read_dhs_flat()` to reduce memory usage (`for` loop instead of `Map()`).
Reduces the risk of `Error: vector memory exhausted` when parsing large datasets.
* Replace `iotools::input.file()` with `vroom::vroom_fwf()` in `read_dhs_flat()`.

## rdhs 0.8.2

* Spatial boundaries will be cached using the DHS client (#122)


## rdhs 0.8.1

* Convert DHS dataset flat file data dictionaries to UTF-8. This addresses parsing
Expand Down
26 changes: 16 additions & 10 deletions R/read_dhs_flat.R
Original file line number Diff line number Diff line change
Expand Up @@ -335,25 +335,31 @@ read_dhs_flat <- function(zfile, all_lower=TRUE, meta_source=NULL) {
stop("metadata file not found")
}

types <- c("integer", "character", "numeric")
types <- c("i", "c", "n")
dct$col_types <- types[match(dct$datatype, c("Numeric", "Alpha", "Decimal"))]
dat <- read_zipdata(
zfile, "\\.DAT$", iotools::input.file, formatter = iotools::dstrfw,
col_types = dct$col_types, widths = dct$len, strict = FALSE
)
names(dat) <- dct$name
dat[dct$name] <- Map("attr<-", dat[dct$name], "label", dct$label)
zfile, "\\.DAT$", vroom::vroom_fwf,
col_positions = vroom::fwf_widths(dct$len, col_names = dct$name),
col_types = paste0(dct$col_types, collapse = ""),
progress = FALSE,
.name_repair = "minimal"
)

for(idx in seq_along(dct$name)) {
attr(dat[dct$name[[idx]]], "label") <- dct$label[[idx]]
}

haslbl <- unlist(lapply(dct$labels, length)) > 0

# match on haven package version
if (packageVersion("haven") > "1.1.2") {
dat[dct$name[haslbl]] <- Map(haven::labelled, dat[dct$name[haslbl]],
dct$labels[haslbl],
dct$label[haslbl])
dat[dct$name[haslbl]] <- Map(haven::labelled, dat[dct$name[haslbl]],
dct$labels[haslbl],
dct$label[haslbl])
} else {
dat[dct$name[haslbl]] <- Map(haven::labelled, dat[dct$name[haslbl]],
dct$labels[haslbl])
}

return(dat)
}
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ rbind_list_base <- function(x) {
#'
unzip_special <- function(zipfile, files = NULL, overwrite = TRUE,
junkpaths = FALSE, exdir = ".", unzip = "internal",
setTimes = FALSE){
setTimes = FALSE) {

if (max(unzip(zipfile, list = TRUE)$Length) > 4e9) {
unzip_file <- Sys.which("unzip")
Expand Down