Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# haven (development version)

* Fix bug that caused `write_xpt()` to fail when writing tagged `NA` values (#755).

* Tagged `NA` values are now silently converted to lower case when they are
created in R with `tagged_na()` or checked with `is_tagged_na()`. These are
case insensitive when reading and writing, so forcing lower case removes any
potential ambiguity.

* Character limit checks in `write_xpt()` now check for the number of bytes
instead of the number of characters (#746).

Expand Down
2 changes: 1 addition & 1 deletion R/haven-sas.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ write_sas <- function(data, path) {
#'
#' If a dataset label is defined, it will be stored in the "label" attribute
#' of the tibble.
#'
#'
#' `write_xpt()` returns the input `data` invisibly.
#' @export
#' @examples
Expand Down
8 changes: 4 additions & 4 deletions R/tagged_na.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
#'
#' "Tagged" missing values work exactly like regular R missing values except
#' that they store one additional byte of information a tag, which is usually
#' a letter ("a" to "z"). When by loading a SAS and Stata file, the tagged
#' missing values always use lower case values.
#' a letter ("a" to "z"). Tagged missing values are always lower case characters
#' in R - upper case characters are silently converted to lower case.
#'
#' `format_tagged_na()` and `print_tagged_na()` format tagged
#' NA's as NA(a), NA(b), etc.
#'
#' @param ... Vectors containing single character. The letter will be used to
#' "tag" the missing value.
#' "tag" the missing value. Tags are silently converted to lower case.
#' @param x A numeric vector
#' @param digits Number of digits to use in string representation
#' @export
Expand Down Expand Up @@ -42,7 +42,7 @@ na_tag <- function(x) {
.Call(na_tag_, x)
}

#' @param tag If `NULL`, will only return true if the tag has this value.
#' @param tag If not `NULL`, will only return true if the tag has this value.
#' @rdname tagged_na
#' @export
is_tagged_na <- function(x, tag = NULL) {
Expand Down
8 changes: 4 additions & 4 deletions man/tagged_na.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions src/DfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ class Writer {
if (!std::isnan(values[i]) || tag == '\0') {
readstat_label_double_value(labelSet, values[i], string_utf8(labels, i));
} else {
if (ext_ == HAVEN_XPT || ext_ == HAVEN_SAS7BDAT || ext_ == HAVEN_SAS7BCAT) {
tag = toupper(tag);
}
readstat_label_tagged_value(labelSet, tag, string_utf8(labels, i));
}
}
Expand Down Expand Up @@ -448,6 +451,9 @@ class Writer {
if (tag == '\0') {
return readstat_insert_missing_value(writer_, var);
} else {
if (ext_ == HAVEN_XPT || ext_ == HAVEN_SAS7BDAT || ext_ == HAVEN_SAS7BCAT) {
tag = toupper(tag);
}
return readstat_insert_tagged_missing_value(writer_, var, tag);
}
} else {
Expand Down
5 changes: 3 additions & 2 deletions src/tagged_na.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <R.h>
#include <Rinternals.h>
#include <stdbool.h>
#include <ctype.h>

// Scalar operators -------------------------------------------------------

Expand Down Expand Up @@ -68,7 +69,7 @@ SEXP tagged_na_(SEXP x) {

for (int i = 0; i < n; ++i) {
char xi = first_char(STRING_ELT(x, i));
REAL(out)[i] = make_tagged_na(xi);
REAL(out)[i] = make_tagged_na(tolower(xi));
}

UNPROTECT(1);
Expand Down Expand Up @@ -124,7 +125,7 @@ SEXP is_tagged_na_(SEXP x, SEXP tag_) {
if (Rf_length(tag_) != 1)
Rf_errorcall(R_NilValue, "`tag` must be a character vector of length one.");
has_tag = true;
check_tag = first_char(STRING_ELT(tag_, 0));
check_tag = tolower(first_char(STRING_ELT(tag_, 0)));
} else {
Rf_errorcall(R_NilValue, "`tag` must be NULL or a character vector");
}
Expand Down
12 changes: 12 additions & 0 deletions tests/testthat/test-haven-sas.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@ test_that("can roundtrip date times", {
expect_equal(attr(roundtrip_var(x2, "xpt"), "label"), "abc")
})

test_that("can roundtrip tagged NAs", {
x <- c(1, 2, tagged_na("a", "b"), NA)
expect_equal(roundtrip_var(x, "dta"), x)

tags <- tagged_na("a", "b")
y <- labelled(
x,
c("ABC" = tags[1], "DEF" = tags[2])
)
expect_equal(roundtrip_var(y, "xpt"), x)
})

test_that("invalid files generate informative errors", {
expect_snapshot(error = TRUE, {
write_xpt(mtcars, file.path(tempdir(), " temp.xpt"))
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-tagged_na.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,13 @@ test_that("print_tagged_na is stable", {
x <- c(1:100, tagged_na(letters), NA)
expect_snapshot(print_tagged_na(x))
})

test_that("uppercase tags are converted to lowercase", {
x_upper <- tagged_na("A", "B", "Z")
x_lower <- tagged_na("a", "b", "z")

expect_equal(na_tag(x_upper), c("a", "b", "z"))
expect_equal(is_tagged_na(x_lower, "A"), c(TRUE, FALSE, FALSE))

expect_equal(x_upper, x_lower)
})
Loading