diff --git a/NEWS.md b/NEWS.md index a6c8d66c..825b14c3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # haven (development version) +* Fix bug that caused `write_xpt()` to fail when writing tagged `NA` values (#755). + +* Tagged `NA` values are now silently converted to lower case when they are + created in R with `tagged_na()` or checked with `is_tagged_na()`. These are + case insensitive when reading and writing, so forcing lower case removes any + potential ambiguity. + * Character limit checks in `write_xpt()` now check for the number of bytes instead of the number of characters (#746). diff --git a/R/haven-sas.R b/R/haven-sas.R index d7389f19..68778a30 100644 --- a/R/haven-sas.R +++ b/R/haven-sas.R @@ -111,7 +111,7 @@ write_sas <- function(data, path) { #' #' If a dataset label is defined, it will be stored in the "label" attribute #' of the tibble. -#' +#' #' `write_xpt()` returns the input `data` invisibly. #' @export #' @examples diff --git a/R/tagged_na.R b/R/tagged_na.R index 5c633571..b35387c5 100644 --- a/R/tagged_na.R +++ b/R/tagged_na.R @@ -2,14 +2,14 @@ #' #' "Tagged" missing values work exactly like regular R missing values except #' that they store one additional byte of information a tag, which is usually -#' a letter ("a" to "z"). When by loading a SAS and Stata file, the tagged -#' missing values always use lower case values. +#' a letter ("a" to "z"). Tagged missing values are always lower case characters +#' in R - upper case characters are silently converted to lower case. #' #' `format_tagged_na()` and `print_tagged_na()` format tagged #' NA's as NA(a), NA(b), etc. #' #' @param ... Vectors containing single character. The letter will be used to -#' "tag" the missing value. +#' "tag" the missing value. Tags are silently converted to lower case. #' @param x A numeric vector #' @param digits Number of digits to use in string representation #' @export @@ -42,7 +42,7 @@ na_tag <- function(x) { .Call(na_tag_, x) } -#' @param tag If `NULL`, will only return true if the tag has this value. +#' @param tag If not `NULL`, will only return true if the tag has this value. #' @rdname tagged_na #' @export is_tagged_na <- function(x, tag = NULL) { diff --git a/man/tagged_na.Rd b/man/tagged_na.Rd index d57a5708..1ee0bc96 100644 --- a/man/tagged_na.Rd +++ b/man/tagged_na.Rd @@ -20,19 +20,19 @@ print_tagged_na(x, digits = getOption("digits")) } \arguments{ \item{...}{Vectors containing single character. The letter will be used to -"tag" the missing value.} +"tag" the missing value. Tags are silently converted to lower case.} \item{x}{A numeric vector} -\item{tag}{If \code{NULL}, will only return true if the tag has this value.} +\item{tag}{If not \code{NULL}, will only return true if the tag has this value.} \item{digits}{Number of digits to use in string representation} } \description{ "Tagged" missing values work exactly like regular R missing values except that they store one additional byte of information a tag, which is usually -a letter ("a" to "z"). When by loading a SAS and Stata file, the tagged -missing values always use lower case values. +a letter ("a" to "z"). Tagged missing values are always lower case characters +in R - upper case characters are silently converted to lower case. } \details{ \code{format_tagged_na()} and \code{print_tagged_na()} format tagged diff --git a/src/DfWriter.cpp b/src/DfWriter.cpp index 562c0ada..7ab6768f 100644 --- a/src/DfWriter.cpp +++ b/src/DfWriter.cpp @@ -326,6 +326,9 @@ class Writer { if (!std::isnan(values[i]) || tag == '\0') { readstat_label_double_value(labelSet, values[i], string_utf8(labels, i)); } else { + if (ext_ == HAVEN_XPT || ext_ == HAVEN_SAS7BDAT || ext_ == HAVEN_SAS7BCAT) { + tag = toupper(tag); + } readstat_label_tagged_value(labelSet, tag, string_utf8(labels, i)); } } @@ -448,6 +451,9 @@ class Writer { if (tag == '\0') { return readstat_insert_missing_value(writer_, var); } else { + if (ext_ == HAVEN_XPT || ext_ == HAVEN_SAS7BDAT || ext_ == HAVEN_SAS7BCAT) { + tag = toupper(tag); + } return readstat_insert_tagged_missing_value(writer_, var, tag); } } else { diff --git a/src/tagged_na.c b/src/tagged_na.c index f549173e..c43878d7 100644 --- a/src/tagged_na.c +++ b/src/tagged_na.c @@ -2,6 +2,7 @@ #include #include #include +#include // Scalar operators ------------------------------------------------------- @@ -68,7 +69,7 @@ SEXP tagged_na_(SEXP x) { for (int i = 0; i < n; ++i) { char xi = first_char(STRING_ELT(x, i)); - REAL(out)[i] = make_tagged_na(xi); + REAL(out)[i] = make_tagged_na(tolower(xi)); } UNPROTECT(1); @@ -124,7 +125,7 @@ SEXP is_tagged_na_(SEXP x, SEXP tag_) { if (Rf_length(tag_) != 1) Rf_errorcall(R_NilValue, "`tag` must be a character vector of length one."); has_tag = true; - check_tag = first_char(STRING_ELT(tag_, 0)); + check_tag = tolower(first_char(STRING_ELT(tag_, 0))); } else { Rf_errorcall(R_NilValue, "`tag` must be NULL or a character vector"); } diff --git a/tests/testthat/test-haven-sas.R b/tests/testthat/test-haven-sas.R index b3845fff..5cd01448 100644 --- a/tests/testthat/test-haven-sas.R +++ b/tests/testthat/test-haven-sas.R @@ -188,6 +188,18 @@ test_that("can roundtrip date times", { expect_equal(attr(roundtrip_var(x2, "xpt"), "label"), "abc") }) +test_that("can roundtrip tagged NAs", { + x <- c(1, 2, tagged_na("a", "b"), NA) + expect_equal(roundtrip_var(x, "dta"), x) + + tags <- tagged_na("a", "b") + y <- labelled( + x, + c("ABC" = tags[1], "DEF" = tags[2]) + ) + expect_equal(roundtrip_var(y, "xpt"), x) +}) + test_that("invalid files generate informative errors", { expect_snapshot(error = TRUE, { write_xpt(mtcars, file.path(tempdir(), " temp.xpt")) diff --git a/tests/testthat/test-tagged_na.R b/tests/testthat/test-tagged_na.R index 63846846..c8802521 100644 --- a/tests/testthat/test-tagged_na.R +++ b/tests/testthat/test-tagged_na.R @@ -54,3 +54,13 @@ test_that("print_tagged_na is stable", { x <- c(1:100, tagged_na(letters), NA) expect_snapshot(print_tagged_na(x)) }) + +test_that("uppercase tags are converted to lowercase", { + x_upper <- tagged_na("A", "B", "Z") + x_lower <- tagged_na("a", "b", "z") + + expect_equal(na_tag(x_upper), c("a", "b", "z")) + expect_equal(is_tagged_na(x_lower, "A"), c(TRUE, FALSE, FALSE)) + + expect_equal(x_upper, x_lower) +})