From 38669dbdc20fa6b770ba29508714bb64682eb976 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 27 Sep 2024 10:43:49 +0200 Subject: [PATCH] extract additional columns from files #68 --- R/readActiGraphCount.R | 51 +++++++++++++++++------- R/readActicalCount.R | 5 +-- R/readActiwatchCount.R | 32 ++++++++------- R/utils_for_countdata.R | 10 ++++- tests/testthat/test_readActiGraphCount.R | 32 +++++++++------ tests/testthat/test_readActiwatchCount.R | 22 ++++++---- 6 files changed, 96 insertions(+), 56 deletions(-) diff --git a/R/readActiGraphCount.R b/R/readActiGraphCount.R index 1455c73..f12c7b1 100644 --- a/R/readActiGraphCount.R +++ b/R/readActiGraphCount.R @@ -34,8 +34,12 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL, if (any(grepl("serialnumber", fileHeader$item))) headerAvailable = TRUE # Depending on whether header is present assign number of rows to skip: + mode = NULL if (headerAvailable == TRUE) { skip = 10 + # Extract mode number from header because this tells us how to interpret the columns + mode = as.numeric(fileHeader$value[grep(pattern = "mode", x = fileHeader$item)]) + if (is.na(mode)) mode = NULL } else { tmp = data.table::fread(input = filename, header = FALSE, @@ -80,33 +84,50 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL, D = D[, -1, drop = FALSE] } # Identify columns with count data - acccol = vmcol = NA + acccol = NA + stepcol = NULL if (colnames == TRUE) { acccol = grep("axis|activity", colnames(D), ignore.case = TRUE) vmcol = grep("vector magnitude|vm", colnames(D), ignore.case = TRUE) + stepcol = grep("step", colnames(D), ignore.case = TRUE) } else { - # Then assume first 3 columns are axis1, axis2, axis3 if ncol(D) >= 3 - # First column is VM if ncol(D) < 3 - # Note that in ActiLife software the user can select - # the columns to export (e.g, it could be "Axis1", "Vector Magnitude", "Steps") - # which may mean that our assumptions here are not necessarily true. - if (ncol(D) >= 3) { - acccol = 1:3 + if (!is.null(mode)) { + if ((mode >= 12 && mode <= 15) | (mode >= 28 && mode <= 31) | + (mode >= 44 && mode <= 47) | (mode >= 60 && mode <= 63)) { + acccol = 1:3 + } else { + acccol = 1 + } + if (mode %in% c(13, 15, 29, 31, 45, 47, 61, 63)) { + stepcol = 4 + } } else { - vmcol = 1 + # Then assume first 3 columns are axis1, axis2, axis3 if ncol(D) >= 3 + # First column is VM if ncol(D) < 3 + # Note that in ActiLife software the user can select + # the columns to export (e.g, it could be "Axis1", "Vector Magnitude", "Steps") + # which may mean that our assumptions here are not necessarily true. + if (ncol(D) >= 3) { + acccol = 1:3 + } else { + acccol = 1 + } } } # Assign colnames and formatting - if (is.na(acccol[1]) == FALSE) { + if (length(acccol) == 3 && is.na(acccol[1]) == FALSE) { colnames(D)[acccol] = c("y", "x", "z") # ActiGraph always stores y axis first } - if (is.na(vmcol[1]) == FALSE) { + if (length(acccol) == 1 &&is.na(vmcol[1]) == FALSE) { D = as.matrix(D, drop = FALSE) # Convert to matrix as data.frame will auto-collapse to vector - colnames(D)[vmcol] = c("vm") + colnames(D)[acccol] = "vm" + } + if (length(stepcol) == 1 && is.na(stepcol[1]) == FALSE) { + colnames(D)[stepcol] = "steps" } - keep = c(acccol, vmcol)[!is.na(c(acccol, vmcol))] + keep = c(acccol, stepcol)[!is.na(c(acccol, stepcol))] D = D[, keep, drop = FALSE] - if (ncol(D) == 3 & is.na(vmcol)) { + if (ncol(D) >= 3) { D$vm = sqrt(D[, 1] ^ 2 + D[, 2] ^ 2 + D[, 3] ^ 2) } # Extract information from header, if present @@ -154,7 +175,7 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL, if (!is.null(desiredEpochSize)) { if (desiredEpochSize > epSizeShort) { step = desiredEpochSize %/% epSizeShort - D = sumAggregate(D, step) + D = matAggregate(D, step) epSizeShort = epSizeShort * step } checkEpochMatch(desiredEpochSize, epSizeShort) diff --git a/R/readActicalCount.R b/R/readActicalCount.R index e797cfb..9896ddd 100644 --- a/R/readActicalCount.R +++ b/R/readActicalCount.R @@ -48,18 +48,17 @@ readActicalCount = function(filename = file, desiredEpochSize = NULL, timestamp_POSIX = timestamp_POSIX[1] D = D[, -which(colnames(D) %in% c("date", "time"))] D = as.matrix(D, drop = FALSE) - + if (quote == "") D = apply(D, 2, as.numeric) # If requested, aggregate data to lower resolution to match desired # epoch size in argument windowsizes if (!is.null(desiredEpochSize)) { if (desiredEpochSize > epSizeShort) { step = desiredEpochSize %/% epSizeShort - D = sumAggregate(D, step) + D = matAggregate(D, step) epSizeShort = epSizeShort * step } checkEpochMatch(desiredEpochSize, epSizeShort) } - if (quote == "") D = apply(D, 2, as.numeric) invisible(list(data = D, epochSize = epSizeShort, startTime = timestamp_POSIX)) } \ No newline at end of file diff --git a/R/readActiwatchCount.R b/R/readActiwatchCount.R index 76d9c76..53fe822 100644 --- a/R/readActiwatchCount.R +++ b/R/readActiwatchCount.R @@ -13,9 +13,9 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL, startindex = 1000 quote = detectQuote(fn = filename, index = startindex) index = findStartData(filename, quote, startindex) - D = data.table::fread(input = filename, sep = ",", skip = index, quote = quote) + D = data.table::fread(input = filename, sep = ",", skip = index, quote = quote, data.table = FALSE) # ! Assumption that column names are present 2 lines prior to timeseries - colnames = data.table::fread(input = filename, + colnames = data.table::fread(input = filename, data.table = FALSE, header = FALSE, sep = ",", skip = index - 2, nrows = 1, quote = quote) if (all(is.na(colnames))) { @@ -23,14 +23,17 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL, header = FALSE, sep = ",", skip = index - 4, nrows = 1, quote = quote) } - colnames(D) = as.character(colnames)[1:ncol(D)] + colnames = colnames[!is.na(colnames)] + D = D[, which(!is.na(colnames))] + colnames(D) = tolower(as.character(colnames)) # ! Assumptions about columns names - colnames(D) = gsub(pattern = "datum|date", replacement = "date", - x = colnames(D), ignore.case = TRUE) - colnames(D) = gsub(pattern = "tijd|time", replacement = "time", - x = colnames(D), ignore.case = TRUE) - colnames(D) = gsub(pattern = "activiteit|activity", replacement = "ZCY", - x = colnames(D), ignore.case = TRUE) + # browser() + colnames(D)[grep(pattern = "datum|date", x = colnames(D))] = "date" + colnames(D)[grep(pattern = "tijd|time", x = colnames(D))] = "time" + colnames(D)[grep(pattern = "activiteit|activity", x = colnames(D))] = "counts" + colnames(D)[grep(pattern = "slapen|sleep", x = colnames(D))] = "sleep" + colnames(D)[grep(pattern = "niet-om|wear|worn", x = colnames(D))] = "nonwear" + D = D[, grep(pattern = "time|date|counts|sleep|nonwear", x = colnames(D))] timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "), format = timeformat, tz = tz) @@ -41,7 +44,7 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL, epSizeShort = mean(diff(as.numeric(timestamp_POSIX))) timestamp_POSIX = timestamp_POSIX[1] - D = D[, "ZCY"] + D = D[, -which(colnames(D) %in% c("date", "time"))] } else if (fileExtension == "awd") { #========================================================= # AWD @@ -64,8 +67,8 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL, } D = data.table::fread(input = filename, header = FALSE, sep = ",", skip = index, quote = quote) - D = D[,1] - colnames(D)[1] = "ZCY" + D = D[, 1:2] + colnames(D)[1:2] = c("counts", "light") header = data.table::fread(input = filename, header = FALSE, sep = ",", nrows = 7, quote = quote) # Get epoch size @@ -83,18 +86,17 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL, } D = as.matrix(D, drop = FALSE) - + if (quote == "") D = apply(D, 2, as.numeric) # If requested, aggregate data to lower resolution to match desired # epoch size in argument windowsizes if (!is.null(desiredEpochSize)) { if (desiredEpochSize > epSizeShort) { step = desiredEpochSize %/% epSizeShort - D = sumAggregate(D, step) + D = matAggregate(D, step) epSizeShort = epSizeShort * step } checkEpochMatch(desiredEpochSize, epSizeShort) } - if (quote == "") D$ZCY = as.numeric(D$ZCY) invisible(list(data = D, epochSize = epSizeShort, startTime = timestamp_POSIX)) } \ No newline at end of file diff --git a/R/utils_for_countdata.R b/R/utils_for_countdata.R index e2112b6..61773c4 100644 --- a/R/utils_for_countdata.R +++ b/R/utils_for_countdata.R @@ -62,8 +62,10 @@ getExtension <- function(filename){ return(ex[-1]) } -sumAggregate = function(mat, step) { - # Aggregate matrix mat by taking the sum over step number of rows +matAggregate = function(mat, step) { + # Aggregate matrix mat by taking over step number of rows + # as sum unless column names is sleep or nonwear in that case + # we take the rounded mean. mat = rbind(rep(0, ncol(mat)), mat) cumsum2 = function(x) { x = cumsum(ifelse(is.na(x), 0, x)) + x*0 @@ -72,6 +74,10 @@ sumAggregate = function(mat, step) { mat = apply(mat, 2, cumsum2) mat = mat[seq(1, nrow(mat), by = step), , drop = FALSE] mat = apply(mat, 2, diff) + # Correct non incremental variables + for (niv in c("sleep", "nonwear")) { + if (niv %in% colnames(D)) D[, niv] = round(D[, niv] / step) + } return(mat) } diff --git a/tests/testthat/test_readActiGraphCount.R b/tests/testthat/test_readActiGraphCount.R index 6e9a2db..73772a1 100644 --- a/tests/testthat/test_readActiGraphCount.R +++ b/tests/testthat/test_readActiGraphCount.R @@ -7,16 +7,18 @@ test_that("ActiGraph61 is correctly read", { expect_equal(D$epochSize, 10) expect_equal(format(D$startTime), "2016-08-15 21:35:00") expect_equal(nrow(D$data), 495) - expect_equal(ncol(D$data), 4) - expect_equal(sum(D$data), 63952.33) + expect_equal(ncol(D$data), 5) + expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 63952.33) + expect_equal(sum(D$data[, c("steps")]), 253) D = readActiGraphCount(filename = file, desiredEpochSize = 5, timeformat = "%m/%d/%Y %H:%M:%S", tz = "") expect_equal(D$deviceSerialNumber, "MOS2D16160581") expect_equal(D$epochSize, 5) expect_equal(format(D$startTime), "2016-08-15 21:35:00") expect_equal(nrow(D$data), 990) - expect_equal(ncol(D$data), 4) - expect_equal(sum(D$data), 63952.33) + expect_equal(ncol(D$data), 5) + expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 63952.33) + expect_equal(sum(D$data[, c("steps")]), 253) }) test_that("ActiGraph31 is correctly read", { @@ -26,16 +28,19 @@ test_that("ActiGraph31 is correctly read", { expect_equal(D$epochSize, 15) expect_equal(format(D$startTime), "2013-08-26 09:00:00") expect_equal(nrow(D$data), 990) - expect_equal(ncol(D$data), 4) - expect_equal(sum(D$data), 272870.6, tol = 0.1) + expect_equal(ncol(D$data), 5) + expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 272870.6, tol = 0.1) + expect_equal(sum(D$data[, c("steps")]), 1118) D = readActiGraphCount(filename = file, desiredEpochSize = 30, timeformat = "%m/%d/%Y %H:%M:%S", tz = "") expect_equal(D$deviceSerialNumber, "CLE2A2123456") expect_equal(D$epochSize, 30) expect_equal(format(D$startTime), "2013-08-26 09:00:00") expect_equal(nrow(D$data), 495) - expect_equal(ncol(D$data), 4) - expect_equal(sum(D$data), 272870.6, tol = 0.1) + expect_equal(ncol(D$data), 5) + expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 272870.6, tol = 0.1) + expect_equal(sum(D$data[, c("steps")]), 1118) + }) test_that("ActiGraph13_timestamps_headers.csv is correctly read", { @@ -45,17 +50,18 @@ test_that("ActiGraph13_timestamps_headers.csv is correctly read", { expect_equal(D$epochSize, 1) expect_equal(format(D$startTime), "2017-12-09 15:00:00") expect_equal(nrow(D$data), 1000) - expect_equal(ncol(D$data), 4) - expect_equal(sum(D$data), 256047) - + expect_equal(ncol(D$data), 5) + expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 255707.4, tol = 0.1) + expect_equal(sum(D$data[, c("steps")]), 442) D = readActiGraphCount(filename = file, desiredEpochSize = 5, timeformat = "%d-%m-%Y %H:%M:%S", tz = "") expect_equal(D$deviceSerialNumber, "TAS1D48140206") expect_equal(D$epochSize, 5) expect_equal(format(D$startTime), "2017-12-09 15:00:00") expect_equal(nrow(D$data), 200) - expect_equal(ncol(D$data), 4) - expect_equal(sum(D$data), 256047) + expect_equal(ncol(D$data), 5) + expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 255707.4, tol = 0.1) + expect_equal(sum(D$data[, c("steps")]), 442) }) test_that("Actiwatch csv error correctly", { diff --git a/tests/testthat/test_readActiwatchCount.R b/tests/testthat/test_readActiwatchCount.R index e36dda0..23e76c0 100644 --- a/tests/testthat/test_readActiwatchCount.R +++ b/tests/testthat/test_readActiwatchCount.R @@ -6,15 +6,19 @@ test_that("Actiwatch csv is correctly read", { expect_equal(D$epochSize, 15) expect_equal(format(D$startTime), "2019-11-23 06:00:00") expect_equal(nrow(D$data), 860) - expect_equal(ncol(D$data), 1) - expect_equal(sum(D$data, na.rm = TRUE), 4589) + expect_equal(ncol(D$data), 3) + expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 4589) + expect_equal(sum(D$data[, "sleep"], na.rm = TRUE), 55) + expect_equal(sum(D$data[, "nonwear"], na.rm = TRUE), 797) D = readActiwatchCount(filename = file, desiredEpochSize = 30, timeformat = "%d/%m/%Y %H:%M:%S", tz = "") expect_equal(D$epochSize, 30) expect_equal(format(D$startTime), "2019-11-23 06:00:00") expect_equal(nrow(D$data), 430) - expect_equal(ncol(D$data), 1) - expect_equal(sum(D$data, na.rm = TRUE), 4569) + expect_equal(ncol(D$data), 3) + expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 4569) + expect_equal(sum(D$data[, "sleep"], na.rm = TRUE), 54) + expect_equal(sum(D$data[, "nonwear"], na.rm = TRUE), 797) }) test_that("Actiwatch awd is correctly read", { file = system.file("testfiles/Actiwatch.AWD", package = "GGIRread") @@ -22,15 +26,17 @@ test_that("Actiwatch awd is correctly read", { expect_equal(D$epochSize, 60) expect_equal(format(D$startTime), "2009-10-01 17:00:00") expect_equal(nrow(D$data), 329) - expect_equal(ncol(D$data), 1) - expect_equal(sum(D$data, na.rm = TRUE), 108864) + expect_equal(ncol(D$data), 2) + expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 108864) + expect_equal(sum(D$data[, "light"], na.rm = TRUE), 0) D = readActiwatchCount(filename = file, desiredEpochSize = 300, timeformat = "%d-%b-%Y %H:%M:%S", tz = "") expect_equal(D$epochSize, 300) expect_equal(format(D$startTime), "2009-10-01 17:00:00") expect_equal(nrow(D$data), 65) - expect_equal(ncol(D$data), 1) - expect_equal(sum(D$data, na.rm = TRUE), 108713) + expect_equal(ncol(D$data), 2) + expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 108713) + expect_equal(sum(D$data[, "light"], na.rm = TRUE), 0) }) test_that("Actiwatch awd error correctly", {