extract additional columns from files #68

wadpac · Sep 27, 2024 · 38669db · 38669db
1 parent 952d2e4
commit 38669db
Show file tree

Hide file tree

Showing 6 changed files with 96 additions and 56 deletions.
diff --git a/R/readActiGraphCount.R b/R/readActiGraphCount.R
@@ -34,8 +34,12 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
   if (any(grepl("serialnumber", fileHeader$item))) headerAvailable = TRUE
 
   # Depending on whether header is present assign number of rows to skip:
+  mode = NULL
   if (headerAvailable == TRUE) {
     skip = 10
+    # Extract mode number from header because this tells us how to interpret the columns
+    mode = as.numeric(fileHeader$value[grep(pattern = "mode", x = fileHeader$item)])
+    if (is.na(mode)) mode = NULL
   } else {
     tmp = data.table::fread(input = filename,
                             header = FALSE,
@@ -80,33 +84,50 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
     D = D[, -1, drop = FALSE]
   }
   # Identify columns with count data
-  acccol = vmcol = NA
+  acccol = NA
+  stepcol = NULL
   if (colnames == TRUE) {
     acccol = grep("axis|activity", colnames(D), ignore.case = TRUE)
     vmcol = grep("vector magnitude|vm", colnames(D), ignore.case = TRUE)
+    stepcol = grep("step", colnames(D), ignore.case = TRUE)
   } else {
-    # Then assume first 3 columns are axis1, axis2, axis3 if ncol(D) >= 3
-    # First column is VM if ncol(D) < 3
-    # Note that in ActiLife software the user can select
-    # the columns to export (e.g, it could be "Axis1", "Vector Magnitude", "Steps")
-    # which may mean that our assumptions here are not necessarily true.
-    if (ncol(D) >= 3) {
-      acccol = 1:3
+    if (!is.null(mode)) {
+      if ((mode >= 12 && mode <= 15) | (mode >= 28 && mode <= 31) |
+          (mode >= 44 && mode <= 47) | (mode >= 60 && mode <= 63)) {
+        acccol = 1:3
+      } else {
+        acccol = 1
+      }
+      if (mode %in% c(13, 15, 29, 31, 45, 47, 61, 63)) {
+        stepcol = 4
+      }
     } else {
-      vmcol = 1
+      # Then assume first 3 columns are axis1, axis2, axis3 if ncol(D) >= 3
+      # First column is VM if ncol(D) < 3
+      # Note that in ActiLife software the user can select
+      # the columns to export (e.g, it could be "Axis1", "Vector Magnitude", "Steps")
+      # which may mean that our assumptions here are not necessarily true.
+      if (ncol(D) >= 3) {
+        acccol = 1:3
+      } else {
+        acccol = 1
+      }
     }
   }
   # Assign colnames and formatting
-  if (is.na(acccol[1]) == FALSE) { 
+  if (length(acccol) == 3 && is.na(acccol[1]) == FALSE) { 
     colnames(D)[acccol] = c("y", "x", "z") # ActiGraph always stores y axis first
   }
-  if (is.na(vmcol[1]) == FALSE) { 
+  if (length(acccol) == 1 &&is.na(vmcol[1]) == FALSE) { 
     D = as.matrix(D, drop = FALSE) # Convert to matrix as data.frame will auto-collapse to vector
-    colnames(D)[vmcol] = c("vm")
+    colnames(D)[acccol] = "vm"
+  }
+  if (length(stepcol) == 1 && is.na(stepcol[1]) == FALSE) { 
+    colnames(D)[stepcol] = "steps"
   }
-  keep = c(acccol, vmcol)[!is.na(c(acccol, vmcol))]
+  keep = c(acccol, stepcol)[!is.na(c(acccol, stepcol))]
   D = D[, keep, drop = FALSE]
-  if (ncol(D) == 3 & is.na(vmcol)) {
+  if (ncol(D) >= 3) {
     D$vm = sqrt(D[, 1] ^ 2 + D[, 2] ^ 2 + D[, 3] ^ 2)
   }
   # Extract information from header, if present
@@ -154,7 +175,7 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
   if (!is.null(desiredEpochSize)) {
     if (desiredEpochSize > epSizeShort) {
       step = desiredEpochSize %/% epSizeShort
-      D = sumAggregate(D, step)
+      D = matAggregate(D, step)
       epSizeShort = epSizeShort * step
     }
     checkEpochMatch(desiredEpochSize, epSizeShort)

diff --git a/R/readActicalCount.R b/R/readActicalCount.R
@@ -48,18 +48,17 @@ readActicalCount = function(filename = file, desiredEpochSize = NULL,
   timestamp_POSIX = timestamp_POSIX[1]
   D = D[, -which(colnames(D) %in% c("date", "time"))]
   D = as.matrix(D, drop = FALSE)
-
+  if (quote == "") D = apply(D, 2, as.numeric)
   # If requested, aggregate data to lower resolution to match desired
   # epoch size in argument windowsizes
   if (!is.null(desiredEpochSize)) {
     if (desiredEpochSize > epSizeShort) {
       step = desiredEpochSize %/% epSizeShort
-      D = sumAggregate(D, step)
+      D = matAggregate(D, step)
       epSizeShort = epSizeShort * step
     }
     checkEpochMatch(desiredEpochSize, epSizeShort)
   }
-  if (quote == "") D = apply(D, 2, as.numeric)
   invisible(list(data = D, epochSize = epSizeShort,
                  startTime = timestamp_POSIX))
 }
diff --git a/R/readActiwatchCount.R b/R/readActiwatchCount.R
@@ -13,24 +13,27 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
     startindex = 1000
     quote = detectQuote(fn = filename, index = startindex)
     index = findStartData(filename, quote, startindex)
-    D = data.table::fread(input = filename, sep = ",", skip = index, quote = quote)
+    D = data.table::fread(input = filename, sep = ",", skip = index, quote = quote, data.table = FALSE)
     # ! Assumption that column names are present 2 lines prior to timeseries
-    colnames = data.table::fread(input = filename,
+    colnames = data.table::fread(input = filename, data.table = FALSE,
                                  header = FALSE, sep = ",",
                                  skip = index - 2, nrows = 1, quote = quote)
     if (all(is.na(colnames))) {
       colnames = data.table::fread(input = filename,
                                    header = FALSE, sep = ",",
                                    skip = index - 4, nrows = 1, quote = quote)
     }
-    colnames(D) = as.character(colnames)[1:ncol(D)]
+    colnames = colnames[!is.na(colnames)]
+    D = D[, which(!is.na(colnames))]
+    colnames(D) = tolower(as.character(colnames))
     # ! Assumptions about column names
-    colnames(D) = gsub(pattern = "datum|date", replacement = "date", 
-                       x = colnames(D), ignore.case = TRUE)
-    colnames(D) = gsub(pattern = "tijd|time", replacement = "time",
-                       x = colnames(D), ignore.case = TRUE)
-    colnames(D) = gsub(pattern = "activiteit|activity", replacement = "ZCY",
-                       x = colnames(D), ignore.case = TRUE)
+    # browser()
+    colnames(D)[grep(pattern = "datum|date", x = colnames(D))] = "date"
+    colnames(D)[grep(pattern = "tijd|time", x = colnames(D))] = "time"
+    colnames(D)[grep(pattern = "activiteit|activity", x = colnames(D))] = "counts"
+    colnames(D)[grep(pattern = "slapen|sleep", x = colnames(D))] = "sleep"
+    colnames(D)[grep(pattern = "niet-om|wear|worn", x = colnames(D))] = "nonwear"
+    D = D[, grep(pattern = "time|date|counts|sleep|nonwear", x = colnames(D))]
     timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "),
                                  format = timeformat,
                                  tz = tz)
@@ -41,7 +44,7 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
     epSizeShort = mean(diff(as.numeric(timestamp_POSIX)))
 
     timestamp_POSIX = timestamp_POSIX[1]
-    D = D[, "ZCY"]
+    D = D[, -which(colnames(D) %in% c("date", "time"))]
   } else if (fileExtension == "awd") {
     #=========================================================
     # AWD
@@ -64,8 +67,8 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
     }
     D = data.table::fread(input = filename, header = FALSE, sep = ",",
                           skip = index, quote = quote)
-    D = D[,1]
-    colnames(D)[1] = "ZCY"
+    D = D[, 1:2]
+    colnames(D)[1:2] = c("counts", "light")
     header = data.table::fread(input = filename, header = FALSE, sep = ",", 
                                nrows =  7, quote = quote)
     # Get epoch size
@@ -83,18 +86,17 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
 
   }
   D = as.matrix(D, drop = FALSE)
-
+  if (quote == "") D = apply(D, 2, as.numeric)  
   # If requested, aggregate data to lower resolution to match desired 
   # epoch size in argument windowsizes
   if (!is.null(desiredEpochSize)) {
     if (desiredEpochSize > epSizeShort) {
       step = desiredEpochSize %/% epSizeShort
-      D = sumAggregate(D, step)
+      D = matAggregate(D, step)
       epSizeShort = epSizeShort * step
     }
     checkEpochMatch(desiredEpochSize, epSizeShort)
   }
-  if (quote == "") D$ZCY = as.numeric(D$ZCY)
   invisible(list(data = D, epochSize = epSizeShort,
                  startTime = timestamp_POSIX))
 }
diff --git a/R/utils_for_countdata.R b/R/utils_for_countdata.R
@@ -62,8 +62,10 @@ getExtension <- function(filename){
   return(ex[-1])
 }
 
-sumAggregate = function(mat, step) {
-  # Aggregate matrix mat by taking the sum over step number of rows
+matAggregate = function(mat, step) {
+  # Aggregate matrix mat over every step number of rows by taking
+  # the sum, unless the column name is sleep or nonwear, in which case
+  # we take the rounded mean.
   mat = rbind(rep(0, ncol(mat)), mat)
   cumsum2 = function(x) {
     x = cumsum(ifelse(is.na(x), 0, x)) + x*0
@@ -72,6 +74,10 @@ sumAggregate = function(mat, step) {
   mat = apply(mat, 2, cumsum2)
   mat = mat[seq(1, nrow(mat), by = step), , drop = FALSE]
   mat = apply(mat, 2, diff)
+  # Correct non-incremental variables
+  for (niv in c("sleep", "nonwear")) {
+    if (niv %in% colnames(D)) D[, niv] = round(D[, niv] / step)
+  }
   return(mat)
 }
 

diff --git a/tests/testthat/test_readActiGraphCount.R b/tests/testthat/test_readActiGraphCount.R
@@ -7,16 +7,18 @@ test_that("ActiGraph61 is correctly read", {
   expect_equal(D$epochSize, 10)
   expect_equal(format(D$startTime), "2016-08-15 21:35:00")
   expect_equal(nrow(D$data), 495)
-  expect_equal(ncol(D$data), 4)
-  expect_equal(sum(D$data), 63952.33)
+  expect_equal(ncol(D$data), 5)
+  expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 63952.33)
+  expect_equal(sum(D$data[, c("steps")]), 253)
 
   D = readActiGraphCount(filename = file, desiredEpochSize = 5, timeformat = "%m/%d/%Y %H:%M:%S", tz =  "")
   expect_equal(D$deviceSerialNumber, "MOS2D16160581")
   expect_equal(D$epochSize, 5)
   expect_equal(format(D$startTime), "2016-08-15 21:35:00")
   expect_equal(nrow(D$data), 990)
-  expect_equal(ncol(D$data), 4)
-  expect_equal(sum(D$data), 63952.33)
+  expect_equal(ncol(D$data), 5)
+  expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 63952.33)
+  expect_equal(sum(D$data[, c("steps")]), 253)
 })
 
 test_that("ActiGraph31 is correctly read", {
@@ -26,16 +28,19 @@ test_that("ActiGraph31 is correctly read", {
   expect_equal(D$epochSize, 15)
   expect_equal(format(D$startTime), "2013-08-26 09:00:00")
   expect_equal(nrow(D$data), 990)
-  expect_equal(ncol(D$data), 4)
-  expect_equal(sum(D$data), 272870.6, tol = 0.1)
+  expect_equal(ncol(D$data), 5)
+  expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 272870.6, tol = 0.1)
+  expect_equal(sum(D$data[, c("steps")]), 1118)
 
   D = readActiGraphCount(filename = file, desiredEpochSize = 30, timeformat = "%m/%d/%Y %H:%M:%S", tz =  "")
   expect_equal(D$deviceSerialNumber, "CLE2A2123456")
   expect_equal(D$epochSize, 30)
   expect_equal(format(D$startTime), "2013-08-26 09:00:00")
   expect_equal(nrow(D$data), 495)
-  expect_equal(ncol(D$data), 4)
-  expect_equal(sum(D$data), 272870.6, tol = 0.1)
+  expect_equal(ncol(D$data), 5)
+  expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 272870.6, tol = 0.1)
+  expect_equal(sum(D$data[, c("steps")]), 1118)
+
 })
 
 test_that("ActiGraph13_timestamps_headers.csv is correctly read", {
@@ -45,17 +50,18 @@ test_that("ActiGraph13_timestamps_headers.csv is correctly read", {
   expect_equal(D$epochSize, 1)
   expect_equal(format(D$startTime), "2017-12-09 15:00:00")
   expect_equal(nrow(D$data), 1000)
-  expect_equal(ncol(D$data), 4)
-  expect_equal(sum(D$data), 256047)
-
+  expect_equal(ncol(D$data), 5)
+  expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 255707.4, tol = 0.1)
+  expect_equal(sum(D$data[, c("steps")]), 442)
 
   D = readActiGraphCount(filename = file, desiredEpochSize = 5, timeformat = "%d-%m-%Y %H:%M:%S", tz =  "")
   expect_equal(D$deviceSerialNumber, "TAS1D48140206")
   expect_equal(D$epochSize, 5)
   expect_equal(format(D$startTime), "2017-12-09 15:00:00")
   expect_equal(nrow(D$data), 200)
-  expect_equal(ncol(D$data), 4)
-  expect_equal(sum(D$data), 256047)
+  expect_equal(ncol(D$data), 5)
+  expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 255707.4, tol = 0.1)
+  expect_equal(sum(D$data[, c("steps")]), 442)
 })
 
 test_that("Actiwatch csv error correctly", {

diff --git a/tests/testthat/test_readActiwatchCount.R b/tests/testthat/test_readActiwatchCount.R
@@ -6,31 +6,37 @@ test_that("Actiwatch csv is correctly read", {
   expect_equal(D$epochSize, 15)
   expect_equal(format(D$startTime), "2019-11-23 06:00:00")
   expect_equal(nrow(D$data), 860)
-  expect_equal(ncol(D$data), 1)
-  expect_equal(sum(D$data, na.rm = TRUE), 4589)
+  expect_equal(ncol(D$data), 3)
+  expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 4589)
+  expect_equal(sum(D$data[, "sleep"], na.rm = TRUE), 55)
+  expect_equal(sum(D$data[, "nonwear"], na.rm = TRUE), 797)
 
   D = readActiwatchCount(filename = file, desiredEpochSize = 30, timeformat = "%d/%m/%Y %H:%M:%S", tz =  "")
   expect_equal(D$epochSize, 30)
   expect_equal(format(D$startTime), "2019-11-23 06:00:00")
   expect_equal(nrow(D$data), 430)
-  expect_equal(ncol(D$data), 1)
-  expect_equal(sum(D$data, na.rm = TRUE), 4569)
+  expect_equal(ncol(D$data), 3)
+  expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 4569)
+  expect_equal(sum(D$data[, "sleep"], na.rm = TRUE), 54)
+  expect_equal(sum(D$data[, "nonwear"], na.rm = TRUE), 797)
 })
 test_that("Actiwatch awd is correctly read", {
   file = system.file("testfiles/Actiwatch.AWD", package = "GGIRread")
   D = readActiwatchCount(filename = file, desiredEpochSize = 60, timeformat = "%d-%b-%Y %H:%M:%S", tz =  "")
   expect_equal(D$epochSize, 60)
   expect_equal(format(D$startTime), "2009-10-01 17:00:00")
   expect_equal(nrow(D$data), 329)
-  expect_equal(ncol(D$data), 1)
-  expect_equal(sum(D$data, na.rm = TRUE), 108864)
+  expect_equal(ncol(D$data), 2)
+  expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 108864)
+  expect_equal(sum(D$data[, "light"], na.rm = TRUE), 0)
 
   D = readActiwatchCount(filename = file, desiredEpochSize = 300, timeformat =  "%d-%b-%Y %H:%M:%S", tz =  "")
   expect_equal(D$epochSize, 300)
   expect_equal(format(D$startTime), "2009-10-01 17:00:00")
   expect_equal(nrow(D$data), 65)
-  expect_equal(ncol(D$data), 1)
-  expect_equal(sum(D$data, na.rm = TRUE), 108713)
+  expect_equal(ncol(D$data), 2)
+  expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 108713)
+  expect_equal(sum(D$data[, "light"], na.rm = TRUE), 0)
 })
 
 test_that("Actiwatch awd error correctly", {