migrate function for reading Actiwatch to GGIRread and revise implementation for ActiGraph #68
wadpac · Sep 26, 2024 · 5149a61 · 5149a61
1 parent e6a302b
commit 5149a61
Show file tree

Hide file tree

Showing 11 changed files with 1,646 additions and 20 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,6 @@
 export(readGenea, readAxivity, readGENEActiv, 
        GENEActivReader, resample, readWav, 
-       readActiGraphCount)
+       readActiGraphCount, readActiwatchCount)
 useDynLib(GGIRread, .registration = TRUE)
 importFrom(Rcpp, sourceCpp)
 importFrom(data.table, fread)

diff --git a/NEWS.md b/NEWS.md
@@ -2,7 +2,8 @@
 
 - Added a `NEWS.md` file to track changes to the package.
 - Stops interactive calling of `chooseCRANmirror` on `.onAttach` if interactive and CRAN mirror not set GGIR #1141.
-- Migrate read function for ActiGraph count data (csv) to GGIRread #68.
+- Migrate read function for ActiGraph (csv) and Actiwatch (csv/awd) count data to GGIRread #68.
+
 
 # Changes in version 1.0.1 (release date:03-06-2024)
 

diff --git a/R/readActiGraphCount.R b/R/readActiGraphCount.R
@@ -1,6 +1,8 @@
 readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
-                            timeformat = "%m/%d/%Y %H:%M:%S", tz = "") {
+                            timeformat = "%m/%d/%Y %H:%M:%S", tz = "", timeformatName = "timeformat") {
+  # In GGIR set timeformatName to extEpochData_timeformat
   deviceSerialNumber = NULL
+
   # Test if file has header by reading first ten rows
   # and checking whether it contains the word
   # serial number.
@@ -147,11 +149,11 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
     }
   }
   # Check timestamp is meaningful
-  if (all(is.na(timestamp_POSIX))) {
-    stop(paste0("\nTime format in data ", timestamp, " does not match with time format ",
-                timeformat,
-                " as specified by argument extEpochData_timeformat, please correct.\n"))
-  }
+  checkTimeFormat(timestamp_POSIX = timestamp_POSIX, rawValue = timestamp[1],
+                  timeformat = timeformat,
+                  timeformatName = timeformatName)
+
+
   # If requested, aggregate data to lower resolution to match desired 
   # epoch size in argument windowsizes
   if (!is.null(desiredEpochSize)) {
@@ -163,12 +165,7 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
       D = apply(D, 2, diff)
       epSizeShort = epSizeShort * step
     }
-    if (epSizeShort != desiredEpochSize) {
-      stop(paste0("\nThe short epoch size as specified by the user as the first value of argument windowsizes (",
-                  desiredEpochSize,
-                  " seconds) does NOT match the short epoch size we see in the data (", epSizeShort),
-           " seconds). Please correct.", call. = FALSE)
-    }
+    checkEpochMatch(desiredEpochSize, epSizeShort)
   }
   invisible(list(data = D, epochSize = epSizeShort,
                  startTime = timestamp_POSIX,

diff --git a/R/readActiwatchCount.R b/R/readActiwatchCount.R
@@ -0,0 +1,129 @@
+# Read Actiwatch count data from a csv or awd file.
+#
+# Arguments:
+#   filename          Path to the file. The file extension (csv or awd, case
+#                     insensitive) selects the parser.
+#   desiredEpochSize  Optional epoch size in seconds. When it is larger than
+#                     the epoch size found in the file the counts are summed
+#                     per desiredEpochSize %/% epochSize epochs. Afterwards it
+#                     has to equal the resulting epoch size, see
+#                     checkEpochMatch.
+#   timeformat        Format of the timestamps as used by as.POSIXct. For awd
+#                     files only the part before the first space is used,
+#                     combined with "%H:%M".
+#   tz                Time zone passed on to as.POSIXct.
+#   timeformatName    Argument name that is quoted in the error message when
+#                     the first timestamp cannot be parsed.
+#
+# Value (invisible): list with elements
+#   data       matrix with the counts in column ZCY
+#   epochSize  epoch size in seconds
+#   startTime  POSIXct timestamp of the start of the recording
+readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
+                            timeformat = "%m/%d/%Y %H:%M:%S", tz = "", timeformatName = "timeformat") {
+  # In GGIR set timeformatName to extEpochData_timeformat
+
+  fileExtension = tolower(getExtension(filename))
+  # Only the part after the last dot identifies the file type, this keeps
+  # the comparisons below scalar for file names with more than one dot
+  fileExtension = fileExtension[length(fileExtension)]
+
+  if (fileExtension == "csv") {
+    #=========================================================
+    # CSV
+    #=========================================================
+    # ! Assumption that timeseries starts before line 1000
+    # Walk backwards through the file in steps of 100 lines until two
+    # consecutive rows are found of which the first column increases by 1
+    index = 1000
+    seriesFound = FALSE
+    while (index > 0) {
+      quote = detectQuote(fn = filename, index = index)
+      # fread can fail here, e.g. when the file has fewer lines than the
+      # number of lines to skip. Treat that as 'no data found at this
+      # position' and continue the search closer to the start of the file.
+      testraw = tryCatch(
+        data.table::fread(input = filename,
+                          header = FALSE, sep = ",", skip = index,
+                          nrows = 2, data.table = FALSE, quote = quote),
+        error = function(e) NULL)
+      # is.numeric and isTRUE guard against text or missing values in the
+      # first column, which cannot be used as condition in if
+      if (length(testraw) > 0 && nrow(testraw) == 2 &&
+          is.numeric(testraw$V1) &&
+          isTRUE(testraw$V1[2] == testraw$V1[1] + 1)) {
+        seriesFound = TRUE
+        break
+      }
+      index = index - 100
+    }
+    if (!seriesFound) stop("Could not find start of recording", call. = FALSE)
+    # ! Assumption that first column are the epoch numbers
+    # Jump back to the line that is expected to hold epoch number 1
+    delta = 1 - testraw$V1[1]
+    index = index + delta
+    if (index < 0) stop("Could not find start of recording", call. = FALSE)
+    startFound = FALSE
+    while (startFound == FALSE) {
+      Dtest = data.table::fread(input = filename, sep = ",", skip = index, quote = quote, nrows = 1)
+      if (isTRUE(Dtest$V1[1] == 1)) {
+        startFound = TRUE
+      } else {
+        # This happens when the file has an empty row stored between each
+        # pair of measurement points
+        index = index - 1
+        if (index < 1) stop("Could not find start of recording", call. = FALSE)
+      }
+    }
+    D = data.table::fread(input = filename, sep = ",", skip = index, quote = quote)
+    # ! Assumption that column names are present 2 lines prior to timeseries
+    columnNames = data.table::fread(input = filename,
+                                    header = FALSE, sep = ",",
+                                    skip = index - 2, nrows = 1, quote = quote)
+    if (all(is.na(columnNames))) {
+      # Second attempt 4 lines prior to the timeseries
+      columnNames = data.table::fread(input = filename,
+                                      header = FALSE, sep = ",",
+                                      skip = index - 4, nrows = 1, quote = quote)
+    }
+    colnames(D) = as.character(columnNames)[seq_len(ncol(D))]
+    # ! Assumptions about columns names
+    colnames(D) = gsub(pattern = "datum|date", replacement = "date", x = colnames(D), ignore.case = TRUE)
+    colnames(D) = gsub(pattern = "tijd|time", replacement = "time", x = colnames(D), ignore.case = TRUE)
+    colnames(D) = gsub(pattern = "activiteit|activity", replacement = "ZCY", x = colnames(D), ignore.case = TRUE)
+    # The first four timestamps are used to derive start time and epoch size
+    timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "),
+                                 format = timeformat,
+                                 tz = tz)
+    checkTimeFormat(timestamp_POSIX[1], 
+                    rawValue = paste(D$date[1], D$time[1], sep = " "),
+                    timeformat = timeformat,
+                    timeformatName = timeformatName)
+    epSizeShort = mean(diff(as.numeric(timestamp_POSIX)))
+
+    timestamp_POSIX = timestamp_POSIX[1]
+    D = D[, "ZCY"]
+  } else if (fileExtension == "awd") {
+    #=========================================================
+    # AWD
+    #=========================================================
+    # ! Assumption that first data row equals the first row with 3 columns
+    quote = detectQuote(fn = filename, index = 50)
+    # Search the first maxHeaderLines lines for a row with at least 3
+    # columns. When no such row is found index stays 0 and it is left to
+    # fread to deal with the header lines.
+    # NOTE(review): the search loop in the previous implementation was never
+    # entered, by which index was always 0 - confirm with real awd files
+    # that the search gives the intended first data row.
+    maxHeaderLines = 50
+    index = 0
+    for (candidate in 0:(maxHeaderLines - 1)) {
+      testraw = tryCatch(
+        data.table::fread(input = filename,
+                          header = FALSE, sep = ",", skip = candidate,
+                          nrows = 1, data.table = TRUE, quote = quote),
+        error = function(e) NULL)
+      if (length(testraw) >= 3) {
+        index = candidate
+        break
+      }
+    }
+    D = data.table::fread(input = filename, header = FALSE, sep = ",",
+                          skip = index, quote = quote)
+    D = D[,1]
+    colnames(D)[1] = "ZCY"
+    # The first 7 lines are read as header: line 2 and 3 are used as start
+    # date and time, line 4 as code for the epoch size
+    header = data.table::fread(input = filename, header = FALSE, sep = ",", 
+                               nrows =  7, quote = quote)
+    # Get epoch size
+    optionalEpochs = data.frame(code = c("1", "2", "4", "8", "20", "81", "C1", "C2"),
+                                size = c(15, 30, 60, 120, 300, 2, 5, 10))
+    epochCode = as.character(header[4])
+    epSizeShort = optionalEpochs$size[which(optionalEpochs$code == epochCode)]
+    if (length(epSizeShort) != 1) {
+      # Without this check the code fails further down with an error
+      # that does not tell what is wrong
+      stop(paste0("\nEpoch size code ", paste(epochCode, collapse = " "),
+                  " in header of ", filename, " not recognised.\n"), call. = FALSE)
+    }
+    # Get starttime 
+    timestampFormat = paste0(unlist(strsplit(timeformat, " "))[1], " %H:%M")
+    timestamp_POSIX = as.POSIXct(x = paste(header[2], header[3], sep = " "),
+                                 format = timestampFormat, tz = tz)
+    checkTimeFormat(timestamp_POSIX, 
+                    rawValue = header[2],
+                    timeformat = timeformat,
+                    timeformatName = timeformatName)
+
+  } else {
+    stop(paste0("\nFile extension ", fileExtension, " of ", filename,
+                " not supported, expected csv or awd.\n"), call. = FALSE)
+  }
+  D = as.matrix(D)
+  # When the quotes in the file were not recognised the counts can be stored
+  # as text. Convert them to numeric before any calculation is done, D is a
+  # matrix at this point by which column access with $ is not possible.
+  if (quote == "" && !is.numeric(D)) {
+    D[] = gsub(pattern = "\"", replacement = "", x = D, fixed = TRUE)
+    storage.mode(D) = "numeric"
+  }
+
+  # If requested, aggregate data to lower resolution to match desired 
+  # epoch size in argument windowsizes
+  if (!is.null(desiredEpochSize)) {
+    if (desiredEpochSize > epSizeShort) {
+      step = desiredEpochSize %/% epSizeShort
+      # Prepend a row with zeros such that the difference between every
+      # step-th cumulative sum equals the sum per aggregated epoch
+      D = rbind(rep(0, ncol(D)), D)
+      cumsum2 = function(x) {
+        # cumulative sum that skips missing values but keeps them as NA
+        # at their original position
+        x = cumsum(ifelse(is.na(x), 0, x)) + x*0
+        return(x)
+      }
+      D = apply(D, 2, cumsum2)
+      D = D[seq(1, nrow(D), by = step), , drop = FALSE]
+      # diff works per column on a matrix and always returns a matrix,
+      # also when only one aggregated epoch remains
+      D = diff(D)
+      epSizeShort = epSizeShort * step
+    }
+    checkEpochMatch(desiredEpochSize, epSizeShort)
+  }
+  invisible(list(data = D, epochSize = epSizeShort,
+                 startTime = timestamp_POSIX))
+}
diff --git a/R/utils_for_countdata.R b/R/utils_for_countdata.R
@@ -0,0 +1,45 @@
+# Collection of short helper functions used by readActiGraphCount and readActiwatchCount
+# Stop with an informative message when timestamps could not be parsed.
+#
+# Arguments:
+#   timestamp_POSIX  Timestamp(s) after conversion with as.POSIXct, either a
+#                    single value or a vector.
+#   rawValue         Timestamp as found in the data, shown in the message.
+#   timeformat       Time format that was specified, shown in the message.
+#   timeformatName   Name of the argument that holds the time format, shown
+#                    in the message.
+#
+# An error is raised when none of the timestamps could be parsed, i.e. when
+# all values are NA. Returns NULL invisibly otherwise.
+checkTimeFormat = function(timestamp_POSIX, rawValue = " ?? ", timeformat = " ?? ",
+                           timeformatName = NULL) {
+  # all() reduces the check to a single TRUE or FALSE, a condition with
+  # more than one element is not accepted by if
+  if (all(is.na(timestamp_POSIX))) {
+    stop(paste0("\nTime format in data ", rawValue, 
+                " does not match with time format ", timeformat,
+                " as specified by argument ", timeformatName,
+                ", please correct.\n"), call. = FALSE)
+  }
+}
+
+# Verify that the epoch size requested by the user equals the epoch size
+# of the data.
+#
+# Arguments:
+#   desiredEpochSize  Epoch size requested by the user, NULL when no
+#                     specific epoch size was requested.
+#   epSizeShort       Epoch size of the data.
+#
+# Raises an error when an epoch size was requested and it differs from
+# epSizeShort, returns NULL otherwise.
+checkEpochMatch = function(desiredEpochSize, epSizeShort) {
+  sizesDiffer = !is.null(desiredEpochSize) && epSizeShort != desiredEpochSize
+  if (!sizesDiffer) {
+    return(NULL)
+  }
+  errorText = paste0("\nThe short epoch size as specified by the user (",
+                     desiredEpochSize, " seconds) does NOT match the short",
+                     " epoch size we see in the data (", epSizeShort,
+                     " seconds). Please correct.")
+  stop(errorText, call. = FALSE)
+}
+
+# Decide which value to use for argument quote of data.table::fread.
+#
+# Arguments:
+#   fn     Path to the file.
+#   index  Number of lines to skip before the test read starts.
+#
+# On some computers the quotes in the files are not recognised. To catch
+# this up to 20 rows are read with the double quote as quote character.
+# When that attempt fails, or gives no more than one row, "" is returned,
+# otherwise "\"" is returned.
+detectQuote = function(fn, index) {
+  doubleQuote = "\""
+  attempt = tryCatch(
+    data.table::fread(input = fn,
+                      header = FALSE, sep = ",", skip = index,
+                      nrows = 20, quote = doubleQuote),
+    error = function(e) NULL)
+  # length is zero when the read failed or when no columns were found
+  if (length(attempt) == 0 || nrow(attempt) <= 1) {
+    return("")
+  }
+  return(doubleQuote)
+}
+
+# Extract the file extension from a file name.
+#
+# Arguments:
+#   filename  File name, with or without path.
+#
+# Returns the part of the file name after the last dot as a single
+# character value. Raises an error when the file name cannot be split into
+# at least two parts by a dot.
+getExtension <- function(filename){ 
+  ex <- unlist(strsplit(basename(filename), split = "[.]"))
+  if (length(ex) < 2) stop(paste0("Cannot recognise extension from '", filename, "' as filename, please check"), call. = FALSE)
+  # Only keep the last part, file names can have more than one dot
+  return(ex[length(ex)])
+}