Merge pull request #69 from wadpac/issue68_epochdata

Add functions to read epoch data files (not raw data)
wadpac · Oct 25, 2024 · 4bdd12b · 4bdd12b
2 parents f339be3 + 80a8508
commit 4bdd12b
Show file tree

Hide file tree

Showing 48 changed files with 6,732 additions and 61 deletions.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -6,5 +6,5 @@ Checklist before merging:
 - [ ] Existing tests still work (check by running the test suite, e.g. from RStudio).
 - [ ] Added tests (if you added functionality) or fixed existing test (if you fixed a bug).
 - [ ] Updated or expanded the documentation.
-- [ ] Updated release notes in `inst/NEWS.Rd` with a user-readable summary. Please, include references to relevant issues or PR discussions.
+- [ ] Updated release notes in `NEWS.md` with a user-readable summary. Please, include references to relevant issues or PR discussions.
 - [ ] Added your name to the contributors lists in the `DESCRIPTION` file, if you think you made a significant contribution.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: GGIRread
 Type: Package
 Title: Wearable Accelerometer Data File Readers
-Version: 1.0.1
-Date: 2024-06-03
+Version: 1.0.2
+Date: 2024-10-26
 Authors@R: c(person("Vincent T","van Hees",role=c("aut","cre"),
                   email="[email protected]"),
              person(given = "Patrick",family = "Bos",
@@ -17,12 +17,12 @@ Authors@R: c(person("Vincent T","van Hees",role=c("aut","cre"),
              person("Medical Research Council UK",  role = c("cph", "fnd")),
              person("Accelting",  role = c("cph", "fnd")))
 Maintainer: Vincent T van Hees <[email protected]>
-Description: Reads data collected from wearable acceleratometers as used in sleep and physical activity research. Currently supports file formats: binary data from 'GENEActiv' <https://activinsights.com/>, .bin-format from GENEA devices (not for sale), and .cwa-format from 'Axivity' <https://axivity.com>. Primarily designed to complement R package GGIR <https://CRAN.R-project.org/package=GGIR>.
+Description: Reads data collected from wearable accelerometers as used in sleep and physical activity research. Currently supports file formats: binary data from 'GENEActiv' <https://activinsights.com/>, .bin-format from GENEA devices (not for sale), and .cwa-format from 'Axivity' <https://axivity.com>. Further, it has functions for reading text files with epoch level aggregates from Actical, Fitbit, Actiwatch, ActiGraph, and PhilipsHealthBand. Primarily designed to complement R package GGIR <https://CRAN.R-project.org/package=GGIR>.
 URL: https://github.com/wadpac/GGIRread/
 BugReports: https://github.com/wadpac/GGIRread/issues
 License: Apache License (== 2.0)
 Suggests: testthat
-Imports: matlab, bitops, Rcpp (>= 0.12.10)
+Imports: matlab, bitops, Rcpp (>= 0.12.10), data.table, readxl, jsonlite
 Depends: stats, utils, R (>= 3.5.0)
 NeedsCompilation: yes
 LinkingTo: Rcpp

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,12 @@
-export(readGenea, readAxivity, readGENEActiv, GENEActivReader, resample, readWav)
+export(readGenea, readAxivity, readGENEActiv, 
+       GENEActivReader, resample, readWav, 
+       readActiGraphCount, readActiwatchCount,
+       readActicalCount, readPHBCount,
+       readFitbit, mergePHBdata,
+       mergeFitbitData)
 useDynLib(GGIRread, .registration = TRUE)
 importFrom(Rcpp, sourceCpp)
+importFrom(data.table, fread)
 importFrom("utils", "setTxtProgressBar", "txtProgressBar")
-importFrom("utils", "read.csv")
+importFrom("utils", "read.csv", "write.csv")
+importFrom("utils", "available.packages")
diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,19 @@
-# Changes in version 1.0.2 (release date:??-??-2024)
+# Changes in version 1.0.2 (release date:26-10-2024)
 
 - Added a `NEWS.md` file to track changes to the package.
 - Stops interactive calling of `chooseCRANmirror` on `.onAttach` if interactive and CRAN mirror not set GGIR #1141.
+- GGIRread version look-up in `.onAttach()` no longer crashes when the computer is offline.
+- Migrate read function for ActiGraph (csv) and Actiwatch (csv/awd) count data to GGIRread #68.
+- Add function for reading Actical (csv) count data #68.
+- Add functions for reading and merging Philips Health Band file pairs (xlsx) #68.
+- Add functions for reading and merging Fitbit (json) files with sleep, steps, and/or calories #68.
+
 
 # Changes in version 1.0.1 (release date:03-06-2024)
 
 - Progress bar fixed, issue #63 (credits: John Muschelli)
 
+
 # Changes in version 1.0.0 (release date:27-03-2024)
 
 - GENEActiv no longer prints error to console when more data is requested 

diff --git a/R/checkTimeFormat.R b/R/checkTimeFormat.R
@@ -0,0 +1,22 @@
+checkTimeFormat = function(timestamp_POSIX, rawValue = " ?? ", timeformat = " ?? ",
+                           timeformatName = NULL) {
+  # Sanity check for a single timestamp that was converted from a data file.
+  # Stops with an error (call not shown in the message) when either:
+  # - timestamp_POSIX is NA, reported as a mismatch between the raw value
+  #   and the time format, or
+  # - the year of timestamp_POSIX lies outside the range 1980-2500.
+  # rawValue, timeformat and timeformatName are only used to compose the
+  # error message. Nothing is returned when the timestamp passes both checks.
+  if (is.na(timestamp_POSIX)) {
+    mismatchMessage = paste0("\nTime format in data ", rawValue, 
+                             " does not match with time format ", timeformat,
+                             " as specified by argument ", timeformatName,
+                             ", please correct.\n")
+    stop(mismatchMessage, call. = FALSE)
+  }
+  year = as.numeric(format(timestamp_POSIX, format = "%Y"))
+  # Assumption that after 2500 no new ActiGraph data will be collected!
+  yearIsPlausible = year >= 1980 && year <= 2500
+  if (!yearIsPlausible) {
+    implausibleMessage = paste0("\nTimestamp recognised as ", format(timestamp_POSIX), 
+                                " with year identified as ", year,
+                                ". This does not seem to be correct. Raw timestamp value is stored as ",
+                                rawValue, ". please change specification of ",
+                                "argument ", timeformatName, " (currently ",
+                                timeformat, ") to ensure correct interpretation of timestamp.\n")
+    stop(implausibleMessage, call. = FALSE)
+  }
+}
diff --git a/R/detectQuote.R b/R/detectQuote.R
@@ -0,0 +1,18 @@
+detectQuote = function(filename, skip) {
+  # Choose the value for the quote argument of data.table::fread.
+  # On some computers the quotes in the files are not recognised. To catch
+  # this, up to 20 rows are read with the double quote character, starting
+  # after skipping `skip` lines. If that read fails, yields nothing, or
+  # yields at most one row, an empty string is returned (no quote
+  # character); otherwise the double quote character is returned.
+  defaultQuote = "\""
+  probe = try(expr = data.table::fread(input = filename,
+                                       header = FALSE, sep = ",", skip = skip,
+                                       nrows = 20, quote = defaultQuote),
+              silent = TRUE)
+  readFailed = inherits(probe, "try-error") || length(probe) == 0
+  if (readFailed || nrow(probe) <= 1) {
+    return("")
+  }
+  return(defaultQuote)
+}
diff --git a/R/findStartData.R b/R/findStartData.R
@@ -0,0 +1,32 @@
+findStartData = function(filename, quote, startindex) {
+  # Function used to find start of time series in Actiwatch and Actical data.
+  # filename: comma separated file that is read with data.table::fread.
+  # quote: quote character that is passed on to fread.
+  # startindex: number of lines to skip in the first read attempt; the search
+  #   moves towards the top of the file from there.
+  # Returns the number of lines to skip such that the first row read by fread
+  # has the value 1 in its first column (V1).
+  # ! Assumption that the time series starts before line 1000
+  #   (presumably callers pass startindex = 1000 - TODO confirm)
+  # Step 1: jump back 100 lines at a time until two rows can be read whose
+  # first column increases by exactly 1.
+  while (startindex > 0) {
+    testraw = data.table::fread(input = filename,
+                                header = FALSE, sep = ",", skip = startindex,
+                                nrows = 2, data.table = FALSE, quote = quote)
+    if (length(testraw) > 0) {
+      if (nrow(testraw) == 2) {
+        if (testraw$V1[2] == testraw$V1[1] + 1) {
+          break
+        }
+      }
+    }
+    startindex = startindex - 100
+  }
+  # NOTE(review): when the loop above ends without break, testraw holds the
+  # last failed attempt (or does not exist if startindex <= 0 on entry) -
+  # confirm that callers can rely on a match always being found.
+  # ! Assumption that first column are the epoch numbers
+  # Step 2: the row at the current position has epoch number V1[1], so shift
+  # the position by 1 - V1[1] to aim for the row with epoch number 1.
+  delta = 1 - testraw$V1[1]
+  startindex = startindex + delta
+  # Step 3: verify the position and move up one line at a time until the
+  # first row read has epoch number 1.
+  # NOTE(review): header is not set in this fread call, unlike the call
+  # above; column name V1 presumably relies on fread not detecting a
+  # header row - confirm.
+  startFound = FALSE
+  while (startFound == FALSE) {
+    Dtest = data.table::fread(input = filename, sep = ",", skip = startindex, quote = quote, nrows = 1)
+    if (Dtest$V1[1] == 1) {
+      startFound = TRUE
+    } else {
+      # This happens when the file has an empty row between each stored
+      # measurement point
+      startindex = startindex - 1
+      if (startindex < 1) stop("Could not find start of recording", call. = FALSE)
+    }
+  }
+  return(startindex)
+}
diff --git a/R/getExtension.R b/R/getExtension.R
@@ -0,0 +1,9 @@
+getExtension <- function(filename) {
+  # Extract the file extension of a single filename, defined as the text
+  # after the last dot in the basename (the directory part is ignored).
+  # filename: character string with a file name or path.
+  # Returns a character string of length one with the extension, without
+  # the dot and with the original letter case.
+  # Stops with an error when the basename cannot be split on a dot.
+  parts <- unlist(strsplit(basename(filename), split = "[.]"))
+  if (length(parts) < 2) stop(paste0("Cannot recognise extension from '", filename, "' as filename, please check"), call. = FALSE)
+  # Only the last part is the extension. Taking everything after the first
+  # dot would return a vector for names such as "my.file.csv", which breaks
+  # scalar comparisons like `if (ext == "csv")` in calling code.
+  return(parts[length(parts)])
+}
+
+
+
diff --git a/R/mergeFitbitData.R b/R/mergeFitbitData.R
@@ -0,0 +1,22 @@
+mergeFitbitData = function(filenames = NULL, desiredtz = "", configtz = NULL) {
+  # Read two or more Fitbit files with readFitbit and combine them into one
+  # data.frame by column dateTime, keeping the rows of all files (all = TRUE).
+  # filenames: character vector with at least two file names.
+  # desiredtz, configtz: passed on unchanged to readFitbit.
+  # A warning is given when a file shares no dateTime value with the data
+  # merged so far.
+  if (length(filenames) < 2) {
+    stop("Provide at least two filenames")
+  }
+  merged = NULL
+  for (fileIndex in seq_along(filenames)) {
+    current = readFitbit(filename = filenames[fileIndex], desiredtz = desiredtz,
+                         configtz = configtz)
+    if (fileIndex == 1) {
+      # The first file is the starting point for the merge
+      merged = current
+      next
+    }
+    sharedTimes = intersect(x = merged$dateTime, current$dateTime)
+    if (length(sharedTimes) == 0) {
+      warning(paste0("Time series do not intersect for files ",
+                     basename(filenames[fileIndex]), " and ",
+                     basename(filenames[fileIndex - 1])),
+              call. = FALSE)
+    }
+    merged = merge(merged, current, by = "dateTime", all = TRUE)
+  }
+  return(merged)
+}
diff --git a/R/mergePHBdata.R b/R/mergePHBdata.R
@@ -0,0 +1,48 @@
+mergePHBdata = function(filenames = NULL,
+                             timeformat = "%m/%d/%Y %H:%M:%S",
+                             desiredtz = "", configtz = NULL,
+                             timeformatName = "timeformat") {
+  # Merges the Philips Health Band xlsx files of one participant.
+  # filenames: character vector with two file names. Files are recognised by
+  #   their name (case insensitive): "datalist" for the file with all
+  #   variables except sleep/wake scores, "sleep_wake" for the file with the
+  #   sleep/wake scores.
+  # timeformat, desiredtz, configtz, timeformatName: passed on unchanged to
+  #   readPHBCount; timeformat and timeformatName are also used in the error
+  #   message about NA timestamps.
+  # Returns (invisibly) a list with:
+  # - data: both files merged by timestamp when both are recognised,
+  #   otherwise the data of the single recognised file.
+  # - deviceSN: taken from the datalist file, NULL if that file is missing.
+  if (length(filenames) != 2) {
+    stop("Provide two filenames", call. = FALSE)
+  }
+
+  # Identify both files
+  file1 = grep(pattern = "datalist", x = filenames, ignore.case = TRUE)
+  file2 = grep(pattern = "sleep_wake", x = filenames, ignore.case = TRUE)
+  if (length(file1) == 0 && length(file2) == 0) {
+    # Without this check the code below fails on an object that was never
+    # created, which gives the user no clue about the cause.
+    stop(paste0("Could not identify the Philips Health Band files, ",
+                "expected filenames that include 'datalist' and/or 'sleep_wake'"),
+         call. = FALSE)
+  }
+  if (length(file1) > 1 || length(file2) > 1) {
+    # readPHBCount is called with one filename at a time
+    stop(paste0("Expected at most one filename that includes 'datalist' ",
+                "and at most one that includes 'sleep_wake'"),
+         call. = FALSE)
+  }
+
+  # Datalist file (with all variables except sleep/wake scores)
+  deviceSN = NULL
+  if (length(file1) > 0) {
+    data1 = readPHBCount(filename = filenames[file1], timeformat = timeformat,
+                         desiredtz = desiredtz, configtz = configtz,
+                         timeformatName = timeformatName)
+    deviceSN = data1$deviceSN
+  }
+  # Sleep wake scores file
+  if (length(file2) > 0) {
+    data2 = readPHBCount(filename = filenames[file2], timeformat = timeformat,
+                         desiredtz = desiredtz, configtz = configtz,
+                         timeformatName = timeformatName)
+  }
+  if (length(file1) > 0 && length(file2) > 0) {
+    # Drop sleepEventMarker from the sleep/wake data before merging;
+    # drop = FALSE keeps a data.frame even if only one column remains.
+    keepColumns = which(colnames(data2$data) != "sleepEventMarker")
+    d1 = data1$data
+    d2 = data2$data[, keepColumns, drop = FALSE]
+    if (anyNA(d1$timestamp) || anyNA(d2$timestamp)) {
+      stop(paste0("NA values are found in the timestamps, ",
+                  "please check parameter ", timeformatName, 
+                  " which is set to ", timeformat), call. = FALSE)
+    }
+    # Only timestamps that occur in both files are kept
+    data = merge(d1, d2, by = "timestamp")
+  } else {
+    if (length(file1) > 0) {
+      data = data1$data
+    } else {
+      data = data2$data
+    }
+  }
+  invisible(list(data = data, deviceSN = deviceSN))
+}