migrate function for reading ActiGraph csv from GGIR to GGIRread #68
vincentvanhees committed Sep 26, 2024
1 parent f339be3 commit e6a302b
Showing 9 changed files with 3,298 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -22,7 +22,7 @@ URL: https://github.com/wadpac/GGIRread/
BugReports: https://github.com/wadpac/GGIRread/issues
License: Apache License (== 2.0)
Suggests: testthat
Imports: matlab, bitops, Rcpp (>= 0.12.10)
Imports: matlab, bitops, Rcpp (>= 0.12.10), data.table
Depends: stats, utils, R (>= 3.5.0)
NeedsCompilation: yes
LinkingTo: Rcpp
8 changes: 6 additions & 2 deletions NAMESPACE
@@ -1,5 +1,9 @@
export(readGenea, readAxivity, readGENEActiv, GENEActivReader, resample, readWav)
export(readGenea, readAxivity, readGENEActiv,
GENEActivReader, resample, readWav,
readActiGraphCount)
useDynLib(GGIRread, .registration = TRUE)
importFrom(Rcpp, sourceCpp)
importFrom(data.table, fread)
importFrom("utils", "setTxtProgressBar", "txtProgressBar")
importFrom("utils", "read.csv")
importFrom("utils", "read.csv")
importFrom("utils", "available.packages")
1 change: 1 addition & 0 deletions NEWS.md
@@ -2,6 +2,7 @@

- Added a `NEWS.md` file to track changes to the package.
- Stops interactive calling of `chooseCRANmirror` on `.onAttach` if interactive and CRAN mirror not set GGIR #1141.
- Migrate read function for ActiGraph count data (csv) to GGIRread #68.

# Changes in version 1.0.1 (release date:03-06-2024)

176 changes: 176 additions & 0 deletions R/readActiGraphCount.R
@@ -0,0 +1,176 @@
readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
timeformat = "%m/%d/%Y %H:%M:%S", tz = "") {
deviceSerialNumber = NULL
# Test if file has header by reading first ten rows
# and checking whether it contains the word
# serial number.
headerAvailable = FALSE
header = data.table::fread(
input = filename,
header = FALSE,
nrows = 10,
data.table = FALSE,
sep = ","
)
if (nrow(header) < 10) {
stop(paste0("File ", filename, " cannot be processed because it has less than ten rows"))
}
splitHeader = function(x) {
tmp = unlist(strsplit(x, " "))
item = gsub(
pattern = ":| ",
replacement = "",
x = paste0(tmp[1:(length(tmp) - 1)], collapse = "")
)
df = data.frame(item = tolower(item), value = tmp[length(tmp)])
return(df)
}
fileHeader = NULL
for (hh in header[2:9,1]) {
fileHeader = rbind(fileHeader, splitHeader(hh))
}
if (any(grepl("serialnumber", fileHeader$item))) headerAvailable = TRUE

# Depending on whether header is present assign number of rows to skip:
if (headerAvailable == TRUE) {
skip = 10
} else {
tmp = data.table::fread(input = filename,
header = FALSE,
data.table = FALSE,
skip = 0,
nrows = 1)
if (any(grepl("data|scoring", tmp[1,]))) {
skip = 1
} else {
skip = 0
}
}

# Check if file was exported with column names:
colnames = FALSE
colnames_test = data.table::fread(input = filename,
header = FALSE,
data.table = FALSE,
skip = skip,
nrows = 1)
if (any(grepl("Axis|vector magnitude|vm", colnames_test[1,], ignore.case = TRUE))) {
colnames = TRUE
}
# Increment skip if column names are present
Dtest = data.table::fread(input = filename,
header = colnames,
data.table = FALSE,
skip = skip, nrows = 1)
if (length(grep(pattern = "time|date", x = Dtest[1, 1], ignore.case = TRUE)) > 0) {
skip = skip + 1
}
# Read all data from file
D = data.table::fread(input = filename,
header = colnames,
data.table = FALSE,
skip = skip)

# Ignore time and date column if present
D = D[, grep(pattern = "time|date", x = Dtest[1, ], ignore.case = TRUE, invert = TRUE), drop = FALSE]
D = D[, grep(pattern = "time|date", x = colnames(Dtest), ignore.case = TRUE, invert = TRUE), drop = FALSE]
if (inherits(x = D[1,1], what = "POSIXt")) {
D = D[, -1, drop = FALSE]
}
# Identify columns with count data
acccol = vmcol = NA
if (colnames == TRUE) {
acccol = grep("axis|activity", colnames(D), ignore.case = TRUE)
vmcol = grep("vector magnitude|vm", colnames(D), ignore.case = TRUE)
# Guard against grep() matching nothing (integer(0)),
# which would break the is.na() checks further down
if (length(acccol) == 0) acccol = NA
if (length(vmcol) == 0) vmcol = NA
} else {
# Then assume first 3 columns are axis1, axis2, axis3 if ncol(D) >= 3
# First column is VM if ncol(D) < 3
# Note that in ActiLife software the user can select
# the columns to export (e.g., it could be "Axis1", "Vector Magnitude", "Steps")
# which may mean that our assumptions here are not necessarily true.
if (ncol(D) >= 3) {
acccol = 1:3
} else {
vmcol = 1
}
}
# Assign colnames and formatting
if (is.na(acccol[1]) == FALSE) {
colnames(D)[acccol] = c("y", "x", "z") # ActiGraph always stores y axis first
}
if (is.na(vmcol[1]) == FALSE) {
D = as.matrix(D, drop = FALSE) # Convert to matrix as data.frame will auto-collapse to vector
colnames(D)[vmcol] = c("vm")
}
keep = c(acccol, vmcol)[!is.na(c(acccol, vmcol))]
D = D[, keep, drop = FALSE]
if (ncol(D) == 3 & is.na(vmcol)) {
D$vm = sqrt(D[,1]^2 + D[,2]^2 + D[,3]^2)
}
# Extract information from header, if present
if (headerAvailable == TRUE) {
deviceSerialNumber = fileHeader$value[grep(pattern = "serialnumber", x = fileHeader$item)]
epochSize = fileHeader$value[grep(pattern = "epochperiod|cycleperiod", x = fileHeader$item)]
epSizeShort = sum(as.numeric(unlist(strsplit(epochSize, ":"))) * c(3600, 60, 1))
starttime = fileHeader$value[grep(pattern = "starttime", x = fileHeader$item)]
startdate = fileHeader$value[grep(pattern = "startdate", x = fileHeader$item)]
timestamp = paste0(startdate, " ", starttime)
timestamp_POSIX = as.POSIXlt(timestamp, tz = tz,
format = timeformat)
} else if (headerAvailable == FALSE) {
# Extract date/timestamp from first values of column
tmp = data.table::fread(input = filename,
header = colnames,
data.table = FALSE,
skip = skip,
nrows = 2)
if (colnames == TRUE) {
datecol = grep("date", colnames(tmp), ignore.case = TRUE)
timecol = grep("time|epoch", colnames(tmp), ignore.case = TRUE)
time = tmp[, timecol]
date = tmp[, datecol]

starttime = time[1]
startdate = date[1]
timestamp = paste0(date, " ", time)
format = timeformat
timestamp_POSIX = as.POSIXlt(timestamp, tz = tz, format = format)
if (all(is.na(timestamp_POSIX))) {
stop(paste0("\nTimestamps are not available in the file, neither has",
" it a header to extract the timestamps from. Therefore, the file",
" cannot be processed.\n"))
}
epochSize = difftime(timestamp_POSIX[2], timestamp_POSIX[1],
units = "secs")
epSizeShort = as.numeric(epochSize)
timestamp_POSIX = timestamp_POSIX[1] # startTime
}
}
# Check timestamp is meaningful
if (all(is.na(timestamp_POSIX))) {
stop(paste0("\nTime format in data ", timestamp, " does not match with time format ",
timeformat,
" as specified by argument extEpochData_timeformat, please correct.\n"))
}
# If requested, aggregate data to lower resolution to match desired
# epoch size in argument windowsizes
if (!is.null(desiredEpochSize)) {
if (desiredEpochSize > epSizeShort) {
step = desiredEpochSize %/% epSizeShort
D = rbind(rep(0, ncol(D)), D)
D = apply(D, 2, cumsum)
D = D[seq(1, nrow(D), by = step), , drop = FALSE]
D = apply(D, 2, diff)
epSizeShort = epSizeShort * step
}
if (epSizeShort != desiredEpochSize) {
stop(paste0("\nThe short epoch size as specified by the user as the first value of argument windowsizes (",
desiredEpochSize,
" seconds) does NOT match the short epoch size we see in the data (", epSizeShort),
" seconds). Please correct.", call. = FALSE)
}
}
invisible(list(data = D, epochSize = epSizeShort,
startTime = timestamp_POSIX,
deviceSerialNumber = deviceSerialNumber))
}
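
For reference, a minimal usage sketch of the migrated function (not part of this commit). The file name "example10s.csv", the 60-second target epoch, and the timezone are assumptions; the return structure follows the list built at the end of readActiGraphCount above.

# Hedged usage sketch; "example10s.csv" and the argument values are hypothetical
library(GGIRread)
counts = readActiGraphCount(filename = "example10s.csv",
                            desiredEpochSize = 60, # aggregate to 60-second epochs if the file is finer
                            timeformat = "%m/%d/%Y %H:%M:%S",
                            tz = "Europe/Amsterdam")
head(counts$data)           # count columns y, x, z and/or vm
counts$epochSize            # epoch length in seconds after optional aggregation
counts$startTime            # POSIXlt start time from the file header or timestamp column
counts$deviceSerialNumber   # NULL when the csv was exported without a header

When desiredEpochSize is larger than the epoch size found in the file, the function aggregates by prepending a row of zeros, taking column-wise cumulative sums, keeping every step-th row, and differencing, which sums the counts within each larger epoch without an explicit loop.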