Skip to content

Commit

Permalink
extract additional columns from files #68
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentvanhees committed Sep 27, 2024
1 parent 952d2e4 commit 38669db
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 56 deletions.
51 changes: 36 additions & 15 deletions R/readActiGraphCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
if (any(grepl("serialnumber", fileHeader$item))) headerAvailable = TRUE

# Depending on whether header is present assign number of rows to skip:
mode = NULL
if (headerAvailable == TRUE) {
skip = 10
# Extract mode number from header because this tells us how to interpret the columns
mode = as.numeric(fileHeader$value[grep(pattern = "mode", x = fileHeader$item)])
if (is.na(mode)) mode = NULL
} else {
tmp = data.table::fread(input = filename,
header = FALSE,
Expand Down Expand Up @@ -80,33 +84,50 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
D = D[, -1, drop = FALSE]
}
# Identify columns with count data
acccol = vmcol = NA
acccol = NA
stepcol = NULL
if (colnames == TRUE) {
acccol = grep("axis|activity", colnames(D), ignore.case = TRUE)
vmcol = grep("vector magnitude|vm", colnames(D), ignore.case = TRUE)
stepcol = grep("step", colnames(D), ignore.case = TRUE)
} else {
# Then assume first 3 columns are axis1, axis2, axis3 if ncol(D) >= 3
# First column is VM if ncol(D) < 3
# Note that in ActiLife software the user can select
# the columns to export (e.g, it could be "Axis1", "Vector Magnitude", "Steps")
# which may mean that our assumptions here are not necessarily true.
if (ncol(D) >= 3) {
acccol = 1:3
if (!is.null(mode)) {
if ((mode >= 12 && mode <= 15) | (mode >= 28 && mode <= 31) |
(mode >= 44 && mode <= 47) | (mode >= 60 && mode <= 63)) {
acccol = 1:3
} else {
acccol = 1
}
if (mode %in% c(13, 15, 29, 31, 45, 47, 61, 63)) {
stepcol = 4
}
} else {
vmcol = 1
# Then assume first 3 columns are axis1, axis2, axis3 if ncol(D) >= 3
# First column is VM if ncol(D) < 3
# Note that in ActiLife software the user can select
# the columns to export (e.g, it could be "Axis1", "Vector Magnitude", "Steps")
# which may mean that our assumptions here are not necessarily true.
if (ncol(D) >= 3) {
acccol = 1:3
} else {
acccol = 1
}
}
}
# Assign colnames and formatting
if (is.na(acccol[1]) == FALSE) {
if (length(acccol) == 3 && is.na(acccol[1]) == FALSE) {
colnames(D)[acccol] = c("y", "x", "z") # ActiGraph always stores y axis first
}
if (is.na(vmcol[1]) == FALSE) {
if (length(acccol) == 1 &&is.na(vmcol[1]) == FALSE) {
D = as.matrix(D, drop = FALSE) # Convert to matrix as data.frame will auto-collapse to vector
colnames(D)[vmcol] = c("vm")
colnames(D)[acccol] = "vm"
}
if (length(stepcol) == 1 && is.na(stepcol[1]) == FALSE) {
colnames(D)[stepcol] = "steps"
}
keep = c(acccol, vmcol)[!is.na(c(acccol, vmcol))]
keep = c(acccol, stepcol)[!is.na(c(acccol, stepcol))]
D = D[, keep, drop = FALSE]
if (ncol(D) == 3 & is.na(vmcol)) {
if (ncol(D) >= 3) {
D$vm = sqrt(D[, 1] ^ 2 + D[, 2] ^ 2 + D[, 3] ^ 2)
}
# Extract information from header, if present
Expand Down Expand Up @@ -154,7 +175,7 @@ readActiGraphCount = function(filename = file, desiredEpochSize = NULL,
if (!is.null(desiredEpochSize)) {
if (desiredEpochSize > epSizeShort) {
step = desiredEpochSize %/% epSizeShort
D = sumAggregate(D, step)
D = matAggregate(D, step)
epSizeShort = epSizeShort * step
}
checkEpochMatch(desiredEpochSize, epSizeShort)
Expand Down
5 changes: 2 additions & 3 deletions R/readActicalCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,17 @@ readActicalCount = function(filename = file, desiredEpochSize = NULL,
timestamp_POSIX = timestamp_POSIX[1]
D = D[, -which(colnames(D) %in% c("date", "time"))]
D = as.matrix(D, drop = FALSE)

if (quote == "") D = apply(D, 2, as.numeric)
# If requested, aggregate data to lower resolution to match desired
# epoch size in argument windowsizes
if (!is.null(desiredEpochSize)) {
if (desiredEpochSize > epSizeShort) {
step = desiredEpochSize %/% epSizeShort
D = sumAggregate(D, step)
D = matAggregate(D, step)
epSizeShort = epSizeShort * step
}
checkEpochMatch(desiredEpochSize, epSizeShort)
}
if (quote == "") D = apply(D, 2, as.numeric)
invisible(list(data = D, epochSize = epSizeShort,
startTime = timestamp_POSIX))
}
32 changes: 17 additions & 15 deletions R/readActiwatchCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,27 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
startindex = 1000
quote = detectQuote(fn = filename, index = startindex)
index = findStartData(filename, quote, startindex)
D = data.table::fread(input = filename, sep = ",", skip = index, quote = quote)
D = data.table::fread(input = filename, sep = ",", skip = index, quote = quote, data.table = FALSE)
# ! Assumption that column names are present 2 lines prior to timeseries
colnames = data.table::fread(input = filename,
colnames = data.table::fread(input = filename, data.table = FALSE,
header = FALSE, sep = ",",
skip = index - 2, nrows = 1, quote = quote)
if (all(is.na(colnames))) {
colnames = data.table::fread(input = filename,
header = FALSE, sep = ",",
skip = index - 4, nrows = 1, quote = quote)
}
colnames(D) = as.character(colnames)[1:ncol(D)]
colnames = colnames[!is.na(colnames)]
D = D[, which(!is.na(colnames))]
colnames(D) = tolower(as.character(colnames))
# ! Assumptions about columns names
colnames(D) = gsub(pattern = "datum|date", replacement = "date",
x = colnames(D), ignore.case = TRUE)
colnames(D) = gsub(pattern = "tijd|time", replacement = "time",
x = colnames(D), ignore.case = TRUE)
colnames(D) = gsub(pattern = "activiteit|activity", replacement = "ZCY",
x = colnames(D), ignore.case = TRUE)
# browser()
colnames(D)[grep(pattern = "datum|date", x = colnames(D))] = "date"
colnames(D)[grep(pattern = "tijd|time", x = colnames(D))] = "time"
colnames(D)[grep(pattern = "activiteit|activity", x = colnames(D))] = "counts"
colnames(D)[grep(pattern = "slapen|sleep", x = colnames(D))] = "sleep"
colnames(D)[grep(pattern = "niet-om|wear|worn", x = colnames(D))] = "nonwear"
D = D[, grep(pattern = "time|date|counts|sleep|nonwear", x = colnames(D))]
timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "),
format = timeformat,
tz = tz)
Expand All @@ -41,7 +44,7 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
epSizeShort = mean(diff(as.numeric(timestamp_POSIX)))

timestamp_POSIX = timestamp_POSIX[1]
D = D[, "ZCY"]
D = D[, -which(colnames(D) %in% c("date", "time"))]
} else if (fileExtension == "awd") {
#=========================================================
# AWD
Expand All @@ -64,8 +67,8 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,
}
D = data.table::fread(input = filename, header = FALSE, sep = ",",
skip = index, quote = quote)
D = D[,1]
colnames(D)[1] = "ZCY"
D = D[, 1:2]
colnames(D)[1:2] = c("counts", "light")
header = data.table::fread(input = filename, header = FALSE, sep = ",",
nrows = 7, quote = quote)
# Get epoch size
Expand All @@ -83,18 +86,17 @@ readActiwatchCount = function(filename = file, desiredEpochSize = NULL,

}
D = as.matrix(D, drop = FALSE)

if (quote == "") D = apply(D, 2, as.numeric)
# If requested, aggregate data to lower resolution to match desired
# epoch size in argument windowsizes
if (!is.null(desiredEpochSize)) {
if (desiredEpochSize > epSizeShort) {
step = desiredEpochSize %/% epSizeShort
D = sumAggregate(D, step)
D = matAggregate(D, step)
epSizeShort = epSizeShort * step
}
checkEpochMatch(desiredEpochSize, epSizeShort)
}
if (quote == "") D$ZCY = as.numeric(D$ZCY)
invisible(list(data = D, epochSize = epSizeShort,
startTime = timestamp_POSIX))
}
10 changes: 8 additions & 2 deletions R/utils_for_countdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ getExtension <- function(filename){
return(ex[-1])
}

sumAggregate = function(mat, step) {
# Aggregate matrix mat by taking the sum over step number of rows
matAggregate = function(mat, step) {
# Aggregate matrix mat over every step number of rows by taking the sum,
# unless the column name is sleep or nonwear, in which case
# we take the rounded mean.
mat = rbind(rep(0, ncol(mat)), mat)
cumsum2 = function(x) {
x = cumsum(ifelse(is.na(x), 0, x)) + x*0
Expand All @@ -72,6 +74,10 @@ sumAggregate = function(mat, step) {
mat = apply(mat, 2, cumsum2)
mat = mat[seq(1, nrow(mat), by = step), , drop = FALSE]
mat = apply(mat, 2, diff)
# Correct non-incremental variables: at this point every column holds the sum
# over step rows; for the sleep and nonwear columns we want the rounded mean
# instead, so divide the sum by step and round.
# The correction must be applied to mat (the object this function aggregates
# and returns), not to an object D that does not exist in this scope.
for (niv in c("sleep", "nonwear")) {
  if (niv %in% colnames(mat)) mat[, niv] = round(mat[, niv] / step)
}
return(mat)
}

Expand Down
32 changes: 19 additions & 13 deletions tests/testthat/test_readActiGraphCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@ test_that("ActiGraph61 is correctly read", {
expect_equal(D$epochSize, 10)
expect_equal(format(D$startTime), "2016-08-15 21:35:00")
expect_equal(nrow(D$data), 495)
expect_equal(ncol(D$data), 4)
expect_equal(sum(D$data), 63952.33)
expect_equal(ncol(D$data), 5)
expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 63952.33)
expect_equal(sum(D$data[, c("steps")]), 253)

D = readActiGraphCount(filename = file, desiredEpochSize = 5, timeformat = "%m/%d/%Y %H:%M:%S", tz = "")
expect_equal(D$deviceSerialNumber, "MOS2D16160581")
expect_equal(D$epochSize, 5)
expect_equal(format(D$startTime), "2016-08-15 21:35:00")
expect_equal(nrow(D$data), 990)
expect_equal(ncol(D$data), 4)
expect_equal(sum(D$data), 63952.33)
expect_equal(ncol(D$data), 5)
expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 63952.33)
expect_equal(sum(D$data[, c("steps")]), 253)
})

test_that("ActiGraph31 is correctly read", {
Expand All @@ -26,16 +28,19 @@ test_that("ActiGraph31 is correctly read", {
expect_equal(D$epochSize, 15)
expect_equal(format(D$startTime), "2013-08-26 09:00:00")
expect_equal(nrow(D$data), 990)
expect_equal(ncol(D$data), 4)
expect_equal(sum(D$data), 272870.6, tol = 0.1)
expect_equal(ncol(D$data), 5)
expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 272870.6, tol = 0.1)
expect_equal(sum(D$data[, c("steps")]), 1118)

D = readActiGraphCount(filename = file, desiredEpochSize = 30, timeformat = "%m/%d/%Y %H:%M:%S", tz = "")
expect_equal(D$deviceSerialNumber, "CLE2A2123456")
expect_equal(D$epochSize, 30)
expect_equal(format(D$startTime), "2013-08-26 09:00:00")
expect_equal(nrow(D$data), 495)
expect_equal(ncol(D$data), 4)
expect_equal(sum(D$data), 272870.6, tol = 0.1)
expect_equal(ncol(D$data), 5)
expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 272870.6, tol = 0.1)
expect_equal(sum(D$data[, c("steps")]), 1118)

})

test_that("ActiGraph13_timestamps_headers.csv is correctly read", {
Expand All @@ -45,17 +50,18 @@ test_that("ActiGraph13_timestamps_headers.csv is correctly read", {
expect_equal(D$epochSize, 1)
expect_equal(format(D$startTime), "2017-12-09 15:00:00")
expect_equal(nrow(D$data), 1000)
expect_equal(ncol(D$data), 4)
expect_equal(sum(D$data), 256047)

expect_equal(ncol(D$data), 5)
expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 255707.4, tol = 0.1)
expect_equal(sum(D$data[, c("steps")]), 442)

D = readActiGraphCount(filename = file, desiredEpochSize = 5, timeformat = "%d-%m-%Y %H:%M:%S", tz = "")
expect_equal(D$deviceSerialNumber, "TAS1D48140206")
expect_equal(D$epochSize, 5)
expect_equal(format(D$startTime), "2017-12-09 15:00:00")
expect_equal(nrow(D$data), 200)
expect_equal(ncol(D$data), 4)
expect_equal(sum(D$data), 256047)
expect_equal(ncol(D$data), 5)
expect_equal(sum(D$data[, c("y", "x", "z", "vm")]), 255707.4, tol = 0.1)
expect_equal(sum(D$data[, c("steps")]), 442)
})

test_that("Actiwatch csv error correctly", {
Expand Down
22 changes: 14 additions & 8 deletions tests/testthat/test_readActiwatchCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,37 @@ test_that("Actiwatch csv is correctly read", {
expect_equal(D$epochSize, 15)
expect_equal(format(D$startTime), "2019-11-23 06:00:00")
expect_equal(nrow(D$data), 860)
expect_equal(ncol(D$data), 1)
expect_equal(sum(D$data, na.rm = TRUE), 4589)
expect_equal(ncol(D$data), 3)
expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 4589)
expect_equal(sum(D$data[, "sleep"], na.rm = TRUE), 55)
expect_equal(sum(D$data[, "nonwear"], na.rm = TRUE), 797)

D = readActiwatchCount(filename = file, desiredEpochSize = 30, timeformat = "%d/%m/%Y %H:%M:%S", tz = "")
expect_equal(D$epochSize, 30)
expect_equal(format(D$startTime), "2019-11-23 06:00:00")
expect_equal(nrow(D$data), 430)
expect_equal(ncol(D$data), 1)
expect_equal(sum(D$data, na.rm = TRUE), 4569)
expect_equal(ncol(D$data), 3)
expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 4569)
expect_equal(sum(D$data[, "sleep"], na.rm = TRUE), 54)
expect_equal(sum(D$data[, "nonwear"], na.rm = TRUE), 797)
})
test_that("Actiwatch awd is correctly read", {
file = system.file("testfiles/Actiwatch.AWD", package = "GGIRread")
D = readActiwatchCount(filename = file, desiredEpochSize = 60, timeformat = "%d-%b-%Y %H:%M:%S", tz = "")
expect_equal(D$epochSize, 60)
expect_equal(format(D$startTime), "2009-10-01 17:00:00")
expect_equal(nrow(D$data), 329)
expect_equal(ncol(D$data), 1)
expect_equal(sum(D$data, na.rm = TRUE), 108864)
expect_equal(ncol(D$data), 2)
expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 108864)
expect_equal(sum(D$data[, "light"], na.rm = TRUE), 0)

D = readActiwatchCount(filename = file, desiredEpochSize = 300, timeformat = "%d-%b-%Y %H:%M:%S", tz = "")
expect_equal(D$epochSize, 300)
expect_equal(format(D$startTime), "2009-10-01 17:00:00")
expect_equal(nrow(D$data), 65)
expect_equal(ncol(D$data), 1)
expect_equal(sum(D$data, na.rm = TRUE), 108713)
expect_equal(ncol(D$data), 2)
expect_equal(sum(D$data[, "counts"], na.rm = TRUE), 108713)
expect_equal(sum(D$data[, "light"], na.rm = TRUE), 0)
})

test_that("Actiwatch awd error correctly", {
Expand Down

0 comments on commit 38669db

Please sign in to comment.