-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgroup_PERMANOVA.R
228 lines (149 loc) · 8.11 KB
/
group_PERMANOVA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
## Function for sequentially testing multiple variables in PERMANOVA, then
## adjusting pseudo-F values. Also calculates multivariate dispersion for
## categorical variables.
require(vegan)
require(tibble)
# First, a quick function to calculate omega squared based on:
# https://academic.oup.com/bioinformatics/article/31/15/2461/188732
# http://www.real-statistics.com/multiple-regression/other-measures-effect-size-anova/
# http://psych.colorado.edu/~willcutt/pdfs/Olejnik_2003.pdf if you want to make this generalized...
# https://gist.github.com/arnoud999/e677516ed45e9a11817e for some r code of generalization
PERMANOVA.omega2 <- function(adonis2.object, num.control.vars) {
SSE <- adonis2.object[ 1 + num.control.vars, "SumOfSqs"]
dfE <- adonis2.object[ 1 + num.control.vars, "Df"]
MSE <- adonis2.object["Residual", "SumOfSqs"]/adonis2.object["Residual", "Df"] # Mean Square for Error
SST <- sum(adonis2.object$SumOfSqs)
omega2 <- (SSE - dfE*MSE) / (SST + MSE)
return(omega2)
}
## Note that I have some defaults for adonis2: two processors (parallel = 2),
## 99999 permutations, and using the bray-curtis distance matrix (method =
## "bray")
## Note also that this does not test for interaction effects.
## Variable definitions:
## var.names should be a vector of column names, one for each variable to be tested
# Might consider adding functionality to take column names or position numbers, but for now...
## var.table is the table where the independent variables are.
## var.table.c is a string of the table name: eventually generate this automatically.
## species.table is the species/site matrix (e.g. created by matrify)
## species.table.c is a string of the table name: eventually generate this automatically.
## control.vars are any variables that need to be included before testing variables--character string, e.g. "var1 + var2"
## num.control.vars specifies the number of control variables used. Replace this later with something deriving it from control.vars text...
## by.adonsi2 is to pass through for the by = argument in adonis2
group.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.table, species.table.c, num.control.vars = 0, by.adonis2 = "terms", AIC.type = "AICc", perms = 99999, method = "bray") {
## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect.
var.table <- var.table[ , order(colnames(var.table))]
var.names <- var.names[order(var.names)]
output <- tibble(var.names = var.names,
var.explnd = rep(NA, times = length(var.names)),
avg.var.explnd = rep(NA),
pseudo.F = rep(NA),
F.pval = rep(NA),
adj.F.pval = rep(NA),
disp.F.pval = rep(NA),
adj.disp.F.pval = rep(NA),
omega2 = rep(NA),
AIC.stat = rep(NA)
)
col.numbers <- which(colnames(var.table) %in% var.names)
if (num.control.vars > 0) {
control.vars <- paste0(control.vars, " +")
}
## Populate the output table.
for (i in 1:length(var.names)) {
## No parallel argument--while it would be nice, 2.5.1 vegan + parallel
## update fubar'd something and taking it out was the easiest way to fix
## it.
temp <-
adonis2(
formula = as.formula(paste(
species.table.c, "~", control.vars, var.names[i]
)),
permutations = perms,
method = method,
by = by.adonis2,
data = var.table
)
output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] /
temp$SumOfSqs[length(temp$SumOfSqs)]
output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars]
output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars)
output$pseudo.F[i] <- temp$F[num.control.vars + 1]
output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1]
output$AIC.stat[i] <- AICc.PERMANOVA2(temp)[[AIC.type]]
# the ordering is so that col.names[i] works!
if (is.character(var.table[[col.numbers[i]]]) == TRUE |
is.factor(var.table[[col.numbers[i]]]) == TRUE) {
if (class(species.table) == "dist") {
temp.disp <-
anova(betadisper(
species.table,
as.factor(var.table[[col.numbers[i]]])
))
output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1]
}
else {temp.disp <-
anova(betadisper(
vegdist(species.table, method = method),
as.factor(var.table[[col.numbers[i]]])
))
output$disp.F.pval[i] <- temp.disp$`Pr(>F)`[1]
}
}
}
output$adj.F.pval <-
p.adjust(p = output$F.pval, method = "holm")
output$adj.disp.F.pval <-
p.adjust(p = output$disp.F.pval, method = "holm")
output$AIC.delta <-
output$AIC.stat - min(output$AIC.stat)
return(output)
}
group.univ.PERMANOVA <- function(var.names, var.table, var.table.c, control.vars = "", species.vector, species.vector.c, num.control.vars = 0, by.adonis2 = "terms", perms = 99999, method = "euclid", AIC.type ="AICc") {
## need to order columns otherwise the order of the columns and the order of col.numbers is incorrect.
var.table <- var.table[ , order(colnames(var.table))]
var.names <- var.names[order(var.names)]
output <- tibble(var.names = var.names,
var.explnd = rep(NA, times = length(var.names)),
avg.var.explnd = rep(NA),
pseudo.F = rep(NA),
F.pval = rep(NA),
adj.F.pval = rep(NA),
omega2 = rep(NA),
AIC.stat = rep(NA)
)
col.numbers <- which(colnames(var.table) %in% var.names)
if (num.control.vars > 0) {
control.vars <- paste0(control.vars, " +")
}
## Attempting to get the names from the tables
#var.table.t <- deparse(substitute(var.table))
## Populate the output table.
for (i in 1:length(var.names)) {
## No parallel argument--while it would be nice, 2.5.1 vegan + parallel
## update fubar'd something and taking it out was the easiest way to fix
## it.
temp <-
adonis2(
formula = as.formula(paste(
species.vector.c, "~", control.vars, var.names[i]
)),
permutations = perms,
method = method,
by = by.adonis2,
data = var.table
)
output$var.explnd[i] <- temp$SumOfSqs[1 + num.control.vars] / temp$SumOfSqs[length(temp$SumOfSqs)]
output$avg.var.explnd[i] <- output$var.explnd[i] / temp$Df[1 + num.control.vars]
output$omega2[i] <- PERMANOVA.omega2(temp, num.control.vars)
output$pseudo.F[i] <- temp$F[num.control.vars + 1]
output$F.pval[i] <- temp$`Pr(>F)`[num.control.vars + 1]
output$AIC.stat[i] <- AICc.PERMANOVA2(temp)[[AIC.type]]
# the ordering is so that col.names[i] works!
}
output$adj.F.pval <- p.adjust(p = output$F.pval, method = "holm")
output$delta.aic <- output$AIC.stat - min(output$AIC.stat)
return(output)
}
## with time, test:
# paste0("\"", sample.covariates, "\"" )