-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathPhaseIIFunctions.R
More file actions
240 lines (219 loc) · 13.5 KB
/
PhaseIIFunctions.R
File metadata and controls
240 lines (219 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
library(ggplot2)
library(reshape)
library(plyr)
library(readxl)
# Build the aggregated Phase II table from one Excel workbook.
#
# Reads the sheets 'TI_values', 'TI_count' and 'TI_count_threshold' from
# `filename`. For each sheet the first column is renamed to "metric", the
# first data row is dropped, and rows whose Sensor is 'Triton' are removed.
# The TI values and counts are reshaped to one row per site/metric/speed,
# char_ti is computed as mean + 1.28 * std, and the three tables are merged.
# Only rows with count >= threshold are returned.
#
# filename: path to the workbook, passed to read_excel().
# Returns the merged, filtered table.
#
# NOTE(review): the measured columns are selected by position (4:80, 4:41
# and 4), so this presumably depends on a fixed workbook layout - confirm
# against the workbook template before changing a sheet.
get_phase_ii_aggregate_data <- function(filename) {
  # Columns identifying one site/sensor configuration; also the merge keys.
  site_keys <- c("Project", "Company", "Region", "Terrain", "Height",
                 "Season", "Distance", "Sensor", "RefAnemType", "Anem2Type",
                 "RefAnemClass", "Anem2Class", "IEC")
  melt_ids <- c("metric", site_keys)

  # Read one sheet and apply the clean-up shared by all three sheets.
  load_sheet <- function(sheet_name) {
    sheet <- data.frame(read_excel(filename, sheet = sheet_name))
    names(sheet)[1] <- "metric"
    sheet <- sheet[-1, ]
    subset(sheet, Sensor != 'Triton')
  }

  # Melt the measured columns and split their "<var>_<speed>_<unit>" names
  # into separate columns; only var and speed are kept.
  melt_by_speed <- function(sheet, value_cols) {
    long <- melt(sheet, id.vars = melt_ids, measure.vars = value_cols)
    long <- cbind(long, colsplit(long$variable, split = "_",
                                 names = c('var', 'speed', 'unit')))
    long$variable <- NULL
    long$unit <- NULL
    long
  }

  # TI statistics: one column per `var` value, one row per speed.
  ti_long <- melt_by_speed(load_sheet('TI_values'), 4:80)
  ti_wide <- cast(ti_long, metric+Company+Project+Region+Terrain+Height+Season+
                    Distance+Sensor+RefAnemType+Anem2Type+RefAnemClass+
                    Anem2Class+IEC+speed ~ var)
  ti_wide$char_ti <- ti_wide$mean + 1.28 * ti_wide$std
  ti_wide <- data.frame(ti_wide)

  # Sample counts per speed; the last column after the cast becomes "count".
  count_long <- melt_by_speed(load_sheet('TI_count'), 4:41)
  count_wide <- cast(count_long, metric+Company+Project+Region+Terrain+Height+Season+
                       Distance+Sensor+RefAnemType+Anem2Type+RefAnemClass+
                       Anem2Class+IEC+speed ~ var)
  count_wide$var <- NULL
  count_wide$metric <- NULL
  names(count_wide)[length(names(count_wide))] <- "count"

  # Minimum-count threshold, taken from a single measured column.
  threshold_long <- melt(load_sheet('TI_count_threshold'),
                         id.vars = melt_ids, measure.vars = 4)
  threshold_long$variable <- NULL
  threshold_long$metric <- NULL
  names(threshold_long)[length(names(threshold_long))] <- "threshold"

  # Attach the threshold per site, then the count per site and speed.
  merged <- merge(ti_wide, threshold_long, by = site_keys)
  merged <- merge(merged, count_wide, by = c(site_keys, "speed"))
  subset(merged, count >= threshold)
}
# Summarise how far each comparison TI series is from the reference series.
#
# For char_ti, mean and std, the reference column (*_Ref_TI) is differenced
# against three comparison series in turn: *_Ane2_TI, *_corrTI_RSD_TI and
# *_RSD_TI. NA differences are dropped before every statistic.
#
# df: data frame holding the twelve columns referenced below.
# Returns a 9-row data frame with columns MAE, RMSE, MAD (1.4826 * median
# absolute difference), max and min (of the absolute differences), num,
# variable and metric.
#
# NOTE(review): the first group compares against the *_Ane2_TI columns but
# is labelled "Ref_TI" in `metric` - confirm that callers expect this label.
# NOTE(review): `num` is the non-NA count of the char_ti difference of each
# group, repeated for that group's mean and std rows.
get_uncertainties <- function(df) {
  # Reference minus comparison, in output row order. The list is left
  # unnamed so the resulting data frame keeps default row names.
  pair_diffs <- lapply(
    list(
      df$char_ti_Ref_TI - df$char_ti_Ane2_TI,
      df$mean_Ref_TI - df$mean_Ane2_TI,
      df$std_Ref_TI - df$std_Ane2_TI,
      df$char_ti_Ref_TI - df$char_ti_corrTI_RSD_TI,
      df$mean_Ref_TI - df$mean_corrTI_RSD_TI,
      df$std_Ref_TI - df$std_corrTI_RSD_TI,
      df$char_ti_Ref_TI - df$char_ti_RSD_TI,
      df$mean_Ref_TI - df$mean_RSD_TI,
      df$std_Ref_TI - df$std_RSD_TI
    ),
    na.omit
  )

  # Apply one scalar statistic to every pair and collect the nine results.
  stat_per_pair <- function(stat) {
    unlist(lapply(pair_diffs, stat))
  }

  data.frame(
    MAE = stat_per_pair(function(delta) mean(abs(delta))),
    RMSE = stat_per_pair(function(delta) sqrt(mean(delta^2))),
    MAD = 1.4826 * stat_per_pair(function(delta) median(abs(delta))),
    max = stat_per_pair(function(delta) max(abs(delta))),
    min = stat_per_pair(function(delta) min(abs(delta))),
    # Elements 1, 4 and 7 are the char_ti differences of the three groups.
    num = rep(lengths(pair_diffs[c(1, 4, 7)]), each = 3),
    variable = rep(c("char_ti", "mean", "std"), 3),
    metric = c(rep("Ref_TI", 3), rep("corrTI_RSD_TI", 3), rep("RSD_TI", 3))
  )
}
# Assign an overall turbulence category from per-speed-bin TI values.
#
# Each row of `df` is classified by comparing value + gain * RMSE against
# the char_ti limits in `ca` for the same wind speed: below the 'C' limit
# is "C", below 'B' is "B", below 'A' is "A", otherwise "A+". Rows are then
# weighted by their share of the total count, and the overall category is
# the first of C, B, A whose cumulative share (C, C+B, C+B+A) exceeds
# `percent`; if none does, the result is "A+".
#
# df:      data frame with columns value, RMSE, speed and count.
# ca:      data frame with columns ws, category ('A'/'B'/'C') and char_ti;
#          it is expected to hold exactly one limit per speed and category.
# percent: cumulative count share that must be exceeded (default 0.7).
# gain:    multiplier applied to RMSE before classifying (default 0).
#
# Returns a one-row data frame with columns category, gain and percent.
# For a `df` with zero rows the category is NA, since no bin can be
# classified.
category_function <- function(df, ca, percent = 0.7, gain = 0) {
  n_bins <- nrow(df)
  # An empty input has nothing to classify. Iterating with 1:nrow(df) would
  # walk over c(1, 0) here and fail on the missing limits.
  if (n_bins == 0) {
    return(data.frame(category = NA_character_, gain, percent))
  }

  bin_share <- df$count / sum(df$count)

  # Collect labels in a preallocated vector instead of writing into df.
  bin_label <- character(n_bins)
  for (i in seq_len(n_bins)) {
    bin_ti <- df$value[i] + gain * df$RMSE[i]
    bin_speed <- df$speed[i]
    limit_a <- subset(ca, ws == bin_speed & category == 'A')$char_ti
    limit_b <- subset(ca, ws == bin_speed & category == 'B')$char_ti
    limit_c <- subset(ca, ws == bin_speed & category == 'C')$char_ti
    if (bin_ti < limit_c) {
      bin_label[i] <- "C"
    } else if (bin_ti < limit_b) {
      bin_label[i] <- "B"
    } else if (bin_ti < limit_a) {
      bin_label[i] <- "A"
    } else {
      bin_label[i] <- "A+"
    }
  }

  # Share of the total count that falls in each class.
  share_a <- sum(bin_share[which(bin_label == 'A')])
  share_b <- sum(bin_share[which(bin_label == 'B')])
  share_c <- sum(bin_share[which(bin_label == 'C')])

  if (share_c > percent) {
    category <- 'C'
  } else if ((share_c + share_b) > percent) {
    category <- 'B'
  } else if ((share_c + share_b + share_a) > percent) {
    category <- 'A'
  } else {
    category <- 'A+'
  }
  data.frame(category, gain, percent)
}
# Load the 'Regression' sheet of a workbook in long (melted) form.
#
# The first column is renamed to "metric", the first data row is dropped,
# and columns 2 to 5 are melted against the site identifier columns.
#
# filename: path to the workbook, passed to read_excel().
# Returns the melted data frame produced by melt().
get_ge_info <- function(filename) {
  regression <- data.frame(read_excel(filename, sheet = 'Regression'))
  names(regression)[1] <- "metric"
  regression <- regression[-1, ]

  site_ids <- c("metric", "Project", "Company", "Region", "Terrain", "Height",
                "Season", "Distance", "Sensor", "RefAnemType", "Anem2Type",
                "RefAnemClass", "Anem2Class", "IEC")
  regression_long <- melt(regression, id.vars = site_ids, measure.vars = 2:5)
  # Disabled filter kept for reference:
  # dat = subset(r,metric=="RSD vs. Ref TI regression Corrected")
  regression_long
}
# Summarise how far each modelled TI series is from the reference series.
#
# Three groups are compared against the *_Ref_TI columns and labelled
# "Simple", "Global_Filtered" and "Global_Raw" in `metric`. Their char_ti
# and mean rows use the *_simple_RSD_TI, *_global_RSD_TI and
# *_globalRaw_RSD_TI columns. Their std rows use std_RSD_TI,
# std_corrTI_RSD_TI and std_RSD_TI respectively. NA differences are dropped
# before every statistic.
#
# df: data frame holding the columns referenced below.
# Returns a 9-row data frame with columns MAE, RMSE, MAD (1.4826 * median
# absolute difference), max and min (of the absolute differences), num,
# variable and metric.
#
# NOTE(review): `num` is the non-NA count of the char_ti difference of each
# group, repeated for that group's mean and std rows.
get_ge_uncertainties <- function(df) {
  # Reference minus model, in output row order. The list is left unnamed so
  # the resulting data frame keeps default row names.
  pair_diffs <- lapply(
    list(
      df$char_ti_Ref_TI - df$char_ti_simple_RSD_TI,
      df$mean_Ref_TI - df$mean_simple_RSD_TI,
      df$std_Ref_TI - df$std_RSD_TI,
      df$char_ti_Ref_TI - df$char_ti_global_RSD_TI,
      df$mean_Ref_TI - df$mean_global_RSD_TI,
      df$std_Ref_TI - df$std_corrTI_RSD_TI,
      df$char_ti_Ref_TI - df$char_ti_globalRaw_RSD_TI,
      df$mean_Ref_TI - df$mean_globalRaw_RSD_TI,
      df$std_Ref_TI - df$std_RSD_TI
    ),
    na.omit
  )

  # Apply one scalar statistic to every pair and collect the nine results.
  stat_per_pair <- function(stat) {
    unlist(lapply(pair_diffs, stat))
  }

  data.frame(
    MAE = stat_per_pair(function(delta) mean(abs(delta))),
    RMSE = stat_per_pair(function(delta) sqrt(mean(delta^2))),
    MAD = 1.4826 * stat_per_pair(function(delta) median(abs(delta))),
    max = stat_per_pair(function(delta) max(abs(delta))),
    min = stat_per_pair(function(delta) min(abs(delta))),
    # Elements 1, 4 and 7 are the char_ti differences of the three groups.
    num = rep(lengths(pair_diffs[c(1, 4, 7)]), each = 3),
    variable = rep(c("char_ti", "mean", "std"), 3),
    metric = c(rep("Simple", 3), rep("Global_Filtered", 3), rep("Global_Raw", 3))
  )
}
# Add "global" model columns to `df`.
#
# A global slope and intercept are each computed as the mean of the
# distinct non-NA values of a coefficient column (RSD_TI_Corr_m/_c for the
# corrected fit, RSD_TI_Raw_m/_c for the raw fit).
#
# Columns appended, in this order:
#   mean_global_RSD_TI       global corrected fit applied to
#                            (mean_corrTI_RSD_TI - RSD_TI_Corr_c) / RSD_TI_Corr_m
#   mean_globalRaw_RSD_TI    global raw fit applied to mean_RSD_TI
#   char_ti_global_RSD_TI    mean_global_RSD_TI + std_corrTI_RSD_TI
#   char_ti_globalRaw_RSD_TI mean_globalRaw_RSD_TI + std_RSD_TI
#
# NOTE(review): std is added unscaled here, whereas
# get_phase_ii_aggregate_data in this file builds char_ti as
# mean + 1.28 * std - confirm that the difference is intended.
#
# df: data frame holding the columns named above.
# Returns `df` with the four columns added.
apply_global_model <- function(df) {
  # Mean over the distinct non-NA values of one coefficient column.
  mean_of_distinct <- function(values) {
    mean(unique(na.omit(values)))
  }

  slope_corr <- mean_of_distinct(df$RSD_TI_Corr_m)
  intercept_corr <- mean_of_distinct(df$RSD_TI_Corr_c)
  slope_raw <- mean_of_distinct(df$RSD_TI_Raw_m)
  intercept_raw <- mean_of_distinct(df$RSD_TI_Raw_c)

  # Presumably this undoes the per-row corrected fit before the global one
  # is applied - confirm with the model definition.
  uncorrected_mean <- (df$mean_corrTI_RSD_TI - df$RSD_TI_Corr_c) / df$RSD_TI_Corr_m

  df$mean_global_RSD_TI <- slope_corr * uncorrected_mean + intercept_corr
  df$mean_globalRaw_RSD_TI <- slope_raw * df$mean_RSD_TI + intercept_raw
  df$char_ti_global_RSD_TI <- df$mean_global_RSD_TI + df$std_corrTI_RSD_TI
  df$char_ti_globalRaw_RSD_TI <- df$mean_globalRaw_RSD_TI + df$std_RSD_TI
  df
}
# Add "simple" model columns to `df`, using each row's own coefficients.
#
# Columns appended, in this order:
#   mean_simple_RSD_TI    RSD_TI_Raw_m *
#                         ((mean_corrTI_RSD_TI - RSD_TI_Corr_c) / RSD_TI_Corr_m)
#                         + RSD_TI_Raw_c
#   char_ti_simple_RSD_TI mean_simple_RSD_TI + std_RSD_TI
#
# df: data frame holding the columns named above.
# Returns `df` with the two columns added.
apply_simple_model <- function(df) {
  # Presumably this undoes the per-row corrected fit before the raw fit is
  # applied - confirm with the model definition.
  uncorrected_mean <- (df$mean_corrTI_RSD_TI - df$RSD_TI_Corr_c) / df$RSD_TI_Corr_m

  df$mean_simple_RSD_TI <- df$RSD_TI_Raw_m * uncorrected_mean + df$RSD_TI_Raw_c
  df$char_ti_simple_RSD_TI <- df$mean_simple_RSD_TI + df$std_RSD_TI
  df
}