-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path03_arimax_steady_12.py
238 lines (184 loc) · 14.9 KB
/
03_arimax_steady_12.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
############################
# ARIMAX - Fix TRAIN Size 12
############################
import pandas as pd
import pmdarima as pm
import numpy as np
import copy
import pickle
import time
import warnings
import pyarrow as pa
# Base directory shared by all preprocessing inputs and result outputs.
path_to_data = '/hpi/fs00/share/MPSS2021BA1/data/'
starttime = time.time()
print('Start reading the input file.')
# Fixed number of observations in every training window.
TRAIN = 12
# Run for the one or the other vital sign / chunk subset: exactly one of the
# following input paths must be active at a time.
# read dict where train size is TRAIN
# input_path = str(path_to_data) + 'arima_preprocessing/dict_of_chunk_iterations_with_steady_train_' + str(TRAIN) + '_hr.pickle'
# input_path = str(path_to_data) + 'arima_preprocessing/dict_of_chunk_iterations_with_steady_train_' + str(TRAIN) + '_bp.pickle'
# input_path = str(path_to_data) + 'arima_preprocessing/dict_of_chunk_iterations_with_steady_train_' + str(TRAIN) + '_o2.pickle'
input_path = str(path_to_data) + 'arima_preprocessing/dict_of_chunk_iterations_with_steady_train_' + str(TRAIN) + '_hr_first1000.pickle'
# input_path = str(path_to_data) + 'arima_preprocessing/dict_of_chunk_iterations_with_steady_train_' + str(TRAIN) + '_bp_first1000.pickle'
# input_path = str(path_to_data) + 'arima_preprocessing/dict_of_chunk_iterations_with_steady_train_' + str(TRAIN) + '_o2_first1000.pickle'
# Context manager guarantees the handle is closed even if unpickling fails.
with open(input_path, 'rb') as input_file:
    dict_of_chunk_series_with_test_and_train = pickle.load(input_file)
# read dict where train size is expanding
# with open(str(path_to_data) + 'dict_of_chunk_series_with_expanding_test_and_steady_train.pickle', 'rb') as second_input_file:
#     dict_of_chunk_series_with_test_and_train = pickle.load(second_input_file)
endtime = round(((time.time() - starttime) / 60), 5)
print('Reading of the input file completed after '+str(endtime)+' minutes.')
# Expand the previously created dictionary (dict_of_chunk_series_with_test_and_train)
# to also hold the prediction series next to the train and the test series
# (and threshold values for test).
runningtime = round(((time.time() - starttime) / 60), 5)
print('Starting setting up dictionaries. Running time '+str(runningtime)+' min.')
# Deep copy so forecasts can be attached without mutating the loaded input dict.
dict_of_chunk_series_with_test_and_train_and_forecast = copy.deepcopy(dict_of_chunk_series_with_test_and_train)
dict_of_chunk_series_with_forecast_df = {}
accuracy_dict_for_chunk_iterations = {}
chunk_iterations_with_runtime_warning = pd.DataFrame(columns=["CHUNK_ID_FILLED_TH","ITERATION","WARNING_MSG"])
# Convert warnings to exceptions so numerically unstable ARIMAX fits can be
# caught and logged per chunk iteration instead of passing silently.
warnings.filterwarnings('error', category=RuntimeWarning)
np.seterr(all='warn')
runningtime = round(((time.time() - starttime) / 60), 5)
print('Completed setting up dictionaries. Running time '+str(runningtime)+' min.')
def _confusion_counts(true_alarms, forecast_alarms):
    """Return (tp, fn, fp, tn) for two aligned 0/1 alarm columns.

    Vectorized replacement for the previous per-row ``iterrows()`` loop with
    positional ``Series[0]`` indexing (deprecated in modern pandas).
    """
    actual = true_alarms.astype(bool)
    predicted = forecast_alarms.astype(bool)
    tp = int((actual & predicted).sum())
    fn = int((actual & ~predicted).sum())
    fp = int((~actual & predicted).sum())
    tn = int((~actual & ~predicted).sum())
    return tp, fn, fp, tn


def _append_named_row(df, counts, row_name):
    """Append one labeled confusion-matrix row (TP/FN/FP/TN) to ``df``.

    Uses ``pd.concat`` because ``DataFrame.append`` was removed in pandas 2.0.
    """
    tp, fn, fp, tn = counts
    row = pd.Series({"TP": tp, "FN": fn, "FP": fp, "TN": tn}, name=row_name)
    return pd.concat([df, row.to_frame().T])


# Fit one ARIMAX per chunk iteration for the high-alarm (MAX) and low-alarm
# (MIN) series, using the MEDIAN series as the exogenous regressor, then score
# the forecasts against the threshold-derived true alarms.
for j, chunk in enumerate(dict_of_chunk_series_with_test_and_train_and_forecast):
    dict_of_chunk_series_with_forecast_df[chunk] = {}
    accuracy_dict_for_chunk_iterations[chunk] = {}
    runningtime = round(((time.time() - starttime) / 60), 5)
    print('Chunk '+str(j)+' (ID: '+str(chunk)+'): START. Running time '+str(runningtime)+' min.')
    for i, chunk_iteration in enumerate(dict_of_chunk_series_with_test_and_train_and_forecast[chunk]):
        # Alias the nested dict once; writes through it mutate the same object.
        iteration_data = dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]
        TEST = iteration_data["TEST_LIST_MEDIAN"].size
        accurracy_matrix_df_for_chunk_iteration = pd.DataFrame(columns=["TP","FN","FP","TN"])
        # The forecast index continues the train window: [i+TRAIN, i+TRAIN+TEST).
        forecast_index = list(range(i + TRAIN, i + TRAIN + TEST))
        try:
            ########################
            # ARIMAX for High Alarms
            ########################
            current_train_list_high = iteration_data["TRAIN_LIST_MAX"]
            current_test_list_high = iteration_data["TEST_LIST_MAX"]
            current_train_list_exog_high = iteration_data["TRAIN_LIST_MEDIAN"].values.reshape(-1, 1)
            current_test_list_exog_high = iteration_data["TEST_LIST_MEDIAN"].values.reshape(-1, 1)
            arimax_high = pm.auto_arima(current_train_list_high, X=current_train_list_exog_high, seasonal=False, suppress_warnings=True, error_action='ignore')
            forecast_arimax_high = pd.Series(arimax_high.predict(TEST, X=current_test_list_exog_high), index=forecast_index, name="forecast_list_arimax_high")
            iteration_data["FORECAST_LIST_ARIMAX_HIGH"] = forecast_arimax_high
            runningtime = round(((time.time() - starttime) / 60), 5)
            print('Chunk '+str(j)+' (ID: '+str(chunk)+') iteration '+str(chunk_iteration)+': Completed ARIMAX - High Alarms. Running time '+str(runningtime)+' min.')
            ########################
            # ARIMAX for Low Alarms
            ########################
            current_train_list_low = iteration_data["TRAIN_LIST_MIN"]
            current_test_list_low = iteration_data["TEST_LIST_MIN"]
            current_train_list_exog_low = iteration_data["TRAIN_LIST_MEDIAN"].values.reshape(-1, 1)
            current_test_list_exog_low = iteration_data["TEST_LIST_MEDIAN"].values.reshape(-1, 1)
            arimax_low = pm.auto_arima(current_train_list_low, X=current_train_list_exog_low, seasonal=False, suppress_warnings=True, error_action='ignore')
            forecast_arimax_low = pd.Series(arimax_low.predict(TEST, X=current_test_list_exog_low), index=forecast_index, name="forecast_list_arimax_low")
            iteration_data["FORECAST_LIST_ARIMAX_LOW"] = forecast_arimax_low
            runningtime = round(((time.time() - starttime) / 60), 5)
            print('Chunk '+str(j)+' (ID: '+str(chunk)+') iteration '+str(chunk_iteration)+': Completed ARIMAX - Low Alarms. Running time '+str(runningtime)+' min.')
            # extract threshold series
            threshold_high_for_test_list = iteration_data["THRESHOLD_HIGH_FOR_TEST_LIST"]
            threshold_low_for_test_list = iteration_data["THRESHOLD_LOW_FOR_TEST_LIST"]
            # Collect test values, forecasts, and thresholds side by side.
            df_for_chunk_iteration = pd.concat([current_test_list_high,forecast_arimax_high,threshold_high_for_test_list,current_test_list_low,forecast_arimax_low,threshold_low_for_test_list],axis=1)
            ##############################################
            # Add information whether alarm was triggered
            ##############################################
            # True alarms: observed extreme crosses the clinical threshold.
            df_for_chunk_iteration['high_alarm_triggered'] = np.where(df_for_chunk_iteration['test_list_max'] > df_for_chunk_iteration['threshold_high_for_test_list'], 1, 0)
            df_for_chunk_iteration['low_alarm_triggered'] = np.where(df_for_chunk_iteration['test_list_min'] < df_for_chunk_iteration['threshold_low_for_test_list'], 1, 0)
            # Predicted alarms: ARIMAX forecast crosses the same threshold.
            df_for_chunk_iteration['high_alarm_triggered_forecast_arimax'] = np.where(df_for_chunk_iteration['forecast_list_arimax_high'] > df_for_chunk_iteration['threshold_high_for_test_list'], 1, 0)
            df_for_chunk_iteration['low_alarm_triggered_forecast_arimax'] = np.where(df_for_chunk_iteration['forecast_list_arimax_low'] < df_for_chunk_iteration['threshold_low_for_test_list'], 1, 0)
            # write to dict_of_chunk_series_with_forecast_df dataframe
            dict_of_chunk_series_with_forecast_df[chunk][chunk_iteration] = df_for_chunk_iteration
            runningtime = round(((time.time() - starttime) / 60), 5)
            print('Chunk '+str(j)+' (ID: '+str(chunk)+') iteration '+str(chunk_iteration)+': Completed Alarm Identification. Running time '+str(runningtime)+' min.')
            ##########################################
            # Calculate Confusion Matrix - High Alarms
            ##########################################
            high_counts = _confusion_counts(
                df_for_chunk_iteration['high_alarm_triggered'],
                df_for_chunk_iteration['high_alarm_triggered_forecast_arimax'])
            accurracy_matrix_df_for_chunk_iteration = _append_named_row(
                accurracy_matrix_df_for_chunk_iteration, high_counts, "accuracy_high_alarms_arimax")
            runningtime = round(((time.time() - starttime) / 60), 5)
            print('Chunk '+str(j)+' (ID: '+str(chunk)+') iteration '+str(chunk_iteration)+': Completed Confusion Matrix - High Alarms. Running time '+str(runningtime)+' min.')
            #########################################
            # Calculate Confusion Matrix - Low Alarms
            ##########################################
            low_counts = _confusion_counts(
                df_for_chunk_iteration['low_alarm_triggered'],
                df_for_chunk_iteration['low_alarm_triggered_forecast_arimax'])
            accurracy_matrix_df_for_chunk_iteration = _append_named_row(
                accurracy_matrix_df_for_chunk_iteration, low_counts, "accuracy_low_alarms_arimax")
            runningtime = round(((time.time() - starttime) / 60), 5)
            print('Chunk '+str(j)+' (ID: '+str(chunk)+') iteration '+str(chunk_iteration)+': Completed Confusion Matrix - Low Alarms. Running time '+str(runningtime)+' min.')
            # Write confusion matrix into dictionary
            accuracy_dict_for_chunk_iterations[chunk][chunk_iteration] = accurracy_matrix_df_for_chunk_iteration
        except RuntimeWarning as rw:
            # Numerical trouble during fitting/forecasting: log the iteration
            # and persist the warning table immediately, then skip to the next.
            a_new_row = {"CHUNK_ID_FILLED_TH": chunk, "ITERATION": chunk_iteration, "WARNING_MSG": str(rw)}
            a_new_row_series = pd.Series(a_new_row)
            chunk_iterations_with_runtime_warning = pd.concat(
                [chunk_iterations_with_runtime_warning, a_new_row_series.to_frame().T], ignore_index=True)
            # Keep the output target in sync with the active input file above.
            # chunk_iterations_with_runtime_warning.to_parquet(str(path_to_data)+'chunk_iterations_with_runtime_warning_for_arimax_'+str(TRAIN)+'_hr.parquet', engine='pyarrow')
            # chunk_iterations_with_runtime_warning.to_parquet(str(path_to_data)+'chunk_iterations_with_runtime_warning_for_arimax_'+str(TRAIN)+'_bp.parquet', engine='pyarrow')
            # chunk_iterations_with_runtime_warning.to_parquet(str(path_to_data)+'chunk_iterations_with_runtime_warning_for_arimax_'+str(TRAIN)+'_o2.parquet', engine='pyarrow')
            chunk_iterations_with_runtime_warning.to_parquet(str(path_to_data)+'chunk_iterations_with_runtime_warning_for_arimax_'+str(TRAIN)+'_hr_first1000.parquet', engine='pyarrow')
            # chunk_iterations_with_runtime_warning.to_parquet(str(path_to_data)+'chunk_iterations_with_runtime_warning_for_arimax_'+str(TRAIN)+'_bp_first1000.parquet', engine='pyarrow')
            # chunk_iterations_with_runtime_warning.to_parquet(str(path_to_data)+'chunk_iterations_with_runtime_warning_for_arimax_'+str(TRAIN)+'_o2_first1000.parquet', engine='pyarrow')
            print("RUNTIME WARNING DETECTED:")
            print(a_new_row_series)
    runningtime = round(((time.time() - starttime) / 60), 5)
    print('Chunk '+str(j)+' (ID: '+str(chunk)+' ) : Completed chunk. Running time '+str(runningtime)+' min.')
    print('--------------------')
endtime = round(((time.time() - starttime) / 60), 5)
print('DONE')
print('Completed in '+str(endtime)+' minutes.')
print('Starting saving dictionary.')
# Output path must match the active input file above (vital sign + subset).
# output_path = str(path_to_data)+'accuracy_dict_for_chunk_iterations_arimax_'+str(TRAIN)+'_hr.pickle'
# output_path = str(path_to_data)+'accuracy_dict_for_chunk_iterations_arimax_'+str(TRAIN)+'_bp.pickle'
# output_path = str(path_to_data)+'accuracy_dict_for_chunk_iterations_arimax_'+str(TRAIN)+'_o2.pickle'
output_path = str(path_to_data)+'accuracy_dict_for_chunk_iterations_arimax_'+str(TRAIN)+'_hr_first1000.pickle'
# output_path = str(path_to_data)+'accuracy_dict_for_chunk_iterations_arimax_'+str(TRAIN)+'_bp_first1000.pickle'
# output_path = str(path_to_data)+'accuracy_dict_for_chunk_iterations_arimax_'+str(TRAIN)+'_o2_first1000.pickle'
# Context manager guarantees the handle is flushed and closed on all paths.
with open(output_path, 'wb') as output_file:
    pickle.dump(accuracy_dict_for_chunk_iterations, output_file)
print('Completed saving dictionary.')