-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze_gradients.py
223 lines (178 loc) · 7.27 KB
/
analyze_gradients.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""
Analyze the individual gradients (averaged per epoch)
"""
import os, sys
import csv, json
import numpy as np
from tqdm import tqdm
# custom libs
from utils import io
# ------------------------------------------------------------------------------
# Global variables
# ------------------------------------------------------------------------------
# analysis granularity; the entry point only acts when this is 'epoch'
run_mode = 'epoch'

# root folder of one experiment run (sample location)
results_base = (
    "results/analysis/ipoisoning/label-flips/"
    "fashion_mnist_3_4.1.0_random_0.4/vanilla_lr_300_40_0.01"
)

# input folder holding the per-parameter update csv files, followed by the
# path prefixes under which the extracted results are stored
results_folder, mstore_file, astore_file, nstore_file = (
    os.path.join(results_base, leaf)
    for leaf in ("param_updates", "mextracts", "aextracts", "noiseadds")
)
# ------------------------------------------------------------------------------
# Misc. functions
# ------------------------------------------------------------------------------
def _compute_cosine_similarly(data1, data2):
# flatten
vector1 = data1.flatten()
vector2 = data2.flatten()
# compute
dot_product = np.dot(vector1, vector2)
len_vector1 = np.sqrt(np.dot(vector1, vector1))
len_vector2 = np.sqrt(np.dot(vector2, vector2))
return dot_product / (len_vector1 * len_vector2)
# ------------------------------------------------------------------------------
# Analysis functions
# ------------------------------------------------------------------------------
def load_files_epoch(folder, param_index=0):
    """
    Group the csv files of one parameter by the index token of their name.

    Only files whose name ends with '_<param_index>.csv' are considered. The
    text before the first '_' of the name is the index (presumably the epoch
    number -- confirm against the code that writes these files); it is
    zero-padded to three characters so that sorting the keys as strings
    follows the numeric order. A file is filed under 'clean' or 'poison'
    depending on which word its name contains ('clean' is checked first);
    files containing neither word are ignored.

    Args:
        folder: directory to scan.
        param_index: index of the parameter of interest.

    Returns:
        dict mapping each index to {'clean': [paths], 'poison': [paths]},
        where every path is `folder` joined with the file name. Either list
        may be empty.
    """
    suffix = '_{}.csv'.format(param_index)
    data_files = {}

    # sorted: os.listdir() returns the entries in arbitrary order
    for each_file in tqdm( \
            sorted(os.listdir(folder)), desc=' [load_data]'):

        # : skip, if the csv file is not in our interest
        if not each_file.endswith(suffix): continue

        # : decide which list the file belongs to
        if 'clean' in each_file:
            kind = 'clean'
        elif 'poison' in each_file:
            kind = 'poison'
        else:
            continue

        # : extract the index and add the file under it
        index = each_file.split('_')[0].zfill(3)
        bucket = data_files.setdefault(index, {'clean': [], 'poison': []})

        # : join with the scanned folder, not the module-level results_folder,
        #   so that the returned paths are valid for any `folder`
        bucket[kind].append(os.path.join(folder, each_file))

    return data_files
def _average_update_arrays(csv_paths):
    """Load each csv in `csv_paths` as a float array; return their element-wise mean."""
    arrays = [
        np.array([list(map(float, row)) for row in io.load_from_csv(path)])
        for path in csv_paths
    ]
    return sum(arrays) / len(arrays)


def extract_magnitudes_epoch(filedata):
    """
    Compute the clean/poison magnitudes per index and store them to a csv.

    For every key of `filedata` (visited in sorted order) the csv files under
    'clean' and under 'poison' are loaded and averaged, and the norm of each
    average is recorded. When one side has no files, the most recent value
    of that side is reused (0. for clean and None for poison until a first
    value exists).

    Args:
        filedata: dict of {index: {'clean': [paths], 'poison': [paths]}},
            as returned by load_files_epoch().

    Returns:
        None. One [clean, poison] row per index is written to
        '<mstore_file>_epoch.csv'.
    """
    prev_mclean = 0.
    prev_mpoison = None

    # result-holder...
    epoch_magnitudes = []

    # iterate over the indexes
    for datanum in tqdm( \
            sorted(filedata.keys()), desc=' [m-extract]'):
        cur_filedata = filedata[datanum]

        # : [clean] test the list itself, not the key: the key may exist with
        #   an empty list, and averaging zero files would divide by zero
        cur_mclean = prev_mclean
        clean_files = cur_filedata.get('clean')
        if clean_files:
            # NOTE(review): for 2-D input, ord=2 is the spectral norm (largest
            #   singular value), not the norm of the flattened values --
            #   confirm that this is the intended magnitude
            cur_mclean = np.linalg.norm(_average_update_arrays(clean_files), 2)
            prev_mclean = cur_mclean

        # : [poison] same procedure as the clean side
        cur_mpoison = prev_mpoison
        poison_files = cur_filedata.get('poison')
        if poison_files:
            cur_mpoison = np.linalg.norm(_average_update_arrays(poison_files), 2)
            prev_mpoison = cur_mpoison

        # : store them to the data-holder
        epoch_magnitudes.append([cur_mclean, cur_mpoison])

    mstore_csvfile = '{}_epoch.csv'.format(mstore_file)
    io.store_to_csv(mstore_csvfile, epoch_magnitudes)
    print(' : Magnitudes are stored to [{}]'.format(mstore_csvfile))
    # done.
def _load_mean_update(csv_paths):
    """Load each csv in `csv_paths` as a float array; return their element-wise mean."""
    arrays = [
        np.array([list(map(float, row)) for row in io.load_from_csv(path)])
        for path in csv_paths
    ]
    return sum(arrays) / len(arrays)


def extract_anglediffs_epoch(filedata):
    """
    Compare the clean and poison updates per index and store the results.

    For every key of `filedata` (visited in sorted order) the csv files under
    'clean' and under 'poison' are loaded and averaged, and the cosine
    similarity between the two averages is recorded. Note that the stored
    value is the cosine similarity itself, not an angle. When either side is
    missing or empty, the most recent similarity is reused (0. until a first
    value exists).

    Args:
        filedata: dict of {index: {'clean': [paths], 'poison': [paths]}},
            as returned by load_files_epoch().

    Returns:
        None. One [similarity] row per index is written to
        '<astore_file>_epoch.csv'.
    """
    prev_angdiff = 0.

    # result-holder...
    epoch_anglediffs = []

    # iterate over the indexes
    for datanum in tqdm( \
            sorted(filedata.keys()), desc=' [a-extract]'):
        cur_filedata = filedata[datanum]
        clean_files = cur_filedata.get('clean')
        poison_files = cur_filedata.get('poison')

        # : start from the last value, so the row is always defined -- even
        #   when the very first index lacks one of the two sides
        cur_angdiff = prev_angdiff

        # : test the lists themselves, not the keys: a key may exist with an
        #   empty list, and averaging zero files would divide by zero
        if clean_files and poison_files:
            cur_cdata = _load_mean_update(clean_files)
            cur_pdata = _load_mean_update(poison_files)

            # :: compute the similarity only when both have content
            if (cur_cdata.size != 0) \
                    and (cur_pdata.size != 0):
                cur_angdiff = _compute_cosine_similarly(cur_cdata, cur_pdata)
                prev_angdiff = cur_angdiff

        # : store them to the data-holder
        epoch_anglediffs.append([cur_angdiff])

    astore_csvfile = '{}_epoch.csv'.format(astore_file)
    io.store_to_csv(astore_csvfile, epoch_anglediffs)
    print(' : Angle differences are stored to [{}]'.format(astore_csvfile))
    # done.
"""
Main (to compute the magnitudes and orientations from poison and clean gradients)
"""
if __name__ == "__main__":
    # index of the parameter whose update files are analyzed
    param_index = 0
    print(' : Start the analysis on [{}]th parameters'.format(param_index))

    # [Epoch-based analysis] -- nothing is analyzed for any other run_mode
    if run_mode == 'epoch':
        # group the csv files of the chosen parameter by index
        csv_files = load_files_epoch(results_folder, param_index=param_index)
        print(' : Load the data for the [{}] epochs'.format(len(csv_files)))

        # write the magnitudes first, then the clean-vs-poison similarities
        extract_magnitudes_epoch(csv_files)
        extract_anglediffs_epoch(csv_files)

    print(' : Done.')
    # done.