Commit 9983285

Commit message: big fat commmmits are bad.
1 parent b810754, commit 9983285

27 files changed, +2316/-184 lines changed

dataset.py (+120, -33)
@@ -57,26 +57,36 @@ def __init__(self, opt, subset=None, feature_dirs=[], fps=30, image_dir=None, im
     def _get_data(self):
         print(self.video_info_path)
         anno_df = pd.read_csv(self.video_info_path)
-        video_name_list = sorted(list(set(anno_df.video.values[:])))
-
+        video_name_list = sorted(list(set(anno_df.video.values[:])))
+
+        extra_feature_path = 'features.' if self.feature_dirs else ''
         video_info_dir = '/'.join(self.video_info_path.split('/')[:-1])
         if 'gymnastics' in self.video_info_path:
-            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.exgymthresh.pkl' % (
+            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.exgymthresh.%spkl' % (
                 self.subset, self.num_videoframes, self.skip_videoframes,
-                len(video_name_list))
-            )
+                len(video_name_list), extra_feature_path
+            ))
         else:
-            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.pkl' % (
+            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.%spkl' % (
                 self.subset, self.num_videoframes, self.skip_videoframes,
-                len(video_name_list))
-            )
+                len(video_name_list), extra_feature_path
+            ))
 
         print(saved_data_path)
         if os.path.exists(saved_data_path):
             print('Got saved data.')
             with open(saved_data_path, 'rb') as f:
                 self.data, self.durations = pickle.load(f)
-            print('Size of data: ', len(self.data['video_names']), flush=True)
+            print('Size of data: ', len(self.data['video_names']), flush=True)
+            if self.feature_dirs:
+                # Pare away all of the dumb shit.
+                valid_indices = [
+                    num for num, k in enumerate(self.data['video_data']) \
+                    if k.shape == (100, 2048)
+                ]
+                print('Filtered size of data: ', len(valid_indices))
+                self.data = {k: [v[num] for num in valid_indices]
+                             for k, v in self.data.items()}
             return
 
         if self.feature_dirs:
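The hunk above does two things: the cache filename gains a 'features.' infix whenever feature_dirs is set (so feature caches and image caches stop colliding), and a loaded cache is pared down to entries whose feature array is exactly (100, 2048). A minimal sketch of that logic, with helper names that are ours rather than the repo's:

```python
# Minimal sketch (not the repo's API) of the cache-naming and shape-filter logic above.
import os

def build_saved_path(video_info_dir, subset, num_videoframes, skip_videoframes,
                     num_videos, use_features, gymnastics=False):
    # The commit inserts 'features.' before 'pkl' only when feature dirs are used,
    # so feature-based and image-based caches get distinct filenames.
    infix = 'features.' if use_features else ''
    base = 'saved.%s.nf%d.sf%d.num%d.' % (subset, num_videoframes, skip_videoframes, num_videos)
    if gymnastics:
        base += 'exgymthresh.'
    return os.path.join(video_info_dir, base + infix + 'pkl')

def filter_by_shape(data, expected_shape=(100, 2048)):
    # Keep only entries whose cached feature array has the expected shape,
    # mirroring the valid_indices filtering added in this hunk.
    valid = [i for i, arr in enumerate(data['video_data']) if arr.shape == expected_shape]
    return {k: [v[i] for i in valid] for k, v in data.items()}
```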
@@ -104,19 +114,50 @@ def _get_data(self):
 
             # NOTE: num_snippet is the number of snippets in this video.
             if self.image_dir:
+                print('Doing imagedir...')
                 image_dir = self._get_image_dir(video_name)
                 num_snippet = len(os.listdir(image_dir))
                 self.durations[video_name] = num_snippet
                 num_snippet = int((num_snippet - start_snippet) / skip_videoframes)
             elif self.feature_dirs:
-                feature_dfs = [
-                    pd.read_csv(os.path.join(feature_dir, '%s.csv' % video_name))
-                    for feature_dir in self.feature_dirs
-                ]
-                num_snippet = min([len(df) for df in feature_dfs])
-                df_data = np.concatenate([df.values[:num_snippet, :]
-                                          for df in feature_dfs],
-                                         axis=1)
+                print('Doing feature dir ..')
+                if 'gymnastics' in self.video_info_path:
+                    # Assuming that rgbs is the first feature_df... -_-
+                    rgb_path = os.path.join(self.feature_dirs[0], video_name)
+                    rgb_files = os.listdir(rgb_path)
+                    orig_rgb_len = len(rgb_files)
+                    if len(self.feature_dirs) > 1:
+                        flow_path = os.path.join(self.feature_dirs[1], video_name)
+                        flow_files = os.listdir(flow_path)
+                        orig_flow_len = len(flow_files)
+                        converted_flow_files = [
+                            '%010.4f.npy' % (int(k[:-4]) / 12)
+                            for k in flow_files
+                        ]
+                        flow_indices = [num for num, flow in enumerate(converted_flow_files) \
+                                        if flow in rgb_files]
+                        rgb_indices = [num for num, rgb in enumerate(rgb_files) \
+                                       if rgb in converted_flow_files]
+                        flow_files = [flow_files[num] for num in flow_indices]
+                        rgb_files = [rgb_files[num] for num in rgb_indices]
+                        print(video_name, ' rgb/flow: ', len(rgb_files), len(flow_files), ' orig: ', orig_rgb_len, orig_flow_len)
+                        num_snippet = min(len(flow_files), len(rgb_files))
+                        rgb_data = np.array([np.load(os.path.join(rgb_path, rgb_file)) for rgb_file in rgb_files])
+                        flow_data = np.array([np.load(os.path.join(flow_path, flow_file)) for flow_file in flow_files])
+                        df_data = np.concatenate([rgb_data, flow_data], axis=1)
+                    else:
+                        rgb_data = np.array([np.load(os.path.join(rgb_path, rgb_file)) for rgb_file in rgb_files])
+                        df_data = rgb_data
+                        num_snippet = len(rgb_files)
+                else:
+                    feature_dfs = [
+                        pd.read_csv(os.path.join(feature_dir, '%s.csv' % video_name))
+                        for feature_dir in self.feature_dirs
+                    ]
+                    num_snippet = min([len(df) for df in feature_dfs])
+                    df_data = np.concatenate([df.values[:num_snippet, :]
+                                              for df in feature_dfs],
+                                             axis=1)
 
             df_snippet = [start_snippet + skip_videoframes*i for i in range(num_snippet)]
             num_windows = int((num_snippet + stride - num_videoframes) / stride)
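The new gymnastics branch above has to line up two differently named feature dumps: flow files are named by an integer frame index, rgb files by a zero-padded float, and dividing the flow index by 12 (presumably the extraction stride) reproduces the rgb naming. A toy, self-contained illustration of that alignment (the filenames here are made up):

```python
# Toy illustration of the rgb/flow filename alignment in the hunk above.
# Assumption, taken from the code: flow files look like '24.npy' (frame index),
# rgb files look like '00002.0000.npy', and index / 12 maps one onto the other.
rgb_files = ['00001.0000.npy', '00002.0000.npy', '00003.0000.npy']
flow_files = ['12.npy', '24.npy', '48.npy']

converted = ['%010.4f.npy' % (int(f[:-4]) / 12) for f in flow_files]
flow_keep = [i for i, name in enumerate(converted) if name in rgb_files]
rgb_keep = [i for i, name in enumerate(rgb_files) if name in converted]

print([flow_files[i] for i in flow_keep])  # ['12.npy', '24.npy']
print([rgb_files[i] for i in rgb_keep])    # ['00001.0000.npy', '00002.0000.npy']
```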
@@ -125,8 +166,11 @@ def _get_data(self):
                 windows_start = [0]
                 if self.feature_dirs:
                     # Add on a bunch of zero data if there aren't enough windows.
-                    tmp_data = np.zeros((num_videoframes - num_snippet, 400))
-                    df_data = np.concatenate((df_data, tmp_data), axis=0)
+                    if 'gymnastics' in self.video_info_path:
+                        pass
+                    else:
+                        tmp_data = np.zeros((num_videoframes - num_snippet, 400))
+                        df_data = np.concatenate((df_data, tmp_data), axis=0)
                 df_snippet.extend([
                     df_snippet[-1] + skip_videoframes*(i+1)
                     for i in range(num_videoframes - num_snippet)
@@ -248,21 +292,34 @@ def __len__(self):
 class TEMImages(TEMDataset):
     def __init__(self, opt, subset=None, fps=30, image_dir=None, img_loading_func=None, video_info_path=None):
         self.do_augment = opt['do_augment'] and subset == 'train'
+        self.ext = 'npy'
+        if '240x426' in opt['gym_image_dir']:
+            self.ext = 'png'
         super(TEMImages, self).__init__(opt, subset, feature_dirs=None, fps=fps, image_dir=image_dir, img_loading_func=img_loading_func, video_info_path=video_info_path)
 
     def _get_video_data(self, data, index):
         indices = data['indices'][index]
         name = data['video_names'][index]
         path = os.path.join(self.image_dir, name)
         path = Path(path)
-        paths = [path / ('%010.4f.npy' % (i / self.fps)) for i in indices]
+        paths = [path / ('%010.4f.%s' % ((i / self.fps), self.ext)) for i in indices]
         imgs = [self.img_loading_func(p.absolute(), do_augment=self.do_augment)
                 for p in paths if p.exists()]
-        if type(imgs[0]) == np.array:
-            video_data = np.array(imgs)
-            video_data = torch.Tensor(video_data)
-        elif type(imgs[0]) == torch.Tensor:
-            video_data = torch.stack(imgs)
+        try:
+            if type(imgs[0]) == np.array:
+                video_data = np.array(imgs)
+                video_data = torch.Tensor(video_data)
+            elif type(imgs[0]) == torch.Tensor:
+                video_data = torch.stack(imgs)
+            elif type(imgs[0]) == np.ndarray:
+                # This is for TSN
+                video_data = np.array(imgs)
+                video_data = torch.from_numpy(video_data)
+                video_data = video_data.type(torch.FloatTensor)
+        except Exception as e:
+            print(paths)
+            print([p.exists() for p in paths])
+            raise
 
         if len(video_data) < self.num_videoframes:
             shape = [self.num_videoframes - len(video_data)]
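The try/except above wraps the type dispatch so a bad path list is printed before the exception propagates. Note that `type(imgs[0]) == np.array` compares against the `np.array` function and is never true for an array, so the newly added `np.ndarray` branch is the one that actually handles numpy inputs. A standalone sketch of the stacking step (our helper, not the repo's):

```python
# Sketch of the stacking logic the try/except above guards, assuming the loader
# returns either torch.Tensors or numpy arrays.
import numpy as np
import torch

def stack_images(imgs):
    if isinstance(imgs[0], torch.Tensor):
        return torch.stack(imgs)
    if isinstance(imgs[0], np.ndarray):
        # Mirrors the new ndarray branch: stack, convert, cast to float32.
        return torch.from_numpy(np.array(imgs)).float()
    raise TypeError('Unexpected image type: %s' % type(imgs[0]))

frames = [np.zeros((3, 240, 426), dtype=np.float32) for _ in range(4)]
video_data = stack_images(frames)
print(video_data.shape)  # torch.Size([4, 3, 240, 426])
```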
@@ -352,6 +409,15 @@ def _get_image_dir(self, video_name):
         return os.path.join(self.image_dir, target_dir)
 
 
+class GymnasticsFeatures(TEMDataset):
+
+    def __init__(self, opt, subset=None, feature_dirs=[], video_info_path=None):
+        super(GymnasticsFeatures, self).__init__(opt, subset, feature_dirs, fps=None, image_dir=None, img_loading_func=None, video_info_path=video_info_path)
+
+    def _get_video_data(self, data, index):
+        return data['video_data'][index]
+
+
 class VideoDataset(data.Dataset):
     def __init__(self, opt, transforms, subset, fraction=1.):
         """file_list is a list of [/path/to/mp4 key-to-df]"""
@@ -368,8 +434,9 @@ def __init__(self, opt, transforms, subset, fraction=1.):
         self.anno_df = pd.read_csv(self.video_info_path)
         print(self.anno_df)
         print(subset, subset_translate.get(subset))
-        self.anno_df = self.anno_df[self.anno_df.subset == subset_translate[subset]]
-        print(self.anno_df)
+        if subset != 'full':
+            self.anno_df = self.anno_df[self.anno_df.subset == subset_translate[subset]]
+            print(self.anno_df)
 
         file_loc = opt['%s_video_file_list' % subset]
         with open(file_loc, 'r') as f:
@@ -421,7 +488,10 @@ def _subset_dataset(self, fraction):
         print(sorted(self.datum_indices)[-10:])
 
     def __len__(self):
-        return len(self.datum_indices)
+        if self.mode == 'train':
+            return len(self.datum_indices)
+        else:
+            return self.video_clips.num_clips()
 
     def _retrieve_valid_datums(self):
         video_info_dir = '/'.join(self.video_info_path.split('/')[:-1])
@@ -463,10 +533,15 @@ def __getitem__(self, index):
         if self.mode == "train":
             datum_index = self.datum_indices[index]
             flat_index, anchor_xmin, anchor_xmax, gt_bbox = self.datums[datum_index]
+        else:
+            flat_index = index
+
         video, _, _, video_idx = self.video_clips.get_clip(flat_index)
 
         video_data = video[0::self.skip_videoframes]
+        print('Bef transform: ', video_data, type(video_data))
         video_data = self.transforms(video_data)
+        print('AFt transform: ', video_data, type(video_data))
         video_data = torch.transpose(video_data, 0, 1)
 
         _, clip_idx = self.video_clips.get_clip_location(index)
477552
match_score_action, match_score_start, match_score_end = self._get_train_label(gt_bbox, anchor_xmin, anchor_xmax)
478553
return video_data, match_score_action, match_score_start, match_score_end
479554
else:
480-
video_name = self.keys_list[video_idx]
555+
try:
556+
video_name = self.keys_list[video_idx]
557+
except Exception as e:
558+
print('Whoops: VideoReader ...', video_idx, len(self.keys_list), index, flat_index)
481559
return flat_index, video_data, video_name, snippets
482560

483561
def _get_training_anchors(self, snippets, key):
@@ -620,7 +698,7 @@ def _exists(self, video_name):
620698
def _getDatasetDict(self):
621699
anno_df = pd.read_csv(self.video_info_path)
622700
anno_database = load_json(self.video_anno_path)
623-
print(self.video_anno_path, self.video_info_path)
701+
print(self.subset, self.video_anno_path, self.video_info_path)
624702
self.video_dict = {}
625703
for i in range(len(anno_df)):
626704
video_name = anno_df.video.values[i]
@@ -636,7 +714,9 @@ def _getDatasetDict(self):
636714
if self.subset in video_subset:
637715
self.video_dict[video_name] = video_info
638716
self.video_list = sorted(self.video_dict.keys())
717+
print('Init size of video_list: ', len(self.video_list))
639718
self.video_list = [k for k in self.video_list if self._exists(k)]
719+
print('Exists size of video_list: ', len(self.video_list))
640720

641721
if self.opt['pem_do_index']:
642722
self.features = {}
@@ -645,8 +725,11 @@ def _getDatasetDict(self):
645725
for video_name in self.video_list:
646726
pgm_proposals_path = os.path.join(self.opt['pgm_proposals_dir'], '%s.proposals.csv' % video_name)
647727
pgm_features_path = os.path.join(self.opt['pgm_features_dir'], '%s.features.npy' % video_name)
648-
pdf = pd.read_csv(pgm_proposals_path)
728+
pdf = pd.read_csv(pgm_proposals_path)
649729
video_feature = np.load(pgm_features_path)
730+
if not len(pdf) and self.mode == "train":
731+
continue
732+
650733
pre_count = len(pdf)
651734
if self.top_K > 0:
652735
try:
@@ -655,13 +738,17 @@ def _getDatasetDict(self):
655738
pdf['score'] = pdf.xmin_score * pdf.xmax_score
656739
pdf = pdf.sort_values(by="score", ascending=False)
657740
pdf = pdf[:self.top_K]
658-
video_feature = video_feature[pdf.index]
741+
try:
742+
video_feature = video_feature[pdf.index]
743+
except Exception as e:
744+
print('WAT IS HTIS: ', pgm_proposals_path, pgm_features_path)
745+
raise
659746

660747
# print(video_name, pre_count, len(pdf), video_feature.shape, pgm_proposals_path, pgm_features_path)
661748
self.proposals[video_name] = pdf
662749
self.features[video_name] = video_feature
663750
self.indices.extend([(video_name, i) for i in range(len(pdf))])
664-
print('Num indices: ', len(self.indices))
751+
print('Num indices: ', len(self.indices), len(self.proposals), len(self.features))
665752

666753
def __len__(self):
667754
if self.opt['pem_do_index'] > 0:

eval2.py (+7, -4)
@@ -259,15 +259,18 @@ def plot_metric(opt,
     plt.setp(plt.axes().get_xticklabels(), fontsize=fn_size)
     plt.setp(plt.axes().get_yticklabels(), fontsize=fn_size)
     #plt.show()
-    save_path = os.path.join(opt['postprocessed_results_dir'], 'evaluation_result.jpg')
+    save_path = os.path.join(opt['postprocessed_results_dir'], 'evaluation_result.width%d.jpg' % opt['postproc_width_init'])
     plt.savefig(save_path)
 
 
 def evaluation_proposal(opt):
+    width_init = opt['postproc_width_init']
     if 'thumos' in opt['dataset']:
-        bsn_results = pd.read_csv(os.path.join(opt['postprocessed_results_dir'], 'thumos14_results.csv'))
+        bsn_results = pd.read_csv(os.path.join(opt['postprocessed_results_dir'], 'thumos14_results.width%d.csv' % width_init))
     elif 'gymnastics' in opt['dataset']:
-        bsn_results = pd.read_csv(os.path.join(opt['postprocessed_results_dir'], 'gym_results.csv'))
+        bsn_results = pd.read_csv(os.path.join(opt['postprocessed_results_dir'], 'gym_results.width%d.csv' % width_init))
+    elif 'activitynet' in opt['dataset']:
+        bsn_results = pd.read_csv(os.path.join(opt['postprocessed_results_dir'], 'activitynet_results.width%d.csv' % width_init))
     ground_truth = pd.read_csv(opt['video_info'])
 
     # Computes average recall vs average number of proposals.
@@ -278,7 +281,7 @@ def evaluation_proposal(opt):
     interp_results = [(k, f(k)) for k in [50, 100, 200, 500, 1000]]
     interp_str = ', '.join(['%d: %.4f' % (k, v) for k, v in interp_results])
 
-    with open(os.path.join(opt['postprocessed_results_dir'], 'output.txt'), 'w') as f:
+    with open(os.path.join(opt['postprocessed_results_dir'], 'output.width%d.txt' % width_init), 'w') as f:
         f.write('[RESULTS] Performance on %s proposal task.\n' % opt['dataset'])
         f.write('\tArea Under the AR vs AN curve: {}%\n'.format(
             100. * float(area_under_curve) / average_nr_proposals[-1]))
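The eval2.py changes thread a width suffix, taken from 'postproc_width_init', through every output filename so that post-processing runs with different widths stop overwriting each other's results. A tiny sketch of that naming convention (the helper name is ours, not the repo's):

```python
# Sketch of the width-suffixed naming used above.
import os

def width_suffixed(results_dir, stem, ext, width_init):
    return os.path.join(results_dir, '%s.width%d.%s' % (stem, width_init, ext))

print(width_suffixed('/tmp/results', 'gym_results', 'csv', 50))
# /tmp/results/gym_results.width50.csv
```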

gen_pem_results_jobs.py (+46, -4)
@@ -3,6 +3,7 @@
 Example commands:
 python gen_tem_results_jobs.py
 """
+import json
 import os
 import re
 import sys
@@ -33,16 +34,55 @@
     943: 17, 950: 17, 976: 6, 977: 2, 928: 8, 1016: 6,
     1040: 8, 1000: 1, 1005: 1,
     1063: 35, 1051: 34, 1117: 10, 1045: 28, 1081: 35, 1094: 27, 1123: 13, 1106: 12, 1115: 16, 1095: 7, 1128: 20,
-    1319: 31, 1201: 6, 1186: 7, 1286: 8, 1188: 5, 1228: 16
+    1319: 31, 1201: 6, 1186: 7, 1286: 8, 1188: 5, 1228: 16,
+    2055: 24, 2057: 15, 2093: 16, 1937: 16, 1944: 4, 1949: 9, 1950: 12, 1935: 11, 1994: 19, 1971: 20, 2037: 20, 2031: 26, 2179: 13, 2189: 26, 2186: 16, 2145: 16,
+    # TSN Gymnastics
+    3720: 9, 3684: 20, 3726: 2, 3705: 25, 3687: 14,
+    # Corrflow NFC anet
+    3471: 29, 3477: 16, 3465: 23, 3468: 14, 3483: 13,
+    # ResNet NFC anet
+    3357: 15, 3387: 5, 3381: 7, 3351: 12, 3369: 27,
+    # Resnet dfc anet
+    3405: 10, 3423: 16, 3435: 4, 3393: 2, 3429: 12, 3417: 23,
+    # CCC anet
+    3543: 13, 3561: 24, 3567: 6, 3549: 8, 3573: 17,
+    # Ugh I'm silly. Here's the CorrfloW DFC anet that I never put in
+    3513: 13, 3489: 19, 3525: 11, 3501: 9, 3531: 11, 3495: 26, 3507: 17,
+    # AMDIM DFC anet
+    3639: 21, 3651: 2, 3633: 4, 3645: 3,
+    # TSN RGB 1
+    3975: 28, 3984: 6,
+    # TSN RGB 2
+    4005: 1, 4006: 15, 4003: 17, 4002: 6, 4001: 9,
 }
 
 
+fixed = json.load(open('/checkpoint/cinjon/spaceofmotion/bsn/peminf/fixeddata.json', 'r'))
+# print('Fixed: ', len(fixed), fixed.keys())
+# print(sorted(fixed.keys()))
+# fixed = {int(k.split('.')[0]): {i:j for i, j in v.items() if not i.startswith('base') and not 'curr_' in i and not 'start_time' in i} for k, v in fixed.items()}
+# print(sorted(fixed.items())[0])
+
+
+check = 0
+goods = []
+bads = []
 for ckpt_subdir in os.listdir(ckpt_directory):
-    c1, c2 = regex.match(ckpt_subdir).groups()
+    c1, c2 = regex.match(ckpt_subdir).groups()
     c1 = int(c1)
     c2 = int(c2)
     counter = c2
-    _job = run(find_counter=counter)
+    # print('\n', ckpt_subdir, '\n')
+    if c1 in fixed:
+        print('Got from fixed')
+        _job = fixed[c1]
+    elif str(c1) in fixed:
+        print('Got from fixed str')
+        _job = fixed[str(c1)]
+    else:
+        print('Run...')
+        _job = run(find_counter=counter)
+
     _job['num_gpus'] = num_gpus
     _job['num_cpus'] = num_gpus * 10
     _job['gb'] = 64 * num_gpus
@@ -57,5 +97,7 @@
     _job['checkpoint_epoch'] = ckpt_epoch
     _job['name'] = '%s.ckpt%d' % (name, ckpt_epoch)
     print(ckpt_subdir, counter, _job['name'])
-    # print(sorted(_job.items()))
+    check += 1
+    # print(sorted(_job.items()), '\n')
     fb_run_batch(_job, counter, email, code_directory)
+print(check)
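The job-generation script now prefers a saved config from fixeddata.json and only falls back to regenerating one with run(find_counter=...). JSON object keys are always strings, which is why both c1 and str(c1) are checked. A compact sketch of that lookup (the wrapper function is ours, not the repo's):

```python
# Sketch of the fixed-config lookup added above.
import json

def resolve_job(c1, counter, fixed_path, run):
    with open(fixed_path, 'r') as f:
        fixed = json.load(f)
    if c1 in fixed:          # only hits if the dict was re-keyed to ints
        return fixed[c1]
    if str(c1) in fixed:     # the usual case for a freshly loaded JSON dict
        return fixed[str(c1)]
    return run(find_counter=counter)
```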
