@@ -57,26 +57,36 @@ def __init__(self, opt, subset=None, feature_dirs=[], fps=30, image_dir=None, im
     def _get_data(self):
         print(self.video_info_path)
         anno_df = pd.read_csv(self.video_info_path)
-        video_name_list = sorted(list(set(anno_df.video.values[:])))
-
+        video_name_list = sorted(list(set(anno_df.video.values[:])))
+
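+        # Tag the cache filename when feature dirs are in use so that
+        # feature-based and image-based caches do not collide.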
+        extra_feature_path = 'features.' if self.feature_dirs else ''
         video_info_dir = '/'.join(self.video_info_path.split('/')[:-1])
         if 'gymnastics' in self.video_info_path:
-            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.exgymthresh.pkl' % (
+            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.exgymthresh.%spkl' % (
                 self.subset, self.num_videoframes, self.skip_videoframes,
-                len(video_name_list))
-            )
+                len(video_name_list), extra_feature_path
+            ))
         else:
-            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.pkl' % (
+            saved_data_path = os.path.join(video_info_dir, 'saved.%s.nf%d.sf%d.num%d.%spkl' % (
                 self.subset, self.num_videoframes, self.skip_videoframes,
-                len(video_name_list))
-            )
+                len(video_name_list), extra_feature_path
+            ))

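+        # The assembled data is cached to disk so later runs can skip the
+        # expensive feature loading below.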
         print(saved_data_path)
         if os.path.exists(saved_data_path):
             print('Got saved data.')
             with open(saved_data_path, 'rb') as f:
                 self.data, self.durations = pickle.load(f)
-            print('Size of data: ', len(self.data['video_names']), flush=True)
+            print('Size of data: ', len(self.data['video_names']), flush=True)
+            if self.feature_dirs:
+                # Keep only entries whose features have the expected
+                # (num_videoframes, feature_dim) shape of (100, 2048).
+                valid_indices = [
+                    num for num, k in enumerate(self.data['video_data'])
+                    if k.shape == (100, 2048)
+                ]
+                print('Filtered size of data: ', len(valid_indices))
+                self.data = {k: [v[num] for num in valid_indices]
+                             for k, v in self.data.items()}
             return

         if self.feature_dirs:
@@ -104,19 +114,50 @@ def _get_data(self):

             # NOTE: num_snippet is the number of snippets in this video.
             if self.image_dir:
+                print('Doing imagedir...')
                 image_dir = self._get_image_dir(video_name)
                 num_snippet = len(os.listdir(image_dir))
                 self.durations[video_name] = num_snippet
                 num_snippet = int((num_snippet - start_snippet) / skip_videoframes)
             elif self.feature_dirs:
-                feature_dfs = [
-                    pd.read_csv(os.path.join(feature_dir, '%s.csv' % video_name))
-                    for feature_dir in self.feature_dirs
-                ]
-                num_snippet = min([len(df) for df in feature_dfs])
-                df_data = np.concatenate([df.values[:num_snippet, :]
-                                          for df in feature_dfs],
-                                         axis=1)
+                print('Doing feature dir ..')
+                if 'gymnastics' in self.video_info_path:
+                    # Assumes the rgb features live in the first feature_dir.
+                    rgb_path = os.path.join(self.feature_dirs[0], video_name)
+                    rgb_files = os.listdir(rgb_path)
+                    orig_rgb_len = len(rgb_files)
+                    if len(self.feature_dirs) > 1:
+                        flow_path = os.path.join(self.feature_dirs[1], video_name)
+                        flow_files = os.listdir(flow_path)
+                        orig_flow_len = len(flow_files)
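+                        # Flow files are named by frame index; convert to the
+                        # rgb timestamp naming (index / 12, presumably frames
+                        # extracted at 12 fps) so the two sets can be matched.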
+                        converted_flow_files = [
+                            '%010.4f.npy' % (int(k[:-4]) / 12)
+                            for k in flow_files
+                        ]
+                        flow_indices = [num for num, flow in enumerate(converted_flow_files)
+                                        if flow in rgb_files]
+                        rgb_indices = [num for num, rgb in enumerate(rgb_files)
+                                       if rgb in converted_flow_files]
+                        flow_files = [flow_files[num] for num in flow_indices]
+                        rgb_files = [rgb_files[num] for num in rgb_indices]
+                        print(video_name, ' rgb/flow: ', len(rgb_files), len(flow_files), ' orig: ', orig_rgb_len, orig_flow_len)
+                        num_snippet = min(len(flow_files), len(rgb_files))
+                        rgb_data = np.array([np.load(os.path.join(rgb_path, rgb_file)) for rgb_file in rgb_files])
+                        flow_data = np.array([np.load(os.path.join(flow_path, flow_file)) for flow_file in flow_files])
+                        df_data = np.concatenate([rgb_data, flow_data], axis=1)
+                    else:
+                        rgb_data = np.array([np.load(os.path.join(rgb_path, rgb_file)) for rgb_file in rgb_files])
+                        df_data = rgb_data
+                        num_snippet = len(rgb_files)
+                else:
+                    feature_dfs = [
+                        pd.read_csv(os.path.join(feature_dir, '%s.csv' % video_name))
+                        for feature_dir in self.feature_dirs
+                    ]
+                    num_snippet = min([len(df) for df in feature_dfs])
+                    df_data = np.concatenate([df.values[:num_snippet, :]
+                                              for df in feature_dfs],
+                                             axis=1)

             df_snippet = [start_snippet + skip_videoframes * i for i in range(num_snippet)]
             num_windows = int((num_snippet + stride - num_videoframes) / stride)
@@ -125,8 +166,11 @@ def _get_data(self):
                 windows_start = [0]
                 if self.feature_dirs:
                     # Add on a bunch of zero data if there aren't enough windows.
-                    tmp_data = np.zeros((num_videoframes - num_snippet, 400))
-                    df_data = np.concatenate((df_data, tmp_data), axis=0)
+                    if 'gymnastics' in self.video_info_path:
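+                        # The zero padding below hardcodes a 400-dim feature;
+                        # gymnastics features differ, so skip it (assumption).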
+                        pass
+                    else:
+                        tmp_data = np.zeros((num_videoframes - num_snippet, 400))
+                        df_data = np.concatenate((df_data, tmp_data), axis=0)
                 df_snippet.extend([
                     df_snippet[-1] + skip_videoframes * (i + 1)
                     for i in range(num_videoframes - num_snippet)
@@ -248,21 +292,34 @@ def __len__(self):
 class TEMImages(TEMDataset):
     def __init__(self, opt, subset=None, fps=30, image_dir=None, img_loading_func=None, video_info_path=None):
         self.do_augment = opt['do_augment'] and subset == 'train'
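+        # Frames are stored as .npy arrays by default; the 240x426 dumps are
+        # raw .png images.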
+        self.ext = 'npy'
+        if '240x426' in opt['gym_image_dir']:
+            self.ext = 'png'
         super(TEMImages, self).__init__(opt, subset, feature_dirs=None, fps=fps, image_dir=image_dir, img_loading_func=img_loading_func, video_info_path=video_info_path)

     def _get_video_data(self, data, index):
         indices = data['indices'][index]
         name = data['video_names'][index]
         path = os.path.join(self.image_dir, name)
         path = Path(path)
-        paths = [path / ('%010.4f.npy' % (i / self.fps)) for i in indices]
+        paths = [path / ('%010.4f.%s' % ((i / self.fps), self.ext)) for i in indices]
         imgs = [self.img_loading_func(p.absolute(), do_augment=self.do_augment)
                 for p in paths if p.exists()]
-        if type(imgs[0]) == np.array:
-            video_data = np.array(imgs)
-            video_data = torch.Tensor(video_data)
-        elif type(imgs[0]) == torch.Tensor:
-            video_data = torch.stack(imgs)
+        try:
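+            # Note: type(x) == np.array never matches since np.array is a
+            # factory function, not a type; numpy outputs are caught by the
+            # np.ndarray branch below.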
+            if type(imgs[0]) == np.array:
+                video_data = np.array(imgs)
+                video_data = torch.Tensor(video_data)
+            elif type(imgs[0]) == torch.Tensor:
+                video_data = torch.stack(imgs)
+            elif type(imgs[0]) == np.ndarray:
+                # This is for TSN.
+                video_data = np.array(imgs)
+                video_data = torch.from_numpy(video_data)
+                video_data = video_data.type(torch.FloatTensor)
+        except Exception as e:
+            print(paths)
+            print([p.exists() for p in paths])
+            raise

         if len(video_data) < self.num_videoframes:
             shape = [self.num_videoframes - len(video_data)]
@@ -352,6 +409,15 @@ def _get_image_dir(self, video_name):
         return os.path.join(self.image_dir, target_dir)


+class GymnasticsFeatures(TEMDataset):
+
+    def __init__(self, opt, subset=None, feature_dirs=[], video_info_path=None):
+        super(GymnasticsFeatures, self).__init__(opt, subset, feature_dirs, fps=None, image_dir=None, img_loading_func=None, video_info_path=video_info_path)
+
+    def _get_video_data(self, data, index):
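+        # Features were already loaded into data['video_data'] by _get_data.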
+        return data['video_data'][index]
+
+
 class VideoDataset(data.Dataset):
     def __init__(self, opt, transforms, subset, fraction=1.):
         """file_list is a list of [/path/to/mp4 key-to-df]"""
@@ -368,8 +434,9 @@ def __init__(self, opt, transforms, subset, fraction=1.):
         self.anno_df = pd.read_csv(self.video_info_path)
         print(self.anno_df)
         print(subset, subset_translate.get(subset))
-        self.anno_df = self.anno_df[self.anno_df.subset == subset_translate[subset]]
-        print(self.anno_df)
+        if subset != 'full':
+            self.anno_df = self.anno_df[self.anno_df.subset == subset_translate[subset]]
+            print(self.anno_df)

         file_loc = opt['%s_video_file_list' % subset]
         with open(file_loc, 'r') as f:
@@ -421,7 +488,10 @@ def _subset_dataset(self, fraction):
         print(sorted(self.datum_indices)[-10:])

     def __len__(self):
-        return len(self.datum_indices)
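+        # Training iterates over the precomputed datums; eval walks every clip.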
+        if self.mode == 'train':
+            return len(self.datum_indices)
+        else:
+            return self.video_clips.num_clips()

     def _retrieve_valid_datums(self):
         video_info_dir = '/'.join(self.video_info_path.split('/')[:-1])
@@ -463,10 +533,15 @@ def __getitem__(self, index):
         if self.mode == "train":
             datum_index = self.datum_indices[index]
             flat_index, anchor_xmin, anchor_xmax, gt_bbox = self.datums[datum_index]
+        else:
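+            # In eval mode the dataset index already addresses video_clips directly.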
+            flat_index = index
+
         video, _, _, video_idx = self.video_clips.get_clip(flat_index)

         video_data = video[0::self.skip_videoframes]
+        print('Bef transform: ', video_data, type(video_data))
         video_data = self.transforms(video_data)
+        print('Aft transform: ', video_data, type(video_data))
         video_data = torch.transpose(video_data, 0, 1)

         _, clip_idx = self.video_clips.get_clip_location(index)
@@ -477,7 +552,10 @@ def __getitem__(self, index):
             match_score_action, match_score_start, match_score_end = self._get_train_label(gt_bbox, anchor_xmin, anchor_xmax)
             return video_data, match_score_action, match_score_start, match_score_end
         else:
-            video_name = self.keys_list[video_idx]
+            try:
+                video_name = self.keys_list[video_idx]
+            except Exception as e:
+                print('Whoops: VideoReader ...', video_idx, len(self.keys_list), index, flat_index)
+                raise
             return flat_index, video_data, video_name, snippets

     def _get_training_anchors(self, snippets, key):
@@ -620,7 +698,7 @@ def _exists(self, video_name):
     def _getDatasetDict(self):
         anno_df = pd.read_csv(self.video_info_path)
         anno_database = load_json(self.video_anno_path)
-        print(self.video_anno_path, self.video_info_path)
+        print(self.subset, self.video_anno_path, self.video_info_path)
         self.video_dict = {}
         for i in range(len(anno_df)):
             video_name = anno_df.video.values[i]
@@ -636,7 +714,9 @@ def _getDatasetDict(self):
             if self.subset in video_subset:
                 self.video_dict[video_name] = video_info
         self.video_list = sorted(self.video_dict.keys())
+        print('Init size of video_list: ', len(self.video_list))
         self.video_list = [k for k in self.video_list if self._exists(k)]
+        print('Exists size of video_list: ', len(self.video_list))

         if self.opt['pem_do_index']:
             self.features = {}
@@ -645,8 +725,11 @@ def _getDatasetDict(self):
             for video_name in self.video_list:
                 pgm_proposals_path = os.path.join(self.opt['pgm_proposals_dir'], '%s.proposals.csv' % video_name)
                 pgm_features_path = os.path.join(self.opt['pgm_features_dir'], '%s.features.npy' % video_name)
-                pdf = pd.read_csv(pgm_proposals_path)
+                pdf = pd.read_csv(pgm_proposals_path)
                 video_feature = np.load(pgm_features_path)
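+                # Skip videos that produced no proposals during training.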
+                if not len(pdf) and self.mode == "train":
+                    continue
+
                 pre_count = len(pdf)
                 if self.top_K > 0:
                     try:
@@ -655,13 +738,17 @@ def _getDatasetDict(self):
                         pdf['score'] = pdf.xmin_score * pdf.xmax_score
                         pdf = pdf.sort_values(by="score", ascending=False)
                     pdf = pdf[:self.top_K]
-                    video_feature = video_feature[pdf.index]
+                    try:
+                        video_feature = video_feature[pdf.index]
+                    except Exception as e:
+                        print('Mismatched proposals and features: ', pgm_proposals_path, pgm_features_path)
+                        raise

                 # print(video_name, pre_count, len(pdf), video_feature.shape, pgm_proposals_path, pgm_features_path)
                 self.proposals[video_name] = pdf
                 self.features[video_name] = video_feature
                 self.indices.extend([(video_name, i) for i in range(len(pdf))])
-            print('Num indices: ', len(self.indices))
+            print('Num indices: ', len(self.indices), len(self.proposals), len(self.features))

     def __len__(self):
         if self.opt['pem_do_index'] > 0: