import json
import os
from pathlib import Path
+ import random
import re
import pickle
from urllib.parse import unquote

import pandas as pd
import torch.utils.data as data
import torch
+ from torchvision.datasets.video_utils import VideoClips


def load_json(file):
@@ -24,6 +26,7 @@ def ioa_with_anchors(anchors_min, anchors_max, box_min, box_max):
    int_xmin = np.maximum(anchors_min, box_min)
    int_xmax = np.minimum(anchors_max, box_max)
    inter_len = np.maximum(int_xmax - int_xmin, 0.)
+     # print(anchors_min, anchors_max, box_min, box_max, int_xmin, int_xmax, inter_len)
    scores = np.divide(inter_len, len_anchors)
    return scores

@@ -47,7 +50,7 @@ def __init__(self, opt, subset=None, feature_dirs=[], fps=30, image_dir=None, im
        self.fps = fps

        # e.g. /data/thumos14_annotations/Test_Annotation.csv
-         self.video_info_path = os.path.join(opt["video_info"], '%s_Annotation.csv' % self.subset)
+         self.video_info_path = opt["video_info"]
        self._get_data()

    def _get_data(self):
@@ -325,6 +328,198 @@ def _get_image_dir(self, video_name):
        return os.path.join(self.image_dir, target_dir)


+ class VideoDataset(data.Dataset):
+     def __init__(self, opt, transforms, subset, fraction=1.):
+         """Each line of the video file list pairs a video path with its
+         annotation key: `/path/to/video.mp4 <key>.mp4`."""
+         self.subset = subset
+         self.video_info_path = opt["video_info"]
+         self.mode = opt["mode"]
+         self.boundary_ratio = opt['boundary_ratio']
+         self.skip_videoframes = opt['skip_videoframes']
+         self.num_videoframes = opt['num_videoframes']
+         self.dist_videoframes = opt['dist_videoframes']
+         self.fraction = fraction
+ 
+         subset_translate = {'train': 'training', 'val': 'validation'}
+         self.anno_df = pd.read_csv(self.video_info_path)
+         print(self.anno_df)
+         print(subset, subset_translate.get(subset))
+         self.anno_df = self.anno_df[self.anno_df.subset == subset_translate[subset]]
+         print(self.anno_df)
+ 
+         file_loc = opt['%s_video_file_list' % subset]
+         with open(file_loc, 'r') as f:
+             lines = [k.strip() for k in f.readlines()]
+ 
+         file_list = [k.split(' ')[0] for k in lines]
+         keys_list = [k.split(' ')[1][:-4] for k in lines]
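+         # The second field's 4-character extension (e.g. '.mp4') is stripped
+         # to obtain the key used in the annotation DataFrame's `video` column.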
+         print(keys_list[:5])
+         valid_keys = set(self.anno_df.video.unique())
+         valid_key_indices = [num for num, k in enumerate(keys_list)
+                              if k in valid_keys]
+         self.keys_list = [keys_list[num] for num in valid_key_indices]
+         self.file_list = [file_list[num] for num in valid_key_indices]
+         print('Number of indices: ', len(valid_key_indices), subset)
+ 
+         video_info_dir = '/'.join(self.video_info_path.split('/')[:-1])
+         clip_length_in_frames = self.num_videoframes * self.skip_videoframes
+         frames_between_clips = self.dist_videoframes
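+         # Each clip spans num_videoframes * skip_videoframes raw frames, so
+         # taking every skip_videoframes-th frame later yields num_videoframes
+         # frames. Indexing with VideoClips is slow, so the index is cached.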
+         saved_video_clips = os.path.join(
+             video_info_dir, 'video_clips.%s.%df.%ds.pkl' % (
+                 subset, clip_length_in_frames, frames_between_clips))
+         if os.path.exists(saved_video_clips):
+             print('Path Exists for video_clips: ', saved_video_clips)
+             self.video_clips = pickle.load(open(saved_video_clips, 'rb'))
+         else:
+             print('Path does NOT exist for video_clips: ', saved_video_clips)
+             self.video_clips = VideoClips(
+                 self.file_list, clip_length_in_frames=clip_length_in_frames,
+                 frames_between_clips=frames_between_clips, frame_rate=opt['fps'])
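+             # frame_rate=opt['fps'] makes VideoClips resample every video to a
+             # common fps before clips are enumerated.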
+             pickle.dump(self.video_clips, open(saved_video_clips, 'wb'))
+         print('Length of vid clips: ', self.video_clips.num_clips(), self.subset)
+ 
+         if self.mode == "train":
+             self.datums = self._retrieve_valid_datums()
+             self.datum_indices = list(range(len(self.datums)))
+             if fraction < 1:
+                 print('DOING the subset dataset on %s ...' % subset)
+                 self._subset_dataset(fraction)
+             print('Len of %s datums: ' % subset, len(self.datum_indices))
+ 
+         self.transforms = transforms
+ 
+     def _subset_dataset(self, fraction):
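+         # Keep a random `fraction` of the training datums.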
+         num_datums = int(len(self.datums) * fraction)
+         self.datum_indices = list(range(len(self.datums)))
+         random.shuffle(self.datum_indices)
+         self.datum_indices = self.datum_indices[:num_datums]
+ 
+     def __len__(self):
+         if self.mode == "train":
+             return len(self.datum_indices)
+         # Outside of training every clip is its own datum.
+         return self.video_clips.num_clips()
+ 
+     def _retrieve_valid_datums(self):
+         video_info_dir = '/'.join(self.video_info_path.split('/')[:-1])
+         num_clips = self.video_clips.num_clips()
+         saved_data_path = os.path.join(
+             video_info_dir, 'saved.%s.nf%d.sf%d.df%d.vid%d.pkl' % (
+                 self.subset, self.num_videoframes, self.skip_videoframes,
+                 self.dist_videoframes, num_clips))
+         print(saved_data_path)
+         if os.path.exists(saved_data_path):
+             print('Got saved data.')
+             with open(saved_data_path, 'rb') as f:
+                 return pickle.load(f)
+ 
+         ret = []
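+         # A clip is kept only if at least one ground-truth segment overlaps
+         # it; each datum is (flat clip index, anchor mins, anchor maxs,
+         # overlapping gt boxes).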
+         for flat_index in range(num_clips):
+             video_idx, clip_idx = self.video_clips.get_clip_location(flat_index)
+             start_frame = clip_idx * self.dist_videoframes
+             snippets = [start_frame + self.skip_videoframes * i
+                         for i in range(self.num_videoframes)]
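+             # snippets holds the raw-frame indices sampled for this clip
+             # (assuming clips start every dist_videoframes frames).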
+             key = self.keys_list[video_idx]
+             training_anchors = self._get_training_anchors(snippets, key)
+             if not training_anchors:
+                 continue
+ 
+             anchor_xmins, anchor_xmaxs, gt_bbox = training_anchors
+             ret.append((flat_index, anchor_xmins, anchor_xmaxs, gt_bbox))
+ 
+         print('Size of data: ', len(ret), flush=True)
+         with open(saved_data_path, 'wb') as f:
+             pickle.dump(ret, f)
+         print('Dumped data...')
+         return ret
+ 
+     def __getitem__(self, index):
+         # The video_data retrieved has shape [nf * sf, w, h, c].
+         # We want to pick every sf'th frame out of that.
+         if self.mode == "train":
+             datum_index = self.datum_indices[index]
+             flat_index, anchor_xmin, anchor_xmax, gt_bbox = self.datums[datum_index]
+         else:
+             # Outside of training the dataset is indexed directly by clip.
+             flat_index = index
+         video, _, _, video_idx = self.video_clips.get_clip(flat_index)
+ 
+         video_data = video[0::self.skip_videoframes]
+         video_data = self.transforms(video_data)
+         video_data = torch.transpose(video_data, 0, 1)
+ 
+         _, clip_idx = self.video_clips.get_clip_location(flat_index)
+         start_frame = clip_idx * self.dist_videoframes
+         snippets = [start_frame + self.skip_videoframes * i
+                     for i in range(self.num_videoframes)]
+         if self.mode == "train":
+             match_score_action, match_score_start, match_score_end = \
+                 self._get_train_label(gt_bbox, anchor_xmin, anchor_xmax)
+             return video_data, match_score_action, match_score_start, match_score_end
+         else:
+             video_name = self.keys_list[video_idx]
+             return flat_index, video_data, video_name, snippets
+ 
+     def _get_training_anchors(self, snippets, key):
+         tmp_anchor_xmins = np.array(snippets) - self.skip_videoframes / 2.
+         tmp_anchor_xmaxs = np.array(snippets) + self.skip_videoframes / 2.
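+         # Each sampled frame becomes an anchor interval of width
+         # skip_videoframes centered on that frame.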
459
+ tmp_gt_bbox = []
460
+ tmp_ioa_list = []
461
+ anno_df_video = self .anno_df [self .anno_df .video == key ]
462
+ gt_xmins = anno_df_video .startFrame .values [:]
463
+ gt_xmaxs = anno_df_video .endFrame .values [:]
+         if len(gt_xmins) == 0:
+             raise ValueError('No ground-truth segments for video: %s' % key)
+ 
+         for idx in range(len(gt_xmins)):
+             tmp_ioa = ioa_with_anchors(gt_xmins[idx], gt_xmaxs[idx],
+                                        tmp_anchor_xmins[0],
+                                        tmp_anchor_xmaxs[-1])
+             tmp_ioa_list.append(tmp_ioa)
+             if tmp_ioa > 0:
+                 tmp_gt_bbox.append([gt_xmins[idx], gt_xmaxs[idx]])
+ 
+         # print(len(tmp_gt_bbox), max(tmp_ioa_list), tmp_ioa_list)
+         if len(tmp_gt_bbox) > 0:
+             # NOTE: The previous 0.9 IoA threshold was removed; any positive
+             # overlap with the clip now qualifies.
+             return tmp_anchor_xmins, tmp_anchor_xmaxs, tmp_gt_bbox
+         return None
+ 
+     def _get_train_label(self, gt_bbox, anchor_xmin, anchor_xmax):
+         gt_bbox = np.array(gt_bbox)
+         gt_xmins = gt_bbox[:, 0]
+         gt_xmaxs = gt_bbox[:, 1]
+         # Same quantity as gt_len; named to match the THUMOS reference code.
+         gt_duration = gt_xmaxs - gt_xmins
+         gt_duration_boundary = np.maximum(
+             self.skip_videoframes, gt_duration * self.boundary_ratio)
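+         # Boundary regions are windows of width max(skip_videoframes,
+         # boundary_ratio * duration) centered on each segment's start and end.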
+         gt_start_bboxs = np.stack(
+             (gt_xmins - gt_duration_boundary / 2, gt_xmins + gt_duration_boundary / 2),
+             axis=1)
+         gt_end_bboxs = np.stack(
+             (gt_xmaxs - gt_duration_boundary / 2, gt_xmaxs + gt_duration_boundary / 2),
+             axis=1)
+ 
+         match_score_action = [
+             np.max(
+                 ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx],
+                                  gt_xmins, gt_xmaxs))
+             for jdx in range(len(anchor_xmin))
+         ]
+ 
+         match_score_start = [
+             np.max(
+                 ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx],
+                                  gt_start_bboxs[:, 0], gt_start_bboxs[:, 1]))
+             for jdx in range(len(anchor_xmin))
+         ]
+ 
+         match_score_end = [
+             np.max(
+                 ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx],
+                                  gt_end_bboxs[:, 0], gt_end_bboxs[:, 1]))
+             for jdx in range(len(anchor_xmin))
+         ]
+ 
+         return torch.Tensor(match_score_action), torch.Tensor(match_score_start), torch.Tensor(match_score_end)
+ 
+ 
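+ # A minimal usage sketch (hypothetical option values, not part of this commit):
+ #     opt = {'video_info': 'anno.csv', 'mode': 'train', 'boundary_ratio': 0.1,
+ #            'skip_videoframes': 5, 'num_videoframes': 100,
+ #            'dist_videoframes': 375, 'fps': 30,
+ #            'train_video_file_list': 'train_list.txt'}
+ #     dataset = VideoDataset(opt, transforms, subset='train')
+ #     loader = data.DataLoader(dataset, batch_size=4, shuffle=True)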

class ProposalSampler(data.WeightedRandomSampler):
    def __init__(self, proposals, frame_list, max_zero_weight=0.25):
        """
@@ -558,8 +753,3 @@ def make_on_anno_files(mmd, videotable):
            current_end = e
        wv['annotations'].append({'label': 'on', 'segment': [current_start, current_end]})
        onmmd_anno[k] = wv
- 
- 
- 
- 
- 