cinjon
diff --git a/‎data/gymnastics_annotations/video_info.sep052019.fps12.csv
+413-413 b/‎data/gymnastics_annotations/video_info.sep052019.fps12.csv
+413-413
diff --git a/‎dataset.py
+10-4 b/‎dataset.py
+10-4
diff --git a/‎eval2.py
+223 b/‎eval2.py
+223
diff --git a/‎gen_pem_results_jobs.py
+15-10 b/‎gen_pem_results_jobs.py
+15-10
diff --git a/‎gen_postprocessed_results_jobs.py
+46 b/‎gen_postprocessed_results_jobs.py
+46
diff --git a/‎gen_tem_results_jobs.py
+1-4 b/‎gen_tem_results_jobs.py
+1-4
@@ -209,6 +209,7 @@ def _get_video_data(self, data, index):
 class ThumosImages(Thumos):
 
     def __init__(self, opt, subset=None, fps=30, image_dir=None, img_loading_func=None):
+        """Set up an image-backed Thumos dataset.
+
+        `opt` must contain the key 'do_augment'. Augmentation is switched on
+        only when that option is truthy and `subset` is 'train'. The
+        remaining arguments are forwarded to the parent constructor with
+        `feature_dirs=None`.
+        """
+        # NOTE(review): set before the parent constructor runs, presumably so
+        # the flag already exists if the parent loads data -- confirm.
+        self.do_augment = opt['do_augment'] and subset == 'train'
         super(ThumosImages, self).__init__(opt, subset, feature_dirs=None, fps=fps, image_dir=image_dir, img_loading_func=img_loading_func)
 
     def _get_video_data(self, data, index):
@@ -217,7 +218,8 @@ def _get_video_data(self, data, index):
         path = os.path.join(self.image_dir, name)
         path = Path(path)
         paths = [path / ('%010.4f.npy' % (i / self.fps)) for i in indices]
-        imgs = [self.img_loading_func(p.absolute()) for p in paths if p.exists()]
+        imgs = [self.img_loading_func(p.absolute(), do_augment=self.do_augment)
+                for p in paths if p.exists()]
         # if len(imgs) < self.window_size:
         #     imgs.extend([np.zeros(imgs[-1].shape) for _ in range(self.window_size - len(imgs))])
 
@@ -275,6 +277,7 @@ class GymnasticsDataSet(data.Dataset):
     def __init__(self, opt, subset="train", img_loading_func=None, overlap_windows=False):
         self.subset = subset
         self.mode = opt["mode"]
+        self.do_augment = opt['do_augment'] and subset == 'train'
         self.img_loading_func = img_loading_func
         self.overlap_windows = overlap_windows
 
@@ -361,7 +364,10 @@ def _get_base_data(self, index, start=None, end=None):
             for i in range(start, end, self.skip_videoframes) \
             if i < max_frames
         ]
-        imgs = [self.img_loading_func(p.absolute()) for p in paths]
+
+        imgs = [self.img_loading_func(p.absolute(), do_augment=self.do_augment)
+                for p in paths if p.exists()]
+
         diff = len(list(range(start, end, self.skip_videoframes))) - len(imgs)
         if type(imgs[0]) == np.array:
             if diff > 0:
@@ -558,7 +564,7 @@ def _getDatasetDict(self):
                 pdf = pd.read_csv(pgm_proposals_path)                    
                 video_feature = np.load(pgm_features_path)
                 pre_count = len(pdf)
-                if self.top_K is not None:
+                if self.top_K > 0:
                     try:
                         pdf = pdf.sort_values(by="score", ascending=False)
                     except KeyError:
@@ -606,7 +612,7 @@ def __getitem__(self, index):
             pdf = pdf.sort_values(by="score", ascending=False)
             # ***
             video_feature = np.load(pgm_features_path)
-            if self.top_K is not None:
+            if self.top_K > 0:
                 pdf = pdf[:self.top_K]
                 video_feature = video_feature[:self.top_K, :]
 
 
@@ -0,0 +1,223 @@
+import numpy as np
+import pandas as pd
+from scipy.interpolate import interp1d
+import os
+
+
+def segment_tiou(target_segments, test_segments):
+    """Pairwise temporal intersection-over-union between two segment sets.
+
+    Parameters
+    ----------
+    target_segments : ndarray
+        2-dim array in format [m x 2:=[init, end]]
+    test_segments : ndarray
+        2-dim array in format [n x 2:=[init, end]]
+
+    Outputs
+    -------
+    tiou : ndarray
+        2-dim float array [m x n]; entry (i, j) is the IoU of target
+        segment i with test segment j. Lengths are measured inclusively
+        (end - init + 1), i.e. at the frame level.
+
+    Raises
+    ------
+    ValueError
+        If either argument is not 2-dimensional.
+    """
+    if not (target_segments.ndim == 2 and test_segments.ndim == 2):
+        raise ValueError('Dimension of arguments is incorrect')
+
+    # Targets become column vectors and test segments row vectors, so every
+    # arithmetic step below broadcasts to an [m x n] matrix.
+    target_init = target_segments[:, 0][:, np.newaxis]
+    target_end = target_segments[:, 1][:, np.newaxis]
+    test_init = test_segments[:, 0][np.newaxis, :]
+    test_end = test_segments[:, 1][np.newaxis, :]
+
+    # Inclusive overlap length, floored at zero for disjoint segments.
+    overlap = (np.minimum(target_end, test_end)
+               - np.maximum(target_init, test_init) + 1.0).clip(0)
+    combined = ((test_end - test_init + 1)
+                + (target_end - target_init + 1) - overlap)
+
+    # Write into a preallocated default-dtype (float64) array so the result
+    # dtype does not depend on the input dtype.
+    tiou = np.empty((target_segments.shape[0], test_segments.shape[0]))
+    tiou[:, :] = overlap / combined
+    return tiou
+    
+    
+def average_recall_vs_nr_proposals(proposals,
+                                   ground_truth,
+                                   tiou_thresholds=np.linspace(0.5, 1.0, 11)):
+    """Average recall as a function of the average number of proposals
+    kept per video.
+
+    Parameters
+    ----------
+    proposals : DataFrame
+        pandas table with the resulting proposals. It must include
+        the following columns: {'video-name': (str) Video identifier,
+                                'f-init': (int) Starting index Frame,
+                                'f-end': (int) Ending index Frame,
+                                'score': (float) Proposal confidence}
+    ground_truth : DataFrame
+        pandas table with annotations of the dataset. It must include
+        the following columns: {'video-name': (str) Video identifier,
+                                'f-init': (int) Starting index Frame,
+                                'f-end': (int) Ending index Frame}
+    tiou_thresholds : 1darray, optional
+        array with tiou thresholds.
+
+    Outputs
+    -------
+    average_recall : 1darray
+        recall averaged over the tiou thresholds, one entry per
+        retrieval percentage (200 entries, 0.5% .. 100%).
+    proposals_per_video : 1darray
+        average number of proposals per video at each percentage.
+    """
+    video_names = proposals['video-name'].unique()
+
+    # One [num_gt x num_proposals] tiou matrix per video, with the proposal
+    # columns ordered from highest to lowest score.
+    per_video_tiou = []
+    for name in video_names:
+        video_props = proposals[proposals['video-name'] == name]
+        ranking = video_props['score'].argsort()[::-1]
+        ranked_segments = video_props[['f-init', 'f-end']].values[ranking, :]
+
+        video_gt = ground_truth[ground_truth['video-name'] == name]
+        gt_segments = video_gt[['f-init', 'f-end']].values
+
+        per_video_tiou.append(segment_tiou(gt_segments, ranked_segments))
+
+    # Video lengths vary a lot, so the number of proposals is expressed as a
+    # fraction of each video's retrieved proposals rather than a fixed count.
+    fractions = np.arange(1, 201) / 200.0
+    num_videos = video_names.shape[0]
+
+    # Ground-truth instances per video; independent of threshold/fraction.
+    gt_counts = np.array([scores.shape[0] for scores in per_video_tiou],
+                         dtype=float)
+
+    hits = np.empty((num_videos, fractions.shape[0]))
+    recall = np.empty((tiou_thresholds.shape[0], fractions.shape[0]))
+    for t_idx, threshold in enumerate(tiou_thresholds):
+        for v_idx, scores in enumerate(per_video_tiou):
+            above = scores >= threshold
+            for f_idx, fraction in enumerate(fractions):
+                # Keep only the top-scoring share of this video's proposals.
+                top_k = int(scores.shape[1] * fraction)
+                # A ground-truth instance counts once if any kept proposal
+                # reaches the threshold.
+                hits[v_idx, f_idx] = above[:, :top_k].any(axis=1).sum()
+
+        recall[t_idx, :] = hits.sum(axis=0) / gt_counts.sum()
+
+    # Average over thresholds.
+    average_recall = recall.mean(axis=0)
+
+    proposals_per_video = fractions * (
+        float(proposals.shape[0]) / num_videos)
+
+    return average_recall, proposals_per_video
+                
+                
+def recall_vs_tiou_thresholds(proposals,
+                              ground_truth,
+                              nr_proposals=1000,
+                              tiou_thresholds=np.arange(0.05, 1.05, 0.05)):
+    """ Computes recall at different tiou thresholds given a fixed
+        average number of proposals per video.
+
+    Parameters
+    ----------
+    proposals : DataFrame
+        pandas table with the resulting proposals. It must include
+        the following columns: {'video-name': (str) Video identifier,
+                                'f-init': (int) Starting index Frame,
+                                'f-end': (int) Ending index Frame,
+                                'score': (float) Proposal confidence}
+    ground_truth : DataFrame
+        pandas table with annotations of the dataset. It must include
+        the following columns: {'video-name': (str) Video identifier,
+                                'f-init': (int) Starting index Frame,
+                                'f-end': (int) Ending index Frame}
+    nr_proposals : int
+        average number of proposals per video.
+    tiou_thresholds : 1darray, optional
+        array with tiou thresholds.
+
+    Outputs
+    -------
+    recall : 1darray
+        recall at each tiou threshold (same length as tiou_thresholds).
+    tiou_thresholds : 1darray
+        the thresholds that were evaluated, returned unchanged.
+    """
+    # Get list of videos.
+    video_lst = proposals['video-name'].unique()
+
+    # For each video, computes tiou scores among the retrieved proposals.
+    score_lst = []
+    for videoid in video_lst:
+
+        # Get proposals for this video.
+        prop_idx = proposals['video-name'] == videoid
+        this_video_proposals = proposals[prop_idx][['f-init', 'f-end']].values
+        # Sort proposals by score.
+        sort_idx = proposals[prop_idx]['score'].argsort()[::-1]
+        this_video_proposals = this_video_proposals[sort_idx, :]
+
+        # Get ground-truth instances associated to this video.
+        gt_idx = ground_truth['video-name'] == videoid
+        this_video_ground_truth = ground_truth[gt_idx][['f-init',
+                                                        'f-end']].values
+
+        # Compute tiou scores.
+        tiou = segment_tiou(this_video_ground_truth, this_video_proposals)
+        score_lst.append(tiou)
+
+    # To obtain the average number of proposals, we need to define a
+    # percentage of proposals to get per video.
+    pcn = (video_lst.shape[0] * float(nr_proposals)) / proposals.shape[0]
+
+    # Neither the ground-truth count nor the per-video cut-off depends on the
+    # threshold, so both are computed once. The cut-off gets its own name
+    # instead of overwriting the `nr_proposals` argument.
+    positives = np.array([score.shape[0] for score in score_lst], dtype=float)
+    kept_per_video = [int(score.shape[1] * pcn) for score in score_lst]
+
+    # Computes recall at different tiou thresholds.
+    matches = np.empty((video_lst.shape[0], tiou_thresholds.shape[0]))
+    recall = np.empty(tiou_thresholds.shape[0])
+    for ridx, tiou in enumerate(tiou_thresholds):
+
+        for i, (score, kept) in enumerate(zip(score_lst, kept_per_video)):
+            # A ground-truth instance is matched if any kept proposal
+            # satisfies the minimum tiou threshold.
+            matches[i, ridx] = ((score[:, :kept] >= tiou).sum(axis=1) >
+                                0).sum()
+
+        # Computes recall given the set of matches per video.
+        recall[ridx] = matches[:, ridx].sum(axis=0) / positives.sum()
+
+    return recall, tiou_thresholds
+            
+
+def evaluation_proposal(opt):
+    """Print average recall at 50/100/200/500/1000 proposals per video.
+
+    Reads 'thumos14_results.csv' from opt['postprocessed_results_dir'] and
+    the ground truth from opt['video_info'], computes the average-recall
+    curve and prints its shape followed by the interpolated values.
+
+    Parameters
+    ----------
+    opt : dict
+        must contain the keys 'postprocessed_results_dir' and 'video_info'.
+    """
+    results_path = os.path.join(opt['postprocessed_results_dir'],
+                                'thumos14_results.csv')
+    bsn_results = pd.read_csv(results_path)
+    ground_truth = pd.read_csv(opt['video_info'])
+
+    # Computes average recall vs average number of proposals.
+    average_recall, average_nr_proposals = average_recall_vs_nr_proposals(
+        bsn_results, ground_truth)
+
+    print(average_nr_proposals.shape)
+    # interp1d raises ValueError for queries outside the sampled range by
+    # default. The curve tops out at the average number of proposals actually
+    # retrieved per video, which can be below 1000. Beyond that point recall
+    # cannot change, so the last value is reused; below the first sample the
+    # value is undefined and reported as NaN.
+    f = interp1d(average_nr_proposals, average_recall, axis=0,
+                 bounds_error=False,
+                 fill_value=(np.nan, average_recall[-1]))
+    print(f(50), f(100), f(200), f(500), f(1000))
@@ -12,28 +12,33 @@
 
 email = '[email protected]'
 code_directory = '/private/home/cinjon/Code/BSN-boundary-sensitive-network.pytorch'
-anno_directory = '/private/home/cinjon/Code/BSN-boundary-sensitive-network.pytorch/data/gymnastics_annotations'
+
 base_dir = '/checkpoint/cinjon/spaceofmotion/bsn'
 pem_dir = os.path.join(base_dir, 'peminf')
 pem_results_dir = os.path.join(pem_dir, 'results')
 if not os.path.exists(pem_results_dir):
     os.makedirs(pem_results_dir)
 ckpt_directory = os.path.join(pem_dir, 'do_ckpts')
 
-regex = re.compile('.*(\d{5}).*')
+# Raw string: '\d' in a normal literal is an invalid escape sequence and
+# triggers a warning on recent Python versions. The pattern is unchanged.
+regex = re.compile(r'.*ckpt.*-(\d{5}).*')
+num_gpus = 4
 
 for ckpt_subdir in os.listdir(ckpt_directory):
     counter = int(regex.match(ckpt_subdir).groups()[0])
     _job = run(find_counter=counter)
-    _job['num_gpus'] = 8
-    _job['num_cpus'] = 8 * 10
-    _job['gb'] = 64 * 8
-    _job['time'] = 4 # what time should this be?
+    _job['num_gpus'] = num_gpus
+    _job['num_cpus'] = num_gpus * 10
+    _job['gb'] = 64 * num_gpus
+    _job['time'] = 1.5
     _job['pem_inference_results_dir'] = pem_results_dir
-    _job['pem_results_subset'] = _job['pem_train_subset']
     _job['pem_inference_subset'] = 'full'
     _job['mode'] = 'inference'
     _job['checkpoint_path'] = os.path.join(ckpt_directory, ckpt_subdir)
-    
-    print(counter, sorted(_job.items()))
-    fb_run_batch(_job, counter, email, code_directory)
+
+    name = _job['name']
+    for ckpt_epoch in [15, 30]:
+        _job['checkpoint_epoch'] = ckpt_epoch
+        _job['name'] = '%s.ckpt%d' % (name, ckpt_epoch)
+        print(counter, _job['name'])
+        print(sorted(_job.items()))
+        fb_run_batch(_job, counter, email, code_directory)
@@ -0,0 +1,46 @@
+"""Run the jobs that generate TEM Results.
+
+Example commands:
+python gen_postprocessed_results_jobs.py
+"""
+from copy import deepcopy
+import os
+import re
+import sys
+
+from run_on_cluster import fb_run_batch as func
+from pem_jobs import run as pemrun
+
+email = '[email protected]'
+code_directory = '/private/home/cinjon/Code/BSN-boundary-sensitive-network.pytorch'
+
+base_dir = '/checkpoint/cinjon/spaceofmotion/bsn'
+checkpoint_path = os.path.join(base_dir, 'checkpoint', 'pem')
+pem_dir = os.path.join(base_dir, 'peminf')
+pem_results_dir = os.path.join(pem_dir, 'results')
+postprocessed_results_dir = os.path.join(base_dir, 'postprocessing')
+
+# Raw string so that '\d' is not treated as an (invalid) string escape.
+regex = re.compile(r'.*ckpt.*-(\d{5}).*')
+num_gpus = 0
+
+
+# Submit one post-processing job for every <pem results subdir>/<ckpt subdir>
+# pair found under pem_results_dir.
+for pem_results_subdir in os.listdir(pem_results_dir):
+    # The 5-digit job counter is parsed from the directory name. Entries that
+    # do not follow the naming scheme are skipped rather than aborting the
+    # run with an AttributeError after some jobs were already submitted.
+    counter_match = regex.match(pem_results_subdir)
+    if counter_match is None:
+        print('Skipping %s: no job counter found in name' % pem_results_subdir)
+        continue
+    counter = int(counter_match.groups()[0])
+    job = pemrun(find_counter=counter)
+
+    for ckpt_subdir in os.listdir(os.path.join(pem_results_dir, pem_results_subdir)):
+        # Work on a copy so per-checkpoint overrides never leak between jobs.
+        _job = deepcopy(job)
+        _job['module'] = 'Post_processing'
+        dirkey = '%s/%s' % (pem_results_subdir, ckpt_subdir)
+        _job['postprocessed_results_dir'] = os.path.join(postprocessed_results_dir, dirkey)
+        _job['pem_inference_results_dir'] = os.path.join(pem_results_dir, dirkey)
+        if 'thumos' in _job['dataset']:
+            _job['video_info'] = _job['video_info'].replace('Full_Annotation.csv', 'thumos14_test_groundtruth.csv')
+        _job['name'] = '2019.09.18.%s.%s' % (pem_results_subdir, ckpt_subdir)
+        _job['num_gpus'] = num_gpus
+        _job['num_cpus'] = 48
+        _job['gb'] = 64
+        _job['time'] = 4
+
+        func(_job, counter, email, code_directory)
@@ -13,7 +13,7 @@
 
 email = '[email protected]'
 code_directory = '/private/home/cinjon/Code/BSN-boundary-sensitive-network.pytorch'
-# anno_directory = '/private/home/cinjon/Code/BSN-boundary-sensitive-network.pytorch/data/gymnastics_annotations'
+
 base_dir = '/checkpoint/cinjon/spaceofmotion/bsn'
 tem_dir = os.path.join(base_dir, 'teminf')
 tem_results_dir = os.path.join(tem_dir, 'results')
@@ -23,9 +23,6 @@
 
 regex = re.compile('.*(\d{5}).*')
 
-now = datetime.datetime.now()
-print(now)
-
 for ckpt_subdir in os.listdir(ckpt_directory):
     counter = int(regex.match(ckpt_subdir).groups()[0])
     if counter not in [195]: