import json

import numpy as np
import pandas as pd

# get_blocked_videos is assumed to be provided by the toolkit's local utils
# module; it retrieves the list of currently blocked videos from the server.
from utils import get_blocked_videos


def interpolated_prec_rec(prec, rec):
    """Interpolated AP - VOCdevkit from VOC 2011."""
    # Pad the curve so that precision is defined at recall 0 and 1.
    mprec = np.hstack([[0], prec, [0]])
    mrec = np.hstack([[0], rec, [1]])
    # Make precision monotonically decreasing (right-to-left running maximum).
    for i in range(len(mprec) - 1)[::-1]:
        mprec[i] = max(mprec[i], mprec[i + 1])
    # Sum the areas of the rectangles where recall changes.
    idx = np.where(mrec[1::] != mrec[0:-1])[0] + 1
    ap = np.sum((mrec[idx] - mrec[idx - 1]) * mprec[idx])
    return ap

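# Illustrative usage (not from the original file): with precision [1.0, 0.5]
# at recall [0.5, 1.0], the interpolated envelope is 1.0 up to recall 0.5 and
# 0.5 afterwards, so the AP is 0.5 * 1.0 + 0.5 * 0.5 = 0.75:
#
#   >>> interpolated_prec_rec(np.array([1.0, 0.5]), np.array([0.5, 1.0]))
#   0.75
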
def segment_iou(target_segment, candidate_segments):
    """Compute the temporal intersection over union between a
    target segment and all the candidate segments.

    Parameters
    ----------
    target_segment : 1d array
        Temporal target segment containing [starting, ending] times.
    candidate_segments : 2d array
        Temporal candidate segments containing N x [starting, ending] times.

    Outputs
    -------
    tiou : 1d array
        Temporal intersection over union score of the N candidate segments.
    """
    tt1 = np.maximum(target_segment[0], candidate_segments[:, 0])
    tt2 = np.minimum(target_segment[1], candidate_segments[:, 1])
    # Intersection including non-negative overlap score.
    segments_intersection = (tt2 - tt1).clip(0)
    # Segment union.
    segments_union = (candidate_segments[:, 1] - candidate_segments[:, 0]) \
        + (target_segment[1] - target_segment[0]) - segments_intersection
    # Compute overlap as the ratio of the intersection
    # over union of two segments.
    tiou = segments_intersection.astype(float) / segments_union
    return tiou

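# Illustrative usage (not from the original file): a target segment [2, 6]
# against candidates [[1, 3], [4, 10]] has intersections of 1 and 2 seconds
# and unions of 5 and 8 seconds, so the returned tiou values are 0.2 and 0.25:
#
#   >>> segment_iou(np.array([2, 6]), np.array([[1, 3], [4, 10]]))
#   array([0.2 , 0.25])
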
def wrapper_segment_iou(target_segments, candidate_segments):
    """Compute intersection over union between segments.

    Parameters
    ----------
    target_segments : ndarray
        2-dim array in format [m x 2:=[init, end]]
    candidate_segments : ndarray
        2-dim array in format [n x 2:=[init, end]]

    Outputs
    -------
    tiou : ndarray
        2-dim array [n x m] with IOU ratio.
    Note: It assumes that candidate segments are scarcer than target segments.
    """
    if candidate_segments.ndim != 2 or target_segments.ndim != 2:
        raise ValueError('Dimension of arguments is incorrect')

    n, m = candidate_segments.shape[0], target_segments.shape[0]
    tiou = np.empty((n, m))
    # Column i holds the tiou of every candidate against the i-th target.
    for i in range(m):
        tiou[:, i] = segment_iou(target_segments[i, :], candidate_segments)

    return tiou

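# Illustrative usage (not from the original file): two target segments scored
# against two candidate segments yield a 2 x 2 matrix in which tiou[i, j] is
# the overlap of candidate i with target j:
#
#   >>> targets = np.array([[2, 6], [10, 20]])
#   >>> candidates = np.array([[1, 3], [4, 10]])
#   >>> wrapper_segment_iou(targets, candidates).shape
#   (2, 2)
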

class ANETproposal(object):

    GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version']
    PROPOSAL_FIELDS = ['results', 'version', 'external_data']

    def __init__(self, ground_truth_filename=None, proposal_filename=None,
                 ground_truth_fields=GROUND_TRUTH_FIELDS,
                 proposal_fields=PROPOSAL_FIELDS,
                 tiou_thresholds=np.linspace(0.5, 0.95, 10),
                 max_avg_nr_proposals=None,
                 subset='validation', verbose=False,
                 check_status=False):
        if not ground_truth_filename:
            raise IOError('Please input a valid ground truth file.')
        if not proposal_filename:
            raise IOError('Please input a valid proposal file.')
        self.subset = subset
        self.tiou_thresholds = tiou_thresholds
        self.max_avg_nr_proposals = max_avg_nr_proposals
        self.verbose = verbose
        self.gt_fields = ground_truth_fields
        self.pred_fields = proposal_fields
        self.recall = None
        self.avg_recall = None
        self.proposals_per_video = None
        self.check_status = check_status
        # Retrieve blocked videos from server.
        if self.check_status:
            self.blocked_videos = get_blocked_videos()
        else:
            self.blocked_videos = list()
        # Import ground truth and proposals.
        self.ground_truth, self.activity_index = self._import_ground_truth(
            ground_truth_filename)
        self.proposal = self._import_proposal(proposal_filename)

        if self.verbose:
            print('[INIT] Loaded annotations from {} subset.'.format(subset))
            nr_gt = len(self.ground_truth)
            print('\tNumber of ground truth instances: {}'.format(nr_gt))
            nr_pred = len(self.proposal)
            print('\tNumber of proposals: {}'.format(nr_pred))
            print('\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds))

    def _import_ground_truth(self, ground_truth_filename):
        """Reads ground truth file, checks if it is well formatted, and returns
        the ground truth instances and the activity classes.

        Parameters
        ----------
        ground_truth_filename : str
            Full path to the ground truth json file.

        Outputs
        -------
        ground_truth : df
            Data frame containing the ground truth instances.
        activity_index : dict
            Dictionary containing class index.
        """
        with open(ground_truth_filename, 'r') as fobj:
            data = json.load(fobj)
        # Checking format.
        if not all([field in data.keys() for field in self.gt_fields]):
            raise IOError('Please input a valid ground truth file.')

        # Read ground truth data.
        activity_index, cidx = {}, 0
        video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], []
        for videoid, v in data['database'].items():
            if self.subset != v['subset']:
                continue
            if videoid in self.blocked_videos:
                continue
            for ann in v['annotations']:
                if ann['label'] not in activity_index:
                    activity_index[ann['label']] = cidx
                    cidx += 1
                video_lst.append(videoid)
                t_start_lst.append(ann['segment'][0])
                t_end_lst.append(ann['segment'][1])
                label_lst.append(activity_index[ann['label']])

        ground_truth = pd.DataFrame({'video-id': video_lst,
                                     't-start': t_start_lst,
                                     't-end': t_end_lst,
                                     'label': label_lst})
        return ground_truth, activity_index

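    # Illustrative sketch (not from the original file) of the ground-truth
    # JSON layout this parser expects; the field names follow the checks
    # above, while the video id, label, and times are made up:
    #
    #   {
    #     "version": "...",
    #     "taxonomy": [...],
    #     "database": {
    #       "some_video_id": {
    #         "subset": "validation",
    #         "annotations": [
    #           {"label": "Long jump", "segment": [12.3, 45.6]}
    #         ]
    #       }
    #     }
    #   }
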
    def _import_proposal(self, proposal_filename):
        """Reads proposal file, checks if it is well formatted, and returns
        the proposal instances.

        Parameters
        ----------
        proposal_filename : str
            Full path to the proposal json file.

        Outputs
        -------
        proposal : df
            Data frame containing the proposal instances.
        """
        with open(proposal_filename, 'r') as fobj:
            data = json.load(fobj)
        # Checking format...
        if not all([field in data.keys() for field in self.pred_fields]):
            raise IOError('Please input a valid proposal file.')

        # Read predictions.
        video_lst, t_start_lst, t_end_lst = [], [], []
        score_lst = []
        for videoid, v in data['results'].items():
            if videoid in self.blocked_videos:
                continue
            for result in v:
                video_lst.append(videoid)
                t_start_lst.append(result['segment'][0])
                t_end_lst.append(result['segment'][1])
                score_lst.append(result['score'])
        proposal = pd.DataFrame({'video-id': video_lst,
                                 't-start': t_start_lst,
                                 't-end': t_end_lst,
                                 'score': score_lst})
        return proposal

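    # Illustrative sketch (not from the original file) of the proposal JSON
    # layout this parser expects; the video id, segment, and score values are
    # made up:
    #
    #   {
    #     "version": "...",
    #     "external_data": {},
    #     "results": {
    #       "some_video_id": [
    #         {"segment": [11.0, 45.0], "score": 0.92}
    #       ]
    #     }
    #   }
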
    def evaluate(self):
        """Evaluates a proposal file. To measure the performance of a
        method for the proposal task, we compute the area under the
        average recall vs average number of proposals per video curve.
        """
        recall, avg_recall, proposals_per_video = average_recall_vs_avg_nr_proposals(
            self.ground_truth, self.proposal,
            max_avg_nr_proposals=self.max_avg_nr_proposals,
            tiou_thresholds=self.tiou_thresholds)

        area_under_curve = np.trapz(avg_recall, proposals_per_video)

        if self.verbose:
            print('[RESULTS] Performance on ActivityNet proposal task.')
            print('\tArea Under the AR vs AN curve: {}%'.format(
                100. * float(area_under_curve) / proposals_per_video[-1]))

        self.recall = recall
        self.avg_recall = avg_recall
        self.proposals_per_video = proposals_per_video

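# Illustrative usage (not from the original file); the file names are
# placeholders for an annotation file and a proposal submission file in the
# formats sketched above:
#
#   >>> anet_proposal = ANETproposal('ground_truth.json', 'proposals.json',
#   ...                              subset='validation', verbose=True)
#   >>> anet_proposal.evaluate()
#   >>> auc = np.trapz(anet_proposal.avg_recall,
#   ...                anet_proposal.proposals_per_video)
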
def average_recall_vs_avg_nr_proposals(ground_truth, proposals,
                                       max_avg_nr_proposals=None,
                                       tiou_thresholds=np.linspace(0.5, 0.95, 10)):
    """Computes the average recall given an average number
    of proposals per video.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end']
    proposals : df
        Data frame containing the proposal instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    tiou_thresholds : 1darray, optional
        array with tiou thresholds.

    Outputs
    -------
    recall : 2darray
        recall[i,j] is recall at the ith tiou threshold at the jth average
        number of proposals per video.
    average_recall : 1darray
        recall averaged over a list of tiou thresholds. This is equivalent
        to recall.mean(axis=0).
    proposals_per_video : 1darray
        average number of proposals per video.
    """

    # Get list of videos.
    video_lst = ground_truth['video-id'].unique()

    if not max_avg_nr_proposals:
        max_avg_nr_proposals = float(proposals.shape[0]) / video_lst.shape[0]

    ratio = max_avg_nr_proposals * float(video_lst.shape[0]) / proposals.shape[0]

    # Adaptation to query faster.
    ground_truth_gbvn = ground_truth.groupby('video-id')
    proposals_gbvn = proposals.groupby('video-id')

    # For each video, computes tiou scores among the retrieved proposals.
    score_lst = []
    total_nr_proposals = 0
    for videoid in video_lst:

        # Get proposals for this video.
        proposals_videoid = proposals_gbvn.get_group(videoid)
        this_video_proposals = proposals_videoid.loc[:, ['t-start', 't-end']].values

        # Sort proposals by score.
        sort_idx = proposals_videoid['score'].argsort()[::-1]
        this_video_proposals = this_video_proposals[sort_idx, :]

        # Get ground-truth instances associated to this video.
        ground_truth_videoid = ground_truth_gbvn.get_group(videoid)
        this_video_ground_truth = ground_truth_videoid.loc[:, ['t-start', 't-end']].values

        if this_video_proposals.shape[0] == 0:
            n = this_video_ground_truth.shape[0]
            score_lst.append(np.zeros((n, 1)))
            continue

        if this_video_proposals.ndim != 2:
            this_video_proposals = np.expand_dims(this_video_proposals, axis=0)
        if this_video_ground_truth.ndim != 2:
            this_video_ground_truth = np.expand_dims(this_video_ground_truth, axis=0)

        nr_proposals = np.minimum(int(this_video_proposals.shape[0] * ratio), this_video_proposals.shape[0])
        total_nr_proposals += nr_proposals
        this_video_proposals = this_video_proposals[:nr_proposals, :]

        # Compute tiou scores.
        tiou = wrapper_segment_iou(this_video_proposals, this_video_ground_truth)
        score_lst.append(tiou)

    # Given that the length of the videos is really varied, we
    # compute the number of proposals in terms of a ratio of the total
    # proposals retrieved, i.e. average recall at a percentage of proposals
    # retrieved per video.

    # Computes average recall.
    pcn_lst = np.arange(1, 101) / 100.0 * (max_avg_nr_proposals * float(video_lst.shape[0]) / total_nr_proposals)
    matches = np.empty((video_lst.shape[0], pcn_lst.shape[0]))
    positives = np.empty(video_lst.shape[0])
    recall = np.empty((tiou_thresholds.shape[0], pcn_lst.shape[0]))
    # Iterates over each tiou threshold.
    for ridx, tiou in enumerate(tiou_thresholds):

        # Inspect positives retrieved per video at different
        # number of proposals (percentage of the total retrieved).
        for i, score in enumerate(score_lst):
            # Total positives per video.
            positives[i] = score.shape[0]
            # Find proposals that satisfy the minimum tiou threshold.
            true_positives_tiou = score >= tiou
            # Get number of proposals as a percentage of total retrieved.
            pcn_proposals = np.minimum((score.shape[1] * pcn_lst).astype(int), score.shape[1])

            for j, nr_proposals in enumerate(pcn_proposals):
                # Compute the number of matches for each percentage of the proposals.
                matches[i, j] = np.count_nonzero((true_positives_tiou[:, :nr_proposals]).sum(axis=1))

        # Computes recall given the set of matches per video.
        recall[ridx, :] = matches.sum(axis=0) / positives.sum()

    # Recall is averaged.
    avg_recall = recall.mean(axis=0)

    # Get the average number of proposals per video.
    proposals_per_video = pcn_lst * (float(total_nr_proposals) / video_lst.shape[0])

    return recall, avg_recall, proposals_per_video

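# Illustrative usage (not from the original file): the function can also be
# called directly on data frames with the required columns; the values below
# are made up and only the output shapes are asserted:
#
#   >>> gt = pd.DataFrame({'video-id': ['v1', 'v1'],
#   ...                    't-start': [2.0, 10.0],
#   ...                    't-end': [6.0, 20.0]})
#   >>> props = pd.DataFrame({'video-id': ['v1', 'v1'],
#   ...                       't-start': [1.0, 9.0],
#   ...                       't-end': [5.0, 21.0],
#   ...                       'score': [0.9, 0.8]})
#   >>> recall, avg_recall, nr_props = average_recall_vs_avg_nr_proposals(gt, props)
#   >>> recall.shape, avg_recall.shape, nr_props.shape
#   ((10, 100), (100,), (100,))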