|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +from scipy.interpolate import interp1d |
| 4 | +import os |
| 5 | + |
| 6 | + |
def segment_tiou(target_segments, test_segments):
    """Temporal intersection-over-union between two sets of segments.

    Segment lengths are measured inclusively (``end - init + 1``), i.e. at
    the frame level.

    Parameters
    ----------
    target_segments : ndarray
        2-dim array in format [m x 2:=[init, end]]
    test_segments : ndarray
        2-dim array in format [n x 2:=[init, end]]

    Outputs
    -------
    tiou : ndarray
        2-dim array [m x n]; entry (i, j) is the IOU ratio between
        target segment i and test segment j.

    Raises
    ------
    ValueError
        If either argument is not a 2-dim array.
    """
    if not (target_segments.ndim == 2 and test_segments.ndim == 2):
        raise ValueError('Dimension of arguments is incorrect')

    # Targets vary along rows, tests along columns, so every pairwise
    # quantity below broadcasts to an [m x n] array.
    target_init = target_segments[:, 0][:, np.newaxis]
    target_end = target_segments[:, 1][:, np.newaxis]
    test_init = test_segments[:, 0][np.newaxis, :]
    test_end = test_segments[:, 1][np.newaxis, :]

    overlap_init = np.maximum(target_init, test_init)
    overlap_end = np.minimum(target_end, test_end)

    # Disjoint pairs yield a negative span; clip it to a zero overlap.
    intersection = (overlap_end - overlap_init + 1.0).clip(0)
    union = ((test_end - test_init + 1) + (target_end - target_init + 1)
             - intersection)

    # Overlap is the ratio of intersection over union at the frame level.
    return np.asarray(intersection / union, dtype=np.float64)
| 38 | + |
| 39 | + |
def average_recall_vs_nr_proposals(proposals,
                                   ground_truth,
                                   tiou_thresholds=np.linspace(0.5, 1.0, 11)):
    """ Computes the average recall given an average number
    of proposals per video.

    The set of videos is taken from ``proposals``; ground-truth rows that
    belong to a video without any proposal are not counted.

    Parameters
    ----------
    proposals : DataFrame
        pandas table with the resulting proposals. It must include
        the following columns: {'video-name': (str) Video identifier,
                                'f-init': (int) Starting index Frame,
                                'f-end': (int) Ending index Frame,
                                'score': (float) Proposal confidence}
    ground_truth : DataFrame
        pandas table with annotations of the dataset. It must include
        the following columns: {'video-name': (str) Video identifier,
                                'f-init': (int) Starting index Frame,
                                'f-end': (int) Ending index Frame}
    tiou_thresholds : 1darray, optional
        array (or any 1-dim sequence) with tiou thresholds.

    Outputs
    -------
    average_recall : 1darray
        recall averaged over the list of tiou thresholds, one value for
        each of the 200 retrieval percentages (0.5%, 1%, ..., 100%).
    proposals_per_video : 1darray
        average number of proposals per video at each percentage.
    """
    tiou_thresholds = np.asarray(tiou_thresholds)

    # Get list of videos.
    video_lst = proposals['video-name'].unique()

    # For each video, compute tiou between its ground-truth segments (rows)
    # and its proposals ranked by decreasing score (columns).
    score_lst = []
    for videoid in video_lst:
        video_proposals = proposals[proposals['video-name'] == videoid]
        # Rank on the raw ndarray so the result is a plain positional index.
        order = video_proposals['score'].values.argsort()[::-1]
        ranked_segments = video_proposals[['f-init', 'f-end']].values[order, :]

        gt_rows = ground_truth[ground_truth['video-name'] == videoid]
        gt_segments = gt_rows[['f-init', 'f-end']].values

        score_lst.append(segment_tiou(gt_segments, ranked_segments))

    # Given that the length of the videos is really varied, we
    # compute the number of proposals in terms of a ratio of the total
    # proposals retrieved, i.e. average recall at a percentage of proposals
    # retrieved per video.
    pcn_lst = np.arange(1, 201) / 200.0

    # Both quantities are independent of the tiou threshold, so they are
    # computed once: ground-truth count per video, and the number of
    # top-ranked proposals kept per video at every percentage.
    positives = np.array([score.shape[0] for score in score_lst], dtype=float)
    cutoffs = [(score.shape[1] * pcn_lst).astype(int) for score in score_lst]

    matches = np.empty((video_lst.shape[0], pcn_lst.shape[0]))
    recall = np.empty((tiou_thresholds.shape[0], pcn_lst.shape[0]))
    # Iterates over each tiou threshold.
    for ridx, tiou in enumerate(tiou_thresholds):

        for i, score in enumerate(score_lst):
            hit = score >= tiou
            # Entry k-1: ground-truth segments matched by at least one of
            # the top-k proposals. A leading 0 covers "no proposals kept".
            covered_by_top_k = (np.cumsum(hit, axis=1) > 0).sum(axis=0)
            covered_by_top_k = np.concatenate(([0], covered_by_top_k))
            matches[i, :] = covered_by_top_k[cutoffs[i]]

        # Computes recall given the set of matches per video.
        recall[ridx, :] = matches.sum(axis=0) / positives.sum()

    # Recall is averaged over the tiou thresholds.
    recall = recall.mean(axis=0)

    # Get the average number of proposals per video.
    proposals_per_video = pcn_lst * (
        float(proposals.shape[0]) / video_lst.shape[0])

    return recall, proposals_per_video
| 129 | + |
| 130 | + |
def recall_vs_tiou_thresholds(proposals,
                              ground_truth,
                              nr_proposals=1000,
                              tiou_thresholds=np.arange(0.05, 1.05, 0.05)):
    """ Computes recall at different tiou thresholds given a fixed
    average number of proposals per video.

    The set of videos is taken from ``proposals``; ground-truth rows that
    belong to a video without any proposal are not counted.

    Parameters
    ----------
    proposals : DataFrame
        pandas table with the resulting proposals. It must include
        the following columns: {'video-name': (str) Video identifier,
                                'f-init': (int) Starting index Frame,
                                'f-end': (int) Ending index Frame,
                                'score': (float) Proposal confidence}
    ground_truth : DataFrame
        pandas table with annotations of the dataset. It must include
        the following columns: {'video-name': (str) Video identifier,
                                'f-init': (int) Starting index Frame,
                                'f-end': (int) Ending index Frame}
    nr_proposals : int
        average number of proposals per video.
    tiou_thresholds : 1darray, optional
        array (or any 1-dim sequence) with tiou thresholds.

    Outputs
    -------
    recall : 1darray
        recall at each tiou threshold.
    tiou_thresholds : 1darray
        the tiou thresholds the recall was evaluated at.
    """
    tiou_thresholds = np.asarray(tiou_thresholds)

    # Get list of videos.
    video_lst = proposals['video-name'].unique()

    # For each video, compute tiou between its ground-truth segments (rows)
    # and its proposals ranked by decreasing score (columns).
    score_lst = []
    for videoid in video_lst:
        video_proposals = proposals[proposals['video-name'] == videoid]
        # Rank on the raw ndarray so the result is a plain positional index.
        order = video_proposals['score'].values.argsort()[::-1]
        ranked_segments = video_proposals[['f-init', 'f-end']].values[order, :]

        gt_rows = ground_truth[ground_truth['video-name'] == videoid]
        gt_segments = gt_rows[['f-init', 'f-end']].values

        score_lst.append(segment_tiou(gt_segments, ranked_segments))

    # To obtain the average number of proposals, we need to define a
    # percentage of proposals to get per video.
    pcn = (video_lst.shape[0] * float(nr_proposals)) / proposals.shape[0]

    # Neither the ground-truth count nor the retained top-ranked proposals
    # depend on the tiou threshold, so both are prepared once. A cut-off
    # larger than the number of proposals simply keeps all of them.
    positives = np.array([score.shape[0] for score in score_lst], dtype=float)
    top_scores = [score[:, :int(score.shape[1] * pcn)] for score in score_lst]

    # Computes recall at different tiou thresholds.
    matches = np.empty((video_lst.shape[0], tiou_thresholds.shape[0]))
    recall = np.empty(tiou_thresholds.shape[0])
    for ridx, tiou in enumerate(tiou_thresholds):

        for i, kept in enumerate(top_scores):
            # Ground-truth segments matched by at least one kept proposal.
            matches[i, ridx] = (kept >= tiou).any(axis=1).sum()

        # Computes recall given the set of matches per video.
        recall[ridx] = matches[:, ridx].sum(axis=0) / positives.sum()

    return recall, tiou_thresholds
| 211 | + |
| 212 | + |
def evaluation_proposal(opt):
    """Print the average recall of the stored proposals at fixed budgets.

    Reads 'thumos14_results.csv' from ``opt['postprocessed_results_dir']``
    and the annotation table from ``opt['video_info']``, computes average
    recall vs. average number of proposals per video, and prints the
    linearly interpolated recall at 50, 100, 200, 500 and 1000 proposals.

    Parameters
    ----------
    opt : dict
        Must provide the keys 'postprocessed_results_dir' (directory that
        holds the results csv) and 'video_info' (path of the ground-truth
        csv). Both tables must carry the columns required by
        ``average_recall_vs_nr_proposals``.

    Outputs
    -------
    None. Results are written to stdout.
    """
    results_path = os.path.join(opt['postprocessed_results_dir'],
                                'thumos14_results.csv')
    bsn_results = pd.read_csv(results_path)
    ground_truth = pd.read_csv(opt['video_info'])

    # Computes average recall vs average number of proposals.
    average_recall, average_nr_proposals = average_recall_vs_nr_proposals(
        bsn_results, ground_truth)

    print(average_nr_proposals.shape)
    # The sampled curve only spans the range of proposals actually
    # retrieved. With bounds_error=False a budget outside that range is
    # reported as nan instead of aborting the evaluation with ValueError.
    f = interp1d(average_nr_proposals, average_recall, axis=0,
                 bounds_error=False)
    print(f(50), f(100), f(200), f(500), f(1000))
0 commit comments