
Commit d6b318c (1 parent: febe4a5)

[benchmark/WIP] Benchmarking PySceneDetect detectors' performance (#484)

* create benchmarks/ directory for PySceneDetect performance evaluation
* implemented evaluator on the BBC dataset

File tree: 6 files changed, +166 −0 lines

.gitignore — 4 additions & 0 deletions

```diff
@@ -9,6 +9,10 @@ tests/resources/*
 *.mkv
 *.m4v
 *.csv
+benchmarks/BBC/*.mp4
+*.txt
+benchmarks/RAI/*.mp4
+*.txt
 
 
 # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
```

benchmarks/BBC/.gitkeep — Whitespace-only changes.

benchmarks/README.md — 50 additions & 0 deletions

# Benchmarking PySceneDetect

This directory benchmarks the performance of PySceneDetect in terms of both latency and accuracy.
We evaluate it on a standard dataset for video shot detection: [BBC](https://zenodo.org/records/14865504).

## Dataset Download

### BBC

```
# annotations
wget -O BBC/fixed.zip https://zenodo.org/records/14873790/files/fixed.zip
unzip BBC/fixed.zip -d BBC
rm -f BBC/fixed.zip

# videos
wget -O BBC/videos.zip https://zenodo.org/records/14873790/files/videos.zip
unzip BBC/videos.zip -d BBC
rm -f BBC/videos.zip
```

### Evaluation

To evaluate PySceneDetect on the BBC dataset, run:

```
python benchmark.py --detector <detector_name>
```

For example, to evaluate ContentDetector:

```
python benchmark.py --detector detect-content
```

(The dataset is currently fixed to BBC; `benchmark.py` takes only the `--detector` argument.)

### Results

Performance is reported as recall, precision, F1, and elapsed time (averaged per video).
The results below show that ContentDetector achieves the best accuracy on the BBC dataset.

| Detector          | Recall | Precision | F1    | Elapsed time (seconds) |
|:-----------------:|:------:|:---------:|:-----:|:----------------------:|
| AdaptiveDetector  | 7.80   | 96.18     | 14.44 | 25.75                  |
| ContentDetector   | 84.52  | 88.77     | 86.59 | 25.50                  |
| HashDetector      | 8.57   | 80.27     | 15.48 | 23.78                  |
| HistogramDetector | 8.22   | 70.82     | 14.72 | 18.60                  |
| ThresholdDetector | 0.00   | 0.00      | 0.00  | 18.95                  |

## Citation

### BBC

```
@InProceedings{bbc_dataset,
  author    = {Lorenzo Baraldi and Costantino Grana and Rita Cucchiara},
  title     = {A Deep Siamese Network for Scene Detection in Broadcast Videos},
  booktitle = {Proceedings of the 23rd ACM International Conference on Multimedia},
  year      = {2015},
}
```
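For reference, `benchmark.py` prints a single summary line per run; plugging the table's `detect-content` numbers into its output format would look like this (illustrative, not a fresh measurement):

```
Detector: detect-content Recall: 84.52, Precision: 88.77, F1: 86.59 Elapsed time: 25.50
```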

benchmarks/bbc_dataset.py — 26 additions & 0 deletions

```python
import glob
import os


class BBCDataset:
    """The BBC dataset, proposed by Baraldi et al. in "A Deep Siamese Network
    for Scene Detection in Broadcast Videos" (https://arxiv.org/abs/1510.08893).

    The dataset consists of 11 videos (BBC/videos/bbc_01.mp4 to
    BBC/videos/bbc_11.mp4). The annotated scenes are provided in corresponding
    files (BBC/fixed/[i]-scenes.txt).
    """

    def __init__(self, dataset_dir: str):
        self._video_files = sorted(glob.glob(os.path.join(dataset_dir, 'videos', '*.mp4')))
        self._scene_files = sorted(glob.glob(os.path.join(dataset_dir, 'fixed', '*-scenes.txt')))
        # Each video must have exactly one annotation file, matched by ID.
        assert len(self._video_files) == len(self._scene_files)
        for video_file, scene_file in zip(self._video_files, self._scene_files):
            video_id = os.path.basename(video_file).replace('bbc_', '').split('.')[0]
            scene_id = os.path.basename(scene_file).split('-')[0]
            assert video_id == scene_id

    def __getitem__(self, index):
        return self._video_files[index], self._scene_files[index]

    def __len__(self):
        return len(self._video_files)
```
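Because `BBCDataset` implements `__getitem__` and `__len__`, it can be indexed or iterated directly. A minimal usage sketch, assuming the BBC data has been downloaded into place as described in the README:

```python
from bbc_dataset import BBCDataset

dataset = BBCDataset('BBC')
print(len(dataset))  # expected: 11 videos
for video_file, scene_file in dataset:  # iteration falls back to __getitem__
    print(video_file, scene_file)
```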

benchmarks/benchmark.py — 51 additions & 0 deletions

```python
import argparse
import time

from bbc_dataset import BBCDataset
from evaluator import Evaluator

from tqdm import tqdm
from scenedetect import detect
from scenedetect import (AdaptiveDetector, ContentDetector, HashDetector,
                         HistogramDetector, ThresholdDetector)


def _load_detector(detector_name: str):
    detector_map = {
        'detect-adaptive': AdaptiveDetector(),
        'detect-content': ContentDetector(),
        'detect-hash': HashDetector(),
        'detect-hist': HistogramDetector(),
        'detect-threshold': ThresholdDetector(),
    }
    return detector_map[detector_name]


def _detect_scenes(detector, dataset):
    pred_scenes = {}
    for video_file, scene_file in tqdm(dataset):
        start = time.time()
        pred_scene_list = detect(video_file, detector)
        elapsed = time.time() - start

        # Record the end frame of each detected scene as the predicted cut.
        pred_scenes[scene_file] = {
            'video_file': video_file,
            'elapsed': elapsed,
            'pred_scenes': [scene[1].frame_num for scene in pred_scene_list],
        }

    return pred_scenes


def main(args):
    dataset = BBCDataset('BBC')
    detector = _load_detector(args.detector)
    pred_scenes = _detect_scenes(detector, dataset)
    evaluator = Evaluator()
    result = evaluator.evaluate_performance(pred_scenes)

    print('Detector: {} Recall: {:.2f}, Precision: {:.2f}, F1: {:.2f} Elapsed time: {:.2f}'.format(
        args.detector, result['recall'], result['precision'], result['f1'], result['elapsed']))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Benchmarking PySceneDetect performance.')
    parser.add_argument(
        '--detector',
        type=str,
        choices=['detect-adaptive', 'detect-content', 'detect-hash', 'detect-hist', 'detect-threshold'],
        default='detect-content',
        help='Detector name. Implemented detectors are listed at https://www.scenedetect.com/docs/latest/cli.html')
    args = parser.parse_args()
    main(args)
```
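For context on the `scene[1].frame_num` expression above: `scenedetect.detect` returns a list of `(start, end)` `FrameTimecode` pairs, one per scene, so taking each scene's end frame yields the predicted cut positions. A minimal sketch (the video path is a placeholder for any local file):

```python
from scenedetect import detect, ContentDetector

# Hypothetical input; substitute any downloaded BBC video.
scene_list = detect('BBC/videos/bbc_01.mp4', ContentDetector())
for start, end in scene_list:
    print(start.frame_num, end.frame_num)  # scene boundaries in frames
```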

benchmarks/evaluator.py — 35 additions & 0 deletions

```python
from statistics import mean


class Evaluator:
    def _load_scenes(self, scene_filename):
        # Take the second tab-separated field of each annotation line
        # (the scene's end frame, used as the ground-truth cut position).
        with open(scene_filename) as f:
            gt_scene_list = [x.strip().split('\t')[1] for x in f.readlines()]
        # Shift by one frame to align the annotations with PySceneDetect's
        # frame numbering.
        gt_scene_list = [int(x) + 1 for x in gt_scene_list]
        return gt_scene_list

    def evaluate_performance(self, pred_scenes):
        total_correct = 0
        total_pred = 0
        total_gt = 0

        for scene_file, pred in pred_scenes.items():
            gt_scene_list = self._load_scenes(scene_file)
            pred_list = pred['pred_scenes']
            # A predicted cut counts as correct only on an exact frame match.
            total_correct += len(set(pred_list) & set(gt_scene_list))
            total_pred += len(pred_list)
            total_gt += len(gt_scene_list)

        # Guard against division by zero, e.g. a detector that finds no cuts.
        recall = total_correct / total_gt if total_gt else 0
        precision = total_correct / total_pred if total_pred else 0
        f1 = 2 * recall * precision / (recall + precision) if (recall + precision) != 0 else 0
        avg_elapsed = mean([x['elapsed'] for x in pred_scenes.values()])
        return {
            'recall': recall * 100,
            'precision': precision * 100,
            'f1': f1 * 100,
            'elapsed': avg_elapsed,
        }
```
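A toy sanity check of the metric computation, using a hypothetical annotation file written on the fly (file name and values are illustrative only):

```python
import tempfile

from evaluator import Evaluator

# Fake annotation file: two scenes ending at frames 99 and 199 (0-based),
# which _load_scenes shifts to 100 and 200.
with tempfile.NamedTemporaryFile('w', suffix='-scenes.txt', delete=False) as f:
    f.write('0\t99\n100\t199\n')
    scene_file = f.name

pred_scenes = {scene_file: {'video_file': 'toy.mp4', 'elapsed': 1.0,
                            'pred_scenes': [100, 250]}}  # one hit, one miss
result = Evaluator().evaluate_performance(pred_scenes)
print(result)  # recall 50.0, precision 50.0, f1 50.0, elapsed 1.0
```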
