diff --git a/torchbench/image_classification/cifar10.py b/torchbench/image_classification/cifar10.py
index 867263c..2de26e8 100644
--- a/torchbench/image_classification/cifar10.py
+++ b/torchbench/image_classification/cifar10.py
@@ -36,6 +36,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -70,6 +71,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/cifar100.py b/torchbench/image_classification/cifar100.py
index 1f24341..45188df 100644
--- a/torchbench/image_classification/cifar100.py
+++ b/torchbench/image_classification/cifar100.py
@@ -36,6 +36,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -70,6 +71,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/imagenet.py b/torchbench/image_classification/imagenet.py
index 47630e7..92fc312 100644
--- a/torchbench/image_classification/imagenet.py
+++ b/torchbench/image_classification/imagenet.py
@@ -102,6 +102,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
     """Benchmarking function.
 
@@ -206,6 +207,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/mnist.py b/torchbench/image_classification/mnist.py
index 7b8ff83..c132b90 100644
--- a/torchbench/image_classification/mnist.py
+++ b/torchbench/image_classification/mnist.py
@@ -35,6 +35,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -69,6 +70,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/stl10.py b/torchbench/image_classification/stl10.py
index 01304c3..6553cd2 100644
--- a/torchbench/image_classification/stl10.py
+++ b/torchbench/image_classification/stl10.py
@@ -35,6 +35,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -69,6 +70,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/svhn.py b/torchbench/image_classification/svhn.py
index 14e7add..0d1386b 100644
--- a/torchbench/image_classification/svhn.py
+++ b/torchbench/image_classification/svhn.py
@@ -38,6 +38,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -72,6 +73,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/utils.py b/torchbench/image_classification/utils.py
index 179a114..b20cf46 100644
--- a/torchbench/image_classification/utils.py
+++ b/torchbench/image_classification/utils.py
@@ -16,6 +16,7 @@ def evaluate_classification(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     top1 = AverageMeter()
     top5 = AverageMeter()
@@ -45,24 +46,25 @@ def evaluate_classification(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch run "
-                        "hash). Returning cached results."
-                    )
-
-                    speed_mem_metrics = {
-                        'Tasks / Evaluation Time': None,
-                        'Evaluation Time': None,
-                        'Tasks': None,
-                        'Max Memory Allocated (Total)': None,
-                    }
-
-                    return cached_res, speed_mem_metrics, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch run "
+                            "hash). Returning cached results."
+                        )
+
+                        speed_mem_metrics = {
+                            'Tasks / Evaluation Time': None,
+                            'Evaluation Time': None,
+                            'Tasks': None,
+                            'Max Memory Allocated (Total)': None,
+                        }
+
+                        return cached_res, speed_mem_metrics, run_hash
 
     exec_time = (time.time() - init_time)
 
diff --git a/torchbench/language_modelling/utils.py b/torchbench/language_modelling/utils.py
index 90528c4..9853ee4 100644
--- a/torchbench/language_modelling/utils.py
+++ b/torchbench/language_modelling/utils.py
@@ -14,6 +14,7 @@ def evaluate_language_model(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     n_steps, eval_loss = 0, 0
 
@@ -50,15 +51,16 @@ def evaluate_language_model(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch "
-                        "run_hash). Returning cached results."
-                    )
-                    return cached_res, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch "
+                            "run_hash). Returning cached results."
+                        )
+                        return cached_res, run_hash
 
     return {"Perplexity": np.exp(eval_loss / n_steps)}, run_hash
diff --git a/torchbench/language_modelling/wikitext103.py b/torchbench/language_modelling/wikitext103.py
index 93ff88d..0bfe602 100644
--- a/torchbench/language_modelling/wikitext103.py
+++ b/torchbench/language_modelling/wikitext103.py
@@ -31,6 +31,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -66,6 +67,7 @@ def benchmark(
         send_data_to_device=cls.send_data_to_device,
         test_loader=test_loader,
         device=device,
+        force=force
     )
 
     # Valid Split
@@ -90,6 +92,7 @@ def benchmark(
         send_data_to_device=cls.send_data_to_device,
         test_loader=valid_loader,
         device=device,
+        force=force
     )
 
     # Get final results
diff --git a/torchbench/object_detection/coco.py b/torchbench/object_detection/coco.py
index 772f890..3689f36 100644
--- a/torchbench/object_detection/coco.py
+++ b/torchbench/object_detection/coco.py
@@ -114,6 +114,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
     """Benchmarking function.
 
@@ -223,6 +224,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/object_detection/utils.py b/torchbench/object_detection/utils.py
index 7e56ad6..63c0c96 100644
--- a/torchbench/object_detection/utils.py
+++ b/torchbench/object_detection/utils.py
@@ -193,6 +193,7 @@ def evaluate_detection_coco(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     coco = get_coco_api_from_dataset(test_loader.dataset)
 
@@ -222,24 +223,25 @@ def evaluate_detection_coco(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch run "
-                        "hash). Returning cached results."
-                    )
-
-                    speed_mem_metrics = {
-                        'Tasks / Evaluation Time': None,
-                        'Evaluation Time': None,
-                        'Tasks': None,
-                        'Max Memory Allocated (Total)': None,
-                    }
-
-                    return cached_res, speed_mem_metrics, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch run "
+                            "hash). Returning cached results."
+                        )
+
+                        speed_mem_metrics = {
+                            'Tasks / Evaluation Time': None,
+                            'Evaluation Time': None,
+                            'Tasks': None,
+                            'Max Memory Allocated (Total)': None,
+                        }
+
+                        return cached_res, speed_mem_metrics, run_hash
 
     exec_time = (time.time() - init_time)
 
diff --git a/torchbench/semantic_segmentation/ade20k.py b/torchbench/semantic_segmentation/ade20k.py
index 7b8a2ec..fb78e6e 100644
--- a/torchbench/semantic_segmentation/ade20k.py
+++ b/torchbench/semantic_segmentation/ade20k.py
@@ -49,6 +49,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -91,6 +92,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/camvid.py b/torchbench/semantic_segmentation/camvid.py
index 7fd71f2..f298077 100644
--- a/torchbench/semantic_segmentation/camvid.py
+++ b/torchbench/semantic_segmentation/camvid.py
@@ -49,6 +49,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -91,6 +92,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/cityscapes.py b/torchbench/semantic_segmentation/cityscapes.py
index 9050a49..b2a8ed9 100644
--- a/torchbench/semantic_segmentation/cityscapes.py
+++ b/torchbench/semantic_segmentation/cityscapes.py
@@ -51,6 +51,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -94,6 +95,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/pascalcontext.py b/torchbench/semantic_segmentation/pascalcontext.py
index c73fab8..b088c60 100644
--- a/torchbench/semantic_segmentation/pascalcontext.py
+++ b/torchbench/semantic_segmentation/pascalcontext.py
@@ -49,6 +49,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -91,6 +92,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/pascalvoc.py b/torchbench/semantic_segmentation/pascalvoc.py
index a9b165a..9c8f652 100644
--- a/torchbench/semantic_segmentation/pascalvoc.py
+++ b/torchbench/semantic_segmentation/pascalvoc.py
@@ -92,6 +92,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -136,6 +137,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/utils.py b/torchbench/semantic_segmentation/utils.py
index e6197d2..990ac3d 100644
--- a/torchbench/semantic_segmentation/utils.py
+++ b/torchbench/semantic_segmentation/utils.py
@@ -139,6 +139,7 @@ def evaluate_segmentation(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     confmat = ConfusionMatrix(test_loader.no_classes)
 
@@ -161,24 +162,25 @@ def evaluate_segmentation(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch run "
-                        "hash). Returning cached results."
-                    )
-
-                    speed_mem_metrics = {
-                        'Tasks / Evaluation Time': None,
-                        'Evaluation Time': None,
-                        'Tasks': None,
-                        'Max Memory Allocated (Total)': None,
-                    }
-
-                    return cached_res, speed_mem_metrics, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch run "
+                            "hash). Returning cached results."
+                        )
+
+                        speed_mem_metrics = {
+                            'Tasks / Evaluation Time': None,
+                            'Evaluation Time': None,
+                            'Tasks': None,
+                            'Max Memory Allocated (Total)': None,
+                        }
+
+                        return cached_res, speed_mem_metrics, run_hash
 
     exec_time = (time.time() - init_time)
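
Usage note (not part of the patch): after this change every public benchmark() entry point accepts a force flag, and passing force=True propagates down to the evaluate_* helpers so the sotabench.com run-hash cache lookup is skipped and the full evaluation always re-runs. A minimal sketch, assuming the documented torchbench ImageNet entry point and a stock torchvision model (the model and transform choices below are illustrative, not prescribed by this patch):

    from torchbench.image_classification import ImageNet
    from torchvision.models.resnet import resnext101_32x8d
    import torchvision.transforms as transforms

    # Standard ImageNet preprocessing for torchvision classification models
    input_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    ImageNet.benchmark(
        model=resnext101_32x8d(pretrained=True),
        paper_model_name='ResNeXt-101-32x8d',
        paper_arxiv_id='1611.05431',
        input_transform=input_transform,
        batch_size=256,
        force=True,  # new in this patch: bypass the cached-results lookup
    )

With the default force=False, behaviour is unchanged: if the run hash computed from the first batch matches a previous submission, the cached results are returned and the remaining batches are never evaluated.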