diff --git a/torchbench/image_classification/cifar10.py b/torchbench/image_classification/cifar10.py
index 867263c..2de26e8 100644
--- a/torchbench/image_classification/cifar10.py
+++ b/torchbench/image_classification/cifar10.py
@@ -36,6 +36,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -70,6 +71,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/cifar100.py b/torchbench/image_classification/cifar100.py
index 1f24341..45188df 100644
--- a/torchbench/image_classification/cifar100.py
+++ b/torchbench/image_classification/cifar100.py
@@ -36,6 +36,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -70,6 +71,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/imagenet.py b/torchbench/image_classification/imagenet.py
index 47630e7..92fc312 100644
--- a/torchbench/image_classification/imagenet.py
+++ b/torchbench/image_classification/imagenet.py
@@ -102,6 +102,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
     """Benchmarking function.
 
@@ -206,6 +207,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/mnist.py b/torchbench/image_classification/mnist.py
index 7b8ff83..c132b90 100644
--- a/torchbench/image_classification/mnist.py
+++ b/torchbench/image_classification/mnist.py
@@ -35,6 +35,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -69,6 +70,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/stl10.py b/torchbench/image_classification/stl10.py
index 01304c3..6553cd2 100644
--- a/torchbench/image_classification/stl10.py
+++ b/torchbench/image_classification/stl10.py
@@ -35,6 +35,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -69,6 +70,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/svhn.py b/torchbench/image_classification/svhn.py
index 14e7add..0d1386b 100644
--- a/torchbench/image_classification/svhn.py
+++ b/torchbench/image_classification/svhn.py
@@ -38,6 +38,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -72,6 +73,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(
diff --git a/torchbench/image_classification/utils.py b/torchbench/image_classification/utils.py
index 179a114..b20cf46 100644
--- a/torchbench/image_classification/utils.py
+++ b/torchbench/image_classification/utils.py
@@ -16,6 +16,7 @@ def evaluate_classification(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     top1 = AverageMeter()
     top5 = AverageMeter()
@@ -45,24 +46,25 @@ def evaluate_classification(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch run "
-                        "hash). Returning cached results."
-                    )
-
-                    speed_mem_metrics = {
-                        'Tasks / Evaluation Time': None,
-                        'Evaluation Time': None,
-                        'Tasks': None,
-                        'Max Memory Allocated (Total)': None,
-                    }
-
-                    return cached_res, speed_mem_metrics, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch run "
+                            "hash). Returning cached results."
+                        )
+
+                        speed_mem_metrics = {
+                            'Tasks / Evaluation Time': None,
+                            'Evaluation Time': None,
+                            'Tasks': None,
+                            'Max Memory Allocated (Total)': None,
+                        }
+
+                        return cached_res, speed_mem_metrics, run_hash
 
     exec_time = (time.time() - init_time)
 
diff --git a/torchbench/language_modelling/utils.py b/torchbench/language_modelling/utils.py
index 90528c4..9853ee4 100644
--- a/torchbench/language_modelling/utils.py
+++ b/torchbench/language_modelling/utils.py
@@ -14,6 +14,7 @@ def evaluate_language_model(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     n_steps, eval_loss = 0, 0
 
@@ -50,15 +51,16 @@ def evaluate_language_model(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch "
-                        "run_hash). Returning cached results."
-                    )
-                    return cached_res, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch "
+                            "run_hash). Returning cached results."
+                        )
+                        return cached_res, run_hash
 
     return {"Perplexity": np.exp(eval_loss / n_steps)}, run_hash
diff --git a/torchbench/language_modelling/wikitext103.py b/torchbench/language_modelling/wikitext103.py
index 93ff88d..0bfe602 100644
--- a/torchbench/language_modelling/wikitext103.py
+++ b/torchbench/language_modelling/wikitext103.py
@@ -31,6 +31,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -66,6 +67,7 @@ def benchmark(
         send_data_to_device=cls.send_data_to_device,
         test_loader=test_loader,
         device=device,
+        force=force
     )
 
     # Valid Split
@@ -90,6 +92,7 @@ def benchmark(
         send_data_to_device=cls.send_data_to_device,
         test_loader=valid_loader,
         device=device,
+        force=force
     )
 
     # Get final results
diff --git a/torchbench/object_detection/coco.py b/torchbench/object_detection/coco.py
index 772f890..3689f36 100644
--- a/torchbench/object_detection/coco.py
+++ b/torchbench/object_detection/coco.py
@@ -114,6 +114,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
     """Benchmarking function.
 
@@ -223,6 +224,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/object_detection/utils.py b/torchbench/object_detection/utils.py
index 7e56ad6..63c0c96 100644
--- a/torchbench/object_detection/utils.py
+++ b/torchbench/object_detection/utils.py
@@ -193,6 +193,7 @@ def evaluate_detection_coco(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     coco = get_coco_api_from_dataset(test_loader.dataset)
 
@@ -222,24 +223,25 @@ def evaluate_detection_coco(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch run "
-                        "hash). Returning cached results."
-                    )
-
-                    speed_mem_metrics = {
-                        'Tasks / Evaluation Time': None,
-                        'Evaluation Time': None,
-                        'Tasks': None,
-                        'Max Memory Allocated (Total)': None,
-                    }
-
-                    return cached_res, speed_mem_metrics, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch run "
+                            "hash). Returning cached results."
+                        )
+
+                        speed_mem_metrics = {
+                            'Tasks / Evaluation Time': None,
+                            'Evaluation Time': None,
+                            'Tasks': None,
+                            'Max Memory Allocated (Total)': None,
+                        }
+
+                        return cached_res, speed_mem_metrics, run_hash
 
     exec_time = (time.time() - init_time)
 
diff --git a/torchbench/semantic_segmentation/ade20k.py b/torchbench/semantic_segmentation/ade20k.py
index 7b8a2ec..fb78e6e 100644
--- a/torchbench/semantic_segmentation/ade20k.py
+++ b/torchbench/semantic_segmentation/ade20k.py
@@ -49,6 +49,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -91,6 +92,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/camvid.py b/torchbench/semantic_segmentation/camvid.py
index 7fd71f2..f298077 100644
--- a/torchbench/semantic_segmentation/camvid.py
+++ b/torchbench/semantic_segmentation/camvid.py
@@ -49,6 +49,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -91,6 +92,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/cityscapes.py b/torchbench/semantic_segmentation/cityscapes.py
index 9050a49..b2a8ed9 100644
--- a/torchbench/semantic_segmentation/cityscapes.py
+++ b/torchbench/semantic_segmentation/cityscapes.py
@@ -51,6 +51,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -94,6 +95,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/pascalcontext.py b/torchbench/semantic_segmentation/pascalcontext.py
index c73fab8..b088c60 100644
--- a/torchbench/semantic_segmentation/pascalcontext.py
+++ b/torchbench/semantic_segmentation/pascalcontext.py
@@ -49,6 +49,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -91,6 +92,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/pascalvoc.py b/torchbench/semantic_segmentation/pascalvoc.py
index a9b165a..9c8f652 100644
--- a/torchbench/semantic_segmentation/pascalvoc.py
+++ b/torchbench/semantic_segmentation/pascalvoc.py
@@ -92,6 +92,7 @@ def benchmark(
     paper_pwc_id: str = None,
     paper_results: dict = None,
     pytorch_hub_url: str = None,
+    force: bool = False
 ) -> BenchmarkResult:
 
     config = locals()
@@ -136,6 +137,7 @@ def benchmark(
         model_output_transform=model_output_transform,
         send_data_to_device=send_data_to_device,
         device=device,
+        force=force
     )
 
     print(test_results)
diff --git a/torchbench/semantic_segmentation/utils.py b/torchbench/semantic_segmentation/utils.py
index e6197d2..990ac3d 100644
--- a/torchbench/semantic_segmentation/utils.py
+++ b/torchbench/semantic_segmentation/utils.py
@@ -139,6 +139,7 @@ def evaluate_segmentation(
     model_output_transform,
     send_data_to_device,
     device="cuda",
+    force=False
 ):
     confmat = ConfusionMatrix(test_loader.no_classes)
 
@@ -161,24 +162,25 @@ def evaluate_segmentation(
                     iterator.close()
                     break
 
-                # get the cached values from sotabench.com if available
-                client = Client.public()
-                cached_res = client.get_results_by_run_hash(run_hash)
-                if cached_res:
-                    iterator.close()
-                    print(
-                        "No model change detected (using the first batch run "
-                        "hash). Returning cached results."
-                    )
-
-                    speed_mem_metrics = {
-                        'Tasks / Evaluation Time': None,
-                        'Evaluation Time': None,
-                        'Tasks': None,
-                        'Max Memory Allocated (Total)': None,
-                    }
-
-                    return cached_res, speed_mem_metrics, run_hash
+                if not force:
+                    # get the cached values from sotabench.com if available
+                    client = Client.public()
+                    cached_res = client.get_results_by_run_hash(run_hash)
+                    if cached_res:
+                        iterator.close()
+                        print(
+                            "No model change detected (using the first batch run "
+                            "hash). Returning cached results."
+                        )
+
+                        speed_mem_metrics = {
+                            'Tasks / Evaluation Time': None,
+                            'Evaluation Time': None,
+                            'Tasks': None,
+                            'Max Memory Allocated (Total)': None,
+                        }
+
+                        return cached_res, speed_mem_metrics, run_hash
 
     exec_time = (time.time() - init_time)
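
Usage note (not part of the patch): after this change every public benchmark() entry point accepts a force flag, and passing force=True propagates down to the evaluate_* helpers so the sotabench.com run-hash cache lookup is skipped and the full evaluation always re-runs. A minimal sketch, assuming the documented torchbench ImageNet entry point and a stock torchvision model (the model and transform choices below are illustrative, not prescribed by this patch):

    from torchbench.image_classification import ImageNet
    from torchvision.models.resnet import resnext101_32x8d
    import torchvision.transforms as transforms

    # Standard ImageNet preprocessing for torchvision classification models
    input_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    ImageNet.benchmark(
        model=resnext101_32x8d(pretrained=True),
        paper_model_name='ResNeXt-101-32x8d',
        paper_arxiv_id='1611.05431',
        input_transform=input_transform,
        batch_size=256,
        force=True,  # new in this patch: bypass the cached-results lookup
    )

With the default force=False, behaviour is unchanged: if the run hash computed from the first batch matches a previous submission, the cached results are returned and the remaining batches are never evaluated.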