@@ -1,91 +1,8 @@
-import time
 from typing import Any
 
 import numpy as np
 import torch
 from torch.nn import Module
-from torch.utils.data import DataLoader, Dataset
-
-
-def get_time():
-    return time.perf_counter()
-
-
-class RandomInfDataset(Dataset):
-    def __init__(self, n, in_shape, seed=42):
-        super().__init__()
-        np.random.seed(seed)
-
-        self.values = np.random.randn(n, *in_shape).astype(np.float32)
-
-    def __len__(self):
-        return len(self.values)
-
-    def __getitem__(self, index):
-        return self.values[index]
-
-
-def get_inf_loaders(n, in_shape, batch_size, device: str):
-    # This speeds up data copy for cuda devices
-    pin_memory = device == "cuda"
-
-    ds = RandomInfDataset(n, in_shape)
-    train_loader = DataLoader(
-        ds, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=pin_memory
-    )
-    test_loader = DataLoader(
-        ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=pin_memory
-    )
-    return train_loader, test_loader
-
-
-def recursively_convert_to_numpy(o: Any):
-    if isinstance(o, torch.Tensor):
-        return o.numpy()
-    if isinstance(o, tuple):
-        return tuple(recursively_convert_to_numpy(x) for x in o)
-    if isinstance(o, list):
-        return [recursively_convert_to_numpy(x) for x in o]
-    if isinstance(o, dict):
-        return {k: recursively_convert_to_numpy(v) for k, v in o.items()}
-    # No-op cases. Explicitly enumerated to avoid things sneaking through.
-    if isinstance(o, str):
-        return o
-    if isinstance(o, float):
-        return o
-    if isinstance(o, int):
-        return o
-    raise Exception(f"Unexpected Python function input: {o}")
-
-
-def recursively_convert_from_numpy(o: Any):
-    if isinstance(o, np.ndarray):
-        return torch.from_numpy(o)
-    if isinstance(o, tuple):
-        return tuple(recursively_convert_from_numpy(x) for x in o)
-    if isinstance(o, list):
-        return [recursively_convert_from_numpy(x) for x in o]
-    if isinstance(o, dict):
-        return {k: recursively_convert_from_numpy(v) for k, v in o.items()}
-    # No-op cases. Explicitly enumerated to avoid things sneaking through.
-    if isinstance(o, str):
-        return o
-    if isinstance(o, float):
-        return o
-    if isinstance(o, int):
-        return o
-    raise Exception(f"Unexpected Python function output: {o}")
-
-
-def refine_result_type(_result):
-    if isinstance(_result, tuple):
-        return tuple(refine_result_type(x) for x in _result)
-    elif isinstance(_result, np.ndarray):
-        return torch.from_numpy(_result)
-    elif isinstance(_result, (bool, int, float)):
-        return _result
-    else:
-        raise ValueError(f"Unhandled return type {type(_result)}")
 
 
 def str_to_dtype(dtype: str):
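For orientation, the converter pair removed in this hunk was a round-trip boundary: torch inputs lowered to NumPy, handed to a NumPy-only callable, and the result lifted back to tensors. A minimal sketch of that composition, assuming the removed definitions of recursively_convert_to_numpy and recursively_convert_from_numpy are in scope; numpy_fn and call_through_numpy are hypothetical names, not part of the commit:

import torch

# Hypothetical NumPy-only callable we want to expose to torch callers.
def numpy_fn(args):
    return args["x"] * 2.0 + args["bias"]

def call_through_numpy(fn, inputs):
    np_inputs = recursively_convert_to_numpy(inputs)  # tensors -> ndarrays, containers recursed
    result = fn(np_inputs)
    return recursively_convert_from_numpy(result)     # ndarrays -> tensors on the way out

out = call_through_numpy(numpy_fn, {"x": torch.ones(3), "bias": 1.0})
assert torch.equal(out, torch.full((3,), 3.0))

The explicit str/float/int passthrough cases in both converters exist so that any unhandled type raises immediately instead of silently crossing the boundary.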
@@ -337,157 +254,31 @@ def _get_device(device_name):
     raise ValueError(f"Unknown execution device {device_name}.")
 
 
-def get_report(fw_times, duration_s, n_items, flops_per_sample):
-    return {
-        "duration_s": duration_s,
-        "samples_per_s": n_items / sum(fw_times),
-        "samples_per_s_dirty": n_items / duration_s,
-        "flops_per_sample": flops_per_sample,
-        "n_items": n_items,
-        "p00": np.percentile(fw_times, 0),
-        "p50": np.percentile(fw_times, 50),
-        "p90": np.percentile(fw_times, 90),
-        "p100": max(fw_times),
-    }
-
-
-class Benchmark:
-    def __init__(
-        self,
-        net,
-        in_shape,
-        dataset,
-        batch_size,
-        min_batches=10,
-        min_seconds=10,
-        warmup_batches=3,
-    ) -> None:
-        self.model = net
-        self.in_shape = in_shape
-        self.dataset = dataset
-        self.batch_size = batch_size
-        self.warmup_batches = warmup_batches
-        self.min_batches = min_batches
-        self.min_seconds = min_seconds
-
-    def compile(self, sample, backend: Backend):
-        self.model = backend.prepare_eval_model(self.model, sample_input=sample)
-
-    def inference(self, backend: Backend):
-        # timeout if running for more than 3 minutes already
-        max_time = 180
-
-        test_loader = torch.utils.data.DataLoader(
-            self.dataset,
-            batch_size=self.batch_size,
-            shuffle=False,
-            num_workers=0,
-            pin_memory=backend.device_name == "cuda",
-        )
-
-        try:
-            print("Torch cpu capability:", torch.backends.cpu.get_cpu_capability())
-        except:
-            pass
-
-        flops_per_sample = get_macs(self.model, self.in_shape, backend) * 2
-
-        sample = next(iter(test_loader))
-        self.compile(sample, backend)
-
-        n_items = 0
-        outputs = []
-        fw_times = []
-
-        self.model.eval()
-        with torch.inference_mode():
-            start = get_time()
-            for i, x in enumerate(test_loader):
-                backend.sync()
-                s = get_time()
-                x = backend.to_device(x)
-                if backend.dtype != torch.float32:
-                    with torch.autocast(
-                        device_type=backend.device_name,
-                        dtype=backend.dtype,
-                    ):
-                        y = self.model(x)
-                else:
-                    y = self.model(x)
-
-                backend.sync()
-
-                if i < self.warmup_batches:
-                    # We restart the timer because that was just a warmup
-                    start = time.perf_counter()
-                    continue
-
-                fw_times.append(get_time() - s)
-                n_items += len(x)
-                outputs.append(y)
-
-                # Early stop once we have at least min_batches full batches and have been running for min_seconds seconds
-                if (
-                    (time.perf_counter() - start) > self.min_seconds
-                    and n_items >= self.batch_size * self.min_batches
-                ):
-                    break
-
-                if (get_time() - start) > max_time:
-                    break
-
-            stop = get_time()
-
-        report = get_report(
-            fw_times=fw_times,
-            duration_s=stop - start,
-            n_items=n_items,
-            flops_per_sample=flops_per_sample,
-        )
-        return report, outputs
-
-    def train(self):
-        # We are not interested in training yet.
-        # criterion = nn.CrossEntropyLoss()
-        # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
-
-        # N_EPOCHS = 3
-        # epoch_stats = {}
-        # n_report = 10
-        # for epoch in range(n_epochs):  # loop over the dataset multiple times
-        #     running_loss = 0.0
-
-        #     n_items = 0
-        #     start = get_time()
-        #     for i, (x, y) in enumerate(trainloader):
-        #         optimizer.zero_grad()
-
-        #         outputs = net(x)
-        #         loss = criterion(outputs, y)
-        #         loss.backward()
-        #         optimizer.step()
-
-        #         n_items += len(x)
-
-        #         running_loss += loss.item()
-        #         if i % n_report == (n_report - 1):
-        #             print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / n_report:.3f}')
-        #             running_loss = 0.0
-
-        #     stop = get_time()
-        #     print(f"{n_items} took {stop - start}")
-
-        # print('Finished Training')
-        pass
-
-
-def get_macs(model, in_shape, backend):
-    """Calculate MACs, conventional FLOPS = MACs * 2."""
-    from ptflops import get_model_complexity_info
-
-    model.eval()
-    with torch.no_grad():
-        macs, params = get_model_complexity_info(
-            model, in_shape, as_strings=False, print_per_layer_stat=False, verbose=True
-        )
-    return macs
+def recursively_convert_to_numpy(o: Any):
+    if isinstance(o, torch.Tensor):
+        return o.numpy()
+    if isinstance(o, tuple):
+        return tuple(recursively_convert_to_numpy(x) for x in o)
+    if isinstance(o, list):
+        return [recursively_convert_to_numpy(x) for x in o]
+    if isinstance(o, dict):
+        return {k: recursively_convert_to_numpy(v) for k, v in o.items()}
+    # No-op cases. Explicitly enumerated to avoid things sneaking through.
+    if isinstance(o, str):
+        return o
+    if isinstance(o, float):
+        return o
+    if isinstance(o, int):
+        return o
+    raise Exception(f"Unexpected Python function input: {o}")
+
+
+def refine_result_type(_result):
+    if isinstance(_result, tuple):
+        return tuple(refine_result_type(x) for x in _result)
+    elif isinstance(_result, np.ndarray):
+        return torch.from_numpy(_result)
+    elif isinstance(_result, (bool, int, float)):
+        return _result
+    else:
+        raise ValueError(f"Unhandled return type {type(_result)}")
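The deleted Benchmark.inference follows the standard device-timing recipe: synchronize before starting and after finishing each batch, discard the first warmup iterations, then summarize per-batch latencies with percentiles. A condensed sketch of that pattern, not the deleted code verbatim; time_inference is a hypothetical name and sync stands in for backend.sync:

import time
import numpy as np
import torch

def time_inference(model, loader, sync=lambda: None, warmup_batches=3):
    """Per-batch forward latency with warmup discard; sync is a device-barrier stand-in."""
    fw_times = []
    model.eval()
    with torch.inference_mode():
        for i, x in enumerate(loader):
            sync()                       # drain queued work before starting the clock
            s = time.perf_counter()
            model(x)
            sync()                       # wait until this batch has actually finished
            if i >= warmup_batches:      # first batches warm caches/compilation; discard them
                fw_times.append(time.perf_counter() - s)
    return {
        "p50": float(np.percentile(fw_times, 50)),
        "p90": float(np.percentile(fw_times, 90)),
        "p100": max(fw_times),
    }

On a CUDA backend the barrier would be torch.cuda.synchronize; without it, the timed region would only capture kernel launch, not execution.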