UWARG · KarthiU · Oct 14, 2023 · Oct 14, 2023 · Oct 14, 2023 · Oct 15, 2023
diff --git a/config.yaml b/config.yaml
@@ -3,6 +3,7 @@
 queue_max_size: 10
 
 log_directory_path: "logs"
+profiling_length: 300
 
 video_input:
     camera_name: 0

diff --git a/modules/detect_target/detect_target.py b/modules/detect_target/detect_target.py
@@ -27,9 +27,12 @@ def __init__(self, device: "str | int", model_path: str, override_full: bool, sa
         self.__device = device
         self.__model = ultralytics.YOLO(model_path)
         self.__counter = 0
-        self.__enable_half_precision = False if self.__device == "cpu" else True
+        self.__enable_half_precision = False if self.__device == "cpu" else False
+        # Modified so override_full alone decides between half and full precision - FOR PROFILING ONLY
         if override_full:
             self.__enable_half_precision = False
+        elif override_full is False:
+            self.__enable_half_precision = True
         self.__filename_prefix = ""
         if save_name != "":
             self.__filename_prefix = save_name + "_" + str(int(time.time())) + "_"
@@ -39,6 +42,8 @@ def run(self, data: image_and_time.ImageAndTime) -> "tuple[bool, np.ndarray | No
         Returns annotated image.
         TODO: Change to DetectionsAndTime
         """
+        start_time = time.time()
+
         image = data.image
         predictions = self.__model.predict(
             source=image,
@@ -75,6 +80,22 @@ def run(self, data: image_and_time.ImageAndTime) -> "tuple[bool, np.ndarray | No
                 assert detection is not None
                 detections.append(detection)
 
+        stop_time = time.time()
+
+        elapsed_time = stop_time - start_time
+
+        for pred in predictions: 
+            with open('profiler.txt', 'a') as file:
+                speeds = pred.speed
+                preprocess_speed = round(speeds['preprocess'], 3)
+                inference_speed = round(speeds['inference'], 3)
+                postprocess_speed = round(speeds['postprocess'], 3)
+                elapsed_time_ms = elapsed_time * 1000
+                precision_string = "half" if self.__enable_half_precision else "full"
+
+
+                file.write(f"{preprocess_speed}, {inference_speed}, {postprocess_speed}, {elapsed_time_ms}, {precision_string}\n")
+
         # Logging
         if self.__filename_prefix != "":
             filename = self.__filename_prefix + str(self.__counter)

diff --git a/profiler/__init__.py b/profiler/__init__.py
diff --git a/profiler/profile_data/bus.jpg b/profiler/profile_data/bus.jpg
diff --git a/profiler/profile_data/zidane.jpg b/profiler/profile_data/zidane.jpg
diff --git a/profiler/profile_detect_target.py b/profiler/profile_detect_target.py
@@ -0,0 +1,95 @@
+"""
+Profile detect target using full/half precision.
+"""
+import multiprocessing as mp
+import time
+
+import cv2
+import numpy as np
+import os
+import timeit
+import torch
+
+from functools import partial
+from modules.detect_target import detect_target, detect_target_worker
+from modules import image_and_time
+# from modules import points_and_time
+from utilities.workers import queue_proxy_wrapper
+from utilities.workers import worker_controller
+
+
+MODEL_PATH = "tests/model_example/yolov8s_ultralytics_pretrained_default.pt"  # Model file given to DetectTarget
+IMAGE_BUS_PATH = "tests/model_example/bus.jpg"  # Image used by the single image test
+IMAGE_ZIDANE_PATH = "tests/model_example/zidane.jpg"  # NOTE(review): not referenced in this file
+TEST_DATA_DIR = "profiler/profile_data"  # Folder whose files are run by the throughput test
+
+THROUGHPUT_TEXT_WORK_COUNT = 50  # NOTE(review): not referenced in this file
+OVERRIDE_FULL = False  # NOTE(review): not referenced in this file
+
+
+def time_single_image(device: "str | int", image_path: str, use_full_precision: bool) -> float:
+    """Return the best-case seconds per `DetectTarget.run` call on the image at `image_path`."""
+    detection = detect_target.DetectTarget(device, MODEL_PATH, use_full_precision)
+    result, value = image_and_time.ImageAndTime.create(cv2.imread(image_path))
+    assert result
+    assert value is not None
+
+    runs_per_batch = 10
+    # Each repeat() entry is the TOTAL for runs_per_batch calls, so divide by that (not 100)
+    batch_times = timeit.Timer(partial(detection.run, value)).repeat(10, runs_per_batch)
+    return min(batch_times) / runs_per_batch
+
+def time_throughput(device: "str | int", image_folder_path: str, use_full_precision: bool) -> "tuple[int, int]":
+    """
+    Run detection on every file in `image_folder_path`; model construction is timed too.
+
+    Returns (number of files, elapsed nanoseconds).
+    """
+    filenames = os.listdir(image_folder_path)
+
+    begin_ns = time.time_ns()
+    detector = detect_target.DetectTarget(device, MODEL_PATH, use_full_precision)
+    for filename in filenames:
+        frame = cv2.imread(os.path.join(image_folder_path, filename))
+        created, image_data = image_and_time.ImageAndTime.create(frame)
+
+        assert created
+        assert image_data is not None
+        detector.run(image_data)
+
+    elapsed_ns = time.time_ns() - begin_ns
+    return len(filenames), elapsed_ns
+
+
+
+if __name__ == "__main__":
+    # Use the first CUDA device when one is available, otherwise fall back to the CPU
+    device = 0 if torch.cuda.is_available() else "cpu"
+
+    # Single image test: best-case time per detection run
+    full_precision_time = time_single_image(device, IMAGE_BUS_PATH, use_full_precision=True)
+    half_precision_time = time_single_image(device, IMAGE_BUS_PATH, use_full_precision=False)
+
+    # Throughput test: every file in TEST_DATA_DIR, once per precision
+    n_images1, fp_worker_time = time_throughput(
+        device=device,
+        image_folder_path=TEST_DATA_DIR,
+        use_full_precision=True,
+    )
+    n_images2, hp_worker_time = time_throughput(
+        device=device,
+        image_folder_path=TEST_DATA_DIR,
+        use_full_precision=False,
+    )
+
+    # Output data
+    print(f"Single image full precision: {full_precision_time}")
+    print(f"Single image half precision: {half_precision_time}")
+
+    # Throughput figures must come from the worker timings (ns), not the single image times
+    print(f"Full precision worker completed {n_images1} images in {fp_worker_time} ns")
+    print(f"Average time per image: {round(fp_worker_time / n_images1)} ns")
+
+    print(f"half precision worker completed {n_images2} images in {hp_worker_time} ns")
+    print(f"Average time per image: {round(hp_worker_time / n_images2)} ns")
+
diff --git a/profiler_detect_target_2024.py b/profiler_detect_target_2024.py
@@ -0,0 +1,203 @@
+import argparse
+import multiprocessing as mp
+import pathlib
+import queue
+import time
+import numpy as np 
+import os
+import pandas as pd
+
+import cv2
+import yaml
+
+from modules.detect_target import detect_target_worker
+
+from modules.video_input import video_input_worker
+from utilities.workers import queue_proxy_wrapper
+from utilities.workers import worker_controller
+from utilities.workers import worker_manager
+
+
+
+CONFIG_FILE_PATH = pathlib.Path("config.yaml")  # Relative path: main() opens it from the current working directory
+
+
+def main() -> int:
+    """
+    Run the video input and detect target workers for a fixed time, then summarize timings.
+
+    Adapted from the airside code main function. profiler.txt is reset to just a header
+    row, the workers run for `profiling_length` seconds (from config.yaml), and the rows
+    found in profiler.txt afterwards are summarized per column and printed.
+
+    Returns 0 on success, -1 if the config file cannot be read or is missing keys.
+    """
+    # Open config file
+    try:
+        with CONFIG_FILE_PATH.open("r", encoding="utf8") as file:
+            try:
+                config = yaml.safe_load(file)
+            except yaml.YAMLError as exc:
+                print(f"Error parsing YAML file: {exc}")
+                return -1
+    except FileNotFoundError:
+        print(f"File not found: {CONFIG_FILE_PATH}")
+        return -1
+    except IOError as exc:
+        print(f"Error when opening file: {exc}")
+        return -1
+
+    # Parse whether or not to force cpu and/or full precision from command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--cpu", action="store_true", help="option to force cpu")
+    parser.add_argument("--full", action="store_true", help="option to force full precision")
+    args = parser.parse_args()
+
+    try:
+        QUEUE_MAX_SIZE = config["queue_max_size"]
+
+        LOG_DIRECTORY_PATH = config["log_directory_path"]
+
+        VIDEO_INPUT_CAMERA_NAME = config["video_input"]["camera_name"]
+        VIDEO_INPUT_WORKER_PERIOD = config["video_input"]["worker_period"]
+        VIDEO_INPUT_SAVE_NAME_PREFIX = config["video_input"]["save_prefix"]
+        VIDEO_INPUT_SAVE_PREFIX = f"{LOG_DIRECTORY_PATH}/{VIDEO_INPUT_SAVE_NAME_PREFIX}"
+
+        DETECT_TARGET_WORKER_COUNT = config["detect_target"]["worker_count"]
+        DETECT_TARGET_DEVICE = "cpu" if args.cpu else config["detect_target"]["device"]
+        DETECT_TARGET_MODEL_PATH = config["detect_target"]["model_path"]
+        # Note: if --full is not given this is False (with profiler implementation)
+        DETECT_TARGET_OVERRIDE_FULL_PRECISION = args.full
+        DETECT_TARGET_SAVE_NAME_PREFIX = config["detect_target"]["save_prefix"]
+        DETECT_TARGET_SAVE_PREFIX = f"{LOG_DIRECTORY_PATH}/{DETECT_TARGET_SAVE_NAME_PREFIX}"
+        PROFILING_LENGTH = config["profiling_length"]  # Seconds
+    except (KeyError, TypeError):
+        # TypeError: an empty config file makes yaml.safe_load return None
+        print("Config key(s) not found")
+        return -1
+
+    pathlib.Path(LOG_DIRECTORY_PATH).mkdir(exist_ok=True)
+
+    # Setup: start from a fresh profiler.txt containing only the header row
+    if os.path.exists('profiler.txt'):
+        print("Contents of profiler.txt deleted")
+    with open('profiler.txt', 'w') as file:
+        file.write("preprocess, inference, postprocess, elapsed_time, half/full precision\n")
+
+    controller = worker_controller.WorkerController()
+
+    mp_manager = mp.Manager()
+    video_input_to_detect_target_queue = queue_proxy_wrapper.QueueProxyWrapper(
+        mp_manager,
+        QUEUE_MAX_SIZE,
+    )
+    detect_target_to_main_queue = queue_proxy_wrapper.QueueProxyWrapper(
+        mp_manager,
+        QUEUE_MAX_SIZE,
+    )
+    video_input_manager = worker_manager.WorkerManager()
+    video_input_manager.create_workers(
+        1,
+        video_input_worker.video_input_worker,
+        (
+            VIDEO_INPUT_CAMERA_NAME,
+            VIDEO_INPUT_WORKER_PERIOD,
+            VIDEO_INPUT_SAVE_PREFIX,
+            video_input_to_detect_target_queue,
+            controller,
+        ),
+    )
+
+    detect_target_manager = worker_manager.WorkerManager()
+    detect_target_manager.create_workers(
+        DETECT_TARGET_WORKER_COUNT,
+        detect_target_worker.detect_target_worker,
+        (
+            DETECT_TARGET_DEVICE,
+            DETECT_TARGET_MODEL_PATH,
+            DETECT_TARGET_OVERRIDE_FULL_PRECISION,
+            DETECT_TARGET_SAVE_PREFIX,
+            video_input_to_detect_target_queue,
+            detect_target_to_main_queue,
+            controller,
+        ),
+    )
+
+    # Run
+    video_input_manager.start_workers()
+    detect_target_manager.start_workers()
+
+    start_time = time.time()
+
+    # Drain detection output until the profiling window ends or cv2.waitKey reports `q`
+    while time.time() - start_time <= PROFILING_LENGTH:
+        try:
+            detect_target_to_main_queue.queue.get_nowait()
+        except queue.Empty:
+            continue
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    # Teardown
+    controller.request_exit()
+
+    video_input_to_detect_target_queue.fill_and_drain_queue()
+    detect_target_to_main_queue.fill_and_drain_queue()
+
+    video_input_manager.join_workers()
+    detect_target_manager.join_workers()
+
+    # ====PROFILING CODE FOR METRIC CALCULATIONS=====
+    timing_data = []  # Rows of timing values (float); the precision column is dropped
+    column_names = []  # Column names (str) taken from the header row
+
+    with open('profiler.txt', 'r') as file:
+        for line_number, line in enumerate(file):
+            row = [field.strip() for field in line.split(',')]
+            if line_number == 0:
+                # Header row; stripped so the DataFrame index has no leading spaces
+                column_names = row
+                continue
+
+            try:
+                # Wrong field counts would make the array ragged; the last field is a label
+                if len(row) != len(column_names):
+                    raise ValueError("Unexpected number of fields")
+                timing_data.append([float(value) for value in row[:-1]])
+            except ValueError:
+                print(f"Skipping invalid data: {line.strip()}")
+
+    # Convert the data into a numpy array for metric calculations
+    data_array = np.array(timing_data)
+
+    if data_array.size == 0:
+        print("No data found.")
+        return 0
+
+    # The first prediction is skewed (see profiler.txt), so it is reported separately
+    metrics = {'Initial Pred (ms)': data_array[0]}
+    steady_state = data_array[1:]
+    if steady_state.size > 0:
+        # Aggregating an empty slice would raise, so this needs at least 2 data rows
+        metrics = {
+            'Average (ms)': np.nanmean(steady_state, axis=0),
+            'Min (ms)': np.nanmin(steady_state, axis=0),
+            'Max (ms)': np.nanmax(steady_state, axis=0),
+            'Median (ms)': np.nanmedian(steady_state, axis=0),
+            **metrics,
+        }
+
+    # Create and print DF
+    df = pd.DataFrame(metrics, index=column_names[:-1])
+    print(f"Profiling results for {'full' if DETECT_TARGET_OVERRIDE_FULL_PRECISION else 'half'}:")
+    print(df)
+
+    return 0
+
+
+if __name__ == "__main__":
+    # Report a failing status code from main(), then always announce completion
+    status_code = main()
+    if status_code < 0:
+        print(f"ERROR: Status code: {status_code}")
+    print("Done!")