
Commit 7c9c76e

Commit message: init
1 parent 52b3e44

15 files changed: +1242 -0 lines

Dockerfile.cpu

+31
@@ -0,0 +1,31 @@
FROM python:3.10.3-slim-bullseye

# set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# system libraries for image handling (OpenCV-style deps), plus fetch tools
RUN apt-get update \
    && apt-get install -y \
        libfontconfig1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libgl1 \
        libxrender1 \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# start from a clean weights directory
RUN rm -rf weights
RUN mkdir weights

# bake the exported YOLOv8 segmentation model into the image
RUN wget --progress=bar:force https://enterprise-unitlab.s3.us-east-2.amazonaws.com/weights/yolov8x-seg_model.onnx -O weights/model.onnx

COPY ./requirement-cpu.txt .

RUN pip install --upgrade pip

# install dependencies
RUN pip install -r requirement-cpu.txt

COPY . .
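
The CPU image pins the segmentation model at build time, so a corrupt or moved S3 object would otherwise only surface when the container starts serving. A minimal sanity check, runnable inside the built image (it assumes onnxruntime is pinned in requirement-cpu.txt, which is not shown in this commit):

import onnxruntime

# CPUExecutionProvider matches the CPU-only image; this fails fast on a bad download
session = onnxruntime.InferenceSession("weights/model.onnx", providers=["CPUExecutionProvider"])
print("inputs:", [(i.name, i.shape) for i in session.get_inputs()])
print("outputs:", [o.name for o in session.get_outputs()])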

Dockerfile.gpu

+42
@@ -0,0 +1,42 @@
FROM nvcr.io/nvidia/tensorrt:23.03-py3

# set the timezone non-interactively so apt-get does not prompt
ENV TZ=Europe/Minsk
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

RUN apt-get update \
    && apt-get install -y \
        libfontconfig1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libgl1 \
        python3-pip \
        libxrender1 \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# start from a clean weights directory
RUN rm -rf weights
RUN mkdir weights

# bake the serialized TensorRT engine into the image
RUN wget --progress=bar:force https://enterprise-unitlab.s3.us-east-2.amazonaws.com/weights/generalx-seg.engine -O weights/model.engine

COPY ./requirement-gpu.txt .

RUN pip3 install --upgrade pip

# NVIDIA's PyPI mirror hosts the TensorRT/CUDA Python packages
RUN pip3 install --extra-index-url=https://pypi.ngc.nvidia.com --trusted-host pypi.ngc.nvidia.com -r requirement-gpu.txt

COPY . .

ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
LABEL com.nvidia.volumes.needed="nvidia_driver"
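
One caveat worth flagging: a serialized TensorRT engine is tied to the GPU architecture and TensorRT version it was built with, so the prebuilt generalx-seg.engine must match the runtime in the 23.03 base image and the deployment GPU. A short sketch to verify the engine deserializes inside the container (uses the tensorrt bindings shipped with the base image; weights/model.engine is the path from the Dockerfile above):

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)

# deserialize_cuda_engine returns None if the engine was built for a
# different GPU architecture or TensorRT version
with open("weights/model.engine", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

assert engine is not None, "engine failed to deserialize; rebuild it for this GPU/TensorRT"
print("engine deserialized OK")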

docker-compose-cpu.yml

+21
@@ -0,0 +1,21 @@
version: '3.8'

services:
  aiworker1_yolo8_onnx:
    build:
      context: ./
      dockerfile: Dockerfile.cpu
    environment:
      - BACKEND_MODULE=ONNX
    command: gunicorn --timeout 60 -k gevent -w 1 --bind 0.0.0.0:2121 serve.wsgi:app
    ports:
      - "2121:2121"

  nginx-cpu-worker:
    image: nginx:alpine
    ports:
      - "8080:8080"
    volumes:
      - ./enrtrypoint/proxy/nginx-cpu-worker.conf:/etc/nginx/nginx.conf
    depends_on:
      - aiworker1_yolo8_onnx
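
With the stack up (docker compose -f docker-compose-cpu.yml up --build), nginx exposes the worker behind the /api prefix on port 8080. A hypothetical smoke test in Python; the serving app serve.wsgi is not part of this commit, so the /api/predict route and the image field name are placeholder assumptions to adapt to the real handler:

import requests

# hypothetical route; check serve.wsgi for the real endpoint under /api
url = "http://localhost:8080/api/predict"

with open("sample.jpg", "rb") as f:
    resp = requests.post(url, files={"image": f}, timeout=60)

print(resp.status_code)
print(resp.json())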

docker-compose-gpu.yml

+28
@@ -0,0 +1,28 @@
version: '3.8'

services:
  aiworker1_yolo8_tensorrt:
    build:
      context: ./
      dockerfile: Dockerfile.gpu
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    environment:
      - BACKEND_MODULE=TENSORRT
    command: gunicorn --timeout 60 -k gevent -w 1 --bind 0.0.0.0:2222 serve.wsgi:app
    ports:
      - "2222:2222"

  nginx-gpu-worker:
    image: nginx:alpine
    ports:
      - "8080:8080"
    volumes:
      - ./enrtrypoint/proxy/nginx-gpu-worker.conf:/etc/nginx/nginx.conf
    depends_on:
      - aiworker1_yolo8_tensorrt
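
Note: the deploy.resources.reservations.devices syntax is only honored under Compose file format 3.8+ / the Compose spec (hence the version bump above), and the host needs the NVIDIA Container Toolkit installed so Docker can satisfy the nvidia driver reservation.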

enrtrypoint/proxy/nginx-cpu-worker.conf

+41
@@ -0,0 +1,41 @@
user nginx;
worker_processes 4;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 2048;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';
    access_log /var/log/nginx/access.log main;

    sendfile on;
    keepalive_timeout 85;

    server {
        listen 8080;
        server_name example.com;

        gzip on;
        gzip_types text/plain text/css text/javascript application/javascript application/json application/xml;

        location /api {
            proxy_pass http://aiworker1_yolo8_onnx:2121;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }

    include /etc/nginx/conf.d/*.conf;
}

enrtrypoint/proxy/nginx-gpu-worker.conf

+41
@@ -0,0 +1,41 @@
user nginx;
worker_processes 4;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 2048;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';
    access_log /var/log/nginx/access.log main;

    sendfile on;
    keepalive_timeout 85;

    server {
        listen 8080;
        server_name example.com;

        gzip on;
        gzip_types text/plain text/css text/javascript application/javascript application/json application/xml;

        location /api {
            proxy_pass http://aiworker1_yolo8_tensorrt:2222;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }

    include /etc/nginx/conf.d/*.conf;
}

modules/onnx_engine.py

+55
@@ -0,0 +1,55 @@
import onnxruntime
from typing import Any, Dict, List


class ONNXModule:
    """
    A class that encapsulates an ONNX model for inference.

    Attributes:
        weight (str): Path to the ONNX model file.
        session (onnxruntime.InferenceSession): The ONNX Runtime inference session for the model.

    Methods:
        __init__(self, weight: str): Initializes the ONNXModule instance.
        __init_engine(self): Initializes the ONNX Runtime inference engine.
        __call__(self, inputs: Dict[str, Any]): Performs inference on the given inputs.
    """

    def __init__(self, weight: str) -> None:
        """
        Initializes the ONNXModule with the given ONNX model.

        Parameters:
            weight (str): The path to the ONNX model file.
        """
        self.weight = weight
        self.session: onnxruntime.InferenceSession = self.__init_engine()

    def __init_engine(self) -> onnxruntime.InferenceSession:
        """
        Initializes the ONNX Runtime inference engine with the model.

        Returns:
            onnxruntime.InferenceSession: The initialized inference session.
        """
        try:
            return onnxruntime.InferenceSession(self.weight, providers=['CPUExecutionProvider'])
        except Exception as e:
            # onnxruntime does not export a public OnnxRuntimeException; loader
            # failures surface as generic Python exceptions, so catch broadly
            raise RuntimeError(f"Failed to initialize ONNX Runtime session: {e}") from e

    def __call__(self, inputs: Dict[str, Any]) -> List[Any]:
        """
        Performs inference on the provided inputs using the ONNX model.

        Parameters:
            inputs (Dict[str, Any]): The inputs for the model inference. Keys are
                input names, and values are input tensors.

        Returns:
            List[Any]: The outputs from the model inference.
        """
        try:
            # passing None requests every model output
            return self.session.run(None, inputs)
        except Exception as e:
            raise RuntimeError(f"Inference failed: {e}") from e
