TorchServe XPU chart (#597)
Signed-off-by: Srikanth Ramakrishna <[email protected]>
Signed-off-by: Srikanth Ramakrishna  <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jitendra Patil <[email protected]>
3 people authored Jan 13, 2025
1 parent 3681db3 commit 8e94dcd
Showing 3 changed files with 28 additions and 13 deletions.
6 changes: 4 additions & 2 deletions workflows/charts/torchserve/README.md
@@ -20,11 +20,13 @@ Then, follow the installation notes to test the deployment
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 | deploy.env | object | `{"configMapName":"intel-proxy-config","enabled":true}` | Add Environment mapping |
-| deploy.image | string | `"intel/intel-optimized-pytorch:2.3.0-serving-cpu"` | Intel Optimized torchserve image |
+| deploy.hostname | string | `""` | Name of the GPU Host. Please add hostname before running |
+| deploy.image | string | `"intel/intel-optimized-pytorch:2.5.10-serving-xpu"` | Intel Optimized torchserve image |
 | deploy.modelConfig | string | `"/home/model-server/config.properties"` | Model Server Configuration file location |
 | deploy.models | string | `"all"` | Models to be loaded |
 | deploy.replicas | int | `1` | Number of pods |
-| deploy.resources.limits | object | `{"cpu":"4000m","memory":"1Gi"}` | Maximum resources per pod |
+| deploy.resources.limits | object | `{"cpu":"4000m","gpu.intel.com/i915":1,"memory":"2Gi"}` | Maximum resources per pod |
+| deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration |
 | deploy.resources.requests | object | `{"cpu":"1000m","memory":"512Mi"}` | Minimum resources per pod |
 | deploy.storage.nfs | object | `{"enabled":false,"path":"nil","readOnly":true,"server":"nil","subPath":"nil"}` | Network File System (NFS) storage for models |
 | deploy.tokens_disabled | bool | `true` | Set token authentication on or off. Checkout the latest [torchserve docs](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more details. |
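Note: the new `deploy.hostname` and GPU-aware resource defaults documented above can be combined in a small values override. The snippet below is only an illustrative sketch for `helm install -f`; `gpu-node-1` is a placeholder node name, and the resource figures simply restate the chart defaults from the table.

```yaml
# my-xpu-values.yaml -- hypothetical override file passed via `helm install -f`
deploy:
  # Placeholder: the Kubernetes node that exposes the Intel GPU
  hostname: "gpu-node-1"
  resources:
    limits:
      cpu: 4000m
      memory: 2Gi
      # One Intel GPU advertised by the i915 device plugin
      gpu.intel.com/i915: 1
    requests:
      cpu: 1000m
      memory: 512Mi
      gpu.intel.com/i915: 1
```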
24 changes: 16 additions & 8 deletions workflows/charts/torchserve/templates/deploy.yaml
@@ -29,7 +29,7 @@ spec:
         {{- include "torchserve.selectorLabels" . | nindent 8 }}
     spec:
       containers:
-        - name: torchserve
+        - name: {{ .Chart.Name }}
           image: {{ .Values.deploy.image }}
           args:
             - 'torchserve'
@@ -62,6 +62,10 @@ spec:
             - name: grpc-2
               containerPort: 7071
           volumeMounts:
+            - name: dshm
+              mountPath: /dev/shm
+            - name: dri
+              mountPath: /dev/dri
           {{- if .Values.deploy.storage.nfs.enabled }}
             - name: model
               mountPath: /home/model-server/model-store
@@ -71,16 +75,18 @@
               mountPath: /home/model-server/model-store
           {{- end }}
           resources:
-            requests:
-              cpu: {{ .Values.deploy.resources.requests.cpu }}
-              memory: {{ .Values.deploy.resources.requests.memory }}
-            limits:
-              cpu: {{ .Values.deploy.resources.limits.cpu }}
-              memory: {{ .Values.deploy.resources.limits.memory }}
+            {{- toYaml .Values.deploy.resources | nindent 12 }}
       securityContext:
-        fsGroup: 1000
         runAsUser: 1000
+        fsGroup: 1000
       volumes:
+        - name: dshm
+          emptyDir:
+            medium: Memory
+        - name: dri
+          hostPath:
+            path: /dev/dri
+            type: Directory
       {{- if .Values.deploy.storage.nfs.enabled }}
         - name: model
           nfs:
@@ -93,3 +99,5 @@ spec:
           persistentVolumeClaim:
             claimName: {{ include "torchserve.fullname" . }}-model-dir
       {{- end }}
+      nodeSelector:
+        kubernetes.io/hostname: {{ .Values.deploy.hostname }}
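For reference, with the chart defaults the templated sections touched above should render roughly as the fragment below. This is a hand-written sketch, not captured `helm template` output; the node name is a placeholder and the image is the default from values.yaml.

```yaml
# Approximate rendered Deployment fragment under the default values
spec:
  template:
    spec:
      containers:
        - name: torchserve                  # from .Chart.Name
          image: intel/intel-extension-for-pytorch:2.5.10-serving-xpu
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm           # memory-backed shared memory
            - name: dri
              mountPath: /dev/dri           # Intel GPU device nodes from the host
          resources:                        # emitted by `toYaml .Values.deploy.resources`
            limits:
              cpu: 4000m
              gpu.intel.com/i915: 1
              memory: 2Gi
            requests:
              cpu: 1000m
              gpu.intel.com/i915: 1
              memory: 512Mi
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory                  # tmpfs backing /dev/shm
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
      nodeSelector:
        kubernetes.io/hostname: gpu-node-1  # placeholder for deploy.hostname
```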
11 changes: 8 additions & 3 deletions workflows/charts/torchserve/values.yaml
@@ -17,8 +17,8 @@ nameOverride: ""
 # -- Full qualified Domain Name
 fullnameOverride: ""
 deploy:
-  # -- Intel Optimized torchserve image
-  image: intel/intel-optimized-pytorch:2.3.0-serving-cpu
+  # -- Torchserve on Intel Image
+  image: intel/intel-extension-for-pytorch:2.5.10-serving-xpu
   # -- Add Environment mapping
   env:
     configMapName: intel-proxy-config
@@ -35,11 +35,14 @@ deploy:
     # -- Maximum resources per pod
     limits:
       cpu: 4000m
-      memory: 1Gi
+      memory: 2Gi
+      # -- Intel GPU Device Configuration
+      gpu.intel.com/i915: 1
     # -- Minimum resources per pod
     requests:
       cpu: 1000m
       memory: 512Mi
+      gpu.intel.com/i915: 1
   storage:
     # -- Network File System (NFS) storage for models
     nfs:
@@ -48,6 +51,8 @@ deploy:
       path: nil
       readOnly: true
       subPath: nil
+  # -- Name of the GPU Host
+  hostname: ""
 service:
   # -- Type of service
   type: NodePort
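The NFS block carried over in values.yaml still defaults to `enabled: false` with `nil` placeholders. Enabling it alongside the new `hostname` key might look like the sketch below; the server address, export path, and sub-path are invented examples, not values from the chart.

```yaml
deploy:
  storage:
    nfs:
      enabled: true
      server: 10.0.0.5        # hypothetical NFS server
      path: /export/models    # hypothetical export holding the model store
      subPath: torchserve     # hypothetical directory inside the export
      readOnly: true
  # Placeholder GPU node that can reach the NFS share
  hostname: "gpu-node-1"
```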
