diff --git a/workflows/charts/torchserve/README.md b/workflows/charts/torchserve/README.md index 821859cb..feb432af 100644 --- a/workflows/charts/torchserve/README.md +++ b/workflows/charts/torchserve/README.md @@ -20,11 +20,13 @@ Then, follow the installation notes to test the deployment | Key | Type | Default | Description | |-----|------|---------|-------------| | deploy.env | object | `{"configMapName":"intel-proxy-config","enabled":true}` | Add Environment mapping | -| deploy.image | string | `"intel/intel-optimized-pytorch:2.3.0-serving-cpu"` | Intel Optimized torchserve image | +| deploy.hostname | string | `""` | Name of the GPU Host. Please add hostname before running | +| deploy.image | string | `"intel/intel-extension-for-pytorch:2.5.10-serving-xpu"` | Torchserve on Intel Image | | deploy.modelConfig | string | `"/home/model-server/config.properties"` | Model Server Configuration file location | | deploy.models | string | `"all"` | Models to be loaded | | deploy.replicas | int | `1` | Number of pods | -| deploy.resources.limits | object | `{"cpu":"4000m","memory":"1Gi"}` | Maximum resources per pod | +| deploy.resources.limits | object | `{"cpu":"4000m","gpu.intel.com/i915":1,"memory":"2Gi"}` | Maximum resources per pod | +| deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration | -| deploy.resources.requests | object | `{"cpu":"1000m","memory":"512Mi"}` | Minimum resources per pod | +| deploy.resources.requests | object | `{"cpu":"1000m","gpu.intel.com/i915":1,"memory":"512Mi"}` | Minimum resources per pod | | deploy.storage.nfs | object | `{"enabled":false,"path":"nil","readOnly":true,"server":"nil","subPath":"nil"}` | Network File System (NFS) storage for models | | deploy.tokens_disabled | bool | `true` | Set token authentication on or off. Checkout the latest [torchserve docs](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more details. 
| diff --git a/workflows/charts/torchserve/templates/deploy.yaml b/workflows/charts/torchserve/templates/deploy.yaml index 85f03142..718e3f56 100644 --- a/workflows/charts/torchserve/templates/deploy.yaml +++ b/workflows/charts/torchserve/templates/deploy.yaml @@ -29,7 +29,7 @@ spec: {{- include "torchserve.selectorLabels" . | nindent 8 }} spec: containers: - - name: torchserve + - name: {{ .Chart.Name }} image: {{ .Values.deploy.image }} args: - 'torchserve' @@ -62,6 +62,10 @@ spec: - name: grpc-2 containerPort: 7071 volumeMounts: + - name: dshm + mountPath: /dev/shm + - name: dri + mountPath: /dev/dri {{- if .Values.deploy.storage.nfs.enabled }} - name: model mountPath: /home/model-server/model-store @@ -71,16 +75,18 @@ spec: mountPath: /home/model-server/model-store {{- end }} resources: - requests: - cpu: {{ .Values.deploy.resources.requests.cpu }} - memory: {{ .Values.deploy.resources.requests.memory }} - limits: - cpu: {{ .Values.deploy.resources.limits.cpu }} - memory: {{ .Values.deploy.resources.limits.memory }} + {{- toYaml .Values.deploy.resources | nindent 12 }} securityContext: - fsGroup: 1000 runAsUser: 1000 + fsGroup: 1000 volumes: + - name: dshm + emptyDir: + medium: Memory + - name: dri + hostPath: + path: /dev/dri + type: Directory {{- if .Values.deploy.storage.nfs.enabled }} - name: model nfs: @@ -93,3 +99,5 @@ spec: persistentVolumeClaim: claimName: {{ include "torchserve.fullname" . 
}}-model-dir {{- end }} + nodeSelector: + kubernetes.io/hostname: {{ required "deploy.hostname must be set to the GPU node's hostname" .Values.deploy.hostname }} diff --git a/workflows/charts/torchserve/values.yaml b/workflows/charts/torchserve/values.yaml index f59e1c40..6510aedb 100644 --- a/workflows/charts/torchserve/values.yaml +++ b/workflows/charts/torchserve/values.yaml @@ -17,8 +17,8 @@ nameOverride: "" # -- Full qualified Domain Name fullnameOverride: "" deploy: - # -- Intel Optimized torchserve image - image: intel/intel-optimized-pytorch:2.3.0-serving-cpu + # -- Torchserve on Intel Image + image: intel/intel-extension-for-pytorch:2.5.10-serving-xpu # -- Add Environment mapping env: configMapName: intel-proxy-config @@ -35,11 +35,14 @@ deploy: # -- Maximum resources per pod limits: cpu: 4000m - memory: 1Gi + memory: 2Gi + # -- Intel GPU Device Configuration + gpu.intel.com/i915: 1 # -- Minimum resources per pod requests: cpu: 1000m memory: 512Mi + gpu.intel.com/i915: 1 storage: # -- Network File System (NFS) storage for models nfs: @@ -48,6 +51,8 @@ deploy: path: nil readOnly: true subPath: nil + # -- Name of the GPU Host. Please add hostname before running + hostname: "" service: # -- Type of service type: NodePort