diff --git a/charts/intel-gpu-resource-driver/README.md b/charts/intel-gpu-resource-driver/README.md index f00f276..a0b43c6 100644 --- a/charts/intel-gpu-resource-driver/README.md +++ b/charts/intel-gpu-resource-driver/README.md @@ -10,9 +10,22 @@ More info: [Intel Resource Drivers for Kubernetes](https://github.com/intel/inte ## Installing the chart ``` -helm install intel-gpu-resource-driver oci://ghcr.io/intel/intel-resource-drivers-for-kubernetes/intel-gpu-resource-driver \ +helm install \ + --namespace "intel-gpu-resource-driver" \ --create-namespace \ - --namespace intel-gpu-resource-driver + intel-gpu-resource-driver oci://ghcr.io/intel/intel-resource-drivers-for-kubernetes/intel-gpu-resource-driver +``` + +> [!NOTE] +> For Kubernetes clusters using [Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/), +> pre-create the namespace and label it so that the driver's hostPath volumes are allowed. + +``` +kubectl create namespace intel-gpu-resource-driver +kubectl label --overwrite namespace intel-gpu-resource-driver pod-security.kubernetes.io/enforce=privileged +helm install \ + --namespace "intel-gpu-resource-driver" \ + intel-gpu-resource-driver oci://ghcr.io/intel/intel-resource-drivers-for-kubernetes/intel-gpu-resource-driver ``` ## Uninstalling the chart diff --git a/charts/intel-gpu-resource-driver/templates/_helpers.tpl b/charts/intel-gpu-resource-driver/templates/_helpers.tpl index 01c4419..58b22b3 100644 --- a/charts/intel-gpu-resource-driver/templates/_helpers.tpl +++ b/charts/intel-gpu-resource-driver/templates/_helpers.tpl @@ -20,10 +20,6 @@ intel-gpu-resource-driver {{- end -}} {{- end }} -{{- define "intel-gpu-resource-driver.namespace" -}} -{{- default .Release.Namespace .Values.namespaceOverride }} -{{- end }} - {{/* Labels for templates */}} {{- define "intel-gpu-resource-driver.labels" -}} helm.sh/chart: {{ include "intel-gpu-resource-driver.chart" . 
}} diff --git a/charts/intel-gpu-resource-driver/templates/clusterrole.yaml b/charts/intel-gpu-resource-driver/templates/clusterrole.yaml index a4ff6a7..e05ca20 100644 --- a/charts/intel-gpu-resource-driver/templates/clusterrole.yaml +++ b/charts/intel-gpu-resource-driver/templates/clusterrole.yaml @@ -2,7 +2,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: {{ include "intel-gpu-resource-driver.clusterRoleName" . }} - namespace: {{ include "intel-gpu-resource-driver.namespace" . }} + namespace: {{ .Release.Namespace }} rules: - apiGroups: [""] resources: ["nodes"] diff --git a/charts/intel-gpu-resource-driver/templates/clusterrolebinding.yaml b/charts/intel-gpu-resource-driver/templates/clusterrolebinding.yaml index 20b387d..accedc2 100644 --- a/charts/intel-gpu-resource-driver/templates/clusterrolebinding.yaml +++ b/charts/intel-gpu-resource-driver/templates/clusterrolebinding.yaml @@ -2,11 +2,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: {{ include "intel-gpu-resource-driver.clusterRoleBindingName" . }} - namespace: {{ include "intel-gpu-resource-driver.namespace" . }} + namespace: {{ .Release.Namespace }} subjects: - kind: ServiceAccount name: {{ include "intel-gpu-resource-driver.serviceAccountName" . }} - namespace: {{ include "intel-gpu-resource-driver.namespace" . }} + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: {{ include "intel-gpu-resource-driver.clusterRoleName" . 
}} diff --git a/charts/intel-gpu-resource-driver/templates/nfd.yaml b/charts/intel-gpu-resource-driver/templates/node-feature-rules.yaml similarity index 97% rename from charts/intel-gpu-resource-driver/templates/nfd.yaml rename to charts/intel-gpu-resource-driver/templates/node-feature-rules.yaml index 322b399..020c9d6 100644 --- a/charts/intel-gpu-resource-driver/templates/nfd.yaml +++ b/charts/intel-gpu-resource-driver/templates/node-feature-rules.yaml @@ -1,4 +1,4 @@ -{{- if .Values.nfd.enabled }} +{{- if or .Values.nodeFeatureRules.enabled .Values.nfd.enabled }} apiVersion: nfd.k8s-sigs.io/v1alpha1 kind: NodeFeatureRule metadata: diff --git a/charts/intel-gpu-resource-driver/templates/resource-driver-namespace.yaml b/charts/intel-gpu-resource-driver/templates/resource-driver-namespace.yaml deleted file mode 100644 index a57604a..0000000 --- a/charts/intel-gpu-resource-driver/templates/resource-driver-namespace.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: intel-gpu-resource-driver diff --git a/charts/intel-gpu-resource-driver/templates/resource-driver.yaml b/charts/intel-gpu-resource-driver/templates/resource-driver.yaml index 400c471..fad84e8 100644 --- a/charts/intel-gpu-resource-driver/templates/resource-driver.yaml +++ b/charts/intel-gpu-resource-driver/templates/resource-driver.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: DaemonSet metadata: name: intel-gpu-resource-driver-kubelet-plugin - namespace: {{ include "intel-gpu-resource-driver.namespace" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "intel-gpu-resource-driver.labels" . | nindent 4 }} spec: @@ -14,7 +14,6 @@ spec: labels: app: intel-gpu-resource-driver spec: - serviceAccount: intel-gpu-resource-driver-service-account serviceAccountName: {{ include "intel-gpu-resource-driver.serviceAccountName" . 
}} containers: - name: kubelet-plugin @@ -62,10 +61,10 @@ spec: path: /var/lib/kubelet/plugins - name: cdi hostPath: - path: /etc/cdi + path: {{ .Values.cdi.staticPath }} - name: varruncdi hostPath: - path: /var/run/cdi + path: {{ .Values.cdi.dynamicPath}} - name: sysfs hostPath: path: /sys @@ -73,7 +72,7 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} - {{- if .Values.nfd.enabled }} + {{- if or .Values.nodeFeatureRules.enabled .Values.nfd.enabled }} nodeSelector: intel.feature.node.kubernetes.io/gpu: "true" {{- else }} diff --git a/charts/intel-gpu-resource-driver/templates/serviceaccount.yaml b/charts/intel-gpu-resource-driver/templates/serviceaccount.yaml index 1c88089..3046a48 100644 --- a/charts/intel-gpu-resource-driver/templates/serviceaccount.yaml +++ b/charts/intel-gpu-resource-driver/templates/serviceaccount.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "intel-gpu-resource-driver.serviceAccountName" . }} - namespace: {{ include "intel-gpu-resource-driver.namespace" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "intel-gpu-resource-driver.labels" . | nindent 4 }} {{- with .Values.serviceAccount.annotations }} diff --git a/charts/intel-gpu-resource-driver/templates/validating-admission-policy.yaml b/charts/intel-gpu-resource-driver/templates/validating-admission-policy.yaml index 503aeb5..637c92c 100644 --- a/charts/intel-gpu-resource-driver/templates/validating-admission-policy.yaml +++ b/charts/intel-gpu-resource-driver/templates/validating-admission-policy.yaml @@ -13,7 +13,7 @@ spec: matchConditions: - name: isRestrictedUser expression: >- - request.userInfo.username == "system:serviceaccount:intel-gpu-resource-driver:intel-gpu-resource-driver-service-account" + request.userInfo.username == "system:serviceaccount:{{ .Release.Namespace }}:{{ include "intel-gpu-resource-driver.serviceAccountName" . 
}}" variables: - name: userNodeName expression: >- diff --git a/charts/intel-gpu-resource-driver/values.yaml b/charts/intel-gpu-resource-driver/values.yaml index 0613473..80c39d4 100644 --- a/charts/intel-gpu-resource-driver/values.yaml +++ b/charts/intel-gpu-resource-driver/values.yaml @@ -1,6 +1,5 @@ # Default values for intel-gpu-resource-driver. nameOverride: "" -namespaceOverride: "intel-gpu-resource-driver" fullnameOverride: "" selectorLabelsOverride: {} @@ -14,14 +13,12 @@ image: serviceAccount: create: true annotations: {} - name: intel-gpu-resource-driver-service-account + name: "" automount: true kubeletPlugin: podAnnotations: {} - nodeSelector: {} - # label used when nfd.enabled is true - #intel.feature.node.kubernetes.io/gpu: "true" + nodeSelector: {} # ignored when .Values.nodeFeatureRules.enabled or .Values.nfd.enabled tolerations: - key: node-role.kubernetes.io/master operator: Exists @@ -37,6 +34,13 @@ kubeletPlugin: effect: "NoSchedule" affinity: {} +cdi: + staticPath: /etc/cdi + dynamicPath: /var/run/cdi + +nodeFeatureRules: + enabled: false + nfd: enabled: false # change to true to install NFD to the cluster nameOverride: intel-gpu-nfd diff --git a/doc/gpu/USAGE.md b/doc/gpu/USAGE.md index eb3a872..63ae30e 100644 --- a/doc/gpu/USAGE.md +++ b/doc/gpu/USAGE.md @@ -273,17 +273,7 @@ Unlike with normal GPU ResourceClaims: * Monitor deployment gets access to all GPU devices on a node * `adminAccess` ResourceClaim allocations are not counted by scheduler as consumed resource, and can be allocated to workloads -### Helm Charts +### Helm Chart -[Intel GPU Resource Driver Helm Chart](https://github.com/intel/helm-charts/tree/main/charts/intel-gpu-resource-driver) is located in Intel Helm Charts repository. 
- -To add repo: -``` -helm repo add intel https://intel.github.io/helm-charts -``` - -To install Helm Chart: -``` -helm install intel-gpu-resource-driver intel/intel-gpu-resource-driver \ ---create-namespace --namespace intel-gpu-resource-driver -``` +The [Intel GPU Resource Driver Helm Chart](../../charts/intel-gpu-resource-driver) is published +as a package to the GitHub OCI registry and can be installed directly with Helm.