Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
gatekeeper: [ "3.20.1", "3.21.0" ]
gatekeeper: [ "3.21.1", "3.22.0" ]
engine: [ "cel", "rego" ]
name: "Integration test on Gatekeeper ${{ matrix.gatekeeper }} for ${{ matrix.engine }} policies"
steps:
Expand Down Expand Up @@ -131,7 +131,7 @@ jobs:
strategy:
matrix:
engine: [ "cel", "rego" ]
gatekeeper: [ "3.20.1", "3.21.0" ]
gatekeeper: [ "3.21.1", "3.22.0" ]
name: "Verify assertions in suite.yaml files for ${{ matrix.engine }} policies"
steps:
- name: Harden Runner
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Package metadata for version 1.0.0 of the k8sgpuactivedeadline policy.
# NOTE(review): field layout looks like an Artifact Hub package descriptor — confirm against the repo's generator.
version: 1.0.0
name: k8sgpuactivedeadline
displayName: GPU Active Deadline Required
# Quoted so the timestamp stays a plain string instead of being parsed as a YAML date.
createdAt: "2026-03-17T00:04:28Z"
description: Requires pods that request NVIDIA GPU resources (nvidia.com/gpu) to set activeDeadlineSeconds. This prevents runaway training jobs from holding GPU resources indefinitely.
# NOTE(review): presumably a checksum of the accompanying template.yaml — confirm how it is produced before editing by hand.
digest: f15fa92d15ee17101b77cea310b9766253332b9bfcd50447c4487f9eeaef856c
license: Apache-2.0
homeURL: https://open-policy-agent.github.io/gatekeeper-library/website/gpuactivedeadline
keywords:
- gatekeeper
- open-policy-agent
- policies
# The readme and install values below are literal block scalars (|-): line breaks are kept, the trailing newline is stripped.
readme: |-
# GPU Active Deadline Required
Requires pods that request NVIDIA GPU resources (nvidia.com/gpu) to set activeDeadlineSeconds. This prevents runaway training jobs from holding GPU resources indefinitely.
install: |-
### Usage
```shell
kubectl apply -f https://raw.githubusercontent.com/open-policy-agent/gatekeeper-library/master/artifacthub/library/general/gpuactivedeadline/1.0.0/template.yaml
```
provider:
name: Gatekeeper Library
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Resource list containing only template.yaml.
# NOTE(review): presumably a kustomization.yaml; the file name is not visible in this view.
resources:
- template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# K8sGpuActiveDeadline constraint applied to core-group Pods, with a maximum deadline parameter.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sGpuActiveDeadline
metadata:
name: require-gpu-deadline
spec:
match:
kinds:
# The empty string selects the core API group.
- apiGroups: [""]
kinds: ["Pod"]
parameters:
# 86400 seconds = 24 hours.
maxActiveDeadlineSeconds: 86400
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Sample Pod that sets activeDeadlineSeconds and has a limit of one nvidia.com/gpu.
# NOTE(review): presumably samples/gpu-job-with-deadline/example_allowed.yaml as referenced by suite.yaml — file name not shown here.
apiVersion: v1
kind: Pod
metadata:
name: gpu-job-with-deadline
spec:
# 3600 seconds = 1 hour.
activeDeadlineSeconds: 3600
containers:
- name: training
image: nvidia/cuda:12.0-runtime
resources:
limits:
# Quantity written as a quoted string.
nvidia.com/gpu: "1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# K8sGpuActiveDeadline constraint applied to core-group Pods; no parameters are set.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sGpuActiveDeadline
metadata:
name: require-gpu-deadline
spec:
match:
kinds:
# The empty string selects the core API group.
- apiGroups: [""]
kinds: ["Pod"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Sample Pod with a limit of one nvidia.com/gpu and no activeDeadlineSeconds in its spec.
# NOTE(review): presumably samples/gpu-job-without-deadline/example_disallowed.yaml as referenced by suite.yaml — file name not shown here.
apiVersion: v1
kind: Pod
metadata:
name: gpu-job-without-deadline
spec:
containers:
- name: training
image: nvidia/cuda:12.0-runtime
resources:
limits:
# Quantity written as a quoted string.
nvidia.com/gpu: "1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# K8sGpuActiveDeadline constraint applied to core-group Pods; no parameters are set.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sGpuActiveDeadline
metadata:
name: require-gpu-deadline
spec:
match:
kinds:
# The empty string selects the core API group.
- apiGroups: [""]
kinds: ["Pod"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Sample Pod whose only container limits are cpu and memory; it has no nvidia.com/gpu entry and no activeDeadlineSeconds.
# NOTE(review): presumably samples/non-gpu-job/example_allowed.yaml as referenced by suite.yaml — file name not shown here.
apiVersion: v1
kind: Pod
metadata:
name: non-gpu-job
spec:
containers:
- name: web
image: nginx:1.25
resources:
limits:
cpu: "500m"
memory: "128Mi"
29 changes: 29 additions & 0 deletions artifacthub/library/general/gpuactivedeadline/1.0.0/suite.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Test suite for the gpuactivedeadline template: three tests, each pairing template.yaml
# with one sample constraint and one sample object.
# NOTE(review): `violations: no` / `violations: yes` are unquoted, boolean-looking scalars under
# YAML 1.1 — confirm the test runner reads this form as intended before changing the quoting.
kind: Suite
apiVersion: test.gatekeeper.sh/v1alpha1
metadata:
name: gpuactivedeadline
tests:
# GPU pod that sets a deadline: no violations asserted.
- name: gpu-job-with-deadline
template: template.yaml
constraint: samples/gpu-job-with-deadline/constraint.yaml
cases:
- name: example-allowed
object: samples/gpu-job-with-deadline/example_allowed.yaml
assertions:
- violations: no
# GPU pod without a deadline: violations asserted.
- name: gpu-job-without-deadline
template: template.yaml
constraint: samples/gpu-job-without-deadline/constraint.yaml
cases:
- name: example-disallowed
object: samples/gpu-job-without-deadline/example_disallowed.yaml
assertions:
- violations: yes
# Pod with no GPU limit: no violations asserted.
- name: non-gpu-job
template: template.yaml
constraint: samples/non-gpu-job/constraint.yaml
cases:
- name: example-allowed
object: samples/non-gpu-job/example_allowed.yaml
assertions:
- violations: no
145 changes: 145 additions & 0 deletions artifacthub/library/general/gpuactivedeadline/1.0.0/template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# ConstraintTemplate that declares the K8sGpuActiveDeadline constraint kind.
# This part carries the template metadata and the parameter schema:
#   - maxActiveDeadlineSeconds (integer)
#   - exemptImages (array of strings)
apiVersion: templates.gatekeeper.sh/v1
kind: ConstraintTemplate
metadata:
name: k8sgpuactivedeadline
annotations:
metadata.gatekeeper.sh/title: "GPU Active Deadline Required"
metadata.gatekeeper.sh/version: 1.0.0
metadata.gatekeeper.sh/bundle: "gatekeeper-ai-training-policies"
description: >-
Requires pods that request NVIDIA GPU resources (nvidia.com/gpu) to set
activeDeadlineSeconds. This prevents runaway training jobs from holding
GPU resources indefinitely.
spec:
crd:
spec:
names:
# Kind name used by constraint resources created from this template.
kind: K8sGpuActiveDeadline
# Schema for the constraint's parameters; both properties are optional (no `required` list is declared).
validation:
openAPIV3Schema:
type: object
description: >-
Requires GPU pods to set activeDeadlineSeconds.
properties:
maxActiveDeadlineSeconds:
description: >-
The maximum value allowed for activeDeadlineSeconds. Set to 0 to
only require the field is present without enforcing a maximum.
type: integer
exemptImages:
description: >-
Any container that uses an image that matches an entry in this list will be excluded
from enforcement. Prefix-matching can be signified with `*`.
type: array
items:
type: string
# Policy logic for the admission target. The first code entry is the
# K8sNativeValidation (CEL) implementation.
targets:
- target: admission.k8s.gatekeeper.sh
code:
- engine: K8sNativeValidation
source:
# Variables, in declaration order:
#   containers / initContainers / ephemeralContainers:
#       the matching list from the object's spec, or [] when the field is absent.
#   exemptImagePrefixes:
#       exemptImages entries that end with "*", with "*" removed
#       (replace() strips every "*" in the entry, not only the trailing one).
#   exemptImageExplicit:
#       exemptImages entries that do not end with "*".
#   exemptImages:
#       images of containers (across all three lists) that equal an explicit
#       entry or start with one of the prefixes.
#   podRequestsGpu:
#       true when some container whose image is not exempt has a
#       resources.limits["nvidia.com/gpu"] quantity greater than 0.
#   hasDeadline:
#       true when spec.activeDeadlineSeconds is present.
#   maxDeadline:
#       the maxActiveDeadlineSeconds parameter, or 0 when it is not set.
variables:
- name: containers
expression: 'has(variables.anyObject.spec.containers) ? variables.anyObject.spec.containers : []'
- name: initContainers
expression: 'has(variables.anyObject.spec.initContainers) ? variables.anyObject.spec.initContainers : []'
- name: ephemeralContainers
expression: 'has(variables.anyObject.spec.ephemeralContainers) ? variables.anyObject.spec.ephemeralContainers : []'
- name: exemptImagePrefixes
expression: |
!has(variables.params.exemptImages) ? [] :
variables.params.exemptImages.filter(image, image.endsWith("*")).map(image, string(image).replace("*", ""))
- name: exemptImageExplicit
expression: |
!has(variables.params.exemptImages) ? [] :
variables.params.exemptImages.filter(image, !image.endsWith("*"))
- name: exemptImages
expression: |
(variables.containers + variables.initContainers + variables.ephemeralContainers).filter(container,
container.image in variables.exemptImageExplicit ||
variables.exemptImagePrefixes.exists(exemption, string(container.image).startsWith(exemption))
).map(container, container.image)
- name: podRequestsGpu
expression: |
(variables.containers + variables.initContainers + variables.ephemeralContainers).exists(container,
!(container.image in variables.exemptImages) &&
has(container.resources) &&
has(container.resources.limits) &&
"nvidia.com/gpu" in container.resources.limits &&
quantity(string(container.resources.limits["nvidia.com/gpu"])).compareTo(quantity("0")) > 0
)
- name: hasDeadline
expression: 'has(variables.anyObject.spec.activeDeadlineSeconds)'
- name: maxDeadline
expression: 'has(variables.params.maxActiveDeadlineSeconds) ? variables.params.maxActiveDeadlineSeconds : 0'
# Two rules:
#   1. A pod that requests a GPU must have activeDeadlineSeconds set.
#   2. When such a pod has a deadline and maxDeadline is non-zero, the deadline
#      must not exceed maxDeadline. A maxDeadline of 0 disables this rule.
validations:
- expression: '!variables.podRequestsGpu || variables.hasDeadline'
messageExpression: '"Pod <" + variables.anyObject.metadata.name + "> requests GPU resources but does not set activeDeadlineSeconds"'
- expression: '!variables.podRequestsGpu || !variables.hasDeadline || variables.maxDeadline == 0 || variables.anyObject.spec.activeDeadlineSeconds <= variables.maxDeadline'
messageExpression: '"Pod <" + variables.anyObject.metadata.name + "> sets activeDeadlineSeconds to " + string(variables.anyObject.spec.activeDeadlineSeconds) + ", which exceeds the maximum allowed " + string(variables.maxDeadline)'
- engine: Rego
source:
rego: |
package k8sgpuactivedeadline

import data.lib.exempt_container.is_exempt

# Violation 1: a pod requests a GPU but does not set activeDeadlineSeconds.
violation[{"msg": msg}] {
pod_requests_gpu
not has_active_deadline
msg := sprintf("Pod <%v> requests GPU resources but does not set activeDeadlineSeconds", [input.review.object.metadata.name])
}

# Violation 2: a pod requests a GPU and sets a deadline above the configured maximum.
# maxActiveDeadlineSeconds defaults to 0 when absent, and the `max_deadline > 0`
# guard makes this rule a no-op in that case.
violation[{"msg": msg}] {
pod_requests_gpu
has_active_deadline
max_deadline := object.get(input, ["parameters", "maxActiveDeadlineSeconds"], 0)
max_deadline > 0
deadline := input.review.object.spec.activeDeadlineSeconds
deadline > max_deadline
msg := sprintf("Pod <%v> sets activeDeadlineSeconds to %v, which exceeds the maximum allowed %v", [input.review.object.metadata.name, deadline, max_deadline])
}

# True when at least one non-exempt container has a limits entry for
# "nvidia.com/gpu" whose numeric value is greater than 0.
pod_requests_gpu {
container := input_containers[_]
not is_exempt(container)
gpu := container.resources.limits["nvidia.com/gpu"]
to_number(gpu) > 0
}

# True when spec.activeDeadlineSeconds is defined on the reviewed object.
has_active_deadline {
input.review.object.spec.activeDeadlineSeconds
}

# Set of all containers in the pod: regular containers ...
input_containers[c] {
c := input.review.object.spec.containers[_]
}

# ... init containers ...
input_containers[c] {
c := input.review.object.spec.initContainers[_]
}

# ... and ephemeral containers.
input_containers[c] {
c := input.review.object.spec.ephemeralContainers[_]
}
libs:
- |
package lib.exempt_container

# True when the container's image matches any entry of parameters.exemptImages.
# A missing parameters object or exemptImages list is treated as an empty list.
is_exempt(container) {
exempt_images := object.get(object.get(input, "parameters", {}), "exemptImages", [])
img := container.image
exemption := exempt_images[_]
_matches_exemption(img, exemption)
}

# Exact match: used for entries that do not end with "*".
_matches_exemption(img, exemption) {
not endswith(exemption, "*")
exemption == img
}

# Prefix match: used for entries ending with "*"; the trailing "*" is dropped
# and the image must start with what remains.
_matches_exemption(img, exemption) {
endswith(exemption, "*")
prefix := trim_suffix(exemption, "*")
startswith(img, prefix)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Package metadata for version 1.0.0 of the k8sgpunodetargeting policy.
# NOTE(review): field layout looks like an Artifact Hub package descriptor — confirm against the repo's generator.
version: 1.0.0
name: k8sgpunodetargeting
displayName: GPU Node Targeting
# Quoted so the timestamp stays a plain string instead of being parsed as a YAML date.
createdAt: "2026-04-10T20:39:17Z"
description: Requires pods that request NVIDIA GPU resources (nvidia.com/gpu) to target GPU-labeled nodes using required node affinity or nodeSelector. This helps ensure GPU workloads only land on nodes that advertise GPU capacity.
# NOTE(review): presumably a checksum of the accompanying template.yaml — confirm how it is produced before editing by hand.
digest: 49b1e6ea5ea6abba9b4050cc8e4c5788a597877d976c210355298d070990ce29
license: Apache-2.0
homeURL: https://open-policy-agent.github.io/gatekeeper-library/website/gpunodetargeting
keywords:
- gatekeeper
- open-policy-agent
- policies
# The readme and install values below are literal block scalars (|-): line breaks are kept, the trailing newline is stripped.
readme: |-
# GPU Node Targeting
Requires pods that request NVIDIA GPU resources (nvidia.com/gpu) to target GPU-labeled nodes using required node affinity or nodeSelector. This helps ensure GPU workloads only land on nodes that advertise GPU capacity.
install: |-
### Usage
```shell
kubectl apply -f https://raw.githubusercontent.com/open-policy-agent/gatekeeper-library/master/artifacthub/library/general/gpunodetargeting/1.0.0/template.yaml
```
provider:
name: Gatekeeper Library
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Resource list containing only template.yaml.
# NOTE(review): presumably a kustomization.yaml; the file name is not visible in this view.
resources:
- template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# K8sGpuNodeTargeting constraint applied to core-group Pods.
# NOTE(review): the K8sGpuNodeTargeting template is not visible in this view; confirm how
# nodeLabelKey / nodeLabelValues are interpreted against it.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sGpuNodeTargeting
metadata:
name: require-gpu-node-targeting
spec:
match:
kinds:
# The empty string selects the core API group.
- apiGroups: [""]
kinds: ["Pod"]
parameters:
nodeLabelKey: "nvidia.com/gpu.present"
nodeLabelValues:
# Quoted so the value is the string "true", not a YAML boolean.
- "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Sample Pod with a limit of one nvidia.com/gpu that uses required node affinity
# on the label nvidia.com/gpu.present with value "true".
apiVersion: v1
kind: Pod
metadata:
name: gpu-pod-with-node-affinity
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: nvidia.com/gpu.present
operator: In
values:
# Quoted so the value is the string "true", not a YAML boolean.
- "true"
containers:
- name: training
image: nvidia/cuda:12.0-runtime
resources:
limits:
# Quantity written as a quoted string.
nvidia.com/gpu: "1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# K8sGpuNodeTargeting constraint applied to core-group Pods.
# NOTE(review): the K8sGpuNodeTargeting template is not visible in this view; confirm how
# nodeLabelKey / nodeLabelValues are interpreted against it.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sGpuNodeTargeting
metadata:
name: require-gpu-node-targeting
spec:
match:
kinds:
# The empty string selects the core API group.
- apiGroups: [""]
kinds: ["Pod"]
parameters:
nodeLabelKey: "nvidia.com/gpu.present"
nodeLabelValues:
# Quoted so the value is the string "true", not a YAML boolean.
- "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Sample Pod with a limit of one nvidia.com/gpu that uses a nodeSelector
# on the label nvidia.com/gpu.present with value "true".
apiVersion: v1
kind: Pod
metadata:
name: gpu-pod-with-node-selector
spec:
nodeSelector:
# Quoted so the value is the string "true", not a YAML boolean.
nvidia.com/gpu.present: "true"
containers:
- name: training
image: nvidia/cuda:12.0-runtime
resources:
limits:
# Quantity written as a quoted string.
nvidia.com/gpu: "1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# K8sGpuNodeTargeting constraint applied to core-group Pods.
# NOTE(review): the K8sGpuNodeTargeting template is not visible in this view; confirm how
# nodeLabelKey / nodeLabelValues are interpreted against it.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sGpuNodeTargeting
metadata:
name: require-gpu-node-targeting
spec:
match:
kinds:
# The empty string selects the core API group.
- apiGroups: [""]
kinds: ["Pod"]
parameters:
nodeLabelKey: "nvidia.com/gpu.present"
nodeLabelValues:
# Quoted so the value is the string "true", not a YAML boolean.
- "true"
Loading
Loading