Skip to content

Commit 11984fc

Browse files
authored
hacks to avoid dockerhub rate limits during CI (#42)
1 parent de7fbf8 commit 11984fc

File tree

3 files changed

+13
-31
lines changed

3 files changed

+13
-31
lines changed

hack/e2e-util.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,16 @@ export KA_BIN=_output/bin
2020
export WAIT_TIME="20s"
2121
export KUTTL_VERSION=0.15.0
2222
export CERTMANAGER_VERSION=v1.13.3
23-
export KUBEFLOW_VERSION=v1.7.0
2423
DUMP_LOGS="true"
2524

25+
# These must be kept in sync -- we prepull and load these to mitigate dockerhub rate limits
26+
export KUBEFLOW_VERSION=v1.7.0
27+
export IMAGE_KUBEFLOW_OPERATOR="docker.io/kubeflow/training-operator:v1-855e096"
28+
2629
# These are small images used by the e2e tests.
2730
# Pull and kind load to avoid long delays during testing
2831
export IMAGE_ECHOSERVER="quay.io/project-codeflare/echo-server:1.0"
2932
export IMAGE_BUSY_BOX_LATEST="quay.io/project-codeflare/busybox:latest"
30-
export IMAGE_ALPINE_310="docker.io/alpine:3.10"
3133

3234
function update_test_host {
3335

@@ -115,7 +117,7 @@ function check_prerequisites {
115117
}
116118

117119
function pull_images {
118-
for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_ALPINE_310}
120+
for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR}
119121
do
120122
docker pull $image
121123
if [ $? -ne 0 ]
@@ -138,7 +140,7 @@ function kind_up_cluster {
138140
fi
139141
CLUSTER_STARTED="true"
140142

141-
for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_ALPINE_310}
143+
for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR}
142144
do
143145
kind load docker-image ${image} ${CLUSTER_CONTEXT}
144146
if [ $? -ne 0 ]

test/e2e/appwrapper_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
6868

6969
Describe("Creation of Kubeflow Training Operator GVKs", func() {
7070
It("PyTorch Jobs", func() {
71-
aw := createAppWrapper(ctx, pytorchjob(1, 100, 2, 250))
71+
aw := createAppWrapper(ctx, pytorchjob(2, 250))
7272
appwrappers = append(appwrappers, aw)
7373
Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
7474
})

test/e2e/fixtures_test.go

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -209,29 +209,16 @@ func batchjob(milliCPU int64) workloadv1beta2.AppWrapperComponent {
209209
}
210210
}
211211

212+
// This is not a useful PyTorchJob:
213+
// 1. We use a dummy busybox image to avoid pulling a large & rate-limited image from dockerhub
214+
// 2. We avoid needing the injected sidecar (alpine:3.10 from dockerhub) by not specifying a Master
212215
const pytorchYAML = `
213216
apiVersion: "kubeflow.org/v1"
214217
kind: PyTorchJob
215218
metadata:
216219
name: %v
217220
spec:
218221
pytorchReplicaSpecs:
219-
Master:
220-
replicas: %v
221-
restartPolicy: OnFailure
222-
template:
223-
spec:
224-
terminationGracePeriodSeconds: 0
225-
containers:
226-
- name: pytorch
227-
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
228-
command:
229-
- "python3"
230-
- "/opt/pytorch-mnist/mnist.py"
231-
- "--epochs=1"
232-
resources:
233-
requests:
234-
cpu: %v
235222
Worker:
236223
replicas: %v
237224
restartPolicy: OnFailure
@@ -240,31 +227,24 @@ spec:
240227
terminationGracePeriodSeconds: 0
241228
containers:
242229
- name: pytorch
243-
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
244-
command:
245-
- "python3"
246-
- "/opt/pytorch-mnist/mnist.py"
247-
- "--epochs=1"
230+
image: quay.io/project-codeflare/busybox:1.36
231+
command: ["sh", "-c", "sleep 10"]
248232
resources:
249233
requests:
250234
cpu: %v
251235
`
252236

253-
func pytorchjob(replicasMaster int, milliCPUMaster int64, replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent {
237+
func pytorchjob(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent {
254238
yamlString := fmt.Sprintf(pytorchYAML,
255239
randName("pytorchjob"),
256-
replicasMaster,
257-
resource.NewMilliQuantity(milliCPUMaster, resource.DecimalSI),
258240
replicasWorker,
259241
resource.NewMilliQuantity(milliCPUWorker, resource.DecimalSI),
260242
)
261243
jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString))
262244
Expect(err).NotTo(HaveOccurred())
263-
masters := int32(replicasMaster)
264245
workers := int32(replicasWorker)
265246
return workloadv1beta2.AppWrapperComponent{
266247
PodSets: []workloadv1beta2.AppWrapperPodSet{
267-
{Replicas: &masters, Path: "template.spec.pytorchReplicaSpecs.Master.template"},
268248
{Replicas: &workers, Path: "template.spec.pytorchReplicaSpecs.Worker.template"},
269249
},
270250
Template: runtime.RawExtension{Raw: jsonBytes},

0 commit comments

Comments
 (0)