@@ -209,29 +209,16 @@ func batchjob(milliCPU int64) workloadv1beta2.AppWrapperComponent {
209
209
}
210
210
}
211
211
212
+ // This is not a useful PyTorchJob:
213
+ // 1. It uses a dummy busybox image to avoid pulling a large, rate-limited image from Docker Hub.
214
+ // 2. It does not specify a Master, which avoids needing the injected sidecar (alpine:3.10 from Docker Hub).
212
215
const pytorchYAML = `
213
216
apiVersion: "kubeflow.org/v1"
214
217
kind: PyTorchJob
215
218
metadata:
216
219
name: %v
217
220
spec:
218
221
pytorchReplicaSpecs:
219
- Master:
220
- replicas: %v
221
- restartPolicy: OnFailure
222
- template:
223
- spec:
224
- terminationGracePeriodSeconds: 0
225
- containers:
226
- - name: pytorch
227
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
228
- command:
229
- - "python3"
230
- - "/opt/pytorch-mnist/mnist.py"
231
- - "--epochs=1"
232
- resources:
233
- requests:
234
- cpu: %v
235
222
Worker:
236
223
replicas: %v
237
224
restartPolicy: OnFailure
@@ -240,31 +227,24 @@ spec:
240
227
terminationGracePeriodSeconds: 0
241
228
containers:
242
229
- name: pytorch
243
- image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
244
- command:
245
- - "python3"
246
- - "/opt/pytorch-mnist/mnist.py"
247
- - "--epochs=1"
230
+ image: quay.io/project-codeflare/busybox:1.36
231
+ command: ["sh", "-c", "sleep 10"]
248
232
resources:
249
233
requests:
250
234
cpu: %v
251
235
`
252
236
253
- func pytorchjob (replicasMaster int , milliCPUMaster int64 , replicasWorker int , milliCPUWorker int64 ) workloadv1beta2.AppWrapperComponent {
237
+ func pytorchjob (replicasWorker int , milliCPUWorker int64 ) workloadv1beta2.AppWrapperComponent {
254
238
yamlString := fmt .Sprintf (pytorchYAML ,
255
239
randName ("pytorchjob" ),
256
- replicasMaster ,
257
- resource .NewMilliQuantity (milliCPUMaster , resource .DecimalSI ),
258
240
replicasWorker ,
259
241
resource .NewMilliQuantity (milliCPUWorker , resource .DecimalSI ),
260
242
)
261
243
jsonBytes , err := yaml .YAMLToJSON ([]byte (yamlString ))
262
244
Expect (err ).NotTo (HaveOccurred ())
263
- masters := int32 (replicasMaster )
264
245
workers := int32 (replicasWorker )
265
246
return workloadv1beta2.AppWrapperComponent {
266
247
PodSets : []workloadv1beta2.AppWrapperPodSet {
267
- {Replicas : & masters , Path : "template.spec.pytorchReplicaSpecs.Master.template" },
268
248
{Replicas : & workers , Path : "template.spec.pytorchReplicaSpecs.Worker.template" },
269
249
},
270
250
Template : runtime.RawExtension {Raw : jsonBytes },
0 commit comments