diff --git a/examples/mpi/tensorflow-mnist-elastic.yaml b/examples/mpi/tensorflow-mnist-elastic.yaml new file mode 100644 index 0000000000..b28a2ad001 --- /dev/null +++ b/examples/mpi/tensorflow-mnist-elastic.yaml @@ -0,0 +1,43 @@ +apiVersion: kubeflow.org/v1 +kind: MPIJob +metadata: + name: tensorflow-mnist-elastic +spec: + slotsPerWorker: 1 + cleanPodPolicy: Running + mpiReplicaSpecs: + Launcher: + replicas: 1 + template: + spec: + containers: + - image: horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.5.0-py3.7-cpu + name: mpi-launcher + command: + - horovodrun + args: + - -np + - "2" + - --min-np + - "1" + - --max-np + - "3" + - --host-discovery-script + - /etc/mpi/discover_hosts.sh + - python + - /examples/elastic/tensorflow2_mnist_elastic.py + resources: + limits: + cpu: 1 + memory: 2Gi + Worker: + replicas: 2 + template: + spec: + containers: + - image: horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.5.0-py3.7-cpu + name: mpi-worker + resources: + limits: + cpu: 2 + memory: 4Gi