From 8c0e348d180980fc249e50b18237f62e008a8f6c Mon Sep 17 00:00:00 2001 From: Syulin7 <735122171@qq.com> Date: Sun, 29 Jan 2023 11:42:10 +0800 Subject: [PATCH] Support Koordinator Gang Scheduler Signed-off-by: Syulin7 <735122171@qq.com> --- cmd/training-operator.v1/main.go | 5 ++++- go.mod | 2 ++ go.sum | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cmd/training-operator.v1/main.go b/cmd/training-operator.v1/main.go index 4b09586cdb..67ae8480bc 100644 --- a/cmd/training-operator.v1/main.go +++ b/cmd/training-operator.v1/main.go @@ -73,7 +73,8 @@ func main() { flag.StringVar(&leaderElectionID, "leader-election-id", "1ca428e5.training-operator.kubeflow.org", "The ID for leader election.") flag.Var(&enabledSchemes, "enable-scheme", "Enable scheme(s) as --enable-scheme=tfjob --enable-scheme=pytorchjob, case insensitive."+ " Now supporting TFJob, PyTorchJob, MXNetJob, XGBoostJob, PaddleJob. By default, all supported schemes will be enabled.") - flag.StringVar(&gangSchedulerName, "gang-scheduler-name", "none", "The scheduler to gang-schedule kubeflow jobs, defaults to none") + flag.StringVar(&gangSchedulerName, "gang-scheduler-name", "none", "The scheduler to gang-schedule kubeflow jobs, defaults to none."+ + " Now supporting none, volcano, scheduler-plugins, koord-scheduler.") flag.StringVar(&namespace, "namespace", os.Getenv(commonutil.EnvKubeflowNamespace), "The namespace to monitor kubeflow jobs. If unset, it monitors all namespaces cluster-wide."+ "If set, it only monitors kubeflow jobs in the given namespace.") flag.IntVar(&monitoringPort, "monitoring-port", 9443, "Endpoint port for displaying monitoring metrics. "+ @@ -121,6 +122,8 @@ func main() { gangSchedulingSetupFunc = common.GenVolcanoSetupFunc(volcanoClientSet) } else if strings.EqualFold(gangSchedulerName, string(common.GangSchedulerSchedulerPlugins)) { gangSchedulingSetupFunc = common.GenSchedulerPluginsSetupFunc(mgr.GetClient()) + } else if strings.EqualFold(gangSchedulerName, string(common.GangSchedulerKoordinator)) { + gangSchedulingSetupFunc = common.GenKoordinatorSetupFunc(mgr.GetClient()) } // TODO: We need a general manager. all rest reconciler addsToManager diff --git a/go.mod b/go.mod index a2c8d0d52b..4cfeebbe79 100644 --- a/go.mod +++ b/go.mod @@ -89,3 +89,5 @@ require ( sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect ) + +replace github.com/kubeflow/common v0.4.6 => github.com/Syulin7/common v0.0.0-20230131030120-5b91d801ed27 diff --git a/go.sum b/go.sum index b9fbfacc7b..5eba3dd0d1 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,8 @@ github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/Syulin7/common v0.0.0-20230131030120-5b91d801ed27 h1:ojBmit2zMs0dm3W5GaAVs06kth/DVZ8OVICiP7jslCg= +github.com/Syulin7/common v0.0.0-20230131030120-5b91d801ed27/go.mod h1:43MAof/uhpJA2C0urynqatE3oKFQc7m2HLmJty7waqY= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -327,8 +329,6 @@ github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubeflow/common v0.4.6 h1:yzJf/HEdS6ginD0GlVkgbOFie0Sp66VdGjXidAGZIlk= -github.com/kubeflow/common v0.4.6/go.mod h1:43MAof/uhpJA2C0urynqatE3oKFQc7m2HLmJty7waqY= github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=