From b0a87c6ae207ccd958a54fc516bcafbff490c7ae Mon Sep 17 00:00:00 2001 From: nkwangleiGIT Date: Thu, 11 Apr 2024 22:50:33 +0800 Subject: [PATCH] fix: Add SYSTEM_ARGS env and fix related issue --- .../arcadia_v1alpha1_worker_baichuan2-7b.yaml | 2 +- ...dia_v1alpha1_worker_bge-large-zh-v1.5.yaml | 2 +- .../arcadia_v1alpha1_worker_qwen-7b-chat.yaml | 2 +- config/samples/ray.io_v1_raycluster.yaml | 4 +- deploy/charts/llm-worker/values.yaml | 2 +- deploy/llms/start-worker.sh | 2 +- pkg/worker/runner.go | 40 ++++++++----------- 7 files changed, 24 insertions(+), 30 deletions(-) diff --git a/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml b/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml index f60224138..3cdfe1262 100644 --- a/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml +++ b/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml @@ -25,7 +25,7 @@ spec: image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z imagePullPolicy: IfNotPresent runner: - image: kubeagi/arcadia-fastchat-worker:v0.2.36 + image: kubeagi/arcadia-fastchat-worker:vllm-v0.4.0-hotfix imagePullPolicy: IfNotPresent resources: limits: diff --git a/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml b/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml index f1ddd8de3..670a64502 100644 --- a/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml +++ b/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml @@ -12,7 +12,7 @@ spec: image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z imagePullPolicy: IfNotPresent runner: - image: kubeagi/arcadia-fastchat-worker:v0.2.36 + image: kubeagi/arcadia-fastchat-worker:vllm-v0.4.0-hotfix imagePullPolicy: IfNotPresent model: kind: "Models" diff --git a/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml b/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml index f3c99ed12..05bcabb0d 100644 --- a/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml +++ b/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml @@ -15,7 +15,7 @@ spec: image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z imagePullPolicy: IfNotPresent runner: - image: kubeagi/arcadia-fastchat-worker:v0.2.36 + image: kubeagi/arcadia-fastchat-worker:vllm-v0.4.0-hotfix imagePullPolicy: IfNotPresent resources: limits: diff --git a/config/samples/ray.io_v1_raycluster.yaml b/config/samples/ray.io_v1_raycluster.yaml index 6ec0b612f..c19deb51a 100644 --- a/config/samples/ray.io_v1_raycluster.yaml +++ b/config/samples/ray.io_v1_raycluster.yaml @@ -18,7 +18,7 @@ spec: runAsGroup: 0 fsGroup: 0 containers: - - image: kubeagi/ray-ml:2.9.3-py39-vllm + - image: kubeagi/ray-ml:2.9.3-py39-vllm-0.4.0 name: ray-head resources: limits: @@ -48,7 +48,7 @@ spec: app.kubernetes.io/name: kuberay spec: containers: - - image: kubeagi/ray-ml:2.9.3-py39-vllm + - image: kubeagi/ray-ml:2.9.3-py39-vllm-0.4.0 name: ray-worker resources: limits: diff --git a/deploy/charts/llm-worker/values.yaml b/deploy/charts/llm-worker/values.yaml index 94ea01e11..ef67bc2fe 100644 --- a/deploy/charts/llm-worker/values.yaml +++ b/deploy/charts/llm-worker/values.yaml @@ -5,7 +5,7 @@ image: repository: kubeagi/arcadia-fastchat-worker pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "v0.2.0" + tag: "vllm-v0.4.0-hotfix" env: - name: FASTCHAT_MODEL_NAME value: "baichuan2-7b" diff --git a/deploy/llms/start-worker.sh b/deploy/llms/start-worker.sh index 506857d05..abfefc1da 100755 --- a/deploy/llms/start-worker.sh +++ b/deploy/llms/start-worker.sh @@ -31,4 +31,4 @@ python3.9 -m $FASTCHAT_WORKER_NAME --model-names $FASTCHAT_REGISTRATION_MODEL_NA --model-path $FASTCHAT_MODEL_NAME_PATH --worker-address $FASTCHAT_WORKER_ADDRESS \ --controller-address $FASTCHAT_CONTROLLER_ADDRESS \ --num-gpus $NUMBER_GPUS \ - --host 0.0.0.0 --port 21002 $EXTRA_ARGS + --host 0.0.0.0 --port 21002 $SYSTEM_ARGS $EXTRA_ARGS diff --git a/pkg/worker/runner.go b/pkg/worker/runner.go index a35153570..bce283559 100644 --- a/pkg/worker/runner.go +++ b/pkg/worker/runner.go @@ -93,24 +93,16 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1. return nil, fmt.Errorf("failed to get arcadia config with %w", err) } - extraAgrs := "" - for _, envItem := range runner.w.Spec.AdditionalEnvs { - if envItem.Name == "EXTRA_ARGS" { - extraAgrs = envItem.Value - break - } - } - modelFileDir := fmt.Sprintf("%s/%s", defaultModelMountPath, model.Name) additionalEnvs := []corev1.EnvVar{} - extraArgs := fmt.Sprintf("--device %s %s", runner.Device().String(), extraAgrs) + systemArgs := fmt.Sprintf("--device %s", runner.Device().String()) if runner.modelFileFromRemote { m := arcadiav1alpha1.Model{} if err := runner.c.Get(ctx, types.NamespacedName{Namespace: *model.Namespace, Name: model.Name}, &m); err != nil { return nil, err } if m.Spec.Revision != "" { - extraArgs += fmt.Sprintf(" --revision %s ", m.Spec.Revision) + systemArgs += fmt.Sprintf(" --revision %s ", m.Spec.Revision) } if m.Spec.ModelSource == modelSourceFromHugginfFace { modelFileDir = m.Spec.HuggingFaceRepo @@ -139,7 +131,6 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1. {Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s:%d", runner.w.Name+WokerCommonSuffix, runner.w.Namespace, arcadiav1alpha1.DefaultWorkerPort)}, {Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller}, {Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()}, - {Name: "EXTRA_ARGS", Value: extraArgs}, }, Ports: []corev1.ContainerPort{ {Name: "http", ContainerPort: arcadiav1alpha1.DefaultWorkerPort}, @@ -149,6 +140,7 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1. }, Resources: runner.w.Spec.Resources, } + additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "SYSTEM_ARGS", Value: systemArgs}) container.Env = append(container.Env, additionalEnvs...) 
return container, nil @@ -193,12 +185,12 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp return nil, fmt.Errorf("failed to get arcadia config with %w", err) } - extraAgrs := "" + systemArgs := "" additionalEnvs := []corev1.EnvVar{} // configure ray cluster resources := runner.w.Spec.Resources - gpus := runner.NumberOfGPUs() + gpuEnvExist := false // default ray cluster which can only utilize gpus on single nodes rayCluster := config.DefaultRayCluster() for _, envItem := range runner.w.Spec.AdditionalEnvs { @@ -223,12 +215,10 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp // By default, gpu_memory_utilization will be 0.9 if envItem.Name == "GPU_MEMORY_UTILIZATION" { gpuMemoryUtilization, _ := strconv.ParseFloat(envItem.Value, 64) - extraAgrs += fmt.Sprintf(" --gpu_memory_utilization %f", gpuMemoryUtilization) + systemArgs += fmt.Sprintf(" --gpu_memory_utilization %f", gpuMemoryUtilization) } - - // extra arguments to run llm - if envItem.Name == "EXTRA_ARGS" { - extraAgrs = envItem.Value + if envItem.Name == "NUMBER_GPUS" { + gpuEnvExist = true } } klog.V(5).Infof("run worker with raycluster:\n %s", rayCluster.String()) @@ -245,18 +235,16 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp Name: "PYTHON_VERSION", Value: rayCluster.GetPythonVersion(), }) - // Set gpu number to the number of GPUs in the worker's resource - additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: gpus}) modelFileDir := fmt.Sprintf("%s/%s", defaultModelMountPath, model.Name) - extraAgrs = fmt.Sprintf("%s --trust-remote-code", extraAgrs) + systemArgs = fmt.Sprintf("%s --trust-remote-code", systemArgs) if runner.modelFileFromRemote { m := arcadiav1alpha1.Model{} if err := runner.c.Get(ctx, types.NamespacedName{Namespace: *model.Namespace, Name: model.Name}, &m); err != nil { return nil, err } if m.Spec.Revision != "" { - extraAgrs += fmt.Sprintf(" --revision %s", m.Spec.Revision) + systemArgs += fmt.Sprintf(" --revision %s", m.Spec.Revision) } if m.Spec.ModelSource == modelSourceFromHugginfFace { modelFileDir = m.Spec.HuggingFaceRepo @@ -283,7 +271,6 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp {Name: "FASTCHAT_MODEL_NAME", Value: model.Name}, {Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s:%d", runner.w.Name+WokerCommonSuffix, runner.w.Namespace, arcadiav1alpha1.DefaultWorkerPort)}, {Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller}, - {Name: "EXTRA_ARGS", Value: extraAgrs}, }, Ports: []corev1.ContainerPort{ {Name: "http", ContainerPort: arcadiav1alpha1.DefaultWorkerPort}, @@ -295,6 +282,13 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp }, Resources: resources, } + if !gpuEnvExist { + // if env doesn't exist, set gpu number to the number of GPUs in the worker's resource + additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()}) + } + + additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "SYSTEM_ARGS", Value: systemArgs}) + container.Env = append(container.Env, additionalEnvs...) return container, nil }
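
Note (illustrative, not part of the patch): SYSTEM_ARGS is now owned by the controller
(pkg/worker/runner.go composes it from --device, --revision, --gpu_memory_utilization and
--trust-remote-code as applicable), while EXTRA_ARGS is left entirely to values supplied
through the Worker's additionalEnvs, so user flags are no longer overwritten. A minimal
sketch of how the two expand in deploy/llms/start-worker.sh; the device string and the
EXTRA_ARGS value below are hypothetical examples, not taken from this patch:

    # composed by RunnerFastchat.Build and injected into the pod env
    SYSTEM_ARGS="--device cuda"        # assumed device value, for illustration only
    # supplied by the user via the Worker's additionalEnvs, passed through unchanged
    EXTRA_ARGS="--load-8bit"           # hypothetical user-supplied flag
    # start-worker.sh then launches the worker (other flags elided):
    python3.9 -m $FASTCHAT_WORKER_NAME \
        --host 0.0.0.0 --port 21002 $SYSTEM_ARGS $EXTRA_ARGS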