From cf429d4dec37a5e98dbbcccb9b7a21d5364241a7 Mon Sep 17 00:00:00 2001
From: ishaansehgal99
Date: Wed, 29 Jan 2025 17:05:37 -0800
Subject: [PATCH 1/4] feat: Add DeepSeek model.go

---
 presets/workspace/models/deepseek/model.go | 139 +++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 presets/workspace/models/deepseek/model.go

diff --git a/presets/workspace/models/deepseek/model.go b/presets/workspace/models/deepseek/model.go
new file mode 100644
index 000000000..d57014108
--- /dev/null
+++ b/presets/workspace/models/deepseek/model.go
@@ -0,0 +1,139 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+package deepseek
+
+import (
+	"time"
+
+	kaitov1alpha1 "github.com/kaito-project/kaito/api/v1alpha1"
+	"github.com/kaito-project/kaito/pkg/model"
+	"github.com/kaito-project/kaito/pkg/utils/plugin"
+	"github.com/kaito-project/kaito/pkg/workspace/inference"
+	"github.com/kaito-project/kaito/pkg/workspace/tuning"
+)
+
+func init() {
+	plugin.KaitoModelRegister.Register(&plugin.Registration{
+		Name:     PresetDeepSeekR1DistillLlama8BModel,
+		Instance: &deepseekA,
+	})
+	plugin.KaitoModelRegister.Register(&plugin.Registration{
+		Name:     PresetDeepSeekR1DistillQwen14BModel,
+		Instance: &deepseekB,
+	})
+}
+
+var (
+	PresetDeepSeekR1DistillLlama8BModel  = "deepseek-r1-distill-llama-8b"
+	PresetDeepSeekR1DistillQwen14BModel  = "deepseek-r1-distill-qwen-14b"
+
+	PresetDeepSeekTagMap = map[string]string{
+		"DeepSeekDistillLlama8B":  "0.0.1",
+		"DeepSeekDistillQwen14B":  "0.0.1",
+	}
+
+	baseCommandPresetDeepseekInference = "accelerate launch"
+	baseCommandPresetDeepseekTuning    = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
+	deepseekLlama8bRunParams           = map[string]string{
+		"torch_dtype":   "bfloat16",
+		"pipeline":      "text-generation",
+		"chat_template": "/workspace/chat_templates/deepseek-r1-distill-llama-8b.jinja",
+	}
+	deepseekLlama8bRunParamsVLLM = map[string]string{
+		"dtype":         "float16",
+		"chat-template": "/workspace/chat_templates/deepseek-r1-distill-llama-8b.jinja",
+	}
+	deepseekQwen14bRunParams = map[string]string{
+		"torch_dtype":   "bfloat16",
+		"pipeline":      "text-generation",
+		"chat_template": "/workspace/chat_templates/deepseek-r1-distill-qwen-14b.jinja",
+	}
+	deepseekQwen14bRunParamsVLLM = map[string]string{
+		"dtype":         "float16",
+		"chat-template": "/workspace/chat_templates/deepseek-r1-distill-qwen-14b.jinja",
+	}
+)
+
+var deepseekA llama8b
+
+type llama8b struct{}
+
+func (*llama8b) GetInferenceParameters() *model.PresetParam {
+	return &model.PresetParam{
+		ModelFamilyName:           "DeepSeek",
+		ImageAccessMode:           string(kaitov1alpha1.ModelImageAccessModePublic),
+		DiskStorageRequirement:    "50Gi",
+		GPUCountRequirement:       "1",
+		TotalGPUMemoryRequirement: "14Gi",
+		PerGPUMemoryRequirement:   "0Gi", // We run DeepSeek using native vertical model parallelism, so there is no per-GPU memory requirement.
+		RuntimeParam: model.RuntimeParam{
+			Transformers: model.HuggingfaceTransformersParam{
+				BaseCommand:       baseCommandPresetDeepseekInference,
+				TorchRunParams:    inference.DefaultAccelerateParams,
+				InferenceMainFile: inference.DefautTransformersMainFile,
+				ModelRunParams:    deepseekLlama8bRunParams,
+			},
+			VLLM: model.VLLMParam{
+				BaseCommand:    inference.DefaultVLLMCommand,
+				ModelName:      "/workspace/vllm/weights",
+				ModelRunParams: deepseekLlama8bRunParamsVLLM,
+			},
+			// vllm requires the model specification to be exactly divisible by
+			// the number of GPUs (tensor parallel level).
+			DisableTensorParallelism: true,
+		},
+		ReadinessTimeout: time.Duration(30) * time.Minute,
+		Tag:              PresetDeepSeekTagMap["DeepSeekDistillLlama8B"],
+	}
+}
+func (*llama8b) GetTuningParameters() *model.PresetParam {
+	return nil
+}
+func (*llama8b) SupportDistributedInference() bool {
+	return false
+}
+func (*llama8b) SupportTuning() bool {
+	return false
+}
+
+var deepseekB qwen14b
+
+type qwen14b struct{}
+
+func (*qwen14b) GetInferenceParameters() *model.PresetParam {
+	return &model.PresetParam{
+		ModelFamilyName:           "DeepSeek",
+		ImageAccessMode:           string(kaitov1alpha1.ModelImageAccessModePublic),
+		DiskStorageRequirement:    "50Gi",
+		GPUCountRequirement:       "1",
+		TotalGPUMemoryRequirement: "25.7Gi",
+		PerGPUMemoryRequirement:   "0Gi", // We run DeepSeek using native vertical model parallelism, so there is no per-GPU memory requirement.
+		RuntimeParam: model.RuntimeParam{
+			Transformers: model.HuggingfaceTransformersParam{
+				BaseCommand:       baseCommandPresetDeepseekInference,
+				TorchRunParams:    inference.DefaultAccelerateParams,
+				InferenceMainFile: inference.DefautTransformersMainFile,
+				ModelRunParams:    deepseekQwen14bRunParams,
+			},
+			VLLM: model.VLLMParam{
+				BaseCommand:    inference.DefaultVLLMCommand,
+				ModelName:      "/workspace/vllm/weights",
+				ModelRunParams: deepseekQwen14bRunParamsVLLM,
+			},
+			// vllm requires the model specification to be exactly divisible by
+			// the number of GPUs (tensor parallel level).
+			DisableTensorParallelism: true,
+		},
+		ReadinessTimeout: time.Duration(30) * time.Minute,
+		Tag:              PresetDeepSeekTagMap["DeepSeekDistillQwen14B"],
+	}
+}
+func (*qwen14b) GetTuningParameters() *model.PresetParam {
+	return nil
+}
+func (*qwen14b) SupportDistributedInference() bool {
+	return false
+}
+func (*qwen14b) SupportTuning() bool {
+	return false
+}
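Note on the pattern in the patch above: each preset package registers its models with the global plugin registry from init(), so a blank import of the package is all that is needed to make the presets available at runtime (patch 2 below adds exactly that import). A minimal, self-contained sketch of the idea follows; the register helper and registry map are hypothetical stand-ins for plugin.KaitoModelRegister, not kaito's actual API.

// Hypothetical, minimal sketch of init()-based preset registration.
package main

import "fmt"

// Registration mirrors the shape of plugin.Registration: a preset
// name plus the instance that implements it.
type Registration struct {
	Name     string
	Instance any
}

// registry stands in for plugin.KaitoModelRegister (assumed behavior).
var registry = map[string]any{}

func register(r *Registration) { registry[r.Name] = r.Instance }

type llama8b struct{}

// init runs as soon as the package is linked in, even via a blank
// import; that is why cmd/workspace/models.go only needs
// _ "github.com/kaito-project/kaito/presets/workspace/models/deepseek".
func init() {
	register(&Registration{Name: "deepseek-r1-distill-llama-8b", Instance: &llama8b{}})
}

func main() {
	_, ok := registry["deepseek-r1-distill-llama-8b"]
	fmt.Println("preset registered:", ok)
}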
From 24102212f3a68a5f484048dda2efa016fba15719 Mon Sep 17 00:00:00 2001
From: ishaansehgal99
Date: Wed, 29 Jan 2025 17:11:48 -0800
Subject: [PATCH 2/4] feat: Add init models.go

---
 cmd/workspace/models.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmd/workspace/models.go b/cmd/workspace/models.go
index c272dacc5..8cd0d297b 100644
--- a/cmd/workspace/models.go
+++ b/cmd/workspace/models.go
@@ -10,4 +10,5 @@ import (
 	_ "github.com/kaito-project/kaito/presets/workspace/models/phi2"
 	_ "github.com/kaito-project/kaito/presets/workspace/models/phi3"
 	_ "github.com/kaito-project/kaito/presets/workspace/models/qwen"
+	_ "github.com/kaito-project/kaito/presets/workspace/models/deepseek"
 )

From 13674ad24b250740624c2eac5e6400d10d0b7333 Mon Sep 17 00:00:00 2001
From: ishaansehgal99
Date: Wed, 29 Jan 2025 17:30:26 -0800
Subject: [PATCH 3/4] fix: remove path

---
 presets/workspace/models/deepseek/model.go | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/presets/workspace/models/deepseek/model.go b/presets/workspace/models/deepseek/model.go
index d57014108..13b15280e 100644
--- a/presets/workspace/models/deepseek/model.go
+++ b/presets/workspace/models/deepseek/model.go
@@ -9,7 +9,6 @@ import (
 	"github.com/kaito-project/kaito/pkg/model"
 	"github.com/kaito-project/kaito/pkg/utils/plugin"
 	"github.com/kaito-project/kaito/pkg/workspace/inference"
-	"github.com/kaito-project/kaito/pkg/workspace/tuning"
 )
 
 func init() {
@@ -35,22 +34,18 @@ var (
 	baseCommandPresetDeepseekInference = "accelerate launch"
 	baseCommandPresetDeepseekTuning    = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
 	deepseekLlama8bRunParams           = map[string]string{
-		"torch_dtype":   "bfloat16",
-		"pipeline":      "text-generation",
-		"chat_template": "/workspace/chat_templates/deepseek-r1-distill-llama-8b.jinja",
+		"torch_dtype": "bfloat16",
+		"pipeline":    "text-generation",
 	}
 	deepseekLlama8bRunParamsVLLM = map[string]string{
-		"dtype":         "float16",
-		"chat-template": "/workspace/chat_templates/deepseek-r1-distill-llama-8b.jinja",
+		"dtype": "float16",
 	}
 	deepseekQwen14bRunParams = map[string]string{
-		"torch_dtype":   "bfloat16",
-		"pipeline":      "text-generation",
-		"chat_template": "/workspace/chat_templates/deepseek-r1-distill-qwen-14b.jinja",
+		"torch_dtype": "bfloat16",
+		"pipeline":    "text-generation",
 	}
 	deepseekQwen14bRunParamsVLLM = map[string]string{
-		"dtype":         "float16",
-		"chat-template": "/workspace/chat_templates/deepseek-r1-distill-qwen-14b.jinja",
+		"dtype": "float16",
 	}
 )
 
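Context for the removals above: patch 3 drops the chat template entries, leaving only the dtype/pipeline settings in each run-param map. These maps are ultimately rendered into arguments for the runtime launch command (note the vLLM map uses dash-style keys like "chat-template", while the transformers map uses underscore keys like "chat_template"). The sketch below illustrates one plausible way such a map could become CLI flags; buildArgs is a hypothetical helper for illustration, not kaito's actual command-assembly code.

// Hypothetical sketch: turning a ModelRunParams map into CLI flags.
package main

import (
	"fmt"
	"sort"
	"strings"
)

// buildArgs renders a params map such as deepseekLlama8bRunParamsVLLM
// into flags like "--dtype float16". Illustrative only; kaito's real
// rendering logic may differ.
func buildArgs(params map[string]string) string {
	keys := make([]string, 0, len(params))
	for k := range params {
		keys = append(keys, k)
	}
	sort.Strings(keys) // deterministic ordering for reproducible commands
	var b strings.Builder
	for _, k := range keys {
		fmt.Fprintf(&b, "--%s %s ", k, params[k])
	}
	return strings.TrimSpace(b.String())
}

func main() {
	fmt.Println(buildArgs(map[string]string{"dtype": "float16"}))
	// Output: --dtype float16
}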
"/workspace/chat_templates/deepseek-r1-distill-llama-8b.jinja", + "torch_dtype": "bfloat16", + "pipeline": "text-generation", } deepseekLlama8bRunParamsVLLM = map[string]string{ - "dtype": "float16", - "chat-template": "/workspace/chat_templates/deepseek-r1-distill-llama-8b.jinja", + "dtype": "float16", } deepseekQwen14bRunParams = map[string]string{ - "torch_dtype": "bfloat16", - "pipeline": "text-generation", - "chat_template": "/workspace/chat_templates/deepseek-r1-distill-qwen-14b.jinja", + "torch_dtype": "bfloat16", + "pipeline": "text-generation", } deepseekQwen14bRunParamsVLLM = map[string]string{ - "dtype": "float16", - "chat-template": "/workspace/chat_templates/deepseek-r1-distill-qwen-14b.jinja", + "dtype": "float16", } ) From 53c907e0623606d26a5bb42516d467f9b2525586 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 29 Jan 2025 17:42:19 -0800 Subject: [PATCH 4/4] fix: formatted --- cmd/workspace/models.go | 2 +- presets/workspace/models/deepseek/model.go | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cmd/workspace/models.go b/cmd/workspace/models.go index 8cd0d297b..15eef611b 100644 --- a/cmd/workspace/models.go +++ b/cmd/workspace/models.go @@ -3,6 +3,7 @@ package main import ( + _ "github.com/kaito-project/kaito/presets/workspace/models/deepseek" _ "github.com/kaito-project/kaito/presets/workspace/models/falcon" _ "github.com/kaito-project/kaito/presets/workspace/models/llama2" _ "github.com/kaito-project/kaito/presets/workspace/models/llama2chat" @@ -10,5 +11,4 @@ import ( _ "github.com/kaito-project/kaito/presets/workspace/models/phi2" _ "github.com/kaito-project/kaito/presets/workspace/models/phi3" _ "github.com/kaito-project/kaito/presets/workspace/models/qwen" - _ "github.com/kaito-project/kaito/presets/workspace/models/deepseek" ) diff --git a/presets/workspace/models/deepseek/model.go b/presets/workspace/models/deepseek/model.go index 13b15280e..1e6912499 100644 --- a/presets/workspace/models/deepseek/model.go +++ b/presets/workspace/models/deepseek/model.go @@ -23,17 +23,16 @@ func init() { } var ( - PresetDeepSeekR1DistillLlama8BModel = "deepseek-r1-distill-llama-8b" - PresetDeepSeekR1DistillQwen14BModel = "deepseek-r1-distill-qwen-14b" + PresetDeepSeekR1DistillLlama8BModel = "deepseek-r1-distill-llama-8b" + PresetDeepSeekR1DistillQwen14BModel = "deepseek-r1-distill-qwen-14b" PresetDeepSeekTagMap = map[string]string{ - "DeepSeekDistillLlama8B": "0.0.1", - "DeepSeekDistillQwen14B": "0.0.1", + "DeepSeekDistillLlama8B": "0.0.1", + "DeepSeekDistillQwen14B": "0.0.1", } baseCommandPresetDeepseekInference = "accelerate launch" - baseCommandPresetDeepseekTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch" - deepseekLlama8bRunParams = map[string]string{ + deepseekLlama8bRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", }