feat: Add Phi-3 Medium Plugin (#494)
**Reason for Change**:
 Add Phi-3 Medium Plugin
ishaansehgal99 authored Jul 2, 2024
1 parent ada4b14 commit 2e06aff
Showing 1 changed file with 96 additions and 6 deletions: presets/models/phi3/model.go
@@ -20,15 +20,27 @@ func init() {
 		Name: PresetPhi3Mini128kModel,
 		Instance: &phi3MiniB,
 	})
+	plugin.KaitoModelRegister.Register(&plugin.Registration{
+		Name: PresetPhi3Medium4kModel,
+		Instance: &phi3MediumA,
+	})
+	plugin.KaitoModelRegister.Register(&plugin.Registration{
+		Name: PresetPhi3Medium128kModel,
+		Instance: &phi3MediumB,
+	})
 }
 
 var (
 	PresetPhi3Mini4kModel = "phi-3-mini-4k-instruct"
 	PresetPhi3Mini128kModel = "phi-3-mini-128k-instruct"
+	PresetPhi3Medium4kModel = "phi-3-medium-4k-instruct"
+	PresetPhi3Medium128kModel = "phi-3-medium-128k-instruct"
 
 	PresetPhiTagMap = map[string]string{
-		"Phi3Mini4kInstruct": "0.0.1",
-		"Phi3Mini128kInstruct": "0.0.1",
+		"Phi3Mini4kInstruct": "0.0.1",
+		"Phi3Mini128kInstruct": "0.0.1",
+		"Phi3Medium4kInstruct": "0.0.1",
+		"Phi3Medium128kInstruct": "0.0.1",
 	}
 
 	baseCommandPresetPhi = "accelerate launch"
@@ -46,7 +58,7 @@ type phi3Mini4KInst struct{}
 func (*phi3Mini4KInst) GetInferenceParameters() *model.PresetParam {
 	return &model.PresetParam{
 		ModelFamilyName: "Phi3",
-		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePrivate),
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
 		DiskStorageRequirement: "50Gi",
 		GPUCountRequirement: "1",
 		TotalGPUMemoryRequirement: "9Gi",
@@ -61,7 +73,7 @@ func (*phi3Mini4KInst) GetInferenceParameters() *model.PresetParam {
 func (*phi3Mini4KInst) GetTuningParameters() *model.PresetParam {
 	return &model.PresetParam{
 		ModelFamilyName: "Phi3",
-		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePrivate),
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
 		DiskStorageRequirement: "50Gi",
 		GPUCountRequirement: "1",
 		TotalGPUMemoryRequirement: "16Gi",
@@ -85,7 +97,7 @@ type phi3Mini128KInst struct{}
 func (*phi3Mini128KInst) GetInferenceParameters() *model.PresetParam {
 	return &model.PresetParam{
 		ModelFamilyName: "Phi3",
-		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePrivate),
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
 		DiskStorageRequirement: "50Gi",
 		GPUCountRequirement: "1",
 		TotalGPUMemoryRequirement: "9Gi",
@@ -100,7 +112,7 @@ func (*phi3Mini128KInst) GetInferenceParameters() *model.PresetParam {
 func (*phi3Mini128KInst) GetTuningParameters() *model.PresetParam {
 	return &model.PresetParam{
 		ModelFamilyName: "Phi3",
-		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePrivate),
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
 		DiskStorageRequirement: "50Gi",
 		GPUCountRequirement: "1",
 		TotalGPUMemoryRequirement: "16Gi",
@@ -116,3 +128,81 @@ func (*phi3Mini128KInst) SupportDistributedInference() bool { return false }
 func (*phi3Mini128KInst) SupportTuning() bool {
 	return true
 }
+
+var phi3MediumA Phi3Medium4kInstruct
+
+type Phi3Medium4kInstruct struct{}
+
+func (*Phi3Medium4kInstruct) GetInferenceParameters() *model.PresetParam {
+	return &model.PresetParam{
+		ModelFamilyName: "Phi3",
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
+		DiskStorageRequirement: "50Gi",
+		GPUCountRequirement: "1",
+		TotalGPUMemoryRequirement: "16Gi",
+		PerGPUMemoryRequirement: "0Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+		TorchRunParams: inference.DefaultAccelerateParams,
+		ModelRunParams: phiRunParams,
+		ReadinessTimeout: time.Duration(30) * time.Minute,
+		BaseCommand: baseCommandPresetPhi,
+		Tag: PresetPhiTagMap["Phi3Medium4kInstruct"],
+	}
+}
+func (*Phi3Medium4kInstruct) GetTuningParameters() *model.PresetParam {
+	return &model.PresetParam{
+		ModelFamilyName: "Phi3",
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
+		DiskStorageRequirement: "50Gi",
+		GPUCountRequirement: "1",
+		TotalGPUMemoryRequirement: "13Gi",
+		PerGPUMemoryRequirement: "13Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+		// TorchRunParams: inference.DefaultAccelerateParams,
+		// ModelRunParams: phiRunParams,
+		ReadinessTimeout: time.Duration(30) * time.Minute,
+		BaseCommand: baseCommandPresetPhi,
+		Tag: PresetPhiTagMap["Phi3Medium4kInstruct"],
+	}
+}
+func (*Phi3Medium4kInstruct) SupportDistributedInference() bool { return false }
+func (*Phi3Medium4kInstruct) SupportTuning() bool {
+	return true
+}
+
+var phi3MediumB Phi3Medium128kInstruct
+
+type Phi3Medium128kInstruct struct{}
+
+func (*Phi3Medium128kInstruct) GetInferenceParameters() *model.PresetParam {
+	return &model.PresetParam{
+		ModelFamilyName: "Phi3",
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
+		DiskStorageRequirement: "50Gi",
+		GPUCountRequirement: "1",
+		TotalGPUMemoryRequirement: "16Gi",
+		PerGPUMemoryRequirement: "0Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+		TorchRunParams: inference.DefaultAccelerateParams,
+		ModelRunParams: phiRunParams,
+		ReadinessTimeout: time.Duration(30) * time.Minute,
+		BaseCommand: baseCommandPresetPhi,
+		Tag: PresetPhiTagMap["Phi3Medium128kInstruct"],
+	}
+}
+func (*Phi3Medium128kInstruct) GetTuningParameters() *model.PresetParam {
+	return &model.PresetParam{
+		ModelFamilyName: "Phi3",
+		ImageAccessMode: string(kaitov1alpha1.ModelImageAccessModePublic),
+		DiskStorageRequirement: "50Gi",
+		GPUCountRequirement: "1",
+		TotalGPUMemoryRequirement: "13Gi",
+		PerGPUMemoryRequirement: "13Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+		// TorchRunParams: inference.DefaultAccelerateParams,
+		// ModelRunParams: phiRunParams,
+		ReadinessTimeout: time.Duration(30) * time.Minute,
+		BaseCommand: baseCommandPresetPhi,
+		Tag: PresetPhiTagMap["Phi3Medium128kInstruct"],
+	}
+}
+func (*Phi3Medium128kInstruct) SupportDistributedInference() bool { return false }
+func (*Phi3Medium128kInstruct) SupportTuning() bool {
+	return true
+}
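A note on the registration pattern used in the init() hunk above: each preset registers a named instance in a process-wide registry at package load time, and the instance is later resolved by its preset name (for example "phi-3-medium-4k-instruct"). Below is a minimal, self-contained sketch of that register-and-lookup flow; the registry map, Register, Get, Params, and Model names are illustrative stand-ins, not kaito's actual plugin or model APIs.

```go
package main

import "fmt"

// Params stands in for a preset's parameter bundle (cf. model.PresetParam in the diff).
type Params struct {
	TotalGPUMemoryRequirement string
	Tag                       string
}

// Model stands in for the interface each preset instance implements.
type Model interface {
	GetInferenceParameters() Params
}

// registry maps preset names to registered instances (cf. plugin.KaitoModelRegister).
var registry = map[string]Model{}

// Register records a named preset instance; presets call this from a package init().
func Register(name string, m Model) { registry[name] = m }

// Get resolves a previously registered preset by its name.
func Get(name string) (Model, bool) {
	m, ok := registry[name]
	return m, ok
}

// phi3Medium4k is an illustrative preset, loosely modeled on Phi3Medium4kInstruct above.
type phi3Medium4k struct{}

func (phi3Medium4k) GetInferenceParameters() Params {
	return Params{TotalGPUMemoryRequirement: "16Gi", Tag: "0.0.1"}
}

func init() {
	// Mirrors the Register calls added to init() in this commit, using the sketch API.
	Register("phi-3-medium-4k-instruct", phi3Medium4k{})
}

func main() {
	if m, ok := Get("phi-3-medium-4k-instruct"); ok {
		fmt.Printf("resolved preset params: %+v\n", m.GetInferenceParameters())
	}
}
```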

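The storage and GPU memory requirements in the preset parameters ("50Gi", "16Gi", "0Gi", and so on) are written as Kubernetes resource quantities. As a rough illustration of how such a requirement can be compared against available capacity, here is a small sketch using the standard k8s.io/apimachinery resource package; it is not kaito's actual scheduling or validation code, and the 24Gi capacity figure is just an example value.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// e.g. TotalGPUMemoryRequirement for the phi-3-medium inference preset.
	required := resource.MustParse("16Gi")
	// Example GPU memory capacity on a candidate node (illustrative value).
	available := resource.MustParse("24Gi")

	// Quantity.Cmp returns -1, 0, or 1; the requirement fits when required <= available.
	if required.Cmp(available) <= 0 {
		fmt.Println("GPU memory requirement satisfied")
	} else {
		fmt.Println("insufficient GPU memory")
	}
}
```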