feat: Update README.md to add Phi-Medium (#537)
**Reason for Change**:
Add Phi-Medium to the README.

---------

Signed-off-by: Ishaan Sehgal <ishaanforthewin@gmail.com>
ishaansehgal99 authored Jul 25, 2024
1 parent 66f5711 commit 5413920
Showing 4 changed files with 12 additions and 11 deletions.
2 changes: 1 addition & 1 deletion presets/models/mistral/model.go
@@ -65,7 +65,7 @@ func (*mistral7b) GetTuningParameters() *model.PresetParam {
DiskStorageRequirement: "100Gi",
GPUCountRequirement: "1",
TotalGPUMemoryRequirement: "16Gi",
- PerGPUMemoryRequirement: "16Gi", // We run Mistral using native vertical model parallel, no per GPU memory requirement.
+ PerGPUMemoryRequirement: "16Gi",
//TorchRunParams: tuning.DefaultAccelerateParams,
//ModelRunParams: mistralRunParams,
ReadinessTimeout: time.Duration(30) * time.Minute,
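Each of the model.go hunks in this commit touches the same construct: a GetTuningParameters method that returns a *model.PresetParam literal whose requirement fields are plain strings. A minimal sketch of that shape is below for orientation; the package name, the model import path, and any fields outside the lines shown in the hunks are assumptions for illustration, not the repository's actual code.

```go
// Minimal sketch only: import path and surrounding fields are assumed.
package mistral

import (
	"time"

	"github.com/azure/kaito/pkg/model" // assumed import path
)

type mistral7b struct{}

// GetTuningParameters returns the resource requirements used by the tuning preset.
func (*mistral7b) GetTuningParameters() *model.PresetParam {
	return &model.PresetParam{
		DiskStorageRequirement:    "100Gi",
		GPUCountRequirement:       "1",
		TotalGPUMemoryRequirement: "16Gi",
		PerGPUMemoryRequirement:   "16Gi",
		ReadinessTimeout:          time.Duration(30) * time.Minute,
	}
}
```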
2 changes: 1 addition & 1 deletion presets/models/phi2/model.go
@@ -58,7 +58,7 @@ func (*phi2) GetTuningParameters() *model.PresetParam {
DiskStorageRequirement: "50Gi",
GPUCountRequirement: "1",
TotalGPUMemoryRequirement: "16Gi",
- PerGPUMemoryRequirement: "16Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+ PerGPUMemoryRequirement: "16Gi",
// TorchRunParams: inference.DefaultAccelerateParams,
// ModelRunParams: phiRunParams,
ReadinessTimeout: time.Duration(30) * time.Minute,
11 changes: 6 additions & 5 deletions presets/models/phi3/README.md
@@ -1,9 +1,10 @@
## Supported Models
- | Model name | Model source | Sample workspace|Kubernetes Workload|Distributed inference|
- |--------------------------|:----:|:----:| :----: |:----: |
- | phi-3-mini-4k-instruct |[microsoft](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct)|[link](../../../examples/inference/kaito_workspace_phi_3.yaml)|Deployment| false|
- | phi-3-mini-128k-instruct |[microsoft](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct)|[link](../../../examples/inference/kaito_workspace_phi_3.yaml)|Deployment| false|

+ | Model name | Model source | Sample workspace|Kubernetes Workload|Distributed inference|
+ |--------------------------|:-----------------------------------------------------------------------:|:----:| :----: |:----: |
+ | phi-3-mini-4k-instruct | [microsoft](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) |[link](../../../examples/inference/kaito_workspace_phi_3.yaml)|Deployment| false|
+ | phi-3-mini-128k-instruct | [microsoft](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) |[link](../../../examples/inference/kaito_workspace_phi_3.yaml)|Deployment| false|
+ | phi-3-medium-4k-instruct | [microsoft](https://huggingface.co/microsoft/Phi-3-medium-4k-instruct) |[link](../../../examples/inference/kaito_workspace_phi_3.yaml)|Deployment| false|
+ | phi-3-medium-128k-instruct | [microsoft](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct) |[link](../../../examples/inference/kaito_workspace_phi_3.yaml)|Deployment| false|

## Image Source
- **Public**: Kaito maintainers manage the lifecycle of the inference service images that contain model weights. The images are available in Microsoft Container Registry (MCR).
8 changes: 4 additions & 4 deletions presets/models/phi3/model.go
@@ -77,7 +77,7 @@ func (*phi3Mini4KInst) GetTuningParameters() *model.PresetParam {
DiskStorageRequirement: "50Gi",
GPUCountRequirement: "1",
TotalGPUMemoryRequirement: "16Gi",
- PerGPUMemoryRequirement: "16Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+ PerGPUMemoryRequirement: "16Gi",
// TorchRunParams: inference.DefaultAccelerateParams,
// ModelRunParams: phiRunParams,
ReadinessTimeout: time.Duration(30) * time.Minute,
@@ -116,7 +116,7 @@ func (*phi3Mini128KInst) GetTuningParameters() *model.PresetParam {
DiskStorageRequirement: "50Gi",
GPUCountRequirement: "1",
TotalGPUMemoryRequirement: "16Gi",
- PerGPUMemoryRequirement: "16Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+ PerGPUMemoryRequirement: "16Gi",
// TorchRunParams: inference.DefaultAccelerateParams,
// ModelRunParams: phiRunParams,
ReadinessTimeout: time.Duration(30) * time.Minute,
@@ -155,7 +155,7 @@ func (*Phi3Medium4kInstruct) GetTuningParameters() *model.PresetParam {
DiskStorageRequirement: "50Gi",
GPUCountRequirement: "1",
TotalGPUMemoryRequirement: "13Gi",
- PerGPUMemoryRequirement: "13Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+ PerGPUMemoryRequirement: "13Gi",
// TorchRunParams: inference.DefaultAccelerateParams,
// ModelRunParams: phiRunParams,
ReadinessTimeout: time.Duration(30) * time.Minute,
@@ -194,7 +194,7 @@ func (*Phi3Medium128kInstruct) GetTuningParameters() *model.PresetParam {
DiskStorageRequirement: "50Gi",
GPUCountRequirement: "1",
TotalGPUMemoryRequirement: "13Gi",
- PerGPUMemoryRequirement: "13Gi", // We run Phi using native vertical model parallel, no per GPU memory requirement.
+ PerGPUMemoryRequirement: "13Gi",
// TorchRunParams: inference.DefaultAccelerateParams,
// ModelRunParams: phiRunParams,
ReadinessTimeout: time.Duration(30) * time.Minute,
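The requirement fields above are Kubernetes-style quantity strings ("16Gi" for the mini presets, "13Gi" for the medium ones). The sketch below shows one way such strings could be compared when checking whether a GPU meets a preset's per-GPU requirement; fitsPerGPU is a hypothetical helper, and treating these fields as Kubernetes resource quantities is an assumption, not something this commit establishes.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// fitsPerGPU reports whether a node's per-GPU memory covers a preset's
// PerGPUMemoryRequirement. Both arguments are quantity strings such as "16Gi".
func fitsPerGPU(nodeGPUMemory, perGPURequirement string) (bool, error) {
	have, err := resource.ParseQuantity(nodeGPUMemory)
	if err != nil {
		return false, err
	}
	need, err := resource.ParseQuantity(perGPURequirement)
	if err != nil {
		return false, err
	}
	return have.Cmp(need) >= 0, nil
}

func main() {
	// A 16Gi GPU satisfies the 13Gi medium preset; a 12Gi GPU would not.
	ok, _ := fitsPerGPU("16Gi", "13Gi")
	fmt.Println(ok) // true
}
```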
