Skip to content

Commit

Permalink
feat: Updated Deep Infra models
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed Dec 30, 2024
1 parent b21d040 commit 2575f86
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 19 deletions.
35 changes: 23 additions & 12 deletions src/DeepInfra/src/DeepInfraModelIds.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ public enum DeepInfraModelIds
/// Name: Llama-3.3-70B-Instruct-Turbo <br/>
/// Organization: meta-llama <br/>
/// Context Length: 131072 <br/>
/// Prompt Cost: $0.13/MTok <br/>
/// Completion Cost: $0.13/MTok <br/>
/// Prompt Cost: $0.12/MTok <br/>
/// Completion Cost: $0.3/MTok <br/>
/// Description: Llama 3.3-70B Turbo is a highly optimized version of the Llama 3.3-70B model, utilizing FP8 quantization to deliver significantly faster inference speeds with a minor trade-off in accuracy. The model is designed to be helpful, safe, and flexible, with a focus on responsible deployment and mitigating potential risks such as bias, toxicity, and misinformation. It achieves state-of-the-art performance on various benchmarks, including conversational tasks, language translation, and text generation. <br/>
/// HuggingFace Url: <a href="https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct-Turbo">https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct-Turbo</a>
/// </summary>
Expand Down Expand Up @@ -54,8 +54,8 @@ public enum DeepInfraModelIds
/// Name: Meta-Llama-3.1-405B-Instruct <br/>
/// Organization: meta-llama <br/>
/// Context Length: 32768 <br/>
/// Prompt Cost: $0.9/MTok <br/>
/// Completion Cost: $0.9/MTok <br/>
/// Prompt Cost: $0.8/MTok <br/>
/// Completion Cost: $0.8/MTok <br/>
/// Description: Meta developed and released the Meta Llama 3.1 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B, 70B and 405B sizes <br/>
/// HuggingFace Url: <a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct">https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct</a>
/// </summary>
Expand Down Expand Up @@ -87,8 +87,8 @@ public enum DeepInfraModelIds
/// Name: Meta-Llama-3.1-70B-Instruct-Turbo <br/>
/// Organization: meta-llama <br/>
/// Context Length: 131072 <br/>
/// Prompt Cost: $0.13/MTok <br/>
/// Completion Cost: $0.13/MTok <br/>
/// Prompt Cost: $0.12/MTok <br/>
/// Completion Cost: $0.3/MTok <br/>
/// Description: Meta developed and released the Meta Llama 3.1 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B, 70B and 405B sizes <br/>
/// HuggingFace Url: <a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo">https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo</a>
/// </summary>
Expand All @@ -98,8 +98,8 @@ public enum DeepInfraModelIds
/// Name: Qwen2.5-Coder-32B-Instruct <br/>
/// Organization: Qwen <br/>
/// Context Length: 32768 <br/>
/// Prompt Cost: $0.08/MTok <br/>
/// Completion Cost: $0.08/MTok <br/>
/// Prompt Cost: $0.07/MTok <br/>
/// Completion Cost: $0.16/MTok <br/>
/// Description: Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). It has significant improvements in code generation, code reasoning and code fixing. A more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies. <br/>
/// HuggingFace Url: <a href="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct">https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct</a>
/// </summary>
Expand All @@ -109,8 +109,8 @@ public enum DeepInfraModelIds
/// Name: Llama-3.1-Nemotron-70B-Instruct <br/>
/// Organization: nvidia <br/>
/// Context Length: 131072 <br/>
/// Prompt Cost: $0.23/MTok <br/>
/// Completion Cost: $0.23/MTok <br/>
/// Prompt Cost: $0.12/MTok <br/>
/// Completion Cost: $0.3/MTok <br/>
/// Description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. This model reaches Arena Hard of 85.0, AlpacaEval 2 LC of 57.6 and GPT-4-Turbo MT-Bench of 8.98, which are known to be predictive of LMSys Chatbot Arena Elo. As of 16th Oct 2024, this model is #1 on all three automatic alignment benchmarks (verified tab for AlpacaEval 2 LC), edging out strong frontier models such as GPT-4o and Claude 3.5 Sonnet. <br/>
/// HuggingFace Url: <a href="https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct">https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct</a>
/// </summary>
Expand Down Expand Up @@ -230,8 +230,8 @@ public enum DeepInfraModelIds
/// Name: Hermes-3-Llama-3.1-405B <br/>
/// Organization: NousResearch <br/>
/// Context Length: 131072 <br/>
/// Prompt Cost: $0.9/MTok <br/>
/// Completion Cost: $0.9/MTok <br/>
/// Prompt Cost: $0.8/MTok <br/>
/// Completion Cost: $0.8/MTok <br/>
/// Description: Hermes 3 is a cutting-edge language model that offers advanced capabilities in roleplaying, reasoning, and conversation. It's a fine-tuned version of the Llama-3.1 405B foundation model, designed to align with user needs and provide powerful control. Key features include reliable function calling, structured output, generalist assistant capabilities, and improved code generation. Hermes 3 is competitive with Llama-3.1 Instruct models, with its own strengths and weaknesses. <br/>
/// HuggingFace Url: <a href="https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B">https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B</a>
/// </summary>
Expand All @@ -248,6 +248,17 @@ public enum DeepInfraModelIds
/// </summary>
PhindCodellama34BV2,

// NOTE(review): completion cost below is taken from the 4th ToMetadata argument (5E-07) of the
// Qvq72BPreview entry in DeepInfraModelProvider; presumably that argument is the per-token
// completion price — confirm against ToMetadata's signature.
/// <summary>
/// Name: QVQ-72B-Preview <br/>
/// Organization: Qwen <br/>
/// Context Length: 128000 <br/>
/// Prompt Cost: $0.25/MTok <br/>
/// Completion Cost: $0.5/MTok <br/>
/// Description: QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities. QVQ-72B-Preview has achieved remarkable performance on various benchmarks. It scored a remarkable 70.3% on the Multimodal Massive Multi-task Understanding (MMMU) benchmark. <br/>
/// HuggingFace Url: <a href="https://huggingface.co/Qwen/QVQ-72B-Preview">https://huggingface.co/Qwen/QVQ-72B-Preview</a>
/// </summary>
Qvq72BPreview,

/// <summary>
/// Name: Qwen2-72B-Instruct <br/>
/// Organization: Qwen <br/>
Expand Down
15 changes: 8 additions & 7 deletions src/DeepInfra/src/DeepInfraModelProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ public static class DeepInfraModelProvider
private static Dictionary<DeepInfraModelIds, ChatModelMetadata> Models { get; set; } = new()
{
{ DeepInfraModelIds.Llama3370BInstruct, ToMetadata("meta-llama/Llama-3.3-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.Llama3370BInstructTurbo, ToMetadata("meta-llama/Llama-3.3-70B-Instruct-Turbo",131072,1.3E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.Llama3370BInstructTurbo, ToMetadata("meta-llama/Llama-3.3-70B-Instruct-Turbo",131072,1.2E-07,3E-07)},
{ DeepInfraModelIds.MetaLlama3170BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.MetaLlama318BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-8B-Instruct",131072,3E-08,5.0000000000000004E-08)},
{ DeepInfraModelIds.MetaLlama31405BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-405B-Instruct",32768,9.000000000000001E-07,9.000000000000001E-07)},
{ DeepInfraModelIds.MetaLlama31405BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-405B-Instruct",32768,8.000000000000001E-07,8.000000000000001E-07)},
{ DeepInfraModelIds.Qwq32BPreview, ToMetadata("Qwen/QwQ-32B-Preview",32768,1.2E-07,1.8E-07)},
{ DeepInfraModelIds.MetaLlama318BInstructTurbo, ToMetadata("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",131072,2E-08,5.0000000000000004E-08)},
{ DeepInfraModelIds.MetaLlama3170BInstructTurbo, ToMetadata("meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",131072,1.3E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.Qwen25Coder32BInstruct, ToMetadata("Qwen/Qwen2.5-Coder-32B-Instruct",32768,8E-08,1.8E-07)},
{ DeepInfraModelIds.Llama31Nemotron70BInstruct, ToMetadata("nvidia/Llama-3.1-Nemotron-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.MetaLlama3170BInstructTurbo, ToMetadata("meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",131072,1.2E-07,3E-07)},
{ DeepInfraModelIds.Qwen25Coder32BInstruct, ToMetadata("Qwen/Qwen2.5-Coder-32B-Instruct",32768,7E-08,1.6E-07)},
{ DeepInfraModelIds.Llama31Nemotron70BInstruct, ToMetadata("nvidia/Llama-3.1-Nemotron-70B-Instruct",131072,1.2E-07,3E-07)},
{ DeepInfraModelIds.Qwen2572BInstruct, ToMetadata("Qwen/Qwen2.5-72B-Instruct",32768,2.3000000000000002E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.Llama3290BVisionInstruct, ToMetadata("meta-llama/Llama-3.2-90B-Vision-Instruct",32768,3.5E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.Llama3211BVisionInstruct, ToMetadata("meta-llama/Llama-3.2-11B-Vision-Instruct",131072,6E-08,6E-08)},
Expand All @@ -29,8 +29,9 @@ public static class DeepInfraModelProvider
{ DeepInfraModelIds.MythomaxL213BTurbo, ToMetadata("Gryphe/MythoMax-L2-13b-turbo",4096,1.3E-07,1.3E-07)},
{ DeepInfraModelIds.ZephyrOrpo141BA35bV01, ToMetadata("HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",65536,6.5E-07,6.5E-07)},
{ DeepInfraModelIds.Llama213BTiefighter, ToMetadata("KoboldAI/LLaMA2-13B-Tiefighter",4096,1.0000000000000001E-07,1.0000000000000001E-07)},
{ DeepInfraModelIds.Hermes3Llama31405B, ToMetadata("NousResearch/Hermes-3-Llama-3.1-405B",131072,9.000000000000001E-07,9.000000000000001E-07)},
{ DeepInfraModelIds.Hermes3Llama31405B, ToMetadata("NousResearch/Hermes-3-Llama-3.1-405B",131072,8.000000000000001E-07,8.000000000000001E-07)},
{ DeepInfraModelIds.PhindCodellama34BV2, ToMetadata("Phind/Phind-CodeLlama-34B-v2",4096,6E-07,6E-07)},
{ DeepInfraModelIds.Qvq72BPreview, ToMetadata("Qwen/QVQ-72B-Preview",128000,2.5E-07,5E-07)},
{ DeepInfraModelIds.Qwen272BInstruct, ToMetadata("Qwen/Qwen2-72B-Instruct",32768,3.5E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.Qwen27BInstruct, ToMetadata("Qwen/Qwen2-7B-Instruct",32768,6E-08,6E-08)},
{ DeepInfraModelIds.Qwen257BInstruct, ToMetadata("Qwen/Qwen2.5-7B-Instruct",32768,2E-08,5.0000000000000004E-08)},
Expand All @@ -56,7 +57,7 @@ public static class DeepInfraModelProvider
{ DeepInfraModelIds.Llama270BChatHf, ToMetadata("meta-llama/Llama-2-70b-chat-hf",4096,6.4E-07,8.000000000000001E-07)},
{ DeepInfraModelIds.Llama27BChatHf, ToMetadata("meta-llama/Llama-2-7b-chat-hf",4096,7E-08,7E-08)},
{ DeepInfraModelIds.Llama321BInstruct, ToMetadata("meta-llama/Llama-3.2-1B-Instruct",131072,1E-08,2E-08)},
{ DeepInfraModelIds.Llama323BInstruct, ToMetadata("meta-llama/Llama-3.2-3B-Instruct",131072,2E-08,3E-08)},
{ DeepInfraModelIds.Llama323BInstruct, ToMetadata("meta-llama/Llama-3.2-3B-Instruct",131072,2E-08,2E-08)},
{ DeepInfraModelIds.MetaLlama370BInstruct, ToMetadata("meta-llama/Meta-Llama-3-70B-Instruct",8192,2.3000000000000002E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.MetaLlama38BInstruct, ToMetadata("meta-llama/Meta-Llama-3-8B-Instruct",8192,3E-08,6E-08)},
{ DeepInfraModelIds.Phi3Medium4KInstruct, ToMetadata("microsoft/Phi-3-medium-4k-instruct",4096,1.4E-07,1.4E-07)},
Expand Down
5 changes: 5 additions & 0 deletions src/DeepInfra/src/Predefined/AllModels.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ public class Hermes3Llama31405BModel(DeepInfraProvider provider) : DeepInfraMode
public class PhindCodellama34BV2Model(DeepInfraProvider provider) : DeepInfraModel(provider, DeepInfraModelIds.PhindCodellama34BV2);


/// <inheritdoc cref="DeepInfraModelIds.Qvq72BPreview"/>
public class Qvq72BPreviewModel : DeepInfraModel
{
    /// <summary>
    /// Creates a model bound to <see cref="DeepInfraModelIds.Qvq72BPreview"/>.
    /// </summary>
    /// <param name="provider">Deep Infra Provider Instance</param>
    public Qvq72BPreviewModel(DeepInfraProvider provider)
        : base(provider, DeepInfraModelIds.Qvq72BPreview)
    {
    }
}


/// <inheritdoc cref="DeepInfraModelIds.Qwen272BInstruct"/>
public class Qwen272BInstructModel : DeepInfraModel
{
    /// <summary>
    /// Creates a model bound to <see cref="DeepInfraModelIds.Qwen272BInstruct"/>.
    /// </summary>
    /// <param name="provider">Deep Infra Provider Instance</param>
    public Qwen272BInstructModel(DeepInfraProvider provider)
        : base(provider, DeepInfraModelIds.Qwen272BInstruct)
    {
    }
}
Expand Down

0 comments on commit 2575f86

Please sign in to comment.