feat: Updated Deep Infra models

tryAGI · Dec 30, 2024 · 2575f86
1 parent b21d040
commit 2575f86
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 19 deletions.
diff --git a/src/DeepInfra/src/DeepInfraModelIds.cs b/src/DeepInfra/src/DeepInfraModelIds.cs
@@ -21,8 +21,8 @@ public enum DeepInfraModelIds
     /// Name: Llama-3.3-70B-Instruct-Turbo <br/>
     /// Organization: meta-llama <br/>
     /// Context Length: 131072 <br/>
-    /// Prompt Cost: $0.13/MTok <br/>
-    /// Completion Cost: $0.13/MTok <br/>
+    /// Prompt Cost: $0.12/MTok <br/>
+    /// Completion Cost: $0.3/MTok <br/>
     /// Description: Llama 3.3-70B Turbo is a highly optimized version of the Llama 3.3-70B model, utilizing FP8 quantization to deliver significantly faster inference speeds with a minor trade-off in accuracy. The model is designed to be helpful, safe, and flexible, with a focus on responsible deployment and mitigating potential risks such as bias, toxicity, and misinformation. It achieves state-of-the-art performance on various benchmarks, including conversational tasks, language translation, and text generation. <br/>
     /// HuggingFace Url: <a href="https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct-Turbo">https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct-Turbo</a> 
     /// </summary>
@@ -54,8 +54,8 @@ public enum DeepInfraModelIds
     /// Name: Meta-Llama-3.1-405B-Instruct <br/>
     /// Organization: meta-llama <br/>
     /// Context Length: 32768 <br/>
-    /// Prompt Cost: $0.9/MTok <br/>
-    /// Completion Cost: $0.9/MTok <br/>
+    /// Prompt Cost: $0.8/MTok <br/>
+    /// Completion Cost: $0.8/MTok <br/>
     /// Description: Meta developed and released the Meta Llama 3.1 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B, 70B and 405B sizes <br/>
     /// HuggingFace Url: <a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct">https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct</a> 
     /// </summary>
@@ -87,8 +87,8 @@ public enum DeepInfraModelIds
     /// Name: Meta-Llama-3.1-70B-Instruct-Turbo <br/>
     /// Organization: meta-llama <br/>
     /// Context Length: 131072 <br/>
-    /// Prompt Cost: $0.13/MTok <br/>
-    /// Completion Cost: $0.13/MTok <br/>
+    /// Prompt Cost: $0.12/MTok <br/>
+    /// Completion Cost: $0.3/MTok <br/>
     /// Description: Meta developed and released the Meta Llama 3.1 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B, 70B and 405B sizes <br/>
     /// HuggingFace Url: <a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo">https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo</a> 
     /// </summary>
@@ -98,8 +98,8 @@ public enum DeepInfraModelIds
     /// Name: Qwen2.5-Coder-32B-Instruct <br/>
     /// Organization: Qwen <br/>
     /// Context Length: 32768 <br/>
-    /// Prompt Cost: $0.08/MTok <br/>
-    /// Completion Cost: $0.08/MTok <br/>
+    /// Prompt Cost: $0.07/MTok <br/>
+    /// Completion Cost: $0.16/MTok <br/>
     /// Description: Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). It has significant improvements in code generation, code reasoning and code fixing. A more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies. <br/>
     /// HuggingFace Url: <a href="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct">https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct</a> 
     /// </summary>
@@ -109,8 +109,8 @@ public enum DeepInfraModelIds
     /// Name: Llama-3.1-Nemotron-70B-Instruct <br/>
     /// Organization: nvidia <br/>
     /// Context Length: 131072 <br/>
-    /// Prompt Cost: $0.23/MTok <br/>
-    /// Completion Cost: $0.23/MTok <br/>
+    /// Prompt Cost: $0.12/MTok <br/>
+    /// Completion Cost: $0.3/MTok <br/>
     /// Description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. This model reaches Arena Hard of 85.0, AlpacaEval 2 LC of 57.6 and GPT-4-Turbo MT-Bench of 8.98, which are known to be predictive of LMSys Chatbot Arena Elo.  As of 16th Oct 2024, this model is #1 on all three automatic alignment benchmarks (verified tab for AlpacaEval 2 LC), edging out strong frontier models such as GPT-4o and Claude 3.5 Sonnet. <br/>
     /// HuggingFace Url: <a href="https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct">https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct</a> 
     /// </summary>
@@ -230,8 +230,8 @@ public enum DeepInfraModelIds
     /// Name: Hermes-3-Llama-3.1-405B <br/>
     /// Organization: NousResearch <br/>
     /// Context Length: 131072 <br/>
-    /// Prompt Cost: $0.9/MTok <br/>
-    /// Completion Cost: $0.9/MTok <br/>
+    /// Prompt Cost: $0.8/MTok <br/>
+    /// Completion Cost: $0.8/MTok <br/>
     /// Description: Hermes 3 is a cutting-edge language model that offers advanced capabilities in roleplaying, reasoning, and conversation. It's a fine-tuned version of the Llama-3.1 405B foundation model, designed to align with user needs and provide powerful control. Key features include reliable function calling, structured output, generalist assistant capabilities, and improved code generation. Hermes 3 is competitive with Llama-3.1 Instruct models, with its own strengths and weaknesses. <br/>
     /// HuggingFace Url: <a href="https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B">https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B</a> 
     /// </summary>
@@ -248,6 +248,17 @@ public enum DeepInfraModelIds
     /// </summary>
     PhindCodellama34BV2,
 
+    /// <summary>
+    /// Name: QVQ-72B-Preview <br/>
+    /// Organization: Qwen <br/>
+    /// Context Length: 128000 <br/>
+    /// Prompt Cost: $0.25/MTok <br/>
+    /// Completion Cost: $0.5/MTok <br/>
+    /// Description: QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities. QVQ-72B-Preview has achieved remarkable performance on various benchmarks. It scored 70.3% on the Multimodal Massive Multi-task Understanding (MMMU) benchmark. <br/>
+    /// HuggingFace Url: <a href="https://huggingface.co/Qwen/QVQ-72B-Preview">https://huggingface.co/Qwen/QVQ-72B-Preview</a> 
+    /// </summary>
+    Qvq72BPreview,
+
     /// <summary>
     /// Name: Qwen2-72B-Instruct <br/>
     /// Organization: Qwen <br/>

diff --git a/src/DeepInfra/src/DeepInfraModelProvider.cs b/src/DeepInfra/src/DeepInfraModelProvider.cs
@@ -10,15 +10,15 @@ public static class DeepInfraModelProvider
     private static Dictionary<DeepInfraModelIds, ChatModelMetadata> Models { get; set; } = new()
     {
         { DeepInfraModelIds.Llama3370BInstruct, ToMetadata("meta-llama/Llama-3.3-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
-        { DeepInfraModelIds.Llama3370BInstructTurbo, ToMetadata("meta-llama/Llama-3.3-70B-Instruct-Turbo",131072,1.3E-07,4.0000000000000003E-07)},
+        { DeepInfraModelIds.Llama3370BInstructTurbo, ToMetadata("meta-llama/Llama-3.3-70B-Instruct-Turbo",131072,1.2E-07,3E-07)},
         { DeepInfraModelIds.MetaLlama3170BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
         { DeepInfraModelIds.MetaLlama318BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-8B-Instruct",131072,3E-08,5.0000000000000004E-08)},
-        { DeepInfraModelIds.MetaLlama31405BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-405B-Instruct",32768,9.000000000000001E-07,9.000000000000001E-07)},
+        { DeepInfraModelIds.MetaLlama31405BInstruct, ToMetadata("meta-llama/Meta-Llama-3.1-405B-Instruct",32768,8.000000000000001E-07,8.000000000000001E-07)},
         { DeepInfraModelIds.Qwq32BPreview, ToMetadata("Qwen/QwQ-32B-Preview",32768,1.2E-07,1.8E-07)},
         { DeepInfraModelIds.MetaLlama318BInstructTurbo, ToMetadata("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",131072,2E-08,5.0000000000000004E-08)},
-        { DeepInfraModelIds.MetaLlama3170BInstructTurbo, ToMetadata("meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",131072,1.3E-07,4.0000000000000003E-07)},
-        { DeepInfraModelIds.Qwen25Coder32BInstruct, ToMetadata("Qwen/Qwen2.5-Coder-32B-Instruct",32768,8E-08,1.8E-07)},
-        { DeepInfraModelIds.Llama31Nemotron70BInstruct, ToMetadata("nvidia/Llama-3.1-Nemotron-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
+        { DeepInfraModelIds.MetaLlama3170BInstructTurbo, ToMetadata("meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",131072,1.2E-07,3E-07)},
+        { DeepInfraModelIds.Qwen25Coder32BInstruct, ToMetadata("Qwen/Qwen2.5-Coder-32B-Instruct",32768,7E-08,1.6E-07)},
+        { DeepInfraModelIds.Llama31Nemotron70BInstruct, ToMetadata("nvidia/Llama-3.1-Nemotron-70B-Instruct",131072,1.2E-07,3E-07)},
         { DeepInfraModelIds.Qwen2572BInstruct, ToMetadata("Qwen/Qwen2.5-72B-Instruct",32768,2.3000000000000002E-07,4.0000000000000003E-07)},
         { DeepInfraModelIds.Llama3290BVisionInstruct, ToMetadata("meta-llama/Llama-3.2-90B-Vision-Instruct",32768,3.5E-07,4.0000000000000003E-07)},
         { DeepInfraModelIds.Llama3211BVisionInstruct, ToMetadata("meta-llama/Llama-3.2-11B-Vision-Instruct",131072,6E-08,6E-08)},
@@ -29,8 +29,9 @@ public static class DeepInfraModelProvider
         { DeepInfraModelIds.MythomaxL213BTurbo, ToMetadata("Gryphe/MythoMax-L2-13b-turbo",4096,1.3E-07,1.3E-07)},
         { DeepInfraModelIds.ZephyrOrpo141BA35bV01, ToMetadata("HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",65536,6.5E-07,6.5E-07)},
         { DeepInfraModelIds.Llama213BTiefighter, ToMetadata("KoboldAI/LLaMA2-13B-Tiefighter",4096,1.0000000000000001E-07,1.0000000000000001E-07)},
-        { DeepInfraModelIds.Hermes3Llama31405B, ToMetadata("NousResearch/Hermes-3-Llama-3.1-405B",131072,9.000000000000001E-07,9.000000000000001E-07)},
+        { DeepInfraModelIds.Hermes3Llama31405B, ToMetadata("NousResearch/Hermes-3-Llama-3.1-405B",131072,8.000000000000001E-07,8.000000000000001E-07)},
         { DeepInfraModelIds.PhindCodellama34BV2, ToMetadata("Phind/Phind-CodeLlama-34B-v2",4096,6E-07,6E-07)},
+        { DeepInfraModelIds.Qvq72BPreview, ToMetadata("Qwen/QVQ-72B-Preview",128000,2.5E-07,5E-07)},
         { DeepInfraModelIds.Qwen272BInstruct, ToMetadata("Qwen/Qwen2-72B-Instruct",32768,3.5E-07,4.0000000000000003E-07)},
         { DeepInfraModelIds.Qwen27BInstruct, ToMetadata("Qwen/Qwen2-7B-Instruct",32768,6E-08,6E-08)},
         { DeepInfraModelIds.Qwen257BInstruct, ToMetadata("Qwen/Qwen2.5-7B-Instruct",32768,2E-08,5.0000000000000004E-08)},
@@ -56,7 +57,7 @@ public static class DeepInfraModelProvider
         { DeepInfraModelIds.Llama270BChatHf, ToMetadata("meta-llama/Llama-2-70b-chat-hf",4096,6.4E-07,8.000000000000001E-07)},
         { DeepInfraModelIds.Llama27BChatHf, ToMetadata("meta-llama/Llama-2-7b-chat-hf",4096,7E-08,7E-08)},
         { DeepInfraModelIds.Llama321BInstruct, ToMetadata("meta-llama/Llama-3.2-1B-Instruct",131072,1E-08,2E-08)},
-        { DeepInfraModelIds.Llama323BInstruct, ToMetadata("meta-llama/Llama-3.2-3B-Instruct",131072,2E-08,3E-08)},
+        { DeepInfraModelIds.Llama323BInstruct, ToMetadata("meta-llama/Llama-3.2-3B-Instruct",131072,2E-08,2E-08)},
         { DeepInfraModelIds.MetaLlama370BInstruct, ToMetadata("meta-llama/Meta-Llama-3-70B-Instruct",8192,2.3000000000000002E-07,4.0000000000000003E-07)},
         { DeepInfraModelIds.MetaLlama38BInstruct, ToMetadata("meta-llama/Meta-Llama-3-8B-Instruct",8192,3E-08,6E-08)},
         { DeepInfraModelIds.Phi3Medium4KInstruct, ToMetadata("microsoft/Phi-3-medium-4k-instruct",4096,1.4E-07,1.4E-07)},

diff --git a/src/DeepInfra/src/Predefined/AllModels.cs b/src/DeepInfra/src/Predefined/AllModels.cs
@@ -110,6 +110,11 @@ public class Hermes3Llama31405BModel(DeepInfraProvider provider) : DeepInfraMode
 public class PhindCodellama34BV2Model(DeepInfraProvider provider) : DeepInfraModel(provider, DeepInfraModelIds.PhindCodellama34BV2);
 
 
+/// <inheritdoc cref="DeepInfraModelIds.Qvq72BPreview"/>
+/// <param name="provider">Deep Infra Provider Instance</param>
+public class Qvq72BPreviewModel(DeepInfraProvider provider) : DeepInfraModel(provider, DeepInfraModelIds.Qvq72BPreview);
+
+
 /// <inheritdoc cref="DeepInfraModelIds.Qwen272BInstruct"/>
 /// <param name="provider">Deep Infra Provider Instance</param>
 public class Qwen272BInstructModel(DeepInfraProvider provider) : DeepInfraModel(provider, DeepInfraModelIds.Qwen272BInstruct);