Merge pull request #161 from tryAGI/bot/update-deep-infra_202502100644

feat: Updated Deep Infra models
tryAGI · Feb 10, 2025 · 8fa3e4d
2 parents 834d4d9 + fcaa73c
commit 8fa3e4d
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 7 deletions.
diff --git a/src/DeepInfra/src/DeepInfraModelIds.cs b/src/DeepInfra/src/DeepInfraModelIds.cs
@@ -9,7 +9,7 @@ public enum DeepInfraModelIds
     /// <summary>
     /// Name: DeepSeek-R1 <br/>
     /// Organization: deepseek-ai <br/>
-    /// Context Length: 16000 <br/>
+    /// Context Length: 32768 <br/>
     /// Prompt Cost: $0.75/MTok <br/>
     /// Completion Cost: $0.75/MTok <br/>
     /// Description: We introduce DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.  <br/>
@@ -31,10 +31,10 @@ public enum DeepInfraModelIds
     /// <summary>
     /// Name: DeepSeek-V3 <br/>
     /// Organization: deepseek-ai <br/>
-    /// Context Length: 16000 <br/>
+    /// Context Length: 32768 <br/>
     /// Prompt Cost: $0.49/MTok <br/>
     /// Completion Cost: $0.49/MTok <br/>
-    /// Description:  <br/>
+    /// Description: DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2.  <br/>
     /// HuggingFace Url: <a href="https://huggingface.co/deepseek-ai/DeepSeek-V3">https://huggingface.co/deepseek-ai/DeepSeek-V3</a> 
     /// </summary>
     DeepseekV3,
@@ -328,7 +328,7 @@ public enum DeepInfraModelIds
     /// <summary>
     /// Name: QVQ-72B-Preview <br/>
     /// Organization: Qwen <br/>
-    /// Context Length: 128000 <br/>
+    /// Context Length: 32000 <br/>
     /// Prompt Cost: $0.25/MTok <br/>
     /// Completion Cost: $0.25/MTok <br/>
     /// Description: QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities. QVQ-72B-Preview has achieved remarkable performance on various benchmarks. It scored a remarkable 70.3% on the Multimodal Massive Multi-task Understanding (MMMU) benchmark <br/>

diff --git a/src/DeepInfra/src/DeepInfraModelProvider.cs b/src/DeepInfra/src/DeepInfraModelProvider.cs
@@ -9,9 +9,9 @@ public static class DeepInfraModelProvider
 {
     private static Dictionary<DeepInfraModelIds, ChatModelMetadata> Models { get; set; } = new()
     {
-        { DeepInfraModelIds.DeepseekR1, ToMetadata("deepseek-ai/DeepSeek-R1",16000,7.5E-07,2.4E-06)},
+        { DeepInfraModelIds.DeepseekR1, ToMetadata("deepseek-ai/DeepSeek-R1",32768,7.5E-07,2.4E-06)},
         { DeepInfraModelIds.DeepseekR1DistillLlama70B, ToMetadata("deepseek-ai/DeepSeek-R1-Distill-Llama-70B",131072,2.3000000000000002E-07,6.9E-07)},
-        { DeepInfraModelIds.DeepseekV3, ToMetadata("deepseek-ai/DeepSeek-V3",16000,4.9E-07,8.900000000000001E-07)},
+        { DeepInfraModelIds.DeepseekV3, ToMetadata("deepseek-ai/DeepSeek-V3",32768,4.9E-07,8.900000000000001E-07)},
         { DeepInfraModelIds.Llama3370BInstructTurbo, ToMetadata("meta-llama/Llama-3.3-70B-Instruct-Turbo",131072,1.2E-07,3E-07)},
         { DeepInfraModelIds.Llama3370BInstruct, ToMetadata("meta-llama/Llama-3.3-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
         { DeepInfraModelIds.MistralSmall24BInstruct2501, ToMetadata("mistralai/Mistral-Small-24B-Instruct-2501",32768,7E-08,1.4E-07)},
@@ -38,7 +38,7 @@ public static class DeepInfraModelProvider
         { DeepInfraModelIds.Hermes3Llama31405B, ToMetadata("NousResearch/Hermes-3-Llama-3.1-405B",131072,8.000000000000001E-07,8.000000000000001E-07)},
         { DeepInfraModelIds.SkyT132BPreview, ToMetadata("NovaSky-AI/Sky-T1-32B-Preview",32768,1.2E-07,1.8E-07)},
         { DeepInfraModelIds.PhindCodellama34BV2, ToMetadata("Phind/Phind-CodeLlama-34B-v2",4096,6E-07,6E-07)},
-        { DeepInfraModelIds.Qvq72BPreview, ToMetadata("Qwen/QVQ-72B-Preview",128000,2.5E-07,5E-07)},
+        { DeepInfraModelIds.Qvq72BPreview, ToMetadata("Qwen/QVQ-72B-Preview",32000,2.5E-07,5E-07)},
         { DeepInfraModelIds.Qwen272BInstruct, ToMetadata("Qwen/Qwen2-72B-Instruct",32768,3.5E-07,4.0000000000000003E-07)},
         { DeepInfraModelIds.Qwen27BInstruct, ToMetadata("Qwen/Qwen2-7B-Instruct",32768,6E-08,6E-08)},
         { DeepInfraModelIds.Qwen257BInstruct, ToMetadata("Qwen/Qwen2.5-7B-Instruct",32768,2E-08,5.0000000000000004E-08)},