Skip to content

Commit

Permalink
Merge pull request #161 from tryAGI/bot/update-deep-infra_202502100644
Browse files Browse the repository at this point in the history
feat: Updated Deep Infra models
  • Loading branch information
github-actions[bot] authored Feb 10, 2025
2 parents 834d4d9 + fcaa73c commit 8fa3e4d
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
8 changes: 4 additions & 4 deletions src/DeepInfra/src/DeepInfraModelIds.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public enum DeepInfraModelIds
/// <summary>
/// Name: DeepSeek-R1 <br/>
/// Organization: deepseek-ai <br/>
/// Context Length: 16000 <br/>
/// Context Length: 32768 <br/>
/// Prompt Cost: $0.75/MTok <br/>
/// Completion Cost: $2.4/MTok <br/>
/// Description: We introduce DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. <br/>
Expand All @@ -31,10 +31,10 @@ public enum DeepInfraModelIds
/// <summary>
/// Name: DeepSeek-V3 <br/>
/// Organization: deepseek-ai <br/>
/// Context Length: 16000 <br/>
/// Context Length: 32768 <br/>
/// Prompt Cost: $0.49/MTok <br/>
/// Completion Cost: $0.89/MTok <br/>
/// Description: <br/>
/// Description: DeepSeek-V3 is a strong Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. <br/>
/// HuggingFace Url: <a href="https://huggingface.co/deepseek-ai/DeepSeek-V3">https://huggingface.co/deepseek-ai/DeepSeek-V3</a>
/// </summary>
DeepseekV3,
Expand Down Expand Up @@ -328,7 +328,7 @@ public enum DeepInfraModelIds
/// <summary>
/// Name: QVQ-72B-Preview <br/>
/// Organization: Qwen <br/>
/// Context Length: 128000 <br/>
/// Context Length: 32000 <br/>
/// Prompt Cost: $0.25/MTok <br/>
/// Completion Cost: $0.5/MTok <br/>
/// Description: QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities. QVQ-72B-Preview has achieved remarkable performance on various benchmarks. It scored a remarkable 70.3% on the Massive Multi-discipline Multimodal Understanding (MMMU) benchmark. <br/>
Expand Down
6 changes: 3 additions & 3 deletions src/DeepInfra/src/DeepInfraModelProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ public static class DeepInfraModelProvider
{
private static Dictionary<DeepInfraModelIds, ChatModelMetadata> Models { get; set; } = new()
{
{ DeepInfraModelIds.DeepseekR1, ToMetadata("deepseek-ai/DeepSeek-R1",16000,7.5E-07,2.4E-06)},
{ DeepInfraModelIds.DeepseekR1, ToMetadata("deepseek-ai/DeepSeek-R1",32768,7.5E-07,2.4E-06)},
{ DeepInfraModelIds.DeepseekR1DistillLlama70B, ToMetadata("deepseek-ai/DeepSeek-R1-Distill-Llama-70B",131072,2.3000000000000002E-07,6.9E-07)},
{ DeepInfraModelIds.DeepseekV3, ToMetadata("deepseek-ai/DeepSeek-V3",16000,4.9E-07,8.900000000000001E-07)},
{ DeepInfraModelIds.DeepseekV3, ToMetadata("deepseek-ai/DeepSeek-V3",32768,4.9E-07,8.900000000000001E-07)},
{ DeepInfraModelIds.Llama3370BInstructTurbo, ToMetadata("meta-llama/Llama-3.3-70B-Instruct-Turbo",131072,1.2E-07,3E-07)},
{ DeepInfraModelIds.Llama3370BInstruct, ToMetadata("meta-llama/Llama-3.3-70B-Instruct",131072,2.3000000000000002E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.MistralSmall24BInstruct2501, ToMetadata("mistralai/Mistral-Small-24B-Instruct-2501",32768,7E-08,1.4E-07)},
Expand All @@ -38,7 +38,7 @@ public static class DeepInfraModelProvider
{ DeepInfraModelIds.Hermes3Llama31405B, ToMetadata("NousResearch/Hermes-3-Llama-3.1-405B",131072,8.000000000000001E-07,8.000000000000001E-07)},
{ DeepInfraModelIds.SkyT132BPreview, ToMetadata("NovaSky-AI/Sky-T1-32B-Preview",32768,1.2E-07,1.8E-07)},
{ DeepInfraModelIds.PhindCodellama34BV2, ToMetadata("Phind/Phind-CodeLlama-34B-v2",4096,6E-07,6E-07)},
{ DeepInfraModelIds.Qvq72BPreview, ToMetadata("Qwen/QVQ-72B-Preview",128000,2.5E-07,5E-07)},
{ DeepInfraModelIds.Qvq72BPreview, ToMetadata("Qwen/QVQ-72B-Preview",32000,2.5E-07,5E-07)},
{ DeepInfraModelIds.Qwen272BInstruct, ToMetadata("Qwen/Qwen2-72B-Instruct",32768,3.5E-07,4.0000000000000003E-07)},
{ DeepInfraModelIds.Qwen27BInstruct, ToMetadata("Qwen/Qwen2-7B-Instruct",32768,6E-08,6E-08)},
{ DeepInfraModelIds.Qwen257BInstruct, ToMetadata("Qwen/Qwen2.5-7B-Instruct",32768,2E-08,5.0000000000000004E-08)},
Expand Down

0 comments on commit 8fa3e4d

Please sign in to comment.