Commit af5574e
Added support for massive embedding model for Azure OpenAI.
alkampfergit committed May 28, 2024
1 parent d5fa29f commit af5574e
Showing 4 changed files with 83 additions and 0 deletions.
1 change: 1 addition & 0 deletions Directory.Packages.props
@@ -4,6 +4,7 @@
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="Alkampfer.KernelMemory.ElasticSearch" Version="0.6.1" />
<PackageVersion Include="Azure.AI.OpenAI" Version="1.0.0-beta.17" />
<PackageVersion Include="CommandDotNet.Spectre" Version="3.0.2" />
<PackageVersion Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Http.Resilience" Version="8.5.0" />
@@ -0,0 +1,18 @@
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI;
using System.Threading;
using System.Threading.Tasks;

namespace KernelMemory.Extensions.Interfaces
{
/// <summary>
/// For better performance we can support embedding generators that
/// can process multiple texts at once.
/// </summary>
public interface IBulkTextEmbeddingGenerator : ITextEmbeddingGenerator
{
Task<Embedding[]> GenerateEmbeddingsAsync(
string[] text,
CancellationToken cancellationToken = default);
}
}
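
A caller-side sketch (not part of this commit; the EmbeddingHelper class and EmbedChunksAsync name are illustrative only): with the new interface, a consumer can embed a whole batch of chunks in a single request instead of looping over GenerateEmbeddingAsync.

using System.Threading;
using System.Threading.Tasks;
using KernelMemory.Extensions.Interfaces;
using Microsoft.KernelMemory;

public static class EmbeddingHelper
{
    // Embed all chunks with one call; the returned array is expected
    // to match the order of the input texts.
    public static Task<Embedding[]> EmbedChunksAsync(
        IBulkTextEmbeddingGenerator generator,
        string[] chunks,
        CancellationToken cancellationToken = default)
        => generator.GenerateEmbeddingsAsync(chunks, cancellationToken);
}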
@@ -0,0 +1,63 @@
using Azure;
using Azure.AI.OpenAI;
using KernelMemory.Extensions.Interfaces;
using Microsoft.KernelMemory;
using System;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

namespace KernelMemory.Extensions.Helper
{
/// <summary>
/// Used to create embedding vectors in bulk for a given set of texts.
/// </summary>
public class AzureOpenaiEmbeddingGenerator : IBulkTextEmbeddingGenerator
{
private readonly OpenAIClient _client;
private readonly MicrosoftMlTiktokenTokenizer _microsoftMlTiktokenTokenizer;
private readonly string _deployment;
private readonly int? _dimensions;

public AzureOpenaiEmbeddingGenerator(
AzureOpenAIConfig azureOpenAIConfig,
MicrosoftMlTiktokenTokenizer microsoftMlTiktokenTokenizer)
{
_client = new OpenAIClient(new Uri(azureOpenAIConfig.Endpoint), new AzureKeyCredential(azureOpenAIConfig.APIKey));
MaxTokens = azureOpenAIConfig.MaxTokenTotal;
_microsoftMlTiktokenTokenizer = microsoftMlTiktokenTokenizer;
_deployment = azureOpenAIConfig.Deployment;
_dimensions = azureOpenAIConfig.EmbeddingDimensions;
}

public AzureOpenaiEmbeddingGenerator(
AzureOpenAIConfig azureOpenAIConfig,
string modelName) : this (azureOpenAIConfig, new MicrosoftMlTiktokenTokenizer(modelName))
{
}

public int MaxTokens { get; }

public int CountTokens(string text)
{
return _microsoftMlTiktokenTokenizer.CountTokens(text);
}

public async Task<Embedding> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
{
var result = await GenerateEmbeddingsAsync([text], cancellationToken);
return result[0];
}

public async Task<Embedding[]> GenerateEmbeddingsAsync(string[] text, CancellationToken cancellationToken = default)
{
var options = new EmbeddingsOptions(_deployment, text);
if (_dimensions.HasValue)
{
options.Dimensions = _dimensions.Value;
}
var result = await _client.GetEmbeddingsAsync(options, cancellationToken);
return result.Value.Data.Select(ei => new Embedding(ei.Embedding)).ToArray();
}
}
}
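
A minimal construction sketch, also not part of the commit: the endpoint, API key, deployment name, and dimension values below are placeholders, and the object initializer assumes the AzureOpenAIConfig properties read by the constructor above are settable.

using KernelMemory.Extensions.Helper;
using Microsoft.KernelMemory;

var config = new AzureOpenAIConfig
{
    Endpoint = "https://<your-resource>.openai.azure.com/", // placeholder
    APIKey = "<api-key>",                                    // placeholder
    Deployment = "text-embedding-3-large",                   // placeholder deployment name
    MaxTokenTotal = 8191,
    EmbeddingDimensions = 1024 // optional; only sent to the service when set
};

// The model name drives the tokenizer used by CountTokens.
var generator = new AzureOpenaiEmbeddingGenerator(config, "text-embedding-3-large");

// One request embeds several texts at once.
Embedding[] embeddings = await generator.GenerateEmbeddingsAsync(
    new[] { "first chunk of text", "second chunk of text" });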
1 change: 1 addition & 0 deletions src/KernelMemory.Extensions/KernelMemory.Extensions.csproj
@@ -16,6 +16,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Azure.AI.OpenAI" />
<PackageReference Include="CommandDotNet.Spectre" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Http.Resilience" />
