Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip: add milvus vectordb #420

Merged
merged 3 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions LangChain.sln
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Elasticsearch", "Elasticsea
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.Elasticsearch", "src\Databases\Elasticsearch\src\LangChain.Databases.Elasticsearch.csproj", "{6C053245-EB8B-4C98-85AC-B00BA222ABBB}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Milvus", "Milvus", "{911357BA-532A-43B8-9EA8-B32C0C2273EC}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Milvus", "Milvus", "{3CB20A37-5E76-40D1-91F6-47C96FA06AE5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.Milvus", "src\Databases\Milvus\src\LangChain.Databases.Milvus.csproj", "{259C2E5D-6AA9-4384-8F55-24C4D4F1CF18}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -725,6 +731,10 @@ Global
{6C053245-EB8B-4C98-85AC-B00BA222ABBB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6C053245-EB8B-4C98-85AC-B00BA222ABBB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6C053245-EB8B-4C98-85AC-B00BA222ABBB}.Release|Any CPU.Build.0 = Release|Any CPU
{259C2E5D-6AA9-4384-8F55-24C4D4F1CF18}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{259C2E5D-6AA9-4384-8F55-24C4D4F1CF18}.Debug|Any CPU.Build.0 = Debug|Any CPU
{259C2E5D-6AA9-4384-8F55-24C4D4F1CF18}.Release|Any CPU.ActiveCfg = Release|Any CPU
{259C2E5D-6AA9-4384-8F55-24C4D4F1CF18}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -869,6 +879,8 @@ Global
{BD9AF0B6-15C5-4365-9B78-6EF0C9CC3E2F} = {7A2A589D-F8EF-4744-9BEE-B06A5F109851}
{3FCA798D-2CBE-41E7-B5F7-65B4AF6EBC8A} = {A098FF69-D8B5-4B2B-83D5-F777D3817F15}
{6C053245-EB8B-4C98-85AC-B00BA222ABBB} = {3FCA798D-2CBE-41E7-B5F7-65B4AF6EBC8A}
{3CB20A37-5E76-40D1-91F6-47C96FA06AE5} = {A098FF69-D8B5-4B2B-83D5-F777D3817F15}
{259C2E5D-6AA9-4384-8F55-24C4D4F1CF18} = {3CB20A37-5E76-40D1-91F6-47C96FA06AE5}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {5C00D0F1-6138-4ED9-846B-97E43D6DFF1C}
Expand Down
62 changes: 60 additions & 2 deletions src/Databases/IntegrationTests/DatabaseTests.Configure.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
using LangChain.Databases.Chroma;
using LangChain.Databases.Elasticsearch;
using LangChain.Databases.InMemory;
using LangChain.Databases.Milvus;
using LangChain.Databases.Mongo;
using LangChain.Databases.OpenSearch;
using LangChain.Databases.Postgres;
using LangChain.Databases.Sqlite;
using LangChain.Databases.Mongo;
using Testcontainers.Elasticsearch;
using LangChain.Databases.DuckDb;
using Microsoft.SemanticKernel.Connectors.DuckDB;
using Microsoft.SemanticKernel.Connectors.Milvus;
using Testcontainers.MongoDb;
using Testcontainers.PostgreSql;
using Microsoft.SemanticKernel.Connectors.DuckDB;

namespace LangChain.Databases.IntegrationTests;

Expand Down Expand Up @@ -145,6 +147,62 @@ private static async Task<DatabaseTestEnvironment> StartEnvironmentForAsync(Supp
Container = container,
};
}
case SupportedDatabase.Milvus:
{
var network = new NetworkBuilder()
.WithName("milvus-network")
.Build();

var etcdContainer = new ContainerBuilder()
.WithImage("quay.io/coreos/etcd:v3.5.5")
.WithName("milvus-etcd")
.WithEnvironment("ETCD_AUTO_COMPACTION_MODE", "revision")
.WithEnvironment("ETCD_AUTO_COMPACTION_RETENTION", "1000")
.WithEnvironment("ETCD_QUOTA_BACKEND_BYTES", "4294967296")
.WithEnvironment("ETCD_SNAPSHOT_COUNT", "50000")
.WithPortBinding(2379, 2379)
.WithCommand("etcd",
"-advertise-client-urls=http://0.0.0.0:2379",
"-listen-client-urls=http://0.0.0.0:2379",
"--data-dir", "/etcd")
.Build();

var minioContainer = new ContainerBuilder()
.WithImage("minio/minio:RELEASE.2023-03-20T20-16-18Z")
.WithName("milvus-minio")
.WithPortBinding(9000, 9000)
.WithPortBinding(9001, 9001)
.WithEnvironment("MINIO_ACCESS_KEY", "minioadmin")
.WithEnvironment("MINIO_SECRET_KEY", "minioadmin")
.WithCommand("minio", "server", "/minio_data", "--console-address", ":9001")
.Build();

var milvusContainer = new ContainerBuilder()
.WithImage("milvusdb/milvus:v2.3.0")
.WithName("milvus-standalone")
.WithPortBinding(19530, 19530)
.WithPortBinding(9091, 9091)
.WithEnvironment("ETCD_ENDPOINTS", "milvus-etcd:2379")
.WithEnvironment("MINIO_ADDRESS", "milvus-minio:9000")
.WithCommand("milvus", "run", "standalone")
.WithWaitStrategy(Wait.ForUnixContainer().UntilPortIsAvailable(2379))
.WithWaitStrategy(Wait.ForUnixContainer().UntilPortIsAvailable(9000))
.WithWaitStrategy(Wait.ForUnixContainer().UntilPortIsAvailable(9001))
.DependsOn(etcdContainer)
.DependsOn(minioContainer)
.Build();

await etcdContainer.StartAsync(cancellationToken);
await minioContainer.StartAsync(cancellationToken);
await milvusContainer.StartAsync(cancellationToken);

return new DatabaseTestEnvironment
{
VectorDatabase = new MilvusVectorDatabase(new MilvusMemoryStore("localhost")),
Container = milvusContainer,
};
}

default:
throw new ArgumentOutOfRangeException(nameof(database), database, null);
}
Expand Down
11 changes: 9 additions & 2 deletions src/Databases/IntegrationTests/DatabaseTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ public partial class DatabaseTests
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
//[TestCase(SupportedDatabase.Elasticsearch)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
//[TestCase(SupportedDatabase.Milvus)]
public async Task CreateAndDeleteCollection_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -62,6 +63,7 @@ await vectorDatabase.Invoking(y => y.GetCollectionAsync(environment.CollectionNa
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
//[TestCase(SupportedDatabase.Milvus)]
public async Task AddDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -109,6 +111,7 @@ public async Task AddDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
//[TestCase(SupportedDatabase.Milvus)]
public async Task AddTexts_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -160,6 +163,7 @@ public async Task AddTexts_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
//[TestCase(SupportedDatabase.Milvus)]
public async Task DeleteDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -199,6 +203,7 @@ public async Task DeleteDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
//[TestCase(SupportedDatabase.Milvus)]
public async Task SimilaritySearch_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -232,6 +237,7 @@ public async Task SimilaritySearch_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
//[TestCase(SupportedDatabase.Milvus)]
public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -259,8 +265,9 @@ public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
//[TestCase(SupportedDatabase.Elasticsearch)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
//[TestCase(SupportedDatabase.Milvus)]
public async Task SimilaritySearchWithScores_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
<ProjectReference Include="..\Qdrant\src\LangChain.Databases.Qdrant.csproj" />
<ProjectReference Include="..\Redis\src\LangChain.Databases.Redis.csproj" />
<ProjectReference Include="..\Sqlite\src\LangChain.Databases.Sqlite.csproj" />
<ProjectReference Include="..\Milvus\src\LangChain.Databases.Milvus.csproj" />
</ItemGroup>

<ItemGroup>
Expand Down
3 changes: 2 additions & 1 deletion src/Databases/IntegrationTests/SupportedDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ public enum SupportedDatabase
Redis,
Mongo,
Elasticsearch,
DuckDb
DuckDb,
Milvus,
}
18 changes: 18 additions & 0 deletions src/Databases/Milvus/src/LangChain.Databases.Milvus.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<NoWarn>$(NoWarn);SKEXP0020;SKEXP0001;CS3001</NoWarn>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.SemanticKernel.Connectors.Milvus"/>
<PackageReference Include="System.Linq.Async"/>
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\Utilities\Pollyfils\src\LangChain.Polyfills.csproj"/>
<ProjectReference Include="..\..\Abstractions\src\LangChain.Databases.Abstractions.csproj"/>
</ItemGroup>

</Project>
64 changes: 64 additions & 0 deletions src/Databases/Milvus/src/MilvusVectorCollection.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
using Microsoft.SemanticKernel.Connectors.Milvus;
using Microsoft.SemanticKernel.Memory;

namespace LangChain.Databases.Milvus;

public sealed class MilvusVectorCollection(
MilvusMemoryStore store,
string name = VectorCollection.DefaultName,
string? id = null)
: VectorCollection(name, id), IVectorCollection
{
public async Task<IReadOnlyCollection<string>> AddAsync(IReadOnlyCollection<Vector> items, CancellationToken cancellationToken = default)
{
items = items ?? throw new ArgumentNullException(nameof(items));

List<string> list = [];
foreach (var item in items)
{
string? metadata = null;
//TODO: review way to map metadata
if (item.Metadata != null)
metadata = string.Join("#", item.Metadata.Select(kv => kv.Key + "&" + kv.Value));
var record = MemoryRecord.LocalRecord(item.Id, item.Text, null, item.Embedding, metadata);
var insert = await store.UpsertAsync(Name, record, cancellationToken).ConfigureAwait(false);
list.Add(insert);
}
return list;
}

public async Task<bool> DeleteAsync(IEnumerable<string> ids, CancellationToken cancellationToken = default)
{
await store.RemoveBatchAsync(Name, ids, cancellationToken).ConfigureAwait(false);
return true;
}

public async Task<Vector?> GetAsync(string id, CancellationToken cancellationToken = default)
{
var record = await store.GetAsync(Name, id, cancellationToken: cancellationToken).ConfigureAwait(false);

Dictionary<string, object>? metadata = null;
if (record?.Metadata?.AdditionalMetadata != null)
metadata = record.Metadata.AdditionalMetadata
.Split('#')
.Select(part => part.Split('&'))
.ToDictionary(split => split[0], split => (object)split[1]);

return record != null ? new Vector { Id = id, Text = record.Metadata.Text, Metadata = metadata } : null;
}

public async Task<bool> IsEmptyAsync(CancellationToken cancellationToken = default)
{
var collections = store.GetCollectionsAsync(cancellationToken);
return !(await collections.CountAsync(cancellationToken).ConfigureAwait(false) > 0);
}

public async Task<VectorSearchResponse> SearchAsync(VectorSearchRequest request, VectorSearchSettings? settings = null, CancellationToken cancellationToken = default)
{
request = request ?? throw new ArgumentNullException(nameof(request));
settings ??= new VectorSearchSettings();
var results = await store.GetNearestMatchesAsync(Name, request.Embeddings.First(), limit: settings.NumberOfResults, cancellationToken: cancellationToken)
.ToListAsync(cancellationToken).ConfigureAwait(false);
return new VectorSearchResponse { Items = results.Select(x => new Vector { Text = x.Item1.Metadata.Text }).ToList() };
}
}
44 changes: 44 additions & 0 deletions src/Databases/Milvus/src/MilvusVectorDatabase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
using Microsoft.SemanticKernel.Connectors.Milvus;

namespace LangChain.Databases.Milvus;

public class MilvusVectorDatabase(
MilvusMemoryStore store)
: IVectorDatabase
{
public async Task CreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
{
await store.CreateCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
}

public async Task DeleteCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
{
await store.DeleteCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
}

public async Task<IVectorCollection> GetCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
{
var collections = await ListCollectionsAsync(cancellationToken).ConfigureAwait(false);
var collection = collections.FirstOrDefault(x => x == collectionName);
return collection != null ? new MilvusVectorCollection(store, collection)
: throw new InvalidOperationException("Collection not found");
}

public async Task<IVectorCollection> GetOrCreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
{
if (!await IsCollectionExistsAsync(collectionName, cancellationToken).ConfigureAwait(false))
await store.CreateCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
return new MilvusVectorCollection(store, collectionName);
}

public async Task<bool> IsCollectionExistsAsync(string collectionName, CancellationToken cancellationToken = default)
{
return await store.DoesCollectionExistAsync(collectionName, cancellationToken).ConfigureAwait(false);
}

public async Task<IReadOnlyList<string>> ListCollectionsAsync(CancellationToken cancellationToken = default)
{
var collections = store.GetCollectionsAsync(cancellationToken);
return await collections.ToListAsync(cancellationToken).ConfigureAwait(false);
}
}
Loading
Loading