From 0022e70d9640cb3a59c83d7fd34b462642892a61 Mon Sep 17 00:00:00 2001 From: Devis Lucato Date: Fri, 11 Aug 2023 12:55:45 -0700 Subject: [PATCH] * Complete filtering support, e.g. filtering memory by user/folder/chat/tag/anything * Complete "a document can contain multiple files" on all implementations of ISemanticMemoryClient * Rename SemanticMemoryServerless to Memory * Align interface of Memory, MemoryService, MemoryWebClient * Delete ISemanticMemoryService and re-use ISemanticMemoryClient * Align on camelCase for HTTP request/responses * Revisit API, align on the term "document" instead of "file" * Rename pipeline.Id to pipeline.DocumentId * Memory records: rename record.Metadata to record.Payload * Remove from CoreLib dependency on ASP.NET framework and ASP.NET HTTP * Upgrade Azure Cognitive Search nuget to latest beta * Rename /ask param "query" to "question" * Update docs and examples --- README.md | 24 +-- dotnet/ClientLib/ClientLib.csproj | 2 +- dotnet/ClientLib/ISemanticMemoryClient.cs | 62 ++++---- dotnet/ClientLib/MemoryWebClient.cs | 144 ++++++++++++------ dotnet/ClientLib/Models/DataPipelineStatus.cs | 4 +- dotnet/ClientLib/Models/Document.cs | 32 +++- .../ClientLib/Models/DocumentUploadRequest.cs | 33 ++++ dotnet/ClientLib/Models/MemoryAnswer.cs | 24 +-- dotnet/ClientLib/Models/MemoryFilter.cs | 19 +-- dotnet/ClientLib/Models/MemoryQuery.cs | 17 +++ dotnet/ClientLib/Models/UploadAccepted.cs | 6 +- .../AI/AzureOpenAI/AzureTextGeneration.cs | 2 +- .../CoreLib/AI/OpenAI/OpenAITextGeneration.cs | 2 +- .../AppBuilders/DependencyInjection.cs | 3 +- dotnet/CoreLib/CoreLib.csproj | 12 +- .../Handlers/GenerateEmbeddingsHandler.cs | 2 +- .../CoreLib/Handlers/SaveEmbeddingsHandler.cs | 18 +-- .../Handlers/TextPartitioningHandler.cs | 4 +- ...{SemanticMemoryServerless.cs => Memory.cs} | 74 ++++----- dotnet/CoreLib/MemoryService.cs | 77 ++++++++++ .../AzureCognitiveSearchMemory.cs | 6 +- .../AzureCognitiveSearchMemoryRecord.cs | 14 +- 
dotnet/CoreLib/MemoryStorage/MemoryRecord.cs | 4 +- dotnet/CoreLib/Pipeline/BaseOrchestrator.cs | 69 ++++++--- dotnet/CoreLib/Pipeline/DataPipeline.cs | 18 +-- .../DistributedPipelineOrchestrator.cs | 12 +- .../CoreLib/Pipeline/IPipelineOrchestrator.cs | 21 ++- .../Pipeline/InProcessPipelineOrchestrator.cs | 6 +- dotnet/CoreLib/Search/SearchClient.cs | 40 +++-- dotnet/CoreLib/SemanticMemoryService.cs | 66 -------- .../CoreLib/WebService/DocumentExtensions.cs | 45 ++++++ ...equest.cs => HttpDocumentUploadRequest.cs} | 6 +- .../HttpDocumentUploadRequestExtensions.cs | 33 ++++ dotnet/CoreLib/WebService/MemoryQuery.cs | 12 -- .../InteractiveSetup/InteractiveSetup.csproj | 4 +- dotnet/Service/Program.cs | 38 ++--- dotnet/Service/Service.csproj | 2 +- .../001-dotnet-Serverless.csproj | 2 +- samples/001-dotnet-Serverless/Program.cs | 36 +++-- samples/001-dotnet-Serverless/README.md | 14 +- .../002-dotnet-WebClient.csproj | 2 +- samples/002-dotnet-WebClient/Program.cs | 61 +++++--- samples/002-dotnet-WebClient/README.md | 18 ++- samples/002-dotnet-WebClient/setup.cmd | 5 - samples/002-dotnet-WebClient/setup.sh | 10 -- samples/003-curl-calling-webservice/README.md | 25 ++- .../ask-example.sh | 3 +- ...004-dotnet-ServerlessCustomPipeline.csproj | 2 +- .../Program.cs | 2 +- .../README.md | 2 +- .../005-dotnet-ExtractTextFromDocs.csproj | 2 +- .../006-dotnet-CustomHandler.csproj | 2 +- .../007-using-azure-cognitive-search.csproj | 4 +- .../Program.cs | 12 +- tools/ask.sh | 18 ++- 55 files changed, 707 insertions(+), 470 deletions(-) create mode 100644 dotnet/ClientLib/Models/DocumentUploadRequest.cs create mode 100644 dotnet/ClientLib/Models/MemoryQuery.cs rename dotnet/CoreLib/{SemanticMemoryServerless.cs => Memory.cs} (57%) create mode 100644 dotnet/CoreLib/MemoryService.cs delete mode 100644 dotnet/CoreLib/SemanticMemoryService.cs create mode 100644 dotnet/CoreLib/WebService/DocumentExtensions.cs rename dotnet/CoreLib/WebService/{UploadRequest.cs => 
HttpDocumentUploadRequest.cs} (94%) create mode 100644 dotnet/CoreLib/WebService/HttpDocumentUploadRequestExtensions.cs delete mode 100644 dotnet/CoreLib/WebService/MemoryQuery.cs delete mode 100644 samples/002-dotnet-WebClient/setup.cmd delete mode 100755 samples/002-dotnet-WebClient/setup.sh diff --git a/README.md b/README.md index 240a216ad..bd22c158a 100644 --- a/README.md +++ b/README.md @@ -30,17 +30,17 @@ in your app. > ### Importing documents into your Semantic Memory can be as simple as this: > > ```csharp -> var memory = new MemoryServerlessClient(); +> var memory = new Memory(serviceProvider); > > // Import a file (default user) -> await memory.ImportFileAsync("meeting-transcript.docx"); +> await memory.ImportDocumentAsync("meeting-transcript.docx"); > -> // Import a file specifying a User and Tags -> await memory.ImportFileAsync("business-plan.docx", -> new DocumentDetails("user@some.email", "file1") +> // Import a file specifying a Document ID, User and Tags +> await memory.ImportDocumentAsync("business-plan.docx", +> new DocumentDetails("user@some.email", "file001") > .AddTag("collection", "business") > .AddTag("collection", "plans") -> .AddTag("type", "doc")); +> .AddTag("fiscalYear", "2023")); > ``` > ### Asking questions: @@ -155,13 +155,15 @@ to **start the Semantic Memory Service**: > > var memory = new MemoryWebClient("http://127.0.0.1:9001"); // <== URL where the web service is running > -> await memory.ImportFileAsync("meeting-transcript.docx"); +> // Import a file (default user) +> await memory.ImportDocumentAsync("meeting-transcript.docx"); > -> await memory.ImportFileAsync("business-plan.docx", -> new DocumentDetails("file1", "user0022") +> // Import a file specifying a Document ID, User and Tags +> await memory.ImportDocumentAsync("business-plan.docx", +> new DocumentDetails("user@some.email", "file001") > .AddTag("collection", "business") > .AddTag("collection", "plans") -> .AddTag("type", "doc")); +> .AddTag("fiscalYear", "2023")); > ``` 
> ### Getting answers via the web service @@ -202,7 +204,7 @@ var app = AppBuilder.Build(); var storage = app.Services.GetService(); // Use a local, synchronous, orchestrator -var orchestrator = new InProcessPipelineOrchestrator(storage); +var orchestrator = new InProcessPipelineOrchestrator(storage, app.Services); // Define custom .NET handlers var step1 = new MyHandler1("step1", orchestrator); diff --git a/dotnet/ClientLib/ClientLib.csproj b/dotnet/ClientLib/ClientLib.csproj index 8e8201f3d..2641d1496 100644 --- a/dotnet/ClientLib/ClientLib.csproj +++ b/dotnet/ClientLib/ClientLib.csproj @@ -24,7 +24,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/dotnet/ClientLib/ISemanticMemoryClient.cs b/dotnet/ClientLib/ISemanticMemoryClient.cs index a87541197..5e7a08aee 100644 --- a/dotnet/ClientLib/ISemanticMemoryClient.cs +++ b/dotnet/ClientLib/ISemanticMemoryClient.cs @@ -1,6 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. -using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; using Microsoft.SemanticMemory.Client.Models; @@ -10,28 +9,20 @@ namespace Microsoft.SemanticMemory.Client; public interface ISemanticMemoryClient { /// - /// Import a file into memory. The file can have tags and other details. + /// Import a document into memory. The document can contain one or more files, can have tags and other details. /// - /// Details of the file to import + /// Upload request containing the document files and details /// Async task cancellation token /// Document ID - public Task ImportFileAsync(Document file, CancellationToken cancellationToken = default); + public Task ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default); /// - /// Import multiple files into memory. Each file can have tags and other details. + /// Import a document into memory. 
The document can contain one or more files, can have tags and other details. /// - /// Details of the files to import - /// Async task cancellation token - /// List of document IDs - public Task> ImportFilesAsync(Document[] files, CancellationToken cancellationToken = default); - - /// - /// Import a file from disk into the default user memory. - /// - /// Path and name of the file to import + /// Details of the files to import /// Async task cancellation token /// Document ID - public Task ImportFileAsync(string fileName, CancellationToken cancellationToken = default); + public Task ImportDocumentAsync(Document document, CancellationToken cancellationToken = default); /// /// Import a files from disk into memory, with details such as tags and user ID. @@ -40,35 +31,44 @@ public interface ISemanticMemoryClient /// File details such as tags and user ID /// Async task cancellation token /// Document ID - public Task ImportFileAsync(string fileName, DocumentDetails details, CancellationToken cancellationToken = default); + public Task ImportDocumentAsync(string fileName, DocumentDetails? details = null, CancellationToken cancellationToken = default); /// - /// Search the default user memory for an answer to the given query. + /// Check if a document ID exists in a user memory and is ready for usage. + /// The logic checks if the uploaded document has been fully processed. + /// When the document exists in storage but is not processed yet, the method returns False. /// - /// Query/question to answer - /// Filter to match + /// ID of the user's memory to search + /// Document ID /// Async task cancellation token - /// Answer to the query, if possible - public Task AskAsync(string query, MemoryFilter? 
filter = null, CancellationToken cancellationToken = default); + /// True if the document has been successfully uploaded and imported + public Task IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default); /// - /// Search a user memory for an answer to the given query. + /// Get information about an uploaded document /// - /// ID of the user's memory to search - /// Query/question to answer + /// User ID + /// Document ID (aka pipeline ID) + /// Async task cancellation token + /// Information about an uploaded document + public Task GetDocumentStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default); + + /// + /// Search the default user memory for an answer to the given query. + /// + /// Query/question to answer /// Filter to match /// Async task cancellation token /// Answer to the query, if possible - public Task AskAsync(string userId, string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default); + public Task AskAsync(string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default); /// - /// Check if a document ID exists in a user memory and is ready for usage. - /// The logic checks if the uploaded document has been fully processed. - /// When the document exists in storage but is not processed yet, the method returns False. + /// Search a user memory for an answer to the given query. /// /// ID of the user's memory to search - /// Document ID + /// Question to answer + /// Filter to match /// Async task cancellation token - /// True if the document has been successfully uploaded and imported - public Task IsReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default); + /// Answer to the query, if possible + public Task AskAsync(string userId, string question, MemoryFilter? 
filter = null, CancellationToken cancellationToken = default); } diff --git a/dotnet/ClientLib/MemoryWebClient.cs b/dotnet/ClientLib/MemoryWebClient.cs index 0c34347d1..5bb92a1e8 100644 --- a/dotnet/ClientLib/MemoryWebClient.cs +++ b/dotnet/ClientLib/MemoryWebClient.cs @@ -28,39 +28,62 @@ public MemoryWebClient(string endpoint, HttpClient client) } /// - public Task ImportFileAsync(Document file, CancellationToken cancellationToken = default) + public Task ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default) { - return this.ImportFileInternalAsync(file, cancellationToken); + return this.ImportInternalAsync(uploadRequest, cancellationToken); } /// - public Task> ImportFilesAsync(Document[] files, CancellationToken cancellationToken = default) + public Task ImportDocumentAsync(Document document, CancellationToken cancellationToken = default) { - return this.ImportFilesInternalAsync(files, cancellationToken); + return this.ImportInternalAsync(document, cancellationToken); } /// - public Task ImportFileAsync(string fileName, CancellationToken cancellationToken = default) + public Task ImportDocumentAsync(string fileName, DocumentDetails? details = null, CancellationToken cancellationToken = default) { - return this.ImportFileAsync(new Document(fileName), cancellationToken); + return this.ImportInternalAsync(new Document(fileName) { Details = details ?? new DocumentDetails() }, cancellationToken); } /// - public Task ImportFileAsync(string fileName, DocumentDetails details, CancellationToken cancellationToken = default) + public async Task IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default) { - return this.ImportFileInternalAsync(new Document(fileName) { Details = details }, cancellationToken); + DataPipelineStatus? 
status = await this.GetDocumentStatusAsync(userId: userId, documentId: documentId, cancellationToken).ConfigureAwait(false); + return status != null && status.Completed; } /// - public Task AskAsync(string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default) + public async Task GetDocumentStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default) { - return this.AskAsync(new DocumentDetails().UserId, query, filter, cancellationToken); + HttpResponseMessage? response = await this._client.GetAsync($"/upload-status?user={userId}&id={documentId}", cancellationToken).ConfigureAwait(false); + if (response.StatusCode == HttpStatusCode.NotFound) + { + return null; + } + + response.EnsureSuccessStatusCode(); + + var json = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + DataPipelineStatus? status = JsonSerializer.Deserialize(json); + + if (status == null) + { + return null; + } + + return status; } /// - public async Task AskAsync(string userId, string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default) + public Task AskAsync(string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default) { - var request = new { UserId = userId, Query = query, Tags = new TagCollection() }; + return this.AskAsync(new DocumentDetails().UserId, question, filter, cancellationToken); + } + + /// + public async Task AskAsync(string userId, string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default) + { + var request = new MemoryQuery { UserId = userId, Question = question, Filter = filter ?? new MemoryFilter() }; using var content = new StringContent(JsonSerializer.Serialize(request), Encoding.UTF8, "application/json"); HttpResponseMessage? 
response = await this._client.PostAsync("/ask", content, cancellationToken).ConfigureAwait(false); @@ -70,64 +93,97 @@ public async Task AskAsync(string userId, string query, MemoryFilt return JsonSerializer.Deserialize(json, new JsonSerializerOptions { PropertyNameCaseInsensitive = true }) ?? new MemoryAnswer(); } - /// - public async Task IsReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default) + #region private + + private async Task ImportInternalAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken) { - HttpResponseMessage? response = await this._client.GetAsync($"/upload-status?user={userId}&id={documentId}", cancellationToken).ConfigureAwait(false); - if (response.StatusCode == HttpStatusCode.NotFound) + // Populate form with values and files from disk + using var formData = new MultipartFormDataContent(); + + using StringContent documentIdContent = new(uploadRequest.DocumentId); + using (StringContent userContent = new(uploadRequest.UserId)) { - return false; - } + List disposables = new(); + formData.Add(documentIdContent, Constants.WebServiceDocumentIdField); + formData.Add(userContent, Constants.WebServiceUserIdField); - response.EnsureSuccessStatusCode(); + foreach (var tag in uploadRequest.Tags.Pairs) + { + var tagContent = new StringContent(tag.Value); + disposables.Add(tagContent); + formData.Add(tagContent, tag.Key); + } - var json = await response.Content.ReadAsStringAsync().ConfigureAwait(false); - DataPipelineStatus? 
status = JsonSerializer.Deserialize(json); + for (int index = 0; index < uploadRequest.Files.Count; index++) + { + string fileName = uploadRequest.Files[index].FileName; - if (status == null) - { - throw new SemanticMemoryWebException("Unable to parse status response"); - } + byte[] bytes; + using (var binaryReader = new BinaryReader(uploadRequest.Files[index].FileContent)) + { + bytes = binaryReader.ReadBytes((int)uploadRequest.Files[index].FileContent.Length); + } - return status.Completed; - } + var fileContent = new ByteArrayContent(bytes, 0, bytes.Length); + disposables.Add(fileContent); - #region private + formData.Add(fileContent, $"file{index}", fileName); + } - private async Task> ImportFilesInternalAsync(Document[] files, CancellationToken cancellationToken) - { - List docIds = new(); - foreach (Document file in files) - { - docIds.Add(await this.ImportFileInternalAsync(file, cancellationToken).ConfigureAwait(false)); + // Send HTTP request + try + { + HttpResponseMessage? response = await this._client.PostAsync("/upload", formData, cancellationToken).ConfigureAwait(false); + formData.Dispose(); + response.EnsureSuccessStatusCode(); + } + catch (HttpRequestException e) when (e.Data.Contains("StatusCode")) + { + throw new SemanticMemoryWebException($"{e.Message} [StatusCode: {e.Data["StatusCode"]}]", e); + } + catch (Exception e) + { + throw new SemanticMemoryWebException(e.Message, e); + } + finally + { + foreach (var disposable in disposables) + { + disposable.Dispose(); + } + } } - return docIds; + return uploadRequest.DocumentId; } - private async Task ImportFileInternalAsync(Document file, CancellationToken cancellationToken) + private async Task ImportInternalAsync(Document document, CancellationToken cancellationToken) { // Populate form with values and files from disk using var formData = new MultipartFormDataContent(); - using StringContent documentIdContent = new(file.Details.DocumentId); - using (StringContent userContent = 
new(file.Details.UserId)) + using StringContent documentIdContent = new(document.Details.DocumentId); + using (StringContent userContent = new(document.Details.UserId)) { List disposables = new(); formData.Add(documentIdContent, Constants.WebServiceDocumentIdField); formData.Add(userContent, Constants.WebServiceUserIdField); - foreach (var tag in file.Details.Tags.Pairs) + foreach (var tag in document.Details.Tags.Pairs) { var tagContent = new StringContent(tag.Value); disposables.Add(tagContent); formData.Add(tagContent, tag.Key); } - byte[] bytes = File.ReadAllBytes(file.FileName); - var fileContent = new ByteArrayContent(bytes, 0, bytes.Length); - disposables.Add(fileContent); - formData.Add(fileContent, "file1", file.FileName); + for (int index = 0; index < document.FileNames.Count; index++) + { + string fileName = document.FileNames[index]; + byte[] bytes = File.ReadAllBytes(fileName); + var fileContent = new ByteArrayContent(bytes, 0, bytes.Length); + disposables.Add(fileContent); + formData.Add(fileContent, $"file{index}", fileName); + } // Send HTTP request try @@ -153,7 +209,7 @@ private async Task ImportFileInternalAsync(Document file, CancellationTo } } - return file.Details.DocumentId; + return document.Details.DocumentId; } #endregion diff --git a/dotnet/ClientLib/Models/DataPipelineStatus.cs b/dotnet/ClientLib/Models/DataPipelineStatus.cs index a033023ee..30f76fde1 100644 --- a/dotnet/ClientLib/Models/DataPipelineStatus.cs +++ b/dotnet/ClientLib/Models/DataPipelineStatus.cs @@ -20,8 +20,8 @@ public class DataPipelineStatus /// Unique Id /// [JsonPropertyOrder(10)] - [JsonPropertyName("id")] - public string Id { get; set; } = string.Empty; + [JsonPropertyName("document_id")] + public string DocumentId { get; set; } = string.Empty; [JsonPropertyOrder(11)] [JsonPropertyName("user_id")] diff --git a/dotnet/ClientLib/Models/Document.cs b/dotnet/ClientLib/Models/Document.cs index 1e9bae28e..fb11cf4ec 100644 --- a/dotnet/ClientLib/Models/Document.cs +++ 
b/dotnet/ClientLib/Models/Document.cs @@ -1,20 +1,46 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Collections.Generic; + namespace Microsoft.SemanticMemory.Client.Models; +/// +/// A document is a collection of one or multiple files, with additional +/// metadata such as tags and ownership. +/// public class Document { - public string FileName { get; set; } = string.Empty; + public List FileNames { get; set; } = new(); public DocumentDetails Details { get; set; } = new(); public Document() { } - public Document(string fileName) { this.FileName = fileName; } + public Document(string fileName) + { + this.FileNames.Add(fileName); + } + + public Document(List fileNames) + { + this.FileNames.AddRange(fileNames); + } public Document(string fileName, DocumentDetails details) { - this.FileName = fileName; + this.FileNames.Add(fileName); + this.Details = details; + } + + public Document(List fileNames, DocumentDetails details) + { + this.FileNames.AddRange(fileNames); + this.Details = details; + } + + public Document(string[] fileNames, DocumentDetails details) + { + this.FileNames.AddRange(fileNames); this.Details = details; } } diff --git a/dotnet/ClientLib/Models/DocumentUploadRequest.cs b/dotnet/ClientLib/Models/DocumentUploadRequest.cs new file mode 100644 index 000000000..fd10e6257 --- /dev/null +++ b/dotnet/ClientLib/Models/DocumentUploadRequest.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Collections.Generic; +using System.IO; + +namespace Microsoft.SemanticMemory.Client.Models; + +// Note: this class is designed to avoid using Asp.Net IForm +// and avoiding dependencies on Asp.Net HTTP that would lead +// to dependency issues mixing .NET7 and .NET Standard 2.0 +public class DocumentUploadRequest +{ + public class UploadedFile + { + public string FileName { get; set; } = string.Empty; + public Stream FileContent { get; set; } = Stream.Null; + + public UploadedFile() + { + } + + public UploadedFile(string fileName, Stream fileContent) + { + this.FileName = fileName; + this.FileContent = fileContent; + } + } + + public string DocumentId { get; set; } = string.Empty; + public string UserId { get; set; } = string.Empty; + public TagCollection Tags { get; set; } = new(); + public List Files { get; set; } = new List(); +} diff --git a/dotnet/ClientLib/Models/MemoryAnswer.cs b/dotnet/ClientLib/Models/MemoryAnswer.cs index 4a261c098..ff7d59239 100644 --- a/dotnet/ClientLib/Models/MemoryAnswer.cs +++ b/dotnet/ClientLib/Models/MemoryAnswer.cs @@ -10,16 +10,16 @@ namespace Microsoft.SemanticMemory.Client.Models; public class MemoryAnswer { /// - /// Content of the query. + /// Client question. /// - [JsonPropertyName("Query")] + [JsonPropertyName("question")] [JsonPropertyOrder(1)] - public string Query { get; set; } = string.Empty; + public string Question { get; set; } = string.Empty; /// /// Content of the answer. /// - [JsonPropertyName("Text")] + [JsonPropertyName("text")] [JsonPropertyOrder(2)] public string Result { get; set; } = string.Empty; @@ -28,7 +28,7 @@ public class MemoryAnswer /// Key = Document ID /// Value = List of partitions used from the document. /// - [JsonPropertyName("RelevantSources")] + [JsonPropertyName("relevantSources")] [JsonPropertyOrder(3)] public List RelevantSources { get; set; } = new(); @@ -37,28 +37,28 @@ public class Citation /// /// Link to the source, if available. 
/// - [JsonPropertyName("Link")] + [JsonPropertyName("link")] [JsonPropertyOrder(1)] public string Link { get; set; } = string.Empty; /// /// Type of source, e.g. PDF, Word, Chat, etc. /// - [JsonPropertyName("SourceContentType")] + [JsonPropertyName("sourceContentType")] [JsonPropertyOrder(2)] public string SourceContentType { get; set; } = string.Empty; /// /// Name of the source, e.g. file name. /// - [JsonPropertyName("SourceName")] + [JsonPropertyName("sourceName")] [JsonPropertyOrder(3)] public string SourceName { get; set; } = string.Empty; /// /// List of chunks/blocks of text used. /// - [JsonPropertyName("Partitions")] + [JsonPropertyName("partitions")] [JsonPropertyOrder(4)] public List Partitions { get; set; } = new(); @@ -67,7 +67,7 @@ public class Partition /// /// Content of the document partition, aka chunk/block of text. /// - [JsonPropertyName("Text")] + [JsonPropertyName("text")] [JsonPropertyOrder(1)] public string Text { get; set; } = string.Empty; @@ -75,14 +75,14 @@ public class Partition /// Relevance of this partition against the given query. /// Value usually is between 0 and 1, when using cosine similarity. /// - [JsonPropertyName("Relevance")] + [JsonPropertyName("relevance")] [JsonPropertyOrder(2)] public float Relevance { get; set; } = 0; /// /// Timestamp about the file/text partition. 
/// - [JsonPropertyName("LastUpdate")] + [JsonPropertyName("lastUpdate")] [JsonPropertyOrder(4)] public DateTimeOffset LastUpdate { get; set; } = DateTimeOffset.MinValue; } diff --git a/dotnet/ClientLib/Models/MemoryFilter.cs b/dotnet/ClientLib/Models/MemoryFilter.cs index f7e730a4c..2297976c3 100644 --- a/dotnet/ClientLib/Models/MemoryFilter.cs +++ b/dotnet/ClientLib/Models/MemoryFilter.cs @@ -4,40 +4,33 @@ namespace Microsoft.SemanticMemory.Client.Models; -public class MemoryFilter +public class MemoryFilter : TagCollection { - private readonly TagCollection _tags; - - public MemoryFilter() - { - this._tags = new TagCollection(); - } - public bool IsEmpty() { - return this._tags.Count == 0; + return this.Count == 0; } public MemoryFilter ByTag(string name, string value) { - this._tags.Add(name, value); + this.Add(name, value); return this; } public MemoryFilter ByUser(string userId) { - this._tags.Add(Constants.ReservedUserIdTag, userId); + this.Add(Constants.ReservedUserIdTag, userId); return this; } public MemoryFilter ByDocument(string docId) { - this._tags.Add(Constants.ReservedPipelineIdTag, docId); + this.Add(Constants.ReservedPipelineIdTag, docId); return this; } public IEnumerable> GetFilters() { - return this._tags.ToKeyValueList(); + return this.ToKeyValueList(); } } diff --git a/dotnet/ClientLib/Models/MemoryQuery.cs b/dotnet/ClientLib/Models/MemoryQuery.cs new file mode 100644 index 000000000..4d811ad01 --- /dev/null +++ b/dotnet/ClientLib/Models/MemoryQuery.cs @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Text.Json.Serialization; + +namespace Microsoft.SemanticMemory.Client.Models; + +public class MemoryQuery +{ + [JsonPropertyName("userId")] + public string UserId { get; set; } = string.Empty; + + [JsonPropertyName("question")] + public string Question { get; set; } = string.Empty; + + [JsonPropertyName("filter")] + public MemoryFilter Filter { get; set; } = new(); +} diff --git a/dotnet/ClientLib/Models/UploadAccepted.cs b/dotnet/ClientLib/Models/UploadAccepted.cs index 354306b32..64098571d 100644 --- a/dotnet/ClientLib/Models/UploadAccepted.cs +++ b/dotnet/ClientLib/Models/UploadAccepted.cs @@ -6,15 +6,15 @@ namespace Microsoft.SemanticMemory.Client.Models; public class UploadAccepted { - [JsonPropertyName("UserId")] + [JsonPropertyName("userId")] [JsonPropertyOrder(1)] public string UserId { get; set; } = string.Empty; - [JsonPropertyName("Id")] + [JsonPropertyName("id")] [JsonPropertyOrder(2)] public string Id { get; set; } = string.Empty; - [JsonPropertyName("Message")] + [JsonPropertyName("message")] [JsonPropertyOrder(3)] public string Message { get; set; } = string.Empty; } diff --git a/dotnet/CoreLib/AI/AzureOpenAI/AzureTextGeneration.cs b/dotnet/CoreLib/AI/AzureOpenAI/AzureTextGeneration.cs index 15483cabf..be4144c8e 100644 --- a/dotnet/CoreLib/AI/AzureOpenAI/AzureTextGeneration.cs +++ b/dotnet/CoreLib/AI/AzureOpenAI/AzureTextGeneration.cs @@ -112,7 +112,7 @@ public async IAsyncEnumerable GenerateTextAsync( NucleusSamplingFactor = (float)options.TopP, FrequencyPenalty = (float)options.FrequencyPenalty, PresencePenalty = (float)options.PresencePenalty, - ChoiceCount = 1, + // ChoiceCount = 1, }; if (options.StopSequences is { Count: > 0 }) diff --git a/dotnet/CoreLib/AI/OpenAI/OpenAITextGeneration.cs b/dotnet/CoreLib/AI/OpenAI/OpenAITextGeneration.cs index ecdfffe7e..35ef28765 100644 --- a/dotnet/CoreLib/AI/OpenAI/OpenAITextGeneration.cs +++ b/dotnet/CoreLib/AI/OpenAI/OpenAITextGeneration.cs @@ -95,7 +95,7 @@ public async IAsyncEnumerable 
GenerateTextAsync( NucleusSamplingFactor = (float)options.TopP, FrequencyPenalty = (float)options.FrequencyPenalty, PresencePenalty = (float)options.PresencePenalty, - ChoiceCount = 1, + // ChoiceCount = 1, }; if (options.StopSequences is { Count: > 0 }) diff --git a/dotnet/CoreLib/AppBuilders/DependencyInjection.cs b/dotnet/CoreLib/AppBuilders/DependencyInjection.cs index 99cbf4743..a5bcdb6d4 100644 --- a/dotnet/CoreLib/AppBuilders/DependencyInjection.cs +++ b/dotnet/CoreLib/AppBuilders/DependencyInjection.cs @@ -2,6 +2,7 @@ using System; using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticMemory.Client; using Microsoft.SemanticMemory.Core.Configuration; using Microsoft.SemanticMemory.Core.Pipeline; using Microsoft.SemanticMemory.Core.Search; @@ -37,7 +38,7 @@ public static void ConfigureRuntime(this IServiceCollection services, SemanticMe if (config.Service.RunWebService) { - services.AddSingleton(); + services.AddSingleton(); if (config.Service.OpenApiEnabled) { diff --git a/dotnet/CoreLib/CoreLib.csproj b/dotnet/CoreLib/CoreLib.csproj index b780d5faa..bacdaab56 100644 --- a/dotnet/CoreLib/CoreLib.csproj +++ b/dotnet/CoreLib/CoreLib.csproj @@ -14,23 +14,19 @@ - - - + - - + - - + @@ -46,7 +42,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/dotnet/CoreLib/Handlers/GenerateEmbeddingsHandler.cs b/dotnet/CoreLib/Handlers/GenerateEmbeddingsHandler.cs index c9f3542bf..3e8ee7eb6 100644 --- a/dotnet/CoreLib/Handlers/GenerateEmbeddingsHandler.cs +++ b/dotnet/CoreLib/Handlers/GenerateEmbeddingsHandler.cs @@ -54,7 +54,7 @@ public GenerateEmbeddingsHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogTrace("Generating embeddings, pipeline {0}", pipeline.Id); + this._log.LogTrace("Generating embeddings, pipeline {0}", 
pipeline.DocumentId); foreach (var uploadedFile in pipeline.Files) { diff --git a/dotnet/CoreLib/Handlers/SaveEmbeddingsHandler.cs b/dotnet/CoreLib/Handlers/SaveEmbeddingsHandler.cs index d4dea966a..69607290c 100644 --- a/dotnet/CoreLib/Handlers/SaveEmbeddingsHandler.cs +++ b/dotnet/CoreLib/Handlers/SaveEmbeddingsHandler.cs @@ -66,28 +66,28 @@ public SaveEmbeddingsHandler( var record = new MemoryRecord { - Id = GetEmbeddingRecordId(pipeline.UserId, pipeline.Id, embeddingFile.Value.Id), + Id = GetEmbeddingRecordId(pipeline.UserId, pipeline.DocumentId, embeddingFile.Value.Id), Vector = embeddingData.Vector, Owner = pipeline.UserId, }; // Note that the User Id is not set here, but when mapping MemoryRecord to the specific VectorDB schema - record.Tags.Add(Constants.ReservedPipelineIdTag, pipeline.Id); + record.Tags.Add(Constants.ReservedPipelineIdTag, pipeline.DocumentId); record.Tags.Add(Constants.ReservedFileIdTag, embeddingFile.Value.ParentId); record.Tags.Add(Constants.ReservedFilePartitionTag, embeddingFile.Value.Id); record.Tags.Add(Constants.ReservedFileTypeTag, pipeline.GetFile(embeddingFile.Value.ParentId).Type); pipeline.Tags.CopyTo(record.Tags); - record.Metadata.Add("file_name", pipeline.GetFile(embeddingFile.Value.ParentId).Name); - record.Metadata.Add("vector_provider", embeddingData.GeneratorProvider); - record.Metadata.Add("vector_generator", embeddingData.GeneratorName); - record.Metadata.Add("last_update", DateTimeOffset.UtcNow.ToString("s")); + record.Payload.Add("file_name", pipeline.GetFile(embeddingFile.Value.ParentId).Name); + record.Payload.Add("vector_provider", embeddingData.GeneratorProvider); + record.Payload.Add("vector_generator", embeddingData.GeneratorName); + record.Payload.Add("last_update", DateTimeOffset.UtcNow.ToString("s")); // Store text partition for RAG // TODO: make this optional to reduce space usage, using blob files instead string partitionContent = await this._orchestrator.ReadTextFileAsync(pipeline, 
embeddingData.SourceFileName, cancellationToken).ConfigureAwait(false); - record.Metadata.Add("text", partitionContent); + record.Payload.Add("text", partitionContent); string indexName = record.Owner; @@ -113,7 +113,7 @@ private async Task DeletePreviousEmbeddingsAsync(DataPipeline pipeline, Cancella // Decide which embeddings not to delete, looking at the current pipeline foreach (DataPipeline.GeneratedFileDetails embeddingFile in pipeline.Files.SelectMany(f1 => f1.GeneratedFiles.Where(f2 => f2.Value.IsEmbeddingFile()).Select(x => x.Value))) { - string recordId = GetEmbeddingRecordId(pipeline.UserId, pipeline.Id, embeddingFile.Id); + string recordId = GetEmbeddingRecordId(pipeline.UserId, pipeline.DocumentId, embeddingFile.Id); embeddingsToKeep.Add(recordId); } @@ -122,7 +122,7 @@ private async Task DeletePreviousEmbeddingsAsync(DataPipeline pipeline, Cancella { foreach (DataPipeline.GeneratedFileDetails embeddingFile in oldPipeline.Files.SelectMany(f1 => f1.GeneratedFiles.Where(f2 => f2.Value.IsEmbeddingFile()).Select(x => x.Value))) { - string recordId = GetEmbeddingRecordId(pipeline.UserId, oldPipeline.Id, embeddingFile.Id); + string recordId = GetEmbeddingRecordId(pipeline.UserId, oldPipeline.DocumentId, embeddingFile.Id); if (embeddingsToKeep.Contains(recordId)) { continue; } string indexName = pipeline.UserId; diff --git a/dotnet/CoreLib/Handlers/TextPartitioningHandler.cs b/dotnet/CoreLib/Handlers/TextPartitioningHandler.cs index 392203f8a..4ade3fa5f 100644 --- a/dotnet/CoreLib/Handlers/TextPartitioningHandler.cs +++ b/dotnet/CoreLib/Handlers/TextPartitioningHandler.cs @@ -9,7 +9,6 @@ using Microsoft.Extensions.Logging; using Microsoft.SemanticKernel.Connectors.AI.OpenAI.Tokenizers; using Microsoft.SemanticKernel.Text; -// using Microsoft.SemanticMemory.Core.AppBuilders; using Microsoft.SemanticMemory.Core.Diagnostics; using Microsoft.SemanticMemory.Core.Pipeline; @@ -86,6 +85,9 @@ public TextPartitioningHandler( break; } + // TODO: add virtual/injectable 
logic + // TODO: see https://learn.microsoft.com/en-us/windows/win32/search/-search-ifilter-about + default: this._log.LogWarning("File {0} cannot be partitioned, type not supported", file.Name); // Don't partition other files diff --git a/dotnet/CoreLib/SemanticMemoryServerless.cs b/dotnet/CoreLib/Memory.cs similarity index 57% rename from dotnet/CoreLib/SemanticMemoryServerless.cs rename to dotnet/CoreLib/Memory.cs index 254d13c89..2db327f54 100644 --- a/dotnet/CoreLib/SemanticMemoryServerless.cs +++ b/dotnet/CoreLib/Memory.cs @@ -1,8 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. using System; -using System.Collections.Generic; -using System.Linq; using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.DependencyInjection; @@ -12,6 +10,7 @@ using Microsoft.SemanticMemory.Core.Handlers; using Microsoft.SemanticMemory.Core.Pipeline; using Microsoft.SemanticMemory.Core.Search; +using Microsoft.SemanticMemory.Core.WebService; namespace Microsoft.SemanticMemory.Core; @@ -23,9 +22,9 @@ namespace Microsoft.SemanticMemory.Core; /// /// TODO: pipeline structure is hardcoded, should allow custom handlers/steps /// -public class SemanticMemoryServerless : ISemanticMemoryClient +public class Memory : ISemanticMemoryClient { - public SemanticMemoryServerless(IServiceProvider serviceProvider) + public Memory(IServiceProvider serviceProvider) { this._configuration = serviceProvider.GetService() ?? throw new SemanticMemoryException("Unable to load configuration. 
Are all the dependencies configured?"); @@ -38,48 +37,46 @@ public SemanticMemoryServerless(IServiceProvider serviceProvider) } /// - public async Task ImportFileAsync(Document file, CancellationToken cancellationToken = default) + public Task ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default) { - var ids = await this.ImportFilesAsync(new[] { file }, cancellationToken).ConfigureAwait(false); - return ids.First(); + return this.ImportInternalAsync(uploadRequest, cancellationToken); } /// - public Task> ImportFilesAsync(Document[] files, CancellationToken cancellationToken = default) + public Task ImportDocumentAsync(Document document, CancellationToken cancellationToken = default) { - return this.ImportFilesInternalAsync(files, cancellationToken); + return this.ImportInternalAsync(document, cancellationToken); } /// - public Task ImportFileAsync(string fileName, CancellationToken cancellationToken = default) + public Task ImportDocumentAsync(string fileName, DocumentDetails? details = null, CancellationToken cancellationToken = default) { - return this.ImportFileAsync(new Document(fileName), cancellationToken); + return this.ImportInternalAsync(new Document(fileName) { Details = details ?? new DocumentDetails() }, cancellationToken); } /// - public async Task ImportFileAsync(string fileName, DocumentDetails details, CancellationToken cancellationToken = default) + public async Task IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default) { - var ids = await this.ImportFilesAsync(new[] { new Document(fileName) { Details = details } }, cancellationToken).ConfigureAwait(false); - return ids.First(); + var orchestrator = await this.GetOrchestratorAsync(cancellationToken).ConfigureAwait(false); + return await orchestrator.IsDocumentReadyAsync(userId, documentId, cancellationToken).ConfigureAwait(false); } /// - public Task AskAsync(string query, MemoryFilter? 
filter = null, CancellationToken cancellationToken = default) + public Task GetDocumentStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default) { - return this.AskAsync(new DocumentDetails().UserId, query, filter, cancellationToken); + throw new NotImplementedException(); } /// - public Task AskAsync(string userId, string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default) + public Task AskAsync(string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default) { - return this._searchClient.AskAsync(userId: userId, query: query, filter: filter, cancellationToken: cancellationToken); + return this.AskAsync(new DocumentDetails().UserId, question, filter, cancellationToken); } /// - public async Task IsReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default) + public Task AskAsync(string userId, string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default) { - var orchestrator = await this.GetOrchestratorAsync(cancellationToken).ConfigureAwait(false); - return await orchestrator.IsReadyAsync(userId, documentId, cancellationToken).ConfigureAwait(false); + return this._searchClient.AskAsync(userId: userId, question: question, filter: filter, cancellationToken: cancellationToken); } #region private @@ -116,36 +113,17 @@ private async Task GetOrchestratorAsync(Cancellat return this._orchestrator; } - private async Task> ImportFilesInternalAsync(Document[] files, CancellationToken cancellationToken) + private async Task ImportInternalAsync(Document document, CancellationToken cancellationToken) { - List ids = new(); + DocumentUploadRequest uploadRequest = await document.ToDocumentUploadRequestAsync(cancellationToken).ConfigureAwait(false); InProcessPipelineOrchestrator orchestrator = await this.GetOrchestratorAsync(cancellationToken).ConfigureAwait(false); + return await 
orchestrator.ImportDocumentAsync(uploadRequest, cancellationToken).ConfigureAwait(false); + } - foreach (Document file in files) - { - var pipeline = orchestrator - .PrepareNewFileUploadPipeline( - userId: file.Details.UserId, - documentId: file.Details.DocumentId, - file.Details.Tags); - - pipeline.AddUploadFile( - name: "file1", - filename: file.FileName, - sourceFile: file.FileName); - - pipeline - .Then("extract") - .Then("partition") - .Then("gen_embeddings") - .Then("save_embeddings") - .Build(); - - await orchestrator.RunPipelineAsync(pipeline, cancellationToken).ConfigureAwait(false); - ids.Add(file.Details.DocumentId); - } - - return ids; + private async Task ImportInternalAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken) + { + InProcessPipelineOrchestrator orchestrator = await this.GetOrchestratorAsync(cancellationToken).ConfigureAwait(false); + return await orchestrator.ImportDocumentAsync(uploadRequest, cancellationToken).ConfigureAwait(false); } #endregion diff --git a/dotnet/CoreLib/MemoryService.cs b/dotnet/CoreLib/MemoryService.cs new file mode 100644 index 000000000..facb0a4b1 --- /dev/null +++ b/dotnet/CoreLib/MemoryService.cs @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticMemory.Client; +using Microsoft.SemanticMemory.Client.Models; +using Microsoft.SemanticMemory.Core.Pipeline; +using Microsoft.SemanticMemory.Core.Search; +using Microsoft.SemanticMemory.Core.WebService; + +namespace Microsoft.SemanticMemory.Core; + +public class MemoryService : ISemanticMemoryClient +{ + private readonly IPipelineOrchestrator _orchestrator; + private readonly SearchClient _searchClient; + + public MemoryService( + IPipelineOrchestrator orchestrator, + SearchClient searchClient) + { + this._orchestrator = orchestrator; + this._searchClient = searchClient; + } + + /// + public Task ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default) + { + return this._orchestrator.ImportDocumentAsync(uploadRequest, cancellationToken); + } + + /// + public async Task ImportDocumentAsync(Document document, CancellationToken cancellationToken = default) + { + return await this.ImportInternalAsync(document, cancellationToken).ConfigureAwait(false); + } + + /// + public Task ImportDocumentAsync(string fileName, DocumentDetails? details = null, CancellationToken cancellationToken = default) + { + return this.ImportInternalAsync(new Document(fileName) { Details = details ?? new DocumentDetails() }, cancellationToken); + } + + /// + public Task GetDocumentStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default) + { + return this._orchestrator.ReadPipelineSummaryAsync(userId, documentId, cancellationToken); + } + + /// + public Task IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default) + { + return this._orchestrator.IsDocumentReadyAsync(userId, documentId, cancellationToken); + } + + /// + public Task AskAsync(string question, MemoryFilter? 
filter = null, CancellationToken cancellationToken = default) + { + return this.AskAsync(new DocumentDetails().UserId, question, filter, cancellationToken); + } + + /// + public Task AskAsync(string userId, string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default) + { + return this._searchClient.AskAsync(userId: userId, question: question, filter: filter, cancellationToken: cancellationToken); + } + + #region private + + private async Task ImportInternalAsync(Document document, CancellationToken cancellationToken) + { + DocumentUploadRequest uploadRequest = await document.ToDocumentUploadRequestAsync(cancellationToken).ConfigureAwait(false); + return await this._orchestrator.ImportDocumentAsync(uploadRequest, cancellationToken).ConfigureAwait(false); + } + + #endregion +} diff --git a/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemory.cs b/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemory.cs index 13bf3c189..acb338f19 100644 --- a/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemory.cs +++ b/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemory.cs @@ -108,13 +108,13 @@ await client.IndexDocumentsAsync( SearchQueryVector vectorQuery = new() { KNearestNeighborsCount = limit, - Fields = AzureCognitiveSearchMemoryRecord.VectorField, - Value = embedding.Vector.ToList() + Value = embedding.Vector.ToList(), + Fields = { AzureCognitiveSearchMemoryRecord.VectorField } }; SearchOptions options = new() { - Vector = vectorQuery + Vectors = { vectorQuery } }; if (filter != null && !filter.IsEmpty()) diff --git a/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemoryRecord.cs b/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemoryRecord.cs index 630448873..8d5e23af6 100644 --- a/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemoryRecord.cs +++ 
b/dotnet/CoreLib/MemoryStorage/AzureCognitiveSearch/AzureCognitiveSearchMemoryRecord.cs @@ -17,7 +17,7 @@ public sealed class AzureCognitiveSearchMemoryRecord private const string IdField = "id"; internal const string VectorField = "embedding"; private const string TagsField = "tags"; - private const string MetadataField = "metadata"; + private const string PayloadField = "payload"; private static readonly JsonSerializerOptions s_jsonOptions = new() { @@ -39,8 +39,8 @@ public sealed class AzureCognitiveSearchMemoryRecord [JsonPropertyName(TagsField)] public List Tags { get; set; } = new(); - [JsonPropertyName(MetadataField)] - public string Metadata { get; set; } = string.Empty; + [JsonPropertyName(PayloadField)] + public string Payload { get; set; } = string.Empty; public static VectorDbSchema GetSchema(int vectorSize) { @@ -51,7 +51,7 @@ public static VectorDbSchema GetSchema(int vectorSize) new() { Name = IdField, Type = VectorDbField.FieldType.Text, IsKey = true }, new() { Name = VectorField, Type = VectorDbField.FieldType.Vector, VectorSize = vectorSize }, new() { Name = TagsField, Type = VectorDbField.FieldType.ListOfStrings, IsFilterable = true }, - new() { Name = MetadataField, Type = VectorDbField.FieldType.Text, IsFilterable = false }, + new() { Name = PayloadField, Type = VectorDbField.FieldType.Text, IsFilterable = false }, } }; } @@ -61,8 +61,8 @@ public MemoryRecord ToMemoryRecord(bool withEmbedding = true) MemoryRecord result = new() { Id = DecodeId(this.Id), - Metadata = JsonSerializer.Deserialize>(this.Metadata, s_jsonOptions) - ?? new Dictionary() + Payload = JsonSerializer.Deserialize>(this.Payload, s_jsonOptions) + ?? 
new Dictionary() }; if (withEmbedding) @@ -95,7 +95,7 @@ public static AzureCognitiveSearchMemoryRecord FromMemoryRecord(MemoryRecord rec { Id = EncodeId(record.Id), Vector = record.Vector.Vector.ToArray(), - Metadata = JsonSerializer.Serialize(record.Metadata, s_jsonOptions) + Payload = JsonSerializer.Serialize(record.Payload, s_jsonOptions) }; // Note: record owner is stored inside Tags diff --git a/dotnet/CoreLib/MemoryStorage/MemoryRecord.cs b/dotnet/CoreLib/MemoryStorage/MemoryRecord.cs index a87827cf7..fba147f4d 100644 --- a/dotnet/CoreLib/MemoryStorage/MemoryRecord.cs +++ b/dotnet/CoreLib/MemoryStorage/MemoryRecord.cs @@ -39,7 +39,7 @@ public class MemoryRecord public TagCollection Tags { get; set; } = new(); /// - /// Optional Non-Searchable metadata processed client side. + /// Optional Non-Searchable payload processed client side. /// /// Use cases: /// * citations @@ -51,5 +51,5 @@ public class MemoryRecord /// * timestamps /// * etc. /// - public Dictionary Metadata { get; set; } = new(); + public Dictionary Payload { get; set; } = new(); } diff --git a/dotnet/CoreLib/Pipeline/BaseOrchestrator.cs b/dotnet/CoreLib/Pipeline/BaseOrchestrator.cs index 031123a0b..bf2175498 100644 --- a/dotnet/CoreLib/Pipeline/BaseOrchestrator.cs +++ b/dotnet/CoreLib/Pipeline/BaseOrchestrator.cs @@ -6,7 +6,6 @@ using System.Text.Json; using System.Threading; using System.Threading.Tasks; -using Microsoft.AspNetCore.Http; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Microsoft.SemanticKernel.AI.Embeddings; @@ -16,7 +15,6 @@ using Microsoft.SemanticMemory.Core.ContentStorage; using Microsoft.SemanticMemory.Core.Diagnostics; using Microsoft.SemanticMemory.Core.MemoryStorage; -using Microsoft.SemanticMemory.Core.WebService; namespace Microsoft.SemanticMemory.Core.Pipeline; @@ -73,17 +71,17 @@ protected BaseOrchestrator( public abstract Task RunPipelineAsync(DataPipeline pipeline, CancellationToken cancellationToken = default); /// - 
public async Task UploadFileAsync(UploadRequest uploadDetails, CancellationToken cancellationToken = default) + public async Task ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default) { - this.Log.LogInformation("Queueing upload of {0} files for further processing [request {1}]", uploadDetails.Files.Count(), uploadDetails.DocumentId); + this.Log.LogInformation("Queueing upload of {0} files for further processing [request {1}]", uploadRequest.Files.Count, uploadRequest.DocumentId); // TODO: allow custom pipeline steps from UploadRequest // Define all the steps in the pipeline - var pipeline = this.PrepareNewFileUploadPipeline( - userId: uploadDetails.UserId, - documentId: uploadDetails.DocumentId, - uploadDetails.Tags, - uploadDetails.Files) + var pipeline = this.PrepareNewDocumentUpload( + userId: uploadRequest.UserId, + documentId: uploadRequest.DocumentId, + uploadRequest.Tags, + uploadRequest.Files) .Then("extract") .Then("partition") .Then("gen_embeddings") @@ -93,7 +91,7 @@ public async Task UploadFileAsync(UploadRequest uploadDetails, Cancellat try { await this.RunPipelineAsync(pipeline, cancellationToken).ConfigureAwait(false); - return pipeline.Id; + return pipeline.DocumentId; } catch (Exception e) { @@ -103,25 +101,25 @@ public async Task UploadFileAsync(UploadRequest uploadDetails, Cancellat } /// - public DataPipeline PrepareNewFileUploadPipeline( + public DataPipeline PrepareNewDocumentUpload( string userId, string documentId, TagCollection tags) { - return this.PrepareNewFileUploadPipeline(userId: userId, documentId: documentId, tags, new List()); + return this.PrepareNewDocumentUpload(userId: userId, documentId: documentId, tags, new List()); } /// - public DataPipeline PrepareNewFileUploadPipeline( + public DataPipeline PrepareNewDocumentUpload( string userId, string documentId, TagCollection tags, - IEnumerable filesToUpload) + IEnumerable filesToUpload) { var pipeline = new DataPipeline { UserId = 
userId, - Id = documentId, + DocumentId = documentId, Tags = tags, Creation = DateTimeOffset.UtcNow, LastUpdate = DateTimeOffset.UtcNow, @@ -150,7 +148,28 @@ public DataPipeline PrepareNewFileUploadPipeline( } /// - public async Task IsReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default) + public async Task ReadPipelineSummaryAsync(string userId, string documentId, CancellationToken cancellationToken = default) + { + var pipeline = await this.ReadPipelineStatusAsync(userId: userId, documentId: documentId, cancellationToken).ConfigureAwait(false); + if (pipeline == null) { return null; } + + return new DataPipelineStatus + { + Completed = pipeline.Complete, + Failed = false, // TODO + DocumentId = pipeline.DocumentId, + UserId = pipeline.UserId, + Tags = pipeline.Tags, + Creation = pipeline.Creation, + LastUpdate = pipeline.LastUpdate, + Steps = pipeline.Steps, + RemainingSteps = pipeline.RemainingSteps, + CompletedSteps = pipeline.CompletedSteps, + }; + } + + /// + public async Task IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default) { DataPipeline? 
pipeline = await this.ReadPipelineStatusAsync(userId, documentId, cancellationToken).ConfigureAwait(false); return pipeline != null && pipeline.Complete; @@ -173,7 +192,7 @@ public async Task ReadTextFileAsync(DataPipeline pipeline, string fileNa /// public Task ReadFileAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default) { - var path = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.Id); + var path = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.DocumentId); return this.ContentStorage.ReadFileAsync(path, fileName, cancellationToken); } @@ -198,7 +217,7 @@ public List GetVectorDbs() /// public Task WriteFileAsync(DataPipeline pipeline, string fileName, BinaryData fileContent, CancellationToken cancellationToken = default) { - var dirPath = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.Id); + var dirPath = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.DocumentId); return this.ContentStorage.WriteStreamAsync( dirPath, fileName, @@ -224,7 +243,7 @@ protected async Task UploadFilesAsync(DataPipeline currentPipeline, Cancellation // If the folder contains the status of a previous execution, // capture it to run consolidation later, e.g. purging deprecated memory records. // Note: although not required, the list of executions to purge is ordered from oldest to most recent - DataPipeline? previousPipeline = await this.ReadPipelineStatusAsync(currentPipeline.UserId, currentPipeline.Id, cancellationToken).ConfigureAwait(false); + DataPipeline? 
previousPipeline = await this.ReadPipelineStatusAsync(currentPipeline.UserId, currentPipeline.DocumentId, cancellationToken).ConfigureAwait(false); if (previousPipeline != null && previousPipeline.ExecutionId != currentPipeline.ExecutionId) { var dedupe = new HashSet(); @@ -261,10 +280,10 @@ protected async Task UploadFilesAsync(DataPipeline currentPipeline, Cancellation /// Whether to throw exceptions or just log them protected async Task UpdatePipelineStatusAsync(DataPipeline pipeline, CancellationToken cancellationToken, bool ignoreExceptions = false) { - this.Log.LogDebug("Saving pipeline status to {0}/{1}", pipeline.Id, Constants.PipelineStatusFilename); + this.Log.LogDebug("Saving pipeline status to {0}/{1}", pipeline.DocumentId, Constants.PipelineStatusFilename); try { - var dirPath = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.Id); + var dirPath = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.DocumentId); await this.ContentStorage.WriteTextFileAsync( dirPath, Constants.PipelineStatusFilename, @@ -295,14 +314,14 @@ protected static string ToJson(object data, bool indented = false) private async Task UploadFormFilesAsync(DataPipeline pipeline, CancellationToken cancellationToken) { - this.Log.LogDebug("Uploading {0} files, pipeline {1}", pipeline.FilesToUpload.Count, pipeline.Id); + this.Log.LogDebug("Uploading {0} files, pipeline {1}", pipeline.FilesToUpload.Count, pipeline.DocumentId); await this.ContentStorage.CreateDirectoryAsync(pipeline.UserId, cancellationToken).ConfigureAwait(false); - var dirPath = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.Id); + var dirPath = this.ContentStorage.JoinPaths(pipeline.UserId, pipeline.DocumentId); await this.ContentStorage.CreateDirectoryAsync(dirPath, cancellationToken).ConfigureAwait(false); - foreach (IFormFile file in pipeline.FilesToUpload) + foreach (DocumentUploadRequest.UploadedFile file in pipeline.FilesToUpload) { if (string.Equals(file.FileName, 
Constants.PipelineStatusFilename, StringComparison.OrdinalIgnoreCase)) { @@ -311,7 +330,7 @@ private async Task UploadFormFilesAsync(DataPipeline pipeline, CancellationToken } this.Log.LogDebug("Uploading file: {0}", file.FileName); - var size = await this.ContentStorage.WriteStreamAsync(dirPath, file.FileName, file.OpenReadStream(), cancellationToken).ConfigureAwait(false); + var size = await this.ContentStorage.WriteStreamAsync(dirPath, file.FileName, file.FileContent, cancellationToken).ConfigureAwait(false); pipeline.Files.Add(new DataPipeline.FileDetails { Id = Guid.NewGuid().ToString("N"), diff --git a/dotnet/CoreLib/Pipeline/DataPipeline.cs b/dotnet/CoreLib/Pipeline/DataPipeline.cs index b54468163..f18979f3e 100644 --- a/dotnet/CoreLib/Pipeline/DataPipeline.cs +++ b/dotnet/CoreLib/Pipeline/DataPipeline.cs @@ -5,7 +5,7 @@ using System.IO; using System.Linq; using System.Text.Json.Serialization; -using Microsoft.AspNetCore.Http; +using Microsoft.SemanticMemory.Client; using Microsoft.SemanticMemory.Client.Models; using Microsoft.SemanticMemory.Core.Diagnostics; @@ -104,11 +104,11 @@ public string GetPartitionFileName(int partitionNumber) } /// - /// Id of the pipeline instance. This value will persist throughout the execution and in the final data lineage used for citations. + /// Id of the document and the pipeline instance. This value will persist throughout the execution and in the final data lineage used for citations. /// [JsonPropertyOrder(1)] - [JsonPropertyName("id")] - public string Id { get; set; } = string.Empty; + [JsonPropertyName("document_id")] + public string DocumentId { get; set; } = string.Empty; /// /// Unique execution id. If the pipeline is executed again, this value will change. 
@@ -183,7 +183,7 @@ public string GetPartitionFileName(int partitionNumber) public bool Complete => this.RemainingSteps.Count == 0; [JsonIgnore] - public List FilesToUpload { get; set; } = new(); + public List FilesToUpload { get; set; } = new(); [JsonIgnore] public bool UploadComplete { get; set; } @@ -212,7 +212,7 @@ public DataPipeline AddUploadFile(string name, string filename, BinaryData conte public DataPipeline AddUploadFile(string name, string filename, Stream content) { content.Seek(0, SeekOrigin.Begin); - this.FilesToUpload.Add(new FormFile(content, 0, content.Length, name, filename)); + this.FilesToUpload.Add(new DocumentUploadRequest.UploadedFile(filename, content)); return this; } @@ -240,7 +240,7 @@ public string MoveToNextStep() { if (this.RemainingSteps.Count == 0) { - throw new PipelineCompletedException("The list of remaining steps is empty"); + throw new SemanticMemoryException("The list of remaining steps is empty"); } var stepName = this.RemainingSteps.First(); @@ -252,9 +252,9 @@ public string MoveToNextStep() public void Validate() { - if (string.IsNullOrEmpty(this.Id)) + if (string.IsNullOrEmpty(this.DocumentId)) { - throw new ArgumentException("The pipeline ID is empty", nameof(this.Id)); + throw new ArgumentException("The pipeline ID is empty", nameof(this.DocumentId)); } if (string.IsNullOrEmpty(this.UserId)) diff --git a/dotnet/CoreLib/Pipeline/DistributedPipelineOrchestrator.cs b/dotnet/CoreLib/Pipeline/DistributedPipelineOrchestrator.cs index a7604c785..bff6857ac 100644 --- a/dotnet/CoreLib/Pipeline/DistributedPipelineOrchestrator.cs +++ b/dotnet/CoreLib/Pipeline/DistributedPipelineOrchestrator.cs @@ -113,7 +113,7 @@ public override async Task RunPipelineAsync( // In case the pipeline has no steps if (pipeline.Complete) { - this.Log.LogInformation("Pipeline {0} complete", pipeline.Id); + this.Log.LogInformation("Pipeline {0} complete", pipeline.DocumentId); return; } @@ -131,7 +131,7 @@ private async Task RunPipelineStepAsync( // In 
case the pipeline has no steps if (pipeline.Complete) { - this.Log.LogInformation("Pipeline {0} complete", pipeline.Id); + this.Log.LogInformation("Pipeline {0} complete", pipeline.DocumentId); // Note: returning True, the message is removed from the queue return true; } @@ -144,13 +144,13 @@ private async Task RunPipelineStepAsync( { pipeline = updatedPipeline; - this.Log.LogInformation("Handler {0} processed pipeline {1} successfully", currentStepName, pipeline.Id); + this.Log.LogInformation("Handler {0} processed pipeline {1} successfully", currentStepName, pipeline.DocumentId); pipeline.MoveToNextStep(); await this.MoveForwardAsync(pipeline, cancellationToken).ConfigureAwait(false); } else { - this.Log.LogError("Handler {0} failed to process pipeline {1}", currentStepName, pipeline.Id); + this.Log.LogError("Handler {0} failed to process pipeline {1}", currentStepName, pipeline.DocumentId); } // Note: returning True, the message is removed from the queue @@ -166,7 +166,7 @@ private async Task MoveForwardAsync(DataPipeline pipeline, CancellationToken can if (pipeline.RemainingSteps.Count == 0) { - this.Log.LogInformation("Pipeline '{0}' complete", pipeline.Id); + this.Log.LogInformation("Pipeline '{0}' complete", pipeline.DocumentId); // Try to save the pipeline status await this.UpdatePipelineStatusAsync(pipeline, cancellationToken, ignoreExceptions: false).ConfigureAwait(false); @@ -174,7 +174,7 @@ private async Task MoveForwardAsync(DataPipeline pipeline, CancellationToken can else { string nextStepName = pipeline.RemainingSteps.First(); - this.Log.LogInformation("Enqueueing pipeline '{0}' step '{1}'", pipeline.Id, nextStepName); + this.Log.LogInformation("Enqueueing pipeline '{0}' step '{1}'", pipeline.DocumentId, nextStepName); using IQueue queue = this._queueClientFactory.Build(); await queue.ConnectToQueueAsync(nextStepName, QueueOptions.PublishOnly, cancellationToken).ConfigureAwait(false); diff --git a/dotnet/CoreLib/Pipeline/IPipelineOrchestrator.cs 
b/dotnet/CoreLib/Pipeline/IPipelineOrchestrator.cs index 49a758618..d6afc3319 100644 --- a/dotnet/CoreLib/Pipeline/IPipelineOrchestrator.cs +++ b/dotnet/CoreLib/Pipeline/IPipelineOrchestrator.cs @@ -4,11 +4,9 @@ using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; -using Microsoft.AspNetCore.Http; using Microsoft.SemanticKernel.AI.Embeddings; using Microsoft.SemanticMemory.Client.Models; using Microsoft.SemanticMemory.Core.MemoryStorage; -using Microsoft.SemanticMemory.Core.WebService; namespace Microsoft.SemanticMemory.Core.Pipeline; @@ -31,10 +29,10 @@ public interface IPipelineOrchestrator /// /// Upload a file and start the processing pipeline /// - /// Details about the file and how to import it + /// Details about the file and how to import it /// Async task cancellation token /// Import Id - Task UploadFileAsync(UploadRequest uploadDetails, CancellationToken cancellationToken = default); + Task ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default); /// /// Create a new pipeline value object for files upload @@ -44,7 +42,7 @@ public interface IPipelineOrchestrator /// List of key-value pairs, used to organize and label the memories. E.g. "type", "category", etc. Multiple values per key are allowed. /// List of files provided before starting the pipeline, to be uploaded into the container before starting. /// Pipeline representation - DataPipeline PrepareNewFileUploadPipeline(string userId, string documentId, TagCollection tags, IEnumerable filesToUpload); + DataPipeline PrepareNewDocumentUpload(string userId, string documentId, TagCollection tags, IEnumerable filesToUpload); /// /// Create a new pipeline value object, with an empty list of files @@ -53,7 +51,7 @@ public interface IPipelineOrchestrator /// Id of the pipeline instance. This value will persist throughout the pipeline and final data lineage used for citations. 
/// List of key-value pairs, used to organize and label the memories. E.g. "type", "category", etc. Multiple values per key are allowed. /// Pipeline representation - DataPipeline PrepareNewFileUploadPipeline(string userId, string documentId, TagCollection tags); + DataPipeline PrepareNewDocumentUpload(string userId, string documentId, TagCollection tags); /// /// Start a new data pipeline execution @@ -71,6 +69,15 @@ public interface IPipelineOrchestrator /// Pipeline status if available Task ReadPipelineStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default); + /// + /// Fetch the pipeline status from storage + /// + /// Primary user who the data belongs to. Other users, e.g. sharing, is not supported in the pipeline at this time. + /// Id of the document and pipeline execution instance + /// Async task cancellation token + /// Pipeline status if available + Task ReadPipelineSummaryAsync(string userId, string documentId, CancellationToken cancellationToken = default); + /// /// Check if a document ID exists in a user memory and is ready for usage. /// The logic checks if the uploaded document has been fully processed. 
@@ -80,7 +87,7 @@ public interface IPipelineOrchestrator /// Document ID /// Async task cancellation token /// True if the document has been successfully uploaded and imported - public Task IsReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default); + public Task IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default); /// /// Stop all the pipelines in progress diff --git a/dotnet/CoreLib/Pipeline/InProcessPipelineOrchestrator.cs b/dotnet/CoreLib/Pipeline/InProcessPipelineOrchestrator.cs index f8fa9c519..19dc866ac 100644 --- a/dotnet/CoreLib/Pipeline/InProcessPipelineOrchestrator.cs +++ b/dotnet/CoreLib/Pipeline/InProcessPipelineOrchestrator.cs @@ -100,17 +100,17 @@ public override async Task RunPipelineAsync( if (success) { pipeline = updatedPipeline; - this.Log.LogInformation("Handler '{0}' processed pipeline '{1}' successfully", currentStepName, pipeline.Id); + this.Log.LogInformation("Handler '{0}' processed pipeline '{1}' successfully", currentStepName, pipeline.DocumentId); pipeline.MoveToNextStep(); await this.UpdatePipelineStatusAsync(pipeline, cancellationToken, ignoreExceptions: false).ConfigureAwait(false); } else { - this.Log.LogError("Handler '{0}' failed to process pipeline '{1}'", currentStepName, pipeline.Id); + this.Log.LogError("Handler '{0}' failed to process pipeline '{1}'", currentStepName, pipeline.DocumentId); throw new OrchestrationException($"Pipeline error, step {currentStepName} failed"); } } - this.Log.LogInformation("Pipeline '{0}' complete", pipeline.Id); + this.Log.LogInformation("Pipeline '{0}' complete", pipeline.DocumentId); } } diff --git a/dotnet/CoreLib/Search/SearchClient.cs b/dotnet/CoreLib/Search/SearchClient.cs index 2f9b4be31..086e01aa1 100644 --- a/dotnet/CoreLib/Search/SearchClient.cs +++ b/dotnet/CoreLib/Search/SearchClient.cs @@ -14,7 +14,6 @@ using Microsoft.SemanticMemory.Core.AI; using Microsoft.SemanticMemory.Core.Diagnostics; using 
Microsoft.SemanticMemory.Core.MemoryStorage; -using Microsoft.SemanticMemory.Core.WebService; namespace Microsoft.SemanticMemory.Core.Search; @@ -59,15 +58,25 @@ public SearchClient( public Task AskAsync(MemoryQuery query, CancellationToken cancellationToken = default) { - return this.AskAsync(query.UserId, query.Query, query.Filter, cancellationToken); + return this.AskAsync(query.UserId, query.Question, query.Filter, cancellationToken); } - public async Task AskAsync(string userId, string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default) + public async Task AskAsync(string userId, string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default) { + if (string.IsNullOrEmpty(question)) + { + this._log.LogWarning("No question provided"); + return new MemoryAnswer + { + Question = question, + Result = "INFO NOT FOUND", + }; + } + var facts = new StringBuilder(); var tokensAvailable = 8000 - GPT3Tokenizer.Encode(this._prompt).Count - - GPT3Tokenizer.Encode(query).Count + - GPT3Tokenizer.Encode(question).Count - AnswerTokens; var factsUsedCount = 0; @@ -75,11 +84,18 @@ public async Task AskAsync(string userId, string query, MemoryFilt var answer = new MemoryAnswer { - Query = query, + Question = question, Result = "INFO NOT FOUND", }; - var embedding = await this.GenerateEmbeddingAsync(query).ConfigureAwait(false); + var embedding = await this.GenerateEmbeddingAsync(question).ConfigureAwait(false); + + if (!string.IsNullOrEmpty(userId)) + { + if (filter == null) { filter = new MemoryFilter(); } + + filter.ByUser(userId); + } this._log.LogTrace("Fetching relevant memories"); IAsyncEnumerable<(MemoryRecord, double)> matches = this._vectorDb.GetSimilarListAsync( @@ -113,10 +129,10 @@ public async Task AskAsync(string userId, string query, MemoryFilt string linkToFile = $"{documentId}/{fileId}"; string fileContentType = memory.Tags[Constants.ReservedFileTypeTag].FirstOrDefault() ?? 
string.Empty; - string fileName = memory.Metadata["file_name"].ToString() ?? string.Empty; + string fileName = memory.Payload["file_name"].ToString() ?? string.Empty; factsAvailableCount++; - var partitionText = memory.Metadata["text"].ToString()?.Trim() ?? ""; + var partitionText = memory.Payload["text"].ToString()?.Trim() ?? ""; if (string.IsNullOrEmpty(partitionText)) { this._log.LogError("The document partition is empty, user: {0}, doc: {1}", memory.Owner, memory.Id); @@ -150,7 +166,7 @@ public async Task AskAsync(string userId, string query, MemoryFilt citation.SourceName = fileName; #pragma warning disable CA1806 // it's ok if parsing fails - DateTimeOffset.TryParse(memory.Metadata["last_update"].ToString(), out var lastUpdate); + DateTimeOffset.TryParse(memory.Payload["last_update"].ToString(), out var lastUpdate); #pragma warning restore CA1806 citation.Partitions.Add(new MemoryAnswer.Citation.Partition @@ -169,17 +185,17 @@ public async Task AskAsync(string userId, string query, MemoryFilt if (factsAvailableCount > 0 && factsUsedCount == 0) { this._log.LogError("Unable to inject memories in the prompt, not enough tokens available"); - return new MemoryAnswer { Query = query, Result = "INFO NOT FOUND" }; + return new MemoryAnswer { Question = question, Result = "INFO NOT FOUND" }; } if (factsUsedCount == 0) { this._log.LogWarning("No memories available"); - return new MemoryAnswer { Query = query, Result = "INFO NOT FOUND" }; + return new MemoryAnswer { Question = question, Result = "INFO NOT FOUND" }; } var text = new StringBuilder(); - await foreach (var x in this.GenerateAnswerAsync(query, facts.ToString()).ConfigureAwait(false)) + await foreach (var x in this.GenerateAnswerAsync(question, facts.ToString()).ConfigureAwait(false)) { text.Append(x); } diff --git a/dotnet/CoreLib/SemanticMemoryService.cs b/dotnet/CoreLib/SemanticMemoryService.cs deleted file mode 100644 index 3bb225b09..000000000 --- a/dotnet/CoreLib/SemanticMemoryService.cs +++ /dev/null 
@@ -1,66 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System.Threading; -using System.Threading.Tasks; -using Microsoft.SemanticMemory.Client.Models; -using Microsoft.SemanticMemory.Core.Pipeline; -using Microsoft.SemanticMemory.Core.Search; -using Microsoft.SemanticMemory.Core.WebService; - -namespace Microsoft.SemanticMemory.Core; - -public interface ISemanticMemoryService -{ - /// - /// Upload a file and start the processing pipeline - /// - /// Details about the file and how to import it - /// Async task cancellation token - /// Import Id - Task UploadFileAsync(UploadRequest uploadDetails, CancellationToken cancellationToken = default); - - /// - /// Fetch the pipeline status from storage - /// - /// Primary user who the data belongs to. Other users, e.g. sharing, is not supported in the pipeline at this time. - /// Id of the document and pipeline execution instance - /// Async task cancellation token - /// Pipeline status if available - Task ReadPipelineStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default); - - Task AskAsync(MemoryQuery query, CancellationToken cancellationToken = default); -} - -public class SemanticMemoryService : ISemanticMemoryService -{ - private readonly IPipelineOrchestrator _orchestrator; - private readonly SearchClient _searchClient; - - public SemanticMemoryService( - IPipelineOrchestrator orchestrator, - SearchClient searchClient) - { - this._orchestrator = orchestrator; - this._searchClient = searchClient; - } - - /// - public Task UploadFileAsync( - UploadRequest uploadDetails, - CancellationToken cancellationToken = default) - { - return this._orchestrator.UploadFileAsync(uploadDetails, cancellationToken); - } - - /// - public Task ReadPipelineStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default) - { - return this._orchestrator.ReadPipelineStatusAsync(userId, documentId, cancellationToken); - } - - /// - public Task 
AskAsync(MemoryQuery query, CancellationToken cancellationToken = default) - { - return this._searchClient.AskAsync(query, cancellationToken); - } -} diff --git a/dotnet/CoreLib/WebService/DocumentExtensions.cs b/dotnet/CoreLib/WebService/DocumentExtensions.cs new file mode 100644 index 000000000..498646db0 --- /dev/null +++ b/dotnet/CoreLib/WebService/DocumentExtensions.cs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticMemory.Client; +using Microsoft.SemanticMemory.Client.Models; + +namespace Microsoft.SemanticMemory.Core.WebService; + +public static class DocumentExtensions +{ + // Note: this code is not .NET Standard 2.0 compatible + public static async Task ToDocumentUploadRequestAsync(this Document file, CancellationToken cancellationToken) + { + var uploadRequest = new DocumentUploadRequest + { + DocumentId = file.Details.DocumentId, + UserId = file.Details.UserId, + Tags = file.Details.Tags + }; + + var files = new List(); + for (int index = 0; index < file.FileNames.Count; index++) + { + string fileName = file.FileNames[index]; + + if (!File.Exists(fileName)) + { + throw new SemanticMemoryException($"File not found: {fileName}"); + } + + byte[] bytes = await File.ReadAllBytesAsync(fileName, cancellationToken).ConfigureAwait(false); + var data = new BinaryData(bytes); + var formFile = new DocumentUploadRequest.UploadedFile(fileName, data.ToStream()); + files.Add(formFile); + } + + uploadRequest.Files = files; + + return uploadRequest; + } +} diff --git a/dotnet/CoreLib/WebService/UploadRequest.cs b/dotnet/CoreLib/WebService/HttpDocumentUploadRequest.cs similarity index 94% rename from dotnet/CoreLib/WebService/UploadRequest.cs rename to dotnet/CoreLib/WebService/HttpDocumentUploadRequest.cs index 4909b8572..4ab85e509 100644 --- a/dotnet/CoreLib/WebService/UploadRequest.cs +++ 
b/dotnet/CoreLib/WebService/HttpDocumentUploadRequest.cs @@ -13,7 +13,7 @@ namespace Microsoft.SemanticMemory.Core.WebService; // Note: use multiform part serialization -public class UploadRequest +public class HttpDocumentUploadRequest { public string DocumentId { get; set; } = string.Empty; public string UserId { get; set; } = string.Empty; @@ -26,12 +26,12 @@ public class UploadRequest * https://stackoverflow.com/questions/71499435/how-do-i-do-file-upload-using-asp-net-core-6-minimal-api * https://stackoverflow.com/questions/57033535/multipartformdatacontent-add-stringcontent-is-adding-carraige-return-linefeed-to */ - public static async Task<(UploadRequest model, bool isValid, string errMsg)> BindHttpRequestAsync(HttpRequest httpRequest) + public static async Task<(HttpDocumentUploadRequest model, bool isValid, string errMsg)> BindHttpRequestAsync(HttpRequest httpRequest) { string userIdField = Constants.WebServiceUserIdField; string documentIdField = Constants.WebServiceDocumentIdField; - var result = new UploadRequest(); + var result = new HttpDocumentUploadRequest(); // Content format validation if (!httpRequest.HasFormContentType) diff --git a/dotnet/CoreLib/WebService/HttpDocumentUploadRequestExtensions.cs b/dotnet/CoreLib/WebService/HttpDocumentUploadRequestExtensions.cs new file mode 100644 index 000000000..affccf13b --- /dev/null +++ b/dotnet/CoreLib/WebService/HttpDocumentUploadRequestExtensions.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using Microsoft.AspNetCore.Http; +using Microsoft.SemanticMemory.Client.Models; + +namespace Microsoft.SemanticMemory.Core.WebService; + +// Note: this class is designed to avoid using Asp.Net IForm +// and to avoid dependencies on Asp.Net HTTP that would lead +// to dependency issues mixing .NET7 and .NET Standard 2.0 +public static class HttpDocumentUploadRequestExtensions +{ + public static DocumentUploadRequest ToDocumentUploadRequest(this HttpDocumentUploadRequest request) + { + var result = new DocumentUploadRequest + { + DocumentId = request.DocumentId, + UserId = request.UserId, + Tags = request.Tags + }; + + foreach (IFormFile file in request.Files) + { + result.Files.Add(new DocumentUploadRequest.UploadedFile + { + FileName = file.FileName, + FileContent = file.OpenReadStream() + }); + } + + return result; + } +} diff --git a/dotnet/CoreLib/WebService/MemoryQuery.cs b/dotnet/CoreLib/WebService/MemoryQuery.cs deleted file mode 100644 index e14e36be7..000000000 --- a/dotnet/CoreLib/WebService/MemoryQuery.cs +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved.
- -using Microsoft.SemanticMemory.Client.Models; - -namespace Microsoft.SemanticMemory.Core.WebService; - -public class MemoryQuery -{ - public string UserId { get; set; } = string.Empty; - public string Query { get; set; } = string.Empty; - public MemoryFilter Filter { get; set; } = new(); -} diff --git a/dotnet/InteractiveSetup/InteractiveSetup.csproj b/dotnet/InteractiveSetup/InteractiveSetup.csproj index ac2add5b3..a3e8b11c9 100644 --- a/dotnet/InteractiveSetup/InteractiveSetup.csproj +++ b/dotnet/InteractiveSetup/InteractiveSetup.csproj @@ -1,7 +1,7 @@ - net7.0 + net6.0 Microsoft.SemanticMemory.InteractiveSetup Microsoft.SemanticMemory.InteractiveSetup CA1031,CA1303,CA1724 @@ -25,7 +25,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/dotnet/Service/Program.cs b/dotnet/Service/Program.cs index 7191cc09a..74731cbfb 100644 --- a/dotnet/Service/Program.cs +++ b/dotnet/Service/Program.cs @@ -7,11 +7,10 @@ using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Mvc; using Microsoft.Extensions.Logging; +using Microsoft.SemanticMemory.Client; using Microsoft.SemanticMemory.Client.Models; -using Microsoft.SemanticMemory.Core; using Microsoft.SemanticMemory.Core.Configuration; using Microsoft.SemanticMemory.Core.Diagnostics; -using Microsoft.SemanticMemory.Core.Pipeline; using Microsoft.SemanticMemory.Core.WebService; using Microsoft.SemanticMemory.InteractiveSetup; using Microsoft.SemanticMemory.Service; @@ -61,13 +60,13 @@ // File upload endpoint app.MapPost("/upload", async Task ( HttpRequest request, - ISemanticMemoryService service, + ISemanticMemoryClient service, ILogger log) => { log.LogTrace("New upload request"); // Note: .NET doesn't yet support binding multipart forms including data and files - (UploadRequest input, bool isValid, string errMsg) = await UploadRequest.BindHttpRequestAsync(request).ConfigureAwait(false); + 
(HttpDocumentUploadRequest input, bool isValid, string errMsg) = await HttpDocumentUploadRequest.BindHttpRequestAsync(request).ConfigureAwait(false); if (!isValid) { @@ -77,13 +76,14 @@ try { - var id = await service.UploadFileAsync(input); + // UploadRequest => Document + var id = await service.ImportDocumentAsync(input.ToDocumentUploadRequest()); return Results.Accepted($"/upload-status?user={input.UserId}&id={id}", - new UploadAccepted { Id = id, UserId = input.UserId, Message = "Upload completed, ingestion started" }); + new UploadAccepted { Id = id, UserId = input.UserId, Message = "Document upload completed, ingestion pipeline started" }); } catch (Exception e) { - return Results.Problem(title: "Upload failed", detail: e.Message, statusCode: 503); + return Results.Problem(title: "Document upload failed", detail: e.Message, statusCode: 503); } }) .Produces(StatusCodes.Status202Accepted); @@ -92,11 +92,11 @@ app.MapPost("/ask", async Task ( MemoryQuery query, - ISemanticMemoryService service, + ISemanticMemoryClient service, ILogger log) => { log.LogTrace("New search request"); - MemoryAnswer answer = await service.AskAsync(query); + MemoryAnswer answer = await service.AskAsync(query.UserId, query.Question, query.Filter); return Results.Ok(answer); }) .Produces(StatusCodes.Status200OK); @@ -106,7 +106,7 @@ async Task ( async Task ( [FromQuery(Name = "user")] string userId, [FromQuery(Name = "id")] string pipelineId, - ISemanticMemoryService service) => + ISemanticMemoryClient service) => { if (string.IsNullOrEmpty(userId)) { @@ -118,27 +118,13 @@ async Task ( return Results.BadRequest("'id' query parameter is missing or has no value"); } - DataPipeline? pipeline = await service.ReadPipelineStatusAsync(userId, pipelineId); + DataPipelineStatus? 
pipeline = await service.GetDocumentStatusAsync(userId, pipelineId); if (pipeline == null) { return Results.NotFound("Document pipeline not found"); } - var result = new DataPipelineStatus - { - Completed = pipeline.Complete, - Failed = false, // TODO - Id = pipeline.Id, - UserId = pipeline.UserId, - Tags = pipeline.Tags, - Creation = pipeline.Creation, - LastUpdate = pipeline.LastUpdate, - Steps = pipeline.Steps, - RemainingSteps = pipeline.RemainingSteps, - CompletedSteps = pipeline.CompletedSteps, - }; - - return Results.Ok(result); + return Results.Ok(pipeline); }) .Produces(StatusCodes.Status200OK) .Produces(StatusCodes.Status400BadRequest) diff --git a/dotnet/Service/Service.csproj b/dotnet/Service/Service.csproj index e36a733dc..3cc7fc087 100644 --- a/dotnet/Service/Service.csproj +++ b/dotnet/Service/Service.csproj @@ -29,7 +29,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/001-dotnet-Serverless/001-dotnet-Serverless.csproj b/samples/001-dotnet-Serverless/001-dotnet-Serverless.csproj index c002837ee..3e73c7176 100644 --- a/samples/001-dotnet-Serverless/001-dotnet-Serverless.csproj +++ b/samples/001-dotnet-Serverless/001-dotnet-Serverless.csproj @@ -35,7 +35,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/001-dotnet-Serverless/Program.cs b/samples/001-dotnet-Serverless/Program.cs index 447d84f55..3825fe49b 100644 --- a/samples/001-dotnet-Serverless/Program.cs +++ b/samples/001-dotnet-Serverless/Program.cs @@ -18,37 +18,41 @@ * * Note: no web service required, each file is processed in this process. 
*/ -var memory = new SemanticMemoryServerless(Builder.GetServiceProvider()); +var memory = new Memory(Builder.GetServiceProvider()); // ======================= // === UPLOAD ============ // ======================= // Uploading one file - This will create -// a new upload every time because no file ID is specified, and +// a new upload every time because no document ID is specified, and // stored under the "default" user because no User ID is specified. -await memory.ImportFileAsync("file1-Wikipedia-Carbon.txt"); +Console.WriteLine("Uploading file without document ID"); +await memory.ImportDocumentAsync("file1-Wikipedia-Carbon.txt"); -// Uploading only if the file has not been (successfully) uploaded already -if (!await memory.IsReadyAsync(userId: "user1", documentId: "f01")) +// Uploading only if the document has not been (successfully) uploaded already +if (!await memory.IsDocumentReadyAsync(userId: "user1", documentId: "doc001")) { - await memory.ImportFileAsync("file1-Wikipedia-Carbon.txt", - new DocumentDetails(userId: "user1", documentId: "f01")); + Console.WriteLine("Uploading doc001"); + await memory.ImportDocumentAsync("file1-Wikipedia-Carbon.txt", + new DocumentDetails(userId: "user1", documentId: "doc001")); } -// Uploading multiple files -await memory.ImportFilesAsync(new[] +// Uploading a document containing multiple files +Console.WriteLine("Uploading doc002"); +await memory.ImportDocumentAsync(new Document(new[] { - new Document("file2-Wikipedia-Moon.txt", new DocumentDetails("user1", "f02")), - new Document("file3-lorem-ipsum.docx", new DocumentDetails("user1", "f03")), - new Document("file4-SK-Readme.pdf", new DocumentDetails("user1", "f04")), -}); + "file2-Wikipedia-Moon.txt", + "file3-lorem-ipsum.docx", + "file4-SK-Readme.pdf" +}, new DocumentDetails("user1", "doc002"))); // Categorizing files with tags -if (!await memory.IsReadyAsync(userId: "user2", documentId: "f05")) +if (!await memory.IsDocumentReadyAsync(userId: "user2", documentId: 
"doc003")) { - await memory.ImportFileAsync("file5-NASA-news.pdf", - new DocumentDetails("user2", "f05") + Console.WriteLine("Uploading doc003"); + await memory.ImportDocumentAsync("file5-NASA-news.pdf", + new DocumentDetails("user2", "doc003") .AddTag("collection", "meetings") .AddTag("collection", "NASA") .AddTag("collection", "space") diff --git a/samples/001-dotnet-Serverless/README.md b/samples/001-dotnet-Serverless/README.md index 58ef74668..b372ae668 100644 --- a/samples/001-dotnet-Serverless/README.md +++ b/samples/001-dotnet-Serverless/README.md @@ -7,16 +7,16 @@ All the logic is executed locally using the default C# handlers. Depending on your settings, files can be stored locally or in Azure Blobs. ```csharp -var memory = new MemoryServerlessClient(config); +var memory = new Memory(serviceProvider); -await memory.ImportFilesAsync(new[] +await memory.ImportDocumentAsync(new Document(new[] { - new Document("file2.txt", new DocumentDetails("f02", "user1")), - new Document("file3.docx", new DocumentDetails("f03", "user1")), - new Document("file4.pdf", new DocumentDetails("f04", "user1")), -}); + "file2.txt", + "file3.docx", + "file4.pdf" +}, new DocumentDetails("user1", "doc002"))); -string answer = await memory.AskAsync("What's Semantic Kernel?", "user1"); +string answer = await memory.AskAsync("user1", "What's Semantic Kernel?"); ``` # Prepare the example diff --git a/samples/002-dotnet-WebClient/002-dotnet-WebClient.csproj b/samples/002-dotnet-WebClient/002-dotnet-WebClient.csproj index b2eb4dbcf..04c1a40b2 100644 --- a/samples/002-dotnet-WebClient/002-dotnet-WebClient.csproj +++ b/samples/002-dotnet-WebClient/002-dotnet-WebClient.csproj @@ -35,7 +35,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/002-dotnet-WebClient/Program.cs b/samples/002-dotnet-WebClient/Program.cs index 0e516dff0..5f73cbace 100644 --- 
a/samples/002-dotnet-WebClient/Program.cs +++ b/samples/002-dotnet-WebClient/Program.cs @@ -20,42 +20,44 @@ // ======================= // Uploading one file - This will create -// a new upload every time because no file ID is specified, and +// a new upload every time because no document ID is specified, and // stored under the "default" user because no User ID is specified. -await memory.ImportFileAsync("file1-Wikipedia-Carbon.txt"); +Console.WriteLine("Uploading file without document ID"); +await memory.ImportDocumentAsync("file1-Wikipedia-Carbon.txt"); -// Uploading only if the file has not been (successfully) uploaded already -if (!await memory.IsReadyAsync(userId: "user1", documentId: "f01")) +// Uploading only if the document has not been (successfully) uploaded already +if (!await memory.IsDocumentReadyAsync(userId: "user1", documentId: "doc001")) { - await memory.ImportFileAsync("file1-Wikipedia-Carbon.txt", - new DocumentDetails(userId: "user1", documentId: "f01")); + Console.WriteLine("Uploading doc001"); + await memory.ImportDocumentAsync("file1-Wikipedia-Carbon.txt", + new DocumentDetails(userId: "user1", documentId: "doc001")); } -// Uploading multiple files -await memory.ImportFilesAsync(new[] +// Uploading a document containing multiple files +Console.WriteLine("Uploading doc002"); +await memory.ImportDocumentAsync(new Document(new[] { - new Document("file2-Wikipedia-Moon.txt", new DocumentDetails("user1", "f02")), - new Document("file3-lorem-ipsum.docx", new DocumentDetails("user1", "f03")), - new Document("file4-SK-Readme.pdf", new DocumentDetails("user1", "f04")), -}); + "file2-Wikipedia-Moon.txt", + "file3-lorem-ipsum.docx", + "file4-SK-Readme.pdf" +}, new DocumentDetails("user1", "doc002"))); // Categorizing files with tags -if (!await memory.IsReadyAsync(userId: "user2", documentId: "f05")) +if (!await memory.IsDocumentReadyAsync(userId: "user2", documentId: "doc003")) { - await memory.ImportFileAsync("file5-NASA-news.pdf", - new 
DocumentDetails("user2", "f05") - .AddTag("collection", "samples") - .AddTag("collection", "webClient") - .AddTag("collection", ".NET") + Console.WriteLine("Uploading doc003"); + await memory.ImportDocumentAsync("file5-NASA-news.pdf", + new DocumentDetails("user2", "doc003") + .AddTag("collection", "meetings") + .AddTag("collection", "NASA") + .AddTag("collection", "space") .AddTag("type", "news")); } while ( - !await memory.IsReadyAsync(userId: "user1", documentId: "f01") - || !await memory.IsReadyAsync(userId: "user1", documentId: "f02") - || !await memory.IsReadyAsync(userId: "user1", documentId: "f03") - || !await memory.IsReadyAsync(userId: "user1", documentId: "f04") - || !await memory.IsReadyAsync(userId: "user2", documentId: "f05") + !await memory.IsDocumentReadyAsync(userId: "user1", documentId: "doc001") + || !await memory.IsDocumentReadyAsync(userId: "user1", documentId: "doc002") + || !await memory.IsDocumentReadyAsync(userId: "user2", documentId: "doc003") ) { Console.WriteLine("Waiting for memory ingestion to complete..."); @@ -92,3 +94,16 @@ await memory.ImportFileAsync("file5-NASA-news.pdf", { Console.WriteLine($" - {x.SourceName} - {x.Link} [{x.Partitions.First().LastUpdate:D}]"); } + +// Test with tags +question = "What is Orion?"; +Console.WriteLine($"\n\nQuestion: {question}"); + +var filter1 = new MemoryFilter().ByTag("type", "article"); +var filter2 = new MemoryFilter().ByTag("type", "news"); + +answer = await memory.AskAsync("user2", question, filter1); +Console.WriteLine($"\nArticles: {answer.Result}\n\n"); + +answer = await memory.AskAsync("user2", question, filter2); +Console.WriteLine($"\nNews: {answer.Result}\n\n"); diff --git a/samples/002-dotnet-WebClient/README.md b/samples/002-dotnet-WebClient/README.md index fcd6980e8..456391ad7 100644 --- a/samples/002-dotnet-WebClient/README.md +++ b/samples/002-dotnet-WebClient/README.md @@ -10,16 +10,20 @@ Start `dotnet/Service/Service.csproj`. 
See `dotnet/Service/README.md` for detail ```csharp var memory = new MemoryWebClient("http://127.0.0.1:9001/"); -await memory.ImportFilesAsync(new[] +await memory.ImportDocumentAsync(new Document(new[] { - new Document("file2.txt", new DocumentDetails("f02", "user1")), - new Document("file3.docx", new DocumentDetails("f03", "user1")), - new Document("file4.pdf", new DocumentDetails("f04", "user1")), -}); + "file2.txt", + "file3.docx", + "file4.pdf" +}, new DocumentDetails("user1", "doc002"))); -// ...wait for the service to import the files in the background... +while (!await memory.IsDocumentReadyAsync(userId: "user1", documentId: "doc002")) +{ + Console.WriteLine("Waiting for memory ingestion to complete..."); + await Task.Delay(TimeSpan.FromSeconds(2)); +} -string answer = await memory.AskAsync("What's Semantic Kernel?", "user1"); +string answer = await memory.AskAsync("user1", "What's Semantic Kernel?"); ``` # Prepare the example diff --git a/samples/002-dotnet-WebClient/setup.cmd b/samples/002-dotnet-WebClient/setup.cmd deleted file mode 100644 index 75e18a50b..000000000 --- a/samples/002-dotnet-WebClient/setup.cmd +++ /dev/null @@ -1,5 +0,0 @@ -@echo off - -dotnet restore -dotnet build -dotnet run setup diff --git a/samples/002-dotnet-WebClient/setup.sh b/samples/002-dotnet-WebClient/setup.sh deleted file mode 100755 index 0ec8f85ae..000000000 --- a/samples/002-dotnet-WebClient/setup.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -set -e - -cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/" - -dotnet restore -dotnet build -dotnet run setup - diff --git a/samples/003-curl-calling-webservice/README.md b/samples/003-curl-calling-webservice/README.md index d211d1375..b9bdb2b3d 100644 --- a/samples/003-curl-calling-webservice/README.md +++ b/samples/003-curl-calling-webservice/README.md @@ -7,18 +7,33 @@ The example points to http://127.0.0.1:9001 so by default you should run the services locally, though you can also deploy them to Azure and update the 
script accordingly. -# Run the example +# Run the example: upload a file ```bash -./example.sh +./upload-example.sh ``` -Content of [example.sh](example.sh): +Content of [upload-example.sh](upload-example.sh): ```bash ../../tools/upload-file.sh -f test.pdf \ - -s http://127.0.0.1:9001/upload \ + -s http://127.0.0.1:9001 \ -u curlUser \ - -c curlDataCollection \ + -t "type=test" \ -i curlExample01 +``` + +# Run the example: ask a question + +```bash +./ask-example.sh +``` + +Content of [ask-example.sh](ask-example.sh): + +```bash +../../tools/ask.sh -s http://127.0.0.1:9001 \ + -u curlUser \ + -q "tell me about Semantic Kernel" \ + -f '"type":["test"]' ``` \ No newline at end of file diff --git a/samples/003-curl-calling-webservice/ask-example.sh b/samples/003-curl-calling-webservice/ask-example.sh index 2d2bb13a6..678ced655 100755 --- a/samples/003-curl-calling-webservice/ask-example.sh +++ b/samples/003-curl-calling-webservice/ask-example.sh @@ -6,5 +6,6 @@ cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/" ../../tools/ask.sh -s http://127.0.0.1:9001 \ -u curlUser \ - -q "tell me about Semantic Kernel" + -q "tell me about Semantic Kernel" \ + -f '"type":["test"]' diff --git a/samples/004-dotnet-ServerlessCustomPipeline/004-dotnet-ServerlessCustomPipeline.csproj b/samples/004-dotnet-ServerlessCustomPipeline/004-dotnet-ServerlessCustomPipeline.csproj index c002837ee..3e73c7176 100644 --- a/samples/004-dotnet-ServerlessCustomPipeline/004-dotnet-ServerlessCustomPipeline.csproj +++ b/samples/004-dotnet-ServerlessCustomPipeline/004-dotnet-ServerlessCustomPipeline.csproj @@ -35,7 +35,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/004-dotnet-ServerlessCustomPipeline/Program.cs b/samples/004-dotnet-ServerlessCustomPipeline/Program.cs index 8158bb515..fe85f4a49 100644 --- a/samples/004-dotnet-ServerlessCustomPipeline/Program.cs +++ 
b/samples/004-dotnet-ServerlessCustomPipeline/Program.cs @@ -48,7 +48,7 @@ // Create sample pipeline with 4 files Console.WriteLine("* Defining pipeline with 4 files..."); var pipeline = orchestrator - .PrepareNewFileUploadPipeline("userZ", "inProcessTest", new TagCollection { { "testName", "example3" } }) + .PrepareNewDocumentUpload("userZ", "inProcessTest", new TagCollection { { "testName", "example3" } }) .AddUploadFile("file1", "file1-Wikipedia-Carbon.txt", "file1-Wikipedia-Carbon.txt") .AddUploadFile("file2", "file2-Wikipedia-Moon.txt", "file2-Wikipedia-Moon.txt") .AddUploadFile("file3", "file3-lorem-ipsum.docx", "file3-lorem-ipsum.docx") diff --git a/samples/004-dotnet-ServerlessCustomPipeline/README.md b/samples/004-dotnet-ServerlessCustomPipeline/README.md index ace38dc24..97a70f17f 100644 --- a/samples/004-dotnet-ServerlessCustomPipeline/README.md +++ b/samples/004-dotnet-ServerlessCustomPipeline/README.md @@ -9,7 +9,7 @@ multiple files, with a fluent syntax: ```csharp var pipeline = orchestrator - .PrepareNewFileUploadPipeline("userZ", "inProcessTest", new TagCollection { { "testName", "example3" } }) + .PrepareNewDocumentUpload("userZ", "inProcessTest", new TagCollection { { "type", "test" } }) .AddUploadFile("file1", "file1.txt", "file1.txt") .AddUploadFile("file2", "file2.txt", "file2.txt") .AddUploadFile("file3", "file3.docx", "file3.docx") diff --git a/samples/005-dotnet-ExtractTextFromDocs/005-dotnet-ExtractTextFromDocs.csproj b/samples/005-dotnet-ExtractTextFromDocs/005-dotnet-ExtractTextFromDocs.csproj index 804518c06..903370c92 100644 --- a/samples/005-dotnet-ExtractTextFromDocs/005-dotnet-ExtractTextFromDocs.csproj +++ b/samples/005-dotnet-ExtractTextFromDocs/005-dotnet-ExtractTextFromDocs.csproj @@ -30,7 +30,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/006-dotnet-CustomHandler/006-dotnet-CustomHandler.csproj 
b/samples/006-dotnet-CustomHandler/006-dotnet-CustomHandler.csproj index b42675b17..c0ddd4147 100644 --- a/samples/006-dotnet-CustomHandler/006-dotnet-CustomHandler.csproj +++ b/samples/006-dotnet-CustomHandler/006-dotnet-CustomHandler.csproj @@ -26,7 +26,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/007-using-azure-cognitive-search/007-using-azure-cognitive-search.csproj b/samples/007-using-azure-cognitive-search/007-using-azure-cognitive-search.csproj index 00ca91a18..5fb38cd61 100644 --- a/samples/007-using-azure-cognitive-search/007-using-azure-cognitive-search.csproj +++ b/samples/007-using-azure-cognitive-search/007-using-azure-cognitive-search.csproj @@ -9,7 +9,7 @@ - + @@ -29,7 +29,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/007-using-azure-cognitive-search/Program.cs b/samples/007-using-azure-cognitive-search/Program.cs index 77262a880..d8995ae16 100644 --- a/samples/007-using-azure-cognitive-search/Program.cs +++ b/samples/007-using-azure-cognitive-search/Program.cs @@ -54,7 +54,7 @@ public static async Task Main(string[] args) // Insert two records var recordId1 = await InsertRecordAsync(IndexName, externalId: ExternalRecordId1, - metadata: new Dictionary { { "filename", "dotnet.pdf" }, { "text", "this is a sentence" }, }, + payload: new Dictionary { { "filename", "dotnet.pdf" }, { "text", "this is a sentence" }, }, tags: new TagCollection { { "category", "samples" }, @@ -66,7 +66,7 @@ public static async Task Main(string[] args) var recordId2 = await InsertRecordAsync(IndexName, externalId: ExternalRecordId2, - metadata: new Dictionary { { "filename", "python.pdf" }, { "text", "this is a sentence" }, }, + payload: new Dictionary { { "filename", "python.pdf" }, { "text", "this is a sentence" }, }, tags: new 
TagCollection { { "category", "samples" }, @@ -90,7 +90,7 @@ public static async Task Main(string[] args) foreach (MemoryRecord rec in records) { Console.WriteLine(" - " + rec.Id); - Console.WriteLine(" " + rec.Metadata.FirstOrDefault().Value); + Console.WriteLine(" " + rec.Payload.FirstOrDefault().Value); } // // Delete the record @@ -137,7 +137,7 @@ private static async Task CreateIndexAsync(string name) IsSortable = false, }); - indexSchema.Fields.Add(new SearchField("metadata", SearchFieldDataType.String) + indexSchema.Fields.Add(new SearchField("payload", SearchFieldDataType.String) { IsKey = false, IsFilterable = true, @@ -174,7 +174,7 @@ private static async Task CreateIndexAsync(string name) // =============================================================================================== private static async Task InsertRecordAsync(string indexName, - string externalId, Dictionary metadata, TagCollection tags, Embedding embedding) + string externalId, Dictionary payload, TagCollection tags, Embedding embedding) { Console.WriteLine("\n== INSERT ==\n"); var client = adminClient.GetSearchClient(indexName); @@ -185,7 +185,7 @@ private static async Task InsertRecordAsync(string indexName, Vector = embedding, Owner = "userAB", Tags = tags, - Metadata = metadata + Payload = payload }; AzureCognitiveSearchMemoryRecord localRecord = AzureCognitiveSearchMemoryRecord.FromMemoryRecord(record); diff --git a/tools/ask.sh b/tools/ask.sh index 8bbb49922..ed124088f 100755 --- a/tools/ask.sh +++ b/tools/ask.sh @@ -13,11 +13,12 @@ Help for Bash script Usage: - ./ask.sh -s -u -q "" + ./ask.sh -s -u -q "" -f "" -s web service URL (required) Semantic Memory web service URL. -u userId (required) User ID. -q question (required) Question, using quotes. + -f filter (optional) Key-value filter, e.g. -f '"type":["news","article"],"group":["emails"]' -h Print this help content. 
@@ -45,7 +46,11 @@ readParameters() { ;; -q) shift - QUERY=$1 + QUESTION=$1 + ;; + -f) + shift + FILTER=$1 ;; *) help @@ -65,7 +70,7 @@ validateParameters() { echo "Please specify the user ID" exit 2 fi - if [ -z "$QUERY" ]; then + if [ -z "$QUESTION" ]; then echo "Please specify the user ID" exit 2 fi @@ -73,7 +78,7 @@ validateParameters() { # Remove variables and functions from the environment, in case the script was sourced cleanupEnv() { - unset SERVICE_URL USER_ID QUERY + unset SERVICE_URL USER_ID QUESTION FILTER unset -f help readParameters validateParameters cleanupEnv exitScript } @@ -89,6 +94,5 @@ validateParameters # Send HTTP request using curl set -x curl -v -H 'Content-Type: application/json' \ - -d'{"query":"'"${QUERY}"'","userId":"'"${USER_ID}"'"}' \ - $SERVICE_URL/ask - + -d'{"question":"'"${QUESTION}"'","userId":"'"${USER_ID}"'","filter":{'"${FILTER}"'}}' \ + $SERVICE_URL/ask