Skip to content

Commit

Permalink
* Complete filtering support, e.g. filtering memory by user/folder/ch…
Browse files Browse the repository at this point in the history
…at/tag/anything

* Complete "a document can contain multiple files" on all implementations of ISemanticMemoryClient
* Rename SemanticMemoryServerless to Memory
* Align interface of Memory, MemoryService, MemoryWebClient
* Delete ISemanticMemoryService and re-use ISemanticMemoryClient
* Align on camelCase for HTTP request/responses
* Revisit API, align on the term "document" instead of "file"
* Rename pipeline.Id to pipeline.DocumentId
* Memory records: rename record.Metadata to record.Payload
* Remove from CoreLib dependency on ASP.NET framework and ASP.NET HTTP 
* Upgrade Azure Cognitive Search nuget to latest beta
* Rename /ask param "query" to "question"
* Update docs and examples
  • Loading branch information
dluc committed Aug 11, 2023
1 parent 9c6e4ce commit 0022e70
Show file tree
Hide file tree
Showing 55 changed files with 707 additions and 470 deletions.
24 changes: 13 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ in your app.
> ### Importing documents into your Semantic Memory can be as simple as this:
>
> ```csharp
> var memory = new MemoryServerlessClient();
> var memory = new Memory(serviceProvider);
>
> // Import a file (default user)
> await memory.ImportFileAsync("meeting-transcript.docx");
> await memory.ImportDocumentAsync("meeting-transcript.docx");
>
> // Import a file specifying a User and Tags
> await memory.ImportFileAsync("business-plan.docx",
> new DocumentDetails("user@some.email", "file1")
> // Import a file specifying a Document ID, User and Tags
> await memory.ImportDocumentAsync("business-plan.docx",
> new DocumentDetails("user@some.email", "file001")
> .AddTag("collection", "business")
> .AddTag("collection", "plans")
> .AddTag("type", "doc"));
> .AddTag("fiscalYear", "2023"));
> ```
> ### Asking questions:
Expand Down Expand Up @@ -155,13 +155,15 @@ to **start the Semantic Memory Service**:
>
> var memory = new MemoryWebClient("http://127.0.0.1:9001"); // <== URL where the web service is running
>
> await memory.ImportFileAsync("meeting-transcript.docx");
> // Import a file (default user)
> await memory.ImportDocumentAsync("meeting-transcript.docx");
>
> await memory.ImportFileAsync("business-plan.docx",
> new DocumentDetails("file1", "user0022")
> // Import a file specifying a Document ID, User and Tags
> await memory.ImportDocumentAsync("business-plan.docx",
> new DocumentDetails("user@some.email", "file001")
> .AddTag("collection", "business")
> .AddTag("collection", "plans")
> .AddTag("type", "doc"));
> .AddTag("fiscalYear", "2023"));
> ```
> ### Getting answers via the web service
Expand Down Expand Up @@ -202,7 +204,7 @@ var app = AppBuilder.Build();
var storage = app.Services.GetService<IContentStorage>();
// Use a local, synchronous, orchestrator
var orchestrator = new InProcessPipelineOrchestrator(storage);
var orchestrator = new InProcessPipelineOrchestrator(storage, app.Services);
// Define custom .NET handlers
var step1 = new MyHandler1("step1", orchestrator);
Expand Down
2 changes: 1 addition & 1 deletion dotnet/ClientLib/ClientLib.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.VisualStudio.Threading.Analyzers" Version="17.6.40">
<PackageReference Include="Microsoft.VisualStudio.Threading.Analyzers" Version="17.7.30">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand Down
62 changes: 31 additions & 31 deletions dotnet/ClientLib/ISemanticMemoryClient.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.SemanticMemory.Client.Models;
Expand All @@ -10,28 +9,20 @@ namespace Microsoft.SemanticMemory.Client;
public interface ISemanticMemoryClient
{
/// <summary>
/// Import a file into memory. The file can have tags and other details.
/// Import a document into memory. The document can contain one or more files, can have tags and other details.
/// </summary>
/// <param name="file">Details of the file to import</param>
/// <param name="uploadRequest">Upload request containing the document files and details</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>Document ID</returns>
public Task<string> ImportFileAsync(Document file, CancellationToken cancellationToken = default);
public Task<string> ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default);

/// <summary>
/// Import multiple files into memory. Each file can have tags and other details.
/// Import a document into memory. The document can contain one or more files, can have tags and other details.
/// </summary>
/// <param name="files">Details of the files to import</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>List of document IDs</returns>
public Task<IList<string>> ImportFilesAsync(Document[] files, CancellationToken cancellationToken = default);

/// <summary>
/// Import a file from disk into the default user memory.
/// </summary>
/// <param name="fileName">Path and name of the file to import</param>
/// <param name="document">Details of the files to import</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>Document ID</returns>
public Task<string> ImportFileAsync(string fileName, CancellationToken cancellationToken = default);
public Task<string> ImportDocumentAsync(Document document, CancellationToken cancellationToken = default);

/// <summary>
/// Import a files from disk into memory, with details such as tags and user ID.
Expand All @@ -40,35 +31,44 @@ public interface ISemanticMemoryClient
/// <param name="details">File details such as tags and user ID</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>Document ID</returns>
public Task<string> ImportFileAsync(string fileName, DocumentDetails details, CancellationToken cancellationToken = default);
public Task<string> ImportDocumentAsync(string fileName, DocumentDetails? details = null, CancellationToken cancellationToken = default);

/// <summary>
/// Search the default user memory for an answer to the given query.
/// Check if a document ID exists in a user memory and is ready for usage.
/// The logic checks if the uploaded document has been fully processed.
/// When the document exists in storage but is not processed yet, the method returns False.
/// </summary>
/// <param name="query">Query/question to answer</param>
/// <param name="filter">Filter to match</param>
/// <param name="userId">ID of the user's memory to search</param>
/// <param name="documentId">Document ID</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>Answer to the query, if possible</returns>
public Task<MemoryAnswer> AskAsync(string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default);
/// <returns>True if the document has been successfully uploaded and imported</returns>
public Task<bool> IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default);

/// <summary>
/// Search a user memory for an answer to the given query.
/// Get information about an uploaded document
/// </summary>
/// <param name="userId">ID of the user's memory to search</param>
/// <param name="query">Query/question to answer</param>
/// <param name="userId">User ID</param>
/// <param name="documentId">Document ID (aka pipeline ID)</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>Information about an uploaded document</returns>
public Task<DataPipelineStatus?> GetDocumentStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default);

/// <summary>
/// Search the default user memory for an answer to the given query.
/// </summary>
/// <param name="question">Query/question to answer</param>
/// <param name="filter">Filter to match</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>Answer to the query, if possible</returns>
public Task<MemoryAnswer> AskAsync(string userId, string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default);
public Task<MemoryAnswer> AskAsync(string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default);

/// <summary>
/// Check if a document ID exists in a user memory and is ready for usage.
/// The logic checks if the uploaded document has been fully processed.
/// When the document exists in storage but is not processed yet, the method returns False.
/// Search a user memory for an answer to the given query.
/// </summary>
/// <param name="userId">ID of the user's memory to search</param>
/// <param name="documentId">Document ID</param>
/// <param name="question">Question to answer</param>
/// <param name="filter">Filter to match</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>True if the document has been successfully uploaded and imported</returns>
public Task<bool> IsReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default);
/// <returns>Answer to the query, if possible</returns>
public Task<MemoryAnswer> AskAsync(string userId, string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default);
}
144 changes: 100 additions & 44 deletions dotnet/ClientLib/MemoryWebClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,39 +28,62 @@ public MemoryWebClient(string endpoint, HttpClient client)
}

/// <inheritdoc />
public Task<string> ImportFileAsync(Document file, CancellationToken cancellationToken = default)
public Task<string> ImportDocumentAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken = default)
{
return this.ImportFileInternalAsync(file, cancellationToken);
return this.ImportInternalAsync(uploadRequest, cancellationToken);
}

/// <inheritdoc />
public Task<IList<string>> ImportFilesAsync(Document[] files, CancellationToken cancellationToken = default)
public Task<string> ImportDocumentAsync(Document document, CancellationToken cancellationToken = default)
{
return this.ImportFilesInternalAsync(files, cancellationToken);
return this.ImportInternalAsync(document, cancellationToken);
}

/// <inheritdoc />
public Task<string> ImportFileAsync(string fileName, CancellationToken cancellationToken = default)
public Task<string> ImportDocumentAsync(string fileName, DocumentDetails? details = null, CancellationToken cancellationToken = default)
{
return this.ImportFileAsync(new Document(fileName), cancellationToken);
return this.ImportInternalAsync(new Document(fileName) { Details = details ?? new DocumentDetails() }, cancellationToken);
}

/// <inheritdoc />
public Task<string> ImportFileAsync(string fileName, DocumentDetails details, CancellationToken cancellationToken = default)
public async Task<bool> IsDocumentReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default)
{
return this.ImportFileInternalAsync(new Document(fileName) { Details = details }, cancellationToken);
DataPipelineStatus? status = await this.GetDocumentStatusAsync(userId: userId, documentId: documentId, cancellationToken).ConfigureAwait(false);
return status != null && status.Completed;
}

/// <inheritdoc />
public Task<MemoryAnswer> AskAsync(string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default)
public async Task<DataPipelineStatus?> GetDocumentStatusAsync(string userId, string documentId, CancellationToken cancellationToken = default)
{
return this.AskAsync(new DocumentDetails().UserId, query, filter, cancellationToken);
HttpResponseMessage? response = await this._client.GetAsync($"/upload-status?user={userId}&id={documentId}", cancellationToken).ConfigureAwait(false);
if (response.StatusCode == HttpStatusCode.NotFound)
{
return null;
}

response.EnsureSuccessStatusCode();

var json = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
DataPipelineStatus? status = JsonSerializer.Deserialize<DataPipelineStatus>(json);

if (status == null)
{
return null;
}

return status;
}

/// <inheritdoc />
public async Task<MemoryAnswer> AskAsync(string userId, string query, MemoryFilter? filter = null, CancellationToken cancellationToken = default)
public Task<MemoryAnswer> AskAsync(string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default)
{
var request = new { UserId = userId, Query = query, Tags = new TagCollection() };
return this.AskAsync(new DocumentDetails().UserId, question, filter, cancellationToken);
}

/// <inheritdoc />
public async Task<MemoryAnswer> AskAsync(string userId, string question, MemoryFilter? filter = null, CancellationToken cancellationToken = default)
{
var request = new MemoryQuery { UserId = userId, Question = question, Filter = filter ?? new MemoryFilter() };
using var content = new StringContent(JsonSerializer.Serialize(request), Encoding.UTF8, "application/json");

HttpResponseMessage? response = await this._client.PostAsync("/ask", content, cancellationToken).ConfigureAwait(false);
Expand All @@ -70,64 +93,97 @@ public async Task<MemoryAnswer> AskAsync(string userId, string query, MemoryFilt
return JsonSerializer.Deserialize<MemoryAnswer>(json, new JsonSerializerOptions { PropertyNameCaseInsensitive = true }) ?? new MemoryAnswer();
}

/// <inheritdoc />
public async Task<bool> IsReadyAsync(string userId, string documentId, CancellationToken cancellationToken = default)
#region private

private async Task<string> ImportInternalAsync(DocumentUploadRequest uploadRequest, CancellationToken cancellationToken)
{
HttpResponseMessage? response = await this._client.GetAsync($"/upload-status?user={userId}&id={documentId}", cancellationToken).ConfigureAwait(false);
if (response.StatusCode == HttpStatusCode.NotFound)
// Populate form with values and files from disk
using var formData = new MultipartFormDataContent();

using StringContent documentIdContent = new(uploadRequest.DocumentId);
using (StringContent userContent = new(uploadRequest.UserId))
{
return false;
}
List<IDisposable> disposables = new();
formData.Add(documentIdContent, Constants.WebServiceDocumentIdField);
formData.Add(userContent, Constants.WebServiceUserIdField);

response.EnsureSuccessStatusCode();
foreach (var tag in uploadRequest.Tags.Pairs)
{
var tagContent = new StringContent(tag.Value);
disposables.Add(tagContent);
formData.Add(tagContent, tag.Key);
}

var json = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
DataPipelineStatus? status = JsonSerializer.Deserialize<DataPipelineStatus>(json);
for (int index = 0; index < uploadRequest.Files.Count; index++)
{
string fileName = uploadRequest.Files[index].FileName;

if (status == null)
{
throw new SemanticMemoryWebException("Unable to parse status response");
}
byte[] bytes;
using (var binaryReader = new BinaryReader(uploadRequest.Files[index].FileContent))
{
bytes = binaryReader.ReadBytes((int)uploadRequest.Files[index].FileContent.Length);
}

return status.Completed;
}
var fileContent = new ByteArrayContent(bytes, 0, bytes.Length);
disposables.Add(fileContent);

#region private
formData.Add(fileContent, $"file{index}", fileName);
}

private async Task<IList<string>> ImportFilesInternalAsync(Document[] files, CancellationToken cancellationToken)
{
List<string> docIds = new();
foreach (Document file in files)
{
docIds.Add(await this.ImportFileInternalAsync(file, cancellationToken).ConfigureAwait(false));
// Send HTTP request
try
{
HttpResponseMessage? response = await this._client.PostAsync("/upload", formData, cancellationToken).ConfigureAwait(false);
formData.Dispose();
response.EnsureSuccessStatusCode();
}
catch (HttpRequestException e) when (e.Data.Contains("StatusCode"))
{
throw new SemanticMemoryWebException($"{e.Message} [StatusCode: {e.Data["StatusCode"]}]", e);
}
catch (Exception e)
{
throw new SemanticMemoryWebException(e.Message, e);
}
finally
{
foreach (var disposable in disposables)
{
disposable.Dispose();
}
}
}

return docIds;
return uploadRequest.DocumentId;
}

private async Task<string> ImportFileInternalAsync(Document file, CancellationToken cancellationToken)
private async Task<string> ImportInternalAsync(Document document, CancellationToken cancellationToken)
{
// Populate form with values and files from disk
using var formData = new MultipartFormDataContent();

using StringContent documentIdContent = new(file.Details.DocumentId);
using (StringContent userContent = new(file.Details.UserId))
using StringContent documentIdContent = new(document.Details.DocumentId);
using (StringContent userContent = new(document.Details.UserId))
{
List<IDisposable> disposables = new();
formData.Add(documentIdContent, Constants.WebServiceDocumentIdField);
formData.Add(userContent, Constants.WebServiceUserIdField);

foreach (var tag in file.Details.Tags.Pairs)
foreach (var tag in document.Details.Tags.Pairs)
{
var tagContent = new StringContent(tag.Value);
disposables.Add(tagContent);
formData.Add(tagContent, tag.Key);
}

byte[] bytes = File.ReadAllBytes(file.FileName);
var fileContent = new ByteArrayContent(bytes, 0, bytes.Length);
disposables.Add(fileContent);
formData.Add(fileContent, "file1", file.FileName);
for (int index = 0; index < document.FileNames.Count; index++)
{
string fileName = document.FileNames[index];
byte[] bytes = File.ReadAllBytes(fileName);
var fileContent = new ByteArrayContent(bytes, 0, bytes.Length);
disposables.Add(fileContent);
formData.Add(fileContent, $"file{index}", fileName);
}

// Send HTTP request
try
Expand All @@ -153,7 +209,7 @@ private async Task<string> ImportFileInternalAsync(Document file, CancellationTo
}
}

return file.Details.DocumentId;
return document.Details.DocumentId;
}

#endregion
Expand Down
4 changes: 2 additions & 2 deletions dotnet/ClientLib/Models/DataPipelineStatus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ public class DataPipelineStatus
/// Unique Id
/// </summary>
[JsonPropertyOrder(10)]
[JsonPropertyName("id")]
public string Id { get; set; } = string.Empty;
[JsonPropertyName("document_id")]
public string DocumentId { get; set; } = string.Empty;

[JsonPropertyOrder(11)]
[JsonPropertyName("user_id")]
Expand Down
Loading

0 comments on commit 0022e70

Please sign in to comment.