Skip to content

Commit

Permalink
Add handlers settings
Browse files Browse the repository at this point in the history
Split embedding generation and storage
Add embedding file mime type
Code cleanup, fix code style warnings, increase logging, fix dispose
  • Loading branch information
dluc committed Jul 25, 2023
1 parent d4d5ae4 commit 0fc3fb7
Show file tree
Hide file tree
Showing 24 changed files with 558 additions and 83 deletions.
2 changes: 2 additions & 0 deletions SemanticMemory.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ public void It$SOMENAME$()
<s:Boolean x:Key="/Default/UserDictionary/Words/=Dotproduct/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=ENDPART/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=fareweller/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=fffffff/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=greaterthan/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Hmmss/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Joinable/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=keyvault/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Kitto/@EntryIndexedValue">True</s:Boolean>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ private async Task ImportFilesInternalAsync(string[] files, ImportFileOptions op
// Include all files
for (int index = 0; index < files.Length; index++)
{
string? file = files[index];
string file = files[index];
pipeline.AddUploadFile($"file{index + 1}", file, file);
}

Expand Down
55 changes: 0 additions & 55 deletions lib/dotnet/Core.NetStandard20/ISemanticMemoryClient.cs
Original file line number Diff line number Diff line change
@@ -1,66 +1,11 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Threading.Tasks;

// ReSharper disable CommentTypo

namespace Microsoft.SemanticKernel.SemanticMemory.Core20;

/// <summary>
/// Options describing a file import request: the owner, the target vaults,
/// and the ID used to identify the request.
/// </summary>
public class ImportFileOptions
{
    /// <summary>ID of the user owning the imported files. Must be non-empty to pass <see cref="Validate"/>.</summary>
    public string UserId { get; set; } = string.Empty;

    /// <summary>Vaults the files are imported into. Must contain at least one entry to pass <see cref="Validate"/>.</summary>
    public List<string> VaultIds { get; set; } = new();

    /// <summary>ID of the import request. When empty, <see cref="Sanitize"/> generates one.</summary>
    public string RequestId { get; set; } = string.Empty;

    /// <summary>
    /// Creates an empty set of options; properties are expected to be set by the caller.
    /// </summary>
    public ImportFileOptions()
    {
    }

    /// <summary>
    /// Creates options for a single vault, leaving the request ID empty.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultId">Vault ID</param>
    public ImportFileOptions(string userId, string vaultId)
        : this(userId, vaultId, string.Empty)
    {
    }

    /// <summary>
    /// Creates options for a single vault with an explicit request ID.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultId">Vault ID, added to <see cref="VaultIds"/></param>
    /// <param name="requestId">Request ID, can be empty</param>
    public ImportFileOptions(string userId, string vaultId, string requestId)
    {
        this.UserId = userId;
        this.VaultIds.Add(vaultId);
        this.RequestId = requestId;
    }

    /// <summary>
    /// Creates options for a list of vaults with an explicit request ID.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultIds">List of vault IDs, stored as is (not copied)</param>
    /// <param name="requestId">Request ID, can be empty</param>
    public ImportFileOptions(string userId, List<string> vaultIds, string requestId)
    {
        this.UserId = userId;
        this.VaultIds = vaultIds;
        this.RequestId = requestId;
    }

    /// <summary>
    /// Fills in a generated request ID when none has been provided.
    /// </summary>
    public void Sanitize()
    {
        if (string.IsNullOrEmpty(this.RequestId))
        {
            // The suffix contains only seconds and fractional seconds ("ss.fffffff"),
            // not the full "yyyyMMdd.HHmmss.fffffff" timestamp.
            this.RequestId = Guid.NewGuid().ToString("D") + "-" + DateTimeOffset.UtcNow.ToString("ss.fffffff", CultureInfo.InvariantCulture);
        }
    }

    /// <summary>
    /// Checks that the mandatory values are present.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// The user ID is null or empty, or the list of vaults is null or empty.
    /// </exception>
    public void Validate()
    {
        if (string.IsNullOrEmpty(this.UserId))
        {
            throw new ArgumentNullException(nameof(this.UserId), "User ID is empty");
        }

        // The list can be null when set through the constructor or the public setter:
        // report it like an empty list rather than failing with a NullReferenceException.
        if (this.VaultIds is null || this.VaultIds.Count < 1)
        {
            throw new ArgumentNullException(nameof(this.VaultIds), "The list of vaults is empty");
        }
    }
}

public interface ISemanticMemoryClient
{
public Task ImportFileAsync(string file, ImportFileOptions options);
Expand Down
59 changes: 59 additions & 0 deletions lib/dotnet/Core.NetStandard20/ImportFileOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.Globalization;

namespace Microsoft.SemanticKernel.SemanticMemory.Core20;

/// <summary>
/// Settings for a file import request: who owns the files, which vaults
/// receive them, and the ID tracking the request.
/// </summary>
public class ImportFileOptions
{
    /// <summary>Owner of the imported files. <see cref="Validate"/> rejects a null or empty value.</summary>
    public string UserId { get; set; } = string.Empty;

    /// <summary>Target vaults. <see cref="Validate"/> rejects a null or empty list.</summary>
    public List<string> VaultIds { get; set; } = new();

    /// <summary>Request ID. Left empty, it is generated by <see cref="Sanitize"/>.</summary>
    public string RequestId { get; set; } = string.Empty;

    /// <summary>
    /// Creates options with empty values, to be populated through the properties.
    /// </summary>
    public ImportFileOptions()
    {
    }

    /// <summary>
    /// Creates options targeting one vault, with an empty request ID.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultId">Vault ID</param>
    public ImportFileOptions(string userId, string vaultId)
        : this(userId, vaultId, string.Empty)
    {
    }

    /// <summary>
    /// Creates options targeting one vault, with the given request ID.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultId">Vault ID, appended to <see cref="VaultIds"/></param>
    /// <param name="requestId">Request ID, can be empty</param>
    public ImportFileOptions(string userId, string vaultId, string requestId)
    {
        this.UserId = userId;
        this.VaultIds.Add(vaultId);
        this.RequestId = requestId;
    }

    /// <summary>
    /// Creates options targeting several vaults, with the given request ID.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultIds">Vault IDs; the list instance is stored as is, without copying</param>
    /// <param name="requestId">Request ID, can be empty</param>
    public ImportFileOptions(string userId, List<string> vaultIds, string requestId)
    {
        this.UserId = userId;
        this.VaultIds = vaultIds;
        this.RequestId = requestId;
    }

    /// <summary>
    /// Generates a request ID if the current one is null or empty.
    /// The generated value is a GUID followed by "-" and the current UTC seconds and fractional seconds.
    /// </summary>
    public void Sanitize()
    {
        if (string.IsNullOrEmpty(this.RequestId))
        {
            // note: the ID doesn't include the full date, to avoid "personal" details
            this.RequestId = Guid.NewGuid().ToString("D") + "-" + DateTimeOffset.UtcNow.ToString("ss.fffffff", CultureInfo.InvariantCulture);
        }
    }

    /// <summary>
    /// Verifies that the required values have been provided.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <see cref="UserId"/> is null or empty, or <see cref="VaultIds"/> is null or empty.
    /// </exception>
    public void Validate()
    {
        if (string.IsNullOrEmpty(this.UserId))
        {
            throw new ArgumentNullException(nameof(this.UserId), "User ID is empty");
        }

        // VaultIds has a public setter and a constructor accepting any list, so it can be null:
        // treat null like an empty list instead of crashing with a NullReferenceException.
        if (this.VaultIds is null || this.VaultIds.Count < 1)
        {
            throw new ArgumentNullException(nameof(this.VaultIds), "The list of vaults is empty");
        }
    }
}
32 changes: 32 additions & 0 deletions lib/dotnet/Core/Configuration/ConfigurationException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) Microsoft. All rights reserved.

using System;

namespace Microsoft.SemanticKernel.SemanticMemory.Core.Configuration;

/// <summary>
/// Exception used to report configuration errors.
/// </summary>
public class ConfigurationException : Exception
{
    /// <summary>
    /// Creates a <see cref="ConfigurationException"/> carrying the default exception message.
    /// </summary>
    public ConfigurationException()
        : base()
    {
    }

    /// <summary>
    /// Creates a <see cref="ConfigurationException"/> carrying the given <paramref name="message"/>.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    public ConfigurationException(string? message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates a <see cref="ConfigurationException"/> carrying the given <paramref name="message"/>
    /// and wrapping the exception that caused it.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="innerException">Exception that caused the current one.</param>
    public ConfigurationException(string? message, Exception? innerException)
        : base(message, innerException)
    {
    }
}
134 changes: 134 additions & 0 deletions lib/dotnet/Core/Configuration/EmbeddingGenerationConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;

namespace Microsoft.SemanticKernel.SemanticMemory.Core.Configuration;

/// <summary>
/// Configuration settings for the embedding generators.
/// </summary>
public class EmbeddingGenerationConfig
{
    /// <summary>
    /// List of active generators, out of the full list.
    /// <see cref="GeneratorsConfig"/> might contain settings for several generators, but normally only one is in use.
    /// </summary>
    public List<string> ActiveGenerators { get; set; } = new();

    /// <summary>
    /// Available embedding generators, with settings, indexed by generator name.
    /// Settings here are stored as string values, and parsed to actual types by <see cref="GetActiveGeneratorsTypedConfig"/>.
    /// </summary>
    public Dictionary<string, Dictionary<string, string>> GeneratorsConfig { get; set; } = new();

    /// <summary>
    /// Known embedding generator types.
    /// TODO: add SentenceTransformers
    /// </summary>
    public enum GeneratorTypes
    {
        Unknown = 0,
        AzureOpenAI = 1,
        OpenAI = 2,
    }

    /// <summary>
    /// Azure OpenAI embedding generator settings.
    /// </summary>
    public class AzureOpenAI
    {
        public GeneratorTypes Type { get; } = GeneratorTypes.AzureOpenAI;
        public string APIKey { get; set; } = string.Empty;
        public string Endpoint { get; set; } = string.Empty;
        public string Deployment { get; set; } = string.Empty;
    }

    /// <summary>
    /// OpenAI embedding generator settings.
    /// </summary>
    public class OpenAI
    {
        public GeneratorTypes Type { get; } = GeneratorTypes.OpenAI;
        public string APIKey { get; set; } = string.Empty;
        public string OrgId { get; set; } = string.Empty;
        public string Model { get; set; } = string.Empty;
    }

    /// <summary>
    /// Cast settings from <see cref="GeneratorsConfig"/> to actual typed values.
    /// </summary>
    /// <param name="log">Optional logger</param>
    /// <returns>
    /// Strongly typed view of active generators, indexed by generator name.
    /// Values are instances of <see cref="AzureOpenAI"/> or <see cref="OpenAI"/>.
    /// </returns>
    /// <exception cref="ConfigurationException">
    /// An active generator has no configuration, a mandatory setting is missing,
    /// or the generator type is not supported.
    /// </exception>
    public Dictionary<string, object> GetActiveGeneratorsTypedConfig(ILogger? log = null)
    {
        log ??= NullLogger<EmbeddingGenerationConfig>.Instance;

        Dictionary<string, object> result = new();
        foreach (string name in this.ActiveGenerators)
        {
            object config = this.GetGeneratorConfig(name);
            result[name] = config;

            switch (config)
            {
                case AzureOpenAI azure:
                    log.LogDebug("Using Azure OpenAI embeddings, deployment: {0}", azure.Deployment);
                    break;

                case OpenAI openAI:
                    log.LogDebug("Using OpenAI embeddings, model: {0}", openAI.Model);
                    break;
            }
        }

        return result;
    }

    /// <summary>
    /// Build the typed settings of one generator, choosing the class from its "Type" setting
    /// (compared ignoring case against the <see cref="GeneratorTypes"/> names).
    /// </summary>
    /// <param name="name">Generator name, key of <see cref="GeneratorsConfig"/></param>
    /// <returns>An <see cref="AzureOpenAI"/> or <see cref="OpenAI"/> instance</returns>
    /// <exception cref="ConfigurationException">
    /// The generator is not configured, a mandatory setting is missing, or the type is not supported.
    /// </exception>
    private object GetGeneratorConfig(string name)
    {
        // Read "Type" through the same helper used for the other settings, so that a missing
        // generator or a missing type is reported as ConfigurationException, not KeyNotFoundException.
        string type = this.GetGeneratorSetting(name, "Type");

        if (string.Equals(type, GeneratorTypes.AzureOpenAI.ToString("G"), StringComparison.OrdinalIgnoreCase))
        {
            return new AzureOpenAI
            {
                APIKey = this.GetGeneratorSetting(name, "APIKey"),
                Endpoint = this.GetGeneratorSetting(name, "Endpoint"),
                Deployment = this.GetGeneratorSetting(name, "Deployment"),
            };
        }

        if (string.Equals(type, GeneratorTypes.OpenAI.ToString("G"), StringComparison.OrdinalIgnoreCase))
        {
            return new OpenAI
            {
                APIKey = this.GetGeneratorSetting(name, "APIKey"),
                // The organization ID is the only optional setting
                OrgId = this.GetGeneratorSetting(name, "OrgId", true),
                Model = this.GetGeneratorSetting(name, "Model"),
            };
        }

        throw new ConfigurationException($"Embedding generator type '{type}' not supported");
    }

    /// <summary>
    /// Read one string setting of a generator from <see cref="GeneratorsConfig"/>.
    /// </summary>
    /// <param name="generator">Generator name</param>
    /// <param name="key">Setting name</param>
    /// <param name="optional">When true, a missing setting yields an empty string instead of an exception</param>
    /// <returns>The setting value, or an empty string if the setting is optional and missing</returns>
    /// <exception cref="ConfigurationException">
    /// The generator is not configured, or a mandatory setting is missing.
    /// </exception>
    private string GetGeneratorSetting(string generator, string key, bool optional = false)
    {
        if (!this.GeneratorsConfig.TryGetValue(generator, out Dictionary<string, string>? settings) || settings is null)
        {
            throw new ConfigurationException($"Embedding generator '{generator}' configuration not found");
        }

        if (settings.TryGetValue(key, out string? value))
        {
            return value;
        }

        if (optional)
        {
            return string.Empty;
        }

        throw new ConfigurationException($"Configuration '{generator}' is missing the '{key}' value");
    }
}
36 changes: 36 additions & 0 deletions lib/dotnet/Core/Configuration/SKMemoryConfig.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,46 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using Microsoft.Extensions.Configuration;

namespace Microsoft.SemanticKernel.SemanticMemory.Core.Configuration;

/// <summary>
/// Semantic memory settings: content storage, orchestration, pipeline handlers and web service options.
/// </summary>
public class SKMemoryConfig
{
    /// <summary>
    /// Settings of the content storage, e.g. Azure Blob or File System details.
    /// </summary>
    public ContentStorageConfig ContentStorage { get; set; } = new();

    /// <summary>
    /// Settings of the memory ingestion pipeline, including the queueing system.
    /// </summary>
    public OrchestrationConfig Orchestration { get; set; } = new();

    /// <summary>
    /// Settings of the memory ingestion pipeline handlers, indexed by handler name,
    /// e.g. settings about chunking, insights, and embeddings.
    /// </summary>
    public Dictionary<string, IConfigurationSection> Handlers { get; set; } = new();

    /// <summary>
    /// Settings of the web service, e.g. whether to expose OpenAPI swagger docs.
    /// </summary>
    public bool OpenApiEnabled { get; set; } = false;

    /// <summary>
    /// Read the configuration of a pipeline handler.
    /// </summary>
    /// <param name="handlerName">Name of the handler, key of <see cref="Handlers"/></param>
    /// <param name="sectionName">Name of the configuration section, inside the handler settings</param>
    /// <typeparam name="T">Type of the handler configuration</typeparam>
    /// <returns>
    /// Configuration data, mapped by .NET configuration binder. A new default instance of
    /// <typeparamref name="T"/> when the handler is unknown or the binder returns no value.
    /// </returns>
    public T GetHandlerConfig<T>(string handlerName, string sectionName) where T : class, new()
    {
        T? bound = null;

        if (this.Handlers.TryGetValue(handlerName, out IConfigurationSection? handlerSection))
        {
            bound = handlerSection.GetSection(sectionName).Get<T>();
        }

        // Unknown handler, or nothing bound: fall back to default settings
        return bound ?? new T();
    }
}
Loading

0 comments on commit 0fc3fb7

Please sign in to comment.