-
Notifications
You must be signed in to change notification settings - Fork 336
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Split embedding generation and storage Add embedding file mime type Code cleanup, fix code style warnings, increase logging, fix dispose
- Loading branch information
Showing
24 changed files
with
558 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Globalization; | ||
|
||
namespace Microsoft.SemanticKernel.SemanticMemory.Core20; | ||
|
||
/// <summary>
/// Options attached to a file import: a user ID, one or more vault IDs, and a request ID.
/// </summary>
public class ImportFileOptions
{
    /// <summary>
    /// User ID. Must be non-empty for <see cref="Validate"/> to succeed.
    /// </summary>
    public string UserId { get; set; } = string.Empty;

    /// <summary>
    /// Vault IDs. Must contain at least one entry for <see cref="Validate"/> to succeed.
    /// </summary>
    public List<string> VaultIds { get; set; } = new();

    /// <summary>
    /// Request ID. When left empty, <see cref="Sanitize"/> generates one.
    /// </summary>
    public string RequestId { get; set; } = string.Empty;

    /// <summary>
    /// Creates an instance with an empty user ID, no vault IDs and an empty request ID.
    /// </summary>
    public ImportFileOptions()
    {
    }

    /// <summary>
    /// Creates an instance for a single vault, with an empty request ID.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultId">Vault ID added to <see cref="VaultIds"/></param>
    public ImportFileOptions(string userId, string vaultId)
        : this(userId, vaultId, string.Empty)
    {
    }

    /// <summary>
    /// Creates an instance for a single vault.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultId">Vault ID added to <see cref="VaultIds"/></param>
    /// <param name="requestId">Request ID</param>
    public ImportFileOptions(string userId, string vaultId, string requestId)
    {
        this.UserId = userId;
        this.VaultIds.Add(vaultId);
        this.RequestId = requestId;
    }

    /// <summary>
    /// Creates an instance for a list of vaults.
    /// </summary>
    /// <param name="userId">User ID</param>
    /// <param name="vaultIds">Vault IDs; the list instance is stored as-is, not copied</param>
    /// <param name="requestId">Request ID</param>
    public ImportFileOptions(string userId, List<string> vaultIds, string requestId)
    {
        this.UserId = userId;
        this.VaultIds = vaultIds;
        this.RequestId = requestId;
    }

    /// <summary>
    /// Fills in <see cref="RequestId"/> with a generated value when it is null or empty.
    /// </summary>
    public void Sanitize()
    {
        if (string.IsNullOrEmpty(this.RequestId))
        {
            // note: the ID doesn't include the full date, to avoid "personal" details
            this.RequestId = Guid.NewGuid().ToString("D") + "-" + DateTimeOffset.UtcNow.ToString("ss.fffffff", CultureInfo.InvariantCulture);
        }
    }

    /// <summary>
    /// Checks that the required values are present.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <see cref="UserId"/> is null or empty, or <see cref="VaultIds"/> is null or has no entries.
    /// </exception>
    public void Validate()
    {
        if (string.IsNullOrEmpty(this.UserId))
        {
            throw new ArgumentNullException(nameof(this.UserId), "User ID is empty");
        }

        // VaultIds can be null when set through the list constructor or the property setter;
        // report that as a validation failure rather than letting a NullReferenceException escape.
        if (this.VaultIds is null || this.VaultIds.Count < 1)
        {
            throw new ArgumentNullException(nameof(this.VaultIds), "The list of vaults is empty");
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System; | ||
|
||
namespace Microsoft.SemanticKernel.SemanticMemory.Core.Configuration; | ||
|
||
/// <summary>
/// Exception type used to report configuration problems.
/// </summary>
public class ConfigurationException : Exception
{
    /// <summary>
    /// Creates a <see cref="ConfigurationException"/> without a custom message or inner exception.
    /// </summary>
    public ConfigurationException()
        : base()
    {
    }

    /// <summary>
    /// Creates a <see cref="ConfigurationException"/> carrying the given <paramref name="message"/>.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    public ConfigurationException(string? message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates a <see cref="ConfigurationException"/> carrying the given <paramref name="message"/>
    /// and wrapping <paramref name="innerException"/>.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="innerException">Exception that caused this one.</param>
    public ConfigurationException(string? message, Exception? innerException)
        : base(message, innerException)
    {
    }
}
134 changes: 134 additions & 0 deletions
134
lib/dotnet/Core/Configuration/EmbeddingGenerationConfig.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.Extensions.Logging; | ||
using Microsoft.Extensions.Logging.Abstractions; | ||
|
||
namespace Microsoft.SemanticKernel.SemanticMemory.Core.Configuration; | ||
|
||
/// <summary>
/// Configuration settings for the embedding generators
/// </summary>
public class EmbeddingGenerationConfig
{
    /// <summary>
    /// List of active generators, out of the full list.
    /// <see cref="GeneratorsConfig"/> might contain settings for several generators, but normally only one is in use.
    /// </summary>
    public List<string> ActiveGenerators { get; set; } = new();

    /// <summary>
    /// Available embedding generators, with settings.
    /// Settings here are stored as string values, and parsed to actual types by <see cref="GetActiveGeneratorsTypedConfig"/>
    /// </summary>
    public Dictionary<string, Dictionary<string, string>> GeneratorsConfig { get; set; } = new();

    /// <summary>
    /// Known embedding generator types.
    /// TODO: add SentenceTransformers
    /// </summary>
    public enum GeneratorTypes
    {
        Unknown = 0,
        AzureOpenAI = 1,
        OpenAI = 2,
    }

    /// <summary>
    /// Azure OpenAI embedding generator settings.
    /// </summary>
    public class AzureOpenAI
    {
        public GeneratorTypes Type { get; } = GeneratorTypes.AzureOpenAI;
        public string APIKey { get; set; } = string.Empty;
        public string Endpoint { get; set; } = string.Empty;
        public string Deployment { get; set; } = string.Empty;
    }

    /// <summary>
    /// OpenAI embedding generator settings.
    /// </summary>
    public class OpenAI
    {
        public GeneratorTypes Type { get; } = GeneratorTypes.OpenAI;
        public string APIKey { get; set; } = string.Empty;
        public string OrgId { get; set; } = string.Empty;
        public string Model { get; set; } = string.Empty;
    }

    /// <summary>
    /// Cast settings from <see cref="GeneratorsConfig"/> to actual typed values.
    /// </summary>
    /// <param name="log">Optional logger</param>
    /// <returns>
    /// Strongly typed view of active generators, keyed by generator name. Each value is an
    /// <see cref="AzureOpenAI"/> or <see cref="OpenAI"/> instance.
    /// </returns>
    /// <exception cref="ConfigurationException">
    /// An active generator has no configuration, is missing a required setting, or has an unsupported type.
    /// </exception>
    public Dictionary<string, object> GetActiveGeneratorsTypedConfig(ILogger? log = null)
    {
        log ??= NullLogger<EmbeddingGenerationConfig>.Instance;

        Dictionary<string, object> result = new();
        foreach (string name in this.ActiveGenerators)
        {
            object generatorConfig = this.GetGeneratorConfig(name);
            result[name] = generatorConfig;

            switch (generatorConfig)
            {
                case AzureOpenAI x:
                    log.LogDebug("Using Azure OpenAI embeddings, deployment: {0}", x.Deployment);
                    break;

                case OpenAI x:
                    log.LogDebug("Using OpenAI embeddings, model: {0}", x.Model);
                    break;
            }
        }

        return result;
    }

    /// <summary>
    /// Build the typed settings object for one generator, based on its "Type" setting.
    /// </summary>
    /// <param name="name">Generator name, used as key in <see cref="GeneratorsConfig"/></param>
    /// <returns>An <see cref="AzureOpenAI"/> or <see cref="OpenAI"/> instance</returns>
    /// <exception cref="ConfigurationException">
    /// The generator is not configured, a required setting is missing, or the type is not supported.
    /// </exception>
    private object GetGeneratorConfig(string name)
    {
        // Read the type through GetGeneratorSetting so that an unknown generator name or a missing
        // "Type" entry is reported as ConfigurationException, like every other setting, rather than
        // as a KeyNotFoundException coming from the dictionary indexer.
        string type = this.GetGeneratorSetting(name, "Type");

        if (string.Equals(type, GeneratorTypes.AzureOpenAI.ToString("G"), StringComparison.OrdinalIgnoreCase))
        {
            return new AzureOpenAI
            {
                APIKey = this.GetGeneratorSetting(name, "APIKey"),
                Endpoint = this.GetGeneratorSetting(name, "Endpoint"),
                Deployment = this.GetGeneratorSetting(name, "Deployment"),
            };
        }

        if (string.Equals(type, GeneratorTypes.OpenAI.ToString("G"), StringComparison.OrdinalIgnoreCase))
        {
            return new OpenAI
            {
                APIKey = this.GetGeneratorSetting(name, "APIKey"),
                OrgId = this.GetGeneratorSetting(name, "OrgId", true),
                Model = this.GetGeneratorSetting(name, "Model"),
            };
        }

        throw new ConfigurationException($"Embedding generator type '{type}' not supported");
    }

    /// <summary>
    /// Read a single string setting of a generator from <see cref="GeneratorsConfig"/>.
    /// </summary>
    /// <param name="generator">Generator name</param>
    /// <param name="key">Setting name</param>
    /// <param name="optional">When true, a missing setting yields an empty string instead of an exception</param>
    /// <returns>The setting value, or an empty string if the setting is missing and optional</returns>
    /// <exception cref="ConfigurationException">
    /// The generator is not configured, or the setting is missing and not optional.
    /// </exception>
    private string GetGeneratorSetting(string generator, string key, bool optional = false)
    {
        if (!this.GeneratorsConfig.TryGetValue(generator, out Dictionary<string, string>? settings))
        {
            throw new ConfigurationException($"Embedding generator '{generator}' configuration not found");
        }

        if (settings.TryGetValue(key, out string? value))
        {
            return value;
        }

        if (optional)
        {
            return string.Empty;
        }

        throw new ConfigurationException($"Configuration '{generator}' is missing the '{key}' value");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,46 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System.Collections.Generic; | ||
using Microsoft.Extensions.Configuration; | ||
|
||
namespace Microsoft.SemanticKernel.SemanticMemory.Core.Configuration; | ||
|
||
/// <summary>
/// Root configuration object: content storage, orchestration, pipeline handlers and web service settings.
/// </summary>
public class SKMemoryConfig
{
    /// <summary>
    /// Settings for content storage, e.g. Azure Blob or File System details.
    /// </summary>
    public ContentStorageConfig ContentStorage { get; set; } = new();

    /// <summary>
    /// Settings for the memory ingestion pipeline, including the queueing system.
    /// </summary>
    public OrchestrationConfig Orchestration { get; set; } = new();

    /// <summary>
    /// Settings for the memory ingestion pipeline handlers, keyed by handler name,
    /// e.g. settings about chunking, insights, and embeddings.
    /// </summary>
    public Dictionary<string, IConfigurationSection> Handlers { get; set; } = new();

    /// <summary>
    /// Web service settings, e.g. whether to expose OpenAPI swagger docs. Disabled by default.
    /// </summary>
    public bool OpenApiEnabled { get; set; } = false;

    /// <summary>
    /// Look up the configuration of a pipeline handler and bind one of its sections to <typeparamref name="T"/>.
    /// </summary>
    /// <param name="handlerName">Handler name, used as key in <see cref="Handlers"/></param>
    /// <param name="sectionName">Name of the configuration section inside the handler settings</param>
    /// <typeparam name="T">Type of handler configuration</typeparam>
    /// <returns>
    /// The section mapped by the .NET configuration binder, or a default-constructed
    /// <typeparamref name="T"/> when the handler is not listed or the binder returns null.
    /// </returns>
    public T GetHandlerConfig<T>(string handlerName, string sectionName) where T : class, new()
    {
        T? boundConfig = this.Handlers.TryGetValue(handlerName, out IConfigurationSection? handlerSection)
            ? handlerSection.GetSection(sectionName).Get<T>()
            : null;

        return boundConfig ?? new T();
    }
}
Oops, something went wrong.