Skip to content

Commit

Permalink
Improve perf of SanitizeKey
Browse files Browse the repository at this point in the history
Provides a more performant implementation of the `SanitizeKey` method
for both `AzureTableStorage` and `CosmosDbStorage`. The new
implementation is faster in all cases and, perhaps more importantly,
allocates zero bytes when the key contains no illegal characters and
guarantees only a single allocation when the key does contain illegal
characters.

Benchmarks:

``` ini

BenchmarkDotNet=v0.10.14, OS=Windows 10.0.17134
Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
  [Host]     : .NET Framework 4.7.1 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3101.0
  DefaultJob : .NET Framework 4.7.1 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3101.0

```
| Method | key | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
|----------------------|-------------------------------------------------------------------------------------------|------------:|-----------:|-----------:|-------:|---------:|-------:|----------:|
| **SanitizeKey_OPTIMIZED** | **bad-key-!@#$%^&*()~/\><,.?';`~** | **379.31 ns** | **8.6554 ns** | **16.2569 ns** | **0.63** | **0.03** | **0.0873** | **368 B** |
| SanitizeKey_ORIGINAL | bad-key-!@#$%^&*()~/\><,.?';`~ | 604.43 ns | 11.9625 ns | 11.7487 ns | 1.00 | 0.00 | 0.0887 | 376 B |
| | | | | | | | | |
| **SanitizeKey_OPTIMIZED** | **bad-this-is-a-realllllllll\y-long-key-that-you-probably-wouldn't-encounter//-in-the-wild#** | **1,040.62 ns** | **15.5311 ns** | **14.5278 ns** | **0.56** | **0.01** | **0.2079** | **880 B** |
| SanitizeKey_ORIGINAL | bad-this-is-a-realllllllll\y-long-key-that-you-probably-wouldn't-encounter//-in-the-wild# | 1,851.81 ns | 7.5866 ns | 5.4856 ns | 1.00 | 0.00 | 0.1755 | 744 B |
| | | | | | | | | |
| **SanitizeKey_OPTIMIZED** | **good-key-123** | **26.44 ns** | **0.5314 ns** | **0.4437 ns** | **0.11** | **0.00** | **-** | **0 B** |
| SanitizeKey_ORIGINAL | good-key-123 | 242.83 ns | 4.9347 ns | 10.5163 ns | 1.00 | 0.00 | 0.0358 | 152 B |
| | | | | | | | | |
| **SanitizeKey_OPTIMIZED** | **good-this-is-a-reallllllllly-long-key-that-you-probably-wouldnt-encounter-in-the-wild#** | **349.20 ns** | **6.5568 ns** | **5.8124 ns** | **0.20** | **0.01** | **0.1101** | **464 B** |
| SanitizeKey_ORIGINAL | good-this-is-a-reallllllllly-long-key-that-you-probably-wouldnt-encounter-in-the-wild# | 1,778.14 ns | 37.4411 ns | 72.1362 ns | 1.00 | 0.00 | 0.1659 | 704 B |
  • Loading branch information
Drew Marsh committed May 22, 2018
1 parent 1f6f55f commit db02fd5
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 31 deletions.
48 changes: 33 additions & 15 deletions libraries/Microsoft.Bot.Builder.Azure/AzureTableStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ namespace Microsoft.Bot.Builder.Azure
/// </summary>
public class AzureTableStorage : IStorage
{
private static readonly char[] IllegalKeyCharacters = new char[] { '\\', '?', '/', '#', '\t', '\n', '\r' };
private static Lazy<Dictionary<char, string>> IllegalKeyCharacterReplacementMap = new Lazy<Dictionary<char, string>>(() => IllegalKeyCharacters.ToDictionary(c => c, c => '%' + ((int)c).ToString("x2")));

private readonly CloudStorageAccount _storageAccount;
private readonly string _tableName;
private CloudTable _table;
Expand Down Expand Up @@ -238,25 +241,40 @@ public EntityKey(string partitionKey, string rowKey)
/// <returns>Sanitized key that can be used as PartitionKey</returns>
public static string SanitizeKey(string key)
{
StringBuilder sb = new StringBuilder();
foreach (char ch in key)
var firstIllegalCharIndex = key.IndexOfAny(IllegalKeyCharacters);

// If there are no illegal characters return immediately and avoid any further processing/allocations
if (firstIllegalCharIndex == -1) return key;

// Allocate a builder that assumes that all remaining characters might be replaced to avoid any extra allocations
var sanitizedKeyBuilder = new StringBuilder(key.Length + (key.Length - firstIllegalCharIndex + 1) * 3);

// Add all good characters up to the first bad character to the builder first
for (int index = 0; index < firstIllegalCharIndex; index++)
{
if (badChars.Value.TryGetValue(ch, out string val))
sb.Append(val);
sanitizedKeyBuilder.Append(key[index]);
}

var illegalCharacterReplacementMap = IllegalKeyCharacterReplacementMap.Value;

// Now walk the remaining characters, starting at the first known bad character, replacing any bad ones with their designated replacement value from the map
for (int index = firstIllegalCharIndex; index < key.Length; index++)
{
var ch = key[index];

// Check if this next character is considered illegal and, if so, append its replacement; otherwise just append the good character as is
if (illegalCharacterReplacementMap.TryGetValue(ch, out var replacement))
{
sanitizedKeyBuilder.Append(replacement);
}
else
sb.Append(ch);
{
sanitizedKeyBuilder.Append(ch);
}
}
return sb.ToString();
}

private static Lazy<Dictionary<char, string>> badChars = new Lazy<Dictionary<char, string>>(() =>
{
char[] badChars = new char[] { '\\', '?', '/', '#', '\t', '\n', '\r' };
var dict = new Dictionary<char, string>();
foreach (var badChar in badChars)
dict[badChar] = '%' + ((int)badChar).ToString("x2");
return dict;
});
return sanitizedKeyBuilder.ToString();
}
}
}
}
50 changes: 34 additions & 16 deletions libraries/Microsoft.Bot.Builder.Azure/CosmosDbStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ namespace Microsoft.Bot.Builder.Azure
/// </summary>
public class CosmosDbStorage : IStorage
{
private static readonly char[] IllegalKeyCharacters = new char[] { '\\', '?', '/', '#', ' ' };
private static Lazy<Dictionary<char, string>> IllegalKeyCharacterReplacementMap = new Lazy<Dictionary<char, string>>(() => IllegalKeyCharacters.ToDictionary(c => c, c => '*' + ((int)c).ToString("x2")));

private readonly string _databaseId;
private readonly string _collectionId;
private readonly DocumentClient _client;
Expand Down Expand Up @@ -213,27 +216,42 @@ private async ValueTask<string> GetCollectionLink()
/// The following characters are restricted and cannot be used in the Id property: '/', '\', '?', '#'
/// More information at https://docs.microsoft.com/en-us/dotnet/api/microsoft.azure.documents.resource.id?view=azure-dotnet#remarks
/// </summary>
private static string SanitizeKey(string key)
public static string SanitizeKey(string key)
{
StringBuilder sb = new StringBuilder();
foreach (char ch in key)
var firstIllegalCharIndex = key.IndexOfAny(IllegalKeyCharacters);

// If there are no illegal characters return immediately and avoid any further processing/allocations
if (firstIllegalCharIndex == -1) return key;

// Allocate a builder that assumes that all remaining characters might be replaced to avoid any extra allocations
var sanitizedKeyBuilder = new StringBuilder(key.Length + (key.Length - firstIllegalCharIndex + 1) * 3);

// Add all good characters up to the first bad character to the builder first
for (int index = 0; index < firstIllegalCharIndex; index++)
{
if (_badChars.Value.TryGetValue(ch, out string val))
sb.Append(val);
sanitizedKeyBuilder.Append(key[index]);
}

var illegalCharacterReplacementMap = IllegalKeyCharacterReplacementMap.Value;

// Now walk the remaining characters, starting at the first known bad character, replacing any bad ones with their designated replacement value from the map
for (int index = firstIllegalCharIndex; index < key.Length; index++)
{
var ch = key[index];

// Check if this next character is considered illegal and, if so, append its replacement; otherwise just append the good character as is
if (illegalCharacterReplacementMap.TryGetValue(ch, out var replacement))
{
sanitizedKeyBuilder.Append(replacement);
}
else
sb.Append(ch);
{
sanitizedKeyBuilder.Append(ch);
}
}
return sb.ToString();
}

private static Lazy<Dictionary<char, string>> _badChars = new Lazy<Dictionary<char, string>>(() =>
{
char[] badChars = new char[] { '\\', '?', '/', '#', ' ' };
var dict = new Dictionary<char, string>();
foreach (var badChar in badChars)
dict[badChar] = '*' + ((int)badChar).ToString("x2");
return dict;
});
return sanitizedKeyBuilder.ToString();
}

/// <summary>
/// Internal data structure for storing items in a CosmosDB Collection.
Expand Down

0 comments on commit db02fd5

Please sign in to comment.