Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multiple paths for Azure blob #106

Merged
merged 1 commit into from
Jun 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 63 additions & 24 deletions sql-log-shipping-service/FileHandling/AzureBlobFileHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,53 +6,92 @@
using System.Text;
using System.Threading.Tasks;
using Serilog;
using System.Collections.Concurrent;

namespace LogShippingService.FileHandling
{
internal class AzureBlobFileHandler : FileHandlerBase
{
internal static readonly char[] separator = { '/' };

private static IEnumerable<string> GetFoldersForAzBlob(string prefix)
public static List<string> GetFoldersForAzBlob(List<string> prefixes)
{
var containerUri = new Uri(Config.ContainerUrl + Config.SASToken);
var containerClient = new BlobContainerClient(containerUri);
var containerUri = new Uri(Config.ContainerUrl);
var containerClient = new BlobContainerClient(new Uri($"{containerUri}{Config.SASToken}"));

// Use a thread-safe collection to store folders from multiple threads
var folders = new ConcurrentBag<string>();

// Parallelize the processing of each prefix
Parallel.ForEach(prefixes, prefix =>
{
var results = containerClient.GetBlobsByHierarchy(prefix: prefix, delimiter: "/").AsPages();

var results = containerClient.GetBlobsByHierarchy(prefix: prefix, delimiter: "/").AsPages();
results.SelectMany(blobPage => blobPage.Values)
.Where(item => item.IsPrefix)
.Select(item => item.Prefix.TrimEnd(separator).Split(separator).LastOrDefault())
.Where(folderName => !string.IsNullOrEmpty(folderName))
.Distinct()
.ToList()
.ForEach(folderName => folders.Add(folderName!));
});

return from blobPage in results from item in blobPage.Values select item.Prefix.Split(separator, StringSplitOptions.RemoveEmptyEntries).Last();
return folders.Distinct().ToList();
}

public override IEnumerable<BackupFile> GetFiles(string path, string pattern, DateTime maxAge, bool ascending)
public IEnumerable<BackupFile> GetFiles(List<string> paths, string pattern, DateTime maxAge, bool ascending)
{
var containerUri = new Uri(Config.ContainerUrl + Config.SASToken);
var containerClient = new BlobContainerClient(containerUri);

// Retrieve blobs filtered by path, pattern, and maxAge
var blobItems = containerClient.GetBlobs(BlobTraits.Metadata, BlobStates.None, path)
.Where(blobItem => IsFileNameMatchingPattern(blobItem.Name, pattern) &&
blobItem.Properties.LastModified.GetValueOrDefault(DateTimeOffset.MinValue) >= maxAge);
// Temporarily store the filtered blobs from each path
var allFilteredBlobs = new ConcurrentBag<BlobItem>();

Parallel.ForEach(paths, path =>
{
var blobItems = containerClient.GetBlobs(BlobTraits.Metadata, BlobStates.None, path)
.Where(blobItem => IsFileNameMatchingPattern(blobItem.Name, pattern) &&
blobItem.Properties.LastModified.GetValueOrDefault(DateTimeOffset.MinValue).UtcDateTime>= maxAge);

foreach (var blobItem in blobItems)
{
allFilteredBlobs.Add(blobItem);
}
});

// Sort the blobs based on the ascending flag
var sortedBlobs = ascending
? allFilteredBlobs.OrderBy(blobItem => blobItem.Properties.LastModified.GetValueOrDefault(DateTimeOffset.MinValue).UtcDateTime)
: allFilteredBlobs.OrderByDescending(blobItem => blobItem.Properties.LastModified.GetValueOrDefault(DateTimeOffset.MinValue).UtcDateTime);

// Using switch expression to determine sort order
var sortedBlobItems = ascending switch
// Yield return each BackupFile
foreach (var blobItem in sortedBlobs)
{
true => blobItems.OrderBy(blobItem => blobItem.Properties.LastModified),
false => blobItems.OrderByDescending(blobItem => blobItem.Properties.LastModified)
};

// Map to BackupFile objects
return sortedBlobItems.Select(blobItem => new BackupFile(
$"{Config.ContainerUrl}/{blobItem.Name}",
BackupHeader.DeviceTypes.Url,
blobItem.Properties.LastModified!.Value.UtcDateTime));
yield return new BackupFile(
$"{Config.ContainerUrl}/{blobItem.Name}",
BackupHeader.DeviceTypes.Url,
blobItem.Properties.LastModified!.Value.UtcDateTime);
}
}

public override IEnumerable<BackupFile> GetFiles(string path, string pattern, DateTime maxAge, bool ascending)
{
var paths = path.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries).ToList();
return GetFiles(paths, pattern, maxAge, ascending);
}

protected override IEnumerable<string> GetDatabasesSpecific()
{
if (Config.FullFilePath == null) { return new List<string>(); }
var dbRoot = Config.FullFilePath[..Config.FullFilePath.IndexOf(Config.DatabaseToken, StringComparison.OrdinalIgnoreCase)];
Log.Information("Polling for new databases from Azure Blob. Folders in path: {path}", dbRoot);
return GetFoldersForAzBlob(dbRoot);
var paths = Config.FullFilePath.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
var dbRoots = paths.Select(path =>
{
var tokenIndex = path.IndexOf(Config.DatabaseToken, StringComparison.OrdinalIgnoreCase);
return tokenIndex != -1 ? path[..tokenIndex] : path;
}).ToList();

Log.Information("Polling for new databases from Azure Blob. Folders in path(s): {path}", dbRoots);
return GetFoldersForAzBlob(dbRoots);
}
}
}