Skip to content

Commit

Permalink
Refactor PDF generation using a new HtmlToPdfConverter (#191)
Browse files Browse the repository at this point in the history
* Rename "Chromium" to "Browser" in config and code

* Updated AppSettings.json to rename "Chromium" section to "Browser" for PDF generation.
* Reflected this change across multiple files including ReportSheetCacheTests.cs, UnitTestHelpers.cs, and ReportSheetCache.cs.
* Updated comments in ReportSheet.cshtml accordingly.
* Simplified AppSettings.json by removing comments about `AllowedUserNameCharacters` and `DefaultLockoutTimeSpan` settings.

Refactor PDF generation

* Updated README.md to reflect the correct Chromium version and added a note about rendering issues with Chromium 131.x.
* Simplified the `EnsureCacheFolder` method in `ReportSheetCache.cs` by removing unnecessary directory existence checks and creation logic.
* Added a new `HtmlToPdfConverter` class in `HtmlToPdfConverter.cs` to encapsulate the logic for converting HTML to PDF using either Puppeteer or a browser command line.
* Refactored the `GetOrCreatePdf` method in `ReportSheetCache.cs` to use a new `HtmlToPdfConverter` class for generating PDF data, replacing the previous Puppeteer and browser command line logic.
* Removed the `GetReportSheetBrowser`, `MovePdfToCache`, `GetReportSheetPuppeteer`, `CreateReportSheetPdfBrowser`, `CreateHtmlFile`, and `DeleteTempPathFolder` methods from `ReportSheetCache.cs` as they are now handled by the new `HtmlToPdfConverter` class.
* Updated the `ReportSheet` action in `Match.cs` to use the new PDF generation logic.
* Updated the Bootstrap CSS link in `ReportSheet.cshtml` to a newer version and adjusted the print styles accordingly.
  • Loading branch information
axunonb authored Sep 28, 2024
1 parent 65f19e7 commit 7557457
Show file tree
Hide file tree
Showing 9 changed files with 247 additions and 171 deletions.
4 changes: 3 additions & 1 deletion League.Demo/Chromium-Win/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Chromium v131.0.6733.0
# Chromium 111.0.5545.0
# Chromium 131.x has issues with rendering dashed or dotted lines

This folder contains the binaries of the Chromium web browser.

It can be downloaded from here:
Expand Down
6 changes: 3 additions & 3 deletions League.Demo/Configuration/AppSettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
},
"User": {
"RequireUniqueEmail": true,
"AllowedUserNameCharacters": "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789öäüÖÄÜß#-._" /* no @; if set to "", all characters are allowed! */
"AllowedUserNameCharacters": "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789öäüÖÄÜß#-._"
},
"Password": {
"RequireDigit": false,
Expand All @@ -26,14 +26,14 @@
},
"Lockout": {
"AllowedForNewUsers": true,
"DefaultLockoutTimeSpan": "0.00:05:00.0000", /* TimeSpan of 5 minutes */
"DefaultLockoutTimeSpan": "0.00:05:00.0000",
"MaxFailedAccessAttempts": 5
}
},
"LeagueUserValidatorOptions": {
"RequiredUsernameLength": 2
},
"Chromium": {
"Browser": {
"ExecutablePath": "Chromium-Win\\chrome.exe"
}
}
8 changes: 4 additions & 4 deletions League.Tests/Caching/ReportSheetCacheTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public ReportSheetCacheTests()
{
_webHostEnvironment = new HostingEnvironment {
WebRootPath = Path.GetTempPath(), ContentRootPath
// Because we use the Chromium installation in the demo web app
// Because we use a Browser installation in the demo web app
= DirectoryLocator.GetTargetProjectPath(typeof(League.WebApp.WebAppStartup))
};

Expand All @@ -34,10 +34,10 @@ public ReportSheetCacheTests()
Identifier = "testorg"
};

var chromiumPath = new List<KeyValuePair<string, string?>>
{ new("Chromium:ExecutablePath", "Chromium-Win\\chrome.exe") };
var browserPath = new List<KeyValuePair<string, string?>>
{ new("Browser:ExecutablePath", "Chromium-Win\\chrome.exe") };

IServiceProvider services = UnitTestHelpers.GetReportSheetCacheServiceProvider(_tenantContext, _webHostEnvironment, chromiumPath);
IServiceProvider services = UnitTestHelpers.GetReportSheetCacheServiceProvider(_tenantContext, _webHostEnvironment, browserPath);
_cache = services.GetRequiredService<ReportSheetCache>();
}

Expand Down
4 changes: 2 additions & 2 deletions League.Tests/TestComponents/UnitTestHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public static ServiceProvider GetTextTemplatingServiceProvider(ITenantContext te
.BuildServiceProvider();
}

public static ServiceProvider GetReportSheetCacheServiceProvider(ITenantContext tenantContext, IWebHostEnvironment webHostEnvironment, IEnumerable<KeyValuePair<string,string?>> chromiumPath)
public static ServiceProvider GetReportSheetCacheServiceProvider(ITenantContext tenantContext, IWebHostEnvironment webHostEnvironment, IEnumerable<KeyValuePair<string,string?>> browserPath)
{
return new ServiceCollection()
.AddLogging(builder =>
Expand All @@ -151,7 +151,7 @@ public static ServiceProvider GetReportSheetCacheServiceProvider(ITenantContext
.AddTransient<IConfiguration>(sp =>
{
var c = new ConfigurationManager();
c.AddInMemoryCollection(chromiumPath);
c.AddInMemoryCollection(browserPath);
return c;
})
.AddTransient<ITenantContext>(sp => tenantContext)
Expand Down
197 changes: 197 additions & 0 deletions League/Caching/HtmlToPdfConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
//
// Copyright Volleyball League Project maintainers and contributors.
// Licensed under the MIT license.
//

namespace League.Caching;

#pragma warning disable CA3003 // reason: False positive due to CancellationToken in GetPdfDataBrowser

/// <summary>
/// Creates PDF data from HTML content.
/// For converting HTML to PDF, it uses either a Browser command line or <see cref="PuppeteerSharp"/>.
/// Each instance works in its own randomly named child folder of the temporary path passed to the
/// constructor. That child folder (including all temporary HTML/PDF files) is deleted on <see cref="Dispose()"/>.
/// </summary>
public class HtmlToPdfConverter : IDisposable
{
    // Static command line switches for the headless Browser.
    // Command line switches overview: https://kapeli.com/cheat_sheets/Chromium_Command_Line_Switches.docset/Contents/Resources/Documents/index
    // or better https://peter.sh/experiments/chromium-command-line-switches/
    private const string BrowserSwitches =
        "--allow-pre-commit-input --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-client-side-phishing-detection --disable-component-extensions-with-background-pages --disable-component-update --disable-default-apps --disable-dev-shm-usage --disable-extensions --disable-features=Translate,BackForwardCache,AcceptCHFrame,MediaRouter,OptimizationHints --disable-hang-monitor --disable-ipc-flooding-protection --disable-popup-blocking --disable-prompt-on-repost --disable-renderer-backgrounding --disable-sync --enable-automation --enable-blink-features=IdleDetection --enable-features=NetworkServiceInProcess2 --export-tagged-pdf --force-color-profile=srgb --metrics-recording-only --no-first-run --password-store=basic --use-mock-keychain --headless --hide-scrollbars --mute-audio --no-sandbox --disable-gpu --use-cmd-decoder=passthrough --no-margins";

    // Maximum time the Browser process gets for writing the PDF file before it is killed.
    private static readonly TimeSpan BrowserTimeout = TimeSpan.FromMilliseconds(5000);

    private readonly string _pathToBrowser;
    private readonly string _tempFolder;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<HtmlToPdfConverter> _logger;
    private bool _isDisposed;

    /// <summary>
    /// Initializes a new instance of the <see cref="HtmlToPdfConverter"/> class.
    /// </summary>
    /// <param name="pathToBrowser">The path to the Browser executable.</param>
    /// <param name="tempPath">The folder where temporary files will be stored. It is created if it does not exist.</param>
    /// <param name="loggerFactory">The factory used to create the logger of this class. It is also handed over to Puppeteer.</param>
    public HtmlToPdfConverter(string pathToBrowser, string tempPath, ILoggerFactory loggerFactory)
    {
        _pathToBrowser = pathToBrowser;
        _loggerFactory = loggerFactory;
        // The logger must be assigned before EnsureTempFolder is called, because that method logs.
        _logger = loggerFactory.CreateLogger<HtmlToPdfConverter>();
        EnsureTempFolder(tempPath);
        _tempFolder = CreateTempPathFolder(tempPath);
        UsePuppeteer = false;
    }

    /// <summary>
    /// Gets or sets a value indicating whether to use Puppeteer for generating the report sheet,
    /// instead of Browser command line.
    /// </summary>
    public bool UsePuppeteer { get; set; }

    /// <summary>
    /// Creates the folder for temporary files, if it does not exist yet.
    /// </summary>
    /// <param name="tempFolder">The folder to create.</param>
    private void EnsureTempFolder(string tempFolder)
    {
        if (Directory.Exists(tempFolder)) return;

        Directory.CreateDirectory(tempFolder);
        _logger.LogDebug("Temporary path '{TempFolder}' created", tempFolder);
    }

    /// <summary>
    /// Creates PDF data from the specified HTML content.
    /// </summary>
    /// <param name="html">The HTML content to convert.</param>
    /// <param name="cancellationToken">The token to cancel the Browser command line conversion. It is not used with Puppeteer.</param>
    /// <returns>The content of the PDF file as a byte array, or <see langword="null"/> if the PDF could not be created.</returns>
    public async Task<byte[]?> GeneratePdfData(string html, CancellationToken cancellationToken)
    {
        var pdfData = UsePuppeteer
            ? await GetPdfDataPuppeteer(html)
            : await GetPdfDataBrowser(html, cancellationToken);

        return pdfData;
    }

    /// <summary>
    /// Writes the HTML content to a temporary file, lets the Browser command line print it to a PDF file,
    /// and reads the PDF file.
    /// </summary>
    /// <returns>The PDF data, or <see langword="null"/> if the Browser failed to create the PDF file.</returns>
    private async Task<byte[]?> GetPdfDataBrowser(string html, CancellationToken cancellationToken)
    {
        var tmpHtmlPath = await CreateHtmlFile(html, cancellationToken);

        try
        {
            var tmpPdfFile = await CreatePdfDataBrowser(tmpHtmlPath, cancellationToken);

            if (tmpPdfFile != null && File.Exists(tmpPdfFile))
                return await File.ReadAllBytesAsync(tmpPdfFile, cancellationToken);

            _logger.LogError("Error creating PDF file with Browser");
            return null;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error creating PDF file with Browser");
            return null;
        }
    }

    /// <summary>
    /// Lets Puppeteer launch the Browser and render the HTML content to PDF data.
    /// </summary>
    /// <returns>The PDF data, or <see langword="null"/> if rendering the PDF failed.</returns>
    private async Task<byte[]?> GetPdfDataPuppeteer(string html)
    {
        var options = new PuppeteerSharp.LaunchOptions
        {
            Headless = true,
            Browser = PuppeteerSharp.SupportedBrowser.Chromium,
            // Alternative: --use-cmd-decoder=validating
            Args = new[] // Chromium-based browsers require using a sandboxed browser for PDF generation, unless sandbox is disabled
                { "--no-sandbox", "--disable-gpu", "--disable-extensions", "--use-cmd-decoder=passthrough" },
            ExecutablePath = _pathToBrowser,
            Timeout = 5000,
            ProtocolTimeout = 10000 // default is 180,000 - used for page.PdfDataAsync
        };
        // Use Puppeteer as a wrapper for the browser, which can generate PDF from HTML
        // Start command line arguments set by Puppeteer v20:
        // --allow-pre-commit-input --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-client-side-phishing-detection --disable-component-extensions-with-background-pages --disable-component-update --disable-default-apps --disable-dev-shm-usage --disable-extensions --disable-field-trial-config --disable-hang-monitor --disable-infobars --disable-ipc-flooding-protection --disable-popup-blocking --disable-prompt-on-repost --disable-renderer-backgrounding --disable-search-engine-choice-screen --disable-sync --enable-automation --enable-blink-features=IdleDetection --export-tagged-pdf --generate-pdf-document-outline --force-color-profile=srgb --metrics-recording-only --no-first-run --password-store=basic --use-mock-keychain --disable-features=Translate,AcceptCHFrame,MediaRouter,OptimizationHints,ProcessPerSiteUpToMainFrameThreshold --enable-features= --headless=new --hide-scrollbars --mute-audio about:blank --no-sandbox --disable-gpu --disable-extensions --use-cmd-decoder=passthrough --remote-debugging-port=0 --user-data-dir="C:\Users\xyz\AppData\Local\Temp\yk1fjkgt.phb"
        await using var browser = await PuppeteerSharp.Puppeteer.LaunchAsync(options, _loggerFactory).ConfigureAwait(false);
        await using var page = await browser.NewPageAsync().ConfigureAwait(false);

        await page.SetContentAsync(html); // Bootstrap 5 is loaded from CDN
        await page.EvaluateExpressionHandleAsync("document.fonts.ready"); // Wait for fonts to be loaded. Omitting this might result in no text rendered in pdf.

        try
        {
            return await page.PdfDataAsync(new PuppeteerSharp.PdfOptions
                { Scale = 1.0M, Format = PuppeteerSharp.Media.PaperFormat.A4 }).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error creating PDF file with Puppeteer");
            return null;
        }
    }

    /// <summary>
    /// Starts the Browser in headless mode and lets it print the HTML file to a temporary PDF file.
    /// </summary>
    /// <param name="htmlFile">The absolute URI of the HTML file to print.</param>
    /// <param name="cancellationToken">The token to cancel waiting for the Browser process.</param>
    /// <returns>
    /// The path of the PDF file the Browser was told to write (the file may not exist if the Browser failed),
    /// or <see langword="null"/> if the process could not be started, timed out or was cancelled.
    /// </returns>
    private async Task<string?> CreatePdfDataBrowser(string htmlFile, CancellationToken cancellationToken)
    {
        // Temporary file for the PDF stream from the Browser
        // Note: a non-existing file is handled by the caller
        var pdfFile = Path.Combine(_tempFolder, Path.GetRandomFileName() + ".pdf");

        // Run the Browser.
        // Paths are quoted, so that folders containing spaces are not split into several arguments.
        var startInfo = new System.Diagnostics.ProcessStartInfo(_pathToBrowser,
                $"{BrowserSwitches} --user-data-dir=\"{_tempFolder}\" --no-pdf-header-footer --print-to-pdf=\"{pdfFile}\" \"{htmlFile}\"")
            { CreateNoWindow = true, UseShellExecute = false };
        using var proc = System.Diagnostics.Process.Start(startInfo);

        if (proc == null)
        {
            _logger.LogError("Process '{PathToBrowser}' could not be started.", _pathToBrowser);
            return null;
        }

        // One token for both, the caller's cancellation and the timeout
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        timeoutCts.CancelAfter(BrowserTimeout);

        try
        {
            await proc.WaitForExitAsync(timeoutCts.Token);
            return pdfFile;
        }
        catch (OperationCanceledException)
        {
            // Timeout or cancellation: never leave a Browser process (and its children) behind
            if (!proc.HasExited) proc.Kill(true);
            return null;
        }
    }

    /// <summary>
    /// Writes the HTML content to a randomly named file in the temporary folder.
    /// </summary>
    /// <returns>The absolute URI of the created HTML file.</returns>
    private async Task<string> CreateHtmlFile(string html, CancellationToken cancellationToken)
    {
        var htmlFile = Path.Combine(_tempFolder, Path.GetRandomFileName() + ".html"); // extension must be "html"
        await File.WriteAllTextAsync(htmlFile, html, cancellationToken);
        return new Uri(htmlFile).AbsoluteUri;
    }

    /// <summary>
    /// Creates a randomly named child folder in the temporary path.
    /// </summary>
    /// <returns>The path of the created child folder.</returns>
    private static string CreateTempPathFolder(string tempPath)
    {
        // Directory.CreateDirectory does nothing, if the folder already exists
        var tempFolder = Path.Combine(tempPath, Path.GetRandomFileName());
        Directory.CreateDirectory(tempFolder);
        return tempFolder;
    }

    /// <summary>
    /// Deletes the child folder in the temporary path including all files in it.
    /// </summary>
    private void DeleteTempPathFolder()
    {
        if (!Directory.Exists(_tempFolder)) return;
        Directory.Delete(_tempFolder, true);
    }

    /// <summary>
    /// Deletes the temporary folder of this instance. Errors are logged and never thrown.
    /// </summary>
    /// <param name="disposing"><see langword="true"/> when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (_isDisposed || !disposing) return;
        _isDisposed = true;

        try
        {
            DeleteTempPathFolder();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error disposing {HtmlToPdfConverter}", nameof(HtmlToPdfConverter));
        }
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }
}

#pragma warning restore CA3003 // reason: False positive due to CancellationToken in GetPdfDataBrowser
Loading

0 comments on commit 7557457

Please sign in to comment.