diff --git a/README.md b/README.md
index 08b80ba..ea3dfd9 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ pagedjs-cli ./path/to/index.html -o result.pdf
 -ho, --hypher_only [str] Only hyphenate passed elements selector, such as ".hyphenate, aside"
 -e, --encoding [type] Set the encoding of the input html, defaults to "utf-8"
 -t, --timeout [ms] Set a max timeout of [ms]
+--outline-tags [tags] Specifies that an outline should be generated for the resulting PDF document. [tags] specifies which HTML tags should be considered for that outline. "h1,h2" will trigger an outline with "h1" tags as root elements and "h2" elements as their children.
 ```
 
 ## Hyphenation
diff --git a/bin/paged b/bin/paged
index 71a10a2..707ae1f 100755
--- a/bin/paged
+++ b/bin/paged
@@ -26,6 +26,11 @@ program
   .option('-t, --timeout [ms]', 'Set a max timeout of [ms]')
   .option('-x, --html', 'output html file')
   .option('-b, --blockLocal', 'Disallow access to filesystem for local files')
+  .option('--outline-tags [tags]', 'Specifies that an outline should be ' +
+    'generated for the resulting PDF document. [tags] specifies which ' +
+    'HTML tags should be considered for that outline. ' +
+    '"h1,h2" will trigger an outline with "h1" tags as root elements ' +
+    'and "h2" elements as their children.')
   .parse(process.argv);
 
 
@@ -122,6 +127,12 @@ if (typeof input === "string") {
       file = await printer.html(input, options);
       output = replaceExt(output, '.html');
     } else {
+      // The flag may be absent or given without [tags], in which case the
+      // value is not a string. Tag names are trimmed and lowercased here.
+      options.outlineTags = (typeof program.outlineTags === 'string' ? program.outlineTags : '')
+        .split(',')
+        .map((tag) => tag.trim().toLowerCase())
+        .filter((tag) => tag.length > 0);
       file = await printer.pdf(input, options);
     }
   } else {
diff --git a/src/postprocesser.js b/src/postprocesser.js
index 635f277..a52886b 100644
--- a/src/postprocesser.js
+++ b/src/postprocesser.js
@@ -203,6 +203,112 @@ class PostProcesser extends EventEmitter {
     console.log(page);
   }
 
+  /**
+   * Adds an outline (bookmark tree) to the generated PDF.
+   *
+   * Ideally this would not be required if Chromium would add this directly.
+   * So if these bugs are closed this can probably be removed again:
+   * - https://bugs.chromium.org/p/chromium/issues/detail?id=840455
+   * - https://github.com/GoogleChrome/puppeteer/issues/1778
+   *
+   * This code is heavily based on @Hopding's comment at:
+   * https://github.com/Hopding/pdf-lib/issues/127#issuecomment-502450179
+   *
+   * @param {Array<Object>} outlineSpec Root entries of the outline tree, each
+   *   shaped like {title, id, children}. Every entry becomes an outline item
+   *   whose destination is the name given by its id. The argument itself is
+   *   not modified; an empty or missing list leaves the document untouched.
+   * @throws {Error} If an entry has no id that can serve as its destination.
+   */
+  addOutline(outlineSpec) {
+    // Without entries there is no first/last item to point at, so no outline
+    // dictionary is written at all.
+    if (!Array.isArray(outlineSpec) || outlineSpec.length === 0) {
+      return;
+    }
+
+    // Work on a deep copy: the ref bookkeeping added below must not leak into
+    // the caller's data.
+    const outline = JSON.parse(JSON.stringify(outlineSpec));
+    const index = this.pdfDoc.index;
+    const outlineReference = index.nextObjectNumber();
+
+    // Number of entries in a layer, including all of their descendants.
+    const countOutlineLayer = (layer) => {
+      let count = 0;
+      for (const outlineEntry of layer) {
+        count += 1 + countOutlineLayer(outlineEntry.children);
+      }
+      return count;
+    };
+
+    // Reserves an object reference for an entry and all of its descendants.
+    // This has to be done before any item is built, because items point at
+    // their siblings and children by reference.
+    const createOutlineReferences = (outlineEntry) => {
+      outlineEntry.children = outlineEntry.children || [];
+      outlineEntry.ref = index.nextObjectNumber();
+      for (const child of outlineEntry.children) {
+        createOutlineReferences(child);
+      }
+    };
+
+    // Builds the dictionary of one entry and registers those of its children.
+    const createOutlineItem = (outlineItem, prev, next, parent) => {
+      if (!outlineItem.id) {
+        throw new Error(`Cannot generate outline item with title '${outlineItem.title}' ` +
+          `without any target anchor. Please specify an 'id' attribute for ` +
+          `the relevant HTML element`);
+      }
+      const item = {
+        Title: PDFLib.PDFString.fromString(outlineItem.title),
+        Parent: parent,
+        Dest: PDFLib.PDFName.from(outlineItem.id),
+      };
+      if (prev) {
+        item.Prev = prev;
+      }
+      if (next) {
+        item.Next = next;
+      }
+      const children = outlineItem.children;
+      if (children.length > 0) {
+        item.First = children[0].ref;
+        item.Last = children[children.length - 1].ref;
+        item.Count = PDFLib.PDFNumber.fromNumber(countOutlineLayer(children));
+        createItemsForOutlineLayer(children, outlineItem.ref);
+      }
+
+      return PDFLib.PDFDictionary.from(item, index);
+    };
+
+    // Builds and registers the items of one layer, chaining neighbours through
+    // Prev/Next.
+    const createItemsForOutlineLayer = (layer, parent) => {
+      layer.forEach((outlineItem, i) => {
+        const prev = i > 0 ? layer[i - 1].ref : null;
+        const next = i < layer.length - 1 ? layer[i + 1].ref : null;
+        index.assign(outlineItem.ref, createOutlineItem(outlineItem, prev, next, parent));
+      });
+    };
+
+    for (const outlineItem of outline) {
+      createOutlineReferences(outlineItem);
+    }
+    createItemsForOutlineLayer(outline, outlineReference);
+
+    const pdfOutline = PDFLib.PDFDictionary.from(
+      {
+        First: outline[0].ref,
+        Last: outline[outline.length - 1].ref,
+        Count: PDFLib.PDFNumber.fromNumber(countOutlineLayer(outline)),
+      },
+      index,
+    );
+    index.assign(outlineReference, pdfOutline);
+    this.pdfDoc.catalog.set('Outlines', outlineReference);
+  }
+
   save() {
     let writer = new PDFDocumentWriter();
     const pdfBytes = writer.saveToBytesWithXRefTable(this.pdfDoc);
diff --git a/src/printer.js b/src/printer.js
index 21f6fe6..b8b4f72 100644
--- a/src/printer.js
+++ b/src/printer.js
@@ -182,6 +182,64 @@ class Printer extends EventEmitter {
     return page;
   }
 
+  /**
+   * Collects the outline (bookmark) tree from the rendered page.
+   *
+   * @param {Object} page Page handle; its evaluate() is used to run the
+   *   extraction against the page's document.
+   * @param {string[]} tags Tag names in nesting order, e.g. ['h1', 'h2']:
+   *   elements of the first tag become root entries, elements of every
+   *   following tag are nested below the closest preceding entry of a
+   *   shallower level. Names are trimmed and compared case-insensitively.
+   * @returns {Promise<Array<Object>>} Root entries shaped like
+   *   {title, id, children, depth}; empty if no usable tag name was given.
+   */
+  async _parseOutline(page, tags) {
+    if (!Array.isArray(tags)) {
+      return [];
+    }
+
+    return await page.evaluate((tags) => {
+      // tagName is lowercased before the lookup below, so normalise the
+      // requested names the same way and drop blank entries, which would make
+      // the selector invalid.
+      const tagOrder = tags
+        .map((tag) => String(tag).trim().toLowerCase())
+        .filter((tag) => tag.length > 0);
+      if (tagOrder.length === 0) {
+        return [];
+      }
+
+      const root = {children: [], depth: -1};
+      // Chain of entries from the root down to the most recently added one.
+      const openEntries = [root];
+
+      for (const element of document.querySelectorAll(tagOrder.join(','))) {
+        const depth = tagOrder.indexOf(element.tagName.toLowerCase());
+        if (depth === -1) {
+          // Matched by something other than a plain tag name, so the element
+          // has no level in the hierarchy and cannot be placed.
+          continue;
+        }
+
+        // Entries at the same or a deeper level cannot contain this element.
+        while (openEntries[openEntries.length - 1].depth >= depth) {
+          openEntries.pop();
+        }
+
+        const entry = {
+          title: (element.innerText || element.textContent || '').trim(),
+          id: element.id,
+          children: [],
+          depth: depth,
+        };
+        openEntries[openEntries.length - 1].children.push(entry);
+        openEntries.push(entry);
+      }
+
+      return root.children;
+    }, tags);
+  }
+
   async pdf(input, options={}) {
     let page = await this.render(input);
 
@@ -201,6 +259,11 @@ class Printer extends EventEmitter {
       return meta;
     });
 
+    // outlineTags is optional, so that callers which pass no such option keep
+    // working; without tags no outline is collected.
+    const outlineTags = Array.isArray(options.outlineTags) ? options.outlineTags : [];
+    const outline = outlineTags.length > 0 ? await this._parseOutline(page, outlineTags) : null;
+
     let settings = {
       printBackground: true,
       displayHeaderFooter: false,
@@ -228,6 +291,9 @@ class Printer extends EventEmitter {
     let post = new PostProcesser(pdf);
     post.metadata(meta);
     post.boxes(this.pages);
+    if (outline && outline.length > 0) {
+      post.addOutline(outline);
+    }
     pdf = post.save();
 
     return pdf;