From 7cdbc98716b5cbd34a0e74ba1a6289f6cc51d772 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Sun, 20 Jun 2021 14:03:59 +0200 Subject: [PATCH] XFA - Match font family correctly - partial fix for https://bugzilla.mozilla.org/show_bug.cgi?id=1716980; - some pdf can contain an invalid font family (e.g. 'Wingdings 3') so in this case remove the space; - the font family in typeface attribute doesn't always match the one defined in the FontDescriptor dictionary. --- src/core/document.js | 5 +- src/core/fonts.js | 2 + src/core/xfa/factory.js | 25 +---- src/core/xfa/fonts.js | 157 ++++++++++++++++++++++++++++++ src/core/xfa/html_utils.js | 17 +++- src/core/xfa/parser.js | 4 + src/core/xfa/template.js | 10 +- src/core/xfa/text.js | 32 +++--- src/core/xfa/xfa_object.js | 5 +- src/core/xfa/xhtml.js | 9 +- test/pdfs/xfa_bug1716980.pdf.link | 1 + test/test_manifest.json | 8 ++ test/unit/xfa_tohtml_spec.js | 3 +- 13 files changed, 221 insertions(+), 57 deletions(-) create mode 100644 src/core/xfa/fonts.js create mode 100644 test/pdfs/xfa_bug1716980.pdf.link diff --git a/src/core/document.js b/src/core/document.js index 764acf5c25b8d..56b00192810ee 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -926,7 +926,9 @@ class PDFDocument { if (!(descriptor instanceof Dict)) { continue; } - const fontFamily = descriptor.get("FontFamily"); + let fontFamily = descriptor.get("FontFamily"); + // For example, "Wingdings 3" is not a valid font name in the css specs.
+ fontFamily = fontFamily.replace(/[ ]+([0-9])/g, "$1"); const fontWeight = descriptor.get("FontWeight"); // Angle is expressed in degrees counterclockwise in PDF @@ -956,6 +958,7 @@ class PDFDocument { }) ); } + await Promise.all(promises); this.xfaFactory.setFonts(pdfFonts); } diff --git a/src/core/fonts.js b/src/core/fonts.js index 13eb5fdf02948..c1f1d22666a77 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -836,6 +836,7 @@ function createNameTable(name, proto) { class Font { constructor(name, file, properties) { this.name = name; + this.psName = null; this.mimetype = null; this.disableFontFace = false; @@ -2730,6 +2731,7 @@ class Font { // ... using existing 'name' table as prototype const namePrototype = readNameTable(tables.name); tables.name.data = createNameTable(name, namePrototype); + this.psName = namePrototype[0][6] || null; } const builder = new OpenTypeFileBuilder(header.version); diff --git a/src/core/xfa/factory.js b/src/core/xfa/factory.js index 286ec844ef011..b4adbff959712 100644 --- a/src/core/xfa/factory.js +++ b/src/core/xfa/factory.js @@ -13,8 +13,9 @@ * limitations under the License. 
*/ -import { $fonts, $toHTML } from "./xfa_object.js"; +import { $globalData, $toHTML } from "./xfa_object.js"; import { Binder } from "./bind.js"; +import { FontFinder } from "./fonts.js"; import { warn } from "../../shared/util.js"; import { XFAParser } from "./parser.js"; @@ -23,6 +24,7 @@ class XFAFactory { try { this.root = new XFAParser().parse(XFAFactory._createDocument(data)); this.form = new Binder(this.root).bind(); + this.form[$globalData].template = this.form; } catch (e) { warn(`XFA - an error occured during parsing and binding: ${e}`); } @@ -56,26 +58,7 @@ class XFAFactory { } setFonts(fonts) { - this.form[$fonts] = Object.create(null); - for (const font of fonts) { - const cssFontInfo = font.cssFontInfo; - const name = cssFontInfo.fontFamily; - if (!this.form[$fonts][name]) { - this.form[$fonts][name] = Object.create(null); - } - let property = "regular"; - if (cssFontInfo.italicAngle !== "0") { - if (parseFloat(cssFontInfo.fontWeight) >= 700) { - property = "bolditalic"; - } else { - property = "italic"; - } - } else if (parseFloat(cssFontInfo.fontWeight) >= 700) { - property = "bold"; - } - - this.form[$fonts][name][property] = font; - } + this.form[$globalData].fontFinder = new FontFinder(fonts); } getPages() { diff --git a/src/core/xfa/fonts.js b/src/core/xfa/fonts.js new file mode 100644 index 0000000000000..0bd68cc75ecc8 --- /dev/null +++ b/src/core/xfa/fonts.js @@ -0,0 +1,157 @@ +/* Copyright 2021 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { warn } from "../../shared/util.js"; + +class FontFinder { + constructor(pdfFonts) { + this.fonts = new Map(); + this.cache = new Map(); + this.warned = new Set(); + this.defaultFont = null; + for (const pdfFont of pdfFonts) { + const cssFontInfo = pdfFont.cssFontInfo; + const name = cssFontInfo.fontFamily; + let font = this.fonts.get(name); + if (!font) { + font = Object.create(null); + this.fonts.set(name, font); + if (!this.defaultFont) { + this.defaultFont = font; + } + } + let property = ""; + if (cssFontInfo.italicAngle !== "0") { + if (parseFloat(cssFontInfo.fontWeight) >= 700) { + property = "bolditalic"; + } else { + property = "italic"; + } + } else if (parseFloat(cssFontInfo.fontWeight) >= 700) { + property = "bold"; + } + + if (!property) { + if ( + pdfFont.name.includes("Bold") || + (pdfFont.psName && pdfFont.psName.includes("Bold")) + ) { + property = "bold"; + } + if ( + pdfFont.name.includes("Italic") || + pdfFont.name.endsWith("It") || + (pdfFont.psName && + (pdfFont.psName.includes("Italic") || + pdfFont.psName.endsWith("It"))) + ) { + property += "italic"; + } + } + + if (!property) { + property = "regular"; + } + + font[property] = pdfFont; + } + + for (const pdfFont of this.fonts.values()) { + if (!pdfFont.regular) { + pdfFont.regular = pdfFont.italic || pdfFont.bold || pdfFont.bolditalic; + } + } + } + + getDefault() { + return this.defaultFont; + } + + find(fontName, mustWarn = true) { + let font = this.fonts.get(fontName) || this.cache.get(fontName); + if (font) { + return font; + } + + const pattern = /,|-| |bolditalic|bold|italic|regular|it/gi; + let name = fontName.replace(pattern, ""); + font = this.fonts.get(name); + if (font) { + this.cache.set(fontName, font); + return font; + } + name = name.toLowerCase(); + + const maybe = []; + for (const [family, pdfFont] of this.fonts.entries()) { + if 
(family.replace(pattern, "").toLowerCase().startsWith(name)) { + maybe.push(pdfFont); + } + } + + if (maybe.length === 0) { + for (const [, pdfFont] of this.fonts.entries()) { + if ( + pdfFont.regular.name && + pdfFont.regular.name + .replace(pattern, "") + .toLowerCase() + .startsWith(name) + ) { + maybe.push(pdfFont); + } + } + } + + if (maybe.length === 0) { + name = name.replace(/psmt|mt/gi, ""); + for (const [family, pdfFont] of this.fonts.entries()) { + if (family.replace(pattern, "").toLowerCase().startsWith(name)) { + maybe.push(pdfFont); + } + } + } + + if (maybe.length === 0) { + for (const pdfFont of this.fonts.values()) { + if ( + pdfFont.regular.name && + pdfFont.regular.name + .replace(pattern, "") + .toLowerCase() + .startsWith(name) + ) { + maybe.push(pdfFont); + } + } + } + + if (maybe.length >= 1) { + if (maybe.length !== 1 && mustWarn) { + warn(`XFA - Too many choices to guess the correct font: ${fontName}`); + } + this.cache.set(fontName, maybe[0]); + return maybe[0]; + } + + if (mustWarn && !this.warned.has(fontName)) { + this.warned.add(fontName); + warn(`XFA - Cannot find the font: ${fontName}`); + } + return null; + } +} + +export { FontFinder }; diff --git a/src/core/xfa/html_utils.js b/src/core/xfa/html_utils.js index b645a580995aa..7bf13d3238db5 100644 --- a/src/core/xfa/html_utils.js +++ b/src/core/xfa/html_utils.js @@ -190,8 +190,8 @@ function setMinMaxDimensions(node, style) { } } -function layoutText(text, xfaFont, fonts, width) { - const measure = new TextMeasure(xfaFont, fonts); +function layoutText(text, xfaFont, fontFinder, width) { + const measure = new TextMeasure(xfaFont, fontFinder); if (typeof text === "string") { measure.addString(text); } else { @@ -448,13 +448,20 @@ function fixTextIndent(styles) { } } -function getFonts(family) { +function getFonts(family, fontFinder) { if (family.startsWith("'") || family.startsWith('"')) { family = family.slice(1, family.length - 1); } - const fonts = [`"${family}"`, 
`"${family}-PdfJS-XFA"`]; - return fonts.join(","); + const pdfFont = fontFinder.find(family); + if (pdfFont) { + const { fontFamily } = pdfFont.regular.cssFontInfo; + if (fontFamily !== family) { + return `"${family}","${fontFamily}"`; + } + } + + return `"${family}"`; } export { diff --git a/src/core/xfa/parser.js b/src/core/xfa/parser.js index 2c2c97289db93..f2459e66edd79 100644 --- a/src/core/xfa/parser.js +++ b/src/core/xfa/parser.js @@ -18,6 +18,7 @@ import { $clean, $content, $finalize, + $globalData, $isCDATAXml, $nsAttributes, $onChild, @@ -33,6 +34,7 @@ class XFAParser extends XMLParserBase { super(); this._builder = new Builder(); this._stack = []; + this._globalData = Object.create(null); this._ids = new Map(); this._current = this._builder.buildRoot(this._ids); this._errorCode = XMLParserErrorCode.NoError; @@ -135,6 +137,7 @@ class XFAParser extends XMLParserBase { namespace, prefixes, }); + node[$globalData] = this._globalData; if (isEmpty) { // No children: just push the node into its parent. @@ -154,6 +157,7 @@ class XFAParser extends XMLParserBase { const node = this._current; if (node[$isCDATAXml]() && typeof node[$content] === "string") { const parser = new XFAParser(); + parser._globalData = this._globalData; const root = parser.parse(node[$content]); node[$content] = null; node[$onChild](root); diff --git a/src/core/xfa/template.js b/src/core/xfa/template.js index ba4f88c0a5a85..0c134100d0e96 100644 --- a/src/core/xfa/template.js +++ b/src/core/xfa/template.js @@ -23,7 +23,6 @@ import { $extra, $finalize, $flushHTML, - $fonts, $getAvailableSpace, $getChildren, $getContainedChildren, @@ -31,6 +30,7 @@ import { $getParent, $getSubformParent, $getTemplateRoot, + $globalData, $hasItem, $hasSettableValue, $ids, @@ -1441,7 +1441,7 @@ class Draw extends XFAObject { if ((this.w === "" || this.h === "") && this.value) { const maxWidth = this.w === "" ? 
availableSpace.width : this.w; - const fonts = this[$getTemplateRoot]()[$fonts]; + const fontFinder = this[$globalData].fontFinder; let font = this.font; if (!font) { let parent = this[$getParent](); @@ -1464,7 +1464,7 @@ class Draw extends XFAObject { const res = layoutText( this.value.exData[$content], font, - fonts, + fontFinder, maxWidth ); width = res.width; @@ -1472,7 +1472,7 @@ class Draw extends XFAObject { } else { const text = this.value[$text](); if (text) { - const res = layoutText(text, font, fonts, maxWidth); + const res = layoutText(text, font, fontFinder, maxWidth); width = res.width; height = res.height; } @@ -2660,7 +2660,7 @@ class Font extends XFAObject { style.fontSize = fontSize; } - style.fontFamily = getFonts(this.typeface); + style.fontFamily = getFonts(this.typeface, this[$globalData].fontFinder); if (this.underline !== 0) { style.textDecoration = "underline"; diff --git a/src/core/xfa/text.js b/src/core/xfa/text.js index 4bf7898179cf9..2cf5dee73e9c5 100644 --- a/src/core/xfa/text.js +++ b/src/core/xfa/text.js @@ -17,19 +17,16 @@ const WIDTH_FACTOR = 1.2; const HEIGHT_FACTOR = 1.2; class FontInfo { - constructor(xfaFont, fonts) { + constructor(xfaFont, fontFinder) { if (!xfaFont) { - [this.pdfFont, this.xfaFont] = this.defaultFont(fonts); + [this.pdfFont, this.xfaFont] = this.defaultFont(fontFinder); return; } this.xfaFont = xfaFont; - let typeface = fonts[xfaFont.typeface]; + const typeface = fontFinder.find(xfaFont.typeface); if (!typeface) { - typeface = fonts[`${xfaFont.typeface}-PdfJS-XFA`]; - } - if (!typeface) { - [this.pdfFont, this.xfaFont] = this.defaultFont(fonts); + [this.pdfFont, this.xfaFont] = this.defaultFont(fontFinder); return; } @@ -47,18 +44,17 @@ class FontInfo { } if (!this.pdfFont) { - [this.pdfFont, this.xfaFont] = this.defaultFont(fonts); + [this.pdfFont, this.xfaFont] = this.defaultFont(fontFinder); } } - defaultFont(fonts) { + defaultFont(fontFinder) { // TODO: Add a default font based on Liberation. 
const font = - fonts.Helvetica || - fonts["Myriad Pro"] || - fonts.Arial || - fonts.ArialMT || - Object.values(fonts)[0]; + fontFinder.find("Helvetica", false) || + fontFinder.find("Myriad Pro", false) || + fontFinder.find("Arial", false) || + fontFinder.getDefault(); if (font && font.regular) { const pdfFont = font.regular; const info = pdfFont.cssFontInfo; @@ -82,9 +78,9 @@ class FontInfo { } class FontSelector { - constructor(defaultXfaFont, fonts) { - this.fonts = fonts; - this.stack = [new FontInfo(defaultXfaFont, fonts)]; + constructor(defaultXfaFont, fontFinder) { + this.fontFinder = fontFinder; + this.stack = [new FontInfo(defaultXfaFont, fontFinder)]; } pushFont(xfaFont) { @@ -95,7 +91,7 @@ class FontSelector { } } - const fontInfo = new FontInfo(xfaFont, this.fonts); + const fontInfo = new FontInfo(xfaFont, this.fontFinder); if (!fontInfo.pdfFont) { fontInfo.pdfFont = lastFont.pdfFont; } diff --git a/src/core/xfa/xfa_object.js b/src/core/xfa/xfa_object.js index c59552abd84ec..07e1700b4b579 100644 --- a/src/core/xfa/xfa_object.js +++ b/src/core/xfa/xfa_object.js @@ -34,7 +34,6 @@ const $dump = Symbol(); const $extra = Symbol("extra"); const $finalize = Symbol(); const $flushHTML = Symbol(); -const $fonts = Symbol(); const $getAttributeIt = Symbol(); const $getAvailableSpace = Symbol(); const $getChildrenByClass = Symbol(); @@ -49,6 +48,7 @@ const $getSubformParent = Symbol(); const $getParent = Symbol(); const $getTemplateRoot = Symbol(); const $global = Symbol(); +const $globalData = Symbol(); const $hasItem = Symbol(); const $hasSettableValue = Symbol(); const $ids = Symbol(); @@ -107,6 +107,7 @@ class XFAObject { this[_parent] = null; this[_children] = []; this[$uid] = `${name}${uid++}`; + this[$globalData] = null; } [$onChild](child) { @@ -986,7 +987,6 @@ export { $extra, $finalize, $flushHTML, - $fonts, $getAttributeIt, $getAvailableSpace, $getChildren, @@ -1001,6 +1001,7 @@ export { $getSubformParent, $getTemplateRoot, $global, + $globalData, 
$hasItem, $hasSettableValue, $ids, diff --git a/src/core/xfa/xhtml.js b/src/core/xfa/xhtml.js index 78bbf0429f204..ce24d68e9fd0b 100644 --- a/src/core/xfa/xhtml.js +++ b/src/core/xfa/xhtml.js @@ -19,6 +19,7 @@ import { $content, $extra, $getChildren, + $globalData, $nodeName, $onText, $pushGlyphs, @@ -91,13 +92,13 @@ const StyleMapping = new Map([ ["margin-right", value => measureToString(getMeasurement(value))], ["margin-top", value => measureToString(getMeasurement(value))], ["text-indent", value => measureToString(getMeasurement(value))], - ["font-family", value => getFonts(value)], + ["font-family", (value, fontFinder) => getFonts(value, fontFinder)], ]); const spacesRegExp = /\s+/g; const crlfRegExp = /[\r\n]+/g; -function mapStyle(styleStr) { +function mapStyle(styleStr, fontFinder) { const style = Object.create(null); if (!styleStr) { return style; @@ -112,7 +113,7 @@ function mapStyle(styleStr) { if (typeof mapping === "string") { newValue = mapping; } else { - newValue = mapping(value); + newValue = mapping(value, fontFinder); } } if (key.endsWith("scale")) { @@ -218,7 +219,7 @@ class XhtmlObject extends XmlObject { name: this[$nodeName], attributes: { href: this.href, - style: mapStyle(this.style), + style: mapStyle(this.style, this[$globalData].fontFinder), }, children, value: this[$content] || "", diff --git a/test/pdfs/xfa_bug1716980.pdf.link b/test/pdfs/xfa_bug1716980.pdf.link new file mode 100644 index 0000000000000..851f3984c15f7 --- /dev/null +++ b/test/pdfs/xfa_bug1716980.pdf.link @@ -0,0 +1 @@ +https://bugzilla.mozilla.org/attachment.cgi?id=9227656 diff --git a/test/test_manifest.json b/test/test_manifest.json index 66f2b817a1938..0cc3b28fe291c 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -938,6 +938,14 @@ "enableXfa": true, "type": "eq" }, + { "id": "xfa_bug1716980", + "file": "pdfs/xfa_bug1716980.pdf", + "md5": "3d7598b9548d78f209d013c485162e9a", + "link": true, + "rounds": 1, + "enableXfa": true, + "type": "eq" + }, { 
"id": "xfa_bug1716809", "file": "pdfs/xfa_bug1716809.pdf", "md5": "7192f9e27e8b84776d107f57cbe353d5", diff --git a/test/unit/xfa_tohtml_spec.js b/test/unit/xfa_tohtml_spec.js index 8e5de87eb5808..3f77e5c5b2ae0 100644 --- a/test/unit/xfa_tohtml_spec.js +++ b/test/unit/xfa_tohtml_spec.js @@ -73,6 +73,7 @@ describe("XFAFactory", function () { `; const factory = new XFAFactory({ "xdp:xdp": xml }); + factory.setFonts([]); expect(factory.numberPages).toEqual(2); @@ -116,7 +117,7 @@ describe("XFAFactory", function () { ]); expect(draw.attributes.style).toEqual({ color: "#0c1722", - fontFamily: '"FooBar","FooBar-PdfJS-XFA"', + fontFamily: '"FooBar"', fontSize: "6.93px", margin: "1px 4px 2px 3px", verticalAlign: "2px",