diff --git a/src/transformers/lib/index.ts b/src/transformers/lib/index.ts
index 594ab68..8fd2598 100644
--- a/src/transformers/lib/index.ts
+++ b/src/transformers/lib/index.ts
@@ -1 +1 @@
-export * from './rehypeSqueezeParagraphs'
+export * from './rehypeSqueezeBreaks'
diff --git a/src/transformers/lib/rehypeSqueezeBreaks.ts b/src/transformers/lib/rehypeSqueezeBreaks.ts
new file mode 100644
index 0000000..031e6e4
--- /dev/null
+++ b/src/transformers/lib/rehypeSqueezeBreaks.ts
@@ -0,0 +1,205 @@
+import { type ElementContent, type Root, type RootContent } from 'hast'
+
+/**
+ * Limits applied by `rehypeSqueezeBreaks`.
+ *
+ * For both fields: `undefined` leaves that kind of break untouched, `-1`
+ * retains every break, `0` removes all of them and a positive N keeps at
+ * most N in a row.
+ */
+export interface RehypeSqueezeBreaksOptions {
+  /** cap on consecutive empty paragraphs ("hard" breaks) */
+  maxHardBreaks?: number
+  /** cap on consecutive `<br>` inside a non-empty paragraph ("soft" breaks) */
+  maxSoftBreaks?: number
+}
+
+// text node holding nothing but whitespace
+const isEmptyText = (node: RootContent) =>
+  node.type === 'text' && node.value.replace(/\s/g, '') === ''
+
+// `<br>` element
+const isBr = (node: RootContent) =>
+  node.type === 'element' && node.tagName === 'br'
+
+const isEmptyParagraph = (nodes: ElementContent[]) => {
+  // empty means no children, or only `<br>` and whitespace-only text:
+  // - <p></p>
+  // - <p> </p>
+  // - <p><br></p>
+  // - <p><br> <br></p>
+  return nodes.length === 0 || nodes.every((n) => isBr(n) || isEmptyText(n))
+}
+
+/**
+ * Cap runs of consecutive `<br>` among the children of a non-empty paragraph.
+ * Any non-`<br>` node (whitespace text included) ends the current run.
+ */
+const squeezeSoftBreaks = ({
+  children,
+  maxSoftBreaks,
+}: { children: ElementContent[] } & Pick<
+  RehypeSqueezeBreaksOptions,
+  'maxSoftBreaks'
+>) => {
+  // no soft-break limit configured: leave the paragraph untouched, rather
+  // than falling through to the cap check, which would drop every `<br>`
+  if (maxSoftBreaks === undefined) {
+    return children
+  }
+
+  const newChildren: ElementContent[] = []
+  const isRetainAll = maxSoftBreaks === -1
+  let breakCount = 0
+
+  for (const node of children) {
+    if (!isBr(node)) {
+      breakCount = 0
+      newChildren.push(node)
+      continue
+    }
+
+    // cap consecutive breaks, or retain all, by adding a clean <br>
+    breakCount++
+    if (isRetainAll || breakCount <= maxSoftBreaks) {
+      newChildren.push({
+        type: 'element',
+        tagName: 'br',
+        properties: {},
+        children: [],
+      })
+    }
+  }
+
+  return newChildren
+}
+
+/**
+ * Cap runs of consecutive empty paragraphs among `children`, squeeze the
+ * soft breaks of non-empty paragraphs, and recurse into blockquotes.
+ */
+const squeezeHardBreaks = ({
+  children,
+  maxHardBreaks,
+  maxSoftBreaks,
+}: {
+  children: Array<RootContent | ElementContent>
+} & RehypeSqueezeBreaksOptions) => {
+  // nothing to squeeze at all
+  if (maxHardBreaks === undefined && maxSoftBreaks === undefined) {
+    return children
+  }
+
+  const newChildren: RootContent[] = []
+  const isRetainAll = maxHardBreaks === -1
+  let breakCount = 0
+
+  for (const node of children) {
+    // whitespace between blocks is kept and does not end a run
+    if (isEmptyText(node)) {
+      newChildren.push(node)
+      continue
+    }
+
+    // skip non-element nodes
+    if (node.type !== 'element') {
+      breakCount = 0
+      newChildren.push(node)
+      continue
+    }
+
+    switch (node.tagName) {
+      case 'blockquote':
+        // a blockquote is content: it ends the run and gets its own count
+        breakCount = 0
+        newChildren.push({
+          type: 'element',
+          tagName: 'blockquote',
+          properties: node.properties,
+          children: squeezeHardBreaks({
+            children: node.children,
+            maxHardBreaks,
+            maxSoftBreaks,
+          }) as ElementContent[],
+        })
+        break
+      case 'p':
+        // non-empty paragraph: ends the run, only soft breaks are squeezed
+        if (!isEmptyParagraph(node.children)) {
+          breakCount = 0
+          newChildren.push({
+            type: 'element',
+            tagName: 'p',
+            properties: node.properties,
+            children: squeezeSoftBreaks({
+              children: node.children,
+              maxSoftBreaks,
+            }),
+          })
+          break
+        }
+
+        // no hard-break limit configured: keep the empty paragraph as is
+        if (maxHardBreaks === undefined) {
+          newChildren.push(node)
+          break
+        }
+
+        // cap empty paragraphs, or retain all, by adding <p><br></p>
+        breakCount++
+        if (!isRetainAll && breakCount > maxHardBreaks) {
+          break
+        }
+
+        newChildren.push({
+          type: 'element',
+          tagName: 'p',
+          properties: {},
+          children: [
+            {
+              type: 'element',
+              tagName: 'br',
+              properties: {},
+              children: [],
+            },
+          ],
+        })
+        break
+
+      // skip non-paragraph node
+      default:
+        breakCount = 0
+        newChildren.push(node)
+    }
+  }
+
+  return newChildren
+}
+
+/**
+ * Squeeze hard breaks (empty paragraphs) and soft breaks (`<br>` inside a
+ * paragraph) to a maximum of N in a row
+ *
+ * e.g. with `maxHardBreaks: 1`
+ * <p>1</p><p></p><p></p><p>2</p>
+ * =>
+ * <p>1</p><p><br></p><p>2</p>
+ *
+ * Does nothing when neither option is a number.
+ */
+export const rehypeSqueezeBreaks =
+  (props: RehypeSqueezeBreaksOptions = {}) =>
+  (tree: Root) => {
+    if (tree.type !== 'root') {
+      return
+    }
+
+    if (
+      typeof props.maxHardBreaks !== 'number' &&
+      typeof props.maxSoftBreaks !== 'number'
+    ) {
+      return
+    }
+
+    tree.children = squeezeHardBreaks({ children: tree.children, ...props })
+  }
diff --git a/src/transformers/lib/rehypeSqueezeParagraphs.ts b/src/transformers/lib/rehypeSqueezeParagraphs.ts
deleted file mode 100644
index f872efb..0000000
--- a/src/transformers/lib/rehypeSqueezeParagraphs.ts
+++ /dev/null
@@ -1,75 +0,0 @@
-import { type Root, type RootContent } from 'hast'
-
-/**
- * Squeeze empty paragraphs to a maximum of N
- *
- * e.g.
- *

- * => - *



- * - * @param {number} maxCount: maximum number of empty paragraphs, -1 to retain all - * - */ -export const rehypeSqueezeParagraphs = - ({ maxCount }: { maxCount: number }) => - (tree: Root) => { - if (tree.type !== 'root') { - return - } - - const children: RootContent[] = [] - const isRetainAll = maxCount < 0 - let count = 0 - let touched = false - - tree.children.forEach((node) => { - // skip empty text nodes - if (node.type === 'text' && node.value.replace(/\s/g, '') === '') { - children.push(node) - return - } - - // skip non-paragraph nodes - if (node.type !== 'element' || node.tagName !== 'p') { - count = 0 - children.push(node) - return - } - - // skip non-empty paragraphs: - // -

- // -


- const isEmptyParagraph = - node.children.length === 0 || - node.children.every((n) => n.type === 'element' && n.tagName === 'br') - if (!isEmptyParagraph) { - count = 0 - children.push(node) - return - } - - // cap empty paragraphs or retain all by adding
- count++ - if (count <= maxCount || isRetainAll) { - children.push({ - type: 'element', - tagName: 'p', - properties: {}, - children: [ - { - type: 'element', - tagName: 'br', - properties: {}, - children: [], - }, - ], - }) - touched = true - } - }) - - if (touched || isRetainAll) { - tree.children = children - } - } diff --git a/src/transformers/normalize-sanitize.test.ts b/src/transformers/normalize-sanitize.test.ts index 11ef83d..676f745 100644 --- a/src/transformers/normalize-sanitize.test.ts +++ b/src/transformers/normalize-sanitize.test.ts @@ -41,6 +41,23 @@ const expectProcessCommentHTML = ( describe('Sanitize and normalize article', () => { test('squeeze empty paragraphs', () => { + expectProcessArticleHTML( + stripIndent` +

1

+

+

2

+

+

+

3

+ `, + stripIndent` +

1

+

2

+

3

+ `, + { maxHardBreaks: 0 }, + ) + expectProcessArticleHTML( stripIndent`

1

@@ -54,7 +71,27 @@ describe('Sanitize and normalize article', () => {


3

`, - { maxEmptyParagraphs: 1 }, + { maxHardBreaks: 1 }, + ) + + expectProcessArticleHTML( + stripIndent` +
+

1

+

2

+

+

3

+
+ `, + stripIndent` +
+

1

+

2

+


+

3

+
+ `, + { maxHardBreaks: 1 }, ) expectProcessArticleHTML( @@ -88,7 +125,7 @@ describe('Sanitize and normalize article', () => {



`, - { maxEmptyParagraphs: 2 }, + { maxHardBreaks: 2 }, ) }) @@ -128,7 +165,7 @@ describe('Sanitize and normalize article', () => {



`, - { maxEmptyParagraphs: -1 }, + { maxHardBreaks: -1 }, ) }) }) @@ -172,4 +209,66 @@ describe('Sanitize and normalize comment', () => { `, ) }) + + test('squeeze
', () => { + expectProcessCommentHTML( + stripIndent` +

1

+

2

+

1
2

+

1

2

+

1

+ `, + stripIndent` +

1

+

2

+

12

+

12

+

1

+ `, + { maxHardBreaks: 0, maxSoftBreaks: 0 }, + ) + + // max 1 soft break + expectProcessCommentHTML( + stripIndent` +

1

+

2

+

1
2

+

1

2

+

1

+ `, + stripIndent` +

1

+

2

+

1
2

+

1
2

+

1

+ `, + { maxHardBreaks: 0, maxSoftBreaks: 1 }, + ) + + // blockquote + expectProcessCommentHTML( + stripIndent` +
+

1

+

2

+

1
2

+

1

2

+

1

+
+ `, + stripIndent` +
+

1

+

2

+

12

+

12

+

1

+
+ `, + { maxHardBreaks: 0, maxSoftBreaks: 0 }, + ) + }) }) diff --git a/src/transformers/sanitize.test.ts b/src/transformers/sanitize.test.ts index e83f7a8..4d94f59 100644 --- a/src/transformers/sanitize.test.ts +++ b/src/transformers/sanitize.test.ts @@ -25,6 +25,30 @@ describe('Sanitization: custom', () => { }) test('squeeze empty paragraphs', () => { + // no empty paragraphs + expectSanitizeHTML( + stripIndent` +

1

+

+

2

+

+

3

+


+

4

+


+

5

+ `, + stripIndent` +

1

+

2

+

3

+

4

+

5

+ `, + { maxHardBreaks: 0 }, + ) + + // max 1 empty paragraph expectSanitizeHTML( stripIndent`

1

@@ -38,9 +62,10 @@ describe('Sanitization: custom', () => {


3

`, - { maxEmptyParagraphs: 1 }, + { maxHardBreaks: 1 }, ) + // max 2 empty paragraphs expectSanitizeHTML( stripIndent`

abc

@@ -71,7 +96,29 @@ describe('Sanitization: custom', () => {



`, - { maxEmptyParagraphs: 2 }, + { maxHardBreaks: 2 }, + ) + }) + + test('squeeze empty paragraphs in blockquote', () => { + // blockquote + expectSanitizeHTML( + stripIndent` +
+

1

+

2

+

+

3

+
+ `, + stripIndent` +
+

1

+

2

+

3

+
+ `, + { maxHardBreaks: 0 }, ) }) @@ -110,7 +157,7 @@ describe('Sanitization: custom', () => {



`, - { maxEmptyParagraphs: -1 }, + { maxHardBreaks: -1 }, ) }) @@ -151,6 +198,87 @@ describe('Sanitization: custom', () => { `, ) }) + + test('squeeze
', () => { + expectSanitizeHTML( + stripIndent` +

1

+

2

+

1
2

+

1

2

+

1

+ `, + stripIndent` +

1

+

2

+

12

+

12

+

1

+ `, + { maxHardBreaks: 0, maxSoftBreaks: 0 }, + ) + + // max 1 soft break + expectSanitizeHTML( + stripIndent` +

1

+

2

+

1
2

+

1

2

+

1

+ `, + stripIndent` +

1

+

2

+

1
2

+

1
2

+

1

+ `, + { maxHardBreaks: 0, maxSoftBreaks: 1 }, + ) + + // retain all + expectSanitizeHTML( + stripIndent` +

1

+

2

+

1
2

+

1

2

+

1

+ `, + stripIndent` +

1

+

2

+

1
2

+

1

2

+

1

+ `, + { maxHardBreaks: 0, maxSoftBreaks: -1 }, + ) + + // blockquote + expectSanitizeHTML( + stripIndent` +
+

1

+

2

+

1
2

+

1

2

+

1

+
+ `, + stripIndent` +
+

1

+

2

+

12

+

12

+

1

+
+ `, + { maxHardBreaks: 0, maxSoftBreaks: 0 }, + ) + }) }) // via https://github.com/leizongmin/js-xss/blob/master/test/test_xss.js diff --git a/src/transformers/sanitize.ts b/src/transformers/sanitize.ts index debb5d3..e9d7c43 100644 --- a/src/transformers/sanitize.ts +++ b/src/transformers/sanitize.ts @@ -5,33 +5,26 @@ import rehypeSanitize from 'rehype-sanitize' import rehypeStringify from 'rehype-stringify' import { unified } from 'unified' -import { rehypeSqueezeParagraphs } from './lib' +import { rehypeSqueezeBreaks, type RehypeSqueezeBreaksOptions } from './lib' import { rehypeParseOptions, rehypeSanitizeOptions, rehypeStringifyOptions, } from './options' -export interface SanitizeHTMLOptions { - maxEmptyParagraphs?: number -} +export type SanitizeHTMLOptions = RehypeSqueezeBreaksOptions export const sanitizeHTML = ( html: string, - { maxEmptyParagraphs }: SanitizeHTMLOptions = {}, + { maxHardBreaks, maxSoftBreaks }: SanitizeHTMLOptions = {}, ): string => { const formatter = unified() .use(rehypeParse, rehypeParseOptions) .use(rehypeRaw) .use(rehypeSanitize, rehypeSanitizeOptions) - - if (maxEmptyParagraphs) { - formatter.use(rehypeSqueezeParagraphs, { - maxCount: maxEmptyParagraphs, - }) - } - - formatter.use(rehypeFormat).use(rehypeStringify, rehypeStringifyOptions) + .use(rehypeSqueezeBreaks, { maxHardBreaks, maxSoftBreaks }) + .use(rehypeFormat) + .use(rehypeStringify, rehypeStringifyOptions) const result = formatter.processSync(html) return String(result)