diff --git a/src/transformers/lib/index.ts b/src/transformers/lib/index.ts
index 594ab68..8fd2598 100644
--- a/src/transformers/lib/index.ts
+++ b/src/transformers/lib/index.ts
@@ -1 +1 @@
-export * from './rehypeSqueezeParagraphs'
+export * from './rehypeSqueezeBreaks'
diff --git a/src/transformers/lib/rehypeSqueezeBreaks.ts b/src/transformers/lib/rehypeSqueezeBreaks.ts
new file mode 100644
index 0000000..031e6e4
--- /dev/null
+++ b/src/transformers/lib/rehypeSqueezeBreaks.ts
@@ -0,0 +1,168 @@
+import { type ElementContent, type Root, type RootContent } from 'hast'
+
+/**
+ * Limits accepted by the break-squeezing rehype plugin.
+ */
+export interface RehypeSqueezeBreaksOptions {
+ /** Max consecutive empty paragraphs to keep; `-1` keeps all of them. */
+ maxHardBreaks?: number
+ /** Max consecutive `<br>` kept inside a non-empty paragraph; `-1` keeps all. */
+ maxSoftBreaks?: number
+}
+
+/** True when `node` is a text node that contains nothing but whitespace. */
+const isEmptyText = (node: RootContent) => {
+  if (node.type !== 'text') {
+    return false
+  }
+
+  // no non-whitespace character anywhere in the value
+  return !/\S/.test(node.value)
+}
+
+/** True when `node` is a `<br>` element. */
+const isBr = (node: RootContent) => {
+  if (node.type !== 'element') {
+    return false
+  }
+
+  return node.tagName === 'br'
+}
+
+/**
+ * Whether a paragraph's children hold no visible content, e.g.
+ * - <p></p>
+ * - <p><br></p>
+ * - <p>  </p>
+ * - <p> <br> </p>
+ */
+const isEmptyParagraph = (nodes: ElementContent[]) => {
+  // a child counts as content unless it is a <br> or whitespace-only text;
+  // a paragraph without any children trivially has no content
+  const hasContent = nodes.some((n) => !isBr(n) && !isEmptyText(n))
+
+  return !hasContent
+}
+
+/**
+ * Cap runs of consecutive `<br>` elements among a paragraph's direct children.
+ *
+ * @param children - direct children of a non-empty paragraph
+ * @param maxSoftBreaks - maximum number of `<br>` kept per run; `-1` keeps
+ * every `<br>`, `undefined` returns `children` untouched
+ * @returns the children with surplus `<br>` elements dropped
+ */
+const squeezeSoftBreaks = ({
+  children,
+  maxSoftBreaks,
+}: { children: ElementContent[] } & Pick<
+  RehypeSqueezeBreaksOptions,
+  'maxSoftBreaks'
+>) => {
+  // no limit configured: leave soft breaks alone instead of stripping them all
+  if (maxSoftBreaks === undefined) {
+    return children
+  }
+
+  const newChildren: ElementContent[] = []
+  const isRetainAll = maxSoftBreaks === -1
+  let breakCount = 0
+
+  for (const node of children) {
+    // any non-<br> child ends the current run of breaks
+    if (!isBr(node)) {
+      breakCount = 0
+      newChildren.push(node)
+      continue
+    }
+
+    // cap consecutive soft breaks, or retain all, by adding a fresh <br>
+    breakCount++
+    if (isRetainAll || breakCount <= maxSoftBreaks) {
+      newChildren.push({
+        type: 'element',
+        tagName: 'br',
+        properties: {},
+        children: [],
+      })
+    }
+  }
+
+  return newChildren
+}
+
+/**
+ * Walk a list of block-level nodes, capping runs of consecutive empty
+ * paragraphs (hard breaks) and squeezing `<br>` runs (soft breaks) inside
+ * non-empty paragraphs. Blockquotes are processed recursively.
+ *
+ * @param children - nodes of the root or of a blockquote
+ * @param maxHardBreaks - maximum consecutive empty paragraphs kept; `-1`
+ * keeps all, `undefined` leaves empty paragraphs untouched
+ * @param maxSoftBreaks - limit applied to non-empty paragraphs; when
+ * `undefined` those paragraphs are passed through unchanged
+ * @returns the squeezed node list (the input array itself when neither limit
+ * is set)
+ */
+const squeezeHardBreaks = ({
+  children,
+  maxHardBreaks,
+  maxSoftBreaks,
+}: {
+  children: RootContent[]
+} & RehypeSqueezeBreaksOptions): RootContent[] => {
+  // nothing to squeeze at all
+  if (maxHardBreaks === undefined && maxSoftBreaks === undefined) {
+    return children
+  }
+
+  const newChildren: RootContent[] = []
+  const isRetainAll = maxHardBreaks === -1
+  let breakCount = 0
+
+  for (const node of children) {
+    // whitespace-only text between blocks neither counts nor ends a run
+    if (isEmptyText(node)) {
+      newChildren.push(node)
+      continue
+    }
+
+    // any other non-element node ends the current run of empty paragraphs
+    if (node.type !== 'element') {
+      breakCount = 0
+      newChildren.push(node)
+      continue
+    }
+
+    switch (node.tagName) {
+      case 'blockquote':
+        // a blockquote is content: it separates runs of empty paragraphs
+        breakCount = 0
+        newChildren.push({
+          type: 'element',
+          tagName: 'blockquote',
+          properties: node.properties,
+          children: squeezeHardBreaks({
+            children: node.children,
+            maxHardBreaks,
+            maxSoftBreaks,
+          }) as ElementContent[],
+        })
+        break
+      case 'p':
+        // non-empty paragraph: only its soft breaks are subject to squeezing
+        if (!isEmptyParagraph(node.children)) {
+          breakCount = 0
+          if (maxSoftBreaks === undefined) {
+            // no soft-break limit configured: keep the paragraph as is
+            newChildren.push(node)
+          } else {
+            newChildren.push({
+              type: 'element',
+              tagName: 'p',
+              properties: node.properties,
+              children: squeezeSoftBreaks({
+                children: node.children,
+                maxSoftBreaks,
+              }),
+            })
+          }
+          break
+        }
+
+        // no hard-break limit configured: keep empty paragraphs as they are
+        if (maxHardBreaks === undefined) {
+          newChildren.push(node)
+          break
+        }
+
+        // cap empty paragraphs, or retain all, by adding a normalized
+        // <p><br></p>
+        breakCount++
+        if (isRetainAll || breakCount <= maxHardBreaks) {
+          newChildren.push({
+            type: 'element',
+            tagName: 'p',
+            properties: {},
+            children: [
+              {
+                type: 'element',
+                tagName: 'br',
+                properties: {},
+                children: [],
+              },
+            ],
+          })
+        }
+        break
+
+      // any other element ends the current run of empty paragraphs
+      default:
+        breakCount = 0
+        newChildren.push(node)
+    }
+  }
+
+  return newChildren
+}
+
+/**
+ * Rehype plugin that squeezes hard breaks (empty paragraphs) and soft breaks
+ * (`<br>` inside a paragraph) to a maximum of N.
+ *
+ * e.g. with `{ maxHardBreaks: 1 }`
+ *   <p>1</p><p></p><p></p><p>2</p>
+ * =>
+ *   <p>1</p><p><br></p><p>2</p>
+ *
+ * The tree is left untouched unless at least one of the limits is a number.
+ *
+ * @param props - break limits; may be omitted, in which case the plugin is
+ * a no-op
+ * @returns a transformer that rewrites `tree.children` in place
+ */
+export const rehypeSqueezeBreaks =
+  (props: RehypeSqueezeBreaksOptions = {}) =>
+  (tree: Root) => {
+    if (tree.type !== 'root') {
+      return
+    }
+
+    // neither limit configured: nothing to do
+    if (
+      typeof props.maxHardBreaks !== 'number' &&
+      typeof props.maxSoftBreaks !== 'number'
+    ) {
+      return
+    }
+
+    tree.children = squeezeHardBreaks({ children: tree.children, ...props })
+  }
diff --git a/src/transformers/lib/rehypeSqueezeParagraphs.ts b/src/transformers/lib/rehypeSqueezeParagraphs.ts
deleted file mode 100644
index f872efb..0000000
--- a/src/transformers/lib/rehypeSqueezeParagraphs.ts
+++ /dev/null
@@ -1,75 +0,0 @@
-import { type Root, type RootContent } from 'hast'
-
-/**
- * Squeeze empty paragraphs to a maximum of N
- *
- * e.g.
- *
- * =>
- *
- *
- * @param {number} maxCount: maximum number of empty paragraphs, -1 to retain all
- *
- */
-export const rehypeSqueezeParagraphs =
- ({ maxCount }: { maxCount: number }) =>
- (tree: Root) => {
- if (tree.type !== 'root') {
- return
- }
-
- const children: RootContent[] = []
- const isRetainAll = maxCount < 0
- let count = 0
- let touched = false
-
- tree.children.forEach((node) => {
- // skip empty text nodes
- if (node.type === 'text' && node.value.replace(/\s/g, '') === '') {
- children.push(node)
- return
- }
-
- // skip non-paragraph nodes
- if (node.type !== 'element' || node.tagName !== 'p') {
- count = 0
- children.push(node)
- return
- }
-
- // skip non-empty paragraphs:
- // -
- // -
- const isEmptyParagraph =
- node.children.length === 0 ||
- node.children.every((n) => n.type === 'element' && n.tagName === 'br')
- if (!isEmptyParagraph) {
- count = 0
- children.push(node)
- return
- }
-
- // cap empty paragraphs or retain all by adding
- count++
- if (count <= maxCount || isRetainAll) {
- children.push({
- type: 'element',
- tagName: 'p',
- properties: {},
- children: [
- {
- type: 'element',
- tagName: 'br',
- properties: {},
- children: [],
- },
- ],
- })
- touched = true
- }
- })
-
- if (touched || isRetainAll) {
- tree.children = children
- }
- }
diff --git a/src/transformers/normalize-sanitize.test.ts b/src/transformers/normalize-sanitize.test.ts
index 11ef83d..676f745 100644
--- a/src/transformers/normalize-sanitize.test.ts
+++ b/src/transformers/normalize-sanitize.test.ts
@@ -41,6 +41,23 @@ const expectProcessCommentHTML = (
describe('Sanitize and normalize article', () => {
test('squeeze empty paragraphs', () => {
+ expectProcessArticleHTML(
+ stripIndent`
+ 1
+
+ 2
+
+
+ 3
+ `,
+ stripIndent`
+ 1
+ 2
+ 3
+ `,
+ { maxHardBreaks: 0 },
+ )
+
expectProcessArticleHTML(
stripIndent`
1
@@ -54,7 +71,27 @@ describe('Sanitize and normalize article', () => {
3
`,
- { maxEmptyParagraphs: 1 },
+ { maxHardBreaks: 1 },
+ )
+
+ expectProcessArticleHTML(
+ stripIndent`
+
+ 1
+ 2
+
+ 3
+
+ `,
+ stripIndent`
+
+ 1
+ 2
+
+ 3
+
+ `,
+ { maxHardBreaks: 1 },
)
expectProcessArticleHTML(
@@ -88,7 +125,7 @@ describe('Sanitize and normalize article', () => {
`,
- { maxEmptyParagraphs: 2 },
+ { maxHardBreaks: 2 },
)
})
@@ -128,7 +165,7 @@ describe('Sanitize and normalize article', () => {
`,
- { maxEmptyParagraphs: -1 },
+ { maxHardBreaks: -1 },
)
})
})
@@ -172,4 +209,66 @@ describe('Sanitize and normalize comment', () => {
`,
)
})
+
+ test('squeeze
', () => {
+ expectProcessCommentHTML(
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ stripIndent`
+ 1
+ 2
+ 12
+ 12
+ 1
+ `,
+ { maxHardBreaks: 0, maxSoftBreaks: 0 },
+ )
+
+ // max 1 soft break
+ expectProcessCommentHTML(
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ { maxHardBreaks: 0, maxSoftBreaks: 1 },
+ )
+
+ // blockquote
+ expectProcessCommentHTML(
+ stripIndent`
+
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+
+ `,
+ stripIndent`
+
+ 1
+ 2
+ 12
+ 12
+ 1
+
+ `,
+ { maxHardBreaks: 0, maxSoftBreaks: 0 },
+ )
+ })
})
diff --git a/src/transformers/sanitize.test.ts b/src/transformers/sanitize.test.ts
index e83f7a8..4d94f59 100644
--- a/src/transformers/sanitize.test.ts
+++ b/src/transformers/sanitize.test.ts
@@ -25,6 +25,30 @@ describe('Sanitization: custom', () => {
})
test('squeeze empty paragraphs', () => {
+ // no empty paragraphs
+ expectSanitizeHTML(
+ stripIndent`
+ 1
+
+ 2
+
+ 3
+
+ 4
+
+ 5
+ `,
+ stripIndent`
+ 1
+ 2
+ 3
+ 4
+ 5
+ `,
+ { maxHardBreaks: 0 },
+ )
+
+ // max 1 empty paragraph
expectSanitizeHTML(
stripIndent`
1
@@ -38,9 +62,10 @@ describe('Sanitization: custom', () => {
3
`,
- { maxEmptyParagraphs: 1 },
+ { maxHardBreaks: 1 },
)
+ // max 2 empty paragraphs
expectSanitizeHTML(
stripIndent`
abc
@@ -71,7 +96,29 @@ describe('Sanitization: custom', () => {
`,
- { maxEmptyParagraphs: 2 },
+ { maxHardBreaks: 2 },
+ )
+ })
+
+ test('squeeze empty paragraphs in blockquote', () => {
+ // blockquote
+ expectSanitizeHTML(
+ stripIndent`
+
+ 1
+ 2
+
+ 3
+
+ `,
+ stripIndent`
+
+ 1
+ 2
+ 3
+
+ `,
+ { maxHardBreaks: 0 },
)
})
@@ -110,7 +157,7 @@ describe('Sanitization: custom', () => {
`,
- { maxEmptyParagraphs: -1 },
+ { maxHardBreaks: -1 },
)
})
@@ -151,6 +198,87 @@ describe('Sanitization: custom', () => {
`,
)
})
+
+ test('squeeze
', () => {
+ expectSanitizeHTML(
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ stripIndent`
+ 1
+ 2
+ 12
+ 12
+ 1
+ `,
+ { maxHardBreaks: 0, maxSoftBreaks: 0 },
+ )
+
+ // max 1 soft break
+ expectSanitizeHTML(
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ { maxHardBreaks: 0, maxSoftBreaks: 1 },
+ )
+
+ // retain all
+ expectSanitizeHTML(
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ stripIndent`
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+ `,
+ { maxHardBreaks: 0, maxSoftBreaks: -1 },
+ )
+
+ // blockquote
+ expectSanitizeHTML(
+ stripIndent`
+
+ 1
+ 2
+ 1
2
+ 1
2
+ 1
+
+ `,
+ stripIndent`
+
+ 1
+ 2
+ 12
+ 12
+ 1
+
+ `,
+ { maxHardBreaks: 0, maxSoftBreaks: 0 },
+ )
+ })
})
// via https://github.com/leizongmin/js-xss/blob/master/test/test_xss.js
diff --git a/src/transformers/sanitize.ts b/src/transformers/sanitize.ts
index debb5d3..e9d7c43 100644
--- a/src/transformers/sanitize.ts
+++ b/src/transformers/sanitize.ts
@@ -5,33 +5,26 @@ import rehypeSanitize from 'rehype-sanitize'
import rehypeStringify from 'rehype-stringify'
import { unified } from 'unified'
-import { rehypeSqueezeParagraphs } from './lib'
+import { rehypeSqueezeBreaks, type RehypeSqueezeBreaksOptions } from './lib'
import {
rehypeParseOptions,
rehypeSanitizeOptions,
rehypeStringifyOptions,
} from './options'
-export interface SanitizeHTMLOptions {
- maxEmptyParagraphs?: number
-}
+export type SanitizeHTMLOptions = RehypeSqueezeBreaksOptions
export const sanitizeHTML = (
html: string,
- { maxEmptyParagraphs }: SanitizeHTMLOptions = {},
+ { maxHardBreaks, maxSoftBreaks }: SanitizeHTMLOptions = {},
): string => {
const formatter = unified()
.use(rehypeParse, rehypeParseOptions)
.use(rehypeRaw)
.use(rehypeSanitize, rehypeSanitizeOptions)
-
- if (maxEmptyParagraphs) {
- formatter.use(rehypeSqueezeParagraphs, {
- maxCount: maxEmptyParagraphs,
- })
- }
-
- formatter.use(rehypeFormat).use(rehypeStringify, rehypeStringifyOptions)
+ .use(rehypeSqueezeBreaks, { maxHardBreaks, maxSoftBreaks })
+ .use(rehypeFormat)
+ .use(rehypeStringify, rehypeStringifyOptions)
const result = formatter.processSync(html)
return String(result)