From 4121cfa7cc8de50ac49712430adf81ea8d971863 Mon Sep 17 00:00:00 2001
From: robertu <4065233+robertu7@users.noreply.github.com>
Date: Tue, 12 Mar 2024 09:47:13 +0800
Subject: [PATCH 1/2] feat: add `<br>` inside empty paragraph in
 `rehypeSqueezeParagraphs`
---
package.json | 2 +-
src/transformers/lib/index.ts | 1 +
.../lib/rehypeSqueezeParagraphs.ts | 74 +++++++++++++++
src/transformers/normalize-sanitize.test.ts | 72 +++++++++++++++
src/transformers/normalize.test.ts | 2 +-
src/transformers/sanitize.test.ts | 47 ++++++++--
src/transformers/sanitize.ts | 89 ++++---------------
7 files changed, 206 insertions(+), 81 deletions(-)
create mode 100644 src/transformers/lib/index.ts
create mode 100644 src/transformers/lib/rehypeSqueezeParagraphs.ts
create mode 100644 src/transformers/normalize-sanitize.test.ts
diff --git a/package.json b/package.json
index 5ff5148..1aef457 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@matters/matters-editor",
- "version": "0.2.3-alpha.1",
+ "version": "0.2.3-alpha.2",
"description": "Editor for matters.news",
"author": "https://github.com/thematters",
"homepage": "https://github.com/thematters/matters-editor",
diff --git a/src/transformers/lib/index.ts b/src/transformers/lib/index.ts
new file mode 100644
index 0000000..594ab68
--- /dev/null
+++ b/src/transformers/lib/index.ts
@@ -0,0 +1 @@
+export * from './rehypeSqueezeParagraphs'
diff --git a/src/transformers/lib/rehypeSqueezeParagraphs.ts b/src/transformers/lib/rehypeSqueezeParagraphs.ts
new file mode 100644
index 0000000..f60e256
--- /dev/null
+++ b/src/transformers/lib/rehypeSqueezeParagraphs.ts
@@ -0,0 +1,74 @@
+import { type Root, type RootContent } from 'hast'
+
+/**
+ * Squeeze empty paragraphs to a maximum of N
+ *
+ * e.g.
+ *
+ * =>
+ *
+ *
+ * @param {number} maxCount
+ */
+export const rehypeSqueezeParagraphs =
+ ({ maxCount }: { maxCount: number }) =>
+ (tree: Root) => {
+ if (tree.type !== 'root') {
+ return
+ }
+
+ const children: RootContent[] = []
+ let count = 0
+ let touched = false
+
+ tree.children.forEach((node) => {
+ // skip empty text nodes
+ if (node.type === 'text' && node.value.replace(/\s/g, '') === '') {
+ children.push(node)
+ return
+ }
+
+ // skip non-paragraph nodes
+ if (node.type !== 'element' || node.tagName !== 'p') {
+ count = 0
+ children.push(node)
+ return
+ }
+
+ // skip non-empty paragraphs; empty ones look like:
+ // - <p></p>
+ // - <p><br></p>
+ const isEmptyParagraph =
+ node.children.length === 0 ||
+ node.children.every((n) => n.type === 'element' && n.tagName === 'br')
+ if (!isEmptyParagraph) {
+ count = 0
+ children.push(node)
+ return
+ }
+
+ // cap empty paragraphs
+ count++
+ if (count <= maxCount) {
+ children.push({
+ type: 'element',
+ tagName: 'p',
+ properties: {},
+ children: [
+ {
+ type: 'element',
+ tagName: 'br',
+ properties: {},
+ children: [],
+ },
+ ],
+ })
+ } else {
+ touched = true
+ }
+ })
+
+ if (touched) {
+ tree.children = children
+ }
+ }
diff --git a/src/transformers/normalize-sanitize.test.ts b/src/transformers/normalize-sanitize.test.ts
new file mode 100644
index 0000000..6774330
--- /dev/null
+++ b/src/transformers/normalize-sanitize.test.ts
@@ -0,0 +1,72 @@
+import { stripIndent } from 'common-tags'
+import rehypeFormat from 'rehype-format'
+import rehypeParse from 'rehype-parse'
+import rehypeRaw from 'rehype-raw'
+import rehypeStringify from 'rehype-stringify'
+import { unified } from 'unified'
+import { describe, expect, test } from 'vitest'
+
+import { normalizeArticleHTML } from './normalize'
+import { rehypeParseOptions, rehypeStringifyOptions } from './options'
+import { sanitizeHTML } from './sanitize'
+
+const formatter = unified()
+ .use(rehypeParse, rehypeParseOptions)
+ .use(rehypeRaw)
+ .use(rehypeFormat)
+ .use(rehypeStringify, rehypeStringifyOptions)
+
+const formatHTML = (html: string): string => {
+ const result = formatter.processSync(html)
+ return String(result)
+}
+
+const expectProcessArticleHTML = (input: string, output: string) => {
+ const result = normalizeArticleHTML(sanitizeHTML(input))
+ expect(formatHTML(result).trim()).toBe(output)
+}
+
+// const expectProcessCommentHTML = (input: string, output: string) => {
+// const result = normalizeCommentHTML(sanitizeHTML(input))
+// expect(formatHTML(result).trim()).toBe(output)
+// }
+
+describe('Sanitize and normalize article', () => {
+ test('squeeze empty paragraphys', () => {
+ expectProcessArticleHTML(
+ stripIndent`
+ abc
+
+
+ abc
+
+ abc
+
+
+
+ abc
+
+
+
+
+
+
+ `,
+ stripIndent`
+ abc
+
+
+ abc
+
+ abc
+
+
+ abc
+
+
+ `,
+ )
+ })
+})
+
+// describe('Sanitize and normalize comment', () => {})
diff --git a/src/transformers/normalize.test.ts b/src/transformers/normalize.test.ts
index 95c9f6a..7e5a9f6 100644
--- a/src/transformers/normalize.test.ts
+++ b/src/transformers/normalize.test.ts
@@ -7,7 +7,7 @@ const expectNormalizeArticleHTML = (input: string, output: string) => {
expect(result.trim()).toBe(output)
}
-export const expectNormalizeCommentHTML = (input: string, output: string) => {
+const expectNormalizeCommentHTML = (input: string, output: string) => {
const result = normalizeCommentHTML(input)
expect(result.trim()).toBe(output)
}
diff --git a/src/transformers/sanitize.test.ts b/src/transformers/sanitize.test.ts
index d46029f..05c2bd3 100644
--- a/src/transformers/sanitize.test.ts
+++ b/src/transformers/sanitize.test.ts
@@ -1,10 +1,14 @@
import { stripIndent } from 'common-tags'
import { describe, expect, test } from 'vitest'
-import { sanitizeHTML } from './sanitize'
-
-const expectSanitizeHTML = (input: string, output: string): void => {
- const result = sanitizeHTML(input)
+import { sanitizeHTML, type SanitizeHTMLOptions } from './sanitize'
+
+const expectSanitizeHTML = (
+ input: string,
+ output: string,
+ options?: SanitizeHTMLOptions,
+): void => {
+ const result = sanitizeHTML(input, options)
expect(result.trim()).toBe(output)
}
@@ -40,18 +44,51 @@ describe('Sanitization: custom', () => {
`,
+ stripIndent`
+ abc
+
+
abc
+
+ abc
+
+
+ abc
+
+
+ `,
+ )
+ })
+
+ test('allow max one empty paragraphys', () => {
+ expectSanitizeHTML(
stripIndent`
abc
- abc
+
+ abc
abc
+
abc
+
+
+
+
`,
+ stripIndent`
+ abc
+
abc
+
+ abc
+
+ abc
+
+ `,
+ { maxEmptyParagraphs: 1 },
)
})
})
diff --git a/src/transformers/sanitize.ts b/src/transformers/sanitize.ts
index 4dd59fc..0c7a1d4 100644
--- a/src/transformers/sanitize.ts
+++ b/src/transformers/sanitize.ts
@@ -1,4 +1,3 @@
-import { type Root, type RootContent } from 'hast'
import rehypeFormat from 'rehype-format'
import rehypeParse from 'rehype-parse'
import rehypeRaw from 'rehype-raw'
@@ -6,87 +5,29 @@ import rehypeSanitize from 'rehype-sanitize'
import rehypeStringify from 'rehype-stringify'
import { unified } from 'unified'
+import { rehypeSqueezeParagraphs } from './lib'
import {
rehypeParseOptions,
rehypeSanitizeOptions,
rehypeStringifyOptions,
} from './options'
-/**
- * Squeeze empty paragraphs to a maximum of N
- *
- * e.g.
- *
- * =>
- *
- *
- * @param {number} maxCount
- */
-const rehypeSqueezeParagraphs =
- ({ maxCount }: { maxCount: number }) =>
- (tree: Root) => {
- if (tree.type !== 'root') {
- return
- }
-
- const children: RootContent[] = []
- let count = 0
- let touched = false
-
- tree.children.forEach((node) => {
- // skip empty text nodes
- if (node.type === 'text' && node.value.replace(/\s/g, '') === '') {
- children.push(node)
- return
- }
-
- // skip non-paragraph nodes
- if (node.type !== 'element' || node.tagName !== 'p') {
- count = 0
- children.push(node)
- return
- }
-
- // skip non-empty paragraphs:
- // -
- // -
- const isEmptyParagraph =
- node.children.length === 0 ||
- node.children.every((n) => n.type === 'element' && n.tagName === 'br')
- if (!isEmptyParagraph) {
- count = 0
- children.push(node)
- return
- }
-
- // cap empty paragraphs
- count++
- if (count <= maxCount) {
- children.push({
- type: 'element',
- tagName: 'p',
- properties: {},
- children: [],
- })
- } else {
- touched = true
- }
- })
-
- if (touched) {
- tree.children = children
- }
- }
+export interface SanitizeHTMLOptions {
+ maxEmptyParagraphs?: number
+}
-const formatter = unified()
- .use(rehypeParse, rehypeParseOptions)
- .use(rehypeRaw)
- .use(rehypeSanitize, rehypeSanitizeOptions)
- .use(rehypeSqueezeParagraphs, { maxCount: 2 })
- .use(rehypeFormat)
- .use(rehypeStringify, rehypeStringifyOptions)
+export const sanitizeHTML = (
+ html: string,
+ { maxEmptyParagraphs }: SanitizeHTMLOptions = {},
+): string => {
+ const formatter = unified()
+ .use(rehypeParse, rehypeParseOptions)
+ .use(rehypeRaw)
+ .use(rehypeSanitize, rehypeSanitizeOptions)
+ .use(rehypeSqueezeParagraphs, { maxCount: maxEmptyParagraphs ?? 2 })
+ .use(rehypeFormat)
+ .use(rehypeStringify, rehypeStringifyOptions)
-export const sanitizeHTML = (html: string): string => {
const result = formatter.processSync(html)
return String(result)
}
From 908da6e0447914821f77b5b9e4e8cd9e8df0aec6 Mon Sep 17 00:00:00 2001
From: robertu <4065233+robertu7@users.noreply.github.com>
Date: Tue, 12 Mar 2024 09:48:53 +0800
Subject: [PATCH 2/2] docs: revise rehypeSqueezeParagraphs doc
---
src/transformers/lib/rehypeSqueezeParagraphs.ts | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/transformers/lib/rehypeSqueezeParagraphs.ts b/src/transformers/lib/rehypeSqueezeParagraphs.ts
index f60e256..0518896 100644
--- a/src/transformers/lib/rehypeSqueezeParagraphs.ts
+++ b/src/transformers/lib/rehypeSqueezeParagraphs.ts
@@ -6,7 +6,7 @@ import { type Root, type RootContent } from 'hast'
* e.g.
*
* =>
- *
+ *
*
* @param {number} maxCount
*/