From 7b454ae4b61ddddee1a51a8fe14f4583bdce000a Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:29:28 +0800 Subject: [PATCH 1/4] feat: add tests for normalizeCommentHTML --- package.json | 2 +- src/transformers/normalize.test.ts | 72 +++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 750bb11..5ff5148 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@matters/matters-editor", - "version": "0.2.3-alpha.0", + "version": "0.2.3-alpha.1", "description": "Editor for matters.news", "author": "https://github.com/thematters", "homepage": "https://github.com/thematters/matters-editor", diff --git a/src/transformers/normalize.test.ts b/src/transformers/normalize.test.ts index e47c019..f431407 100644 --- a/src/transformers/normalize.test.ts +++ b/src/transformers/normalize.test.ts @@ -1,16 +1,21 @@ import { describe, expect, test } from 'vitest' -import { normalizeArticleHTML } from './normalize' +import { normalizeArticleHTML, normalizeCommentHTML } from './normalize' const expectNormalizeArticleHTML = (input: string, output: string) => { const result = normalizeArticleHTML(input) expect(result.trim()).toBe(output) } +export const expectNormalizeCommentHTML = (input: string, output: string) => { + const result = normalizeCommentHTML(input) + expect(result.trim()).toBe(output) +} + /** * Tests */ -describe('Normalization', () => { +describe('Normalization: Article', () => { test('bolds', () => { expectNormalizeArticleHTML( '

abc

', @@ -279,3 +284,66 @@ describe('Normalization', () => { ) }) }) + +describe('Normalization: Comment', () => { + test('bolds is not supported', () => { + expectNormalizeCommentHTML('

abc

', '

abc

') + expectNormalizeCommentHTML('

abc

', '

abc

') + }) + + test('strikethrough is not supported', () => { + expectNormalizeCommentHTML('

abc

', '

abc

') + expectNormalizeCommentHTML('

abc

', '

abc

') + expectNormalizeCommentHTML('

abc

', '

abc

') + }) + + test('italic is not supported', () => { + expectNormalizeCommentHTML('

abc

', '

abc

') + + expectNormalizeCommentHTML('

abc

', '

abc

') + }) + + test('underline is not supported', () => { + expectNormalizeCommentHTML('

abc

', '

abc

') + }) + + test('self-closed tags', () => { + expectNormalizeCommentHTML('

', '

') + + expectNormalizeCommentHTML('

', '


') + + expectNormalizeCommentHTML('
', '

') + + // -> + expectNormalizeCommentHTML( + '
左:女反派。右:女主。
', + '

左:女反派。右:女主。

', + ) + + // + expectNormalizeCommentHTML( + '
', + '

', + ) + }) +}) From a32f702311cd5f1d8467a63ee97ee12bc46e3680 Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:32:09 +0800 Subject: [PATCH 2/4] feat: add more tests to normalizeCommentHTML --- src/transformers/normalize.test.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/transformers/normalize.test.ts b/src/transformers/normalize.test.ts index f431407..95c9f6a 100644 --- a/src/transformers/normalize.test.ts +++ b/src/transformers/normalize.test.ts @@ -286,6 +286,20 @@ describe('Normalization: Article', () => { }) describe('Normalization: Comment', () => { + test('quote', () => { + expectNormalizeCommentHTML( + '

abc

', + '

abc

', + ) + }) + + test('link', () => { + expectNormalizeCommentHTML( + '

abc

', + '

abc

', + ) + }) + test('bolds is not supported', () => { expectNormalizeCommentHTML('

abc

', '

abc

') expectNormalizeCommentHTML('

abc

', '

abc

') From 64881c566fff22e6f898b984c947d67042285f47 Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Mon, 11 Mar 2024 15:53:08 +0800 Subject: [PATCH 3/4] feat: add tests for current empty lines processor --- src/transformers/sanitize.test.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/transformers/sanitize.test.ts b/src/transformers/sanitize.test.ts index 8de3b07..12db7d6 100644 --- a/src/transformers/sanitize.test.ts +++ b/src/transformers/sanitize.test.ts @@ -1,3 +1,4 @@ +import { stripIndent } from 'common-tags' import { describe, expect, test } from 'vitest' import { sanitizeHTML } from './sanitize' @@ -18,6 +19,25 @@ describe('Sanitization: custom', () => { 'pp', ) }) + + test('allow max two empty paragraphys', () => { + expectSanitizeHTML( + '

abc

abc

', + stripIndent` +

abc

+

+

+

+

abc

+

+

+

+

+

+

+ `, + ) + }) }) // via https://github.com/leizongmin/js-xss/blob/master/test/test_xss.js From cd42355ce79a74f21f879d0c328fa7cba495df8c Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Mon, 11 Mar 2024 19:15:50 +0800 Subject: [PATCH 4/4] feat: squeeze empty paragraphs to max 2 --- src/transformers/sanitize.test.ts | 18 +++++++- src/transformers/sanitize.ts | 72 ++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/src/transformers/sanitize.test.ts b/src/transformers/sanitize.test.ts index 12db7d6..d46029f 100644 --- a/src/transformers/sanitize.test.ts +++ b/src/transformers/sanitize.test.ts @@ -22,17 +22,33 @@ describe('Sanitization: custom', () => { test('allow max two empty paragraphys', () => { expectSanitizeHTML( - '

abc

abc

', stripIndent`

abc

+ abc

abc

+

abc

+

+


+


+



+




+

+ `, + stripIndent` +

abc

+

abc +

+

abc

+

+

+

abc

`, diff --git a/src/transformers/sanitize.ts b/src/transformers/sanitize.ts index 37ab3af..4dd59fc 100644 --- a/src/transformers/sanitize.ts +++ b/src/transformers/sanitize.ts @@ -1,3 +1,4 @@ +import { type Root, type RootContent } from 'hast' import rehypeFormat from 'rehype-format' import rehypeParse from 'rehype-parse' import rehypeRaw from 'rehype-raw' @@ -11,14 +12,81 @@ import { rehypeStringifyOptions, } from './options' +/** + * Squeeze empty paragraphs to a maximum of N + * + * e.g. + *

+ * => + *

+ * + * @param {number} maxCount + */ +const rehypeSqueezeParagraphs = + ({ maxCount }: { maxCount: number }) => + (tree: Root) => { + if (tree.type !== 'root') { + return + } + + const children: RootContent[] = [] + let count = 0 + let touched = false + + tree.children.forEach((node) => { + // skip empty text nodes + if (node.type === 'text' && node.value.replace(/\s/g, '') === '') { + children.push(node) + return + } + + // skip non-paragraph nodes + if (node.type !== 'element' || node.tagName !== 'p') { + count = 0 + children.push(node) + return + } + + // skip non-empty paragraphs: + // -

+ // -


+ const isEmptyParagraph = + node.children.length === 0 || + node.children.every((n) => n.type === 'element' && n.tagName === 'br') + if (!isEmptyParagraph) { + count = 0 + children.push(node) + return + } + + // cap empty paragraphs + count++ + if (count <= maxCount) { + children.push({ + type: 'element', + tagName: 'p', + properties: {}, + children: [], + }) + } else { + touched = true + } + }) + + if (touched) { + tree.children = children + } + } + const formatter = unified() .use(rehypeParse, rehypeParseOptions) .use(rehypeRaw) .use(rehypeSanitize, rehypeSanitizeOptions) + .use(rehypeSqueezeParagraphs, { maxCount: 2 }) .use(rehypeFormat) .use(rehypeStringify, rehypeStringifyOptions) -export const sanitizeHTML = (md: string): string => { - const result = formatter.processSync(md) +export const sanitizeHTML = (html: string): string => { + const result = formatter.processSync(html) return String(result) }