From 4121cfa7cc8de50ac49712430adf81ea8d971863 Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Tue, 12 Mar 2024 09:47:13 +0800 Subject: [PATCH 1/2] feat: add `<br>`
inside empty paragraph in `rehypeSqueezeParagraphs` --- package.json | 2 +- src/transformers/lib/index.ts | 1 + .../lib/rehypeSqueezeParagraphs.ts | 74 +++++++++++++++ src/transformers/normalize-sanitize.test.ts | 72 +++++++++++++++ src/transformers/normalize.test.ts | 2 +- src/transformers/sanitize.test.ts | 47 ++++++++-- src/transformers/sanitize.ts | 89 ++++--------------- 7 files changed, 206 insertions(+), 81 deletions(-) create mode 100644 src/transformers/lib/index.ts create mode 100644 src/transformers/lib/rehypeSqueezeParagraphs.ts create mode 100644 src/transformers/normalize-sanitize.test.ts diff --git a/package.json b/package.json index 5ff5148..1aef457 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@matters/matters-editor", - "version": "0.2.3-alpha.1", + "version": "0.2.3-alpha.2", "description": "Editor for matters.news", "author": "https://github.com/thematters", "homepage": "https://github.com/thematters/matters-editor", diff --git a/src/transformers/lib/index.ts b/src/transformers/lib/index.ts new file mode 100644 index 0000000..594ab68 --- /dev/null +++ b/src/transformers/lib/index.ts @@ -0,0 +1 @@ +export * from './rehypeSqueezeParagraphs' diff --git a/src/transformers/lib/rehypeSqueezeParagraphs.ts b/src/transformers/lib/rehypeSqueezeParagraphs.ts new file mode 100644 index 0000000..f60e256 --- /dev/null +++ b/src/transformers/lib/rehypeSqueezeParagraphs.ts @@ -0,0 +1,74 @@ +import { type Root, type RootContent } from 'hast' + +/** + * Squeeze empty paragraphs to a maximum of N + * + * e.g. + *

+ * => + *

+ * + * @param {number} maxCount + */ +export const rehypeSqueezeParagraphs = + ({ maxCount }: { maxCount: number }) => + (tree: Root) => { + if (tree.type !== 'root') { + return + } + + const children: RootContent[] = [] + let count = 0 + let touched = false + + tree.children.forEach((node) => { + // skip empty text nodes + if (node.type === 'text' && node.value.replace(/\s/g, '') === '') { + children.push(node) + return + } + + // skip non-paragraph nodes + if (node.type !== 'element' || node.tagName !== 'p') { + count = 0 + children.push(node) + return + } + + // skip non-empty paragraphs: + // -

+ // -


+ const isEmptyParagraph = + node.children.length === 0 || + node.children.every((n) => n.type === 'element' && n.tagName === 'br') + if (!isEmptyParagraph) { + count = 0 + children.push(node) + return + } + + // cap empty paragraphs + count++ + if (count <= maxCount) { + children.push({ + type: 'element', + tagName: 'p', + properties: {}, + children: [ + { + type: 'element', + tagName: 'br', + properties: {}, + children: [], + }, + ], + }) + } else { + touched = true + } + }) + + if (touched) { + tree.children = children + } + } diff --git a/src/transformers/normalize-sanitize.test.ts b/src/transformers/normalize-sanitize.test.ts new file mode 100644 index 0000000..6774330 --- /dev/null +++ b/src/transformers/normalize-sanitize.test.ts @@ -0,0 +1,72 @@ +import { stripIndent } from 'common-tags' +import rehypeFormat from 'rehype-format' +import rehypeParse from 'rehype-parse' +import rehypeRaw from 'rehype-raw' +import rehypeStringify from 'rehype-stringify' +import { unified } from 'unified' +import { describe, expect, test } from 'vitest' + +import { normalizeArticleHTML } from './normalize' +import { rehypeParseOptions, rehypeStringifyOptions } from './options' +import { sanitizeHTML } from './sanitize' + +const formatter = unified() + .use(rehypeParse, rehypeParseOptions) + .use(rehypeRaw) + .use(rehypeFormat) + .use(rehypeStringify, rehypeStringifyOptions) + +const formatHTML = (html: string): string => { + const result = formatter.processSync(html) + return String(result) +} + +const expectProcessArticleHTML = (input: string, output: string) => { + const result = normalizeArticleHTML(sanitizeHTML(input)) + expect(formatHTML(result).trim()).toBe(output) +} + +// const expectProcessCommentHTML = (input: string, output: string) => { +// const result = normalizeCommentHTML(sanitizeHTML(input)) +// expect(formatHTML(result).trim()).toBe(output) +// } + +describe('Sanitize and normalize article', () => { + test('squeeze empty paragraphys', () => { + 
expectProcessArticleHTML( + stripIndent` +

abc

+

+

+ abc +

+

abc

+

+

+

+

abc

+

+


+


+



+




+

+ `, + stripIndent` +

abc

+


+


+

abc

+


+

abc

+


+


+

abc

+


+


+ `, + ) + }) +}) + +// describe('Sanitize and normalize comment', () => {}) diff --git a/src/transformers/normalize.test.ts b/src/transformers/normalize.test.ts index 95c9f6a..7e5a9f6 100644 --- a/src/transformers/normalize.test.ts +++ b/src/transformers/normalize.test.ts @@ -7,7 +7,7 @@ const expectNormalizeArticleHTML = (input: string, output: string) => { expect(result.trim()).toBe(output) } -export const expectNormalizeCommentHTML = (input: string, output: string) => { +const expectNormalizeCommentHTML = (input: string, output: string) => { const result = normalizeCommentHTML(input) expect(result.trim()).toBe(output) } diff --git a/src/transformers/sanitize.test.ts b/src/transformers/sanitize.test.ts index d46029f..05c2bd3 100644 --- a/src/transformers/sanitize.test.ts +++ b/src/transformers/sanitize.test.ts @@ -1,10 +1,14 @@ import { stripIndent } from 'common-tags' import { describe, expect, test } from 'vitest' -import { sanitizeHTML } from './sanitize' - -const expectSanitizeHTML = (input: string, output: string): void => { - const result = sanitizeHTML(input) +import { sanitizeHTML, type SanitizeHTMLOptions } from './sanitize' + +const expectSanitizeHTML = ( + input: string, + output: string, + options?: SanitizeHTMLOptions, +): void => { + const result = sanitizeHTML(input, options) expect(result.trim()).toBe(output) } @@ -40,18 +44,51 @@ describe('Sanitization: custom', () => {




`, + stripIndent` +

abc

+


+


abc +


+

abc

+


+


+

abc

+


+


+ `, + ) + }) + + test('allow max one empty paragraphys', () => { + expectSanitizeHTML( stripIndent`

abc

-

abc +

+ abc

abc

+

abc

+


+


+



+




`, + stripIndent` +

abc

+


abc +


+

abc

+


+

abc

+


+ `, + { maxEmptyParagraphs: 1 }, ) }) }) diff --git a/src/transformers/sanitize.ts b/src/transformers/sanitize.ts index 4dd59fc..0c7a1d4 100644 --- a/src/transformers/sanitize.ts +++ b/src/transformers/sanitize.ts @@ -1,4 +1,3 @@ -import { type Root, type RootContent } from 'hast' import rehypeFormat from 'rehype-format' import rehypeParse from 'rehype-parse' import rehypeRaw from 'rehype-raw' @@ -6,87 +5,29 @@ import rehypeSanitize from 'rehype-sanitize' import rehypeStringify from 'rehype-stringify' import { unified } from 'unified' +import { rehypeSqueezeParagraphs } from './lib' import { rehypeParseOptions, rehypeSanitizeOptions, rehypeStringifyOptions, } from './options' -/** - * Squeeze empty paragraphs to a maximum of N - * - * e.g. - *

- * => - *

- * - * @param {number} maxCount - */ -const rehypeSqueezeParagraphs = - ({ maxCount }: { maxCount: number }) => - (tree: Root) => { - if (tree.type !== 'root') { - return - } - - const children: RootContent[] = [] - let count = 0 - let touched = false - - tree.children.forEach((node) => { - // skip empty text nodes - if (node.type === 'text' && node.value.replace(/\s/g, '') === '') { - children.push(node) - return - } - - // skip non-paragraph nodes - if (node.type !== 'element' || node.tagName !== 'p') { - count = 0 - children.push(node) - return - } - - // skip non-empty paragraphs: - // -

- // -


- const isEmptyParagraph = - node.children.length === 0 || - node.children.every((n) => n.type === 'element' && n.tagName === 'br') - if (!isEmptyParagraph) { - count = 0 - children.push(node) - return - } - - // cap empty paragraphs - count++ - if (count <= maxCount) { - children.push({ - type: 'element', - tagName: 'p', - properties: {}, - children: [], - }) - } else { - touched = true - } - }) - - if (touched) { - tree.children = children - } - } +export interface SanitizeHTMLOptions { + maxEmptyParagraphs?: number +} -const formatter = unified() - .use(rehypeParse, rehypeParseOptions) - .use(rehypeRaw) - .use(rehypeSanitize, rehypeSanitizeOptions) - .use(rehypeSqueezeParagraphs, { maxCount: 2 }) - .use(rehypeFormat) - .use(rehypeStringify, rehypeStringifyOptions) +export const sanitizeHTML = ( + html: string, + { maxEmptyParagraphs }: SanitizeHTMLOptions = {}, +): string => { + const formatter = unified() + .use(rehypeParse, rehypeParseOptions) + .use(rehypeRaw) + .use(rehypeSanitize, rehypeSanitizeOptions) + .use(rehypeSqueezeParagraphs, { maxCount: maxEmptyParagraphs ?? 2 }) + .use(rehypeFormat) + .use(rehypeStringify, rehypeStringifyOptions) -export const sanitizeHTML = (html: string): string => { const result = formatter.processSync(html) return String(result) } From 908da6e0447914821f77b5b9e4e8cd9e8df0aec6 Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Tue, 12 Mar 2024 09:48:53 +0800 Subject: [PATCH 2/2] docs: revise rehypeSqueezeParagraphs doc --- src/transformers/lib/rehypeSqueezeParagraphs.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/lib/rehypeSqueezeParagraphs.ts b/src/transformers/lib/rehypeSqueezeParagraphs.ts index f60e256..0518896 100644 --- a/src/transformers/lib/rehypeSqueezeParagraphs.ts +++ b/src/transformers/lib/rehypeSqueezeParagraphs.ts @@ -6,7 +6,7 @@ import { type Root, type RootContent } from 'hast' * e.g. *

* => - *

+ *



* * @param {number} maxCount */