Skip to content

Commit

Permalink
feat: add <br> inside empty paragraph in rehypeSqueezeParagraphs
Browse files Browse the repository at this point in the history
  • Loading branch information
robertu7 committed Mar 12, 2024
1 parent cd42355 commit 4121cfa
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 81 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@matters/matters-editor",
"version": "0.2.3-alpha.1",
"version": "0.2.3-alpha.2",
"description": "Editor for matters.news",
"author": "https://github.com/thematters",
"homepage": "https://github.com/thematters/matters-editor",
Expand Down
1 change: 1 addition & 0 deletions src/transformers/lib/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './rehypeSqueezeParagraphs'
74 changes: 74 additions & 0 deletions src/transformers/lib/rehypeSqueezeParagraphs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { type Root, type RootContent } from 'hast'

/**
 * Squeeze each run of consecutive empty paragraphs down to at most `maxCount`
 * and normalize every empty paragraph that is kept to `<p><br></p>`.
 *
 * A paragraph counts as empty when it has no children or when all of its
 * children are `<br>` elements. Whitespace-only text nodes between paragraphs
 * are kept as-is and do not interrupt a run; any other node ends the run.
 *
 * e.g. (maxCount = 2)
 * <p></p><p></p><p></p><p><br><br></p><p></p><p></p>
 * =>
 * <p><br></p><p><br></p>
 *
 * @param maxCount - maximum number of consecutive empty paragraphs to keep
 * @returns a transformer that rewrites the children of a `root` node in place;
 * trees whose top-level node is not a `root` are left untouched
 */
export const rehypeSqueezeParagraphs =
  ({ maxCount }: { maxCount: number }) =>
  (tree: Root) => {
    if (tree.type !== 'root') {
      return
    }

    const children: RootContent[] = []
    let count = 0

    tree.children.forEach((node) => {
      // whitespace-only text nodes pass through without ending the current run
      if (node.type === 'text' && node.value.replace(/\s/g, '') === '') {
        children.push(node)
        return
      }

      // any non-paragraph node ends the run of empty paragraphs
      if (node.type !== 'element' || node.tagName !== 'p') {
        count = 0
        children.push(node)
        return
      }

      // empty paragraphs are:
      // - <p></p>
      // - <p><br/></p> (one or more <br>, nothing else)
      const isEmptyParagraph =
        node.children.length === 0 ||
        node.children.every((n) => n.type === 'element' && n.tagName === 'br')

      // a paragraph with real content also ends the run
      if (!isEmptyParagraph) {
        count = 0
        children.push(node)
        return
      }

      // cap the run; every kept empty paragraph is rebuilt as <p><br></p>
      count++
      if (count <= maxCount) {
        children.push({
          type: 'element',
          tagName: 'p',
          properties: {},
          children: [
            {
              type: 'element',
              tagName: 'br',
              properties: {},
              children: [],
            },
          ],
        })
      }
    })

    // Always commit the rebuilt list. Writing it back only when a paragraph
    // was dropped would leave in-cap empty paragraphs without their <br>.
    tree.children = children
  }
72 changes: 72 additions & 0 deletions src/transformers/normalize-sanitize.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { stripIndent } from 'common-tags'
import rehypeFormat from 'rehype-format'
import rehypeParse from 'rehype-parse'
import rehypeRaw from 'rehype-raw'
import rehypeStringify from 'rehype-stringify'
import { unified } from 'unified'
import { describe, expect, test } from 'vitest'

import { normalizeArticleHTML } from './normalize'
import { rehypeParseOptions, rehypeStringifyOptions } from './options'
import { sanitizeHTML } from './sanitize'

// Pipeline used by formatHTML below: parse the HTML with rehypeParse, run the
// tree through rehypeRaw and rehypeFormat, then serialize it with
// rehypeStringify. Parse/stringify options come from ./options.
const formatter = unified()
.use(rehypeParse, rehypeParseOptions)
.use(rehypeRaw)
.use(rehypeFormat)
.use(rehypeStringify, rehypeStringifyOptions)

// Runs `html` synchronously through the formatter pipeline and returns the
// serialized result as a string.
const formatHTML = (html: string): string =>
  String(formatter.processSync(html))

// Sanitizes `input`, normalizes it as article HTML, formats the result and
// asserts that the trimmed output equals `output`.
const expectProcessArticleHTML = (input: string, output: string) => {
  const sanitized = sanitizeHTML(input)
  const normalized = normalizeArticleHTML(sanitized)
  const formatted = formatHTML(normalized).trim()
  expect(formatted).toBe(output)
}

// const expectProcessCommentHTML = (input: string, output: string) => {
// const result = normalizeCommentHTML(sanitizeHTML(input))
// expect(formatHTML(result).trim()).toBe(output)
// }

// End-to-end check of sanitizeHTML followed by normalizeArticleHTML
// (both applied by expectProcessArticleHTML).
describe('Sanitize and normalize article', () => {
// Each run of empty paragraphs (<p></p> or paragraphs holding only <br>) is
// expected to come out as at most two `<p><br class="smart"></p>`, and the
// bare `abc` text line is expected to come out wrapped in its own <p>.
test('squeeze empty paragraphys', () => {
expectProcessArticleHTML(
// input: paragraphs separated by runs of 1 to 6 empty paragraphs
stripIndent`
<p>abc</p>
<p></p>
<p></p>
abc
<p></p>
<p>abc</p>
<p></p>
<p></p>
<p></p>
<p>abc</p>
<p></p>
<p><br></p>
<p><br/></p>
<p><br></br></p>
<p><br/><br/><br/></p>
<p></p>
`,
// expected output after sanitizing, normalizing and formatting
stripIndent`
<p>abc</p>
<p><br class="smart"></p>
<p><br class="smart"></p>
<p>abc</p>
<p><br class="smart"></p>
<p>abc</p>
<p><br class="smart"></p>
<p><br class="smart"></p>
<p>abc</p>
<p><br class="smart"></p>
<p><br class="smart"></p>
`,
)
})
})

// describe('Sanitize and normalize comment', () => {})
2 changes: 1 addition & 1 deletion src/transformers/normalize.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const expectNormalizeArticleHTML = (input: string, output: string) => {
expect(result.trim()).toBe(output)
}

export const expectNormalizeCommentHTML = (input: string, output: string) => {
const expectNormalizeCommentHTML = (input: string, output: string) => {
const result = normalizeCommentHTML(input)
expect(result.trim()).toBe(output)
}
Expand Down
47 changes: 42 additions & 5 deletions src/transformers/sanitize.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import { stripIndent } from 'common-tags'
import { describe, expect, test } from 'vitest'

import { sanitizeHTML } from './sanitize'

const expectSanitizeHTML = (input: string, output: string): void => {
const result = sanitizeHTML(input)
import { sanitizeHTML, type SanitizeHTMLOptions } from './sanitize'

// Sanitizes `input` with the optional `options` and asserts that the trimmed
// result equals `output`.
const expectSanitizeHTML = (
  input: string,
  output: string,
  options?: SanitizeHTMLOptions,
): void => {
  const sanitized = sanitizeHTML(input, options).trim()
  expect(sanitized).toBe(output)
}

Expand Down Expand Up @@ -40,18 +44,51 @@ describe('Sanitization: custom', () => {
<p><br/><br/><br/></p>
<p></p>
`,
stripIndent`
<p>abc</p>
<p><br></p>
<p><br></p>abc
<p><br></p>
<p>abc</p>
<p><br></p>
<p><br></p>
<p>abc</p>
<p><br></p>
<p><br></p>
`,
)
})

test('allow max one empty paragraphys', () => {
expectSanitizeHTML(
stripIndent`
<p>abc</p>
<p></p>
<p></p>abc
<p></p>
abc
<p></p>
<p>abc</p>
<p></p>
<p></p>
<p></p>
<p>abc</p>
<p></p>
<p><br></p>
<p><br/></p>
<p><br></br></p>
<p><br/><br/><br/></p>
<p></p>
`,
stripIndent`
<p>abc</p>
<p><br></p>abc
<p><br></p>
<p>abc</p>
<p><br></p>
<p>abc</p>
<p><br></p>
`,
{ maxEmptyParagraphs: 1 },
)
})
})
Expand Down
89 changes: 15 additions & 74 deletions src/transformers/sanitize.ts
Original file line number Diff line number Diff line change
@@ -1,92 +1,33 @@
import { type Root, type RootContent } from 'hast'
import rehypeFormat from 'rehype-format'
import rehypeParse from 'rehype-parse'
import rehypeRaw from 'rehype-raw'
import rehypeSanitize from 'rehype-sanitize'
import rehypeStringify from 'rehype-stringify'
import { unified } from 'unified'

import { rehypeSqueezeParagraphs } from './lib'
import {
rehypeParseOptions,
rehypeSanitizeOptions,
rehypeStringifyOptions,
} from './options'

/**
 * Limit every run of consecutive empty paragraphs to `maxCount` entries.
 *
 * A paragraph is empty when all of its children are `<br>` elements (which
 * includes having no children at all). Whitespace-only text nodes are kept and
 * do not end a run; any other node does. The root's children are replaced only
 * when at least one paragraph was dropped; in that case every kept empty
 * paragraph is emitted as a bare `<p></p>`.
 *
 * e.g.
 * <p></p><p></p><p></p><p></p><p></p><p></p>
 * =>
 * <p></p><p></p>
 *
 * @param maxCount - maximum number of consecutive empty paragraphs to keep
 */
const rehypeSqueezeParagraphs =
  ({ maxCount }: { maxCount: number }) =>
  (tree: Root) => {
    if (tree.type !== 'root') {
      return
    }

    const kept: RootContent[] = []
    let run = 0
    let dropped = false

    for (const child of tree.children) {
      // whitespace-only text: keep it, leave the current run untouched
      if (child.type === 'text' && child.value.replace(/\s/g, '') === '') {
        kept.push(child)
        continue
      }

      // `every` is true for an empty child list, so <p></p> is covered too
      const isEmptyParagraph =
        child.type === 'element' &&
        child.tagName === 'p' &&
        child.children.every(
          (inner) => inner.type === 'element' && inner.tagName === 'br',
        )

      // anything that is not an empty paragraph ends the run
      if (!isEmptyParagraph) {
        run = 0
        kept.push(child)
        continue
      }

      run += 1
      if (run <= maxCount) {
        kept.push({
          type: 'element',
          tagName: 'p',
          properties: {},
          children: [],
        })
      } else {
        dropped = true
      }
    }

    if (dropped) {
      tree.children = kept
    }
  }
/** Options accepted by `sanitizeHTML`. */
export interface SanitizeHTMLOptions {
/**
 * Passed to `rehypeSqueezeParagraphs` as `maxCount`, i.e. the maximum number
 * of consecutive empty paragraphs to keep. `sanitizeHTML` falls back to 2
 * when this is omitted.
 */
maxEmptyParagraphs?: number
}

const formatter = unified()
.use(rehypeParse, rehypeParseOptions)
.use(rehypeRaw)
.use(rehypeSanitize, rehypeSanitizeOptions)
.use(rehypeSqueezeParagraphs, { maxCount: 2 })
.use(rehypeFormat)
.use(rehypeStringify, rehypeStringifyOptions)
export const sanitizeHTML = (
html: string,
{ maxEmptyParagraphs }: SanitizeHTMLOptions = {},
): string => {
const formatter = unified()
.use(rehypeParse, rehypeParseOptions)
.use(rehypeRaw)
.use(rehypeSanitize, rehypeSanitizeOptions)
.use(rehypeSqueezeParagraphs, { maxCount: maxEmptyParagraphs ?? 2 })
.use(rehypeFormat)
.use(rehypeStringify, rehypeStringifyOptions)

export const sanitizeHTML = (html: string): string => {
const result = formatter.processSync(html)
return String(result)
}

0 comments on commit 4121cfa

Please sign in to comment.