Skip to content

Commit

Permalink
Merge pull request #456 from thematters/feat/comment-style
Browse files Browse the repository at this point in the history
feat: squeeze empty paragraphs to max 2
  • Loading branch information
robertu7 authored Mar 11, 2024
2 parents c4b5840 + cd42355 commit 05147b7
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 2 deletions.
36 changes: 36 additions & 0 deletions src/transformers/sanitize.test.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { stripIndent } from 'common-tags'
import { describe, expect, test } from 'vitest'

import { sanitizeHTML } from './sanitize'
Expand All @@ -18,6 +19,41 @@ describe('Sanitization: custom', () => {
'<a class="">pp</a>',
)
})

test('allow max two empty paragraphys', () => {
  // Runs of empty paragraphs of length 2, 1, 3 and 6. The last run mixes
  // `<p></p>` with paragraphs that contain only `<br>` elements.
  const input = stripIndent`
<p>abc</p>
<p></p>
<p></p>
abc
<p></p>
<p>abc</p>
<p></p>
<p></p>
<p></p>
<p>abc</p>
<p></p>
<p><br></p>
<p><br/></p>
<p><br></br></p>
<p><br/><br/><br/></p>
<p></p>
`

  // Each run is capped at two, and the `<br>`-only paragraphs that survive
  // come out as plain `<p></p>`.
  const expected = stripIndent`
<p>abc</p>
<p></p>
<p></p>abc
<p></p>
<p>abc</p>
<p></p>
<p></p>
<p>abc</p>
<p></p>
<p></p>
`

  expectSanitizeHTML(input, expected)
})
})

// via https://github.com/leizongmin/js-xss/blob/master/test/test_xss.js
Expand Down
72 changes: 70 additions & 2 deletions src/transformers/sanitize.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { type Root, type RootContent } from 'hast'
import rehypeFormat from 'rehype-format'
import rehypeParse from 'rehype-parse'
import rehypeRaw from 'rehype-raw'
Expand All @@ -11,14 +12,81 @@ import {
rehypeStringifyOptions,
} from './options'

/**
 * Rehype plugin that caps every run of empty paragraphs among the root's
 * direct children at `maxCount`.
 *
 * e.g. with `maxCount: 2`
 *   <p></p><p></p><p></p><p></p><p></p><p></p>
 *   =>
 *   <p></p><p></p>
 *
 * A paragraph counts as empty when it has no children or when all of its
 * children are `<br>` elements. Whitespace-only text nodes are always kept and
 * do not interrupt a run; any other node ends the run.
 *
 * `tree.children` is replaced only if at least one paragraph was dropped. When
 * that happens, every retained empty paragraph is emitted as a fresh `<p></p>`
 * with no properties and no children. When nothing is dropped, the tree is
 * left exactly as it was (so e.g. `<p><br></p>` stays as is).
 *
 * @param options.maxCount - maximum number of empty paragraphs kept per run
 * @returns a transformer that mutates the given root in place
 */
const rehypeSqueezeParagraphs =
  ({ maxCount }: { maxCount: number }) =>
  (tree: Root) => {
    if (tree.type !== 'root') {
      return
    }

    const kept: RootContent[] = []
    let emptyRun = 0
    let dropped = false

    for (const child of tree.children) {
      // whitespace-only text: keep it and let the current run continue
      const isBlankText =
        child.type === 'text' && child.value.replace(/\s/g, '') === ''
      if (isBlankText) {
        kept.push(child)
        continue
      }

      // empty paragraph: <p></p> or a <p> holding nothing but <br> elements
      // (`every` is vacuously true for a paragraph without children)
      const isEmptyParagraph =
        child.type === 'element' &&
        child.tagName === 'p' &&
        child.children.every(
          (inner) => inner.type === 'element' && inner.tagName === 'br',
        )

      // anything else ends the run and is kept untouched
      if (!isEmptyParagraph) {
        emptyRun = 0
        kept.push(child)
        continue
      }

      emptyRun += 1

      // over the cap: drop the paragraph and remember the tree must be rebuilt
      if (emptyRun > maxCount) {
        dropped = true
        continue
      }

      // within the cap: keep a normalized empty paragraph
      kept.push({
        type: 'element',
        tagName: 'p',
        properties: {},
        children: [],
      })
    }

    // only swap the children in when something was actually removed
    if (dropped) {
      tree.children = kept
    }
  }

/**
 * Shared unified processor used by `sanitizeHTML`.
 *
 * Plugins are registered in this order: rehypeParse, rehypeRaw,
 * rehypeSanitize, rehypeSqueezeParagraphs, rehypeFormat, rehypeStringify.
 * The squeeze step sits after sanitization and before formatting, and is
 * configured with `maxCount: 2`, i.e. at most two empty paragraphs are kept
 * per run among the root's direct children.
 *
 * NOTE(review): the `*Options` objects are defined outside this view
 * (presumably all in './options'); their contents are not described here.
 */
const formatter = unified()
.use(rehypeParse, rehypeParseOptions)
.use(rehypeRaw)
.use(rehypeSanitize, rehypeSanitizeOptions)
.use(rehypeSqueezeParagraphs, { maxCount: 2 })
.use(rehypeFormat)
.use(rehypeStringify, rehypeStringifyOptions)

/**
 * Run an HTML string through the shared `formatter` pipeline and return the
 * processed output as a string.
 *
 * Processing is synchronous (`processSync`).
 *
 * @param html - HTML source to process
 * @returns the string form of the processed result
 */
export const sanitizeHTML = (html: string): string => {
  const result = formatter.processSync(html)
  return String(result)
}

0 comments on commit 05147b7

Please sign in to comment.