Skip to content

Commit

Permalink
feat: support squeezing soft breaks
Browse files Browse the repository at this point in the history
  • Loading branch information
robertu7 committed Jun 5, 2024
1 parent b4a5621 commit 0512dfb
Show file tree
Hide file tree
Showing 6 changed files with 373 additions and 88 deletions.
2 changes: 1 addition & 1 deletion src/transformers/lib/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export * from './rehypeSqueezeParagraphs'
export * from './rehypeSqueezeBreaks'
152 changes: 152 additions & 0 deletions src/transformers/lib/rehypeSqueezeBreaks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import { type ElementContent, type Root, type RootContent } from 'hast'

/**
 * Options accepted by the break-squeezing transformer.
 *
 * For both fields a positive number N keeps at most N breaks per consecutive
 * run, `-1` retains every break, and `0` removes every break.
 */
interface Props {
/** Limit for consecutive empty paragraphs ("hard breaks"), e.g. `<p></p>` or `<p><br></p>`. */
maxHardBreaks?: number
/** Limit for consecutive `<br>` elements ("soft breaks") inside a non-empty paragraph. */
maxSoftBreaks?: number
}

/**
 * Tell whether a node is a text node that contains nothing but whitespace
 * (the empty string included).
 *
 * @param node - any hast node that may appear under a root
 * @returns `true` only for text nodes whose value is blank
 */
const isEmptyText = (node: RootContent) => {
  if (node.type !== 'text') {
    return false
  }

  // trimming strips the same whitespace characters that `\s` matches
  return node.value.trim().length === 0
}

/**
 * Tell whether a node is a `<br>` element.
 *
 * @param node - any hast node that may appear under a root
 * @returns `true` only for element nodes whose tag name is `br`
 */
const isBr = (node: RootContent) => {
  if (node.type !== 'element') {
    return false
  }

  return node.tagName === 'br'
}

/**
 * Cap runs of consecutive `<br>` elements (soft breaks) among the children of
 * a paragraph.
 *
 * - `maxSoftBreaks` not a number (option not supplied): soft breaks are left
 *   untouched.
 * - `maxSoftBreaks === -1`: every `<br>` is retained.
 * - `maxSoftBreaks === 0`: every `<br>` is removed.
 * - positive N: at most N `<br>` are kept per consecutive run.
 *
 * Any child that is not a `<br>` (whitespace-only text included) ends the
 * current run. Breaks that survive squeezing are emitted as fresh `<br>`
 * elements with empty properties.
 *
 * @param children - child nodes of the paragraph
 * @param maxSoftBreaks - limit per run of consecutive `<br>`, see above
 * @returns a new array of children; the input array is not mutated
 */
const squeezeSoftBreaks = ({
  children,
  maxSoftBreaks,
}: { children: ElementContent[] } & Pick<Props, 'maxSoftBreaks'>) => {
  // A caller that only configured hard breaks must not lose its soft breaks.
  if (typeof maxSoftBreaks !== 'number') {
    return [...children]
  }

  const newChildren: ElementContent[] = []
  const isRetainAll = maxSoftBreaks === -1
  let breakCount = 0

  for (const node of children) {
    // anything other than <br> is kept as-is and ends the current run
    if (node.type !== 'element' || node.tagName !== 'br') {
      breakCount = 0
      newChildren.push(node)
      continue
    }

    // cap consecutive <br> or retain all of them
    breakCount++
    if (isRetainAll || breakCount <= maxSoftBreaks) {
      newChildren.push({
        type: 'element',
        tagName: 'br',
        properties: {},
        children: [],
      })
    }
  }

  return newChildren
}

/**
 * Cap runs of consecutive empty paragraphs (hard breaks) in a list of block
 * nodes, and squeeze the soft breaks inside every non-empty paragraph.
 *
 * A paragraph is empty when all of its children are `<br>` elements or
 * whitespace-only text: `<p></p>`, `<p> </p>`, `<p><br></p>`, `<p> <br></p>`.
 *
 * - `maxHardBreaks` not a number (option not supplied): empty paragraphs are
 *   left untouched.
 * - `maxHardBreaks === -1`: every empty paragraph is retained.
 * - `maxHardBreaks === 0`: every empty paragraph is removed.
 * - positive N: at most N empty paragraphs are kept per consecutive run.
 *
 * Retained empty paragraphs are normalized to `<p><br></p>`. Whitespace-only
 * text nodes between blocks are kept and do not interrupt a run; any other
 * node, blockquotes included, ends it. Blockquotes are processed recursively
 * with their own independent run counter.
 *
 * @param children - sibling nodes to process (root or blockquote children)
 * @param maxHardBreaks - limit per run of empty paragraphs, see above
 * @param maxSoftBreaks - forwarded to `squeezeSoftBreaks` for non-empty paragraphs
 * @returns a new array of nodes; the input array is not mutated
 */
const squeezeHardBreaks = ({
  children,
  maxHardBreaks,
  maxSoftBreaks,
}: { children: Array<RootContent | ElementContent> } & Props) => {
  const newChildren: RootContent[] = []
  let breakCount = 0

  for (const node of children) {
    // keep whitespace-only text nodes without interrupting the current run
    if (isEmptyText(node)) {
      newChildren.push(node)
      continue
    }

    // paragraphs in blockquote
    if (node.type === 'element' && node.tagName === 'blockquote') {
      // a blockquote is content like any other non-paragraph node, so empty
      // paragraphs before and after it must not be counted as one run
      breakCount = 0
      newChildren.push({
        type: 'element',
        tagName: 'blockquote',
        properties: node.properties,
        children: squeezeHardBreaks({
          children: node.children,
          maxHardBreaks,
          maxSoftBreaks,
        }) as ElementContent[],
      })
      continue
    }

    // keep non-paragraph nodes as-is; they end the current run
    if (node.type !== 'element' || node.tagName !== 'p') {
      breakCount = 0
      newChildren.push(node)
      continue
    }

    // empty paragraphs look like:
    // - <p></p>
    // - <p> </p>
    // - <p><br></p>
    // - <p> <br></p>
    // (`every` is true for an empty child list, which covers <p></p>)
    const isEmptyParagraph = node.children.every(
      (n) => isBr(n) || isEmptyText(n),
    )

    // non-empty paragraph: ends the run, only its soft breaks are squeezed
    if (!isEmptyParagraph) {
      breakCount = 0
      newChildren.push({
        type: 'element',
        tagName: 'p',
        properties: node.properties,
        children: squeezeSoftBreaks({
          children: node.children,
          maxSoftBreaks,
        }),
      })
      continue
    }

    // A caller that only configured soft breaks must not lose its empty
    // paragraphs.
    if (typeof maxHardBreaks !== 'number') {
      newChildren.push(node)
      continue
    }

    // cap empty paragraphs or retain all, normalizing each to <p><br></p>
    breakCount++
    if (maxHardBreaks === -1 || breakCount <= maxHardBreaks) {
      newChildren.push({
        type: 'element',
        tagName: 'p',
        properties: {},
        children: [
          {
            type: 'element',
            tagName: 'br',
            properties: {},
            children: [],
          },
        ],
      })
    }
  }

  return newChildren
}

/**
 * Rehype transformer factory that squeezes hard breaks (empty paragraphs) and
 * soft breaks (`<br>` inside paragraphs) to a maximum of N per consecutive run.
 *
 * e.g. with `{ maxHardBreaks: 2 }`
 * <p></p><p></p><p></p><p></p><p></p><p></p>
 * =>
 * <p><br></p><p><br></p>
 *
 * The returned transformer replaces `tree.children` in place. It is a no-op
 * when the tree is not a root node, or when neither `maxHardBreaks` nor
 * `maxSoftBreaks` is a number.
 *
 * @param props - squeeze limits; defaults to `{}` so the plugin can be
 *   registered without options
 * @returns a transformer that takes the hast root
 */
export const rehypeSqueezeBreaks =
  (props: Props = {}) =>
  (tree: Root) => {
    if (tree.type !== 'root') {
      return
    }

    const { maxHardBreaks, maxSoftBreaks } = props

    // nothing configured, nothing to squeeze
    if (
      typeof maxHardBreaks !== 'number' &&
      typeof maxSoftBreaks !== 'number'
    ) {
      return
    }

    tree.children = squeezeHardBreaks({
      children: tree.children,
      maxHardBreaks,
      maxSoftBreaks,
    })
  }
75 changes: 0 additions & 75 deletions src/transformers/lib/rehypeSqueezeParagraphs.ts

This file was deleted.

105 changes: 102 additions & 3 deletions src/transformers/normalize-sanitize.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,23 @@ const expectProcessCommentHTML = (

describe('Sanitize and normalize article', () => {
test('squeeze empty paragraphs', () => {
expectProcessArticleHTML(
stripIndent`
<p>1</p>
<p></p>
<p>2</p>
<p></p>
<p></p>
<p>3</p>
`,
stripIndent`
<p>1</p>
<p>2</p>
<p>3</p>
`,
{ maxHardBreaks: 0 },
)

expectProcessArticleHTML(
stripIndent`
<p>1</p>
Expand All @@ -54,7 +71,27 @@ describe('Sanitize and normalize article', () => {
<p><br class="smart"></p>
<p>3</p>
`,
{ maxEmptyParagraphs: 1 },
{ maxHardBreaks: 1 },
)

expectProcessArticleHTML(
stripIndent`
<blockquote>
<p>1</p>
<p>2</p>
<p></p>
<p>3</p>
</blockquote>
`,
stripIndent`
<blockquote>
<p>1</p>
<p>2</p>
<p><br class="smart"></p>
<p>3</p>
</blockquote>
`,
{ maxHardBreaks: 1 },
)

expectProcessArticleHTML(
Expand Down Expand Up @@ -88,7 +125,69 @@ describe('Sanitize and normalize article', () => {
<p><br class="smart"></p>
<p><br class="smart"></p>
`,
{ maxEmptyParagraphs: 2 },
{ maxHardBreaks: 2 },
)
})

test('squeeze <br>', () => {
expectProcessArticleHTML(
stripIndent`
<p>1</p>
<p>2</p>
<p>1<br>2</p>
<p>1<br><br>2</p>
<p>1<br><br></p>
`,
stripIndent`
<p>1</p>
<p>2</p>
<p>12</p>
<p>12</p>
<p>1</p>
`,
{ maxHardBreaks: 0, maxSoftBreaks: 0 },
)

// max 1 soft break
expectProcessArticleHTML(
stripIndent`
<p>1</p>
<p>2</p>
<p>1<br>2</p>
<p>1<br><br>2</p>
<p>1<br><br></p>
`,
stripIndent`
<p>1</p>
<p>2</p>
<p>1<br class="smart">2</p>
<p>1<br class="smart">2</p>
<p>1<br class="smart"></p>
`,
{ maxHardBreaks: 0, maxSoftBreaks: 1 },
)

// blockquote
expectProcessArticleHTML(
stripIndent`
<blockquote>
<p>1</p>
<p>2</p>
<p>1<br>2</p>
<p>1<br><br>2</p>
<p>1<br><br></p>
</blockquote>
`,
stripIndent`
<blockquote>
<p>1</p>
<p>2</p>
<p>12</p>
<p>12</p>
<p>1</p>
</blockquote>
`,
{ maxHardBreaks: 0, maxSoftBreaks: 0 },
)
})

Expand Down Expand Up @@ -128,7 +227,7 @@ describe('Sanitize and normalize article', () => {
<p><br class="smart"></p>
<p><br class="smart"></p>
`,
{ maxEmptyParagraphs: -1 },
{ maxHardBreaks: -1 },
)
})
})
Expand Down
Loading

0 comments on commit 0512dfb

Please sign in to comment.