Merge pull request #457 from thematters/develop

Release: v0.2.3-alpha.1
thematters · Mar 11, 2024 · 457a705 · 457a705
2 parents 72c0e1d + 05147b7
commit 457a705
Show file tree

Hide file tree

Showing 4 changed files with 191 additions and 5 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@matters/matters-editor",
-  "version": "0.2.3-alpha.0",
+  "version": "0.2.3-alpha.1",
   "description": "Editor for matters.news",
   "author": "https://github.com/thematters",
   "homepage": "https://github.com/thematters/matters-editor",

diff --git a/src/transformers/normalize.test.ts b/src/transformers/normalize.test.ts
@@ -1,16 +1,21 @@
 import { describe, expect, test } from 'vitest'
 
-import { normalizeArticleHTML } from './normalize'
+import { normalizeArticleHTML, normalizeCommentHTML } from './normalize'
 
 const expectNormalizeArticleHTML = (input: string, output: string) => {
   const result = normalizeArticleHTML(input)
   expect(result.trim()).toBe(output)
 }
 
+export const expectNormalizeCommentHTML = (input: string, output: string) => {
+  const result = normalizeCommentHTML(input)
+  expect(result.trim()).toBe(output)
+}
+
 /**
  * Tests
  */
-describe('Normalization', () => {
+describe('Normalization: Article', () => {
   test('bolds', () => {
     expectNormalizeArticleHTML(
       '<p><strong>abc</strong></p>',
@@ -279,3 +284,80 @@ describe('Normalization', () => {
     )
   })
 })
+
+describe('Normalization: Comment', () => {
+  test('quote', () => {
+    expectNormalizeCommentHTML(
+      '<blockquote><p>abc</p></blockquote>',
+      '<blockquote><p>abc</p></blockquote>',
+    )
+  })
+
+  test('link', () => {
+    expectNormalizeCommentHTML(
+      '<p><a target="_blank" rel="noopener noreferrer nofollow" href="https://example.com">abc</a></p>',
+      '<p><a target="_blank" rel="noopener noreferrer nofollow" href="https://example.com">abc</a></p>',
+    )
+  })
+
+  test('bolds is not supported', () => {
+    expectNormalizeCommentHTML('<p><strong>abc</strong></p>', '<p>abc</p>')
+    expectNormalizeCommentHTML('<p><b>abc</b></p>', '<p>abc</p>')
+  })
+
+  test('strikethrough is not supported', () => {
+    expectNormalizeCommentHTML('<p><s>abc</s></p>', '<p>abc</p>')
+    expectNormalizeCommentHTML('<p><del>abc</del></p>', '<p>abc</p>')
+    expectNormalizeCommentHTML('<p><strike>abc</strike></p>', '<p>abc</p>')
+  })
+
+  test('italic is not supported', () => {
+    expectNormalizeCommentHTML('<p>abc</p>', '<p>abc</p>')
+
+    expectNormalizeCommentHTML('<p><i>abc</i></p>', '<p>abc</p>')
+  })
+
+  test('underline is not supported', () => {
+    expectNormalizeCommentHTML('<p><u>abc</u></p>', '<p>abc</p>')
+  })
+
+  test('self-closed tags', () => {
+    expectNormalizeCommentHTML('<p />', '<p></p>')
+
+    expectNormalizeCommentHTML('<br></br>', '<p><br class="smart"></p>')
+
+    expectNormalizeCommentHTML('<hr/>', '<p></p>')
+
+    // self-closed <img /> inside a figure: the figure is dropped, only the caption remains as a <p>
+    expectNormalizeCommentHTML(
+      '<figure class="image"><img src="https://assets.matters.news/embed/c40d5045-0c03-44b6-afe6-93a285ffd1bb.jpeg" /><figcaption>左：女反派。右：女主。</figcaption></figure>',
+      '<p>左：女反派。右：女主。</p>',
+    )
+
+    // self-closed <iframe /> embed with an empty caption: the whole figure collapses to an empty <p>
+    expectNormalizeCommentHTML(
+      '<figure class="embed" data-provider="youtube"><div class="iframe-container"><iframe src="https://www.youtube.com/embed/Zk7DppcfaMY?rel=0" loading="lazy" allowfullscreen frameborder="0" /></div><figcaption></figcaption></figure>',
+      '<p></p>',
+    )
+  })
+
+  test('figures are not supported', () => {
+    // image
+    expectNormalizeCommentHTML(
+      '<figure class="image"><img src="https://assets.matters.news/embed/c40d5045-0c03-44b6-afe6-93a285ffd1bb.jpeg"><figcaption>左：女反派。右：女主。</figcaption></figure>',
+      '<p>左：女反派。右：女主。</p>',
+    )
+
+    // audio
+    expectNormalizeCommentHTML(
+      '<figure class="audio"><audio controls><source src="https://assets.matters.news/embedaudio/0a45d56a-d19a-4300-bfa4-305639fd5a82/點數經濟-讓過路客成為回頭客.mp3" type="audio/mp3"></audio><div class="player"><header><div class="meta"><h4 class="title">點數經濟：讓過路客成為回頭客</h4><div class="time"><span class="current" data-time="00:00"></span><span class="duration" data-time="--:--"></span></div></div><span class="play"></span></header><footer><div class="progress-bar"><span></span></div></footer></div><figcaption>區塊勢 Podcast</figcaption></figure>',
+      '<p>點數經濟：讓過路客成為回頭客</p><p>區塊勢 Podcast</p>',
+    )
+
+    // video
+    expectNormalizeCommentHTML(
+      '<figure class="embed embed-video" data-provider="youtube"><div class="iframe-container"><iframe src="https://www.youtube.com/embed/Zk7DppcfaMY?rel=0" loading="lazy" allowfullscreen frameborder="0"></iframe></div><figcaption></figcaption></figure>',
+      '<p></p>',
+    )
+  })
+})
diff --git a/src/transformers/sanitize.test.ts b/src/transformers/sanitize.test.ts
@@ -1,3 +1,4 @@
+import { stripIndent } from 'common-tags'
 import { describe, expect, test } from 'vitest'
 
 import { sanitizeHTML } from './sanitize'
@@ -18,6 +19,41 @@ describe('Sanitization: custom', () => {
       '<a class="">pp</a>',
     )
   })
+
+  test('allow max two empty paragraphys', () => {
+    expectSanitizeHTML(
+      stripIndent`
+        <p>abc</p>
+        <p></p>
+        <p></p>
+        abc
+        <p></p>
+        <p>abc</p>
+        <p></p>
+        <p></p>
+        <p></p>
+        <p>abc</p>
+        <p></p>
+        <p><br></p>
+        <p><br/></p>
+        <p><br></br></p>
+        <p><br/><br/><br/></p>
+        <p></p>
+      `,
+      stripIndent`
+        <p>abc</p>
+        <p></p>
+        <p></p>abc
+        <p></p>
+        <p>abc</p>
+        <p></p>
+        <p></p>
+        <p>abc</p>
+        <p></p>
+        <p></p>
+      `,
+    )
+  })
 })
 
 // via https://github.com/leizongmin/js-xss/blob/master/test/test_xss.js

diff --git a/src/transformers/sanitize.ts b/src/transformers/sanitize.ts
@@ -1,3 +1,4 @@
+import { type Root, type RootContent } from 'hast'
 import rehypeFormat from 'rehype-format'
 import rehypeParse from 'rehype-parse'
 import rehypeRaw from 'rehype-raw'
@@ -11,14 +12,81 @@ import {
   rehypeStringifyOptions,
 } from './options'
 
+/**
+ * Squeeze each run of consecutive empty paragraphs at the root to at most N
+ *
+ * e.g. with maxCount = 2:
+ * <p></p><p></p><p></p><p></p><p></p><p></p>
+ * =>
+ * <p></p><p></p>
+ *
+ * @param maxCount - maximum number of consecutive empty paragraphs to keep
+ */
+const rehypeSqueezeParagraphs =
+  ({ maxCount }: { maxCount: number }) =>
+  (tree: Root) => {
+    if (tree.type !== 'root') {
+      return
+    }
+
+    const children: RootContent[] = []
+    let count = 0
+    let touched = false
+
+    tree.children.forEach((node) => {
+      // keep whitespace-only text nodes; they do not reset the running count
+      if (node.type === 'text' && node.value.replace(/\s/g, '') === '') {
+        children.push(node)
+        return
+      }
+
+      // keep any other non-paragraph node and reset the running count
+      if (node.type !== 'element' || node.tagName !== 'p') {
+        count = 0
+        children.push(node)
+        return
+      }
+
+      // keep non-empty paragraphs and reset the count; these forms count as empty:
+      // - <p></p>
+      // - <p><br/></p>
+      const isEmptyParagraph =
+        node.children.length === 0 ||
+        node.children.every((n) => n.type === 'element' && n.tagName === 'br')
+      if (!isEmptyParagraph) {
+        count = 0
+        children.push(node)
+        return
+      }
+
+      // cap the run at maxCount; kept ones are re-emitted as a bare <p></p>, extras are dropped
+      count++
+      if (count <= maxCount) {
+        children.push({
+          type: 'element',
+          tagName: 'p',
+          properties: {},
+          children: [],
+        })
+      } else {
+        touched = true
+      }
+    })
+
+    if (touched) {
+      tree.children = children
+    }
+  }
+
 const formatter = unified()
   .use(rehypeParse, rehypeParseOptions)
   .use(rehypeRaw)
   .use(rehypeSanitize, rehypeSanitizeOptions)
+  .use(rehypeSqueezeParagraphs, { maxCount: 2 })
   .use(rehypeFormat)
   .use(rehypeStringify, rehypeStringifyOptions)
 
-export const sanitizeHTML = (md: string): string => {
-  const result = formatter.processSync(md)
+export const sanitizeHTML = (html: string): string => {
+  const result = formatter.processSync(html)
   return String(result)
 }