Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(search): clean markdown elements in search contents #2457

Merged
merged 13 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions src/plugins/search/markdown-to-txt.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* This is a modified version of the
* [markdown-to-txt](https://www.npmjs.com/package/markdown-to-txt) library.
*/
import { marked } from 'marked';
import { escape, unescape } from 'lodash';
/** Returns `text` followed by a blank line (two newlines). */
function block(text) {
  return text + '\n\n';
}

/** Passes `text` through `escape`, then appends a blank line. */
function escapeBlock(text) {
  return escape(text) + '\n\n';
}

/** Returns `text` followed by a single newline. */
function line(text) {
  return text + '\n';
}

/** Returns `text` unchanged. */
function inline(text) {
  return text;
}

/** Returns a single newline, ignoring any arguments. */
function newline() {
  return '\n';
}

/** Returns an empty string, ignoring any arguments. */
function empty() {
  return '';
}

/**
 * Renderer handlers that produce plain text instead of HTML.
 * Most handlers return the text they receive, optionally followed by
 * newline(s); `html` and `checkbox` output is dropped entirely.
 */
const TxtRenderer = {
  // Block elements
  code: escapeBlock,
  blockquote: block,
  html: empty,
  heading: block,
  hr: newline,
  list(text) {
    // Trim first so the list ends with exactly one blank line.
    return block(text.trim());
  },
  listitem: line,
  checkbox: empty,
  paragraph: block,
  table(header, body) {
    return line(header + body);
  },
  tablerow(text) {
    return line(text.trim());
  },
  tablecell(text) {
    // Trailing space separates adjacent cells; tablerow trims the last one.
    return text + ' ';
  },
  // Inline elements
  strong: inline,
  em: inline,
  codespan: inline,
  br: newline,
  del: inline,
  // Only the third argument is kept for links and images; the first two
  // are ignored.
  link(_first, _second, text) {
    return text;
  },
  image(_first, _second, text) {
    return text;
  },
  text: inline,
  // etc.
  options: {},
};

/**
 * Renders markdown as plain text by running marked with `TxtRenderer`,
 * then unescaping and trimming the result.
 * Accepts [MarkedOptions](https://marked.js.org/using_advanced#options) as
 * the second argument; any `renderer` supplied there is replaced by
 * `TxtRenderer`.
 *
 * NOTE: The returned text is NOT sanitized. It may still contain valid
 * HTML, JavaScript, etc., so sanitize it before using it on the web.
 *
 * @param markdown the markdown text to txtify
 * @param options the marked options
 * @returns the unmarked text
 */
export function markdownToTxt(markdown, options) {
  const markedOptions = { ...options, renderer: TxtRenderer };
  return unescape(marked(markdown, markedOptions)).trim();
}

export default markdownToTxt;
27 changes: 17 additions & 10 deletions src/plugins/search/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
getAndRemoveConfig,
getAndRemoveDocsifyIgnoreConfig,
} from '../../core/render/utils.js';
import { markdownToTxt } from './markdown-to-txt.js';

let INDEXS = {};

Expand Down Expand Up @@ -34,6 +35,17 @@ function escapeHtml(string) {
return String(string).replace(/[&<>"']/g, s => entityMap[s]);
}

/**
 * Prepares text for searching and display by applying, in order:
 * `ignoreDiacriticalMarks`, `cleanMarkdown`, then `escapeHtml`.
 */
function formatContent(text) {
  const withoutDiacritics = ignoreDiacriticalMarks(text);
  const plainText = cleanMarkdown(withoutDiacritics);
  return escapeHtml(plainText);
}

/**
 * Converts markdown to plain text via `markdownToTxt`.
 * Falsy input (empty string, null, undefined, ...) is returned untouched
 * without calling the converter.
 */
function cleanMarkdown(text) {
  if (!text) {
    return text;
  }
  return markdownToTxt(text);
}

function getAllPaths(router) {
const paths = [];

Expand Down Expand Up @@ -175,19 +187,14 @@ export function search(query) {
keywords.forEach(keyword => {
// From https://github.com/sindresorhus/escape-string-regexp
const regEx = new RegExp(
escapeHtml(ignoreDiacriticalMarks(keyword)).replace(
/[|\\{}()[\]^$+*?.]/g,
'\\$&',
),
formatContent(keyword).replace(/[|\\{}()[\]^$+*?.]/g, '\\$&'),
'gi',
);
let indexTitle = -1;
let indexContent = -1;
handlePostTitle = postTitle
? escapeHtml(ignoreDiacriticalMarks(postTitle))
: postTitle;
handlePostTitle = postTitle ? formatContent(postTitle) : postTitle;
handlePostContent = postContent
? escapeHtml(ignoreDiacriticalMarks(postContent))
? formatContent(postContent)
: postContent;

indexTitle = postTitle ? handlePostTitle.search(regEx) : -1;
Expand Down Expand Up @@ -221,8 +228,8 @@ export function search(query) {

if (matchesScore > 0) {
const matchingPost = {
title: handlePostTitle,
content: postContent ? resultStr : '',
title: formatContent(handlePostTitle),
content: formatContent(postContent ? resultStr : ''),
url: postUrl,
score: matchesScore,
};
Expand Down
19 changes: 19 additions & 0 deletions test/e2e/search.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -232,4 +232,23 @@ test.describe('Search Plugin Tests', () => {
await page.keyboard.press('z');
await expect(searchFieldElm).toBeFocused();
});
// Verifies that the heading shown in the search results panel has its
// markdown link syntax removed.
test('search result should remove markdown', async ({ page }) => {
// Homepage fixture: a heading that contains a markdown link, followed by
// one line of body text that the search query will match.
const docsifyInitConfig = {
markdown: {
homepage: `
# The [mock](example.com) link
There is lots of words.
`,
},
scriptURLs: ['/dist/plugins/search.js'],
};

const searchFieldElm = page.locator('input[type=search]');
const resultsHeadingElm = page.locator('.results-panel h2');

await docsifyInit(docsifyInitConfig);

// Search for a word from the body text; the result heading should read as
// plain text, without the [text](url) link markup.
await searchFieldElm.fill('There');
await expect(resultsHeadingElm).toHaveText('The mock link');
});
});
Loading