Skip to content

Commit

Permalink
update: refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Koooooo-7 committed Aug 1, 2024
1 parent 35a7261 commit 3f6d18d
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 78 deletions.
6 changes: 6 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@
"--testPathIgnorePatterns"
],
"console": "integratedTerminal"
},
{
"type": "node",
"request": "launch",
"name": "Run a single js file functions",
"program": "${file}"
}
]
}
2 changes: 1 addition & 1 deletion src/plugins/search/component.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function doSearch(value) {
<div class="matching-post" aria-label="search result ${i + 1}">
<a href="${post.url}">
<p class="title clamp-1">${post.title}</p>
<p class="content clamp-2">${post.content}</p>
<p class="content clamp-2">...${post.content}...</p>
</a>
</div>
`;
Expand Down
248 changes: 196 additions & 52 deletions src/plugins/search/markdown-to-txt.js
Original file line number Diff line number Diff line change
@@ -1,61 +1,205 @@
/**
* This is a modified version of the
* [markdown-to-txt](https://www.npmjs.com/package/markdown-to-txt) library.
* This is a function to convert markdown to txt based on markedjs v13+.
* The escape/unescape functions are copied from [lodash](https://www.npmjs.com/package/lodash) instead of being imported, to reduce the size.
*/
import { marked } from 'marked';
import { escape, unescape } from 'lodash';
// Small text-shaping helpers used as renderer callbacks.

/** Appends a blank line after a block-level piece of text. */
function block(text) {
  return text + '\n\n';
}

/** Same as `block`, but runs the text through `escape` first. */
function escapeBlock(text) {
  return escape(text) + '\n\n';
}

/** Appends a single line break after the text. */
function line(text) {
  return text + '\n';
}

/** Passes inline text through untouched. */
function inline(text) {
  return text;
}

/** Produces a bare line break, ignoring any arguments. */
function newline() {
  return '\n';
}

/** Produces an empty string, ignoring any arguments. */
function empty() {
  return '';
}

const TxtRenderer = {
// Block elements
code: escapeBlock,
blockquote: block,
html: empty,
heading: block,
hr: newline,
list: text => block(text.trim()),
listitem: line,
checkbox: empty,
paragraph: block,
table: (header, body) => line(header + body),
tablerow: text => line(text.trim()),
tablecell: text => text + ' ',
// Inline elements
strong: inline,
em: inline,
codespan: inline,
br: newline,
del: inline,
link: (_0, _1, text) => text,
image: (_0, _1, text) => text,
text: inline,
// etc.
options: {},

// Maps each supported HTML entity to the character it stands for.
const htmlUnescapes = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
};

// Global matcher used for replacing every entity in a string. The optional
// `(0+)?` lets zero-padded apostrophe forms such as `&#039;` match as well.
const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39);/g;

// Flag-less twin of the matcher above: without the `g` flag, `.test()` keeps
// no `lastIndex` state between calls, so it is safe for repeated presence checks.
const reHasEscapedHtml = new RegExp(reEscapedHtml.source);

/**
* Converts markdown to plaintext using the marked Markdown library.
* Accepts [MarkedOptions](https://marked.js.org/using_advanced#options) as
* the second argument.
*
* NOTE: The output of markdownToTxt is NOT sanitized. The output may contain
* valid HTML, JavaScript, etc. Be sure to sanitize if the output is intended
* for web use.
*
* @param markdown the markdown text to txtify
* @param options the marked options
* @returns the unmarked text
*/
export function markdownToTxt(markdown, options) {
const unmarked = marked(markdown, { ...options, renderer: TxtRenderer });
/**
 * Converts the HTML entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&#39;`
 * in a string back to their literal characters.
 *
 * @param {string} string the text to unescape
 * @returns {string} the unescaped text; `''` for any falsy input
 */
function unescape(string) {
  if (!string) {
    return '';
  }

  // Skip the replace entirely when there is nothing to convert.
  if (!reHasEscapedHtml.test(string)) {
    return string;
  }

  // Entities missing from the lookup table can only be zero-padded apostrophe
  // forms (e.g. `&#039;`), hence the apostrophe fallback.
  return string.replace(
    reEscapedHtml,
    entity => htmlUnescapes[entity] || "'",
  );
}

// Maps each HTML-special character to its entity.
const htmlEscapes = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

// Global matcher used for replacing every special character in a string.
const reUnescapedHtml = /[&<>"']/g;

// Flag-less twin of the matcher above, so `.test()` carries no `lastIndex`
// state from one call to the next.
const reHasUnescapedHtml = new RegExp(reUnescapedHtml.source);

/**
 * Converts the characters `&`, `<`, `>`, `"` and `'` in a string to their
 * corresponding HTML entities.
 *
 * @param {string} string the text to escape
 * @returns {string} the escaped text; `''` for any falsy input
 */
function escape(string) {
  if (!string) {
    return '';
  }

  // Nothing to escape: hand the input back as-is.
  if (!reHasUnescapedHtml.test(string)) {
    return string;
  }

  return string.replace(reUnescapedHtml, chr => htmlEscapes[chr]);
}

/**
 * Removes every `!>` and `?>` helper marker from the text.
 *
 * A global pattern is used on purpose: `String.prototype.replace` with a
 * string pattern only removes the first occurrence, which would leave all
 * later markers in the output.
 *
 * @param {string} string the text to clean
 * @returns {string} the text without helper markers; a falsy input is
 *   returned unchanged
 */
function helpersCleanup(string) {
  return string && string.replace(/[!?]>/g, '');
}

/**
 * Renderer for marked's token-based renderer API that produces plain text
 * instead of HTML.
 *
 * Each method receives a token object and returns the text to emit for it.
 * Child tokens are rendered through `this.parser`, which is not defined on
 * this object itself (presumably marked attaches it before rendering —
 * confirm against the marked version in use). Every optional call falls back
 * to `''` so that a missing parser or method never leaks the literal string
 * "undefined" into the output.
 */
const markdownToTxtRenderer = {
  /** Space tokens contribute nothing to the text. */
  space() {
    return '';
  },

  /** Code block: drops one trailing newline and escapes HTML-special characters. */
  code({ text }) {
    const code = text.replace(/\n$/, '');
    return escape(code);
  },

  /** Blockquote: renders the nested block tokens. */
  blockquote({ tokens }) {
    return this.parser?.parse(tokens) || '';
  },

  /** Raw HTML is dropped entirely. */
  html() {
    return '';
  },

  /** Heading: renders its child tokens without any heading markup. */
  heading({ tokens }) {
    return this.parser?.parse(tokens) || '';
  },

  /** Horizontal rules produce no text. */
  hr() {
    return '';
  },

  /** List: concatenates the rendered items, with no separator between them. */
  list(token) {
    let body = '';
    for (let j = 0; j < token.items.length; j++) {
      const item = token.items[j];
      body += this.listitem?.(item) || '';
    }

    return body;
  },

  /**
   * List item: renders the item's tokens; for task items the checkbox text
   * followed by a space is placed in front of the content.
   *
   * Note that for loose task items the checkbox text is written into the
   * item's own tokens, i.e. the token objects passed in are mutated.
   */
  listitem(item) {
    let itemBody = '';
    if (item.task) {
      const checkbox = this.checkbox?.({ checked: !!item.checked }) || '';
      if (item.loose) {
        if (item.tokens.length > 0 && item.tokens[0].type === 'paragraph') {
          // Prefix the paragraph itself and, when present, its first text child.
          item.tokens[0].text = checkbox + ' ' + item.tokens[0].text;
          if (
            item.tokens[0].tokens &&
            item.tokens[0].tokens.length > 0 &&
            item.tokens[0].tokens[0].type === 'text'
          ) {
            item.tokens[0].tokens[0].text =
              checkbox + ' ' + item.tokens[0].tokens[0].text;
          }
        } else {
          // No leading paragraph to prefix: insert a dedicated text token.
          item.tokens.unshift({
            type: 'text',
            raw: checkbox + ' ',
            text: checkbox + ' ',
          });
        }
      } else {
        itemBody += checkbox + ' ';
      }
    }

    itemBody += this.parser?.parse(item.tokens, !!item.loose) || '';

    return itemBody;
  },

  /** Checkboxes have no textual representation. */
  checkbox() {
    return '';
  },

  /** Paragraph: renders its inline tokens. */
  paragraph({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  /**
   * Table: renders the header row and the body rows, joined by one space.
   * Cells within a row and rows within the body are concatenated without a
   * separator.
   */
  table(token) {
    let header = '';

    let cell = '';
    for (let j = 0; j < token.header.length; j++) {
      cell += this.tablecell?.(token.header[j]) || '';
    }
    header += this.tablerow?.({ text: cell }) || '';

    let body = '';
    for (let j = 0; j < token.rows.length; j++) {
      const row = token.rows[j];

      cell = '';
      for (let k = 0; k < row.length; k++) {
        cell += this.tablecell?.(row[k]) || '';
      }

      body += this.tablerow?.({ text: cell }) || '';
    }

    return header + ' ' + body;
  },

  /** Table row: the already-rendered cell text, unchanged. */
  tablerow({ text }) {
    return text;
  },

  /** Table cell: renders its inline tokens. */
  tablecell(token) {
    return this.parser?.parseInline(token.tokens) || '';
  },

  /**
   * Strong: returns the token's `text` field as-is.
   * NOTE(review): unlike `em`, this does not render the child tokens —
   * confirm that is intended.
   */
  strong({ text }) {
    return text;
  },

  /** Emphasis: renders its inline tokens. */
  em({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  /** Inline code: the code text, unchanged. */
  codespan({ text }) {
    return text;
  },

  /** Line break: a single space. */
  br() {
    return ' ';
  },

  /** Strikethrough: renders its inline tokens. */
  del({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  /** Link: the link text followed by its href and title, space-separated. */
  link({ tokens, href, title }) {
    // Keep the href and title next to the link text so they remain
    // searchable (images are handled the same way).
    // e.g. [filename](_media/example.js ':include :type=code :fragment=demo')
    // Result: filename _media/example.js :include :type=code :fragment=demo
    return `${this.parser?.parseInline(tokens) || ''} ${href || ''} ${title || ''}`;
  },

  /** Image: the alt text followed by its href and title, space-separated. */
  image({ title, text, href }) {
    return `${text || ''} ${href || ''} ${title || ''}`;
  },

  /** Text: renders child tokens when present, otherwise the token's own text. */
  text(token) {
    return token.tokens
      ? this.parser?.parseInline(token.tokens) || ''
      : token.text || '';
  },
};
// Configure marked to render through the plain-text renderer.
// NOTE(review): `setOptions` is called on the imported `marked` object itself;
// confirm nothing else relies on that same instance's default renderer.
const _marked = marked.setOptions({ renderer: markdownToTxtRenderer });

/**
 * Converts markdown to plain text.
 *
 * The markdown is rendered with the plain-text renderer, HTML entities in the
 * result are unescaped, the `!>` / `?>` helper markers are removed, and the
 * result is trimmed.
 *
 * NOTE: because entities are unescaped, the output may contain raw `<`, `>`,
 * `&` and quote characters. Escape it before inserting it into HTML.
 *
 * @param {string} markdown the markdown text to convert
 * @returns {string} the plain text
 */
export function markdownToTxt(markdown) {
  const unmarked = _marked.parse(markdown);
  const unescaped = unescape(unmarked);
  const helpersCleaned = helpersCleanup(unescaped);
  return helpersCleaned.trim();
}

export default markdownToTxt;

const t = '![logo](_media/icon.svg)';
console.log('run!');

Check warning on line 200 in src/plugins/search/markdown-to-txt.js

View workflow job for this annotation

GitHub Actions / lint (lts/*)

Unexpected console statement
fetch('http://localhost:3000/cover.md')
.then(res => res.text())
.then(data => {
console.log(markdownToTxt(t));

Check warning on line 204 in src/plugins/search/markdown-to-txt.js

View workflow job for this annotation

GitHub Actions / lint (lts/*)

Unexpected console statement
});
32 changes: 13 additions & 19 deletions src/plugins/search/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,6 @@ function escapeHtml(string) {
return String(string).replace(/[&<>"']/g, s => entityMap[s]);
}

/**
 * Prepares text for matching and display: strips diacritical marks, converts
 * markdown to plain text, then HTML-escapes the result.
 */
function formatContent(text) {
  const withoutDiacritics = ignoreDiacriticalMarks(text);
  const plainText = cleanMarkdown(withoutDiacritics);
  return escapeHtml(plainText);
}

/**
 * Converts markdown to plain text via `markdownToTxt`.
 * Falsy input (empty string, `undefined`, ...) is returned unchanged without
 * invoking the converter.
 */
function cleanMarkdown(text) {
  return text ? markdownToTxt(text) : text;
}

function getAllPaths(router) {
const paths = [];

Expand Down Expand Up @@ -146,7 +135,7 @@ export function genIndex(path, content = '', router, depth, indexKey) {
index[slug] = {
slug,
title: path !== '/' ? path.slice(1) : 'Home Page',
body: token.text || '',
body: markdownToTxt(token.text || ''),
path: path,
indexKey: indexKey,
};
Expand All @@ -162,12 +151,12 @@ export function genIndex(path, content = '', router, depth, indexKey) {
token.text = getTableData(token);
token.text = getListData(token);

index[slug].body += '\n' + (token.text || '');
index[slug].body += '\n' + markdownToTxt(token.text || '');
} else {
token.text = getTableData(token);
token.text = getListData(token);

index[slug].body = token.text || '';
index[slug].body = markdownToTxt(token.text || '');
}

index[slug].path = path;
Expand Down Expand Up @@ -211,14 +200,19 @@ export function search(query) {
keywords.forEach(keyword => {
// From https://github.com/sindresorhus/escape-string-regexp
const regEx = new RegExp(
formatContent(keyword).replace(/[|\\{}()[\]^$+*?.]/g, '\\$&'),
escapeHtml(ignoreDiacriticalMarks(keyword)).replace(
/[|\\{}()[\]^$+*?.]/g,
'\\$&',
),
'gi',
);
let indexTitle = -1;
let indexContent = -1;
handlePostTitle = postTitle ? formatContent(postTitle) : postTitle;
handlePostTitle = postTitle
? escapeHtml(ignoreDiacriticalMarks(postTitle))
: postTitle;
handlePostContent = postContent
? formatContent(postContent)
? escapeHtml(ignoreDiacriticalMarks(postContent))
: postContent;

indexTitle = postTitle ? handlePostTitle.search(regEx) : -1;
Expand Down Expand Up @@ -252,8 +246,8 @@ export function search(query) {

if (matchesScore > 0) {
const matchingPost = {
title: formatContent(handlePostTitle),
content: formatContent(postContent ? resultStr : ''),
title: handlePostTitle,
content: postContent ? resultStr : '',
url: postUrl,
score: matchesScore,
};
Expand Down
Loading

0 comments on commit 3f6d18d

Please sign in to comment.