From 107b17494626d46545e119a1fd5cd4dd2683b7e6 Mon Sep 17 00:00:00 2001 From: "David L. Qiu" Date: Wed, 6 Nov 2024 13:43:47 -0800 Subject: [PATCH] update escaping logic and system prompt --- .../jupyter_ai_magics/providers.py | 9 ++- .../src/components/rendermime-markdown.tsx | 64 +------------------ 2 files changed, 5 insertions(+), 68 deletions(-) diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py b/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py index 84f505e28..01a081913 100644 --- a/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py +++ b/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py @@ -57,11 +57,10 @@ You may use Markdown to format your response. If your response includes code, they must be enclosed in Markdown fenced code blocks (with triple backticks before and after). If your response includes mathematical notation, they must be expressed in LaTeX markup and enclosed in LaTeX delimiters. -- Human messages may still use `$` symbols to delimit inline math. - However, your response should never use `$` symbols to delimit inline math. -- Valid inline math: `\\( \\infty \\)` -- Valid display math: `\\[ \\infty \\]` -- Invalid inline math: `$\\infty$` +All quantities of USD must be formatted in LaTeX and not plaintext. +- Example prompt: `If I have \\\\$100 and spend \\\\$20, how much money do I have left?` +- **Correct** response: `You have \\(\\$80\\) remaining.` +- **Incorrect** response: `You have $80 remaining.` If you do not know the answer to a question, answer truthfully by responding that you do not know. The following is a friendly conversation between you and a human. """.strip() diff --git a/packages/jupyter-ai/src/components/rendermime-markdown.tsx b/packages/jupyter-ai/src/components/rendermime-markdown.tsx index 3ce4c6f0b..9a0278517 100644 --- a/packages/jupyter-ai/src/components/rendermime-markdown.tsx +++ b/packages/jupyter-ai/src/components/rendermime-markdown.tsx @@ -39,61 +39,6 @@ function escapeLatexDelimiters(text: string) { .replace(/\\\]/g, '\\\\]'); } -/** - * Type predicate function that determines whether a given DOM Node is a Text - * node. - */ -function isTextNode(node: Node | null): node is Text { - return node?.nodeType === Node.TEXT_NODE; -} - -/** - * Escapes all `$` symbols present in an HTML element except those within the - * following elements: `pre`, `code`, `samp`, `kbd`. - * - * This prevents `$` symbols from being used as inline math delimiters in AI - * messages, allowing `$` symbols to be used literally to denote quantities of - * USD. This does not escape literal `$` within elements that display their - * contents literally, like code elements. This overrides JupyterLab's default - * rendering of MarkDown w/ LaTeX for AI messages. - * - * The Jupyter AI system prompt should explicitly request that the LLM not use - * `$` as an inline math delimiter. This is the default behavior. - */ -function escapeDollarSymbols(el: HTMLElement) { - // Get all text nodes that are not within pre, code, samp, or kbd elements - const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, { - acceptNode: node => { - const isInSkippedElements = node.parentElement?.closest( - 'pre, code, samp, kbd' - ); - return isInSkippedElements - ? NodeFilter.FILTER_SKIP - : NodeFilter.FILTER_ACCEPT; - } - }); - - // Collect all valid text nodes in an array. - const textNodes: Text[] = []; - let currentNode: Node | null; - while ((currentNode = walker.nextNode())) { - if (isTextNode(currentNode)) { - textNodes.push(currentNode); - } - } - - // Replaces each `$` symbol with `\$` for each text node, unless there is - // another `$` symbol adjacent or it is already escaped. Examples: - // - `$10 - $5` => `\$10 - \$5` (escaped) - // - `$$ \infty $$` => `$$ \infty $$` (unchanged) - // - `\$10` => `\$10` (unchanged, already escaped) - textNodes.forEach(node => { - if (node.textContent) { - node.textContent = node.textContent.replace(/(? { @@ -131,14 +76,7 @@ function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element { ); } - // step 2: render LaTeX via MathJax, while escaping single dollar symbols - // in agent messages. - if ( - props.parentMessage?.type === 'agent' || - props.parentMessage?.type === 'agent-stream' - ) { - escapeDollarSymbols(renderer.node); - } + // step 2: render LaTeX via MathJax props.rmRegistry.latexTypesetter?.typeset(renderer.node); // insert the rendering into renderingContainer if not yet inserted