Skip to content

Commit

Permalink
feat(ai-help): index short_title (#10579)
Browse files (browse the repository at this point in the history)
  • Loading branch information
caugner authored Feb 29, 2024
1 parent 8c988be commit 6939552
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 18 deletions.
61 changes: 43 additions & 18 deletions scripts/ai-help-macros.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,6 +28,7 @@ interface IndexedDoc {
id: number;
mdn_url: string;
title: string;
title_short: string;
token_count: number | null;
hash: string;
text_hash: string;
Expand All @@ -36,6 +37,7 @@ interface IndexedDoc {
interface Doc {
mdn_url: string;
title: string;
title_short: string;
hash: string;
html: string;
markdown: string;
Expand Down Expand Up @@ -108,9 +110,15 @@ export async function updateEmbeddings(
const updates: Doc[] = [];
const formattingUpdates: Doc[] = [];

for await (const { mdn_url, title, hash, html, markdown, text } of builtDocs(
directory
)) {
for await (const {
mdn_url,
title,
title_short,
hash,
html,
markdown,
text,
} of builtDocs(directory)) {
seenUrls.add(mdn_url);

// Check for existing document in DB and compare checksums.
Expand All @@ -122,6 +130,7 @@ export async function updateEmbeddings(
updates.push({
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -132,6 +141,7 @@ export async function updateEmbeddings(
formattingUpdates.push({
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -154,6 +164,7 @@ export async function updateEmbeddings(
for (const {
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -173,27 +184,30 @@ export async function updateEmbeddings(
INSERT INTO mdn_doc_macro(
mdn_url,
title,
title_short,
hash,
html,
markdown,
token_count,
embedding,
text_hash
)
VALUES($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT (mdn_url) DO
VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9) ON CONFLICT (mdn_url) DO
UPDATE
SET mdn_url = $1,
title = $2,
hash = $3,
html = $4,
markdown = $5,
token_count = $6,
embedding = $7,
text_hash = $8
title_short = $3,
hash = $4,
html = $5,
markdown = $6,
token_count = $7,
embedding = $8,
text_hash = $9
`,
values: [
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -211,7 +225,14 @@ export async function updateEmbeddings(
console.error(context);
}
}
for (const { mdn_url, title, hash, html, markdown } of formattingUpdates) {
for (const {
mdn_url,
title,
title_short,
hash,
html,
markdown,
} of formattingUpdates) {
try {
console.log(
`-> [${mdn_url}] Updating document without generating new embedding...`
Expand All @@ -221,16 +242,17 @@ export async function updateEmbeddings(
const query = {
name: "upsert-doc",
text: `
INSERT INTO mdn_doc_macro(mdn_url, title, hash, html, markdown)
VALUES($1, $2, $3, $4, $5) ON CONFLICT (mdn_url) DO
INSERT INTO mdn_doc_macro(mdn_url, title, title_short, hash, html, markdown)
VALUES($1, $2, $3, $4, $5, $6) ON CONFLICT (mdn_url) DO
UPDATE
SET mdn_url = $1,
title = $2,
hash = $3,
html = $4,
markdown = $5
title_short = $3,
hash = $4,
html = $5,
markdown = $6
`,
values: [mdn_url, title, hash, html, markdown],
values: [mdn_url, title, title_short, hash, html, markdown],
rowMode: "array",
};

Expand Down Expand Up @@ -286,7 +308,9 @@ async function* builtDocs(directory: string) {
for await (const metadataPath of builtPaths(directory)) {
try {
const raw = await readFile(metadataPath, "utf-8");
const { title, mdn_url, hash } = JSON.parse(raw) as DocMetadata;
const { title, short_title, mdn_url, hash } = JSON.parse(
raw
) as DocMetadata;

const plainPath = path.join(path.dirname(metadataPath), "plain.html");
const plainHTML = await readFile(plainPath, "utf-8");
Expand Down Expand Up @@ -314,6 +338,7 @@ async function* builtDocs(directory: string) {
yield {
mdn_url,
title,
title_short: short_title || title,
hash,
html,
markdown,
Expand Down
1 change: 1 addition & 0 deletions scripts/ai-help.sql
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,6 +29,7 @@ create table
id bigserial,
hash text null,
title text not null,
title_short text not null,
mdn_url text not null,
html text null,
markdown text null,
Expand Down

0 comments on commit 6939552

Please sign in to comment.