Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(scripts): use pg instead of supabase #10337

Merged
merged 6 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/prod-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,9 @@ jobs:
run: yarn ai-help-macros update-index
env:
OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
PGHOST: ${{ secrets.SUPABASE_HOST }}
PGUSER: ${{ secrets.SUPABASE_USER }}
PGPASSWORD: ${{ secrets.SUPABASE_PASSWORD }}

- name: Slack Notification
if: failure()
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/stage-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,9 @@ jobs:
run: yarn ai-help-macros update-index
env:
OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
PGHOST: ${{ secrets.SUPABASE_HOST }}
PGUSER: ${{ secrets.SUPABASE_USER }}
PGPASSWORD: ${{ secrets.SUPABASE_PASSWORD }}

- name: Slack Notification
if: failure()
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@
"open": "^9.1.0",
"open-editor": "^4.1.1",
"openai": "^4.24.7",
"pg": "^8.11.3",
"pgvector": "^0.1.7",
"prism-svelte": "^0.5.0",
"prismjs": "^1.29.0",
"react-markdown": "^9.0.1",
Expand Down
167 changes: 108 additions & 59 deletions scripts/ai-help-macros.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ import { createHash } from "node:crypto";
import { readFile } from "node:fs/promises";

import caporal from "@caporal/core";
import { SupabaseClient, createClient } from "@supabase/supabase-js";
import pg from "pg";
import pgvector from "pgvector/pg";
import { fdir } from "fdir";
import OpenAI from "openai";
import { load as cheerio } from "cheerio";
Expand Down Expand Up @@ -56,7 +57,14 @@ export async function updateEmbeddings(
}

// Supabase.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super tiny little nitty-nit: at this point, this is no longer about Supabase, so it should say Postgres.

const supabaseClient = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY);
const pgClient = new pg.Client({
port: 5432,
database: "postgres",
});

await pgClient.connect();
await pgClient.query("CREATE EXTENSION IF NOT EXISTS vector");
await pgvector.registerType(pgClient);

// Open AI.
const openai = new OpenAI({
Expand All @@ -72,14 +80,11 @@ export async function updateEmbeddings(
});
} catch (e: any) {
const {
data: {
error: { message, type },
},
error: { message, type },
status,
statusText,
} = e.response;
caugner marked this conversation as resolved.
Show resolved Hide resolved
console.error(
`[!] Failed to create embedding (${status} ${statusText}): ${type} - ${message}`
`[!] Failed to create embedding (${status}): ${type} - ${message}`
);
// Try again with trimmed content.
embeddingResponse = await openai.embeddings.create({
Expand All @@ -100,7 +105,7 @@ export async function updateEmbeddings(
};

console.log(`Retrieving all indexed documents...`);
const existingDocs = await fetchAllExistingDocs(supabaseClient);
const existingDocs = await fetchAllExistingDocs(pgClient);
console.log(`-> Done.`);

const existingDocByUrl = new Map<string, IndexedDoc>(
Expand Down Expand Up @@ -162,23 +167,41 @@ export async function updateEmbeddings(
const { total_tokens, embedding } = await createEmbedding(text);

// Create/update document record.
await supabaseClient
.from("mdn_doc_macro")
.upsert(
{
mdn_url,
title,
hash,
html,
token_count: total_tokens,
embedding,
text_hash,
},
{ onConflict: "mdn_url" }
)
.select()
.single()
.throwOnError();
const query = {
name: "upsert-embedding-doc",
text: `
INSERT INTO mdn_doc_macro(
mdn_url,
title,
hash,
html,
token_count,
embedding,
text_hash
)
VALUES($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (mdn_url) DO
UPDATE
SET mdn_url = $1,
title = $2,
hash = $3,
html = $4,
token_count = $5,
embedding = $6,
text_hash = $7
`,
values: [
mdn_url,
title,
hash,
html,
total_tokens,
pgvector.toSql(embedding),
text_hash,
],
rowMode: "array",
};

await pgClient.query(query);
} catch (err: any) {
console.error(`!> [${mdn_url}] Failed to update document.`);
const context = err?.response?.data ?? err?.response ?? err;
Expand All @@ -192,20 +215,22 @@ export async function updateEmbeddings(
);

// Create/update document record.
await supabaseClient
.from("mdn_doc_macro")
.upsert(
{
mdn_url,
title,
hash,
html,
},
{ onConflict: "mdn_url" }
)
.select()
.single()
.throwOnError();
const query = {
name: "upsert-doc",
text: `
INSERT INTO mdn_doc_macro(mdn_url, title, hash, html)
VALUES($1, $2, $3, $4) ON CONFLICT (mdn_url) DO
UPDATE
SET mdn_url = $1,
title = $2,
hash = $3,
html = $4
`,
values: [mdn_url, title, hash, html],
rowMode: "array",
};

await pgClient.query(query);
} catch (err: any) {
console.error(`!> [${mdn_url}] Failed to update document.`);
const context = err?.response?.data ?? err?.response ?? err;
Expand All @@ -219,14 +244,18 @@ export async function updateEmbeddings(
console.log(`Applying deletions...`);
for (const { id, mdn_url } of deletions) {
console.log(`-> [${mdn_url}] Deleting indexed document...`);
await supabaseClient
.from("mdn_doc_macro")
.delete()
.eq("id", id)
.throwOnError();
const query = {
name: "delete-doc",
text: `DELETE from mdn_doc_macro WHERE id = $1`,
values: [id],
rowMode: "array",
};

await pgClient.query(query);
}
console.log(`-> Done.`);
}
pgClient.end();
}

async function formatDocs(directory: string) {
Expand Down Expand Up @@ -449,24 +478,44 @@ export function isNotSupportedAtAll(support: SimpleSupportStatement) {
return !support.version_added && !hasLimitation(support);
}

async function fetchAllExistingDocs(supabase: SupabaseClient) {
async function fetchAllExistingDocs(pgClient) {
const PAGE_SIZE = 1000;
const selectDocs = () =>
supabase
.from("mdn_doc_macro")
.select("id, mdn_url, title, hash, token_count, text_hash")
.order("id")
.limit(PAGE_SIZE);

let { data } = await selectDocs().throwOnError();
let allData = data;
while (data.length === PAGE_SIZE) {
const lastItem = data[data.length - 1];
({ data } = await selectDocs().gt("id", lastItem.id).throwOnError());
allData = [...allData, ...data];
const selectDocs = async (lastId) => {
const query = {
name: "fetch-all-doc",
text: `
SELECT id,
mdn_url,
title,
hash,
token_count,
text_hash
from mdn_doc_macro
WHERE id > $1
ORDER BY id ASC
LIMIT $2
`,
values: [lastId, PAGE_SIZE],
rowMode: "array",
};
const result = await pgClient.query(query);
return result.rows.map(
([id, mdn_url, title, hash, token_count, text_hash]) => {
return { id, mdn_url, title, hash, token_count, text_hash };
}
);
};

const allDocs = [];
let docs = await selectDocs(0);
allDocs.push(...docs);
while (docs.length === PAGE_SIZE) {
const lastItem = docs[docs.length - 1];
docs = await selectDocs(lastItem.id);
allDocs.push(...docs);
}

return allData;
return allDocs;
}

// CLI.
Expand Down
100 changes: 100 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4397,6 +4397,11 @@ buffer-from@^1.0.0:
resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5"
integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==

buffer-writer@2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/buffer-writer/-/buffer-writer-2.0.0.tgz#ce7eb81a38f7829db09c873f2fbb792c0c98ec04"
integrity sha512-a7ZpuTZU1TRtnwyCNW3I5dc0wWNC3VR9S++Ewyk2HHZdrO3CQJqSpd+95Us590V6AL7JqUAH2IwZ/398PmNFgw==

buffer@^5.2.1, buffer@^5.5.0:
version "5.7.1"
resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.7.1.tgz#ba62e7c13133053582197160851a8f648e99eed0"
Expand Down Expand Up @@ -11343,6 +11348,11 @@ p-try@^2.0.0:
resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6"
integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==

packet-reader@1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/packet-reader/-/packet-reader-1.0.0.tgz#9238e5480dedabacfe1fe3f2771063f164157d74"
integrity sha512-HAKu/fG3HpHFO0AA8WE8q2g+gBJaZ9MG7fcKk+IJPLTGAD6Psw4443l+9DGRbOIh3/aXr7Phy0TjilYivJo5XQ==

param-case@^3.0.4:
version "3.0.4"
resolved "https://registry.yarnpkg.com/param-case/-/param-case-3.0.4.tgz#7d17fe4aa12bde34d4a77d91acfb6219caad01c5"
Expand Down Expand Up @@ -11490,6 +11500,69 @@ performance-now@^2.1.0:
resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
integrity sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow==

pg-cloudflare@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/pg-cloudflare/-/pg-cloudflare-1.1.1.tgz#e6d5833015b170e23ae819e8c5d7eaedb472ca98"
integrity sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==

pg-connection-string@^2.6.2:
version "2.6.2"
resolved "https://registry.yarnpkg.com/pg-connection-string/-/pg-connection-string-2.6.2.tgz#713d82053de4e2bd166fab70cd4f26ad36aab475"
integrity sha512-ch6OwaeaPYcova4kKZ15sbJ2hKb/VP48ZD2gE7i1J+L4MspCtBMAx8nMgz7bksc7IojCIIWuEhHibSMFH8m8oA==

pg-int8@1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/pg-int8/-/pg-int8-1.0.1.tgz#943bd463bf5b71b4170115f80f8efc9a0c0eb78c"
integrity sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==

pg-pool@^3.6.1:
version "3.6.1"
resolved "https://registry.yarnpkg.com/pg-pool/-/pg-pool-3.6.1.tgz#5a902eda79a8d7e3c928b77abf776b3cb7d351f7"
integrity sha512-jizsIzhkIitxCGfPRzJn1ZdcosIt3pz9Sh3V01fm1vZnbnCMgmGl5wvGGdNN2EL9Rmb0EcFoCkixH4Pu+sP9Og==

pg-protocol@^1.6.0:
version "1.6.0"
resolved "https://registry.yarnpkg.com/pg-protocol/-/pg-protocol-1.6.0.tgz#4c91613c0315349363af2084608db843502f8833"
integrity sha512-M+PDm637OY5WM307051+bsDia5Xej6d9IR4GwJse1qA1DIhiKlksvrneZOYQq42OM+spubpcNYEo2FcKQrDk+Q==

pg-types@^2.1.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/pg-types/-/pg-types-2.2.0.tgz#2d0250d636454f7cfa3b6ae0382fdfa8063254a3"
integrity sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==
dependencies:
pg-int8 "1.0.1"
postgres-array "~2.0.0"
postgres-bytea "~1.0.0"
postgres-date "~1.0.4"
postgres-interval "^1.1.0"

pg@^8.11.3:
version "8.11.3"
resolved "https://registry.yarnpkg.com/pg/-/pg-8.11.3.tgz#d7db6e3fe268fcedd65b8e4599cda0b8b4bf76cb"
integrity sha512-+9iuvG8QfaaUrrph+kpF24cXkH1YOOUeArRNYIxq1viYHZagBxrTno7cecY1Fa44tJeZvaoG+Djpkc3JwehN5g==
dependencies:
buffer-writer "2.0.0"
packet-reader "1.0.0"
pg-connection-string "^2.6.2"
pg-pool "^3.6.1"
pg-protocol "^1.6.0"
pg-types "^2.1.0"
pgpass "1.x"
optionalDependencies:
pg-cloudflare "^1.1.1"

pgpass@1.x:
version "1.0.5"
resolved "https://registry.yarnpkg.com/pgpass/-/pgpass-1.0.5.tgz#9b873e4a564bb10fa7a7dbd55312728d422a223d"
integrity sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==
dependencies:
split2 "^4.1.0"

pgvector@^0.1.7:
version "0.1.7"
resolved "https://registry.yarnpkg.com/pgvector/-/pgvector-0.1.7.tgz#0a170c85da8bae79d79cb09c1968d42d021489f5"
integrity sha512-hl1/Rvvu8iENi/0x4QcZh6o3bF2GWyyKIEu5GfXIzRhzhbOo7aQmoTPNWedRG1pziif+gOMTKKzHaJorgr8F0A==

picocolors@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c"
Expand Down Expand Up @@ -12167,6 +12240,28 @@ postcss@^8.2.14, postcss@^8.4.23, postcss@^8.4.24, postcss@^8.4.28, postcss@^8.4
picocolors "^1.0.0"
source-map-js "^1.0.2"

postgres-array@~2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/postgres-array/-/postgres-array-2.0.0.tgz#48f8fce054fbc69671999329b8834b772652d82e"
integrity sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==

postgres-bytea@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/postgres-bytea/-/postgres-bytea-1.0.0.tgz#027b533c0aa890e26d172d47cf9ccecc521acd35"
integrity sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==

postgres-date@~1.0.4:
version "1.0.7"
resolved "https://registry.yarnpkg.com/postgres-date/-/postgres-date-1.0.7.tgz#51bc086006005e5061c591cee727f2531bf641a8"
integrity sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==

postgres-interval@^1.1.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/postgres-interval/-/postgres-interval-1.2.0.tgz#b460c82cb1587507788819a06aa0fffdb3544695"
integrity sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==
dependencies:
xtend "^4.0.0"

prelude-ls@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396"
Expand Down Expand Up @@ -13661,6 +13756,11 @@ spdy@^4.0.2:
select-hose "^2.0.0"
spdy-transport "^3.0.0"

split2@^4.1.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/split2/-/split2-4.2.0.tgz#c9c5920904d148bab0b9f67145f245a86aadbfa4"
integrity sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==

sprintf-js@~1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
Expand Down
Loading