diff --git a/examples/package.json b/examples/package.json index e20870dc08d7..ffe6a62dc799 100644 --- a/examples/package.json +++ b/examples/package.json @@ -102,7 +102,7 @@ "typeorm": "^0.3.20", "typesense": "^1.5.3", "uuid": "^10.0.0", - "vectordb": "^0.1.4", + "vectordb": "^0.9.0", "voy-search": "0.6.2", "weaviate-ts-client": "^2.0.0", "zod": "^3.22.4", diff --git a/examples/src/indexes/vector_stores/lancedb/fromDocs.ts b/examples/src/indexes/vector_stores/lancedb/fromDocs.ts index 69715191321d..2907ccde6fc9 100644 --- a/examples/src/indexes/vector_stores/lancedb/fromDocs.ts +++ b/examples/src/indexes/vector_stores/lancedb/fromDocs.ts @@ -4,24 +4,29 @@ import { TextLoader } from "langchain/document_loaders/fs/text"; import fs from "node:fs/promises"; import path from "node:path"; import os from "node:os"; -import { connect } from "vectordb"; // Create docs with a loader const loader = new TextLoader("src/document_loaders/example_data/example.txt"); const docs = await loader.load(); export const run = async () => { + const vectorStore = await LanceDB.fromDocuments(docs, new OpenAIEmbeddings()); + + const resultOne = await vectorStore.similaritySearch("hello world", 1); + console.log(resultOne); + + // [ + // Document { + // pageContent: 'Foo\nBar\nBaz\n\n', + // metadata: { source: 'src/document_loaders/example_data/example.txt' } + // } + // ] +}; + +export const run_with_existing_table = async () => { const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-")); - const db = await connect(dir); - const table = await db.createTable("vectors", [ - { vector: Array(1536), text: "sample", source: "a" }, - ]); - - const vectorStore = await LanceDB.fromDocuments( - docs, - new OpenAIEmbeddings(), - { table } - ); + + const vectorStore = await LanceDB.fromDocuments(docs, new OpenAIEmbeddings()); const resultOne = await vectorStore.similaritySearch("hello world", 1); console.log(resultOne); diff --git a/examples/src/indexes/vector_stores/lancedb/fromTexts.ts b/examples/src/indexes/vector_stores/lancedb/fromTexts.ts index 2f70f340d5ad..4156d1a161d2 100644 --- a/examples/src/indexes/vector_stores/lancedb/fromTexts.ts +++ b/examples/src/indexes/vector_stores/lancedb/fromTexts.ts @@ -1,22 +1,27 @@ import { LanceDB } from "@langchain/community/vectorstores/lancedb"; import { OpenAIEmbeddings } from "@langchain/openai"; -import { connect } from "vectordb"; import * as fs from "node:fs/promises"; import * as path from "node:path"; import os from "node:os"; export const run = async () => { - const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-")); - const db = await connect(dir); - const table = await db.createTable("vectors", [ - { vector: Array(1536), text: "sample", id: 1 }, - ]); + const vectorStore = await LanceDB.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [{ id: 2 }, { id: 1 }, { id: 3 }], + new OpenAIEmbeddings() + ); + const resultOne = await vectorStore.similaritySearch("hello world", 1); + console.log(resultOne); + // [ Document { pageContent: 'hello nice world', metadata: { id: 3 } } ] +}; + +export const run_with_existing_table = async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-")); const vectorStore = await LanceDB.fromTexts( ["Hello world", "Bye bye", "hello nice world"], [{ id: 2 }, { id: 1 }, { id: 3 }], - new OpenAIEmbeddings(), - { table } + new OpenAIEmbeddings() ); const resultOne = await vectorStore.similaritySearch("hello world", 1); diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 389e6b303a1e..164bbf7e7e48 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -207,7 +207,7 @@ "typescript": "~5.1.6", "typesense": "^1.5.3", "usearch": "^1.1.1", - "vectordb": "^0.1.4", + "vectordb": "^0.9.0", "voy-search": "0.6.2", "weaviate-ts-client": "^1.4.0", "web-auth-library": "^1.0.3", diff --git a/libs/langchain-community/src/vectorstores/lancedb.ts b/libs/langchain-community/src/vectorstores/lancedb.ts index def5a6d61cb4..7df73aac93e7 100644 --- a/libs/langchain-community/src/vectorstores/lancedb.ts +++ b/libs/langchain-community/src/vectorstores/lancedb.ts @@ -1,4 +1,4 @@ -import { Table } from "vectordb"; +import { connect, Table, Connection, WriteMode } from "vectordb"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; import { Document } from "@langchain/core/documents"; @@ -8,8 +8,11 @@ import { Document } from "@langchain/core/documents"; * table and an optional textKey. */ export type LanceDBArgs = { - table: Table; + table?: Table; textKey?: string; + uri?: string; + tableName?: string; + mode?: WriteMode; }; /** @@ -18,15 +21,24 @@ export type LanceDBArgs = { * embeddings. */ export class LanceDB extends VectorStore { - private table: Table; + private table?: Table; private textKey: string; - constructor(embeddings: EmbeddingsInterface, args: LanceDBArgs) { - super(embeddings, args); - this.table = args.table; + private uri: string; + + private tableName: string; + + private mode?: WriteMode; + + constructor(embeddings: EmbeddingsInterface, args?: LanceDBArgs) { + super(embeddings, args || {}); + this.table = args?.table; this.embeddings = embeddings; - this.textKey = args.textKey || "text"; + this.textKey = args?.textKey || "text"; + this.uri = args?.uri || "~/lancedb"; + this.tableName = args?.tableName || "langchain"; + this.mode = args?.mode || WriteMode.Overwrite; } /** @@ -71,6 +83,14 @@ export class LanceDB extends VectorStore { }); data.push(record); } + if (!this.table) { + const db: Connection = await connect(this.uri); + this.table = await db.createTable(this.tableName, data, { + writeMode: this.mode, + }); + + return; + } await this.table.add(data); } @@ -85,6 +105,11 @@ export class LanceDB extends VectorStore { query: number[], k: number ): Promise<[Document, number][]> { + if (!this.table) { + throw new Error( + "Table not found. Please add vectors to the table first." + ); + } const results = await this.table.search(query).limit(k).execute(); const docsAndScore: [Document, number][] = []; @@ -119,7 +144,7 @@ export class LanceDB extends VectorStore { texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, - dbConfig: LanceDBArgs + dbConfig?: LanceDBArgs ): Promise { const docs: Document[] = []; for (let i = 0; i < texts.length; i += 1) { @@ -143,7 +168,7 @@ export class LanceDB extends VectorStore { static async fromDocuments( docs: Document[], embeddings: EmbeddingsInterface, - dbConfig: LanceDBArgs + dbConfig?: LanceDBArgs ): Promise { const instance = new this(embeddings, dbConfig); await instance.addDocuments(docs); diff --git a/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts index ec9bb2bb566e..3d561c903440 100644 --- a/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts @@ -45,3 +45,27 @@ describe("LanceDB", () => { expect(resultsTwo.length).toBe(5); }); }); + +describe("LanceDB empty schema", () => { + test("Test fromTexts + addDocuments", async () => { + const embeddings = new OpenAIEmbeddings(); + const vectorStore = await LanceDB.fromTexts( + ["hello bye", "hello world", "bye bye"], + [{ id: 1 }, { id: 2 }, { id: 3 }], + embeddings + ); + + const results = await vectorStore.similaritySearch("hello bye", 10); + expect(results.length).toBe(3); + + await vectorStore.addDocuments([ + new Document({ + pageContent: "a new world", + metadata: { id: 4 }, + }), + ]); + + const resultsTwo = await vectorStore.similaritySearch("hello bye", 10); + expect(resultsTwo.length).toBe(4); + }); +}); diff --git a/yarn.lock b/yarn.lock index 1f1c606bab7f..c4d1455b8176 100644 --- a/yarn.lock +++ b/yarn.lock @@ -251,24 +251,6 @@ __metadata: languageName: node linkType: hard -"@apache-arrow/ts@npm:^12.0.0": - version: 12.0.0 - resolution: "@apache-arrow/ts@npm:12.0.0" - dependencies: - "@types/command-line-args": 5.2.0 - "@types/command-line-usage": 5.0.2 - "@types/node": 18.14.5 - "@types/pad-left": 2.1.1 - command-line-args: 5.2.1 - command-line-usage: 6.1.3 - flatbuffers: 23.3.3 - json-bignum: ^0.0.3 - pad-left: ^2.1.0 - tslib: ^2.5.0 - checksum: 67b2791e14d5377b1d160a0d8390decc386e013c517713f8b9c100737a0e478a394086d91a8c846848d4e30289070a119d8e65191998f4c2555b18a29564df50 - languageName: node - linkType: hard - "@apify/consts@npm:^2.13.0, @apify/consts@npm:^2.9.0": version: 2.13.0 resolution: "@apify/consts@npm:2.13.0" @@ -11112,6 +11094,41 @@ __metadata: languageName: node linkType: hard +"@lancedb/vectordb-darwin-arm64@npm:0.4.20": + version: 0.4.20 + resolution: "@lancedb/vectordb-darwin-arm64@npm:0.4.20" + conditions: os=darwin & cpu=arm64 + languageName: node + linkType: hard + +"@lancedb/vectordb-darwin-x64@npm:0.4.20": + version: 0.4.20 + resolution: "@lancedb/vectordb-darwin-x64@npm:0.4.20" + conditions: os=darwin & cpu=x64 + languageName: node + linkType: hard + +"@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20": + version: 0.4.20 + resolution: "@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20" + conditions: os=linux & cpu=arm64 + languageName: node + linkType: hard + +"@lancedb/vectordb-linux-x64-gnu@npm:0.4.20": + version: 0.4.20 + resolution: "@lancedb/vectordb-linux-x64-gnu@npm:0.4.20" + conditions: os=linux & cpu=x64 + languageName: node + linkType: hard + +"@lancedb/vectordb-win32-x64-msvc@npm:0.4.20": + version: 0.4.20 + resolution: "@lancedb/vectordb-win32-x64-msvc@npm:0.4.20" + conditions: os=win32 & cpu=x64 + languageName: node + linkType: hard + "@langchain/anthropic@*, @langchain/anthropic@workspace:*, @langchain/anthropic@workspace:libs/langchain-anthropic": version: 0.0.0-use.local resolution: "@langchain/anthropic@workspace:libs/langchain-anthropic" @@ -11577,7 +11594,7 @@ __metadata: typesense: ^1.5.3 usearch: ^1.1.1 uuid: ^10.0.0 - vectordb: ^0.1.4 + vectordb: ^0.9.0 voy-search: 0.6.2 weaviate-ts-client: ^1.4.0 web-auth-library: ^1.0.3 @@ -13039,6 +13056,13 @@ __metadata: languageName: node linkType: hard +"@neon-rs/load@npm:^0.0.74": + version: 0.0.74 + resolution: "@neon-rs/load@npm:0.0.74" + checksum: d26ec9b08cdf1a7c5aeefe98f77112d205d11b4005a7934b21fe8fd27528847e08e4749e7e6c3fc05ae9f701175a58c11a095ae6af449634df3991a2c82e1dfa + languageName: node + linkType: hard + "@neondatabase/serverless@npm:0.6.0": version: 0.6.0 resolution: "@neondatabase/serverless@npm:0.6.0" @@ -20774,26 +20798,6 @@ __metadata: languageName: node linkType: hard -"apache-arrow@npm:^12.0.0": - version: 12.0.0 - resolution: "apache-arrow@npm:12.0.0" - dependencies: - "@types/command-line-args": 5.2.0 - "@types/command-line-usage": 5.0.2 - "@types/node": 18.14.5 - "@types/pad-left": 2.1.1 - command-line-args: 5.2.1 - command-line-usage: 6.1.3 - flatbuffers: 23.3.3 - json-bignum: ^0.0.3 - pad-left: ^2.1.0 - tslib: ^2.5.0 - bin: - arrow2csv: bin/arrow2csv.js - checksum: 3285189517c2b298cda42852321ce127754918513116eade6e4914c57983f68b6ba96605cfaa2202796d3d6e14755d3b3758f76c1374492affa3d95714eaca40 - languageName: node - linkType: hard - "apache-arrow@npm:^12.0.1": version: 12.0.1 resolution: "apache-arrow@npm:12.0.1" @@ -27133,7 +27137,7 @@ __metadata: typescript: ~5.1.6 typesense: ^1.5.3 uuid: ^10.0.0 - vectordb: ^0.1.4 + vectordb: ^0.9.0 voy-search: 0.6.2 weaviate-ts-client: ^2.0.0 zod: ^3.22.4 @@ -42444,13 +42448,32 @@ __metadata: languageName: node linkType: hard -"vectordb@npm:^0.1.4": - version: 0.1.4 - resolution: "vectordb@npm:0.1.4" - dependencies: - "@apache-arrow/ts": ^12.0.0 - apache-arrow: ^12.0.0 - checksum: 8a40abf4466479b0b9e61687416b5ab232458401917bf9a1d5f3d8ea8c8320ecc5691174f4d4c0cfef0bb6c16328a9088419fd90ac85fd7267dbccdd1f9e55d7 +"vectordb@npm:^0.9.0": + version: 0.9.0 + resolution: "vectordb@npm:0.9.0" + dependencies: + "@lancedb/vectordb-darwin-arm64": 0.4.20 + "@lancedb/vectordb-darwin-x64": 0.4.20 + "@lancedb/vectordb-linux-arm64-gnu": 0.4.20 + "@lancedb/vectordb-linux-x64-gnu": 0.4.20 + "@lancedb/vectordb-win32-x64-msvc": 0.4.20 + "@neon-rs/load": ^0.0.74 + axios: ^1.4.0 + peerDependencies: + "@apache-arrow/ts": ^14.0.2 + apache-arrow: ^14.0.2 + dependenciesMeta: + "@lancedb/vectordb-darwin-arm64": + optional: true + "@lancedb/vectordb-darwin-x64": + optional: true + "@lancedb/vectordb-linux-arm64-gnu": + optional: true + "@lancedb/vectordb-linux-x64-gnu": + optional: true + "@lancedb/vectordb-win32-x64-msvc": + optional: true + conditions: (os=darwin | os=linux | os=win32) & (cpu=x64 | cpu=arm64) languageName: node linkType: hard