Skip to content

Commit

Permalink
feat(community): Remove required param from LanceDB integration (langchain-ai#6706)
Browse files Browse the repository at this point in the history

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
  • Loading branch information
2 people authored and FilipZmijewski committed Sep 27, 2024
1 parent d471435 commit 42ea1dd
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 77 deletions.
2 changes: 1 addition & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
"typeorm": "^0.3.20",
"typesense": "^1.5.3",
"uuid": "^10.0.0",
"vectordb": "^0.1.4",
"vectordb": "^0.9.0",
"voy-search": "0.6.2",
"weaviate-ts-client": "^2.0.0",
"zod": "^3.22.4",
Expand Down
27 changes: 16 additions & 11 deletions examples/src/indexes/vector_stores/lancedb/fromDocs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,29 @@ import { TextLoader } from "langchain/document_loaders/fs/text";
import fs from "node:fs/promises";
import path from "node:path";
import os from "node:os";
import { connect } from "vectordb";

// Create docs with a loader
const loader = new TextLoader("src/document_loaders/example_data/example.txt");
const docs = await loader.load();

/**
 * Builds a LanceDB vector store from the module-level `docs` without passing
 * any store configuration, then logs the single closest match for the query
 * "hello world".
 */
export const run = async () => {
  const embeddings = new OpenAIEmbeddings();
  const store = await LanceDB.fromDocuments(docs, embeddings);

  const nearest = await store.similaritySearch("hello world", 1);
  console.log(nearest);

  // Example output:
  // [
  //   Document {
  //     pageContent: 'Foo\nBar\nBaz\n\n',
  //     metadata: { source: 'src/document_loaders/example_data/example.txt' }
  //   }
  // ]
};

export const run_with_existing_table = async () => {
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-"));
const db = await connect(dir);
const table = await db.createTable("vectors", [
{ vector: Array(1536), text: "sample", source: "a" },
]);

const vectorStore = await LanceDB.fromDocuments(
docs,
new OpenAIEmbeddings(),
{ table }
);

const vectorStore = await LanceDB.fromDocuments(docs, new OpenAIEmbeddings());

const resultOne = await vectorStore.similaritySearch("hello world", 1);
console.log(resultOne);
Expand Down
21 changes: 13 additions & 8 deletions examples/src/indexes/vector_stores/lancedb/fromTexts.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
import { LanceDB } from "@langchain/community/vectorstores/lancedb";
import { OpenAIEmbeddings } from "@langchain/openai";
import { connect } from "vectordb";
import * as fs from "node:fs/promises";
import * as path from "node:path";
import os from "node:os";

/**
 * Example entry point: builds a LanceDB vector store from three short texts
 * with matching `id` metadata, then logs the closest match for "hello world".
 */
export const run = async () => {
// Create a scratch directory under the OS temp dir and open a LanceDB
// connection on it.
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-"));
const db = await connect(dir);
// Seed a "vectors" table with one placeholder row (1536 is presumably the
// embedding dimension — TODO confirm).
// NOTE(review): `table` is never passed to `fromTexts` below, so as written
// the store does not use it — confirm whether that is intended.
const table = await db.createTable("vectors", [
{ vector: Array(1536), text: "sample", id: 1 },
]);
// No store configuration argument is supplied to `fromTexts` here.
const vectorStore = await LanceDB.fromTexts(
["Hello world", "Bye bye", "hello nice world"],
[{ id: 2 }, { id: 1 }, { id: 3 }],
new OpenAIEmbeddings()
);

// Ask for the single nearest text to the query and print it.
const resultOne = await vectorStore.similaritySearch("hello world", 1);
console.log(resultOne);
// [ Document { pageContent: 'hello nice world', metadata: { id: 3 } } ]
};

export const run_with_existing_table = async () => {
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-"));
const vectorStore = await LanceDB.fromTexts(
["Hello world", "Bye bye", "hello nice world"],
[{ id: 2 }, { id: 1 }, { id: 3 }],
new OpenAIEmbeddings(),
{ table }
new OpenAIEmbeddings()
);

const resultOne = await vectorStore.similaritySearch("hello world", 1);
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@
"typescript": "~5.1.6",
"typesense": "^1.5.3",
"usearch": "^1.1.1",
"vectordb": "^0.1.4",
"vectordb": "^0.9.0",
"voy-search": "0.6.2",
"weaviate-ts-client": "^1.4.0",
"web-auth-library": "^1.0.3",
Expand Down
43 changes: 34 additions & 9 deletions libs/langchain-community/src/vectorstores/lancedb.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Table } from "vectordb";
import { connect, Table, Connection, WriteMode } from "vectordb";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
Expand All @@ -8,8 +8,11 @@ import { Document } from "@langchain/core/documents";
* table and an optional textKey.
*/
export type LanceDBArgs = {
table: Table;
table?: Table;
textKey?: string;
uri?: string;
tableName?: string;
mode?: WriteMode;
};

/**
Expand All @@ -18,15 +21,24 @@ export type LanceDBArgs = {
* embeddings.
*/
export class LanceDB extends VectorStore {
private table: Table;
private table?: Table;

private textKey: string;

constructor(embeddings: EmbeddingsInterface, args: LanceDBArgs) {
super(embeddings, args);
this.table = args.table;
private uri: string;

private tableName: string;

private mode?: WriteMode;

constructor(embeddings: EmbeddingsInterface, args?: LanceDBArgs) {
super(embeddings, args || {});
this.table = args?.table;
this.embeddings = embeddings;
this.textKey = args.textKey || "text";
this.textKey = args?.textKey || "text";
this.uri = args?.uri || "~/lancedb";
this.tableName = args?.tableName || "langchain";
this.mode = args?.mode || WriteMode.Overwrite;
}

/**
Expand Down Expand Up @@ -71,6 +83,14 @@ export class LanceDB extends VectorStore {
});
data.push(record);
}
if (!this.table) {
const db: Connection = await connect(this.uri);
this.table = await db.createTable(this.tableName, data, {
writeMode: this.mode,
});

return;
}
await this.table.add(data);
}

Expand All @@ -85,6 +105,11 @@ export class LanceDB extends VectorStore {
query: number[],
k: number
): Promise<[Document, number][]> {
if (!this.table) {
throw new Error(
"Table not found. Please add vectors to the table first."
);
}
const results = await this.table.search(query).limit(k).execute();

const docsAndScore: [Document, number][] = [];
Expand Down Expand Up @@ -119,7 +144,7 @@ export class LanceDB extends VectorStore {
texts: string[],
metadatas: object[] | object,
embeddings: EmbeddingsInterface,
dbConfig: LanceDBArgs
dbConfig?: LanceDBArgs
): Promise<LanceDB> {
const docs: Document[] = [];
for (let i = 0; i < texts.length; i += 1) {
Expand All @@ -143,7 +168,7 @@ export class LanceDB extends VectorStore {
static async fromDocuments(
docs: Document[],
embeddings: EmbeddingsInterface,
dbConfig: LanceDBArgs
dbConfig?: LanceDBArgs
): Promise<LanceDB> {
const instance = new this(embeddings, dbConfig);
await instance.addDocuments(docs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,27 @@ describe("LanceDB", () => {
expect(resultsTwo.length).toBe(5);
});
});

// Exercises the store when no table (or any other config) is handed in:
// the first insert goes through fromTexts, a later one through addDocuments.
describe("LanceDB empty schema", () => {
  test("Test fromTexts + addDocuments", async () => {
    const query = "hello bye";
    const seedTexts = ["hello bye", "hello world", "bye bye"];
    const seedMetadata = [{ id: 1 }, { id: 2 }, { id: 3 }];

    const store = await LanceDB.fromTexts(
      seedTexts,
      seedMetadata,
      new OpenAIEmbeddings()
    );

    // k = 10 exceeds the three seeded texts, so all three are expected back.
    const beforeAdd = await store.similaritySearch(query, 10);
    expect(beforeAdd.length).toBe(3);

    // Append one more document to the same store instance.
    const extraDoc = new Document({
      pageContent: "a new world",
      metadata: { id: 4 },
    });
    await store.addDocuments([extraDoc]);

    // The same query should now see the original three plus the new one.
    const afterAdd = await store.similaritySearch(query, 10);
    expect(afterAdd.length).toBe(4);
  });
});
117 changes: 70 additions & 47 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -251,24 +251,6 @@ __metadata:
languageName: node
linkType: hard

"@apache-arrow/ts@npm:^12.0.0":
version: 12.0.0
resolution: "@apache-arrow/ts@npm:12.0.0"
dependencies:
"@types/command-line-args": 5.2.0
"@types/command-line-usage": 5.0.2
"@types/node": 18.14.5
"@types/pad-left": 2.1.1
command-line-args: 5.2.1
command-line-usage: 6.1.3
flatbuffers: 23.3.3
json-bignum: ^0.0.3
pad-left: ^2.1.0
tslib: ^2.5.0
checksum: 67b2791e14d5377b1d160a0d8390decc386e013c517713f8b9c100737a0e478a394086d91a8c846848d4e30289070a119d8e65191998f4c2555b18a29564df50
languageName: node
linkType: hard

"@apify/consts@npm:^2.13.0, @apify/consts@npm:^2.9.0":
version: 2.13.0
resolution: "@apify/consts@npm:2.13.0"
Expand Down Expand Up @@ -11123,6 +11105,41 @@ __metadata:
languageName: node
linkType: hard

"@lancedb/vectordb-darwin-arm64@npm:0.4.20":
version: 0.4.20
resolution: "@lancedb/vectordb-darwin-arm64@npm:0.4.20"
conditions: os=darwin & cpu=arm64
languageName: node
linkType: hard

"@lancedb/vectordb-darwin-x64@npm:0.4.20":
version: 0.4.20
resolution: "@lancedb/vectordb-darwin-x64@npm:0.4.20"
conditions: os=darwin & cpu=x64
languageName: node
linkType: hard

"@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20":
version: 0.4.20
resolution: "@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20"
conditions: os=linux & cpu=arm64
languageName: node
linkType: hard

"@lancedb/vectordb-linux-x64-gnu@npm:0.4.20":
version: 0.4.20
resolution: "@lancedb/vectordb-linux-x64-gnu@npm:0.4.20"
conditions: os=linux & cpu=x64
languageName: node
linkType: hard

"@lancedb/vectordb-win32-x64-msvc@npm:0.4.20":
version: 0.4.20
resolution: "@lancedb/vectordb-win32-x64-msvc@npm:0.4.20"
conditions: os=win32 & cpu=x64
languageName: node
linkType: hard

"@langchain/anthropic@*, @langchain/anthropic@workspace:*, @langchain/anthropic@workspace:libs/langchain-anthropic":
version: 0.0.0-use.local
resolution: "@langchain/anthropic@workspace:libs/langchain-anthropic"
Expand Down Expand Up @@ -11588,7 +11605,7 @@ __metadata:
typesense: ^1.5.3
usearch: ^1.1.1
uuid: ^10.0.0
vectordb: ^0.1.4
vectordb: ^0.9.0
voy-search: 0.6.2
weaviate-ts-client: ^1.4.0
web-auth-library: ^1.0.3
Expand Down Expand Up @@ -13050,6 +13067,13 @@ __metadata:
languageName: node
linkType: hard

"@neon-rs/load@npm:^0.0.74":
version: 0.0.74
resolution: "@neon-rs/load@npm:0.0.74"
checksum: d26ec9b08cdf1a7c5aeefe98f77112d205d11b4005a7934b21fe8fd27528847e08e4749e7e6c3fc05ae9f701175a58c11a095ae6af449634df3991a2c82e1dfa
languageName: node
linkType: hard

"@neondatabase/serverless@npm:0.6.0":
version: 0.6.0
resolution: "@neondatabase/serverless@npm:0.6.0"
Expand Down Expand Up @@ -20806,26 +20830,6 @@ __metadata:
languageName: node
linkType: hard

"apache-arrow@npm:^12.0.0":
version: 12.0.0
resolution: "apache-arrow@npm:12.0.0"
dependencies:
"@types/command-line-args": 5.2.0
"@types/command-line-usage": 5.0.2
"@types/node": 18.14.5
"@types/pad-left": 2.1.1
command-line-args: 5.2.1
command-line-usage: 6.1.3
flatbuffers: 23.3.3
json-bignum: ^0.0.3
pad-left: ^2.1.0
tslib: ^2.5.0
bin:
arrow2csv: bin/arrow2csv.js
checksum: 3285189517c2b298cda42852321ce127754918513116eade6e4914c57983f68b6ba96605cfaa2202796d3d6e14755d3b3758f76c1374492affa3d95714eaca40
languageName: node
linkType: hard

"apache-arrow@npm:^12.0.1":
version: 12.0.1
resolution: "apache-arrow@npm:12.0.1"
Expand Down Expand Up @@ -27231,7 +27235,7 @@ __metadata:
typescript: ~5.1.6
typesense: ^1.5.3
uuid: ^10.0.0
vectordb: ^0.1.4
vectordb: ^0.9.0
voy-search: 0.6.2
weaviate-ts-client: ^2.0.0
zod: ^3.22.4
Expand Down Expand Up @@ -42704,13 +42708,32 @@ __metadata:
languageName: node
linkType: hard

"vectordb@npm:^0.1.4":
version: 0.1.4
resolution: "vectordb@npm:0.1.4"
dependencies:
"@apache-arrow/ts": ^12.0.0
apache-arrow: ^12.0.0
checksum: 8a40abf4466479b0b9e61687416b5ab232458401917bf9a1d5f3d8ea8c8320ecc5691174f4d4c0cfef0bb6c16328a9088419fd90ac85fd7267dbccdd1f9e55d7
"vectordb@npm:^0.9.0":
version: 0.9.0
resolution: "vectordb@npm:0.9.0"
dependencies:
"@lancedb/vectordb-darwin-arm64": 0.4.20
"@lancedb/vectordb-darwin-x64": 0.4.20
"@lancedb/vectordb-linux-arm64-gnu": 0.4.20
"@lancedb/vectordb-linux-x64-gnu": 0.4.20
"@lancedb/vectordb-win32-x64-msvc": 0.4.20
"@neon-rs/load": ^0.0.74
axios: ^1.4.0
peerDependencies:
"@apache-arrow/ts": ^14.0.2
apache-arrow: ^14.0.2
dependenciesMeta:
"@lancedb/vectordb-darwin-arm64":
optional: true
"@lancedb/vectordb-darwin-x64":
optional: true
"@lancedb/vectordb-linux-arm64-gnu":
optional: true
"@lancedb/vectordb-linux-x64-gnu":
optional: true
"@lancedb/vectordb-win32-x64-msvc":
optional: true
conditions: (os=darwin | os=linux | os=win32) & (cpu=x64 | cpu=arm64)
languageName: node
linkType: hard

Expand Down

0 comments on commit 42ea1dd

Please sign in to comment.