From 1f34ea6ddbcfbf6781cd2b8ea4b1ef1a41ebaca0 Mon Sep 17 00:00:00 2001 From: Jasper Heidinga Date: Wed, 12 Apr 2023 19:09:57 +0200 Subject: [PATCH 1/8] Implemented review comments --- langchain/package.json | 9 +- langchain/src/vectorstores/weaviate.ts | 149 +++++++++++++++++++++++++ yarn.lock | 61 +++++++++- 3 files changed, 214 insertions(+), 5 deletions(-) create mode 100644 langchain/src/vectorstores/weaviate.ts diff --git a/langchain/package.json b/langchain/package.json index 1e07a2815a05..c678612b8385 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -291,7 +291,8 @@ "srt-parser-2": "^1.2.2", "ts-jest": "^29.0.5", "typeorm": "^0.3.12", - "typescript": "^4.9.5" + "typescript": "^4.9.5", + "weaviate-ts-client": "1.0.0" }, "peerDependencies": { "@aws-sdk/client-s3": "^3.310.0", @@ -314,7 +315,8 @@ "redis": "^4.6.4", "replicate": "^0.9.0", "srt-parser-2": "^1.2.2", - "typeorm": "^0.3.12" + "typeorm": "^0.3.12", + "weaviate-ts-client": "1.0.0" }, "peerDependenciesMeta": { "@aws-sdk/client-s3": { @@ -379,6 +381,9 @@ }, "typeorm": { "optional": true + }, + "weaviate-ts-client": { + "optional": true } }, "dependencies": { diff --git a/langchain/src/vectorstores/weaviate.ts b/langchain/src/vectorstores/weaviate.ts new file mode 100644 index 000000000000..53318a5333b3 --- /dev/null +++ b/langchain/src/vectorstores/weaviate.ts @@ -0,0 +1,149 @@ +import { v4 } from "uuid"; +import { WeaviateObject, type WeaviateClient } from "weaviate-ts-client"; +import { VectorStore } from "./base.js"; +import { Embeddings } from "../embeddings/base.js"; +import { Document } from "../document.js"; + +export interface WeaviateLibArgs { + client: WeaviateClient; + indexName: string; + textKey: string; + attributes?: string[]; +} + +export class WeaviateStore extends VectorStore { + private client: WeaviateClient; + + private indexName: string; + + private textKey: string; + + private queryAttrs: string[]; + + constructor(public embeddings: Embeddings, args: 
WeaviateLibArgs) { + super(embeddings, args); + + this.client = args.client; + this.indexName = args.indexName; + this.textKey = args.textKey; + this.queryAttrs = [this.textKey]; + + if (args.attributes) { + this.queryAttrs = this.queryAttrs.concat(args.attributes); + } + } + + addVectors(_vectors: number[][], _documents: Document[]): Promise { + throw new Error("Not Implemented"); + } + + async addDocuments(documents: Document[]): Promise { + const batch: WeaviateObject[] = documents.map((document) => ({ + class: this.indexName, + id: v4(), + properties: { + [this.textKey]: document.pageContent, + ...document.metadata, + }, + })); + + try { + await this.client.batch + .objectsBatcher() + .withObjects(...batch) + .do(); + } catch (e) { + throw Error(`'Error in addDocuments' ${e}`); + } + } + + similaritySearchVectorWithScore( + _query: number[], + _k: number, + _filter?: object + ): Promise<[Document, number][]> { + throw new Error("Not Implemented"); + } + + async similaritySearch( + query: string, + k: number, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + filter?: Record | undefined + ): Promise { + const content: { + concepts: string[]; + certainty?: number; + } = { + concepts: [query], + }; + + if (filter?.searchDistance) { + content.certainty = filter.searchDistance; + } + + try { + const result = await this.client.graphql + .get() + .withClassName(this.indexName) + .withFields(this.queryAttrs.join(" ")) + .withNearText({ concepts: [query] }) + .withLimit(k) + .do(); + + const documents = []; + for (const data of result.data.Get[this.indexName]) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const record: Record = data; + const text = record[this.textKey]; + delete record[this.textKey]; + + documents.push( + new Document({ + pageContent: text, + metadata: record, + }) + ); + } + return documents; + } catch (e) { + throw Error(`'Error in similaritySearch' ${e}`); + } + } + + similaritySearchWithScore( + _query: string, 
+ _k?: number, + _filter?: object | undefined + ): Promise<[object, number][]> { + throw Error("Not Implemented"); + } + + static fromTexts( + texts: string[], + metadatas: object | object[], + embeddings: Embeddings, + args: WeaviateLibArgs + ): Promise { + const docs = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return WeaviateStore.fromDocuments(docs, embeddings, args); + } + + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + args: WeaviateLibArgs + ): Promise { + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/yarn.lock b/yarn.lock index 8ea912e98644..abb9741b7feb 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4562,6 +4562,15 @@ __metadata: languageName: node linkType: hard +"@graphql-typed-document-node/core@npm:^3.1.1": + version: 3.2.0 + resolution: "@graphql-typed-document-node/core@npm:3.2.0" + peerDependencies: + graphql: ^0.8.0 || ^0.9.0 || ^0.10.0 || ^0.11.0 || ^0.12.0 || ^0.13.0 || ^14.0.0 || ^15.0.0 || ^16.0.0 || ^17.0.0 + checksum: fa44443accd28c8cf4cb96aaaf39d144a22e8b091b13366843f4e97d19c7bfeaf609ce3c7603a4aeffe385081eaf8ea245d078633a7324c11c5ec4b2011bb76d + languageName: node + linkType: hard + "@grpc/grpc-js@npm:^1.2.12": version: 1.8.13 resolution: "@grpc/grpc-js@npm:1.8.13" @@ -13108,6 +13117,13 @@ __metadata: languageName: node linkType: hard +"extract-files@npm:^9.0.0": + version: 9.0.0 + resolution: "extract-files@npm:9.0.0" + checksum: c31781d090f8d8f62cc541f1023b39ea863f24bd6fb3d4011922d71cbded70cef8191f2b70b43ec6cb5c5907cdad1dc5e9f29f78228936c10adc239091d8ab64 + languageName: node + linkType: hard + "extract-zip@npm:2.0.1": version: 2.0.1 resolution: "extract-zip@npm:2.0.1" @@ -14092,6 +14108,20 @@ __metadata: languageName: node linkType: hard 
+"graphql-request@npm:^5.1.0": + version: 5.2.0 + resolution: "graphql-request@npm:5.2.0" + dependencies: + "@graphql-typed-document-node/core": ^3.1.1 + cross-fetch: ^3.1.5 + extract-files: ^9.0.0 + form-data: ^3.0.0 + peerDependencies: + graphql: 14 - 16 + checksum: a8aa37816378898e6fc8c4db04a1c114c98f98d90718cf1680bd96b22724bd43b1210619f9b0d328b5c1acb4f7b76d2227a2537cd5ab059bb54cf0debecb33bf + languageName: node + linkType: hard + "gray-matter@npm:^4.0.3": version: 4.0.3 resolution: "gray-matter@npm:4.0.3" @@ -15510,6 +15540,16 @@ __metadata: languageName: node linkType: hard +"isomorphic-fetch@npm:^3.0.0": + version: 3.0.0 + resolution: "isomorphic-fetch@npm:3.0.0" + dependencies: + node-fetch: ^2.6.1 + whatwg-fetch: ^3.4.1 + checksum: e5ab79a56ce5af6ddd21265f59312ad9a4bc5a72cebc98b54797b42cb30441d5c5f8d17c5cd84a99e18101c8af6f90c081ecb8d12fd79e332be1778d58486d75 + languageName: node + linkType: hard + "issue-parser@npm:6.0.0": version: 6.0.0 resolution: "issue-parser@npm:6.0.0" @@ -17039,6 +17079,7 @@ __metadata: typeorm: ^0.3.12 typescript: ^4.9.5 uuid: ^9.0.0 + weaviate-ts-client: 1.0.0 yaml: ^2.2.1 zod: ^3.21.4 peerDependencies: @@ -17063,6 +17104,7 @@ __metadata: replicate: ^0.9.0 srt-parser-2: ^1.2.2 typeorm: ^0.3.12 + weaviate-ts-client: 1.0.0 peerDependenciesMeta: "@aws-sdk/client-s3": optional: true @@ -17106,6 +17148,8 @@ __metadata: optional: true typeorm: optional: true + weaviate-ts-client: + optional: true languageName: unknown linkType: soft @@ -18451,7 +18495,7 @@ __metadata: languageName: node linkType: hard -"node-fetch@npm:^2.6.7": +"node-fetch@npm:^2.6.1, node-fetch@npm:^2.6.7": version: 2.6.9 resolution: "node-fetch@npm:2.6.9" dependencies: @@ -23457,7 +23501,7 @@ __metadata: languageName: node linkType: hard -"tar@npm:^6.0.2, tar@npm:^6.1.11, tar@npm:^6.1.2": +"tar@npm:^6.0.2, tar@npm:^6.1.11, tar@npm:^6.1.13, tar@npm:^6.1.2": version: 6.1.13 resolution: "tar@npm:6.1.13" dependencies: @@ -25198,6 +25242,17 @@ __metadata: languageName: node 
linkType: hard +"weaviate-ts-client@npm:1.0.0": + version: 1.0.0 + resolution: "weaviate-ts-client@npm:1.0.0" + dependencies: + graphql-request: ^5.1.0 + isomorphic-fetch: ^3.0.0 + tar: ^6.1.13 + checksum: ffa3c13913cf8999a1bfd3db1cc1952d7bfd4e6ca3d2f6eadfb17a56595bfc74fd9f068d1e9d73677b87281d53486ba333d5d5aedb31f072f05d6a5d8e7b2502 + languageName: node + linkType: hard + "web-namespaces@npm:^1.0.0": version: 1.1.4 resolution: "web-namespaces@npm:1.1.4" @@ -25567,7 +25622,7 @@ __metadata: languageName: node linkType: hard -"whatwg-fetch@npm:^3.6.2": +"whatwg-fetch@npm:^3.4.1, whatwg-fetch@npm:^3.6.2": version: 3.6.2 resolution: "whatwg-fetch@npm:3.6.2" checksum: ee976b7249e7791edb0d0a62cd806b29006ad7ec3a3d89145921ad8c00a3a67e4be8f3fb3ec6bc7b58498724fd568d11aeeeea1f7827e7e1e5eae6c8a275afed From 86a5e3e4e5cbd1d85170c081354d13b5054cc261 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 14 Apr 2023 13:35:37 +0100 Subject: [PATCH 2/8] Update dep range --- langchain/package.json | 4 ++-- yarn.lock | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/langchain/package.json b/langchain/package.json index c678612b8385..d6ae7e7fedd7 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -292,7 +292,7 @@ "ts-jest": "^29.0.5", "typeorm": "^0.3.12", "typescript": "^4.9.5", - "weaviate-ts-client": "1.0.0" + "weaviate-ts-client": "^1.0.0" }, "peerDependencies": { "@aws-sdk/client-s3": "^3.310.0", @@ -316,7 +316,7 @@ "replicate": "^0.9.0", "srt-parser-2": "^1.2.2", "typeorm": "^0.3.12", - "weaviate-ts-client": "1.0.0" + "weaviate-ts-client": "^1.0.0" }, "peerDependenciesMeta": { "@aws-sdk/client-s3": { diff --git a/yarn.lock b/yarn.lock index abb9741b7feb..0ada31767faf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -17079,7 +17079,7 @@ __metadata: typeorm: ^0.3.12 typescript: ^4.9.5 uuid: ^9.0.0 - weaviate-ts-client: 1.0.0 + weaviate-ts-client: ^1.0.0 yaml: ^2.2.1 zod: ^3.21.4 peerDependencies: @@ -17104,7 +17104,7 @@ __metadata: replicate: ^0.9.0 
srt-parser-2: ^1.2.2 typeorm: ^0.3.12 - weaviate-ts-client: 1.0.0 + weaviate-ts-client: ^1.0.0 peerDependenciesMeta: "@aws-sdk/client-s3": optional: true @@ -25242,7 +25242,7 @@ __metadata: languageName: node linkType: hard -"weaviate-ts-client@npm:1.0.0": +"weaviate-ts-client@npm:^1.0.0": version: 1.0.0 resolution: "weaviate-ts-client@npm:1.0.0" dependencies: From 274075188b76cda46079099a385bc92bc6aac587 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 14 Apr 2023 13:57:18 +0100 Subject: [PATCH 3/8] Implement missing methods, add filtering --- langchain/src/vectorstores/weaviate.ts | 100 ++++++++++++------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/langchain/src/vectorstores/weaviate.ts b/langchain/src/vectorstores/weaviate.ts index 53318a5333b3..e4d8267e1ab9 100644 --- a/langchain/src/vectorstores/weaviate.ts +++ b/langchain/src/vectorstores/weaviate.ts @@ -1,5 +1,9 @@ import { v4 } from "uuid"; -import { WeaviateObject, type WeaviateClient } from "weaviate-ts-client"; +import type { + WeaviateObject, + WeaviateClient, + WhereFilter, +} from "weaviate-ts-client"; import { VectorStore } from "./base.js"; import { Embeddings } from "../embeddings/base.js"; import { Document } from "../document.js"; @@ -11,6 +15,16 @@ export interface WeaviateLibArgs { attributes?: string[]; } +interface ResultRow { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + [key: string]: any; +} + +export interface WeaviateFilter { + distance?: number; + where: WhereFilter; +} + export class WeaviateStore extends VectorStore { private client: WeaviateClient; @@ -33,14 +47,11 @@ export class WeaviateStore extends VectorStore { } } - addVectors(_vectors: number[][], _documents: Document[]): Promise { - throw new Error("Not Implemented"); - } - - async addDocuments(documents: Document[]): Promise { - const batch: WeaviateObject[] = documents.map((document) => ({ + async addVectors(vectors: number[][], documents: Document[]): Promise { + const 
batch: WeaviateObject[] = documents.map((document, index) => ({ class: this.indexName, id: v4(), + vector: vectors[index], properties: { [this.textKey]: document.pageContent, ...document.metadata, @@ -57,53 +68,46 @@ export class WeaviateStore extends VectorStore { } } - similaritySearchVectorWithScore( - _query: number[], - _k: number, - _filter?: object - ): Promise<[Document, number][]> { - throw new Error("Not Implemented"); + async addDocuments(documents: Document[]): Promise { + return this.addVectors( + await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), + documents + ); } - async similaritySearch( - query: string, + async similaritySearchVectorWithScore( + query: number[], k: number, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - filter?: Record | undefined - ): Promise { - const content: { - concepts: string[]; - certainty?: number; - } = { - concepts: [query], - }; - - if (filter?.searchDistance) { - content.certainty = filter.searchDistance; - } - + filter?: WeaviateFilter + ): Promise<[Document, number][]> { try { - const result = await this.client.graphql + let builder = await this.client.graphql .get() .withClassName(this.indexName) - .withFields(this.queryAttrs.join(" ")) - .withNearText({ concepts: [query] }) - .withLimit(k) - .do(); + .withFields(`${this.queryAttrs.join(" ")} _additional { distance }`) + .withNearVector({ + vector: query, + distance: filter?.distance, + }) + .withLimit(k); + + if (filter?.where) { + builder = builder.withWhere(filter.where); + } + + const result = await builder.do(); - const documents = []; + const documents: [Document, number][] = []; for (const data of result.data.Get[this.indexName]) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const record: Record = data; - const text = record[this.textKey]; - delete record[this.textKey]; + const { [this.textKey]: text, _additional, ...rest }: ResultRow = data; - documents.push( + documents.push([ new Document({ 
pageContent: text, - metadata: record, - }) - ); + metadata: rest, + }), + _additional.distance, + ]); } return documents; } catch (e) { @@ -111,21 +115,13 @@ export class WeaviateStore extends VectorStore { } } - similaritySearchWithScore( - _query: string, - _k?: number, - _filter?: object | undefined - ): Promise<[object, number][]> { - throw Error("Not Implemented"); - } - static fromTexts( texts: string[], metadatas: object | object[], embeddings: Embeddings, args: WeaviateLibArgs ): Promise { - const docs = []; + const docs: Document[] = []; for (let i = 0; i < texts.length; i += 1) { const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; const newDoc = new Document({ From bef6d93bcf4835c7f50e1af5e8ca993729f7d207 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 14 Apr 2023 14:00:26 +0100 Subject: [PATCH 4/8] Add entrypoint --- langchain/.gitignore | 3 +++ langchain/package.json | 8 ++++++++ langchain/scripts/create-entrypoints.js | 2 ++ langchain/tsconfig.json | 1 + 4 files changed, 14 insertions(+) diff --git a/langchain/.gitignore b/langchain/.gitignore index a478f2c9ea20..65a6035736bd 100644 --- a/langchain/.gitignore +++ b/langchain/.gitignore @@ -73,6 +73,9 @@ vectorstores/chroma.d.ts vectorstores/hnswlib.cjs vectorstores/hnswlib.js vectorstores/hnswlib.d.ts +vectorstores/weaviate.cjs +vectorstores/weaviate.js +vectorstores/weaviate.d.ts vectorstores/pinecone.cjs vectorstores/pinecone.js vectorstores/pinecone.d.ts diff --git a/langchain/package.json b/langchain/package.json index d6ae7e7fedd7..e2c29224020d 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -85,6 +85,9 @@ "vectorstores/hnswlib.cjs", "vectorstores/hnswlib.js", "vectorstores/hnswlib.d.ts", + "vectorstores/weaviate.cjs", + "vectorstores/weaviate.js", + "vectorstores/weaviate.d.ts", "vectorstores/pinecone.cjs", "vectorstores/pinecone.js", "vectorstores/pinecone.d.ts", @@ -556,6 +559,11 @@ "import": "./vectorstores/hnswlib.js", "require": 
"./vectorstores/hnswlib.cjs" }, + "./vectorstores/weaviate": { + "types": "./vectorstores/weaviate.d.ts", + "import": "./vectorstores/weaviate.js", + "require": "./vectorstores/weaviate.cjs" + }, "./vectorstores/pinecone": { "types": "./vectorstores/pinecone.d.ts", "import": "./vectorstores/pinecone.js", diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js index 8f8c03c2234d..a62f7848fe18 100644 --- a/langchain/scripts/create-entrypoints.js +++ b/langchain/scripts/create-entrypoints.js @@ -40,6 +40,7 @@ const entrypoints = { "vectorstores/base": "vectorstores/base", "vectorstores/chroma": "vectorstores/chroma", "vectorstores/hnswlib": "vectorstores/hnswlib", + "vectorstores/weaviate": "vectorstores/weaviate", "vectorstores/pinecone": "vectorstores/pinecone", "vectorstores/supabase": "vectorstores/supabase", "vectorstores/milvus": "vectorstores/milvus", @@ -125,6 +126,7 @@ const requiresOptionalDependency = [ "prompts/load", "vectorstores/chroma", "vectorstores/hnswlib", + "vectorstores/weaviate", "vectorstores/pinecone", "vectorstores/supabase", "vectorstores/milvus", diff --git a/langchain/tsconfig.json b/langchain/tsconfig.json index 894363ffaa0b..cad75ff9036f 100644 --- a/langchain/tsconfig.json +++ b/langchain/tsconfig.json @@ -56,6 +56,7 @@ "src/vectorstores/base.ts", "src/vectorstores/chroma.ts", "src/vectorstores/hnswlib.ts", + "src/vectorstores/weaviate.ts", "src/vectorstores/pinecone.ts", "src/vectorstores/supabase.ts", "src/vectorstores/milvus.ts", From 014e412c5f9fab54df3d9a2129f22c82257429f3 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 14 Apr 2023 17:02:37 +0100 Subject: [PATCH 5/8] Add docs, add examples, improve the types --- .../vector_stores/integrations/weaviate.mdx | 31 +++++++++++++ examples/.env.example | 3 ++ examples/package.json | 2 + .../vector_stores/weaviate_fromTexts.ts | 28 +++++++++++ .../indexes/vector_stores/weaviate_search.ts | 44 ++++++++++++++++++ langchain/.env.example | 3 ++ 
langchain/package.json | 1 + langchain/src/vectorstores/base.ts | 2 +- .../vectorstores/tests/opensearch.int.test.ts | 2 +- .../vectorstores/tests/weaviate.int.test.ts | 46 +++++++++++++++++++ langchain/src/vectorstores/weaviate.ts | 33 ++++++++++--- yarn.lock | 10 ++++ 12 files changed, 196 insertions(+), 9 deletions(-) create mode 100644 docs/docs/modules/indexes/vector_stores/integrations/weaviate.mdx create mode 100644 examples/src/indexes/vector_stores/weaviate_fromTexts.ts create mode 100644 examples/src/indexes/vector_stores/weaviate_search.ts create mode 100644 langchain/src/vectorstores/tests/weaviate.int.test.ts diff --git a/docs/docs/modules/indexes/vector_stores/integrations/weaviate.mdx b/docs/docs/modules/indexes/vector_stores/integrations/weaviate.mdx new file mode 100644 index 000000000000..5a834d6cedbf --- /dev/null +++ b/docs/docs/modules/indexes/vector_stores/integrations/weaviate.mdx @@ -0,0 +1,31 @@ +--- +hide_table_of_contents: true +--- + +import CodeBlock from "@theme/CodeBlock"; + +# Weaviate + +Weaviate is an open source vector database that stores both objects and vectors, allowing for combining vector search with structured filtering. LangChain connects to Weaviate via the `weaviate-ts-client` package, the official TypeScript client for Weaviate. + +LangChain inserts vectors directly into Weaviate, and queries Weaviate for the nearest neighbors of a given vector, so that you can use all the LangChain Embeddings integrations with Weaviate. + +## Setup + +```bash npm2yarn +npm install weaviate-ts-client graphql +``` + +You'll need to run Weaviate either locally or on a server; see [the Weaviate documentation](https://weaviate.io/developers/weaviate/installation) for more information. 
+ +## Usage, insert documents + +import InsertExample from "@examples/indexes/vector_stores/weaviate_fromTexts.ts"; + +{InsertExample} + +## Usage, query documents + +import QueryExample from "@examples/indexes/vector_stores/weaviate_search.ts"; + +{QueryExample} diff --git a/examples/.env.example b/examples/.env.example index c58141fad70a..7e887324d2f2 100644 --- a/examples/.env.example +++ b/examples/.env.example @@ -11,3 +11,6 @@ SERPAPI_API_KEY=ADD_YOURS_HERE # https://serpapi.com/manage-api-key SERPER_API_KEY=ADD_YOURS_HERE # https://serper.dev/api-key SUPABASE_PRIVATE_KEY=ADD_YOURS_HERE # https://app.supabase.com/project/YOUR_PROJECT_ID/settings/api SUPABASE_URL=ADD_YOURS_HERE # # https://app.supabase.com/project/YOUR_PROJECT_ID/settings/api +WEAVIATE_HOST=ADD_YOURS_HERE +WEAVIATE_SCHEME=ADD_YOURS_HERE +WEAVIATE_API_KEY=ADD_YOURS_HERE diff --git a/examples/package.json b/examples/package.json index 78ebfd7d505c..3800fdf3c985 100644 --- a/examples/package.json +++ b/examples/package.json @@ -30,6 +30,7 @@ "@zilliz/milvus2-sdk-node": "^2.2.0", "axios": "^0.26.0", "chromadb": "^1.3.0", + "graphql": "^16.6.0", "js-yaml": "^4.1.0", "langchain": "workspace:*", "ml-distance": "^4.0.0", @@ -37,6 +38,7 @@ "prisma": "^4.11.0", "sqlite3": "^5.1.4", "typeorm": "^0.3.12", + "weaviate-ts-client": "^1.0.0", "zod": "^3.21.4" }, "devDependencies": { diff --git a/examples/src/indexes/vector_stores/weaviate_fromTexts.ts b/examples/src/indexes/vector_stores/weaviate_fromTexts.ts new file mode 100644 index 000000000000..71a67c4e359f --- /dev/null +++ b/examples/src/indexes/vector_stores/weaviate_fromTexts.ts @@ -0,0 +1,28 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import weaviate from "weaviate-ts-client"; +import { WeaviateStore } from "langchain/vectorstores/weaviate"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; + +export async function run() { + // Something wrong with the weaviate-ts-client types, so we need to disable + const client = 
(weaviate as any).client({ + scheme: process.env.WEAVIATE_SCHEME || "https", + host: process.env.WEAVIATE_HOST || "localhost", + apiKey: new (weaviate as any).ApiKey( + process.env.WEAVIATE_API_KEY || "default" + ), + }); + + // Create a store and fill it with some texts + metadata + await WeaviateStore.fromTexts( + ["hello world", "hi there", "how are you", "bye now"], + [{ foo: "bar" }, { foo: "baz" }, { foo: "qux" }, { foo: "bar" }], + new OpenAIEmbeddings(), + { + client, + indexName: "Test", + textKey: "text", + metadataKeys: ["foo"], + } + ); +} diff --git a/examples/src/indexes/vector_stores/weaviate_search.ts b/examples/src/indexes/vector_stores/weaviate_search.ts new file mode 100644 index 000000000000..6e3bdd180868 --- /dev/null +++ b/examples/src/indexes/vector_stores/weaviate_search.ts @@ -0,0 +1,44 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import weaviate from "weaviate-ts-client"; +import { WeaviateStore } from "langchain/vectorstores/weaviate"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; + +export async function run() { + // Something wrong with the weaviate-ts-client types, so we need to disable + const client = (weaviate as any).client({ + scheme: process.env.WEAVIATE_SCHEME || "https", + host: process.env.WEAVIATE_HOST || "localhost", + apiKey: new (weaviate as any).ApiKey( + process.env.WEAVIATE_API_KEY || "default" + ), + }); + + // Create a store for an existing index + const store = new WeaviateStore(new OpenAIEmbeddings(), { + client, + indexName: "Test", + metadataKeys: ["foo"], + }); + + // Search the index without any filters + const results = await store.similaritySearch("hello world", 1); + console.log(results); + /* + [ Document { pageContent: 'hello world', metadata: { foo: 'bar' } } ] + */ + + // Search the index with a filter, in this case, only return results where + // the "foo" metadata key is equal to "baz", see the Weaviate docs for more + // 
https://weaviate.io/developers/weaviate/api/graphql/filters + const results2 = await store.similaritySearch("hello world", 1, { + where: { + operator: "Equal", + path: ["foo"], + valueText: "baz", + }, + }); + console.log(results2); + /* + [ Document { pageContent: 'hi there', metadata: { foo: 'baz' } } ] + */ +} diff --git a/langchain/.env.example b/langchain/.env.example index 68627635fcc3..ed3908416a0a 100644 --- a/langchain/.env.example +++ b/langchain/.env.example @@ -15,3 +15,6 @@ ANTHROPIC_API_KEY=ADD_YOURS_HERE REPLICATE_API_KEY=ADD_YOURS_HERE MONGO_URI=ADD_YOURS_HERE MILVUS_URL=ADD_YOURS_HERE +WEAVIATE_HOST=ADD_YOURS_HERE +WEAVIATE_SCHEME=ADD_YOURS_HERE +WEAVIATE_API_KEY=ADD_YOURS_HERE diff --git a/langchain/package.json b/langchain/package.json index ff43fb58e79d..902e0c27c376 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -296,6 +296,7 @@ "eslint-plugin-no-instanceof": "^1.0.1", "eslint-plugin-prettier": "^4.2.1", "eslint-plugin-tree-shaking": "^1.10.0", + "graphql": "^16.6.0", "hnswlib-node": "^1.4.2", "html-to-text": "^9.0.5", "jest": "^29.5.0", diff --git a/langchain/src/vectorstores/base.ts b/langchain/src/vectorstores/base.ts index 526657bd2528..e9e297ec4a93 100644 --- a/langchain/src/vectorstores/base.ts +++ b/langchain/src/vectorstores/base.ts @@ -58,7 +58,7 @@ export abstract class VectorStore { query: string, k = 4, filter: object | undefined = undefined - ): Promise<[object, number][]> { + ): Promise<[Document, number][]> { return this.similaritySearchVectorWithScore( await this.embeddings.embedQuery(query), k, diff --git a/langchain/src/vectorstores/tests/opensearch.int.test.ts b/langchain/src/vectorstores/tests/opensearch.int.test.ts index 0df33b96ac64..50198e946bb8 100644 --- a/langchain/src/vectorstores/tests/opensearch.int.test.ts +++ b/langchain/src/vectorstores/tests/opensearch.int.test.ts @@ -5,7 +5,7 @@ import { OpenAIEmbeddings } from "../../embeddings/openai.js"; import { OpenSearchVectorStore } from 
"../opensearch.js"; import { Document } from "../../document.js"; -test("OpenSearchVectorStore integration", async () => { +test.skip("OpenSearchVectorStore integration", async () => { if (!process.env.OPENSEARCH_URL) { throw new Error("OPENSEARCH_URL not set"); } diff --git a/langchain/src/vectorstores/tests/weaviate.int.test.ts b/langchain/src/vectorstores/tests/weaviate.int.test.ts new file mode 100644 index 000000000000..44bcf5acea87 --- /dev/null +++ b/langchain/src/vectorstores/tests/weaviate.int.test.ts @@ -0,0 +1,46 @@ +/* eslint-disable no-process-env */ +import { test, expect } from "@jest/globals"; +import weaviate from "weaviate-ts-client"; +import { WeaviateStore } from "../weaviate.js"; +import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { Document } from "../../document.js"; + +test("WeaviateStore", async () => { + // Something wrong with the weaviate-ts-client types, so we need to disable + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const client = (weaviate as any).client({ + scheme: process.env.WEAVIATE_SCHEME || "https", + host: process.env.WEAVIATE_HOST || "localhost", + // eslint-disable-next-line @typescript-eslint/no-explicit-any + apiKey: new (weaviate as any).ApiKey( + process.env.WEAVIATE_API_KEY || "default" + ), + }); + const store = await WeaviateStore.fromTexts( + ["hello world", "hi there", "how are you", "bye now"], + [{ foo: "bar" }, { foo: "baz" }, { foo: "qux" }, { foo: "bar" }], + new OpenAIEmbeddings(), + { + client, + indexName: "Test", + textKey: "text", + metadataKeys: ["foo"], + } + ); + + const results = await store.similaritySearch("hello world", 1); + expect(results).toEqual([ + new Document({ pageContent: "hello world", metadata: { foo: "bar" } }), + ]); + + const results2 = await store.similaritySearch("hello world", 1, { + where: { + operator: "Equal", + path: ["foo"], + valueText: "baz", + }, + }); + expect(results2).toEqual([ + new Document({ pageContent: "hi there", metadata: 
{ foo: "baz" } }), + ]); +}); diff --git a/langchain/src/vectorstores/weaviate.ts b/langchain/src/vectorstores/weaviate.ts index e4d8267e1ab9..434043eb9094 100644 --- a/langchain/src/vectorstores/weaviate.ts +++ b/langchain/src/vectorstores/weaviate.ts @@ -10,9 +10,12 @@ import { Document } from "../document.js"; export interface WeaviateLibArgs { client: WeaviateClient; + /** + * The name of the class in Weaviate. Must start with a capital letter. + */ indexName: string; - textKey: string; - attributes?: string[]; + textKey?: string; + metadataKeys?: string[]; } interface ResultRow { @@ -39,11 +42,11 @@ export class WeaviateStore extends VectorStore { this.client = args.client; this.indexName = args.indexName; - this.textKey = args.textKey; + this.textKey = args.textKey || "text"; this.queryAttrs = [this.textKey]; - if (args.attributes) { - this.queryAttrs = this.queryAttrs.concat(args.attributes); + if (args.metadataKeys) { + this.queryAttrs = this.queryAttrs.concat(args.metadataKeys); } } @@ -75,6 +78,22 @@ export class WeaviateStore extends VectorStore { ); } + async similaritySearch( + query: string, + k: number, + filter?: WeaviateFilter + ): Promise { + return super.similaritySearch(query, k, filter); + } + + async similaritySearchWithScore( + query: string, + k: number, + filter?: WeaviateFilter + ): Promise<[Document, number][]> { + return super.similaritySearchWithScore(query, k, filter); + } + async similaritySearchVectorWithScore( query: number[], k: number, @@ -120,7 +139,7 @@ export class WeaviateStore extends VectorStore { metadatas: object | object[], embeddings: Embeddings, args: WeaviateLibArgs - ): Promise { + ): Promise { const docs: Document[] = []; for (let i = 0; i < texts.length; i += 1) { const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; @@ -137,7 +156,7 @@ export class WeaviateStore extends VectorStore { docs: Document[], embeddings: Embeddings, args: WeaviateLibArgs - ): Promise { + ): Promise { const instance = new this(embeddings, args); await instance.addDocuments(docs); return instance; diff --git a/yarn.lock b/yarn.lock index 834e531e0d7f..32c237f3ee43 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13023,6 +13023,7 @@ __metadata: eslint-config-prettier: ^8.6.0 eslint-plugin-import: ^2.27.5 eslint-plugin-prettier: ^4.2.1 + graphql: ^16.6.0 js-yaml: ^4.1.0 langchain: "workspace:*" ml-distance: ^4.0.0 @@ -13033,6 +13034,7 @@ __metadata: tsx: ^3.12.3 typeorm: ^0.3.12 typescript: ^4.9.5 + weaviate-ts-client: ^1.0.0 zod: ^3.21.4 languageName: unknown linkType: soft @@ -14224,6 +14226,13 @@ __metadata: languageName: node linkType: hard +"graphql@npm:^16.6.0": + version: 16.6.0 + resolution: "graphql@npm:16.6.0" + checksum: bf1d9e3c1938ce3c1a81e909bd3ead1ae4707c577f91cff1ca2eca474bfbc7873d5d7b942e1e9777ff5a8304421dba57a4b76d7a29eb19de8711cb70e3c2415e + languageName: node + linkType: hard + "gray-matter@npm:^4.0.3": version: 4.0.3 resolution: "gray-matter@npm:4.0.3" @@ -17175,6 +17184,7 @@ __metadata: eslint-plugin-tree-shaking: ^1.10.0 expr-eval: ^2.0.2 flat: ^5.0.2 + graphql: ^16.6.0 hnswlib-node: ^1.4.2 html-to-text: ^9.0.5 jest: ^29.5.0 From b9211e5a61c23b5a281adca35c3e57d09faf3ea1 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 14 Apr 2023 17:02:53 +0100 Subject: [PATCH 6/8] Disable the test --- langchain/src/vectorstores/tests/weaviate.int.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/src/vectorstores/tests/weaviate.int.test.ts b/langchain/src/vectorstores/tests/weaviate.int.test.ts index 44bcf5acea87..1a4fbd469baa 100644 --- a/langchain/src/vectorstores/tests/weaviate.int.test.ts +++ b/langchain/src/vectorstores/tests/weaviate.int.test.ts @@ -5,7 +5,7 @@ import { WeaviateStore } from "../weaviate.js"; import { OpenAIEmbeddings } 
from "../../embeddings/openai.js"; import { Document } from "../../document.js"; -test("WeaviateStore", async () => { +test.skip("WeaviateStore", async () => { // Something wrong with the weaviate-ts-client types, so we need to disable // eslint-disable-next-line @typescript-eslint/no-explicit-any const client = (weaviate as any).client({ From 18cb23984b5f39d00fb21528a26e1297d8d0b9e5 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 14 Apr 2023 17:22:58 +0100 Subject: [PATCH 7/8] Add fromExistingIndex --- langchain/src/vectorstores/opensearch.ts | 16 +++++++++++++--- langchain/src/vectorstores/weaviate.ts | 7 +++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/langchain/src/vectorstores/opensearch.ts b/langchain/src/vectorstores/opensearch.ts index d24e948d8636..526f73cf986e 100644 --- a/langchain/src/vectorstores/opensearch.ts +++ b/langchain/src/vectorstores/opensearch.ts @@ -1,4 +1,3 @@ -/* eslint-disable no-instanceof/no-instanceof */ import { Client, RequestParams, errors } from "@opensearch-project/opensearch"; import { v4 as uuid } from "uuid"; import { Embeddings } from "../embeddings/base.js"; @@ -111,6 +110,7 @@ export class OpenSearchVectorStore extends VectorStore { const { body } = await this.client.search(search); + // eslint-disable-next-line @typescript-eslint/no-explicit-any return body.hits.hits.map((hit: any) => [ new Document({ pageContent: hit._source.text, @@ -125,7 +125,7 @@ export class OpenSearchVectorStore extends VectorStore { metadatas: object[] | object, embeddings: Embeddings, args: OpenSearchClientArgs - ): Promise { + ): Promise { const documents = texts.map((text, idx) => { const metadata = Array.isArray(metadatas) ? 
metadatas[idx] : metadatas; return new Document({ pageContent: text, metadata }); @@ -138,12 +138,21 @@ export class OpenSearchVectorStore extends VectorStore { docs: Document[], embeddings: Embeddings, dbConfig: OpenSearchClientArgs - ): Promise { + ): Promise { const store = new OpenSearchVectorStore(embeddings, dbConfig); await store.addDocuments(docs).then(() => store); return store; } + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: OpenSearchClientArgs + ): Promise { + const store = new OpenSearchVectorStore(embeddings, dbConfig); + await store.client.cat.indices({ index: store.indexName }); + return store; + } + private async ensureIndexExists( dimension: number, engine = "nmslib", @@ -210,6 +219,7 @@ export class OpenSearchVectorStore extends VectorStore { await this.client.cat.indices({ index: this.indexName }); return true; } catch (err: unknown) { + // eslint-disable-next-line no-instanceof/no-instanceof if (err instanceof errors.ResponseError && err.statusCode === 404) { return false; } diff --git a/langchain/src/vectorstores/weaviate.ts b/langchain/src/vectorstores/weaviate.ts index 434043eb9094..fcd70fa81cbf 100644 --- a/langchain/src/vectorstores/weaviate.ts +++ b/langchain/src/vectorstores/weaviate.ts @@ -161,4 +161,11 @@ export class WeaviateStore extends VectorStore { await instance.addDocuments(docs); return instance; } + + static async fromExistingIndex( + embeddings: Embeddings, + args: WeaviateLibArgs + ): Promise { + return new this(embeddings, args); + } } From c700701461e31968da6a5d9660b24911187af3e8 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 14 Apr 2023 17:25:00 +0100 Subject: [PATCH 8/8] Update example --- examples/src/indexes/vector_stores/weaviate_search.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/indexes/vector_stores/weaviate_search.ts b/examples/src/indexes/vector_stores/weaviate_search.ts index 6e3bdd180868..1eeb12c8510a 100644 --- 
a/examples/src/indexes/vector_stores/weaviate_search.ts +++ b/examples/src/indexes/vector_stores/weaviate_search.ts @@ -14,7 +14,7 @@ export async function run() { }); // Create a store for an existing index - const store = new WeaviateStore(new OpenAIEmbeddings(), { + const store = await WeaviateStore.fromExistingIndex(new OpenAIEmbeddings(), { client, indexName: "Test", metadataKeys: ["foo"],