Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pinecone[patch]: Fix document ID not getting set when returned from PineconeStore #6539

Merged
merged 4 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 42 additions & 18 deletions libs/langchain-pinecone/src/tests/vectorstores.int.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* eslint-disable no-process-env */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
/* eslint-disable no-promise-executor-return */
/* eslint-disable @typescript-eslint/no-explicit-any */
import { describe, expect, test } from "@jest/globals";
import { faker } from "@faker-js/faker";
import { Pinecone } from "@pinecone-database/pinecone";
Expand All @@ -9,12 +10,14 @@ import { SyntheticEmbeddings } from "@langchain/core/utils/testing";
import { Document } from "@langchain/core/documents";
import { PineconeStoreParams, PineconeStore } from "../vectorstores.js";

// Delay, in milliseconds, handed to `sleep` after each write or delete in
// these tests before the index is queried again.
// NOTE(review): presumably this gives Pinecone time to make the change
// visible to queries — confirm the value against current index behaviour.
const PINECONE_SLEEP_LENGTH = 40000;

/**
 * Pause helper for the integration tests.
 *
 * @param ms Number of milliseconds to wait.
 * @returns A promise that settles (with no value) once the timer fires.
 */
function sleep(ms: number) {
  return new Promise((done) => {
    // Block-bodied executor: nothing is returned from it, the timer simply
    // resolves the promise when it elapses.
    setTimeout(done, ms);
  });
}

describe.skip("PineconeStore", () => {
describe("PineconeStore", () => {
let pineconeStore: PineconeStore;
const testIndexName = process.env.PINECONE_INDEX!;
let namespaces: string[] = [];
Expand Down Expand Up @@ -57,22 +60,29 @@ describe.skip("PineconeStore", () => {
[{ pageContent, metadata: {} }],
[documentId]
);
await sleep(35000);

await sleep(PINECONE_SLEEP_LENGTH);

const results = await pineconeStore.similaritySearch(pageContent, 1);

expect(results).toEqual([new Document({ metadata: {}, pageContent })]);
expect(results).toEqual([
new Document({ metadata: {}, pageContent, id: documentId }),
]);

await pineconeStore.addDocuments(
[{ pageContent: `${pageContent} upserted`, metadata: {} }],
[documentId]
);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);

const results2 = await pineconeStore.similaritySearch(pageContent, 1);

expect(results2).toEqual([
new Document({ metadata: {}, pageContent: `${pageContent} upserted` }),
new Document({
metadata: {},
pageContent: `${pageContent} upserted`,
id: documentId,
}),
]);
});

Expand All @@ -83,11 +93,15 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: "bar" } },
]);

await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
const results = await pineconeStore.similaritySearch(pageContent, 1);

expect(results).toEqual([
new Document({ metadata: { foo: "bar" }, pageContent }),
new Document({
metadata: { foo: "bar" },
pageContent,
id: expect.any(String) as any,
}),
]);
});

Expand All @@ -100,14 +114,18 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: "qux" } },
]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
// If the filter wasn't working, we'd get all 3 documents back
const results = await pineconeStore.similaritySearch(pageContent, 3, {
foo: id,
});

expect(results).toEqual([
new Document({ metadata: { foo: id }, pageContent }),
new Document({
metadata: { foo: id },
pageContent,
id: expect.any(String) as any,
}),
]);
});

Expand All @@ -120,7 +138,7 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: id } },
]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
// If the filter wasn't working, we'd get all 3 documents back
const results = await pineconeStore.maxMarginalRelevanceSearch(
pageContent,
Expand All @@ -142,7 +160,7 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: id } },
]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
const results = await pineconeStore.similaritySearch(pageContent, 2, {
foo: id,
});
Expand Down Expand Up @@ -174,7 +192,7 @@ describe.skip("PineconeStore", () => {
ids: [id, id2],
}
);
await sleep(40000);
await sleep(PINECONE_SLEEP_LENGTH);
const indexStats = await pineconeStore.pineconeIndex.describeIndexStats();
expect(indexStats.namespaces).toHaveProperty("");
expect(indexStats.namespaces?.[""].recordCount).toEqual(2);
Expand All @@ -184,7 +202,7 @@ describe.skip("PineconeStore", () => {
await pineconeStore.delete({
deleteAll: true,
});
await sleep(40000);
await sleep(PINECONE_SLEEP_LENGTH);
const indexStats2 = await pineconeStore.pineconeIndex.describeIndexStats();
expect(indexStats2.namespaces).not.toHaveProperty("");
// The new total records should be less than the previous total records
Expand All @@ -209,7 +227,7 @@ describe.skip("PineconeStore", () => {
namespace: namespaces[1],
}
);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
const results = await pineconeStore.similaritySearch(pageContent, 1, {
namespace: namespaces[0],
});
Expand All @@ -234,22 +252,28 @@ describe.skip("PineconeStore", () => {
});

await store.addDocuments([{ pageContent, metadata: {} }], [documentId]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);

const results = await store.similaritySearch(pageContent, 1);

expect(results).toEqual([new Document({ metadata: {}, pageContent })]);
expect(results).toEqual([
new Document({ metadata: {}, pageContent, id: documentId }),
]);

await store.addDocuments(
[{ pageContent: `${pageContent} upserted`, metadata: {} }],
[documentId]
);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);

const results2 = await store.similaritySearch(pageContent, 1);

expect(results2).toEqual([
new Document({ metadata: {}, pageContent: `${pageContent} upserted` }),
new Document({
metadata: {},
pageContent: `${pageContent} upserted`,
id: documentId,
}),
]);
});
});
66 changes: 41 additions & 25 deletions libs/langchain-pinecone/src/vectorstores.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
RecordMetadata,
PineconeRecord,
Index as PineconeIndex,
ScoredPineconeRecord,
} from "@pinecone-database/pinecone";

import type { EmbeddingsInterface } from "@langchain/core/embeddings";
Expand Down Expand Up @@ -401,6 +402,40 @@ export class PineconeStore extends VectorStore {
return results;
}

/**
 * Format the matching results from the Pinecone query.
 *
 * For every match, the metadata entry stored under `this.textKey` becomes the
 * document's `pageContent`, the remaining metadata entries become the
 * document's `metadata`, and the record id is carried over as the document
 * `id`. Matches that carry no numeric score are skipped.
 *
 * @param matches Matching results from the Pinecone query.
 * @returns An array of arrays, where each inner array contains a document and its score.
 */
private _formatMatches(
  matches: ScoredPineconeRecord<RecordMetadata>[] = []
): [Document, number][] {
  const documentsWithScores: [Document, number][] = [];

  for (const record of matches) {
    const {
      id,
      score,
      // The default only applies when the record has no metadata at all.
      metadata: { [this.textKey]: pageContent, ...metadata } = {
        [this.textKey]: "",
      },
    } = record;

    // Test for a real number instead of truthiness: a score of exactly 0 is
    // a valid result (for instance, a distance-style metric can report 0 for
    // an exact match) and must not be dropped.
    if (typeof score !== "number") {
      continue;
    }

    documentsWithScores.push([
      new Document({
        id,
        // Metadata may be present yet lack the text key; fall back to an
        // empty string rather than calling `toString` on undefined.
        pageContent: pageContent?.toString() ?? "",
        metadata,
      }),
      score,
    ]);
  }

  return documentsWithScores;
}

/**
* Method that performs a similarity search in the Pinecone database and
* returns the results along with their scores.
Expand All @@ -414,20 +449,10 @@ export class PineconeStore extends VectorStore {
k: number,
filter?: PineconeMetadata
): Promise<[Document, number][]> {
const results = await this._runPineconeQuery(query, k, filter);
const result: [Document, number][] = [];

if (results.matches) {
for (const res of results.matches) {
const { [this.textKey]: pageContent, ...metadata } = (res.metadata ??
{}) as PineconeMetadata;
if (res.score) {
result.push([new Document({ metadata, pageContent }), res.score]);
}
}
}
const { matches = [] } = await this._runPineconeQuery(query, k, filter);
const records = this._formatMatches(matches);

return result;
return records;
}

/**
Expand Down Expand Up @@ -457,7 +482,7 @@ export class PineconeStore extends VectorStore {
{ includeValues: true }
);

const matches = results?.matches ?? [];
const { matches = [] } = results;
const embeddingList = matches.map((match) => match.values);

const mmrIndexes = maximalMarginalRelevance(
Expand All @@ -468,17 +493,8 @@ export class PineconeStore extends VectorStore {
);

const topMmrMatches = mmrIndexes.map((idx) => matches[idx]);

const finalResult: Document[] = [];
for (const res of topMmrMatches) {
const { [this.textKey]: pageContent, ...metadata } = (res.metadata ??
{}) as PineconeMetadata;
if (res.score) {
finalResult.push(new Document({ metadata, pageContent }));
}
}

return finalResult;
const records = this._formatMatches(topMmrMatches);
return records.map(([doc, _score]) => doc);
}

/**
Expand Down
Loading