Skip to content

Commit

Permalink
Document Date Reranker
Browse files Browse the repository at this point in the history
  • Loading branch information
Mraj23 committed Sep 2, 2024
1 parent b85e160 commit b61d319
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
66 changes: 66 additions & 0 deletions langchain/src/retrievers/recency_ranked.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { BaseRetriever } from "@langchain/core/retrievers";
import { VectorStoreInterface } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";

/**
 * Construction options for {@link RecencyRankedRetriever}.
 */
export interface RecencyRankedRetrieverConfig {
/** Vector store that is queried (with scores) for candidate documents. */
vectorStore: VectorStoreInterface;
/** Maximum number of documents returned after reranking. */
k: number;
/**
 * Weight of the normalized recency term in the blended score:
 * `(1 - recencyWeight) * similarity + recencyWeight * recency`.
 * Defaults to `0.3` when omitted. Presumably intended to lie in `[0, 1]`;
 * the value is not validated.
 */
recencyWeight?: number;
}

/**
 * Retriever that reranks vector-store hits by blending each hit's similarity
 * score with how recent the document is.
 *
 * Recency is read from `doc.metadata.date`, which may be a `Date`, an ISO
 * date string, or an epoch-millisecond number. Within one result set the
 * oldest dated document gets recency `0` and the newest gets `1`; documents
 * with a missing or unparseable date are treated as least recent (`0`).
 *
 * NOTE(review): the blend assumes the store's score is "higher is better"
 * and roughly on a `[0, 1]` scale — confirm for distance-based stores.
 */
export class RecencyRankedRetriever extends BaseRetriever {
  static lc_name() {
    return "RecencyRankedRetriever";
  }

  lc_namespace = ["langchain", "retrievers", "recency_ranked"];

  /**
   * Smallest candidate pool requested from the vector store. Reranking only
   * helps when there are more candidates than results, so at least this many
   * hits are fetched even for a small `k`.
   */
  private static readonly MIN_CANDIDATES = 15;

  private vectorStore: VectorStoreInterface;

  private k: number;

  private recencyWeight: number;

  /**
   * @param config - Vector store to query, number of documents to return,
   *   and optional recency weight (defaults to `0.3`).
   */
  constructor(config: RecencyRankedRetrieverConfig) {
    super();
    this.vectorStore = config.vectorStore;
    this.k = config.k;
    this.recencyWeight = config.recencyWeight ?? 0.3;
  }

  /**
   * Fetches scored candidates for `query` and returns the top `k` documents
   * after recency reranking.
   *
   * This implements the `_getRelevantDocuments` hook rather than overriding
   * the public `getRelevantDocuments`, so the entry points inherited from
   * `BaseRetriever` (including `getRelevantDocuments` and `invoke`) run this
   * logic together with the base class's own callback handling.
   *
   * @param query - Text to search the vector store with.
   * @returns At most `k` documents, best blended score first.
   */
  async _getRelevantDocuments(query: string): Promise<Document[]> {
    // Never request fewer candidates than the caller wants back; the previous
    // fixed pool of 15 silently capped the result size for larger k.
    const candidateCount = Math.max(
      this.k,
      RecencyRankedRetriever.MIN_CANDIDATES
    );
    const scoredDocs = await this.vectorStore.similaritySearchWithScore(
      query,
      candidateCount
    );
    const reranked = this.recentDocumentRanker(
      scoredDocs,
      this.k,
      this.recencyWeight
    );
    return reranked.map(([doc]) => doc);
  }

  /**
   * Converts a metadata date value to epoch milliseconds.
   *
   * @param value - A `Date`, a date string, or an epoch-millisecond number.
   * @returns The timestamp, or `undefined` when the value is absent, of
   *   another type, or does not parse to a valid date.
   */
  private static toTimestamp(value: unknown): number | undefined {
    let millis: number;
    if (value instanceof Date) {
      millis = value.getTime();
    } else if (typeof value === "string" || typeof value === "number") {
      millis = new Date(value).getTime();
    } else {
      return undefined;
    }
    return Number.isNaN(millis) ? undefined : millis;
  }

  /**
   * Rescores documents as
   * `(1 - recencyWeight) * score + recencyWeight * normalizedRecency`
   * and keeps the `topK` highest.
   *
   * @param documents - `[document, similarityScore]` pairs to rerank.
   * @param topK - Maximum number of pairs to return; values below zero are
   *   treated as zero.
   * @param recencyWeight - Share of the blended score given to recency.
   * @returns A new array of `[document, adjustedScore]` pairs sorted by
   *   adjusted score, descending.
   */
  private recentDocumentRanker(
    documents: [Document, number][],
    topK: number,
    recencyWeight: number
  ): [Document, number][] {
    if (documents.length === 0) return [];

    const timestamps = documents.map(([doc]) =>
      RecencyRankedRetriever.toTimestamp(doc.metadata.date)
    );

    // Track the date span over dated documents only, so one undated document
    // cannot poison the range with NaN.
    let oldestDate = Infinity;
    let newestDate = -Infinity;
    for (const timestamp of timestamps) {
      if (timestamp === undefined) continue;
      if (timestamp < oldestDate) oldestDate = timestamp;
      if (timestamp > newestDate) newestDate = timestamp;
    }
    const dateRange = newestDate - oldestDate;

    const rerankedDocuments = documents
      .map(([doc, score], index): [Document, number] => {
        const timestamp = timestamps[index];
        let normalizedRecency: number;
        if (timestamp === undefined) {
          // Undated documents rank as the least recent.
          normalizedRecency = 0;
        } else if (dateRange > 0) {
          normalizedRecency = (timestamp - oldestDate) / dateRange;
        } else {
          // All dated documents share one timestamp: equally (fully) recent.
          normalizedRecency = 1;
        }
        const adjustedScore =
          (1 - recencyWeight) * score + recencyWeight * normalizedRecency;
        return [doc, adjustedScore];
      })
      .sort((a, b) => b[1] - a[1]);

    // Guard against a negative topK, which slice() would otherwise interpret
    // as "drop from the end".
    return rerankedDocuments.slice(0, Math.max(0, topK));
  }
}
40 changes: 40 additions & 0 deletions langchain/src/retrievers/tests/recency_ranked.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { expect, test } from "@jest/globals";
import { Document } from "@langchain/core/documents";
import { FakeEmbeddings } from "@langchain/core/utils/testing";
import { MemoryVectorStore } from "../../vectorstores/memory.js"
import { RecencyRankedRetriever } from "../recency_ranked.js";



// Verifies that, with recency weighted almost exclusively, the retriever
// returns the two newest documents, newest first.
test("RecencyRankedRetriever", async () => {
  // [pageContent, date] fixtures, listed oldest to newest.
  const fixtures: [string, string][] = [
    ["A", "2023-01-01"],
    ["B", "2023-02-01"],
    ["C", "2023-03-01"],
  ];
  const docs = fixtures.map(
    ([pageContent, isoDate]) =>
      new Document({ pageContent, metadata: { date: new Date(isoDate) } })
  );

  const vectorstore = new MemoryVectorStore(new FakeEmbeddings());
  await vectorstore.addDocuments(docs);

  // recencyWeight of 0.99 leaves only 1% of the blended score to similarity.
  const retriever = new RecencyRankedRetriever({
    vectorStore: vectorstore,
    k: 2,
    recencyWeight: 0.99,
  });

  const results = await retriever.getRelevantDocuments("test query");
  const returnedContents = results.map((doc) => doc.pageContent);

  expect(results).toHaveLength(2);
  expect(returnedContents).toEqual(["C", "B"]);
});

0 comments on commit b61d319

Please sign in to comment.