Skip to content

Commit

Permalink
install aws-sdk/client-s3 as devDependency
Browse files: browse the repository at this point in the history
  • Loading branch information
jasondotparse committed Apr 6, 2023
1 parent 4736252 commit 8f3bfb3
Show file tree
Hide file tree
Showing 5 changed files with 1,185 additions and 34 deletions.
2 changes: 1 addition & 1 deletion examples/src/document_loaders/unstructured.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { UnstructuredLoader } from "langchain/document_loaders";

export const run = async () => {
const loader = new UnstructuredLoader(
"http://localhost:8000/general/v0/general",
"http://localhost:8000/general/v0.0.4/general",
"langchain/src/document_loaders/tests/example_data/example.txt"
);
const docs = await loader.load();
Expand Down
2 changes: 2 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
"author": "Langchain",
"license": "MIT",
"devDependencies": {
"@aws-sdk/client-s3": "^3.306.0",
"@babel/core": "^7.20.12",
"@babel/preset-env": "^7.20.2",
"@dqbd/tiktoken": "^1.0.2",
Expand Down Expand Up @@ -124,6 +125,7 @@
"typescript": "^4.9.5"
},
"peerDependencies": {
"@aws-sdk/client-s3": "^3.306.0",
"@dqbd/tiktoken": "^1.0.2",
"@huggingface/inference": "^1.5.1",
"@pinecone-database/pinecone": "^0.0.10",
Expand Down
23 changes: 13 additions & 10 deletions langchain/src/document_loaders/s3.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import fsDefault from "fs";
import path from "path";
import os from "os";
import { Readable } from "stream";
import { BaseDocumentLoader } from "./base.js";
import { UnstructuredLoader as UnstructuredLoaderDefault } from "./unstructured.js";

Expand Down Expand Up @@ -40,7 +41,7 @@ export class S3Loader extends BaseDocumentLoader {
const filePath = path.join(tempDir, this.key);

try {
const s3Client = new S3Client();
const s3Client = new S3Client({});

const getObjectCommand = new GetObjectCommand({
Bucket: this.bucket,
Expand All @@ -51,18 +52,19 @@ export class S3Loader extends BaseDocumentLoader {

const objectData = await new Promise<Buffer>((resolve, reject) => {
const chunks: Buffer[] = [];
response.Body.on("data", (chunk: Buffer) => chunks.push(chunk));
response.Body.on("end", () => resolve(Buffer.concat(chunks)));
response.Body.on("error", reject);

if (response.Body instanceof Readable) {
response.Body.on("data", (chunk: Buffer) => chunks.push(chunk));
response.Body.on("end", () => resolve(Buffer.concat(chunks)));
response.Body.on("error", reject);
} else {
reject(new Error("Response body is not a readable stream."));
}
});

this._fs.mkdirSync(path.dirname(filePath), { recursive: true });

this._fs.writeFileSync(filePath, objectData);

console.log(
`Downloaded file ${this.key} from S3 bucket ${this.bucket} to ${filePath}`
);
} catch {
throw new Error(
`Failed to download file ${this.key} from S3 bucket ${this.bucket}.`
Expand All @@ -88,8 +90,9 @@ export class S3Loader extends BaseDocumentLoader {

async function S3LoaderImports() {
try {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return await import("@aws-sdk/client-s3" as any);
const s3Module = await import("@aws-sdk/client-s3");

return s3Module as typeof s3Module;
} catch (e) {
console.error(e);
throw new Error(
Expand Down
36 changes: 15 additions & 21 deletions langchain/src/document_loaders/tests/s3.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,16 @@ import { test, jest, expect } from "@jest/globals";
// eslint-disable-next-line import/no-extraneous-dependencies
import S3Client from "@aws-sdk/client-s3";
import fs from "fs";
import * as path from "path";
import { Readable } from "stream";
import { S3Loader } from "../s3.js";
import { UnstructuredLoader } from "../unstructured.js";

const fsMock = {
...fs,
mkdtempSync: jest.fn().mockReturnValue("/tmp/s3fileloader-12345"),
mkdirSync: jest.fn().mockImplementation(() => {
console.log("Mock mkdirSync invoked");
}),
writeFileSync: jest.fn().mockImplementation(() => {
console.log("Mock writeFileSync invoked");
}),
mkdtempSync: jest.fn().mockReturnValue("tmp/s3fileloader-12345"),
mkdirSync: jest.fn().mockImplementation(() => {}),
writeFileSync: jest.fn().mockImplementation(() => {}),
};

const UnstructuredLoaderMock = jest.fn().mockImplementation(() => ({
Expand All @@ -23,20 +21,16 @@ const UnstructuredLoaderMock = jest.fn().mockImplementation(() => ({

jest.mock("@aws-sdk/client-s3", () => ({
S3Client: jest.fn().mockImplementation(() => ({
send: jest.fn().mockImplementation(() => ({
Body: {
on: jest.fn().mockImplementation((event, callback) => {
if (event === "data") {
(callback as (buffer: Buffer) => void)(
Buffer.from("Mock file content")
);
} else if (event === "end") {
(callback as (buffer?: Buffer) => void)(undefined);
}
send: jest.fn().mockImplementation(() =>
Promise.resolve({
Body: new Readable({
read() {
this.push(Buffer.from("Mock file content"));
this.push(null);
},
}),
pipe: jest.fn(),
},
})),
})
),
})),
GetObjectCommand: jest.fn(),
}));
Expand All @@ -61,7 +55,7 @@ test("Test S3 loader", async () => {
expect(fsMock.writeFileSync).toHaveBeenCalled();
expect(UnstructuredLoaderMock).toHaveBeenCalledWith(
"http://localhost:8000/general/v0/general",
"/tmp/s3fileloader-12345/AccountingOverview.pdf"
path.join("tmp", "s3fileloader-12345", "AccountingOverview.pdf")
);
expect(result).toEqual(["fake document"]);
});
Loading

0 comments on commit 8f3bfb3

Please sign in to comment.