diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/csv.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/csv.md index dfc31c753877..e482cd3717e3 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/csv.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/csv.md @@ -21,7 +21,7 @@ id,text Example code: ```typescript -import { CSVLoader } from "langchain/document_loaders"; +import { CSVLoader } from "langchain/document_loaders/fs/csv"; const loader = new CSVLoader("src/document_loaders/example_data/example.csv"); @@ -61,7 +61,7 @@ id,text Example code: ```typescript -import { CSVLoader } from "langchain/document_loaders"; +import { CSVLoader } from "langchain/document_loaders/fs/csv"; const loader = new CSVLoader( "src/document_loaders/example_data/example.csv", diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/directory.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/directory.md index 7acffe762112..675bd341dda6 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/directory.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/directory.md @@ -20,13 +20,13 @@ src/document_loaders/example_data/example/ Example code: ```typescript +import { DirectoryLoader } from "langchain/document_loaders/fs/directory"; import { - DirectoryLoader, JSONLoader, JSONLinesLoader, - TextLoader, - CSVLoader, -} from "langchain/document_loaders"; +} from "langchain/document_loaders/fs/json"; +import { TextLoader } from "langchain/document_loaders/fs/text"; +import { CSVLoader } from "langchain/document_loaders/fs/csv"; const loader = new DirectoryLoader( "src/document_loaders/example_data/example", diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/docx.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/docx.md index e76b80b2647e..605a15c0fbc2 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/docx.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/docx.md @@ -15,7 +15,7 @@ npm install mammoth # Usage ```typescript -import { DocxLoader } from "langchain/document_loaders"; +import { DocxLoader } from "langchain/document_loaders/fs/docx"; const loader = new DocxLoader( "src/document_loaders/tests/example_data/attention.docx" diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/epub.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/epub.md index 6be21d68e634..df9c1faaedad 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/epub.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/epub.md @@ -15,7 +15,7 @@ npm install epub2 html-to-text # Usage, one document per chapter ```typescript -import { EPubLoader } from "langchain/document_loaders"; +import { EPubLoader } from "langchain/document_loaders/fs/epub"; const loader = new EPubLoader("src/document_loaders/example_data/example.epub"); @@ -25,7 +25,7 @@ const docs = await loader.load(); # Usage, one document per file ```typescript -import { EPubLoader } from "langchain/document_loaders"; +import { EPubLoader } from "langchain/document_loaders/fs/epub"; const loader = new EPubLoader( "src/document_loaders/example_data/example.epub", diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/json.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/json.md index d82cc052c785..61fed3477d1d 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/json.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/json.md @@ -18,7 +18,7 @@ Example JSON file: Example code: ```typescript -import { JSONLoader } from "langchain/document_loaders"; +import { JSONLoader } from "langchain/document_loaders/fs/json"; const loader = new JSONLoader("src/document_loaders/example_data/example.json"); @@ -73,7 +73,7 @@ In this example, we want to only extract information from "from" and "surname" e Example code: ```typescript -import { JSONLoader } from "langchain/document_loaders"; +import { JSONLoader } from "langchain/document_loaders/fs/json"; const loader = new JSONLoader( "src/document_loaders/example_data/example.json", diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/jsonlines.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/jsonlines.md index 5adf8d0d2b41..5aa125649b6a 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/jsonlines.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/jsonlines.md @@ -16,7 +16,7 @@ Example JSONLines file: Example code: ```typescript -import { JSONLinesLoader } from "langchain/document_loaders"; +import { JSONLinesLoader } from "langchain/document_loaders/fs/json"; const loader = new JSONLinesLoader( "src/document_loaders/example_data/example.jsonl", diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/pdf.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/pdf.md index 28f4194ebc18..eb473cb47af7 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/pdf.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/pdf.md @@ -15,7 +15,7 @@ npm install pdfjs-dist # Usage, one document per page ```typescript -import { PDFLoader } from "langchain/document_loaders"; +import { PDFLoader } from "langchain/document_loaders/fs/pdf"; const loader = new PDFLoader("src/document_loaders/example_data/example.pdf"); @@ -25,7 +25,7 @@ const docs = await loader.load(); # Usage, one document per file ```typescript -import { PDFLoader } from "langchain/document_loaders"; +import { PDFLoader } from "langchain/document_loaders/fs/pdf"; const loader = new PDFLoader("src/document_loaders/example_data/example.pdf", { splitPages: false, @@ -39,7 +39,7 @@ const docs = await loader.load(); In legacy environments, you can use the `pdfjs` option to provide a function that returns a promise that resolves to the `PDFJS` object. This is useful if you want to use a custom build of `pdfjs-dist` or if you want to use a different version of `pdfjs-dist`. Eg. here we use the legacy build of `pdfjs-dist`, which includes several polyfills that are not included in the default build. ```typescript -import { PDFLoader } from "langchain/document_loaders"; +import { PDFLoader } from "langchain/document_loaders/fs/pdf"; const loader = new PDFLoader("src/document_loaders/example_data/example.pdf", { pdfjs: () => diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/subtitles.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/subtitles.md index d9e554a9793d..035c51e903e2 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/subtitles.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/subtitles.md @@ -15,7 +15,7 @@ npm install srt-parser-2 ## Usage ```typescript -import { SRTLoader } from "langchain/document_loaders"; +import { SRTLoader } from "langchain/document_loaders/fs/srt"; const loader = new SRTLoader( "src/document_loaders/example_data/Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.srt" diff --git a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/text.md b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/text.md index 953411ddbaa8..d20d7c1942d2 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/file_loaders/text.md +++ b/docs/docs/modules/indexes/document_loaders/examples/file_loaders/text.md @@ -7,7 +7,7 @@ hide_table_of_contents: true This example goes over how to load data from text files. ```typescript -import { TextLoader } from "langchain/document_loaders"; +import { TextLoader } from "langchain/document_loaders/fs/text"; const loader = new TextLoader("src/document_loaders/example_data/example.txt"); diff --git a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/college_confidential.md b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/college_confidential.md index e275eaeaaa88..de141ce97f14 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/college_confidential.md +++ b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/college_confidential.md @@ -15,7 +15,7 @@ npm install cheerio ## Usage ```typescript -import { CollegeConfidentialLoader } from "langchain/document_loaders"; +import { CollegeConfidentialLoader } from "langchain/document_loaders/web/college_confidential"; const loader = new CollegeConfidentialLoader( "https://www.collegeconfidential.com/colleges/brown-university/" diff --git a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/gitbook.md b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/gitbook.md index 16bd186e58b9..06d88da2ecc7 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/gitbook.md +++ b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/gitbook.md @@ -15,7 +15,7 @@ npm install cheerio ## Load from single GitBook page ```typescript -import { GitbookLoader } from "langchain/document_loaders"; +import { GitbookLoader } from "langchain/document_loaders/web/gitbook"; const loader = new GitbookLoader( "https://docs.gitbook.com/product-tour/navigation" @@ -29,7 +29,7 @@ const docs = await loader.load(); For this to work, the GitbookLoader needs to be initialized with the root path (https://docs.gitbook.com in this example) and have `shouldLoadAllPaths` set to `true`. ```typescript -import { GitbookLoader } from "langchain/document_loaders"; +import { GitbookLoader } from "langchain/document_loaders/web/gitbook"; const loader = new GitbookLoader("https://docs.gitbook.com", { shouldLoadAllPaths: true, diff --git a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/github.md b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/github.md index d5b69d4dfd99..deab43a83a0b 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/github.md +++ b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/github.md @@ -8,7 +8,7 @@ This example goes over how to load data from a GitHub repository. You can set the `GITHUB_ACCESS_TOKEN` environment variable to a GitHub access token to increase the rate limit and access private repositories. ```typescript -import { GithubRepoLoader } from "langchain/document_loaders"; +import { GithubRepoLoader } from "langchain/document_loaders/web/github"; const loader = new GithubRepoLoader( "https://github.com/hwchase17/langchainjs", diff --git a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/hn.md b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/hn.md index f0b47ac1c992..65d6b44d9e1d 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/hn.md +++ b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/hn.md @@ -15,7 +15,7 @@ npm install cheerio ## Usage ```typescript -import { HNLoader } from "langchain/document_loaders"; +import { HNLoader } from "langchain/document_loaders/web/hn"; const loader = new HNLoader("https://news.ycombinator.com/item?id=34817881"); diff --git a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/imsdb.md b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/imsdb.md index 5ff2494cd8a4..27df8ee34f72 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/imsdb.md +++ b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/imsdb.md @@ -15,7 +15,7 @@ npm install cheerio ## Usage ```typescript -import { IMSDBLoader } from "langchain/document_loaders"; +import { IMSDBLoader } from "langchain/document_loaders/web/imsdb"; const loader = new IMSDBLoader("https://imsdb.com/scripts/BlacKkKlansman.html"); diff --git a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_cheerio.md b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_cheerio.md index b07690878d2f..173adb27e4b7 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_cheerio.md +++ b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_cheerio.md @@ -20,7 +20,7 @@ npm install cheerio ## Usage ```typescript -import { CheerioWebBaseLoader } from "langchain/document_loaders"; +import { CheerioWebBaseLoader } from "langchain/document_loaders/web/cheerio"; const loader = new CheerioWebBaseLoader( "https://news.ycombinator.com/item?id=34817881" diff --git a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_puppeteer.md b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_puppeteer.md index d644a0bbead4..437ede9b00ce 100644 --- a/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_puppeteer.md +++ b/docs/docs/modules/indexes/document_loaders/examples/web_loaders/web_puppeteer.md @@ -20,7 +20,7 @@ npm install puppeteer ## Usage ```typescript -import { PuppeteerWebBaseLoader } from "langchain/document_loaders"; +import { PuppeteerWebBaseLoader } from "langchain/document_loaders/web/puppeteer"; /** * Loader uses `page.evaluate(() => document.body.innerHTML)` @@ -54,7 +54,7 @@ By passing these options to the `PuppeteerWebBaseLoader` constructor, you can cu Here is a basic example to do it: ```typescript -import { PuppeteerWebBaseLoader } from "langchain/document_loaders"; +import { PuppeteerWebBaseLoader } from "langchain/document_loaders/web/puppeteer"; const loader = new PuppeteerWebBaseLoader("https://www.tabnews.com.br/", { launchOptions: { diff --git a/examples/src/document_loaders/cheerio_web.ts b/examples/src/document_loaders/cheerio_web.ts index f32ac26f9bd6..4297e85746d8 100644 --- a/examples/src/document_loaders/cheerio_web.ts +++ b/examples/src/document_loaders/cheerio_web.ts @@ -1,4 +1,4 @@ -import { CheerioWebBaseLoader } from "langchain/document_loaders"; +import { CheerioWebBaseLoader } from "langchain/document_loaders/web/cheerio"; export const run = async () => { const loader = new CheerioWebBaseLoader( diff --git a/examples/src/document_loaders/college_confidential.ts b/examples/src/document_loaders/college_confidential.ts index e4a6ab713c76..22b4b09003f5 100644 --- a/examples/src/document_loaders/college_confidential.ts +++ b/examples/src/document_loaders/college_confidential.ts @@ -1,4 +1,4 @@ -import { CollegeConfidentialLoader } from "langchain/document_loaders"; +import { CollegeConfidentialLoader } from "langchain/document_loaders/web/college_confidential"; export const run = async () => { const loader = new CollegeConfidentialLoader( diff --git a/examples/src/document_loaders/gitbook.ts b/examples/src/document_loaders/gitbook.ts index 072a738f64a6..454efaf2bb2a 100644 --- a/examples/src/document_loaders/gitbook.ts +++ b/examples/src/document_loaders/gitbook.ts @@ -1,4 +1,4 @@ -import { GitbookLoader } from "langchain/document_loaders"; +import { GitbookLoader } from "langchain/document_loaders/web/gitbook"; export const run = async () => { const loader = new GitbookLoader("https://docs.gitbook.com"); diff --git a/examples/src/document_loaders/github.ts b/examples/src/document_loaders/github.ts index 457c63b08697..b75c66196ab0 100644 --- a/examples/src/document_loaders/github.ts +++ b/examples/src/document_loaders/github.ts @@ -1,4 +1,4 @@ -import { GithubRepoLoader } from "langchain/document_loaders"; +import { GithubRepoLoader } from "langchain/document_loaders/web/github"; export const run = async () => { const loader = new GithubRepoLoader( diff --git a/examples/src/document_loaders/hn.ts b/examples/src/document_loaders/hn.ts index a02297f6ce97..d84b5afbefb2 100644 --- a/examples/src/document_loaders/hn.ts +++ b/examples/src/document_loaders/hn.ts @@ -1,4 +1,4 @@ -import { HNLoader } from "langchain/document_loaders"; +import { HNLoader } from "langchain/document_loaders/web/hn"; export const run = async () => { const loader = new HNLoader("https://news.ycombinator.com/item?id=34817881"); diff --git a/examples/src/document_loaders/imsdb.ts b/examples/src/document_loaders/imsdb.ts index 12e8af2cd70f..96a397be805c 100644 --- a/examples/src/document_loaders/imsdb.ts +++ b/examples/src/document_loaders/imsdb.ts @@ -1,4 +1,4 @@ -import { IMSDBLoader } from "langchain/document_loaders"; +import { IMSDBLoader } from "langchain/document_loaders/web/imsdb"; export const run = async () => { const loader = new IMSDBLoader( diff --git a/examples/src/document_loaders/notion_markdown.ts b/examples/src/document_loaders/notion_markdown.ts index a6317176e1b9..a6927a0887e7 100644 --- a/examples/src/document_loaders/notion_markdown.ts +++ b/examples/src/document_loaders/notion_markdown.ts @@ -1,4 +1,4 @@ -import { NotionLoader } from "langchain/document_loaders"; +import { NotionLoader } from "langchain/document_loaders/fs/notion"; export const run = async () => { /** Provide the directory path of your notion folder */ diff --git a/examples/src/document_loaders/puppeteer_web.ts b/examples/src/document_loaders/puppeteer_web.ts index ca3afe395758..6f430b123987 100644 --- a/examples/src/document_loaders/puppeteer_web.ts +++ b/examples/src/document_loaders/puppeteer_web.ts @@ -1,4 +1,4 @@ -import { PuppeteerWebBaseLoader } from "langchain/document_loaders"; +import { PuppeteerWebBaseLoader } from "langchain/document_loaders/web/puppeteer"; export const run = async () => { const loader = new PuppeteerWebBaseLoader("https://www.tabnews.com.br/"); diff --git a/examples/src/document_loaders/srt.ts b/examples/src/document_loaders/srt.ts index 29e02329481c..bee6f06d3c22 100644 --- a/examples/src/document_loaders/srt.ts +++ b/examples/src/document_loaders/srt.ts @@ -1,4 +1,4 @@ -import { SRTLoader } from "langchain/document_loaders"; +import { SRTLoader } from "langchain/document_loaders/fs/srt"; export const run = async () => { const loader = new SRTLoader( diff --git a/examples/src/document_loaders/text.ts b/examples/src/document_loaders/text.ts index b8a0d7b3a9e5..b5c48705a4c3 100644 --- a/examples/src/document_loaders/text.ts +++ b/examples/src/document_loaders/text.ts @@ -1,4 +1,4 @@ -import { TextLoader } from "langchain/document_loaders"; +import { TextLoader } from "langchain/document_loaders/fs/text"; export const run = async () => { const loader = new TextLoader( diff --git a/examples/src/document_loaders/unstructured.ts b/examples/src/document_loaders/unstructured.ts index eacd6eb567f0..db4e8b469bff 100644 --- a/examples/src/document_loaders/unstructured.ts +++ b/examples/src/document_loaders/unstructured.ts @@ -1,4 +1,4 @@ -import { UnstructuredLoader } from "langchain/document_loaders"; +import { UnstructuredLoader } from "langchain/document_loaders/fs/unstructured"; export const run = async () => { const loader = new UnstructuredLoader( diff --git a/examples/src/indexes/vector_stores/hnswlib_fromdocs.ts b/examples/src/indexes/vector_stores/hnswlib_fromdocs.ts index 1d1946bbe38f..f0bb42745334 100644 --- a/examples/src/indexes/vector_stores/hnswlib_fromdocs.ts +++ b/examples/src/indexes/vector_stores/hnswlib_fromdocs.ts @@ -1,6 +1,6 @@ import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; -import { TextLoader } from "langchain/document_loaders"; +import { TextLoader } from "langchain/document_loaders/fs/text"; export const run = async () => { // Create docs with a loader diff --git a/langchain/.gitignore b/langchain/.gitignore index 5ec3d9ea18d1..2ba276103975 100644 --- a/langchain/.gitignore +++ b/langchain/.gitignore @@ -94,6 +94,63 @@ docstore.d.ts document_loaders.cjs document_loaders.js document_loaders.d.ts +document_loaders/base.cjs +document_loaders/base.js +document_loaders/base.d.ts +document_loaders/web/cheerio.cjs +document_loaders/web/cheerio.js +document_loaders/web/cheerio.d.ts +document_loaders/web/puppeteer.cjs +document_loaders/web/puppeteer.js +document_loaders/web/puppeteer.d.ts +document_loaders/web/college_confidential.cjs +document_loaders/web/college_confidential.js +document_loaders/web/college_confidential.d.ts +document_loaders/web/gitbook.cjs +document_loaders/web/gitbook.js +document_loaders/web/gitbook.d.ts +document_loaders/web/hn.cjs +document_loaders/web/hn.js +document_loaders/web/hn.d.ts +document_loaders/web/imsdb.cjs +document_loaders/web/imsdb.js +document_loaders/web/imsdb.d.ts +document_loaders/web/github.cjs +document_loaders/web/github.js +document_loaders/web/github.d.ts +document_loaders/fs/directory.cjs +document_loaders/fs/directory.js +document_loaders/fs/directory.d.ts +document_loaders/fs/buffer.cjs +document_loaders/fs/buffer.js +document_loaders/fs/buffer.d.ts +document_loaders/fs/text.cjs +document_loaders/fs/text.js +document_loaders/fs/text.d.ts +document_loaders/fs/json.cjs +document_loaders/fs/json.js +document_loaders/fs/json.d.ts +document_loaders/fs/srt.cjs +document_loaders/fs/srt.js +document_loaders/fs/srt.d.ts +document_loaders/fs/pdf.cjs +document_loaders/fs/pdf.js +document_loaders/fs/pdf.d.ts +document_loaders/fs/docx.cjs +document_loaders/fs/docx.js +document_loaders/fs/docx.d.ts +document_loaders/fs/epub.cjs +document_loaders/fs/epub.js +document_loaders/fs/epub.d.ts +document_loaders/fs/csv.cjs +document_loaders/fs/csv.js +document_loaders/fs/csv.d.ts +document_loaders/fs/notion.cjs +document_loaders/fs/notion.js +document_loaders/fs/notion.d.ts +document_loaders/fs/unstructured.cjs +document_loaders/fs/unstructured.js +document_loaders/fs/unstructured.d.ts chat_models.cjs chat_models.js chat_models.d.ts diff --git a/langchain/document_loaders/fs/notion_markdown.cjs b/langchain/document_loaders/fs/notion_markdown.cjs new file mode 100644 index 000000000000..5c3fe7428e64 --- /dev/null +++ b/langchain/document_loaders/fs/notion_markdown.cjs @@ -0,0 +1 @@ +module.exports = require('../../dist/document_loaders/fs/notion_markdown.cjs'); \ No newline at end of file diff --git a/langchain/document_loaders/fs/notion_markdown.d.ts b/langchain/document_loaders/fs/notion_markdown.d.ts new file mode 100644 index 000000000000..9b384ddb5680 --- /dev/null +++ b/langchain/document_loaders/fs/notion_markdown.d.ts @@ -0,0 +1 @@ +export * from '../../dist/document_loaders/fs/notion_markdown.js' \ No newline at end of file diff --git a/langchain/document_loaders/fs/notion_markdown.js b/langchain/document_loaders/fs/notion_markdown.js new file mode 100644 index 000000000000..9b384ddb5680 --- /dev/null +++ b/langchain/document_loaders/fs/notion_markdown.js @@ -0,0 +1 @@ +export * from '../../dist/document_loaders/fs/notion_markdown.js' \ No newline at end of file diff --git a/langchain/package.json b/langchain/package.json index ec78c9727ef2..90c47946b8d8 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -106,6 +106,63 @@ "document_loaders.cjs", "document_loaders.js", "document_loaders.d.ts", + "document_loaders/base.cjs", + "document_loaders/base.js", + "document_loaders/base.d.ts", + "document_loaders/web/cheerio.cjs", + "document_loaders/web/cheerio.js", + "document_loaders/web/cheerio.d.ts", + "document_loaders/web/puppeteer.cjs", + "document_loaders/web/puppeteer.js", + "document_loaders/web/puppeteer.d.ts", + "document_loaders/web/college_confidential.cjs", + "document_loaders/web/college_confidential.js", + "document_loaders/web/college_confidential.d.ts", + "document_loaders/web/gitbook.cjs", + "document_loaders/web/gitbook.js", + "document_loaders/web/gitbook.d.ts", + "document_loaders/web/hn.cjs", + "document_loaders/web/hn.js", + "document_loaders/web/hn.d.ts", + "document_loaders/web/imsdb.cjs", + "document_loaders/web/imsdb.js", + "document_loaders/web/imsdb.d.ts", + "document_loaders/web/github.cjs", + "document_loaders/web/github.js", + "document_loaders/web/github.d.ts", + "document_loaders/fs/directory.cjs", + "document_loaders/fs/directory.js", + "document_loaders/fs/directory.d.ts", + "document_loaders/fs/buffer.cjs", + "document_loaders/fs/buffer.js", + "document_loaders/fs/buffer.d.ts", + "document_loaders/fs/text.cjs", + "document_loaders/fs/text.js", + "document_loaders/fs/text.d.ts", + "document_loaders/fs/json.cjs", + "document_loaders/fs/json.js", + "document_loaders/fs/json.d.ts", + "document_loaders/fs/srt.cjs", + "document_loaders/fs/srt.js", + "document_loaders/fs/srt.d.ts", + "document_loaders/fs/pdf.cjs", + "document_loaders/fs/pdf.js", + "document_loaders/fs/pdf.d.ts", + "document_loaders/fs/docx.cjs", + "document_loaders/fs/docx.js", + "document_loaders/fs/docx.d.ts", + "document_loaders/fs/epub.cjs", + "document_loaders/fs/epub.js", + "document_loaders/fs/epub.d.ts", + "document_loaders/fs/csv.cjs", + "document_loaders/fs/csv.js", + "document_loaders/fs/csv.d.ts", + "document_loaders/fs/notion.cjs", + "document_loaders/fs/notion.js", + "document_loaders/fs/notion.d.ts", + "document_loaders/fs/unstructured.cjs", + "document_loaders/fs/unstructured.js", + "document_loaders/fs/unstructured.d.ts", "chat_models.cjs", "chat_models.js", "chat_models.d.ts", @@ -501,14 +558,106 @@ "require": "./docstore.cjs" }, "./document_loaders": { - "types": "./document_loaders.d.ts", "node": { + "types": "./document_loaders.d.ts", "import": "./document_loaders.js", "require": "./document_loaders.cjs" - }, - "import": "./dist/document_loaders/index.lite.js", - "require": "./dist/document_loaders/index.lite.cjs", - "default": "./dist/document_loaders/index.lite.js" + } + }, + "./document_loaders/base": { + "types": "./document_loaders/base.d.ts", + "import": "./document_loaders/base.js", + "require": "./document_loaders/base.cjs" + }, + "./document_loaders/web/cheerio": { + "types": "./document_loaders/web/cheerio.d.ts", + "import": "./document_loaders/web/cheerio.js", + "require": "./document_loaders/web/cheerio.cjs" + }, + "./document_loaders/web/puppeteer": { + "types": "./document_loaders/web/puppeteer.d.ts", + "import": "./document_loaders/web/puppeteer.js", + "require": "./document_loaders/web/puppeteer.cjs" + }, + "./document_loaders/web/college_confidential": { + "types": "./document_loaders/web/college_confidential.d.ts", + "import": "./document_loaders/web/college_confidential.js", + "require": "./document_loaders/web/college_confidential.cjs" + }, + "./document_loaders/web/gitbook": { + "types": "./document_loaders/web/gitbook.d.ts", + "import": "./document_loaders/web/gitbook.js", + "require": "./document_loaders/web/gitbook.cjs" + }, + "./document_loaders/web/hn": { + "types": "./document_loaders/web/hn.d.ts", + "import": "./document_loaders/web/hn.js", + "require": "./document_loaders/web/hn.cjs" + }, + "./document_loaders/web/imsdb": { + "types": "./document_loaders/web/imsdb.d.ts", + "import": "./document_loaders/web/imsdb.js", + "require": "./document_loaders/web/imsdb.cjs" + }, + "./document_loaders/web/github": { + "types": "./document_loaders/web/github.d.ts", + "import": "./document_loaders/web/github.js", + "require": "./document_loaders/web/github.cjs" + }, + "./document_loaders/fs/directory": { + "types": "./document_loaders/fs/directory.d.ts", + "import": "./document_loaders/fs/directory.js", + "require": "./document_loaders/fs/directory.cjs" + }, + "./document_loaders/fs/buffer": { + "types": "./document_loaders/fs/buffer.d.ts", + "import": "./document_loaders/fs/buffer.js", + "require": "./document_loaders/fs/buffer.cjs" + }, + "./document_loaders/fs/text": { + "types": "./document_loaders/fs/text.d.ts", + "import": "./document_loaders/fs/text.js", + "require": "./document_loaders/fs/text.cjs" + }, + "./document_loaders/fs/json": { + "types": "./document_loaders/fs/json.d.ts", + "import": "./document_loaders/fs/json.js", + "require": "./document_loaders/fs/json.cjs" + }, + "./document_loaders/fs/srt": { + "types": "./document_loaders/fs/srt.d.ts", + "import": "./document_loaders/fs/srt.js", + "require": "./document_loaders/fs/srt.cjs" + }, + "./document_loaders/fs/pdf": { + "types": "./document_loaders/fs/pdf.d.ts", + "import": "./document_loaders/fs/pdf.js", + "require": "./document_loaders/fs/pdf.cjs" + }, + "./document_loaders/fs/docx": { + "types": "./document_loaders/fs/docx.d.ts", + "import": "./document_loaders/fs/docx.js", + "require": "./document_loaders/fs/docx.cjs" + }, + "./document_loaders/fs/epub": { + "types": "./document_loaders/fs/epub.d.ts", + "import": "./document_loaders/fs/epub.js", + "require": "./document_loaders/fs/epub.cjs" + }, + "./document_loaders/fs/csv": { + "types": "./document_loaders/fs/csv.d.ts", + "import": "./document_loaders/fs/csv.js", + "require": "./document_loaders/fs/csv.cjs" + }, + "./document_loaders/fs/notion": { + "types": "./document_loaders/fs/notion.d.ts", + "import": "./document_loaders/fs/notion.js", + "require": "./document_loaders/fs/notion.cjs" + }, + "./document_loaders/fs/unstructured": { + "types": "./document_loaders/fs/unstructured.d.ts", + "import": "./document_loaders/fs/unstructured.js", + "require": "./document_loaders/fs/unstructured.cjs" }, "./chat_models": { "node": { diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js index 16f1550402e5..cdf39c2c721f 100644 --- a/langchain/scripts/create-entrypoints.js +++ b/langchain/scripts/create-entrypoints.js @@ -52,6 +52,26 @@ const entrypoints = { docstore: "docstore/index", // document_loaders document_loaders: "document_loaders/index", + "document_loaders/base": "document_loaders/base", + "document_loaders/web/cheerio": "document_loaders/web/cheerio", + "document_loaders/web/puppeteer": "document_loaders/web/puppeteer", + "document_loaders/web/college_confidential": + "document_loaders/web/college_confidential", + "document_loaders/web/gitbook": "document_loaders/web/gitbook", + "document_loaders/web/hn": "document_loaders/web/hn", + "document_loaders/web/imsdb": "document_loaders/web/imsdb", + "document_loaders/web/github": "document_loaders/web/github", + "document_loaders/fs/directory": "document_loaders/fs/directory", + "document_loaders/fs/buffer": "document_loaders/fs/buffer", + "document_loaders/fs/text": "document_loaders/fs/text", + "document_loaders/fs/json": "document_loaders/fs/json", + "document_loaders/fs/srt": "document_loaders/fs/srt", + "document_loaders/fs/pdf": "document_loaders/fs/pdf", + "document_loaders/fs/docx": "document_loaders/fs/docx", + "document_loaders/fs/epub": "document_loaders/fs/epub", + "document_loaders/fs/csv": "document_loaders/fs/csv", + "document_loaders/fs/notion": "document_loaders/fs/notion", + "document_loaders/fs/unstructured": "document_loaders/fs/unstructured", // chat_models chat_models: "chat_models/index", "chat_models/base": "chat_models/base", @@ -83,6 +103,7 @@ const deprecatedNodeOnly = [ "chat_models", "vectorstores", "retrievers", + "document_loaders", ]; // Entrypoints in this list require an optional dependency to be installed. @@ -100,6 +121,24 @@ const requiresOptionalDependency = [ "vectorstores/hnswlib", "vectorstores/pinecone", "vectorstores/supabase", + "document_loaders/web/cheerio", + "document_loaders/web/puppeteer", + "document_loaders/web/college_confidential", + "document_loaders/web/gitbook", + "document_loaders/web/hn", + "document_loaders/web/imsdb", + "document_loaders/web/github", + "document_loaders/fs/directory", + "document_loaders/fs/buffer", + "document_loaders/fs/text", + "document_loaders/fs/json", + "document_loaders/fs/srt", + "document_loaders/fs/pdf", + "document_loaders/fs/docx", + "document_loaders/fs/epub", + "document_loaders/fs/csv", + "document_loaders/fs/notion", + "document_loaders/fs/unstructured", "sql_db", "retrievers/supabase", "retrievers/metal", diff --git a/langchain/src/document_loaders/path/buffer.ts b/langchain/src/document_loaders/fs/buffer.ts similarity index 100% rename from langchain/src/document_loaders/path/buffer.ts rename to langchain/src/document_loaders/fs/buffer.ts diff --git a/langchain/src/document_loaders/path/csv.ts b/langchain/src/document_loaders/fs/csv.ts similarity index 100% rename from langchain/src/document_loaders/path/csv.ts rename to langchain/src/document_loaders/fs/csv.ts diff --git a/langchain/src/document_loaders/path/directory.ts b/langchain/src/document_loaders/fs/directory.ts similarity index 100% rename from langchain/src/document_loaders/path/directory.ts rename to langchain/src/document_loaders/fs/directory.ts diff --git a/langchain/src/document_loaders/path/docx.ts b/langchain/src/document_loaders/fs/docx.ts similarity index 100% rename from langchain/src/document_loaders/path/docx.ts rename to langchain/src/document_loaders/fs/docx.ts diff --git a/langchain/src/document_loaders/path/epub.ts b/langchain/src/document_loaders/fs/epub.ts similarity index 100% rename from langchain/src/document_loaders/path/epub.ts rename to langchain/src/document_loaders/fs/epub.ts diff --git a/langchain/src/document_loaders/path/json.ts b/langchain/src/document_loaders/fs/json.ts similarity index 87% rename from langchain/src/document_loaders/path/json.ts rename to langchain/src/document_loaders/fs/json.ts index 2b29a88464d6..0df9b5483908 100644 --- a/langchain/src/document_loaders/path/json.ts +++ b/langchain/src/document_loaders/fs/json.ts @@ -115,3 +115,19 @@ export class JSONLoader extends TextLoader { return targetEntries; } } + +export class JSONLinesLoader extends TextLoader { + constructor(filePathOrBlob: string | Blob, public pointer: string) { + super(filePathOrBlob); + } + + protected async parse(raw: string): Promise { + const lines = raw.split("\n"); + const jsons = lines + .map((line) => line.trim()) + .filter(Boolean) + .map((line) => JSON.parse(line)); + const pointer = jsonpointer.compile(this.pointer); + return jsons.map((json) => pointer.get(json)); + } +} diff --git a/langchain/src/document_loaders/path/notion_markdown.ts b/langchain/src/document_loaders/fs/notion.ts similarity index 100% rename from langchain/src/document_loaders/path/notion_markdown.ts rename to langchain/src/document_loaders/fs/notion.ts diff --git a/langchain/src/document_loaders/path/pdf.ts b/langchain/src/document_loaders/fs/pdf.ts similarity index 100% rename from langchain/src/document_loaders/path/pdf.ts rename to langchain/src/document_loaders/fs/pdf.ts diff --git a/langchain/src/document_loaders/path/srt.ts b/langchain/src/document_loaders/fs/srt.ts similarity index 100% rename from langchain/src/document_loaders/path/srt.ts rename to langchain/src/document_loaders/fs/srt.ts diff --git a/langchain/src/document_loaders/path/text.ts b/langchain/src/document_loaders/fs/text.ts similarity index 100% rename from langchain/src/document_loaders/path/text.ts rename to langchain/src/document_loaders/fs/text.ts diff --git a/langchain/src/document_loaders/path/unstructured.ts b/langchain/src/document_loaders/fs/unstructured.ts similarity index 100% rename from langchain/src/document_loaders/path/unstructured.ts rename to langchain/src/document_loaders/fs/unstructured.ts diff --git a/langchain/src/document_loaders/index.lite.ts b/langchain/src/document_loaders/index.lite.ts deleted file mode 100644 index 909b4ebd7e4a..000000000000 --- a/langchain/src/document_loaders/index.lite.ts +++ /dev/null @@ -1,8 +0,0 @@ -export type { DocumentLoader } from "./base.js"; -export { BaseDocumentLoader } from "./base.js"; -export { CheerioWebBaseLoader } from "./web/cheerio_web_base.js"; -export { CollegeConfidentialLoader } from "./web/college_confidential.js"; -export { GitbookLoader } from "./web/gitbook.js"; -export { HNLoader } from "./web/hn.js"; -export { IMSDBLoader } from "./web/imsdb.js"; -export { GithubRepoLoader, GithubRepoLoaderParams } from "./web/github.js"; diff --git a/langchain/src/document_loaders/index.ts b/langchain/src/document_loaders/index.ts index 513e5f20f985..3a37560e3e62 100644 --- a/langchain/src/document_loaders/index.ts +++ b/langchain/src/document_loaders/index.ts @@ -1,23 +1,19 @@ export type { DocumentLoader } from "./base.js"; export { BaseDocumentLoader } from "./base.js"; -export { CheerioWebBaseLoader } from "./web/cheerio_web_base.js"; -export { - PuppeteerWebBaseLoader, - PuppeteerEvaluate, -} from "./web/puppeteer_web_base.js"; +export { CheerioWebBaseLoader } from "./web/cheerio.js"; +export { PuppeteerWebBaseLoader, PuppeteerEvaluate } from "./web/puppeteer.js"; export { CollegeConfidentialLoader } from "./web/college_confidential.js"; export { GitbookLoader } from "./web/gitbook.js"; export { HNLoader } from "./web/hn.js"; export { IMSDBLoader } from "./web/imsdb.js"; -export { DirectoryLoader, UnknownHandling } from "./path/directory.js"; -export { SRTLoader } from "./path/srt.js"; -export { PDFLoader } from "./path/pdf.js"; -export { DocxLoader } from "./path/docx.js"; -export { EPubLoader } from "./path/epub.js"; -export { TextLoader } from "./path/text.js"; -export { JSONLoader } from "./path/json.js"; -export { JSONLinesLoader } from "./path/jsonl.js"; -export { CSVLoader } from "./path/csv.js"; -export { NotionLoader } from "./path/notion_markdown.js"; +export { DirectoryLoader, UnknownHandling } from "./fs/directory.js"; +export { SRTLoader } from "./fs/srt.js"; +export { PDFLoader } from "./fs/pdf.js"; +export { DocxLoader } from "./fs/docx.js"; +export { EPubLoader } from "./fs/epub.js"; +export { TextLoader } from "./fs/text.js"; +export { JSONLoader, JSONLinesLoader } from "./fs/json.js"; +export { CSVLoader } from "./fs/csv.js"; +export { NotionLoader } from "./fs/notion.js"; export { GithubRepoLoader, GithubRepoLoaderParams } from "./web/github.js"; -export { UnstructuredLoader } from "./path/unstructured.js"; +export { UnstructuredLoader } from "./fs/unstructured.js"; diff --git a/langchain/src/document_loaders/path/jsonl.ts b/langchain/src/document_loaders/path/jsonl.ts deleted file mode 100644 index eedfe131c878..000000000000 --- a/langchain/src/document_loaders/path/jsonl.ts +++ /dev/null @@ -1,19 +0,0 @@ -import jsonpointer from "jsonpointer"; - -import { TextLoader } from "./text.js"; - -export class JSONLinesLoader extends TextLoader { - constructor(filePathOrBlob: string | Blob, public pointer: string) { - super(filePathOrBlob); - } - - protected async parse(raw: string): Promise { - const lines = raw.split("\n"); - const jsons = lines - .map((line) => line.trim()) - .filter(Boolean) - .map((line) => JSON.parse(line)); - const pointer = jsonpointer.compile(this.pointer); - return jsons.map((json) => pointer.get(json)); - } -} diff --git a/langchain/src/document_loaders/tests/cheerio_web.test.ts b/langchain/src/document_loaders/tests/cheerio.test.ts similarity index 76% rename from langchain/src/document_loaders/tests/cheerio_web.test.ts rename to langchain/src/document_loaders/tests/cheerio.test.ts index fbc33cba9308..3d0881a0fd5c 100644 --- a/langchain/src/document_loaders/tests/cheerio_web.test.ts +++ b/langchain/src/document_loaders/tests/cheerio.test.ts @@ -1,5 +1,5 @@ import { test } from "@jest/globals"; -import { CheerioWebBaseLoader } from "../web/cheerio_web_base.js"; +import { CheerioWebBaseLoader } from "../web/cheerio.js"; test("Test cheerio web scraper loader", async () => { const loader = new CheerioWebBaseLoader( diff --git a/langchain/src/document_loaders/tests/csv-blob.test.ts b/langchain/src/document_loaders/tests/csv-blob.test.ts index 1206cbfb8ef1..44a17b904688 100644 --- a/langchain/src/document_loaders/tests/csv-blob.test.ts +++ b/langchain/src/document_loaders/tests/csv-blob.test.ts @@ -2,7 +2,7 @@ import * as url from "node:url"; import * as path from "node:path"; import * as fs from "node:fs/promises"; import { test, expect } from "@jest/globals"; -import { CSVLoader } from "../path/csv.js"; +import { CSVLoader } from "../fs/csv.js"; import { Document } from "../../document.js"; test("Test CSV loader from blob", async () => { diff --git a/langchain/src/document_loaders/tests/csv.test.ts b/langchain/src/document_loaders/tests/csv.test.ts index bcdcabd62c3b..5116de08e0c5 100644 --- a/langchain/src/document_loaders/tests/csv.test.ts +++ b/langchain/src/document_loaders/tests/csv.test.ts @@ -1,7 +1,7 @@ import * as url from "node:url"; import * as path from "node:path"; import { test, expect } from "@jest/globals"; -import { CSVLoader } from "../path/csv.js"; +import { CSVLoader } from "../fs/csv.js"; import { Document } from "../../document.js"; test("Test CSV loader from file with column arg", async () => { diff --git a/langchain/src/document_loaders/tests/directory.test.ts b/langchain/src/document_loaders/tests/directory.test.ts index 9a9ad1807d2a..81f23cc43aae 100644 --- a/langchain/src/document_loaders/tests/directory.test.ts +++ b/langchain/src/document_loaders/tests/directory.test.ts @@ -1,11 +1,11 @@ import * as url from "node:url"; import * as path from "node:path"; import { test, expect } from "@jest/globals"; -import { DirectoryLoader, UnknownHandling } from "../path/directory.js"; -import { CSVLoader } from "../path/csv.js"; -import { PDFLoader } from "../path/pdf.js"; -import { TextLoader } from "../path/text.js"; -import { JSONLoader } from "../path/json.js"; +import { DirectoryLoader, UnknownHandling } from "../fs/directory.js"; +import { CSVLoader } from "../fs/csv.js"; +import { PDFLoader } from "../fs/pdf.js"; +import { TextLoader } from "../fs/text.js"; +import { JSONLoader } from "../fs/json.js"; test("Test Directory loader", async () => { const directoryPath = path.resolve( diff --git a/langchain/src/document_loaders/tests/docx.test.ts b/langchain/src/document_loaders/tests/docx.test.ts index ad8987be07e5..63395bb51bc0 100644 --- a/langchain/src/document_loaders/tests/docx.test.ts +++ b/langchain/src/document_loaders/tests/docx.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; import * as url from "node:url"; import * as path from "node:path"; -import { DocxLoader } from "../path/docx.js"; +import { DocxLoader } from "../fs/docx.js"; test("Test Word doc loader from file", async () => { const filePath = path.resolve( diff --git a/langchain/src/document_loaders/tests/epub.test.ts b/langchain/src/document_loaders/tests/epub.test.ts index 0a5edd79bee9..01e81501a046 100644 --- a/langchain/src/document_loaders/tests/epub.test.ts +++ b/langchain/src/document_loaders/tests/epub.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; import * as url from "node:url"; import * as path from "node:path"; -import { EPubLoader } from "../path/epub.js"; +import { EPubLoader } from "../fs/epub.js"; test("Test EPub loader from file", async () => { const filePath = path.resolve( diff --git a/langchain/src/document_loaders/tests/json-blob.test.ts b/langchain/src/document_loaders/tests/json-blob.test.ts index b206114c0d7e..d12b4c988280 100644 --- a/langchain/src/document_loaders/tests/json-blob.test.ts +++ b/langchain/src/document_loaders/tests/json-blob.test.ts @@ -2,7 +2,7 @@ import * as url from "node:url"; import * as path from "node:path"; import * as fs from "node:fs/promises"; import { test, expect } from "@jest/globals"; -import { JSONLoader } from "../path/json.js"; +import { JSONLoader } from "../fs/json.js"; import { Document } from "../../document.js"; test("Test JSON loader from blob", async () => { diff --git a/langchain/src/document_loaders/tests/json.test.ts b/langchain/src/document_loaders/tests/json.test.ts index 313329bdbfa8..61ad37c4b6cf 100644 --- a/langchain/src/document_loaders/tests/json.test.ts +++ b/langchain/src/document_loaders/tests/json.test.ts @@ -2,7 +2,7 @@ import * as url from "node:url"; import * as path from "node:path"; import { test, expect } from "@jest/globals"; import { Document } from "../../document.js"; -import { JSONLoader } from "../path/json.js"; +import { JSONLoader } from "../fs/json.js"; test("Test JSON loader", async () => { const filePath = path.resolve( diff --git a/langchain/src/document_loaders/tests/jsonl-blob.test.ts b/langchain/src/document_loaders/tests/jsonl-blob.test.ts index 03b2255f95e2..fbc6df040280 100644 --- a/langchain/src/document_loaders/tests/jsonl-blob.test.ts +++ b/langchain/src/document_loaders/tests/jsonl-blob.test.ts @@ -2,7 +2,7 @@ import * as url from "node:url"; import * as path from "node:path"; import * as fs from "node:fs/promises"; import { test, expect } from "@jest/globals"; -import { JSONLinesLoader } from "../path/jsonl.js"; +import { JSONLinesLoader } from "../fs/json.js"; import { Document } from "../../document.js"; test("Test JSONL loader from blob", async () => { diff --git a/langchain/src/document_loaders/tests/jsonl.test.ts b/langchain/src/document_loaders/tests/jsonl.test.ts index 0aa550356eb8..a2f037644e6a 100644 --- a/langchain/src/document_loaders/tests/jsonl.test.ts +++ b/langchain/src/document_loaders/tests/jsonl.test.ts @@ -1,7 +1,7 @@ import * as url from "node:url"; import * as path from "node:path"; import { test, expect } from "@jest/globals"; -import { JSONLinesLoader } from "../path/jsonl.js"; +import { JSONLinesLoader } from "../fs/json.js"; import { Document } from "../../document.js"; test("Test JSON loader from file", async () => { diff --git a/langchain/src/document_loaders/tests/notion_markdown.test.ts b/langchain/src/document_loaders/tests/notion.test.ts similarity index 88% rename from langchain/src/document_loaders/tests/notion_markdown.test.ts rename to langchain/src/document_loaders/tests/notion.test.ts index 1443447d9a10..025da7591f59 100644 --- a/langchain/src/document_loaders/tests/notion_markdown.test.ts +++ b/langchain/src/document_loaders/tests/notion.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; import * as url from "node:url"; import * as path from "node:path"; -import { NotionLoader } from "../path/notion_markdown.js"; +import { NotionLoader } from "../fs/notion.js"; test("Test Notion Loader", async () => { const directoryPath = path.resolve( diff --git a/langchain/src/document_loaders/tests/pdf-blob.test.ts b/langchain/src/document_loaders/tests/pdf-blob.test.ts index d1ab786db0a0..fc05d48a25af 100644 --- a/langchain/src/document_loaders/tests/pdf-blob.test.ts +++ b/langchain/src/document_loaders/tests/pdf-blob.test.ts @@ -2,7 +2,7 @@ import { test, expect } from "@jest/globals"; import * as url from "node:url"; import * as path from "node:path"; import * as fs from "node:fs/promises"; -import { PDFLoader } from "../path/pdf.js"; +import { PDFLoader } from "../fs/pdf.js"; test("Test PDF loader from blob", async () => { const filePath = path.resolve( diff --git a/langchain/src/document_loaders/tests/pdf.test.ts b/langchain/src/document_loaders/tests/pdf.test.ts index 3c211598270e..5160bf799d70 100644 --- a/langchain/src/document_loaders/tests/pdf.test.ts +++ b/langchain/src/document_loaders/tests/pdf.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; import * as url from "node:url"; import * as path from "node:path"; -import { PDFLoader } from "../path/pdf.js"; +import { PDFLoader } from "../fs/pdf.js"; test("Test PDF loader from file", async () => { const filePath = path.resolve( diff --git a/langchain/src/document_loaders/tests/puppeteer_web.int.test.ts b/langchain/src/document_loaders/tests/puppeteer.int.test.ts similarity index 92% rename from langchain/src/document_loaders/tests/puppeteer_web.int.test.ts rename to langchain/src/document_loaders/tests/puppeteer.int.test.ts index c1b1a9fa7d0f..9dd4397a9ede 100644 --- a/langchain/src/document_loaders/tests/puppeteer_web.int.test.ts +++ b/langchain/src/document_loaders/tests/puppeteer.int.test.ts @@ -1,5 +1,5 @@ import { expect, test } from "@jest/globals"; -import { PuppeteerWebBaseLoader } from "../web/puppeteer_web_base.js"; +import { PuppeteerWebBaseLoader } from "../web/puppeteer.js"; test("Test puppeteer web scraper loader", async () => { const loader = new PuppeteerWebBaseLoader("https://www.google.com/"); diff --git a/langchain/src/document_loaders/tests/srt-blob.test.ts b/langchain/src/document_loaders/tests/srt-blob.test.ts index 9895864ad502..e59956297c8e 100644 --- a/langchain/src/document_loaders/tests/srt-blob.test.ts +++ b/langchain/src/document_loaders/tests/srt-blob.test.ts @@ -2,7 +2,7 @@ import * as url from "node:url"; import * as path from "node:path"; import * as fs from "node:fs/promises"; import { test, expect } from "@jest/globals"; -import { SRTLoader } from "../path/srt.js"; +import { SRTLoader } from "../fs/srt.js"; test("Test SRT loader from blob", async () => { const filePath = path.resolve( diff --git a/langchain/src/document_loaders/tests/srt.test.ts b/langchain/src/document_loaders/tests/srt.test.ts index 2f269a12cc99..7a214716055a 100644 --- a/langchain/src/document_loaders/tests/srt.test.ts +++ b/langchain/src/document_loaders/tests/srt.test.ts @@ -1,7 +1,7 @@ import * as url from "node:url"; import * as path from "node:path"; import { test, expect } from "@jest/globals"; -import { SRTLoader } from "../path/srt.js"; +import { SRTLoader } from "../fs/srt.js"; test("Test SRT loader from file", async () => { const filePath = path.resolve( diff --git a/langchain/src/document_loaders/tests/text-blob.test.ts b/langchain/src/document_loaders/tests/text-blob.test.ts index cf5e2eb7ceb2..ace2adf0dc2b 100644 --- a/langchain/src/document_loaders/tests/text-blob.test.ts +++ b/langchain/src/document_loaders/tests/text-blob.test.ts @@ -1,5 +1,5 @@ import { test, expect } from "@jest/globals"; -import { TextLoader } from "../path/text.js"; +import { TextLoader } from "../fs/text.js"; test("Test Text loader from blob", async () => { const loader = new TextLoader( diff --git a/langchain/src/document_loaders/tests/text.test.ts b/langchain/src/document_loaders/tests/text.test.ts index 7b82174a2882..29cca92e3bfe 100644 --- a/langchain/src/document_loaders/tests/text.test.ts +++ b/langchain/src/document_loaders/tests/text.test.ts @@ -1,5 +1,5 @@ import { test, expect } from "@jest/globals"; -import { TextLoader } from "../path/text.js"; +import { TextLoader } from "../fs/text.js"; test("Test Text loader from file", async () => { const loader = new TextLoader( diff --git a/langchain/src/document_loaders/tests/unstructured.test.ts b/langchain/src/document_loaders/tests/unstructured.test.ts index 49ee6a30f9ff..ae496ad54ef6 100644 --- a/langchain/src/document_loaders/tests/unstructured.test.ts +++ b/langchain/src/document_loaders/tests/unstructured.test.ts @@ -1,5 +1,5 @@ import { test, expect } from "@jest/globals"; -import { UnstructuredLoader } from "../path/unstructured.js"; +import { UnstructuredLoader } from "../fs/unstructured.js"; test.skip("Test Unstructured base loader", async () => { const loader = new UnstructuredLoader( diff --git a/langchain/src/document_loaders/web/cheerio_web_base.ts b/langchain/src/document_loaders/web/cheerio.ts similarity index 100% rename from langchain/src/document_loaders/web/cheerio_web_base.ts rename to langchain/src/document_loaders/web/cheerio.ts diff --git a/langchain/src/document_loaders/web/college_confidential.ts b/langchain/src/document_loaders/web/college_confidential.ts index c4d8098ab932..5a87eef3542d 100644 --- a/langchain/src/document_loaders/web/college_confidential.ts +++ b/langchain/src/document_loaders/web/college_confidential.ts @@ -1,5 +1,5 @@ import { Document } from "../../document.js"; -import { CheerioWebBaseLoader } from "./cheerio_web_base.js"; +import { CheerioWebBaseLoader } from "./cheerio.js"; export class CollegeConfidentialLoader extends CheerioWebBaseLoader { constructor(webPath: string) { diff --git a/langchain/src/document_loaders/web/gitbook.ts b/langchain/src/document_loaders/web/gitbook.ts index b74864b596a2..7e3fcaedaaaf 100644 --- a/langchain/src/document_loaders/web/gitbook.ts +++ b/langchain/src/document_loaders/web/gitbook.ts @@ -1,6 +1,6 @@ import type { CheerioAPI } from "cheerio"; import { Document } from "../../document.js"; -import { CheerioWebBaseLoader } from "./cheerio_web_base.js"; +import { CheerioWebBaseLoader } from "./cheerio.js"; interface GitbookLoaderParams { shouldLoadAllPaths?: boolean; diff --git a/langchain/src/document_loaders/web/github.ts b/langchain/src/document_loaders/web/github.ts index 95e96829bb31..50345f46fb03 100644 --- a/langchain/src/document_loaders/web/github.ts +++ b/langchain/src/document_loaders/web/github.ts @@ -1,7 +1,7 @@ import binaryExtensions from "binary-extensions"; import { Document } from "../../document.js"; import { BaseDocumentLoader } from "../base.js"; -import { UnknownHandling } from "../path/directory.js"; +import { UnknownHandling } from "../fs/directory.js"; import { extname } from "../../util/extname.js"; const extensions = new Set(binaryExtensions); diff --git a/langchain/src/document_loaders/web/hn.ts b/langchain/src/document_loaders/web/hn.ts index 584c8b0c371a..673569bc910b 100644 --- a/langchain/src/document_loaders/web/hn.ts +++ b/langchain/src/document_loaders/web/hn.ts @@ -1,6 +1,6 @@ import type { CheerioAPI } from "cheerio"; import { Document } from "../../document.js"; -import { CheerioWebBaseLoader } from "./cheerio_web_base.js"; +import { CheerioWebBaseLoader } from "./cheerio.js"; export class HNLoader extends CheerioWebBaseLoader { constructor(public webPath: string) { diff --git a/langchain/src/document_loaders/web/imsdb.ts b/langchain/src/document_loaders/web/imsdb.ts index c14bee71a628..50aa1b5df400 100644 --- a/langchain/src/document_loaders/web/imsdb.ts +++ b/langchain/src/document_loaders/web/imsdb.ts @@ -1,5 +1,5 @@ import { Document } from "../../document.js"; -import { CheerioWebBaseLoader } from "./cheerio_web_base.js"; +import { CheerioWebBaseLoader } from "./cheerio.js"; export class IMSDBLoader extends CheerioWebBaseLoader { constructor(public webPath: string) { diff --git a/langchain/src/document_loaders/web/puppeteer_web_base.ts b/langchain/src/document_loaders/web/puppeteer.ts similarity index 100% rename from langchain/src/document_loaders/web/puppeteer_web_base.ts rename to langchain/src/document_loaders/web/puppeteer.ts diff --git a/langchain/tsconfig.json b/langchain/tsconfig.json index 0085e9813642..8c8acd9ea728 100644 --- a/langchain/tsconfig.json +++ b/langchain/tsconfig.json @@ -62,7 +62,25 @@ "src/memory/index.ts", "src/document.ts", "src/docstore/index.ts", - "src/document_loaders/index.ts", + "src/document_loaders/base.ts", + "src/document_loaders/web/cheerio.ts", + "src/document_loaders/web/puppeteer.ts", + "src/document_loaders/web/college_confidential.ts", + "src/document_loaders/web/gitbook.ts", + "src/document_loaders/web/hn.ts", + "src/document_loaders/web/imsdb.ts", + "src/document_loaders/web/github.ts", + "src/document_loaders/fs/directory.ts", + "src/document_loaders/fs/buffer.ts", + "src/document_loaders/fs/text.ts", + "src/document_loaders/fs/json.ts", + "src/document_loaders/fs/srt.ts", + "src/document_loaders/fs/pdf.ts", + "src/document_loaders/fs/docx.ts", + "src/document_loaders/fs/epub.ts", + "src/document_loaders/fs/csv.ts", + "src/document_loaders/fs/notion.ts", + "src/document_loaders/fs/unstructured.ts", "src/chat_models/base.ts", "src/chat_models/openai.ts", "src/chat_models/anthropic.ts", diff --git a/test-exports-cf/src/entrypoints.js b/test-exports-cf/src/entrypoints.js index 38b71b48b6eb..7d2bc264059c 100644 --- a/test-exports-cf/src/entrypoints.js +++ b/test-exports-cf/src/entrypoints.js @@ -14,7 +14,7 @@ export * from "langchain/text_splitter"; export * from "langchain/memory"; export * from "langchain/document"; export * from "langchain/docstore"; -export * from "langchain/document_loaders"; +export * from "langchain/document_loaders/base"; export * from "langchain/chat_models/base"; export * from "langchain/chat_models/openai"; export * from "langchain/chat_models/anthropic"; diff --git a/test-exports-cf/src/index.ts b/test-exports-cf/src/index.ts index 2b6fdc83df26..7a90aa70c7bf 100644 --- a/test-exports-cf/src/index.ts +++ b/test-exports-cf/src/index.ts @@ -20,7 +20,7 @@ import { } from "langchain/prompts"; import { OpenAI } from "langchain/llms/openai"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; -import { HNLoader } from "langchain/document_loaders"; +import { HNLoader } from "langchain/document_loaders/web/hn"; export interface Env { OPENAI_API_KEY: string; diff --git a/test-exports-cjs/src/entrypoints.js b/test-exports-cjs/src/entrypoints.js index b3114a5c9e0d..9d3551f1c2fc 100644 --- a/test-exports-cjs/src/entrypoints.js +++ b/test-exports-cjs/src/entrypoints.js @@ -14,7 +14,7 @@ const text_splitter = require("langchain/text_splitter"); const memory = require("langchain/memory"); const document = require("langchain/document"); const docstore = require("langchain/docstore"); -const document_loaders = require("langchain/document_loaders"); +const document_loaders_base = require("langchain/document_loaders/base"); const chat_models_base = require("langchain/chat_models/base"); const chat_models_openai = require("langchain/chat_models/openai"); const chat_models_anthropic = require("langchain/chat_models/anthropic"); diff --git a/test-exports-cjs/src/import.js b/test-exports-cjs/src/import.js index b12eda4702db..79bc1c337c6f 100644 --- a/test-exports-cjs/src/import.js +++ b/test-exports-cjs/src/import.js @@ -7,7 +7,7 @@ async function test() { const { HNSWLib } = await import("langchain/vectorstores/hnswlib"); const { OpenAIEmbeddings } = await import("langchain/embeddings/openai"); const { InMemoryDocstore, Document } = await import("langchain/docstore"); - const { CSVLoader } = await import("langchain/document_loaders"); + const { CSVLoader } = await import("langchain/document_loaders/fs/csv"); // Test exports assert(typeof OpenAI === "function"); diff --git a/test-exports-cjs/src/index.mjs b/test-exports-cjs/src/index.mjs index af09961186fc..4cdf38acb2db 100644 --- a/test-exports-cjs/src/index.mjs +++ b/test-exports-cjs/src/index.mjs @@ -6,7 +6,7 @@ import { loadPrompt } from "langchain/prompts/load"; import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { InMemoryDocstore, Document } from "langchain/docstore"; -import { CSVLoader } from "langchain/document_loaders"; +import { CSVLoader } from "langchain/document_loaders/fs/csv"; // Test exports assert(typeof OpenAI === "function"); diff --git a/test-exports-cjs/src/index.ts b/test-exports-cjs/src/index.ts index 08268b83bc5f..8dd1fd1a0a95 100644 --- a/test-exports-cjs/src/index.ts +++ b/test-exports-cjs/src/index.ts @@ -6,7 +6,7 @@ import { loadPrompt } from "langchain/prompts/load"; import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { InMemoryDocstore, Document } from "langchain/docstore"; -import { CSVLoader } from "langchain/document_loaders"; +import { CSVLoader } from "langchain/document_loaders/fs/csv"; async function test() { // Test exports diff --git a/test-exports-cjs/src/require.js b/test-exports-cjs/src/require.js index 4110bf83d6dd..9f618615152d 100644 --- a/test-exports-cjs/src/require.js +++ b/test-exports-cjs/src/require.js @@ -6,7 +6,7 @@ const { loadPrompt } = require("langchain/prompts/load"); const { HNSWLib } = require("langchain/vectorstores/hnswlib"); const { OpenAIEmbeddings } = require("langchain/embeddings/openai"); const { InMemoryDocstore, Document } = require("langchain/docstore"); -const { CSVLoader } = require("langchain/document_loaders"); +const { CSVLoader } = require("langchain/document_loaders/fs/csv"); async function test() { // Test exports diff --git a/test-exports-cra/src/entrypoints.js b/test-exports-cra/src/entrypoints.js index 38b71b48b6eb..7d2bc264059c 100644 --- a/test-exports-cra/src/entrypoints.js +++ b/test-exports-cra/src/entrypoints.js @@ -14,7 +14,7 @@ export * from "langchain/text_splitter"; export * from "langchain/memory"; export * from "langchain/document"; export * from "langchain/docstore"; -export * from "langchain/document_loaders"; +export * from "langchain/document_loaders/base"; export * from "langchain/chat_models/base"; export * from "langchain/chat_models/openai"; export * from "langchain/chat_models/anthropic"; diff --git a/test-exports-esm/src/entrypoints.js b/test-exports-esm/src/entrypoints.js index 4d555077699a..e745d9dd640c 100644 --- a/test-exports-esm/src/entrypoints.js +++ b/test-exports-esm/src/entrypoints.js @@ -14,7 +14,7 @@ import * as text_splitter from "langchain/text_splitter"; import * as memory from "langchain/memory"; import * as document from "langchain/document"; import * as docstore from "langchain/docstore"; -import * as document_loaders from "langchain/document_loaders"; +import * as document_loaders_base from "langchain/document_loaders/base"; import * as chat_models_base from "langchain/chat_models/base"; import * as chat_models_openai from "langchain/chat_models/openai"; import * as chat_models_anthropic from "langchain/chat_models/anthropic"; diff --git a/test-exports-esm/src/import.cjs b/test-exports-esm/src/import.cjs index b12eda4702db..79bc1c337c6f 100644 --- a/test-exports-esm/src/import.cjs +++ b/test-exports-esm/src/import.cjs @@ -7,7 +7,7 @@ async function test() { const { HNSWLib } = await import("langchain/vectorstores/hnswlib"); const { OpenAIEmbeddings } = await import("langchain/embeddings/openai"); const { InMemoryDocstore, Document } = await import("langchain/docstore"); - const { CSVLoader } = await import("langchain/document_loaders"); + const { CSVLoader } = await import("langchain/document_loaders/fs/csv"); // Test exports assert(typeof OpenAI === "function"); diff --git a/test-exports-esm/src/index.js b/test-exports-esm/src/index.js index af09961186fc..4cdf38acb2db 100644 --- a/test-exports-esm/src/index.js +++ b/test-exports-esm/src/index.js @@ -6,7 +6,7 @@ import { loadPrompt } from "langchain/prompts/load"; import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { InMemoryDocstore, Document } from "langchain/docstore"; -import { CSVLoader } from "langchain/document_loaders"; +import { CSVLoader } from "langchain/document_loaders/fs/csv"; // Test exports assert(typeof OpenAI === "function"); diff --git a/test-exports-esm/src/index.ts b/test-exports-esm/src/index.ts index 08268b83bc5f..8dd1fd1a0a95 100644 --- a/test-exports-esm/src/index.ts +++ b/test-exports-esm/src/index.ts @@ -6,7 +6,7 @@ import { loadPrompt } from "langchain/prompts/load"; import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { InMemoryDocstore, Document } from "langchain/docstore"; -import { CSVLoader } from "langchain/document_loaders"; +import { CSVLoader } from "langchain/document_loaders/fs/csv"; async function test() { // Test exports diff --git a/test-exports-esm/src/require.cjs b/test-exports-esm/src/require.cjs index 4110bf83d6dd..9f618615152d 100644 --- a/test-exports-esm/src/require.cjs +++ b/test-exports-esm/src/require.cjs @@ -6,7 +6,7 @@ const { loadPrompt } = require("langchain/prompts/load"); const { HNSWLib } = require("langchain/vectorstores/hnswlib"); const { OpenAIEmbeddings } = require("langchain/embeddings/openai"); const { InMemoryDocstore, Document } = require("langchain/docstore"); -const { CSVLoader } = require("langchain/document_loaders"); +const { CSVLoader } = require("langchain/document_loaders/fs/csv"); async function test() { // Test exports