From 1e34cb8b3e35b954b6ad6960ebbc79f32cffe273 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Thu, 6 Apr 2023 14:43:24 -0400 Subject: [PATCH 1/4] added stay tuned to the end --- docs/docs/ecosystem/unstructured.md | 38 +++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 docs/docs/ecosystem/unstructured.md diff --git a/docs/docs/ecosystem/unstructured.md b/docs/docs/ecosystem/unstructured.md new file mode 100644 index 000000000000..af8a88a74517 --- /dev/null +++ b/docs/docs/ecosystem/unstructured.md @@ -0,0 +1,38 @@ +# Unstructured + +This page covers how to use [Unstructured](https://unstructured.io) within LangChain. + +## What is Unstructured? + +Unstructured is an [open source](https://github.com/Unstructured-IO/unstructured) Python package +for extracting text from raw documents for use in machine learning applications. Currently, +Unstructured supports partitioning Word documents (in `.doc` or `.docx` format), +PowerPoints (in `.ppt` or `.pptx` format), PDFs, HTML files, images, +emails (in `.eml` or `.msg` format), epubs, markdown, and plain text files. +Although `unstructured` is a Python package and cannot be used directly with TS/JS, Unstructured +also maintains a [REST API](https://github.com/Unstructured-IO/unstructured-api) to support +pre-processing pipelines written in other programming languages. The endpoint for the +hosted Unstructured API is `https://api.unstructured.io/general/v0/general`, or you can run +the service locally using the instructions found +[here](https://github.com/Unstructured-IO/unstructured-api#dizzy-instructions-for-using-the-docker-image). + +## Quick start + +You can use Unstructured in `langchainjs` with the following code. +Replace the filename with the file you would like to process. +If you are running the container locally, switch the url to +`https://api.unstructured.io/general/v0/general`. 
+ +```typescript +import { test, expect } from "@jest/globals"; +import { UnstructuredLoader } from "../unstructured.js"; + +const loader = new UnstructuredLoader( + "https://api.unstructured.io/general/v0/general", + "langchain/src/document_loaders/tests/example_data/example.txt" +); +const docs = await loader.load(); +``` + +Stay tuned for future updates, including functionality equivalent to +`UnstructuredDirectoryLoader` in `langchain`! From 577866ff7eb07efa10f69349c3197d94a29496f9 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Thu, 6 Apr 2023 14:49:01 -0400 Subject: [PATCH 2/4] linting, linting, linting --- docs/docs/ecosystem/unstructured.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/docs/ecosystem/unstructured.md b/docs/docs/ecosystem/unstructured.md index af8a88a74517..1d38f30e514e 100644 --- a/docs/docs/ecosystem/unstructured.md +++ b/docs/docs/ecosystem/unstructured.md @@ -27,6 +27,7 @@ If you are running the container locally, switch the url to import { test, expect } from "@jest/globals"; import { UnstructuredLoader } from "../unstructured.js"; + const loader = new UnstructuredLoader( "https://api.unstructured.io/general/v0/general", "langchain/src/document_loaders/tests/example_data/example.txt" From 350d648576cafa84aab20e23310b046a3d88f621 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Thu, 6 Apr 2023 14:55:36 -0400 Subject: [PATCH 3/4] more linting --- docs/docs/ecosystem/unstructured.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/docs/ecosystem/unstructured.md b/docs/docs/ecosystem/unstructured.md index 1d38f30e514e..24eff938bddb 100644 --- a/docs/docs/ecosystem/unstructured.md +++ b/docs/docs/ecosystem/unstructured.md @@ -1,6 +1,6 @@ # Unstructured -This page covers how to use [Unstructured](https://unstructured.io) within LangChain. +This page covers how to use [Unstructured](https://unstructured.io) within LangChain. ## What is Unstructured? 
@@ -24,9 +24,7 @@ If you are running the container locally, switch the url to `https://api.unstructured.io/general/v0/general`. ```typescript -import { test, expect } from "@jest/globals"; -import { UnstructuredLoader } from "../unstructured.js"; - +import { UnstructuredLoader } from "langchain/document_loaders/unstructured.js"; const loader = new UnstructuredLoader( "https://api.unstructured.io/general/v0/general", From dd0cc6d208f4fd5d1ffcee39b5667544cdb721f1 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Thu, 6 Apr 2023 14:56:14 -0400 Subject: [PATCH 4/4] update import statement --- docs/docs/ecosystem/unstructured.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/ecosystem/unstructured.md b/docs/docs/ecosystem/unstructured.md index 24eff938bddb..ad75dfa0802e 100644 --- a/docs/docs/ecosystem/unstructured.md +++ b/docs/docs/ecosystem/unstructured.md @@ -24,7 +24,7 @@ If you are running the container locally, switch the url to `https://api.unstructured.io/general/v0/general`. ```typescript -import { UnstructuredLoader } from "langchain/document_loaders/unstructured.js"; +import { UnstructuredLoader } from "langchain/document_loader"; const loader = new UnstructuredLoader( "https://api.unstructured.io/general/v0/general",