From 6655ce9580975fdcfdfc7ba61fbcf44a30120f76 Mon Sep 17 00:00:00 2001
From: mevans <mevans@palantir.com>
Date: Thu, 27 Jul 2023 14:07:06 +0100
Subject: [PATCH 1/6] Initial fixes, both PDF and website generation now
 succeed

---
 libs/langchain/langchain/chains/llm.py        | 1 +
 libs/langchain/langchain/embeddings/openai.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libs/langchain/langchain/chains/llm.py b/libs/langchain/langchain/chains/llm.py
index bb24607a97c27..57547259c45f4 100644
--- a/libs/langchain/langchain/chains/llm.py
+++ b/libs/langchain/langchain/chains/llm.py
@@ -103,6 +103,7 @@ def generate(
             prompts,
             stop,
             callbacks=run_manager.get_child() if run_manager else None,
+            engine="gpt-35-turbo",
             **self.llm_kwargs,
         )
 
diff --git a/libs/langchain/langchain/embeddings/openai.py b/libs/langchain/langchain/embeddings/openai.py
index 2234975f0a90c..9ceca75f6e954 100644
--- a/libs/langchain/langchain/embeddings/openai.py
+++ b/libs/langchain/langchain/embeddings/openai.py
@@ -175,7 +175,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
     openai_organization: Optional[str] = None
     allowed_special: Union[Literal["all"], Set[str]] = set()
     disallowed_special: Union[Literal["all"], Set[str], Sequence[str]] = "all"
-    chunk_size: int = 1000
+    chunk_size: int = 16
     """Maximum number of texts to embed in each batch"""
     max_retries: int = 6
     """Maximum number of retries to make when generating."""

From fc3c4187b5dd897921641a2bbf5031679a20adbd Mon Sep 17 00:00:00 2001
From: mevans <mevans@palantir.com>
Date: Tue, 1 Aug 2023 11:54:50 +0100
Subject: [PATCH 2/6] Swapped sympy in for numexpr

---
 libs/langchain/langchain/chains/llm.py             |  1 -
 libs/langchain/langchain/chains/llm_math/base.py   | 13 +++----------
 libs/langchain/langchain/chains/llm_math/prompt.py | 10 ++++++----
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/libs/langchain/langchain/chains/llm.py b/libs/langchain/langchain/chains/llm.py
index 915ae2857e7f6..b1bbfae78a47e 100644
--- a/libs/langchain/langchain/chains/llm.py
+++ b/libs/langchain/langchain/chains/llm.py
@@ -103,7 +103,6 @@ def generate(
             prompts,
             stop,
             callbacks=run_manager.get_child() if run_manager else None,
-            engine="gpt-35-turbo",
             **self.llm_kwargs,
         )
 
diff --git a/libs/langchain/langchain/chains/llm_math/base.py b/libs/langchain/langchain/chains/llm_math/base.py
index b5e92620d462b..64ad0f051c107 100644
--- a/libs/langchain/langchain/chains/llm_math/base.py
+++ b/libs/langchain/langchain/chains/llm_math/base.py
@@ -6,7 +6,7 @@
 import warnings
 from typing import Any, Dict, List, Optional
 
-import numexpr
+from sympy import sympify
 from pydantic import Extra, root_validator
 
 from langchain.callbacks.manager import (
@@ -21,7 +21,7 @@
 
 
 class LLMMathChain(Chain):
-    """Chain that interprets a prompt and executes python code to do math.
+    """Chain that interprets a prompt and uses the sympy python package to do math.
 
     Example:
         .. code-block:: python
@@ -75,14 +75,7 @@ def output_keys(self) -> List[str]:
 
     def _evaluate_expression(self, expression: str) -> str:
         try:
-            local_dict = {"pi": math.pi, "e": math.e}
-            output = str(
-                numexpr.evaluate(
-                    expression.strip(),
-                    global_dict={},  # restrict access to globals
-                    local_dict=local_dict,  # add common mathematical functions
-                )
-            )
+            output = str(sympify(expression.strip()))
         except Exception as e:
             raise ValueError(
                 f'LLMMathChain._evaluate("{expression}") raised error: {e}.'
diff --git a/libs/langchain/langchain/chains/llm_math/prompt.py b/libs/langchain/langchain/chains/llm_math/prompt.py
index 86595553322c7..e481001fd4c14 100644
--- a/libs/langchain/langchain/chains/llm_math/prompt.py
+++ b/libs/langchain/langchain/chains/llm_math/prompt.py
@@ -1,13 +1,15 @@
 # flake8: noqa
 from langchain.prompts.prompt import PromptTemplate
 
-_PROMPT_TEMPLATE = """Translate a math problem into a expression that can be executed using Python's numexpr library. Use the output of running this code to answer the question.
+_PROMPT_TEMPLATE = """Translate a math problem into a expression that can be executed using the sympify function from the Python package Sympy.
+Use the output of running this code to answer the question.
+Sympify can handle most elementary mathematical expressions, including rounding, factorials, and calculus. It cannot handle string manipulation.
 
 Question: ${{Question with math problem.}}
 ```text
 ${{single line mathematical expression that solves the problem}}
 ```
-...numexpr.evaluate(text)...
+...sympify(text)...
 ```output
 ${{Output of running the code}}
 ```
@@ -19,7 +21,7 @@
 ```text
 37593 * 67
 ```
-...numexpr.evaluate("37593 * 67")...
+...sympify("37593 * 67")...
 ```output
 2518731
 ```
@@ -29,7 +31,7 @@
 ```text
 37593**(1/5)
 ```
-...numexpr.evaluate("37593**(1/5)")...
+...sympify("37593**(1/5)")...
 ```output
 8.222831614237718
 ```

From 755b880a9d384fa1eed0ce8c570b82af1a37cbea Mon Sep 17 00:00:00 2001
From: mevans <mevans@palantir.com>
Date: Wed, 2 Aug 2023 12:43:58 +0100
Subject: [PATCH 3/6] Removed unused math import

---
 libs/langchain/langchain/chains/llm_math/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libs/langchain/langchain/chains/llm_math/base.py b/libs/langchain/langchain/chains/llm_math/base.py
index 64ad0f051c107..0217d2ca266d5 100644
--- a/libs/langchain/langchain/chains/llm_math/base.py
+++ b/libs/langchain/langchain/chains/llm_math/base.py
@@ -1,7 +1,6 @@
 """Chain that interprets a prompt and executes python code to do math."""
 from __future__ import annotations
 
-import math
 import re
 import warnings
 from typing import Any, Dict, List, Optional

From 15b59677f3dff571333788920dced720c4e3a1a7 Mon Sep 17 00:00:00 2001
From: mevans <mevans@palantir.com>
Date: Wed, 2 Aug 2023 12:45:45 +0100
Subject: [PATCH 4/6] Restored original chunk size

---
 libs/langchain/langchain/embeddings/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/langchain/langchain/embeddings/openai.py b/libs/langchain/langchain/embeddings/openai.py
index 47711ccbc7373..383c8f4649f52 100644
--- a/libs/langchain/langchain/embeddings/openai.py
+++ b/libs/langchain/langchain/embeddings/openai.py
@@ -175,7 +175,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
     openai_organization: Optional[str] = None
     allowed_special: Union[Literal["all"], Set[str]] = set()
     disallowed_special: Union[Literal["all"], Set[str], Sequence[str]] = "all"
-    chunk_size: int = 16
+    chunk_size: int = 1000
     """Maximum number of texts to embed in each batch"""
     max_retries: int = 6
     """Maximum number of retries to make when generating."""

From 117c8ba43efec93fdd32ef1d63d81b893524adef Mon Sep 17 00:00:00 2001
From: mevans <mevans@palantir.com>
Date: Wed, 2 Aug 2023 12:56:07 +0100
Subject: [PATCH 5/6] Sorted imports

---
 libs/langchain/langchain/chains/llm_math/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/langchain/langchain/chains/llm_math/base.py b/libs/langchain/langchain/chains/llm_math/base.py
index 0217d2ca266d5..7f9e0f6e0c4af 100644
--- a/libs/langchain/langchain/chains/llm_math/base.py
+++ b/libs/langchain/langchain/chains/llm_math/base.py
@@ -5,8 +5,8 @@
 import warnings
 from typing import Any, Dict, List, Optional
 
-from sympy import sympify
 from pydantic import Extra, root_validator
+from sympy import sympify
 
 from langchain.callbacks.manager import (
     AsyncCallbackManagerForChainRun,

From ad0d6ffa92ec52dc6cd0fa404cb3a3309ce0106b Mon Sep 17 00:00:00 2001
From: mevans <mevans@palantir.com>
Date: Wed, 2 Aug 2023 13:07:16 +0100
Subject: [PATCH 6/6] Updated dependencies: sympy now required, numexpr removed

---
 libs/langchain/pyproject.toml | 494 +++++++++++++++++-----------------
 1 file changed, 250 insertions(+), 244 deletions(-)

diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml
index fbc749a4da8ab..3af3235f97871 100644
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@@ -17,117 +17,116 @@ SQLAlchemy = ">=1.4,<3"
 requests = "^2"
 PyYAML = ">=5.4.1"
 numpy = "^1"
-azure-core = {version = "^1.26.4", optional=true}
-tqdm = {version = ">=4.48.0", optional = true}
+azure-core = { version = "^1.26.4", optional = true }
+tqdm = { version = ">=4.48.0", optional = true }
 openapi-schema-pydantic = "^1.2"
-faiss-cpu = {version = "^1", optional = true}
-wikipedia = {version = "^1", optional = true}
-elasticsearch = {version = "^8", optional = true}
-opensearch-py = {version = "^2.0.0", optional = true}
-redis = {version = "^4", optional = true}
-manifest-ml = {version = "^0.0.1", optional = true}
-spacy = {version = "^3", optional = true}
-nltk = {version = "^3", optional = true}
-transformers = {version = "^4", optional = true}
-beautifulsoup4 = {version = "^4", optional = true}
-torch = {version = ">=1,<3", optional = true}
-jinja2 = {version = "^3", optional = true}
-tiktoken = {version = "^0.3.2", optional = true, python="^3.9"}
-pinecone-client = {version = "^2", optional = true}
-pinecone-text = {version = "^0.4.2", optional = true}
-pymongo = {version = "^4.3.3", optional = true}
-clickhouse-connect = {version="^0.5.14", optional=true}
-weaviate-client = {version = "^3", optional = true}
-marqo = {version = "^0.11.0", optional=true}
-google-api-python-client = {version = "2.70.0", optional = true}
-google-auth = {version = "^2.18.1", optional = true}
-wolframalpha = {version = "5.0.0", optional = true}
-anthropic = {version = "^0.3", optional = true}
-qdrant-client = {version = "^1.3.1", optional = true, python = ">=3.8.1,<3.12"}
+faiss-cpu = { version = "^1", optional = true }
+wikipedia = { version = "^1", optional = true }
+elasticsearch = { version = "^8", optional = true }
+opensearch-py = { version = "^2.0.0", optional = true }
+redis = { version = "^4", optional = true }
+manifest-ml = { version = "^0.0.1", optional = true }
+spacy = { version = "^3", optional = true }
+nltk = { version = "^3", optional = true }
+transformers = { version = "^4", optional = true }
+beautifulsoup4 = { version = "^4", optional = true }
+torch = { version = ">=1,<3", optional = true }
+jinja2 = { version = "^3", optional = true }
+tiktoken = { version = "^0.3.2", optional = true, python = "^3.9" }
+pinecone-client = { version = "^2", optional = true }
+pinecone-text = { version = "^0.4.2", optional = true }
+pymongo = { version = "^4.3.3", optional = true }
+clickhouse-connect = { version = "^0.5.14", optional = true }
+weaviate-client = { version = "^3", optional = true }
+marqo = { version = "^0.11.0", optional = true }
+google-api-python-client = { version = "2.70.0", optional = true }
+google-auth = { version = "^2.18.1", optional = true }
+wolframalpha = { version = "5.0.0", optional = true }
+anthropic = { version = "^0.3", optional = true }
+qdrant-client = { version = "^1.3.1", optional = true, python = ">=3.8.1,<3.12" }
 dataclasses-json = "^0.5.7"
-tensorflow-text = {version = "^2.11.0", optional = true, python = "^3.10, <3.12"}
+tensorflow-text = { version = "^2.11.0", optional = true, python = "^3.10, <3.12" }
 tenacity = "^8.1.0"
-cohere = {version = "^4", optional = true}
-openai = {version = "^0", optional = true}
-nlpcloud = {version = "^1", optional = true}
-nomic = {version = "^1.0.43", optional = true}
-huggingface_hub = {version = "^0", optional = true}
-octoai-sdk = {version = "^0.1.1", optional = true}
-jina = {version = "^3.14", optional = true}
-google-search-results = {version = "^2", optional = true}
-sentence-transformers = {version = "^2", optional = true}
+cohere = { version = "^4", optional = true }
+openai = { version = "^0", optional = true }
+nlpcloud = { version = "^1", optional = true }
+nomic = { version = "^1.0.43", optional = true }
+huggingface_hub = { version = "^0", optional = true }
+octoai-sdk = { version = "^0.1.1", optional = true }
+jina = { version = "^3.14", optional = true }
+google-search-results = { version = "^2", optional = true }
+sentence-transformers = { version = "^2", optional = true }
 aiohttp = "^3.8.3"
-arxiv = {version = "^1.4", optional = true}
-pypdf = {version = "^3.4.0", optional = true}
-networkx = {version="^2.6.3", optional = true}
-aleph-alpha-client = {version="^2.15.0", optional = true}
-deeplake = {version = "^3.6.8", optional = true}
-libdeeplake = {version = "^0.0.60", optional = true}
-pgvector = {version = "^0.1.6", optional = true}
-psycopg2-binary = {version = "^2.9.5", optional = true}
-pyowm = {version = "^3.3.0", optional = true}
-async-timeout = {version = "^4.0.0", python = "<3.11"}
-azure-identity = {version = "^1.12.0", optional=true}
-gptcache = {version = ">=0.1.7", optional = true}
-atlassian-python-api = {version = "^3.36.0", optional=true}
-pytesseract = {version = "^0.3.10", optional=true}
-html2text = {version="^2020.1.16", optional=true}
-numexpr = "^2.8.4"
-duckduckgo-search = {version="^3.8.3", optional=true}
-azure-cosmos = {version="^4.4.0b1", optional=true}
-lark = {version="^1.1.5", optional=true}
-lancedb = {version = "^0.1", optional = true}
-pexpect = {version = "^4.8.0", optional = true}
-pyvespa = {version = "^0.33.0", optional = true}
-O365 = {version = "^2.0.26", optional = true}
-jq = {version = "^1.4.1", optional = true}
-steamship = {version = "^2.16.9", optional = true}
-pdfminer-six = {version = "^20221105", optional = true}
-docarray = {version="^0.32.0", extras=["hnswlib"], optional=true}
-lxml = {version = "^4.9.2", optional = true}
-pymupdf = {version = "^1.22.3", optional = true}
-pypdfium2 = {version = "^4.10.0", optional = true}
-gql = {version = "^3.4.1", optional = true}
-pandas = {version = "^2.0.1", optional = true}
-telethon = {version = "^1.28.5", optional = true}
-neo4j = {version = "^5.8.1", optional = true}
-zep-python = {version=">=0.32", optional=true}
-langkit = {version = ">=0.0.6, <0.1.0", optional = true}
-chardet = {version="^5.1.0", optional=true}
-requests-toolbelt = {version = "^1.0.0", optional = true}
-openlm = {version = "^0.0.5", optional = true}
-scikit-learn = {version = "^1.2.2", optional = true}
-azure-ai-formrecognizer = {version = "^3.2.1", optional = true}
-azure-ai-vision = {version = "^0.11.1b1", optional = true}
-azure-cognitiveservices-speech = {version = "^1.28.0", optional = true}
-py-trello = {version = "^0.19.0", optional = true}
-momento = {version = "^1.5.0", optional = true}
-bibtexparser = {version = "^1.4.0", optional = true}
-singlestoredb = {version = "^0.7.1", optional = true}
-pyspark = {version = "^3.4.0", optional = true}
-clarifai = {version = ">=9.1.0", optional = true}
-tigrisdb = {version = "^1.0.0b6", optional = true}
-nebula3-python = {version = "^3.4.0", optional = true}
-mwparserfromhell = {version = "^0.6.4", optional = true}
-mwxml = {version = "^0.3.3", optional = true}
-awadb = {version = "^0.3.9", optional = true}
-azure-search-documents = {version = "11.4.0b6", optional = true}
-esprima = {version = "^4.0.1", optional = true}
-openllm = {version = ">=0.1.19", optional = true}
-streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
-psychicapi = {version = "^0.8.0", optional = true}
-cassio = {version = "^0.0.7", optional = true}
-rdflib = {version = "^6.3.2", optional = true}
-sympy = {version = "^1.12", optional = true}
-rapidfuzz = {version = "^3.1.1", optional = true}
+arxiv = { version = "^1.4", optional = true }
+pypdf = { version = "^3.4.0", optional = true }
+networkx = { version = "^2.6.3", optional = true }
+aleph-alpha-client = { version = "^2.15.0", optional = true }
+deeplake = { version = "^3.6.8", optional = true }
+libdeeplake = { version = "^0.0.60", optional = true }
+pgvector = { version = "^0.1.6", optional = true }
+psycopg2-binary = { version = "^2.9.5", optional = true }
+pyowm = { version = "^3.3.0", optional = true }
+async-timeout = { version = "^4.0.0", python = "<3.11" }
+azure-identity = { version = "^1.12.0", optional = true }
+gptcache = { version = ">=0.1.7", optional = true }
+atlassian-python-api = { version = "^3.36.0", optional = true }
+pytesseract = { version = "^0.3.10", optional = true }
+html2text = { version = "^2020.1.16", optional = true }
+duckduckgo-search = { version = "^3.8.3", optional = true }
+azure-cosmos = { version = "^4.4.0b1", optional = true }
+lark = { version = "^1.1.5", optional = true }
+lancedb = { version = "^0.1", optional = true }
+pexpect = { version = "^4.8.0", optional = true }
+pyvespa = { version = "^0.33.0", optional = true }
+O365 = { version = "^2.0.26", optional = true }
+jq = { version = "^1.4.1", optional = true }
+steamship = { version = "^2.16.9", optional = true }
+pdfminer-six = { version = "^20221105", optional = true }
+docarray = { version = "^0.32.0", extras = ["hnswlib"], optional = true }
+lxml = { version = "^4.9.2", optional = true }
+pymupdf = { version = "^1.22.3", optional = true }
+pypdfium2 = { version = "^4.10.0", optional = true }
+gql = { version = "^3.4.1", optional = true }
+pandas = { version = "^2.0.1", optional = true }
+telethon = { version = "^1.28.5", optional = true }
+neo4j = { version = "^5.8.1", optional = true }
+zep-python = { version = ">=0.32", optional = true }
+langkit = { version = ">=0.0.6, <0.1.0", optional = true }
+chardet = { version = "^5.1.0", optional = true }
+requests-toolbelt = { version = "^1.0.0", optional = true }
+openlm = { version = "^0.0.5", optional = true }
+scikit-learn = { version = "^1.2.2", optional = true }
+azure-ai-formrecognizer = { version = "^3.2.1", optional = true }
+azure-ai-vision = { version = "^0.11.1b1", optional = true }
+azure-cognitiveservices-speech = { version = "^1.28.0", optional = true }
+py-trello = { version = "^0.19.0", optional = true }
+momento = { version = "^1.5.0", optional = true }
+bibtexparser = { version = "^1.4.0", optional = true }
+singlestoredb = { version = "^0.7.1", optional = true }
+pyspark = { version = "^3.4.0", optional = true }
+clarifai = { version = ">=9.1.0", optional = true }
+tigrisdb = { version = "^1.0.0b6", optional = true }
+nebula3-python = { version = "^3.4.0", optional = true }
+mwparserfromhell = { version = "^0.6.4", optional = true }
+mwxml = { version = "^0.3.3", optional = true }
+awadb = { version = "^0.3.9", optional = true }
+azure-search-documents = { version = "11.4.0b6", optional = true }
+esprima = { version = "^4.0.1", optional = true }
+openllm = { version = ">=0.1.19", optional = true }
+streamlit = { version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0" }
+psychicapi = { version = "^0.8.0", optional = true }
+cassio = { version = "^0.0.7", optional = true }
+rdflib = { version = "^6.3.2", optional = true }
+sympy = { version = "^1.12" }
+rapidfuzz = { version = "^3.1.1", optional = true }
 langsmith = "~0.0.11"
-rank-bm25 = {version = "^0.2.2", optional = true}
-amadeus = {version = ">=8.1.0", optional = true}
-geopandas = {version = "^0.13.1", optional = true}
-xinference = {version = "^0.0.6", optional = true}
-python-arango = {version = "^7.5.9", optional = true}
-gitpython = {version = "^3.1.32", optional = true}
-librosa = {version="^0.10.0.post2", optional = true }
+rank-bm25 = { version = "^0.2.2", optional = true }
+amadeus = { version = ">=8.1.0", optional = true }
+geopandas = { version = "^0.13.1", optional = true }
+xinference = { version = "^0.0.6", optional = true }
+python-arango = { version = "^7.5.9", optional = true }
+gitpython = { version = "^3.1.32", optional = true }
+librosa = { version = "^0.10.0.post2", optional = true }
 
 [tool.poetry.group.test.dependencies]
 # The only dependencies that should be added are
@@ -143,7 +142,7 @@ responses = "^0.22.0"
 pytest-asyncio = "^0.20.3"
 lark = "^1.1.5"
 pandas = "^2.0.0"
-pytest-mock  = "^3.10.0"
+pytest-mock = "^3.10.0"
 pytest-socket = "^0.6.0"
 syrupy = "^4.0.2"
 
@@ -172,7 +171,7 @@ optional = true
 pytest-vcr = "^1.0.2"
 wrapt = "^1.15.0"
 openai = "^0.27.4"
-elasticsearch = {extras = ["async"], version = "^8.6.2"}
+elasticsearch = { extras = ["async"], version = "^8.6.2" }
 redis = "^4.5.4"
 pinecone-client = "^2.2.1"
 pinecone-text = "^0.4.2"
@@ -222,7 +221,20 @@ playwright = "^1.28.0"
 setuptools = "^67.6.1"
 
 [tool.poetry.extras]
-llms = ["anthropic", "clarifai", "cohere", "openai", "openllm", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers", "xinference"]
+llms = [
+  "anthropic",
+  "clarifai",
+  "cohere",
+  "openai",
+  "openllm",
+  "openlm",
+  "nlpcloud",
+  "huggingface_hub",
+  "manifest-ml",
+  "torch",
+  "transformers",
+  "xinference",
+]
 qdrant = ["qdrant-client"]
 openai = ["openai", "tiktoken"]
 text_helpers = ["chardet"]
@@ -232,148 +244,146 @@ docarray = ["docarray"]
 embeddings = ["sentence-transformers"]
 javascript = ["esprima"]
 azure = [
-    "azure-identity",
-    "azure-cosmos",
-    "openai",
-    "azure-core",
-    "azure-ai-formrecognizer",
-    "azure-ai-vision",
-    "azure-cognitiveservices-speech",
-    "azure-search-documents",
+  "azure-identity",
+  "azure-cosmos",
+  "openai",
+  "azure-core",
+  "azure-ai-formrecognizer",
+  "azure-ai-vision",
+  "azure-cognitiveservices-speech",
+  "azure-search-documents",
 ]
 all = [
-    "anthropic",
-    "clarifai",
-    "cohere",
-    "openai",
-    "nlpcloud",
-    "huggingface_hub",
-    "jina",
-    "manifest-ml",
-    "elasticsearch",
-    "opensearch-py",
-    "google-search-results",
-    "faiss-cpu",
-    "sentence-transformers",
-    "transformers",
-    "spacy",
-    "nltk",
-    "wikipedia",
-    "beautifulsoup4",
-    "tiktoken",
-    "torch",
-    "jinja2",
-    "pinecone-client",
-    "pinecone-text",
-    "marqo",
-    "pymongo",
-    "weaviate-client",
-    "redis",
-    "google-api-python-client",
-    "google-auth",
-    "wolframalpha",
-    "qdrant-client",
-    "tensorflow-text",
-    "pypdf",
-    "networkx",
-    "nomic",
-    "aleph-alpha-client",
-    "deeplake",
-    "libdeeplake",
-    "pgvector",
-    "psycopg2-binary",
-    "pyowm",
-    "pytesseract",
-    "html2text",
-    "atlassian-python-api",
-    "gptcache",
-    "duckduckgo-search",
-    "arxiv",
-    "azure-identity",
-    "clickhouse-connect",
-    "azure-cosmos",
-    "lancedb",
-    "langkit",
-    "lark",
-    "pexpect",
-    "pyvespa",
-    "O365",
-    "jq",
-    "docarray",
-    "steamship",
-    "pdfminer-six",
-    "lxml",
-    "requests-toolbelt",
-    "neo4j",
-    "openlm",
-    "azure-ai-formrecognizer",
-    "azure-ai-vision",
-    "azure-cognitiveservices-speech",
-    "momento",
-    "singlestoredb",
-    "tigrisdb",
-    "nebula3-python",
-    "awadb",
-    "esprima",
-    "octoai-sdk",
-    "rdflib",
-    "amadeus",
-    "xinference",
-    "librosa",
-    "python-arango",
+  "anthropic",
+  "clarifai",
+  "cohere",
+  "openai",
+  "nlpcloud",
+  "huggingface_hub",
+  "jina",
+  "manifest-ml",
+  "elasticsearch",
+  "opensearch-py",
+  "google-search-results",
+  "faiss-cpu",
+  "sentence-transformers",
+  "transformers",
+  "spacy",
+  "nltk",
+  "wikipedia",
+  "beautifulsoup4",
+  "tiktoken",
+  "torch",
+  "jinja2",
+  "pinecone-client",
+  "pinecone-text",
+  "marqo",
+  "pymongo",
+  "weaviate-client",
+  "redis",
+  "google-api-python-client",
+  "google-auth",
+  "wolframalpha",
+  "qdrant-client",
+  "tensorflow-text",
+  "pypdf",
+  "networkx",
+  "nomic",
+  "aleph-alpha-client",
+  "deeplake",
+  "libdeeplake",
+  "pgvector",
+  "psycopg2-binary",
+  "pyowm",
+  "pytesseract",
+  "html2text",
+  "atlassian-python-api",
+  "gptcache",
+  "duckduckgo-search",
+  "arxiv",
+  "azure-identity",
+  "clickhouse-connect",
+  "azure-cosmos",
+  "lancedb",
+  "langkit",
+  "lark",
+  "pexpect",
+  "pyvespa",
+  "O365",
+  "jq",
+  "docarray",
+  "steamship",
+  "pdfminer-six",
+  "lxml",
+  "requests-toolbelt",
+  "neo4j",
+  "openlm",
+  "azure-ai-formrecognizer",
+  "azure-ai-vision",
+  "azure-cognitiveservices-speech",
+  "momento",
+  "singlestoredb",
+  "tigrisdb",
+  "nebula3-python",
+  "awadb",
+  "esprima",
+  "octoai-sdk",
+  "rdflib",
+  "amadeus",
+  "xinference",
+  "librosa",
+  "python-arango",
 ]
 
 # An extra used to be able to add extended testing.
 # Please use new-line on formatting to make it easier to add new packages without
 # merge-conflicts
 extended_testing = [
- "beautifulsoup4",
- "bibtexparser",
- "cassio",
- "chardet",
- "esprima",
- "jq",
- "pdfminer.six",
- "pgvector",
- "pypdf",
- "pymupdf",
- "pypdfium2",
- "tqdm",
- "lxml",
- "atlassian-python-api",
- "mwparserfromhell",
- "mwxml",
- "pandas",
- "telethon",
- "psychicapi",
- "zep-python",
- "gql",
- "requests_toolbelt",
- "html2text",
- "py-trello",
- "scikit-learn",
- "streamlit",
- "pyspark",
- "openai",
- "sympy",
- "rapidfuzz",
- "openai",
- "rank_bm25",
- "geopandas",
- "jinja2",
- "xinference",
- "gitpython",
+  "beautifulsoup4",
+  "bibtexparser",
+  "cassio",
+  "chardet",
+  "esprima",
+  "jq",
+  "pdfminer.six",
+  "pgvector",
+  "pypdf",
+  "pymupdf",
+  "pypdfium2",
+  "tqdm",
+  "lxml",
+  "atlassian-python-api",
+  "mwparserfromhell",
+  "mwxml",
+  "pandas",
+  "telethon",
+  "psychicapi",
+  "zep-python",
+  "gql",
+  "requests_toolbelt",
+  "html2text",
+  "py-trello",
+  "scikit-learn",
+  "streamlit",
+  "pyspark",
+  "openai",
+  "sympy",
+  "rapidfuzz",
+  "openai",
+  "rank_bm25",
+  "geopandas",
+  "jinja2",
+  "xinference",
+  "gitpython",
 ]
 
 [tool.ruff]
 select = [
-  "E",  # pycodestyle
-  "F",  # pyflakes
-  "I",  # isort
-]
-exclude = [
-  "tests/integration_tests/examples/non-utf8-encoding.py",
+  "E", # pycodestyle
+  "F", # pyflakes
+  "I", # isort
 ]
+exclude = ["tests/integration_tests/examples/non-utf8-encoding.py"]
 
 [tool.mypy]
 ignore_missing_imports = "True"
@@ -381,9 +391,7 @@ disallow_untyped_defs = "True"
 exclude = ["notebooks", "examples", "example_data"]
 
 [tool.coverage.run]
-omit = [
-    "tests/*",
-]
+omit = ["tests/*"]
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
@@ -402,9 +410,7 @@ build-backend = "poetry.core.masonry.api"
 addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused"
 # Registering custom markers.
 # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
-markers = [
-  "requires: mark tests as requiring a specific library"
-]
+markers = ["requires: mark tests as requiring a specific library"]
 
 [tool.codespell]
 skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples'