Ollama PDF Chat #87

Open · wants to merge 2 commits into base: main
129 changes: 129 additions & 0 deletions ollama-pdf-chat/InstantChat.py
@@ -0,0 +1,129 @@
import os
from typing import List

import chainlit as cl
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.docstore.document import Document
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores.chroma import Chroma

load_dotenv(dotenv_path=".env", verbose=True)

llm_model = os.getenv("LLM_MODEL", "gemma")
print(f"LLM_MODEL value: {llm_model}")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)


@cl.on_chat_start
async def on_chat_start():
    files = None

    # Wait for the user to upload a file (the accept list restricts uploads
    # to plain text and PDF)
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a text file or PDF to begin!",
            accept=["text/plain", "application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
    await msg.send()

    embeddings = OllamaEmbeddings(
        temperature=0.3, top_k=20, show_progress=True, model=llm_model
    )

    if file.type == "text/plain":
        with open(file.path, "r", encoding="utf-8") as f:
            text = f.read()

        # Split the text into chunks
        texts = text_splitter.split_text(text)

        # Create a metadata entry for each chunk
        metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

        # Create a Chroma vector store
        docsearch = await cl.make_async(Chroma.from_texts)(
            texts, embeddings, metadatas=metadatas
        )
    elif file.type == "application/pdf":
        # Load the PDF and split it into chunks
        loader = PyPDFLoader(file.path)
        docs = text_splitter.split_documents(loader.load())

        # Separate the page content from the per-page metadata
        text_collection = [doc.page_content for doc in docs]
        metadatas = [doc.metadata for doc in docs]

        docsearch = await cl.make_async(Chroma.from_texts)(
            text_collection, embeddings, metadatas=metadatas
        )

    message_history = ChatMessageHistory()

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Create a chain that uses the Chroma vector store as its retriever
    chain = ConversationalRetrievalChain.from_llm(
        ChatOllama(model=llm_model, temperature=0.2, streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = (
        f"Processing `{file.name}` done. You can now ask questions! "
        f"We are using the {llm_model} model."
    )
    await msg.update()

    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    text_elements = []  # type: List[cl.Text]

    if source_documents:
        for source_idx, source_doc in enumerate(source_documents):
            source_name = f"source_{source_idx}"
            # Create the text element referenced in the message
            text_elements.append(
                cl.Text(content=source_doc.page_content, name=source_name)
            )
        source_names = [text_el.name for text_el in text_elements]

        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
21 changes: 21 additions & 0 deletions ollama-pdf-chat/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Krishnatejaswi S

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
127 changes: 127 additions & 0 deletions ollama-pdf-chat/README.md
@@ -0,0 +1,127 @@
# 🚀 Ollama-PDF-Chat

[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ![GitHub repo size](https://img.shields.io/github/repo-size/KTS-o7/RVChat) ![GitHub language count](https://img.shields.io/github/languages/count/KTS-o7/RVChat) ![GitHub top language](https://img.shields.io/github/languages/top/KTS-o7/RVChat)

> Ollama PDF Chat is a web application built with the Chainlit library, Langchain, and Ollama. This README serves as a complete guide to setting up and using the application.

## 📚 Table of Contents

- [Installation](#-installation)
- [Usage](#-usage)
- [License](./LICENSE)

## 💻 Installation

- Prerequisites:

  - [Chainlit](https://docs.chainlit.io/)
  - [Langchain](https://www.langchain.com/)
  - [Ollama](https://ollama.com/)

First, install Ollama on your system by following the instructions on the [Ollama website](https://ollama.com/).

Then start the Ollama server with the following command:

```bash
sudo systemctl start ollama
```
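
If your system does not use systemd (on macOS, for example), you can start the server directly in a terminal instead:

```bash
ollama serve
```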

Now, to check if the server is running, use the following command:

```bash
sudo systemctl status ollama
```

Once the server is ready and running, install the models required by the application:

```bash
ollama pull <model_name>
```
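
For example, to pull `gemma`, the model this application defaults to when `LLM_MODEL` is not set:

```bash
ollama pull gemma
```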

After this step, clone the repository

```bash
git clone https://github.com/KTS-o7/cookbook.git
```

and go into the application directory

```bash
cd cookbook/ollama-pdf-chat
```

Create and activate a virtual environment for the application

```bash
python3 -m venv ./env
source ./env/bin/activate
```

and then install the required dependencies

```bash
pip install -r requirements.txt
```

Set the application's environment variables in a `.env` file in the root directory of the application.
An example environment file is provided [here](./exampleEnv).

```bash
touch .env
echo "ANONYMIZED_TELEMETRY=False" >> .env
```
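
The application reads the `LLM_MODEL` variable to pick the Ollama model (falling back to `gemma` when it is unset), so set it in the same file:

```bash
echo "LLM_MODEL=gemma" >> .env
```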

## 🎯 Usage

> You need to keep the required PDFs in a folder called `files` in the root directory of the application.
> Call the ingestor to ingest the PDFs:

```bash
python ingestor.py
```
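
A minimal sketch of this step, assuming a hypothetical document named `mydoc.pdf`:

```bash
mkdir -p files
cp ~/Documents/mydoc.pdf files/  # mydoc.pdf is a placeholder for your own PDF
python ingestor.py
```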

Once all of this is done, you can start the application by running the following command:

```bash
chainlit run multiChat.py
```

> OR

If you want to chat with just one PDF file, you can run the following command:

```bash
chainlit run InstantChat.py
```
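
During development you can also pass Chainlit's `-w` flag so the app reloads automatically whenever the source file changes:

```bash
chainlit run InstantChat.py -w
```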

> The theme can be customized by changing the `config.toml` file inside the `.chainlit` directory.
> An example `config.toml` file is also given.

## 📄 License

Ollama-PDF-Chat is distributed under the MIT License. The terms of the license are as follows:

```markdown
MIT License

Copyright (c) 2024 Krishnatejaswi S

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
29 changes: 29 additions & 0 deletions ollama-pdf-chat/chainlit.md
@@ -0,0 +1,29 @@
# How to use Ollama-PDF Chat

1. Upload Plain text file or PDF file.
   > We do not support other file types.
2. Wait until the file is processed. This usually takes anywhere from 1 to 5 minutes, depending on the file size and your computer's processing power.
3. Once the file is processed, you will be able to see the chatbot in action.
4. You can ask the chatbot questions, and it will answer them to the best of its ability.
5. You can also ask the chatbot to summarize the document for you.
6. You can also ask it to list or summarize the important points in the document.
7. The chatbot also cites the sources of the information it provides.

> This is still under development, so we cannot yet process images or extremely large files. We are working on it and will support them soon.

> Read the [documentation](./README.md) for more information.

## Welcome to Chainlit! 🚀🤖

Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.

## Useful Links 🔗

- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬

We can't wait to see what you create with Chainlit! Happy coding! 💻😊

## Welcome screen

To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
9 changes: 9 additions & 0 deletions ollama-pdf-chat/exampleEnv
@@ -0,0 +1,9 @@
# Environment variables can be defined here.
# This application needs a file named .env with these contents.
# Currently we define a variable to denote the LLM model to be used in the application.
# Only one LLM model should be active at a time, so comment out all the models except one.


#LLM_MODEL = llama2
# LLM_MODEL = phi
LLM_MODEL = gemma