diff --git a/.env.example b/.env.example
index 9bec06fe..fa71fec7 100644
--- a/.env.example
+++ b/.env.example
@@ -77,11 +77,10 @@ AGENT_CONFIG_PATH='app/config/agent.yml'
#############################################
# PDF Tool
#############################################
-PDF_PATH='app/tool_constants/pdf_data'
-PDF_TOOL_ENABLED="true"
-PDF_TOOL_DATA_PATH='app/tool_constants/pdf_data' # backend/app/app/tool_constants/pdf_data for local dev
+PDF_TOOL_ENABLED="true" # Set to "true" to enable the PDF tool.
+PDF_TOOL_DATA_PATH='app/tool_constants/pdf_data'
PDF_TOOL_DATABASE='pdf_indexing_1'
-PDF_TOOL_EXTRACTION_CONFIG_PATH='app/config/extraction.yml' # backend/app/app/config/extraction.yml for local dev
+PDF_TOOL_EXTRACTION_CONFIG_PATH='app/config/extraction.yml'
#############################################
# Langsmith variables
diff --git a/backend/app/app/db/vector_db_pdf_ingestion.py b/backend/app/app/db/vector_db_pdf_ingestion.py
index 3f93f5ff..ad22a33b 100755
--- a/backend/app/app/db/vector_db_pdf_ingestion.py
+++ b/backend/app/app/db/vector_db_pdf_ingestion.py
@@ -12,6 +12,7 @@
from app.core.config import settings
from app.schemas.ingestion_schema import LOADER_DICT, IndexingConfig
+from app.schemas.tool_schemas.pdf_tool_schema import MarkdownMetadata
from app.services.chat_agent.helpers.embedding_models import get_embedding_model
from app.utils.config_loader import get_ingestion_configs
@@ -60,9 +61,9 @@ def run(
return self._load_documents(folder_path=folder_path, collection_name=collection_name)
raise ValueError("folder_path must be provided if load_index is False")
- def _pdf_to_docs(
+ def _load_docs(
self,
- pdf_dir_path: str,
+ dir_path: str,
) -> List[Document]:
"""
Using specified PDF miner to convert PDF documents to raw text chunks.
@@ -70,11 +71,12 @@ def _pdf_to_docs(
Fallback: PyPDF
"""
documents = []
- for file_name in os.listdir(pdf_dir_path):
+ for file_name in os.listdir(dir_path):
file_extension = os.path.splitext(file_name)[1].lower()
+ # Load PDF files
if file_extension == ".pdf":
logger.info(f"Loading {file_name} into vectorstore")
- file_path = f"{pdf_dir_path}/{file_name}"
+ file_path = f"{dir_path}/{file_name}"
try:
loader: Any = self.pdf_loader(file_path) # type: ignore
file_docs = loader.load()
@@ -84,6 +86,35 @@ def _pdf_to_docs(
logger.error(
f"Could not extract text from PDF {file_name} with {self.pipeline_config.pdf_parser}: {repr(e)}"
)
+ # Load Markdown files
+ elif file_extension == ".md":
+ logger.info(f"Loading data from {file_name} as Document...")
+ file_path = f"{dir_path}/{file_name}"
+ try:
+ # Load md files as single document
+ with open(file_path, "r", encoding="utf-8") as f:
+ md_file = f.read()
+
+ md_doc = Document(
+ page_content=md_file,
+ metadata=MarkdownMetadata.parse_obj({"source": file_name, "type": "text"}).dict(),
+ )
+
+ # Further split at token-level, when splits are above chunk_size configuration (rare)
+ text_splitter = TokenTextSplitter(
+ chunk_size=self.pipeline_config.tokenizer_chunk_size,
+ chunk_overlap=self.pipeline_config.tokenizer_chunk_overlap,
+ )
+ file_docs = text_splitter.split_documents([md_doc])
+
+ documents.extend(file_docs)
+ if len(file_docs) > 1:
+ logger.info(
+ f"Split {file_name} to {len(file_docs)} documents due to "
+ f"chunk_size: ({self.pipeline_config.tokenizer_chunk_size})"
+ )
+ except Exception as e:
+ logger.error(f"Could not load MD file {file_name}: {repr(e)}")
return documents
@@ -93,7 +124,7 @@ def _load_documents(
collection_name: str,
) -> PGVector:
"""Load documents into vectorstore."""
- text_documents = self._pdf_to_docs(folder_path)
+ text_documents = self._load_docs(folder_path)
text_splitter = TokenTextSplitter(
chunk_size=self.pipeline_config.tokenizer_chunk_size,
chunk_overlap=self.pipeline_config.tokenizer_chunk_overlap,
diff --git a/backend/app/app/schemas/tool_schemas/pdf_tool_schema.py b/backend/app/app/schemas/tool_schemas/pdf_tool_schema.py
index ff192b86..51dbc1a5 100644
--- a/backend/app/app/schemas/tool_schemas/pdf_tool_schema.py
+++ b/backend/app/app/schemas/tool_schemas/pdf_tool_schema.py
@@ -1,10 +1,16 @@
# -*- coding: utf-8 -*-
-from typing import List
+from typing import List, Optional
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
class PdfAppendix(BaseModel):
doc_id: str
page_numbers: List[int]
reference_text: str
+
+
+class MarkdownMetadata(BaseModel):
+ type: str
+ source: str
+ header1: Optional[str] = Field(None, alias="Header 1")
diff --git a/backend/app/app/services/chat_agent/tools/library/pdf_tool/pdf_tool.py b/backend/app/app/services/chat_agent/tools/library/pdf_tool/pdf_tool.py
index ddf96f17..62419fa2 100644
--- a/backend/app/app/services/chat_agent/tools/library/pdf_tool/pdf_tool.py
+++ b/backend/app/app/services/chat_agent/tools/library/pdf_tool/pdf_tool.py
@@ -190,6 +190,5 @@ async def _aqa_pdf_chunks(
)
),
]
- response = await self._agenerate_response(question_messages)
- logger.info(response)
+ response = await self._agenerate_response(question_messages, discard_fast_llm=True, run_manager=run_manager)
return response
diff --git a/backend/app/app/tool_constants/tutorial_data/CONTRIBUTING.md b/backend/app/app/tool_constants/tutorial_data/CONTRIBUTING.md
new file mode 100644
index 00000000..0b6234bc
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/CONTRIBUTING.md
@@ -0,0 +1,29 @@
+# Contributing to AgentKit
+
+We want to make contributing to this project as easy and transparent as possible, whether it's:
+- Reporting a bug and ideally submitting a fix
+- Proposing new features
+- Adding a tool to the library
+
+## Guidelines
+We use GitHub to host code, track issues and feature requests, and accept pull requests.
+
+We actively welcome your pull requests:
+1. Fork the repo and create your branch from `develop`.
+2. If you've changed key features (tools, APIs etc), update the documentation.
+3. Ensure the code is clean: passes the set of tests, lints etc.
+4. Issue a pull request to the main repo
+
+## Report bugs and add feature requests using GitHub issues
+Please note that *currently* there is no central team working on this repository actively resolving bugs or responding to feature requests.
+
+### Bugs:
+Provide
+1) Summary of the issue
+2) Steps to reproduce
+3) What you expected vs what actually happened
+4) Any notes on why you think this might be happening or things you tried that didn't work
+
+
+### Features:
+Provide a description of the feature you would like. You're encouraged to build it yourself and open a PR :-)
diff --git a/backend/app/app/tool_constants/tutorial_data/README.md b/backend/app/app/tool_constants/tutorial_data/README.md
new file mode 100644
index 00000000..5ad99351
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/README.md
@@ -0,0 +1,77 @@
+
+
+# AgentKit: rapidly build Agent apps
+AgentKit is a LangChain-based toolkit developed by BCG X to build Agent apps. Key advantages of the AgentKit framework include:
+- 🚀 **Quickly build high quality Agent apps**: Build a strong demo in a few hours using a modular, easy to configure tech stack based on FastAPI/Nextjs and a library of useful GenAI tools
+- 💻 **Flexible, reactive UI/UX designed for Agents**: React/Nextjs chat-based UI that is easy to configure, with features such as streaming, rendering of tables/visualizations/code, status of Agent actions and more
+- 🛡️ **Focus on reliability**: Easy-to-configure routing architecture gives control over the possible paths the Agent can take, increasing reliability and making it suitable for real-life use cases
+
+[Placeholder for demo video]
+
+## Quickstart
+For a quick setup of AgentKit, use the steps below, where both the backend app and frontend app run inside Docker containers. More elaborate setup instructions can be found at [setup.md](docs/setup.md).
+
+### Prerequisites
+- Docker: https://www.docker.com/get-started
+
+### Installation steps
+1. Clone the repository containing the source code for the backend and frontend apps.
+
+2. Copy the `frontend/.env.example` file in the frontend directory and change the name to `.env`. Also, copy the `.env.example` file in the root directory of the repository and change the name to `.env`.
+ - Change the OPENAI_API_KEY and OPENAI_ORGANIZATION to your own (n.b. OPENAI_ORGANIZATION should be your OpenAI 'Organization ID')
+
+3. In the terminal, navigate to the root directory of the cloned repository. Build and start the Docker containers with the following command:
+ ```
+ docker-compose -f docker-compose.yml up -d
+ ```
+ Wait for the containers to build and start, which may take a few minutes depending on your system. Once the containers are up and running, you can access the apps in your browser at [http://localhost](http://localhost/).
+
+## Chinook music database demo
+- Ensure you are on the `develop` branch
+- Follow the installation instructions above and run the app
+- Try the prompt "When was AC/DC founded?" to see AgentKit in action!
+
+## Set up your own app
+- Configure your Agent and Tools [link](docs/configure_agent_and_tools.md)
+- (Optional) Adjust the UI to your use case [link](docs/configure_ui.md)
+- (Optional) Set up evaluation with LangSmith [link](docs/evaluation.md)
+
+## Documentation
+- [Installation instructions for running frontend or entire app outside Docker](docs/setup_development.md)
+- [Key concepts](docs/key_concepts.md)
+- [Agent configuration](docs/configure_agent.md)
+- [UI configuration](docs/configure_ui.md)
+- [Optional features](docs/optional_features.md)
+- [Tool library](docs/tool_library.md)
+
+## How it works
+
+### Reliability
+AgentKit attempts to solve the reliability issue of agents such as ReAct agents by constraining the potential routes the agent can take to a pre-configured set of routes, or **Action Plans**. Since for many use cases the potential routes the agent can take are known, we can use our human domain expertise to steer the agent in the right direction, reducing the risk of it going in unexpected directions or down rabbit holes. This is achieved by combining a **Meta Agent** with **Action Plans**: sets of tools which are executed linearly and in parallel, similar to a Chain. The Meta Agent takes in the user prompt and outputs the most suited Action Plan to generate an answer. Note: implementing multiple Meta Agents is possible, generating a tree of possible routes.
+
+### User experience
+To optimize user experience, the intermediary output of every step in the Action Plan can be shown to the user. For example, consider an Action Plan consisting of 2 toolsets: `[[sql_tool, pdf_tool], [generate_summary_tool, visualize_tool]]`. In the first action step, information from a SQL database and a vector database with embedded PDFs is retrieved in parallel. The retrieved data and most relevant PDF are streamed to the UI as soon as the first action step finishes. In the second action step, the output from step 1 is passed to a tool that generates a text summary and a tool that creates a JSX visualization from the data, which are streamed to the UI to create the final answer.
+
+For a high-level overview of the routing flow and the connection to the UI, please see the diagram below:
+
+
+## Additional optional features
+
+- **Feedback integration**: allow users to give feedback on generated answers by showing a pop up after each message with quantitative (thumbs up/down) and qualitative (comment) feedback
+
+[placeholder for picture of feedback feature]
+
+- **User settings**: Allow users to specify default settings in the app that can be used to customize prompts for the user
+
+[placeholder for picture of settings feature]
+
+- **User authentication**: Enable NextAuth on your app to authenticate users with Github or with email/password
+
+[placeholder for picture of authentication feature]
+
+See [optional feature documentation](docs/optional_features.md) for more detailed info.
+
+## Contributors
+
+[Placeholder for contributor list]
+Please read `CONTRIBUTING.md` for more details on how to contribute.
diff --git a/backend/app/app/tool_constants/tutorial_data/backend_flow.md b/backend/app/app/tool_constants/tutorial_data/backend_flow.md
new file mode 100644
index 00000000..ace2a499
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/backend_flow.md
@@ -0,0 +1,45 @@
+# AgentKit Backend Flow
+
+The general backend flow is mainly constructed through the following four Python files, each serving a specific purpose.
+
+## chat.py
+
+This is the main backend entry point of AgentKit. It exposes a FastAPI endpoint at `/agent` which accepts POST requests.
+The request body should contain a chat query, which is processed by the `agent_chat` function. This function creates a
+conversation with an agent and returns a `StreamingJsonListResponse` object.
+
+## meta_agent.py
+
+This file contains functions for creating and managing a meta agent. A meta agent is an instance of the `AgentExecutor`
+class, which is responsible for executing AgentKit's logic.
+- The `create_meta_agent` function creates a meta agent from a given configuration.
+- The `get_conv_token_buffer_memory` function retrieves the chat history and stores it in a
+`ConversationTokenBufferMemory` object.
+
+## SimpleRouterAgent.py
+
+This file contains the `SimpleRouterAgent` class. This class is
+responsible for managing AgentKit's actions based on the input it receives.
+- The `aplan` function decides what actions the agent should take based on the input and the intermediate steps taken so far.
+- The `create_prompt` function creates a prompt for the agent.
+- The `from_llm_and_tools` function constructs an agent from a language model and a set of tools.
+
+## get_tools.py
+
+This file contains the `get_tools` function, which retrieves a list of tools from a list of tool names. Each tool class
+is responsible for a specific functionality of AgentKit.
+
+## Flow
+
+1. A POST request is sent to the `/agent` endpoint with a chat query in the request body.
+2. The `agent_chat` function in `chat.py` is called. This function retrieves the meta agent associated with the API key
+specified in the chat query.
+3. The `agent_chat` function creates a conversation with the agent, handles exceptions, and returns a streaming response.
+4. The meta agent, which is an instance of the `AgentExecutor` class, executes AgentKit's logic. This logic is
+determined by the `SimpleRouterAgent` class in `SimpleRouterAgent.py`.
+5. The `SimpleRouterAgent` class decides what actions the agent should take based on the input it receives and the
+intermediate steps taken so far.
+6. The `get_tools` function in `get_tools.py` is called to retrieve the tools needed by the agent. These tools are used
+to perform various tasks, such as generating images, summarizing text, executing SQL queries, etc.
+7. The conversation continues until the agent decides to stop, at which point the `agent_chat` function returns a
+`StreamingJsonListResponse` object containing the conversation history.
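+
+To make the flow concrete, a minimal client sketch is shown below. This is illustrative only: the route prefix and request fields are assumptions, and the authoritative schema lives in the FastAPI request models (see the interactive docs at `/api/v1/docs`).
+
+```python
+import requests  # any HTTP client works
+
+# Hypothetical request body; the real field names are defined in the
+# endpoint's Pydantic request model.
+payload = {
+    "messages": [{"role": "user", "content": "When was AC/DC founded?"}],
+    "api_key": "<your-agent-api-key>",
+}
+
+# The endpoint returns a StreamingJsonListResponse, so read it incrementally.
+with requests.post("http://localhost/api/v1/chat/agent", json=payload, stream=True) as r:
+    for line in r.iter_lines():
+        if line:
+            print(line.decode("utf-8"))
+```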
diff --git a/backend/app/app/tool_constants/tutorial_data/commit_history.csv b/backend/app/app/tool_constants/tutorial_data/commit_history.csv
new file mode 100644
index 00000000..ea20f3a4
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/commit_history.csv
@@ -0,0 +1,51 @@
+commit_hash,commit_timestamp,commit_user,commit_message,file_changed
+01eb37311cccdfaa5785e4b6de10adb49d1e628d,2022-11-13 14:03:00,drivian,Refactor agent communication protocol,agent.py
+400ac3705b6d721fd5febdc29b62820f86325860,2022-08-26 06:14:00,drivian,Update model training pipeline for scalability,agent.py
+d8ea7dec4bf1ff8df523f73ba088956d3415061e,2022-12-03 03:25:00,kaikun213,Enhance AI model with new generative capabilities,preprocess.py
+0a6163a70adf3dbcf359834376b9d7e416610aaa,2023-11-02 05:27:00,kaikun213,Add new dataset for generative model training,model.py
+ef19afd2f89531ef433f6e2c796870a7e73a2a25,2022-10-15 13:39:00,ielmansouri,Improve agent's decision-making algorithm,readme.md
+abb56928a1ee2e6e7a97872ac170a4a9f71fee2b,2023-11-25 23:19:00,ielmansouri,Update model training pipeline for scalability,preprocess.py
+85211da1ead7d91fa85bb853748cccbcf3945102,2023-09-27 18:04:00,drivian,Fix edge case in AI agent's environmental interaction,train.py
+a26b072c7828f588124d797e80318091befe6884,2022-04-01 08:04:00,drivian,Update data preprocessing for enhanced model training,preprocess.py
+585393694eb2be48f30073c29f586bdde2bf0858,2023-01-30 11:41:00,ielmansouri,Fix serialization bug in model saving routine,train.py
+01c89554a372b1b55a820e2d4a732acbc6c88923,2022-09-16 03:02:00,kaikun213,Update data preprocessing for enhanced model training,pipeline.py
+8a06ff822fdeef994ad23e602afaa899f8d88a33,2022-10-28 02:55:00,drivian,Optimize AI model for faster inference,readme.md
+42526f498c300273f3b94a9bfa86bfdb4a861945,2022-05-28 19:32:00,kaikun213,Improve agent's decision-making algorithm,protocol.py
+6cdbe6785ead3335c793f0400b694f3d7e1f370a,2023-05-15 09:30:00,drivian,Add new dataset for generative model training,agent.py
+4a2c1060b067cb08f41c7349ca72aeb2d18f86bc,2022-12-14 15:29:00,ielmansouri,Enhance AI model with new generative capabilities,preprocess.py
+24ebf648d835a5b486f7528dee664d55d7a76b4f,2023-08-03 16:43:00,ielmansouri,Fix edge case in AI agent's environmental interaction,dataset.py
+ce2946a852872adbd2d073d9c2c233742599ac0b,2022-08-31 19:17:00,ielmansouri,Optimize AI model for faster inference,protocol.py
+30d02009b171346c1ba6e2d17224e6bfe29287ec,2022-06-10 07:17:00,ielmansouri,Improve agent's decision-making algorithm,model.py
+de8cc18a049e49e51945e93808692eba45ef6720,2023-07-06 09:28:00,ielmansouri,Fix serialization bug in model saving routine,model.py
+f95e4770aa14b1c786cd56131e6c119761b5164d,2022-06-06 20:33:00,kaikun213,Refactor agent communication protocol,readme.md
+01bef5ea1c045a8037d903ef2d6595dc1480f429,2023-08-24 04:07:00,ielmansouri,Update model training pipeline for scalability,inference.py
+ad76a900dd8c601298de1a4662c624f53e18293e,2023-12-24 12:36:00,kaikun213,Enhance AI model with new generative capabilities,model.py
+11123611676d5730f8706a30103b6204e50ae360,2023-06-07 16:35:00,kaikun213,Update model training pipeline for scalability,inference.py
+913dca976475f703104ddd60a4e59b66ec669d1c,2023-08-02 14:46:00,drivian,Update data preprocessing for enhanced model training,model.py
+a65bcc469861e148444ade799ebc04e387ec8d86,2022-03-02 23:06:00,ielmansouri,Update model training pipeline for scalability,protocol.py
+abbce9b00b46671fa0edb41ccaa61d21f04be3cb,2022-03-29 16:48:00,kaikun213,Fix serialization bug in model saving routine,preprocess.py
+ded78830f81f4370122c5dabef2bb70c829e7005,2022-08-04 15:20:00,drivian,Improve agent's decision-making algorithm,dataset.py
+7cfb600dcfe735099a31039afc883cd6ce7b9c24,2023-09-21 06:39:00,kaikun213,Update data preprocessing for enhanced model training,agent.py
+fac27b87e59e9730e623ea4d5a0d80c9487bef37,2023-06-03 09:25:00,ielmansouri,Refactor agent communication protocol,readme.md
+6511196672015a06735733c176ed652811178c47,2022-02-28 23:42:00,drivian,Update model training pipeline for scalability,protocol.py
+3d4f7124e9ea97ee67a0200d3933fd68e8ed5f69,2022-08-08 10:05:00,kaikun213,Optimize AI model for faster inference,inference.py
+cfafb3d72012a0e983663cf59aa7d2b10e13f30c,2022-03-29 18:20:00,ielmansouri,Fix edge case in AI agent's environmental interaction,inference.py
+49379af5674144591fbf82e06c511ee3205b9462,2023-07-24 01:06:00,ielmansouri,Update data preprocessing for enhanced model training,agent.py
+c3f86ccf377c1037d9673f33e66e98facb216aad,2022-07-12 14:29:00,drivian,Update data preprocessing for enhanced model training,agent.py
+073add795b6067f7567c24f7d4a0b39fa18c9f6a,2023-02-16 21:22:00,ielmansouri,Implement initial generative AI model,protocol.py
+a8800a0e3c9427b7f44a6d45aeec39b813948999,2023-01-28 13:11:00,kaikun213,Add new dataset for generative model training,pipeline.py
+7095d9903ae7c3e90c5d08d9d1b73582b6d5371b,2023-05-10 00:56:00,kaikun213,Refactor agent communication protocol,environment.py
+d2fb1e2d612ae5cbd2c2ef8325c083178b572a56,2023-11-15 07:14:00,drivian,Fix serialization bug in model saving routine,environment.py
+f8e4c650fd55e18d32834cfebd8bb0058e389359,2022-11-21 10:41:00,kaikun213,Refactor agent communication protocol,environment.py
+bb1f431a97bf5ba772cc3c303c2de5ce014a332a,2023-05-24 19:20:00,ielmansouri,Implement initial generative AI model,environment.py
+5da3d6ec830c851cbb5096f4943868124391561c,2023-01-29 14:48:00,drivian,Add new dataset for generative model training,agent.py
+ba9a9f6c9f5b4a9c629d7799426d14c1b2a472b6,2023-08-19 22:15:00,kaikun213,Fix serialization bug in model saving routine,preprocess.py
+b64861ccd71a46f5499a4d2d75d0b4e86b04a728,2023-01-11 08:50:00,kaikun213,Optimize AI model for faster inference,preprocess.py
+cab2b21712c0dd38ee447194269fc2e582ebb399,2022-11-19 18:21:00,ielmansouri,Fix edge case in AI agent's environmental interaction,pipeline.py
+36b7c002dcca5296fedc5c142e63b2c15bd92dba,2022-01-14 02:01:00,ielmansouri,Enhance AI model with new generative capabilities,environment.py
+fbb4980d6a1f141cadc18d530f9934a079724607,2022-07-05 08:38:00,kaikun213,Refactor agent communication protocol,pipeline.py
+98d9a7c12412ef1bc3c63ab3774eb392ec880e5a,2022-04-07 07:13:00,drivian,Add new dataset for generative model training,readme.md
+1b27b52ca1c4b32f6527250f033e657c93a2f5c4,2022-05-10 21:42:00,kaikun213,Refactor agent communication protocol,readme.md
+cfc8ec267cd31f9d45eafb0c6c245d2b1a215ee2,2023-11-07 08:31:00,ielmansouri,Add new dataset for generative model training,environment.py
+0cd66ac977fc886c4066a753582686badceeef54,2022-01-08 13:59:00,ielmansouri,Fix serialization bug in model saving routine,agent.py
+8751c692132b922bcca60df342a89320e7047043,2022-02-16 01:15:00,kaikun213,Update data preprocessing for enhanced model training,train.py
diff --git a/backend/app/app/tool_constants/tutorial_data/configure_agent_and_tools.md b/backend/app/app/tool_constants/tutorial_data/configure_agent_and_tools.md
new file mode 100644
index 00000000..93ee98ca
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/configure_agent_and_tools.md
@@ -0,0 +1,43 @@
+# Configure your agent and tools
+
+## Steps to complete
+- Configure your agent in `agent.yml`, [see agent section for more detailed instructions](#agent-configuration)
+- Add your own tools in `services/chat_agent/tools/YOURAPP` and configure them in `tools.yml`, [see tools section for more detailed instructions](#tools-configuration)
+
+
+## Agent configuration
+
+The agent and action plans can be configured in `agent.yml`.
+
+### LLMs
+
+`default_llm` & `default_fast_llm`: Set the name of the LLMs you want to use. In `llm.py` you can add your own model (any model compatible with LangChain e.g. Google, Anthropic, or open source like Llama2).
+
+We currently have 2 ways to choose if `default_llm` or `default_fast_llm` is used.
+- If you pass `discard_fast_llm=True` in the LLM call in a tool, `default_llm` will always be used
+- [TO BE CHANGED] Otherwise, `default_fast_llm` will be used for prompts < 2500 tokens (configurable in ExtendedBaseTool.py currently, needs to be cleaned up to add as a key setting) and `default_llm` for >=2500 tokens
+
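+As a sketch of this selection logic (the threshold and flag are taken from the description above; the function name is illustrative, not the repo's API):
+
+```python
+def select_llm(prompt_tokens: int, discard_fast_llm: bool = False) -> str:
+    """Illustrative only: mirrors the routing behaviour described above."""
+    if discard_fast_llm or prompt_tokens >= 2500:
+        return "default_llm"
+    return "default_fast_llm"
+```
+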
+### Tools and Action Plans
+Add all the tools in use in `tools`. Ensure the names match the tool names in `tools.py` and your custom tools.
+
+Configure the Action Plans available for the Meta Agent to choose from in `action_plans`. Give each Action Plan a clear `description` of what the use case is; this will improve the reliability and accuracy of the Meta Agent. Add all the tools in `actions`. Each sublist is 1 action step, so add tools as subitems if you want to execute them in parallel.
+
+### Meta agent prompts
+
+It is very important to have a clear system prompt in `system_context` for the Meta Agent so that it chooses the right Action Plans (`prompt_message` can typically be kept the same). Always include a role for the agent ("You are an expert in ...") and a clear goal ("Your goal is to select the right action plan.."). Include some principles to ensure the agent has the right behaviour for the use case, e.g. only run an optimization when the agent is very sure the user wants this, as it takes a lot of time. If there are common failure modes in the agent's routing choices, add a principle or an example of good behaviour to solve it.
+
+## Tools configuration
+
+### Using a library tool
+Check out the library of commonly used tools in `services/chat_agent/tools/library`. Using these tools is simple; ensure the tool is in the `tools` list in `agent.yml` and configure the prompts in `tools.yml`. Detailed documentation on the library tools can be found in `docs/library_tool_docs`.
+
+### Add a tool
+
+1) Add your own tool folder to `services/chat_agent/tools` with a new file `yourtool.py`
+2) Implement your tool. See `template_tool.py` for a template tool, or look at the other library tools for inspiration
+3) In `tools.py`, add the tool in `all_tool_classes` and import it
+4) Add the tool in `tools` in `agent.yml` and add the tool to the applicable action plans
+5) Add your tool and configure the tool and prompts in `tools.yml`
+
+Optional:
+Customize the Actions for the tool in the UI, see [the UI documentation](configure_ui.md).
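+
+As a rough sketch only, a minimal custom tool based on LangChain's `BaseTool` is shown below; the repo's `template_tool.py` (and `ExtendedBaseTool`) remain the authoritative starting points, and all names here are illustrative:
+
+```python
+from typing import Optional
+
+from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
+from langchain.tools import BaseTool
+
+
+class MyCustomTool(BaseTool):
+    """Illustrative skeleton; see template_tool.py for the real template."""
+
+    name: str = "my_custom_tool"
+    description: str = "Describe when the agent should use this tool."
+
+    def _run(self, query: str) -> str:
+        raise NotImplementedError("This tool only supports async execution.")
+
+    async def _arun(
+        self,
+        query: str,
+        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
+    ) -> str:
+        # Tool logic goes here, e.g. call an LLM or an external API.
+        return f"Processed: {query}"
+```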
diff --git a/backend/app/app/tool_constants/tutorial_data/configure_ui.md b/backend/app/app/tool_constants/tutorial_data/configure_ui.md
new file mode 100644
index 00000000..3017c667
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/configure_ui.md
@@ -0,0 +1,68 @@
+# Frontend customization guide
+This guide provides instructions on how to customize the user interface elements of your project, such as colors, fonts, and general styling, using Tailwind CSS within your codebase.
+
+The AgentKit frontend leverages [tailwindcss](https://tailwindcss.com/) to efficiently configure and customize theme colors and fonts.
+
+The main UI config files are available in `frontend/tailwind.config.ts` and inside `frontend/src/style/` directory.
+
+## Theme colors
+The color palette used is defined in the `frontend/tailwind.config.ts` file:
+
+``` typescript
+ colors: {
+ 'bcg-green': '#156648', // replace indigo-600
+ 'bcg-green-light': '#39b27c',
+ 'bcg-green-x-light': '#5dfdb0',
+ 'bcg-x-green': '#00E0B5',
+ 'custom-light-green': 'rgba(100, 255, 100, 0.1)',
+ 'bcg-dark': 'rgb(28, 31, 61)',
+ 'bcg-dark-hover': 'rgb(0, 168, 135)',
+ 'bcg-dark-select': 'rgb(0, 224, 181)',
+ 'bcg-light': 'rgb(228,228,233)',
+ 'bcg-light-select': 'rgb(0, 168, 135)',
+ 'bcg-light-hover': '#00E0B5',
+ },
+```
+
+To customize the color of table elements in the chat, you need to modify the `frontend/src/style/global.css` file:
+
+``` css
+.rdt_Pagination {
+ @apply !border-t-0 dark:bg-bcg-dark-select dark:text-gray-300;
+}
+
+.rdt_Pagination button { /* pagination and buttons */
+ @apply dark:bg-bcg-dark-select dark:fill-gray-300 dark:text-gray-300;
+}
+
+.rdt_Table, /* Color classes */
+.rdt_TableHead,
+.rdt_TableHeadRow,
+.rdt_TableRow { /* Hover and Selected States */
+ @apply dark:bg-bcg-dark-select dark:text-gray-300;
+}
+```
+
+## Fonts
+
+Update the fontFamily section in your `tailwind.config.ts` file:
+
+``` typescript
+ fontFamily: {
+ sans: ['Henderson BCG Sans', ...defaultTheme.fontFamily.sans],
+ },
+```
+
+## Logos and images
+
+For logos and images, directly replace the files at `frontend/public/logo_***.png`. Make sure the new images follow the same naming conventions and file formats as the old ones to avoid broken references.
+
+
+## Renderers
+
+If your application defines tools, it has to define application-specific artifact-renderers and action-renderers as well. The rest of the GUI & application flow is fixed (LLM-streaming, Meta-agent routing, memory, etc.).
+
+- Add your own renderer for steps in `ToolActionRenderer/applications/YOUR_APP` and add your application to `ToolActionRenderer/index.tsx`
+- (Optional) Add your own renderer for appendices in `ToolAppendixRenderer/applications/YOUR_APP`
diff --git a/backend/app/app/tool_constants/tutorial_data/entertainer_expert_tools.md b/backend/app/app/tool_constants/tutorial_data/entertainer_expert_tools.md
new file mode 100644
index 00000000..00d7bdd4
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/entertainer_expert_tools.md
@@ -0,0 +1 @@
+WIP
diff --git a/backend/app/app/tool_constants/tutorial_data/evaluation.md b/backend/app/app/tool_constants/tutorial_data/evaluation.md
new file mode 100644
index 00000000..e64ab0d1
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/evaluation.md
@@ -0,0 +1,12 @@
+# Evaluation
+
+Evaluating an AgentKit app can be done on multiple levels:
+- Routing layer: Evaluate the meta agent's accuracy of choosing the right action plan based on the user query
+- Tool layer: Evaluate individual tools
+- Output layer: Evaluate the final output quality
+
+
+
+See `experimental/evaluation_example.ipynb` for an example of evaluating an AgentKit app in a notebook.
+
+AgentKit natively integrates with LangSmith, which is a useful tool for tracing and tracking the performance of your app. https://docs.smith.langchain.com/
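+
+For the routing layer, a minimal accuracy check might look like the sketch below; `choose_action_plan` is a hypothetical stand-in for a call to the meta agent:
+
+```python
+from typing import Callable, List, Tuple
+
+
+def routing_accuracy(
+    labeled_queries: List[Tuple[str, str]],  # (user query, expected action plan name)
+    choose_action_plan: Callable[[str], str],  # hypothetical wrapper around the meta agent
+) -> float:
+    """Fraction of queries routed to the expected action plan."""
+    if not labeled_queries:
+        return 0.0
+    hits = sum(1 for query, expected in labeled_queries if choose_action_plan(query) == expected)
+    return hits / len(labeled_queries)
+```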
diff --git a/backend/app/app/tool_constants/tutorial_data/image_generation_tool.md b/backend/app/app/tool_constants/tutorial_data/image_generation_tool.md
new file mode 100644
index 00000000..00d7bdd4
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/image_generation_tool.md
@@ -0,0 +1 @@
+WIP
diff --git a/backend/app/app/tool_constants/tutorial_data/key_concepts.md b/backend/app/app/tool_constants/tutorial_data/key_concepts.md
new file mode 100644
index 00000000..1837e062
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/key_concepts.md
@@ -0,0 +1,12 @@
+WIP: tbd if needed
+
+## Key concepts
+
+- **Actions**: One of the steps to obtain an answer to the user's query, corresponding to executing a Tool(chain). An action has its own action-renderer for visualization in the frontend.
+- **Action Step**: A pre-configured set of actions, e.g. sql_tool and pdf_tool. An action step can consist of multiple actions which are executed in parallel to achieve a certain outcome (e.g. retrieve information).
+- **Action Plan**: A set of action steps which are executed linearly to achieve a certain outcome. For example, consider an action plan consisting of 2 toolsets: [[tool_1, tool_2], [tool_3, tool_4]]. In the first action step, tool_1 and tool_2 are executed in parallel and generate output. In the second action step, this output is passed to tool_3 and tool_4, which are executed in parallel, and the final output is passed to the frontend.
+- **LLM Outputs**: Text output of an LLM from a Tool that is streamed to the frontend (e.g. output from explainer tool). In the output section, there is a preliminary LLM output (currently only from entertainer tool) and a final LLM output (final answer of agent).
+- **Appendices**: Additional objects that are added below the final LLM output, such as visualizations, tables etc.
+- **Signals**: Signals sent from the backend about output status, e.g. 'Action ended', 'LLM Output final'.
+
+
diff --git a/backend/app/app/tool_constants/tutorial_data/memory.md b/backend/app/app/tool_constants/tutorial_data/memory.md
new file mode 100644
index 00000000..00d7bdd4
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/memory.md
@@ -0,0 +1 @@
+WIP
diff --git a/backend/app/app/tool_constants/tutorial_data/optional_features.md b/backend/app/app/tool_constants/tutorial_data/optional_features.md
new file mode 100644
index 00000000..6cbd06ba
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/optional_features.md
@@ -0,0 +1,65 @@
+# Optional Features
+
+In addition to the core functionality, AgentKit supports optional security and tracking features out of the box.
+
+## LLM Run Tracing
+
+To use self-hosted LangChain tracing in Docker, set `LANGCHAIN_TRACING_V2` to `true` and `LANGCHAIN_ENDPOINT` to `"http://langchain-backend:1984"`, as in the example below.
+Note that `LANGCHAIN_API_KEY` must be set, but will not be used in a self-hosted context.
+
+You can access stored runs and feedback at [http://localhost:9091](http://localhost:9091).
+
+```
+#############################################
+# Langsmith variables
+#############################################
+LANGCHAIN_TRACING_V2="true"
+LANGCHAIN_ENDPOINT="http://langchain-backend:1984"
+LANGCHAIN_API_KEY="not-used" # must be set, but any value works when self-hosted; a real key is required for hosted
+LANGCHAIN_PROJECT="default"
+```
+
+### Hosted LangSmith
+
+To use hosted LangSmith, set `LANGCHAIN_ENDPOINT` to `"https://api.langchain.plus"` and fill `LANGCHAIN_API_KEY`:
+
+1. Create an API Key by navigating to the [settings page](https://smith.langchain.com/settings).
+2. Configure the runtime environment - replace "" with the API key generated in step 1
+
+```
+#############################################
+# Langsmith variables
+#############################################
+LANGCHAIN_TRACING_V2="true"
+LANGCHAIN_ENDPOINT="https://api.langchain.plus"
+LANGCHAIN_API_KEY=""
+LANGCHAIN_PROJECT="default"
+```
+
+## Feedback integration
+
+> Note: LLM Run tracing must be enabled for this feature.
+
+To enable feedback from the tool frontend, set `NEXT_PUBLIC_ENABLE_MESSAGE_FEEDBACK` to `true` in the `frontend/.env` file.
+
+A pop-up will appear after each message giving the user the ability to rate the message quantitatively (thumbs up/down) and qualitatively (comment). This functionality can be useful e.g. for a PoC user testing session for your application. It could be further customised by adjusting the `FeedbackView` typescript file.
+
+
+- Frontend implementation: [frontend/src/components/ConversationView/MessageView/FeedbackView/index.tsx](frontend/src/components/ConversationView/MessageView/FeedbackView/index.tsx)
+- Backend route: [backend/app/app/api/v1/endpoints/statistics.py](backend/app/app/api/v1/endpoints/statistics.py)
+
+### Retrieving Feedback results:
+1. Via the UI:
+   Feedback is collected in LangSmith alongside LLM runs. Navigate to http://localhost:9091 (https://smith.langchain.com/ if using hosted) and select the project.
+2. Via python API: by following example scripts at https://docs.smith.langchain.com/cookbook/exploratory-data-analysis/exporting-llm-runs-and-feedback
+3. Via DB: by connecting to the DB directly in `public.runs` / `public.feedback` at `jdbc:postgresql://localhost:5433/postgres`
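+
+For option 2, a minimal sketch using the `langsmith` Python client might look like this (assuming the endpoint and API key variables from the tracing section are set in your environment):
+
+```python
+from langsmith import Client
+
+client = Client()  # reads LANGCHAIN_ENDPOINT / LANGCHAIN_API_KEY from the environment
+
+# Iterate over runs in the project and print any attached feedback.
+for run in client.list_runs(project_name="default"):
+    for feedback in client.list_feedback(run_ids=[run.id]):
+        print(run.name, feedback.score, feedback.comment)
+```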
+
+
+## User authentication
+
+User authentication can be done through NextAuth. Set `NEXT_PUBLIC_USE_AUTH` to `true` and fill the `NEXTAUTH_SECRET` variable in `frontend/.env` with a generated secret key. A secret key can be generated using:
+```
+openssl rand -base64 32
+```
+For more information, check the [NextAuth documentation](https://next-auth.js.org/configuration/options#secret).
+Additionally, if you want to enable GitHub authentication, the `GITHUB_ID` and `GITHUB_SECRET` in `frontend/.env` should be filled with the corresponding values from your GitHub app credentials.
diff --git a/backend/app/app/tool_constants/tutorial_data/pdf_tool.md b/backend/app/app/tool_constants/tutorial_data/pdf_tool.md
new file mode 100644
index 00000000..12ee873d
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/pdf_tool.md
@@ -0,0 +1,17 @@
+# PDF tool guide
+## How it works
+
+The standard PDF tool uses PGVector for indexing (https://github.com/pgvector/pgvector) and saves the embeddings in PostgreSQL. However, any index supported by LangChain can easily be used instead. We use 'PyMuPDF' as the standard PDF parser, but any of the options in `LOADER_DICT` can be used.
+
+Note: This tool can easily be extended to other document types as well, e.g. PPT or Word, by adding specific parsers for those document types.
+
+The general process is:
+
+1) Create an index and fill it with embedded documents (on app startup, or it can be persisted), see `vector_db_pdf_ingestion.py`. Some choices can be made:
+ - The index as mentioned (PGVector in this template)
+ - Embedding model (OpenAI in this template)
+ - How the documents are split into chunks (TokenTextSplitter with chunk size 2000 and overlap 200 tokens in this template)
+
+2) When the PDF tool is run, the k most relevant document chunks are returned (4 in this template)
+
+3) These document chunks are entered into an LLM prompt along with the user question, and the result is returned to the user
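+
+A condensed sketch of this pipeline is shown below; the parser, splitter sizes and `k` come from the description above, the connection details are placeholders, and the real implementation lives in `vector_db_pdf_ingestion.py`:
+
+```python
+from langchain.document_loaders import PyMuPDFLoader
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.text_splitter import TokenTextSplitter
+from langchain.vectorstores.pgvector import PGVector
+
+# 1) Load and chunk a document (chunk sizes as configured in this template).
+docs = PyMuPDFLoader("app/tool_constants/pdf_data/example.pdf").load()
+splitter = TokenTextSplitter(chunk_size=2000, chunk_overlap=200)
+chunks = splitter.split_documents(docs)
+
+# 2) Embed the chunks and store them in PGVector.
+store = PGVector.from_documents(
+    documents=chunks,
+    embedding=OpenAIEmbeddings(),
+    collection_name="pdf_indexing_1",
+    connection_string="postgresql+psycopg2://postgres:postgres@localhost:5432/postgres",
+)
+
+# 3) At query time, retrieve the k most relevant chunks for the LLM prompt.
+relevant_chunks = store.similarity_search("How do I configure the agent?", k=4)
+```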
diff --git a/backend/app/app/tool_constants/tutorial_data/setup.md b/backend/app/app/tool_constants/tutorial_data/setup.md
new file mode 100644
index 00000000..5fc78f15
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/setup.md
@@ -0,0 +1,45 @@
+# Setup
+If this is your first time setting up AgentKit, please use the recommended installation method below (see the [development setup modes](docs/setup_development.md) if hot-reload is required). We will run both the backend app and the frontend app inside Docker containers.
+
+### Prerequisites
+- Docker: https://www.docker.com/get-started
+
+### Installation steps
+
+1. Clone the repository containing the source code for the backend and frontend apps.
+
+2. Copy the `.env.example` file in the root directory of the repository and change the name to `.env`.
+ - Change the OPENAI_API_KEY and OPENAI_ORGANIZATION to your own (n.b. OPENAI_ORGANIZATION should be your OpenAI 'Organization ID', not 'Organization name'):
+ ```sh
+ OPENAI_API_KEY=
+ OPENAI_ORGANIZATION=
+ ```
+3. Copy the `frontend/.env.example` file in the frontend directory and change the name to `.env`.
+
+4. In the terminal, navigate to the root directory of the cloned repository. Build and start the Docker containers using the `docker-compose.yml` configuration file:
+ ```
+ docker-compose -f docker-compose.yml up -d
+ ```
+
+5. Wait for the containers to build and start. This may take a few minutes depending on your system. Once the containers are up and running, you can access the apps in your browser:
+ - Frontend app (Next.js): [http://localhost](http://localhost/)
+ - Backend app (FastAPI): http://localhost/api/v1
+
+6. You have successfully installed and run the apps using Docker and the Caddy reverse proxy!
+
+## (Optional) Langchain tracing (Langsmith)
+
+See https://docs.smith.langchain.com/ on how to set up LangSmith. Once you have set up LangSmith and the .env variables, you will be able to see the AgentKit traces in LangSmith.
+
+## (Optional) Pre-commit
+
+We are using pre-commit to automatically run some hygiene checks. Install this by running `make install-pre-commit`
+
+There is also a dockerized service that can be run using `docker-compose run pre-commit`
+
+To link this from a local python installation, run:
+```sh
+pip install pre-commit
+cp pre-commit/.pre-commit-config.yaml .
+pre-commit install
+```
diff --git a/backend/app/app/tool_constants/tutorial_data/setup_development.md b/backend/app/app/tool_constants/tutorial_data/setup_development.md
new file mode 100644
index 00000000..5b68600d
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/setup_development.md
@@ -0,0 +1,279 @@
+# Development Setup (hot reload)
+
+There are two additional setups available in this development setup guide:
+1. Local frontend mode: This allows for an easy setup, while keeping the flexibility of local development tools for the frontend.
+
+2. Full Local Mode: Use when you want to run both backend and frontend apps entirely locally without Docker. Suitable for local development without Docker, development on machines without Docker support, or quick prototyping.
+
+## Local frontend
+
+### Prerequisites
+Before you begin, make sure you have the following installed on your machine:
+- Docker: https://www.docker.com/get-started
+- Node.js and pnpm: https://pnpm.io/installation
+
+### Installation Steps
+
+1. Clone the repository containing the source code for the backend and frontend apps.
+
+2. Copy the `.env.example` file in the root directory of the repository and change the name to `.env`.
+ - Change the OPENAI_API_KEY and OPENAI_ORGANIZATION to your own (n.b. OPENAI_ORGANIZATION should be your OpenAI 'Organization ID', not 'Organization name'):
+ ```sh
+ OPENAI_API_KEY=
+ OPENAI_ORGANIZATION=
+ ```
+3. Copy the `frontend/.env.example` file in the frontend directory and change the name to `.env`. Change the `DB_PORT` and `DB_HOST` variables as follows:
+ - `DB_PORT`: Change to 5732.
+ - `DB_HOST`: Change to localhost
+
+   If needed, change the following variables (not required for a successful demo installation):
+ - `NEXT_PUBLIC_USE_AUTH`: Set to `true` if you would like to add an identity layer using Next Auth.
+ - `NEXTAUTH_SECRET`: Generate a secret key and replace `# TODO: Generate a secret and add it here` with the secret key.
+ - `GITHUB_ID` and `GITHUB_SECRET`: If you want to enable GitHub authentication, replace the corresponding values with your GitHub app credentials.
+ - `DB_USER`, `DB_PASSWORD`, `DB_HOST`,`DB_USER`,`DB_PORT` and `DB_NAME`: If you want to customize the database connection settings, update these values accordingly.
+
+4. In the root directory, create a file `docker-compose-local.yml` and paste the Docker setup template below:
+
+ ```sh
+ version: '3.8'
+
+ services:
+ fastapi_server:
+ container_name: fastapi_server
+ build: ./backend
+ restart: always
+ command: "sh -c 'alembic upgrade head && uvicorn app.main:app --reload --workers 1 --host 0.0.0.0 --port 9090'"
+ volumes:
+ - ./backend/app:/code
+ expose:
+ - 9090
+ env_file: ".env"
+ depends_on:
+ - database
+
+ database:
+ image: ankane/pgvector:v0.4.1
+ restart: always
+ container_name: database
+ env_file: ".env"
+ user: root
+ volumes:
+ - ./db_docker:/var/lib/postgresql
+ - ./scripts/create-dbs.sql:/docker-entrypoint-initdb.d/create-dbs.sql
+ ports:
+ - 5732:5432 # Remove this on production, use same port as in .env for fastapi_db
+ expose:
+ - 5732
+ environment:
+ - POSTGRES_USERNAME=${DATABASE_USER}
+ - POSTGRES_PASSWORD=${DATABASE_PASSWORD}
+ - POSTGRES_DATABASE=${DATABASE_NAME}
+ - POSTGRES_HOST_AUTH_METHOD= "trust"
+
+ redis_server:
+ image: redis:alpine
+ container_name: redis_server
+ restart: always
+ ports:
+ - 6379:6379 # Remove this on production
+ expose:
+ - 6379
+ env_file: .env
+ langchain-playground:
+ image: langchain/${_LANGSMITH_IMAGE_PREFIX-}langchainplus-playground@sha256:f61ce9762babcb4a51af3e5b0cc628453ac7087237c5fc8694834de49b56d16e
+ langchain-frontend:
+ image: langchain/${_LANGSMITH_IMAGE_PREFIX-}langchainplus-frontend@sha256:e0ab157b2b9cb7f75743d45237f0d8ede75a3811d913f234585484255afe5b5a
+ ports:
+ - 9091:80
+ expose:
+ - 9091
+ environment:
+ - NEXT_PUBLIC_BACKEND_URL=http://langchain-backend:1984
+ depends_on:
+ - langchain-backend
+ - langchain-playground
+ volumes:
+ - ./conf/nginx.conf:/etc/nginx/default.conf:ro
+ langchain-backend:
+ image: langchain/${_LANGSMITH_IMAGE_PREFIX-}langchainplus-backend@sha256:1196c12308b450548195c10927d469963c7d8e62db0e67f8204c83adb91f9031
+ environment:
+ - PORT=1984
+ - LANGCHAIN_ENV=local_docker
+ - LOG_LEVEL=warning
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
+ ports:
+ - 1984:1984
+ depends_on:
+ - langchain-db
+ - langchain-redis
+ langchain-db:
+ image: postgres:14.1
+ command:
+ [
+ "postgres",
+ "-c",
+ "log_min_messages=WARNING",
+ "-c",
+ "client_min_messages=WARNING"
+ ]
+ environment:
+ - POSTGRES_PASSWORD=postgres
+ - POSTGRES_USER=postgres
+ - POSTGRES_DB=postgres
+ volumes:
+ - langchain-db-data:/var/lib/postgresql/data
+ ports:
+ - 5433:5432
+ langchain-redis:
+ image: redis:7
+ ports:
+ - 63791:6379
+ volumes:
+ - langchain-redis-data:/data
+ langchain-queue:
+ image: langchain/${_LANGSMITH_IMAGE_PREFIX-}langchainplus-backend@sha256:1196c12308b450548195c10927d469963c7d8e62db0e67f8204c83adb91f9031
+ environment:
+ - LANGCHAIN_ENV=local_docker
+ - LOG_LEVEL=warning
+ entrypoint: "rq worker --with-scheduler -u redis://langchain-redis:6379 --serializer lc_database.queue.serializer.ORJSONSerializer --worker-class lc_database.queue.worker.Worker --connection-class lc_database.queue.connection.RedisRetry --job-class lc_database.queue.job.AsyncJob"
+ depends_on:
+ - langchain-redis
+ langchain-hub:
+ image: langchain/${_LANGSMITH_IMAGE_PREFIX-}langchainhub-backend@sha256:73b4c2c3e7cd81729e766bb4eece2b28883bebf7c710567a21d1a6c114abff5a
+ environment:
+ - PORT=1985
+ - LANGCHAIN_ENV=local_docker
+ - LOG_LEVEL=warning
+ ports:
+ - 1985:1985
+ depends_on:
+ - langchain-db
+ - langchain-redis
+ caddy_reverse_proxy:
+ container_name: caddy_reverse_proxy
+ image: caddy:alpine
+ restart: always
+ ports:
+ - 80:80
+ - 9090:9090
+ - 443:443
+ environment:
+ - EXT_ENDPOINT1=${EXT_ENDPOINT1}
+ - LOCAL_1=${LOCAL_1}
+ - LOCAL_2=${LOCAL_2}
+ volumes:
+ - ./caddy/Caddyfile:/etc/caddy/Caddyfile
+ #- ./static:/code/static
+ - caddy_data:/data
+ - caddy_config:/config
+
+ volumes:
+ caddy_data:
+ caddy_config:
+ langchain-db-data:
+ langchain-redis-data:
+ ```
+
+
+5. In the terminal, navigate to the root directory of the cloned repository. Build and start the Docker containers using the created `docker-compose-local.yml` configuration file:
+ ```
+ docker-compose -f docker-compose-local.yml up -d
+ ```
+
+6. Wait for the containers to build and start. This may take a few minutes depending on your system. While the Docker containers are running, open a new terminal window and navigate to the `frontend` directory.
+
+7. Install the frontend app's dependencies and set up prisma using pnpm:
+ ```
+ pnpm install
+ pnpm prisma:generate
+ ```
+
+8. Once the dependencies are installed, start the frontend app:
+ ```
+ pnpm dev
+ ```
+
+9. The frontend app (Next.js) will now be running locally at http://localhost:3000. The backend app (FastAPI) is still running inside the Docker container and can be accessed at http://localhost/api/v1.
+
+10. You have successfully installed and run the apps using Docker for the backend and running the frontend locally with Next.js and pnpm! You can go to http://localhost:3000 to try AgentKit.
+
+Additional notes:
+- The backend app will automatically reload whenever you make changes to the source code inside the `backend/app` directory. You can see the changes reflected by refreshing the backend app in your browser.
+- The frontend app (Next.js) will also automatically reload whenever you make changes to the source code inside the `frontend` directory. You can see the changes reflected by refreshing the frontend app in your browser.
+
+Remember to stop the Docker containers when you're done:
+```
+docker-compose -f docker-compose-local.yml down
+```
+
+### Langchain tracing (Langsmith)
+
+See https://docs.smith.langchain.com/ on how to set up LangSmith. Once you have set up LangSmith and the .env variables, you will be able to see the AgentKit traces in LangSmith.
+
+
+## Full local mode setup (for non-docker users)
+
+### Prerequisites
+Version requirements:
+* Python: **Python>=3.10**
+* Poetry: **>=1.4.2**
+* Nodejs: **>=18.16.0**
+
+1. Make sure to create a PostgreSQL database with the name *fastapi_db* (e.g. by running the script `/scripts/create-dbs.sql`).
+Set up the .env files from the examples and change the database URL in the .env files as follows:
+
+    - for the /frontend/.env file: use /frontend/.env.example as an example and change `DB_PORT` and possibly other variables to your own:
+ - `DB_PORT`: Change to 5732.
+ - If needed, also change the following variables to your own:
+ ```sh
+ DB_USER=postgres
+ DB_PASSWORD=postgres
+ DB_HOST=database
+ DB_PORT=5432
+ DB_NAME=fastapi_db
+ ```
+
+    - for the .env file: use .env.example as an example, and change the DATABASE_HOST and DATABASE_PORT to your own:
+ ```sh
+ DATABASE_HOST=
+ DATABASE_USER=postgres
+ DATABASE_PASSWORD=postgres
+ DATABASE_NAME=fastapi_db
+ DATABASE_PORT=
+ ```
+ - Also, change the OPENAI_API_KEY and OPENAI_ORGANIZATION to your own:
+ ```sh
+ OPENAI_API_KEY=
+ OPENAI_ORGANIZATION=
+ ```
+
+ Finally, apply the .env variables:
+ ```sh
+ export $(grep -v '^#' .env | sed 's/#.*$//' | xargs)
+ ```
+
+2. In the frontend folder:
+    install dependencies:
+ ```sh
+ pnpm install
+ ```
+ then run the application:
+ ```sh
+ pnpm prisma:generate
+ pnpm dev
+ ```
+
+3. In the backend/app folder:
+ ```sh
+ poetry config --local virtualenvs.in-project true
+ poetry env use 3.10
+ poetry install
+ ```
+
+4. In the root folder (make sure .env variables are applied):
+ ```sh
+ uvicorn "app.main:app" "--app-dir" "backend/app" "--reload" "--workers" "1" "--host" "0.0.0.0" "--port" "9090"
+ ```
+
+5. If you visit http://localhost:3000, you should be able to see the application!
diff --git a/backend/app/app/tool_constants/tutorial_data/sql_tool.md b/backend/app/app/tool_constants/tutorial_data/sql_tool.md
new file mode 100644
index 00000000..0775aecc
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/sql_tool.md
@@ -0,0 +1,30 @@
+# SQL tool guide
+## How it works
+The SQL tool currently consists of the following steps:
+1) `_alist_sql_tables`: Finds the tables relevant to the user's query and filters the database for only those tables
+2) `_aquery_with_schemas`: Writes an SQL query from a prompt summarizing the schemas of the selected tables and the user question
+3) `_avalidate_response`: Validates the response from executing the SQL query
+    a) `_parse_query`: Parses the SQL query from the response and removes extra characters
+    b) `run_no_str`: Executes the SQL query against the configured database and checks whether results are returned
+    c) An LLM validates that the SQL query answers the question the user asked
+4) `_aimprove_query`: If the SQL query does not answer the question sufficiently, prompts the LLM to improve it
+5) Return the SQL query and the results
+
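+As an illustration of step 3a, a minimal `_parse_query`-style helper might strip the SQL out of a markdown-formatted LLM response as below (the real implementation may differ):
+
+```python
+import re
+
+FENCE = "`" * 3  # a markdown code fence, built here to avoid nesting fences
+
+
+def parse_sql_from_response(response: str) -> str:
+    """Extract the SQL statement from a response wrapped in a markdown code block."""
+    match = re.search(FENCE + r"(?:sql)?\s*(.*?)" + FENCE, response, flags=re.DOTALL)
+    sql = match.group(1) if match else response
+    return sql.strip()
+```
+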
+To add your own database, add your SQL script in `scripts`, and modify the SQL scripts mounted into the docker-compose `database` service so that your database is created when the container starts (see for example `docker-compose-demo.yml`).
+
+## Prompt engineering tips
+
+- Always include examples for important steps that are tailored to your database
+- Where you observe frequent errors (in any of the steps), add specific examples of how it should be done correctly, e.g. "In WHERE clauses use substring comparison to account for unknown string formulations (e.g. inhibitor -> LIKE '%inhibitor%')"
+
+`system_context`:
+- Mention the role of the LLM: "You are an expert in ... database, your goal is to ..."
+- Specify the required output format, e.g. markdown code block
+- Specify the SQL dialect (e.g. PostgreSQL, Snowflake)
+- For safety, instruct it to not use any DML statements
+- Instruct it to only use field and table names from the provided database schemas (to reduce hallucinations)
+
+
+`prompt_inputs`:
+- Always use 'few-shot learning': give an example of a typical user query and a correct SQL query
+- In `table_definitions`, give a description of each table and describe exactly what information is in it. This will give better results in the table selection step
diff --git a/backend/app/app/tool_constants/tutorial_data/summarizer_tool.md b/backend/app/app/tool_constants/tutorial_data/summarizer_tool.md
new file mode 100644
index 00000000..00d7bdd4
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/summarizer_tool.md
@@ -0,0 +1 @@
+WIP
diff --git a/backend/app/app/tool_constants/tutorial_data/visualizer_tool.md b/backend/app/app/tool_constants/tutorial_data/visualizer_tool.md
new file mode 100644
index 00000000..00d7bdd4
--- /dev/null
+++ b/backend/app/app/tool_constants/tutorial_data/visualizer_tool.md
@@ -0,0 +1 @@
+WIP
diff --git a/docker-compose-demo.yml b/docker-compose-demo.yml
index fbd4e662..db3a87c9 100644
--- a/docker-compose-demo.yml
+++ b/docker-compose-demo.yml
@@ -37,8 +37,8 @@ services:
user: root
volumes:
- ./db_docker:/var/lib/postgresql
- - ./scripts/create-dbs.sql:/docker-entrypoint-initdb.d/create-dbs.sql
- - ./scripts/sql_db_tool/chinook_psql_load.sql:/docker-entrypoint-initdb.d/chinook_psql_load.sql
+ - ./scripts/1-create-dbs.sql:/docker-entrypoint-initdb.d/1-create-dbs.sql
+ - ./scripts/sql_db_tool/2-chinook_psql_load.sql:/docker-entrypoint-initdb.d/2-chinook_psql_load.sql
ports:
- 5432:5432
expose:
diff --git a/docker-compose-development.yml b/docker-compose-development.yml
index eb1c9b43..213ab4cb 100644
--- a/docker-compose-development.yml
+++ b/docker-compose-development.yml
@@ -27,7 +27,7 @@ services:
user: root
volumes:
- ./db_docker:/var/lib/postgresql
- - ./scripts/create-dbs.sql:/docker-entrypoint-initdb.d/create-dbs.sql
+ - ./scripts/1-create-dbs.sql:/docker-entrypoint-initdb.d/1-create-dbs.sql
ports:
- 5432:5432
expose:
diff --git a/docker-compose.yml b/docker-compose.yml
index 44dab6d9..bc2bdff8 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -47,7 +47,7 @@ services:
user: root
volumes:
- ./db_docker:/var/lib/postgresql
- - ./scripts/create-dbs.sql:/docker-entrypoint-initdb.d/create-dbs.sql
+ - ./scripts/1-create-dbs.sql:/docker-entrypoint-initdb.d/1-create-dbs.sql
ports:
- 5432:5432
expose:
diff --git a/docs/docusaurus/docs/advanced/overview_codebase.md b/docs/docusaurus/docs/advanced/overview_codebase.md
index d7d5d2a9..7d144ec0 100644
--- a/docs/docusaurus/docs/advanced/overview_codebase.md
+++ b/docs/docusaurus/docs/advanced/overview_codebase.md
@@ -88,5 +88,5 @@ Link to API documentation: https://agentkit.infra.x.bcg.com/api/v1/docs#/
└── pre-commit.git
[scripts] // sql scripts to create and load databases, add script for a custom database
├── create-dbs.sql
- └── [db_sql_tool]
+ └── [sql_db_tool]
```
diff --git a/docs/docusaurus/docs/setup/setup.md b/docs/docusaurus/docs/setup/setup.md
index 4ee0be70..5ea21ae1 100644
--- a/docs/docusaurus/docs/setup/setup.md
+++ b/docs/docusaurus/docs/setup/setup.md
@@ -14,6 +14,7 @@ If this is your first time setting up AgentKit, please follow these steps. If yo
OPENAI_API_KEY=
OPENAI_ORGANIZATION=
```
+ - Note: if you do not want to use OpenAI, choose another LLM in `llm.py` or add your own model
3. Copy the `frontend/.env.example` file in the frontend directory and change the name to `.env`.
4. In the terminal, navigate to the root directory of the cloned repository. Build and start the Docker containers using the `docker-compose.yml` configuration file:
diff --git a/docs/docusaurus/docs/tutorial/tutorial.md b/docs/docusaurus/docs/tutorial/tutorial.md
index 1583d2e5..694b0d59 100644
--- a/docs/docusaurus/docs/tutorial/tutorial.md
+++ b/docs/docusaurus/docs/tutorial/tutorial.md
@@ -7,24 +7,23 @@ Let's run through a full example of how you can rapidly build a high quality Age
We're going to build an Agent which can guide us through a GitHub repository, with access to two sorts of information:
1. **Codebase documentation**: All documentation in the codebase, including setup instructions, overall architecture, and feature-specific docs.
-2. **GitHub reposistory commit history**: Commit log from the GitHub repository. We want this data to be in tabular form.
+2. **GitHub repository commit history**: Commit log from the GitHub repository. We want this data to be in tabular form.
Developers can use this Agent to familiarize themselves with the contents of the repository, ask questions, and track contributions (e.g. "What were the latest commits from Joe to the frontend?"). The agent will use semantic similarity to retrieve information from
-the documentation and generate a SQL query to retrieve information from the GitHub repository information.
+the documentation (RAG) and generate a SQL query to retrieve information from the GitHub repository commit history. This can be easily extended to also include code files and more complex tools such as optimization or ML based tools (see [public demo](https://agentkit.infra.x.bcg.com/) for an example).
-In this example, we'll use data from the AgentKit repository itself (meta!), but you can do this for any repository.
+In this example, we'll use data from the AgentKit repository itself (meta!), but you can do this for any repository. We'll assume you have run through the [setup](docs/setup/setup.md).
All it takes is 3 steps:
## Step 1: Ingest data
-The first thing we'll do is download all AgentKit docs as PDF. We can also do this in native Markdown format, but it's quicker to use PDFs because AgentKit has off-the-shelf PDF ingestion and retrieval.
-Next, we need a directory to store the PDFs in, for which we can use `backend/app/app/tool_constants/pdf_data` (and we can delete the music-related default PDFs already there). We can also specify any other location
-for the PDFs, as long as we correctly point the `PDF_TOOL_DATA_PATH` parameter in `.env` to it.
+### Documentation (Markdown or PDF)
+The first thing we'll do is ingest the documentation we'll use to answer questions. You can use your own codebase, or follow along by ingesting the AgentKit documentation, which is already loaded in `backend/app/app/tool_constants/tutorial_data`. Make sure you update `PDF_TOOL_DATA_PATH` in `.env` to this path. If the PDF tool is enabled (`PDF_TOOL_ENABLED="true"` in `.env`), the ingestion pipeline in `vector_db_pdf_ingestion.py` will run to embed the data and store it in a local `PGVector` vector database.
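+
+For example, with the tutorial data the relevant `.env` entries would look something like this (a sketch: the exact relative path is an assumption, following the convention of the default `PDF_TOOL_DATA_PATH` value):
+```
+PDF_TOOL_ENABLED="true"
+PDF_TOOL_DATA_PATH='app/tool_constants/tutorial_data'
+```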
-We now want to a CSV file of GitHub commit history into the PostgreSQL DB to use with the SQL tool. The tables include columns such as commit hash, commit date, commit user, and file changed. First, add this data to `tool_constants/`.
+### Commit history
-We'll go to `scripts/db_sql_tool/` and create a SQL script to load the data, calling it `load_commits.sql`.
+Next, we're going to ingest the CSV of commit data, stored in `backend/app/app/tool_constants/tutorial_data/commit_history.csv`. We'll create a SQL script in `scripts/sql_db_tool/` to load the commit data into the database in our Docker `database` container, calling it `2-load_commits.sql` (to ensure it runs after `1-create-dbs.sql`). By default, the data is loaded into the `postgres` database. `2-load_commits.sql` looks like:
```sql
-- Create table
CREATE TABLE COMMITS (
@@ -42,107 +41,89 @@ WITH CSV HEADER;
```
Notice that the CSV path is referenced as `/docker-entrypoint-initdb.d/commit_history.csv`. This is because we need to map
-local paths to paths within the Docker container running the `db_sql_tool` service. This includes the paths of the data and the SQL script to load it.
+local paths to paths within the Docker container running the `database` service.
-So, one additional step is to go to
-`docker-compose.yml` (or whichever `docker-compose` file you're using) and add these mappings to the `db_sql_tool` service.
+Finally, go to `docker-compose.yml` (or whichever `docker-compose` file you're using) and mount the `commit_history.csv` file and the SQL scripts into the `database` container, so the data is loaded when the container starts up.
This is what it looks like:
```yaml
- db_sql_tool:
- image: postgres:11
+ database:
+ image: ankane/pgvector:v0.4.1
restart: always
- container_name: db_sql_tool
+ container_name: database
+ env_file: ".env"
+ user: root
volumes:
- ./db_docker:/var/lib/postgresql
- - ./scripts/db_sql_tool/load_commits.sql:/docker-entrypoint-initdb.d/load_commits.sql
- - ./backend/app/app/tool_constants/public_demo_data/commit_history.csv:/docker-entrypoint-initdb.d/commit_history.csv
- environment:
- - POSTGRES_USER=postgres
- - POSTGRES_PASSWORD=postgres
- - POSTGRES_DB=sqltool
+ - ./scripts/1-create-dbs.sql:/docker-entrypoint-initdb.d/1-create-dbs.sql
+ - ./backend/app/app/tool_constants/tutorial_data/commit_history.csv:/docker-entrypoint-initdb.d/commit_history.csv
+ - ./scripts/sql_db_tool/2-load_commits.sql:/docker-entrypoint-initdb.d/2-load_commits.sql
ports:
- - "5632:5432"
- healthcheck:
- test: [ "CMD-SHELL", "pg_isready -U postgres" ]
- interval: 10s
- timeout: 15s
- retries: 5
+ - 5432:5432
+ expose:
+ - 5432
+ environment:
+ - POSTGRES_USERNAME=${DATABASE_USER}
+ - POSTGRES_PASSWORD=${DATABASE_PASSWORD}
+ - POSTGRES_DATABASE=${DATABASE_NAME}
+      - POSTGRES_HOST_AUTH_METHOD="trust"
```
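+
+Once the container is up, a quick sanity check confirms the commits loaded (a sketch, assuming the default `postgres` user and database):
+```sql
+-- e.g. run via: docker exec -it database psql -U postgres -d postgres
+SELECT COUNT(*) FROM commits;
+```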
+## Step 2: Write action plans
+The second step is to write action plans to use our tools. Recall that we want two functionalities:
+1. Retrieve relevant docs from codebase documentation and use them to answer the user's question
+2. Generate and execute a SQL query to retrieve data from the commit history
-## Step 2: Configure tools
-
-Recall that we want two functionalities: RAG over codebase documentation, and SQL query generation to search commit history. AgentKit provides off-the-shelf tools for both of these things.
-
-For RAG, we want to use a combination of `pdf_tool` and `expert_tool`, where `pdf_tool` retrieves documents and `expert_tool` makes an LLM call to generate an answer. The goal, then, is to run them sequentially: `pdf_tool` first and then `expert_tool` after with access to the retrieved docs.
-No change needs to be made to `pdf_tool`, we need to add a few lines of code to the `_arun` method of `expert_tool` and write some prompts.
-```python
-from app.schemas.tool_schema import ToolConfig
-
-class ExpertTool(ExtendedBaseTool):
+AgentKit provides off-the-shelf library tools for both of these things.
- # Other code omitted ...
-
- async def _arun(self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None) -> str:
- """Use the tool asynchronously."""
- try:
- tool_input = ToolInputSchema.parse_raw(query)
- user_question = tool_input.latest_human_message
+We can use two simple action plans in `agent.yml`:
+```yaml
+action_plans:
+ '0':
+ name: ''
+ description: Use this plan to answer technical questions about AgentKit - related to setup, code, codebase navigation, or other technical questions.
+ actions:
+ - - pdf_tool
- docs = tool_input.intermediate_steps["PDF Tool"]
+ '1':
+ name: ''
+ description: |-
+ Use this plan to fetch Github-related information from the repository of AgentKit, such as commits, issues, pull requests.
+ actions:
+ - - sql_tool
+ - - expert_tool
+```
- messages = [
- SystemMessage(content=self.system_context),
- HumanMessage(content=self.prompt_message.format(question=user_question, retrieved_docs=docs))
- ]
- response = await self._agenerate_response(messages, discard_fast_llm=True, run_manager=run_manager)
+In action plan '0', we use the `pdf_tool` to retrieve relevant documentation and answer the user's question.
+In action plan '1', we first retrieve relevant data with the `sql_tool`, and then pass that data to `expert_tool` to answer the user's question.
- logger.info(f"Expert Tool response - {response}")
+## Step 3: Configure tools and prompts
- return response
+Finally, we will configure the tools and prompts for the defined action plans. For question answering with RAG, we can use `pdf_tool` to retrieve documents and answer questions. No code changes need to be made to `pdf_tool`, but we will change the prompts in `tools.yml` slightly:
- except Exception as e:
- if run_manager is not None:
- await run_manager.on_tool_error(e, tool=self.name)
- return repr(e)
- else:
- raise e
```
-The code above parses the input provided to the `expert_tool` and fetches the docs retrieved by `pdf_tool`. It then formats these
-docs into the prompt for the `expert_tool`, which needs to be set in `tools.yml`:
-
-```yaml
-expert_tool:
- default_llm: "gpt-4"
- default_fast_llm: "gpt-3.5-turbo-1106"
+pdf_tool:
description: >-
- Tool to answer the user question based on the documents retrieved by the pdf_tool. It analyzes the documents to provide reliable, helpful answers to specific technical queries related to the codebase, such as setup procedures or tool additions.
+ Summarization and Q&A tool to answer questions about the codebase.
+    The tool identifies the relevant documentation and answers the user's question.
+    Input is a query string, output is a string containing the answer to the user's question.
{examples}
prompt_message: |-
- Answer the user's question based on the documents retrieved.
- User question:
- <<<
- {{question}}
- >>>
- Retrieved documents:
- <<<
+ Documentation extracts:
{{retrieved_docs}}
- >>>
- Concise Answer:
+
+ User Question:
+ {{question}}
system_context: |-
- You are an expert in software engineering and communicating technical ideas. Your goal is to answer the user question solely based on the given documents.
+    You are an expert in answering questions about a codebase, in this case the AgentKit codebase.
+ You need to answer a user's question based on retrieved relevant documentation.
prompt_inputs:
- name: examples
content: |-
- Example Input: "What are the steps to set up the development environment?"
- Example Output: "You can set up your development environment locally or on Docker. To set up on Docker follow these steps: ..."
- max_token_length: 8000
+ Example Input: \"User question: How do I create a new tool?\"
+ Example Output: \"Start by creating a new folder in `services/chat_agent/tools`..\"
```
-To recap, here's what's happening: `pdf_tool` retrieves docs from codebase documentation, the `expert_tool` is tasked
-with synthesizing an answer using these docs and an LLM call, and we write prompts for the `expert_tool` to which we add these docs.
-
-Luckily, we don't need to change any code in `sql_tool` for data retrieval from the commit history. We do, however, need to add prompts to tell the tool the structure of our data:
+We also don't need to change any code in `sql_tool` for data retrieval from the commit history. We do, however, need to add prompts to tell the tool the structure of our data in `tools.yml`:
```yaml
sql_tool:
@@ -161,9 +142,6 @@ sql_tool:
---
Please answer the following user questions with an SQL query:
{{question}}
- ---
- Current conversation history:
- {{chat_history}}
system_context: |-
You are an expert on the GitHub repository of AgentKit, an LLM-powered agent. Your main task is to use
SQL queries to retrieve information from structured tables containing commit history of the repository.
@@ -249,37 +227,80 @@ sql_tool:
```
The configs give the tool information about the commits table, allowing it to write an informed query.
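+
+For instance, for a question like "What were the latest commits from Joe to the frontend?", the generated query might look along these lines (illustrative only; the actual query depends on the LLM and the prompts above):
+```sql
+SELECT commit_timestamp, commit_user, commit_message, file_changed
+FROM commits
+WHERE commit_user ILIKE '%joe%'
+  AND file_changed LIKE 'frontend/%'
+ORDER BY commit_timestamp DESC
+LIMIT 10;
+```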
+Lastly, we're going to add a tool that reads the data obtained from `sql_tool` and answers the user's question in text. To do this, we'll slightly adjust the `basellm_tool` template (which can be used for simple LLM calls with input from previous tools) to create a new tool called `expert_tool`. We have to add a few lines of code to the `_arun` method of `basellm_tool` to take the data retrieved by `sql_tool` in previous steps and add it to the prompt:
+```python
+class BaseLLM(ExtendedBaseTool):
+ # Other code omitted ...
+ async def _arun(
+ self,
+ *args: Any,
+ run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
+ **kwargs: Any,
+ ) -> str:
+ """Use the tool asynchronously."""
+ try:
+ query = kwargs.get(
+ "query",
+ args[0],
+ )
+ tool_input = ToolInputSchema.parse_raw(query)
+ user_question = tool_input.latest_human_message
+
+ data = tool_input.intermediate_steps["sql_tool"]
+
+ messages = [
+ SystemMessage(content=self.system_context),
+ HumanMessage(content=self.prompt_message.format(question=user_question, retrieved_data=data))
+ ]
+ response = await self._agenerate_response(messages, discard_fast_llm=True, run_manager=run_manager)
+
+ logger.info(f"Expert Tool response - {response}")
+
+ return response
+ except Exception as e:
+ if run_manager is not None:
+ await run_manager.on_tool_error(e, tool=self.name)
+ return repr(e)
+ raise e
+```
+The code above parses the input provided to the tool, extracting the user prompt into `user_question` and the data retrieved by `sql_tool` from `intermediate_steps`. It then formats these into the prompt for the `expert_tool` (defined as a `BaseLLM` class in `tools.py`).
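+
+For intuition, the serialized `query` that `_arun` receives is a JSON string along these lines (a sketch: only the two fields used above are shown, and the real `ToolInputSchema` may carry additional fields):
+```python
+import json
+
+# Hypothetical example of the input parsed by ToolInputSchema.parse_raw
+query = json.dumps({
+    "latest_human_message": "How many commits are there in total?",
+    "intermediate_steps": {"sql_tool": "commit_count\n50"},
+})
+```
+The prompts for the `expert_tool` then need to be set in `tools.yml`: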
-## Step 3: Write action plans
-
-The last step is to write action plans to use our tools. Recall that we want the Agent to be able to do two tasks:
-
-1. Retrieve relevant docs from codebase documentation and use them to answer the user's question
-2. Generate and execute a SQL query to retrieve data from the commit history
-
-This suggests using two action plans in `agent.yml`:
```yaml
-action_plans:
- '0':
- name: ''
- description: Use this plan to answer technical questions about AgentKit - related to setup, code, codebase navigation, or other technical questions.
- actions:
- - - pdf_tool
- - - expert_tool
-
- '1':
- name: ''
- description: |-
- Use this plan to fetch Github-related information from the repository of AgentKit, such as commits, issues, pull requests.
- actions:
- - - sql_tool
+expert_tool:
+ default_llm: "gpt-4"
+ default_fast_llm: "gpt-3.5-turbo-1106"
+ description: >-
+    Tool to answer the user's question based on the data retrieved by the sql_tool.
+ {examples}
+ prompt_message: |-
+ Answer the user's question based on the data retrieved.
+ User question:
+ <<<
+ {{question}}
+ >>>
+ Retrieved data:
+ <<<
+ {{retrieved_data}}
+ >>>
+ Concise Answer:
+ system_context: |-
+ You are an expert in software engineering and communicating technical ideas. Your goal is to answer the user question solely based on the given data.
+ prompt_inputs:
+ - name: examples
+ content: |-
+ Example Input: "How many commits are there in total?"
+ Example Output: "There were a total of 50 commits made."
+ max_token_length: 8000
```
+To recap, here's what's happening: `sql_tool` retrieves data from our SQL database and passes it to the next action step, where the `expert_tool` is tasked with synthesizing an answer from that data.
+You can imagine using this pattern to build complex prompt chains: for example, a first step could obtain information from 5 different sources in parallel, which is then summarized and passed to an `expert_tool`-style tool that interprets the various data sources and formulates an answer. See [the `agent.yml` file for the public demo](https://github.com/BCG-X-Official/agentkit/blob/github-com/demo/backend/app/app/config/agent.yml) for a slightly more advanced example.
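+
+As a hypothetical sketch of such a chain in `agent.yml` (assuming, per the action plan format above, that tools listed within the same action step run in parallel):
+```yaml
+  '2':
+    name: ''
+    description: Use this plan for questions that need both documentation and commit data.
+    actions:
+      - - pdf_tool
+        - sql_tool    # same step: run in parallel
+      - - expert_tool # next step: synthesize an answer from both outputs
+```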
+
## Conclusion
-In three simple steps, we have set up a high quality Agent which can provide informed guidance on a codebase. See the results below!
+In three simple steps, we have set up a high-quality Agent app which can provide informed guidance on a codebase. See the results below!
Fetching docs:
![Docs retrieval image](img/tutorial1.png)
diff --git a/frontend/.env.example b/frontend/.env.example
index 2c40cc74..be0ff7d6 100644
--- a/frontend/.env.example
+++ b/frontend/.env.example
@@ -27,9 +27,9 @@ NEXTAUTH_URL="http://localhost:3000"
NEXTAUTH_URL_INTERNAL="http://localhost:3000"
GITHUB_ENTERPRISE_URL="https://github.your.enterprise.com/" # TEMPORARY: for BCG users use https://github.gamma.bcg.com
-# Next Auth Provider
-GITHUB_ID="cadd5256336ae4912af2"
-GITHUB_SECRET="b3d0b714c857bd06e9ddcda293dfb5a70b3fb721"
+# Next Auth Provider (Github as standard, but can be changed to others e.g. Google)
+GITHUB_ID=""
+GITHUB_SECRET=""
#############################################
# Database
diff --git a/frontend/src/components/ToolActionRenderer/actions.tsx b/frontend/src/components/ToolActionRenderer/actions.tsx
index e060bac5..932ea592 100644
--- a/frontend/src/components/ToolActionRenderer/actions.tsx
+++ b/frontend/src/components/ToolActionRenderer/actions.tsx
@@ -56,7 +56,7 @@ const actions = {
},
pdf_tool: {
icon: () => ,
- text: () => "Seaching in PDF documents...",
+ text: () => "Searching in PDF documents...",
},
filter_docs: {
icon: () => ,
diff --git a/scripts/create-dbs.sql b/scripts/1-create-dbs.sql
similarity index 100%
rename from scripts/create-dbs.sql
rename to scripts/1-create-dbs.sql
diff --git a/scripts/sql_db_tool/chinook_psql_load.sql b/scripts/sql_db_tool/2-chinook_psql_load.sql
similarity index 100%
rename from scripts/sql_db_tool/chinook_psql_load.sql
rename to scripts/sql_db_tool/2-chinook_psql_load.sql
diff --git a/scripts/sql_db_tool/2-load_commits.sql b/scripts/sql_db_tool/2-load_commits.sql
new file mode 100644
index 00000000..e24a64d4
--- /dev/null
+++ b/scripts/sql_db_tool/2-load_commits.sql
@@ -0,0 +1,13 @@
+-- Create table
+CREATE TABLE COMMITS (
+ commit_hash TEXT,
+ commit_timestamp TIMESTAMP,
+ commit_user TEXT,
+ commit_message TEXT,
+ file_changed TEXT
+);
+
+-- Copy CSV
+COPY COMMITS (commit_hash, commit_timestamp, commit_user, commit_message, file_changed)
+FROM '/docker-entrypoint-initdb.d/commit_history.csv'
+WITH CSV HEADER;
\ No newline at end of file