This repository has been archived by the owner on Feb 15, 2025. It is now read-only.

chore: make python test dependencies optional #815

Merged
Changes from 10 commits
2 changes: 1 addition & 1 deletion .github/workflows/e2e.yaml
@@ -59,7 +59,7 @@ jobs:
          python-version-file: 'pyproject.toml'

      - name: Install Python Deps
-        run: python -m pip install "."
+        run: python -m pip install ".[dev]"

      - name: Setup Node
        uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yaml
@@ -56,7 +56,7 @@ jobs:
        run: docker run -p 50051:50051 -d --name=repeater ghcr.io/defenseunicorns/leapfrogai/repeater:dev

      - name: Install Python Deps
-        run: pip install "." "src/leapfrogai_api" "src/leapfrogai_sdk"
+        run: pip install ".[dev]" "src/leapfrogai_api" "src/leapfrogai_sdk"

      - name: Run Pytest
        run: python -m pytest tests/pytest -v
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -13,15 +13,15 @@ license = {file = "LICENSE"}
dependencies = [ # Dev dependencies needed for all of lfai
    "openai",
    "pip-tools == 7.3.0",
    "pytest",
    "pytest-asyncio",
    "httpx",
    "ruff",
    "python-dotenv",
    "pytest-asyncio"
]
requires-python = "~=3.11"

[project.optional-dependencies]
dev = ["locust", "pytest-asyncio", "requests-toolbelt", "pytest"]

[tool.pip-tools]
generate-hashes = true

1 change: 1 addition & 0 deletions src/leapfrogai_api/Makefile
@@ -7,6 +7,7 @@ export SUPABASE_ANON_KEY=$(shell supabase status | grep -oP '(?<=anon key: ).*')
install:
	python -m pip install ../../src/leapfrogai_sdk
	python -m pip install -e .
+	python -m pip install "../../.[dev]"

dev:
	python -m uvicorn main:app --port 3000 --reload --log-level info
Binary file added tests/data/russian.mp3
2 changes: 1 addition & 1 deletion tests/e2e/README.md
@@ -30,7 +30,7 @@ make build-llama-cpp-python
uds zarf package deploy zarf-package-llama-cpp-python-*.tar.zst

# Install the python dependencies
-python -m pip install "."
+python -m pip install ".[dev]"

# Run the tests!
# NOTE: Each model backend has its own e2e test files
47 changes: 47 additions & 0 deletions tests/load/README.md
@@ -0,0 +1,47 @@
# LeapfrogAI Load Tests

## Overview

These tests exercise the API's ability to handle varying levels of load. They simulate a configurable number of concurrent users hitting the endpoints, each issuing requests at a configurable rate.
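As a mental model, the load pattern these tests generate can be sketched in plain Python (a toy illustration of concurrent users and request rates, not how Locust is implemented):

```python
import threading
import time
import random
from statistics import mean


def fake_request() -> float:
    """Stand-in for an HTTP call; returns simulated latency in seconds."""
    latency = random.uniform(0.01, 0.05)
    time.sleep(latency)
    return latency


def user(n_requests: int, results: list, lock: threading.Lock) -> None:
    # Each simulated user issues a fixed number of requests and records latency.
    for _ in range(n_requests):
        latency = fake_request()
        with lock:
            results.append(latency)


def run_load(num_users: int = 5, requests_per_user: int = 10) -> list:
    # Spawn one thread per simulated user, then wait for all to finish.
    results: list = []
    lock = threading.Lock()
    threads = [
        threading.Thread(target=user, args=(requests_per_user, results, lock))
        for _ in range(num_users)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return results


if __name__ == "__main__":
    latencies = run_load()
    print(f"{len(latencies)} requests, mean latency {mean(latencies) * 1000:.1f} ms")
```

Locust layers scheduling, wait times, and reporting on top of this basic idea.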

## Requirements

### Environment Setup

Before running the tests, ensure that your API URL and key are properly configured in your environment variables. Follow these steps:

1. Set the API URL:

   ```bash
   export API_URL="https://leapfrogai-api.uds.dev"
   ```

2. Set the API key:

   ```bash
   export API_KEY="<your-supabase-api-key-here>"
   ```

**Note:** The API key should be your Supabase API key. For information on generating a key, please refer to the [Supabase README.md](../../packages/supabase/README.md). While an API key generated from the LeapfrogAI API endpoint can be used, it will cause the key generation load tests to fail.

## Running the Tests

To start the Locust web interface and run the tests:

1. Install dependencies from the project root:

   ```bash
   pip install ".[dev]"
   ```

2. Navigate to the directory containing `loadtest.py`.

3. Execute the following command:

   ```bash
   locust -f loadtest.py --web-port 8089
   ```

4. Open your web browser and go to `http://0.0.0.0:8089`.

5. Use the Locust web interface to configure and run your tests:
- Set the number of users to simulate
- Set the spawn rate (users per second)
- Choose the host to test against (should match your `API_URL`)
- Start the test and monitor results in real-time
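For CI or scripted runs, Locust can also be driven headlessly from the CLI instead of the web UI (a sketch; the user count, spawn rate, and duration below are illustrative values, not project defaults):

```shell
# Headless run: 10 simulated users, spawning 2 per second, for 5 minutes.
# Results are printed to the terminal instead of the web UI.
locust -f loadtest.py --headless --users 10 --spawn-rate 2 --run-time 5m --host "$API_URL"
```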
246 changes: 246 additions & 0 deletions tests/load/loadtest.py
@@ -0,0 +1,246 @@
import mimetypes
import threading

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
import os
from locust import HttpUser, task, between, SequentialTaskSet
import warnings
import tempfile
import uuid

# Suppress SSL-related warnings
warnings.filterwarnings("ignore", category=Warning)


class MissingEnvironmentVariable(Exception):
    pass


try:
    API_KEY = os.environ["API_KEY"]
    API_URL = os.environ["API_URL"]
except KeyError:
    raise MissingEnvironmentVariable(
        "API_KEY and API_URL must be defined for the test to run. "
        "Please check the loadtest README at /tests/load/README.md for instructions on setting these values."
    )


class SharedResources:
    pdf_path = None
    pdf_lock = threading.Lock()


def download_arxiv_pdf():
    with SharedResources.pdf_lock:
        if SharedResources.pdf_path is None or not os.path.exists(
            SharedResources.pdf_path
        ):
            url = "https://arxiv.org/pdf/2305.16291.pdf"
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                SharedResources.pdf_path = temp_file.name

            response = requests.get(url, timeout=60)
            if response.status_code == 200:
                with open(SharedResources.pdf_path, "wb") as file:
                    file.write(response.content)
                print("ArXiv PDF downloaded successfully.")
            else:
                raise Exception(
                    f"Failed to download PDF from ArXiv. Status code: {response.status_code}"
                )
        else:
            print("Using existing ArXiv PDF.")

    return SharedResources.pdf_path


def load_audio_file():
    script_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(script_dir, "..", "data", "russian.mp3")
    with open(file_path, "rb") as file:
        return file.read()


class RAGTasks(SequentialTaskSet):
    """Run these tasks in order to simulate full RAG flow"""

    file_id = None
    vector_store_id = None
    assistant_id = None
    thread_id = None
    pdf_path = None

    def on_start(self):
        self.pdf_path = download_arxiv_pdf()

    @task
    def upload_file(self):
        mime_type, _ = mimetypes.guess_type(self.pdf_path)
        if mime_type is None:
            mime_type = "application/octet-stream"

        # Close the file handle after the upload instead of leaking it
        with open(self.pdf_path, "rb") as pdf_file:
            m = MultipartEncoder(
                fields={
                    "file": ("arxiv_2305.16291.pdf", pdf_file, mime_type),
                    "purpose": "assistants",
                }
            )

            headers = {"Content-Type": m.content_type}
            response = self.client.post("/openai/v1/files", data=m, headers=headers)

        if response.status_code == 200:
            self.file_id = response.json()["id"]
            print(f"Uploaded file ID: {self.file_id}")

    @task
    def create_vector_store(self):
        payload = {
            "name": f"Test Vector Store {uuid.uuid4()}",
            "file_ids": [self.file_id],
            "metadata": {"test": "data"},
        }
        response = self.client.post("/openai/v1/vector_stores", json=payload)
        if response.status_code == 200:
            self.vector_store_id = response.json()["id"]
            print(f"Created vector store ID: {self.vector_store_id}")

    @task
    def create_assistant(self):
        payload = {
            "model": "vllm",
            "name": f"RAG Assistant {uuid.uuid4()}",
            "instructions": "You are a helpful assistant with access to a knowledge base. Use the file_search tool to find relevant information.",
            "tools": [{"type": "file_search"}],
            "tool_resources": {
                "file_search": {"vector_store_ids": [self.vector_store_id]}
            },
        }
        response = self.client.post("/openai/v1/assistants", json=payload)
        if response.status_code == 200:
            self.assistant_id = response.json()["id"]
            print(f"Created assistant with ID: {self.assistant_id}")

    @task
    def create_thread_and_run(self):
        # Create a thread
        thread_payload = {
            "messages": [
                {
                    "role": "user",
                    "content": "What information can you provide about the capital of France?",
                }
            ]
        }
        thread_response = self.client.post("/openai/v1/threads", json=thread_payload)

        if thread_response.status_code == 200:
            self.thread_id = thread_response.json()["id"]

            # Create a run using the thread and assistant
            run_payload = {
                "assistant_id": self.assistant_id,
                "instructions": "Please use the file_search tool to find information about the capital of France and provide a detailed response.",
            }
            run_response = self.client.post(
                f"/openai/v1/threads/{self.thread_id}/runs", json=run_payload
            )

            if run_response.status_code == 200:
                run_id = run_response.json()["id"]
                print(f"Created run with ID: {run_id}")

                # Check run status
                status_response = self.client.get(
                    f"/openai/v1/threads/{self.thread_id}/runs/{run_id}"
                )
                print(f"Run status: {status_response.json()['status']}")

    @task
    def stop(self):
        self.interrupt()


class LeapfrogAIUser(HttpUser):
    """This class represents a user that will kick off tasks over the life of the test"""

    # Root url to use for all client requests
    host = API_URL
    # Add some wait time in-between kicking off tasks
    wait_time = between(1, 3)

    def on_start(self):
        # Turn off SSL verification to get rid of unnecessary TLS version issues
        self.client.verify = False
        self.client.headers.update({"Authorization": f"Bearer {API_KEY}"})

    @task
    def perform_rag_tasks(self):
        rag_tasks = RAGTasks(self)
        rag_tasks.run()

    @task
    def test_list_api_keys(self):
        self.client.get("/leapfrogai/v1/auth/list-api-keys")

    @task
    def test_openai_models(self):
        self.client.get("/openai/v1/models")

    @task
    def test_chat_completions(self):
        payload = {
            "model": "vllm",
            "messages": [{"role": "user", "content": "Hello, how are you?"}],
            "max_tokens": 50,
        }
        self.client.post("/openai/v1/chat/completions", json=payload)

    @task
    def test_embeddings(self):
        payload = {
            "model": "text-embeddings",
            "input": "The quick brown fox jumps over the lazy dog",
        }
        self.client.post("/openai/v1/embeddings", json=payload)

    @task
    def test_transcribe(self):
        audio_content = load_audio_file()
        files = {"file": ("russian.mp3", audio_content, "audio/mpeg")}
        data = {"model": "whisper", "language": "ru"}
        self.client.post("/openai/v1/audio/transcriptions", files=files, data=data)

    @task
    def test_translate(self):
        audio_content = load_audio_file()
        files = {"file": ("russian.mp3", audio_content, "audio/mpeg")}
        data = {"model": "whisper"}
        self.client.post("/openai/v1/audio/translations", files=files, data=data)

    @task
    def test_list_files(self):
        self.client.get("/openai/v1/files")

    @task
    def test_list_vector_stores(self):
        self.client.get("/openai/v1/vector_stores")

    @task
    def test_list_assistants(self):
        self.client.get("/openai/v1/assistants")

    @task
    def test_healthz(self):
        self.client.get("/healthz")

    @task
    def test_models(self):
        self.client.get("/models")

    @task
    def test_create_api_key(self):
        payload = {"name": "Test API Key"}
        self.client.post("/leapfrogai/v1/auth/create-api-key", json=payload)
3 changes: 3 additions & 0 deletions tests/make-tests.mk
@@ -30,6 +30,9 @@ test-int-api:
test-unit:
	PYTHONPATH=$$(pwd) pytest -vv -s tests/unit

+test-load:
+	locust -f ${PWD}/tests/load/loadtest.py --web-port 8089
+
debug:
	@echo ${SUPABASE_URL}
	@echo ${SUPABASE_ANON_KEY}