diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index c471d965..10959ab4 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] name: Lint steps: - name: Check out source repository diff --git a/.gitignore b/.gitignore index 8abb7959..33e7d13a 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,9 @@ go.work.sum .DS_Store __pycache__ +# Python virtual environment directory +.venv + # Jupyter notebook .ipynb_checkpoints/ @@ -33,8 +36,11 @@ __pycache__ docs/build/ !**/*.template.rst - # benchmark logs, result and figs benchmarks/autoscaling/logs benchmarks/autoscaling/output_stats -benchmarks/autoscaling/workload_plot \ No newline at end of file +benchmarks/autoscaling/workload_plot + +# simulator cache and output +docs/development/simulator/simulator_output +docs/development/simulator/cache diff --git a/docs/development/simulator/Dockerfile b/docs/development/simulator/Dockerfile new file mode 100644 index 00000000..a0777710 --- /dev/null +++ b/docs/development/simulator/Dockerfile @@ -0,0 +1,33 @@ +# Use the official Python base image +FROM python:3.10-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV WANDB_MODE=disabled + +# Set the working directory +WORKDIR /simulator + +# Copy the requirements file into the container +COPY requirements.txt /simulator/ + +# Install dependencies +RUN apt update && apt install -y curl jq git + +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application code into the container +COPY ./*.py /simulator/ +# COPY ./model_cache /simulator/model_cache + +ENV MODEL_NAME=llama2-7b +ARG GPU_TYPE=a100 + # Trigger profiling +RUN python app.py --time_limit 1000 --replica_config_device ${GPU_TYPE} + +# Expose the port the app runs on +EXPOSE 8000 + +# Run the application +CMD ["python", "app.py"] diff --git a/docs/development/simulator/Makefile b/docs/development/simulator/Makefile new file mode 100644 index 00000000..2805f99d --- /dev/null +++ b/docs/development/simulator/Makefile @@ -0,0 +1,80 @@ +all: build + +build-a100: + docker build -t aibrix/vllm-simulator:nightly -f Dockerfile . + +build-a40: + docker build -t aibrix/vllm-simulator-a40:nightly --build-arg GPU_TYPE=a40 -f Dockerfile . 
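+# Note: GPU_TYPE is passed through to the Dockerfile's `ARG GPU_TYPE`; during the image build it runs
+# `python app.py --time_limit 1000 --replica_config_device ${GPU_TYPE}` so that device profiling happens
+# at build time (see the "Trigger profiling" step in the Dockerfile).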
+ +build: build-a100 + +deploy-a100: + kubectl apply -f deployment-a100.yaml + +deploy-a40: + kubectl apply -f deployment-a40.yaml + +deploy: deploy-a100 + sleep 2 + kubectl -n aibrix-system port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 & + +clean: + kubectl delete -f deployment-a100.yaml + kubectl delete -f deployment-a40.yaml + sleep 1 + curl http://localhost:8000/metrics + +test: + curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any_key" \ + -d '{ \ + "model": "llama2-7b", \ + "messages": [{"role": "user", "content": "Say this is a test!"}], \ + "temperature": 0.7 \ + }' + +test2: + curl http://localhost:8000/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any_key" \ + -d '{ \ + "model": "llama2-7b", \ + "prompt": "Say this is a test!", \ + "temperature": 0.7, \ + "max_tokens": 50 \ + }' + +test-long: + curl http://localhost:8000/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any_key" \ + -d '{ \ + "model": "llama2-7b", \ + "prompt": "Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! 
Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! 
Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! 
Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! 
Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test! Say this is a test!", \ + "temperature": 0.7, \ + "max_tokens": 50 \ + }' + +init-local-gateway-call: + kubectl -n aibrix-system port-forward svc/aibrix-gateway-users 8090:8090 1>/dev/null 2>&1 & + kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 & + sleep 1 + curl http://localhost:8090/CreateUser \ + -H "Content-Type: application/json" \ + -d '{"name": "your-user-name","rpm": 1000,"tpm": 100000}' + +test-gateway: + curl -v http://localhost:8888/v1/chat/completions \ + -H "user: your-user-name" \ + -H "model: llama2-7b" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any_key" \ + -d '{ \ + "model": "llama2-7b", \ + "messages": [{"role": "user", "content": "Say this is a test!"}], \ + "temperature": 0.7 \ + }' + +metrics: + curl http://localhost:8000/metrics \ No newline at end of file diff --git a/docs/development/simulator/README.md b/docs/development/simulator/README.md new file mode 100644 index 00000000..b337a053 --- /dev/null +++ b/docs/development/simulator/README.md @@ -0,0 +1,75 @@ +# vLLM application simulator + +## Run locally + +Ensure that you have Python 3.10 installed on your system. 
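+
+The whole local setup can be condensed into the following commands (a minimal sketch, assuming `python3.10` is on your PATH and that you run them from this directory; the individual steps are described below):
+
+```shell
+python3.10 -m venv .venv                      # create a virtual environment
+source .venv/bin/activate                     # activate it
+python -m pip install -r requirements.txt     # install dependencies
+python app.py                                 # start the simulator on port 8000
+deactivate                                    # leave the virtual environment when done
+```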
Refer to https://www.bitecode.dev/p/installing-python-the-bare-minimum if you need help installing Python.
+
+1. Create a virtual environment with the venv module: `python3.10 -m venv .venv`
+2. Activate the virtual environment: `source .venv/bin/activate`
+3. Install the dependencies: `python -m pip install -r requirements.txt`
+4. Run `python app.py` to start the server.
+5. Run `deactivate` to leave the virtual environment when you are done.
+
+## Run in Kubernetes
+
+1. Build the simulated base model image
+```shell
+docker build -t aibrix/vllm-simulator:nightly -f Dockerfile .
+
+# If you are using Docker Desktop on Mac, Kubernetes shares the local image repository with Docker,
+# so the following command is not necessary.
+kind load docker-image aibrix/vllm-simulator:nightly
+```
+
+2. Deploy the simulated model image
+```shell
+kubectl apply -f docs/development/simulator/deployment.yaml
+kubectl -n aibrix-system port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
+```
+
+## Test the Python app separately
+
+```shell
+curl http://localhost:8000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer any_key" \
+    -d '{
+        "model": "llama2-7b",
+        "messages": [{"role": "user", "content": "Say this is a test!"}],
+        "temperature": 0.7
+    }'
+```
+
+Clean up the deployment when you are done:
+```shell
+kubectl delete -f docs/development/simulator/deployment.yaml
+```
+
+## Test with envoy gateway
+
+Port forward to the User and Envoy services:
+```shell
+kubectl -n aibrix-system port-forward svc/aibrix-gateway-users 8090:8090 1>/dev/null 2>&1 &
+kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &
+```
+
+Add a user:
+```shell
+curl http://localhost:8090/CreateUser \
+    -H "Content-Type: application/json" \
+    -d '{"name": "your-user-name","rpm": 100,"tpm": 1000}'
+```
+
+Send a test request (ensure the `model` header matches the deployment's model name so the gateway can route it):
+```shell
+curl -v http://localhost:8888/v1/chat/completions \
+    -H "user: your-user-name" \
+    -H "model: llama2-7b" \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer any_key" \
+    -d '{
+        "model": "llama2-7b",
+        "messages": [{"role": "user", "content": "Say this is a test!"}],
+        "temperature": 0.7
+    }'
+```
\ No newline at end of file
diff --git a/docs/development/simulator/app.py b/docs/development/simulator/app.py
new file mode 100644
index 00000000..604f10a7
--- /dev/null
+++ b/docs/development/simulator/app.py
@@ -0,0 +1,336 @@
+import logging
+import os
+import sys
+import time
+from datetime import datetime
+from random import randint
+
+from flask import Flask, Response, jsonify, request
+
+try:
+    from kubernetes import client, config
+except Exception as e:
+    print(f"Failed to import kubernetes, skip: {e}")
+
+from simulator import Simulator
+from transformers import AutoTokenizer
+from vidur.config import SimulationConfig
+from vidur.config_optimizer.config_explorer.config import ModelConfig
+from vidur.entities import Request
+
+MODEL_NAME = os.getenv('MODEL_NAME', 'llama2-70b')
+DEPLOYMENT_NAME = os.getenv('DEPLOYMENT_NAME', 'llama2-70b')
+NAMESPACE = os.getenv('NAMESPACE', 'default')
+DEFAULT_REPLICAS = int(os.getenv('DEFAULT_REPLICAS', '1'))
+
+# Load the tokenizer for your model
+tokenizer = AutoTokenizer.from_pretrained(
+    'bert-base-uncased',
+    model_max_length=16384,  # Suppress warning
+    clean_up_tokenization_spaces=True)
+
+app = Flask(__name__)
+modelMaps = {
+    "llama2-7b": "meta-llama/Llama-2-7b-hf",
+    "llama2-70b": "meta-llama/Llama-2-70b-hf"
+}
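+
+# NOTE: MODEL_NAME (set via the MODEL_NAME env var, which the deployments derive from the
+# model.aibrix.ai/name label) is resolved through modelMaps to a Hugging Face model id and
+# appended to sys.argv below, so SimulationConfig.create_from_cli_args() picks up the model to simulate.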
+sys.argv.append(f"--replica_config_model_name={modelMaps.get(MODEL_NAME, MODEL_NAME)}") +simulator_config: SimulationConfig = SimulationConfig.create_from_cli_args() +simulator = Simulator(simulator_config) +v1 = None + +# Global storage for overridden values +overrides = {} + +logger = logging.getLogger(__name__) + +def get_token_count(text): + try: + # Encode the text + encoded_input = tokenizer(text) + + # Get the number of tokens + return len(encoded_input['input_ids']) + except Exception as e: + logger.error(f"Failed to get number of tokens: {e}") + + return 1 + +models = [ + { + "id": "meta-llama/Llama-2-7b-hf", + "object": "model", + "created": 1715644056, + "owned_by": "vllm", + "root": "meta-llama/Llama-2-7b-hf", + "parent": None, + "permission": [ + { + "id": "modelperm-cb1adf4457b2417e8c7770aadcffe4cc", + "object": "model_permission", + "created": 1715644056, + "allow_create_engine": False, + "allow_sampling": True, + "allow_logprobs": True, + "allow_search_indices": False, + "allow_view": True, + "allow_fine_tuning": False, + "organization": "*", + "group": None, + "is_blocking": False + } + ] + }, + { + "id": "startup-default-lora", + "object": "model", + "created": 1715644056, + "owned_by": "vllm", + "root": "meta-llama/Llama-2-7b-hf", + "parent": None, + "permission": [ + { + "id": "modelperm-6a01d79e4d0e452b94d52d2c2e8c8562", + "object": "model_permission", + "created": 1715644056, + "allow_create_engine": False, + "allow_sampling": True, + "allow_logprobs": True, + "allow_search_indices": False, + "allow_view": True, + "allow_fine_tuning": False, + "organization": "*", + "group": None, + "is_blocking": False + } + ] + } +] + +@app.route('/v1/models', methods=['GET']) +def get_models(): + return jsonify({ + "object": "list", + "data": models + }) + +@app.route('/v1/load_lora_adapter', methods=['POST']) +def load_model(): + lora_name = request.json.get('lora_name') + # Check if the model already exists + if any(model['id'] == lora_name for model in models): + return jsonify({"status": "success", "message": "Model already loaded"}), 200 + + new_model = { + 'id': lora_name, + 'created': int(time.time()), + 'object': "model", + 'owned_by': "vllm", + 'parent': None, + 'root': request.json.get('lora_path') + } + + models.append(new_model) + return jsonify({"status": "success", "message": "Model loaded successfully"}), 200 + + +@app.route('/v1/unload_lora_adapter', methods=['POST']) +def unload_model(): + model_id = request.json.get('lora_name') + global models + models = [model for model in models if model['id'] != model_id] + return jsonify({"status": "success", "message": "Model unloaded successfully"}), 200 + + +@app.route('/v1/completions', methods=['POST']) +def completion(): + try: + prompt = request.json.get('prompt') + model = request.json.get('model') + max_tokens = request.json.get('max_tokens') + if not prompt or not model: + return jsonify({"status": "error", "message": "Prompt and model are required"}), 400 + + arrived_at = datetime.now().timestamp() + input_tokens = get_token_count(prompt) + output_tokens = max_tokens if max_tokens else randint(10, 500) + arrived_next = request.json.get('next_in') + if not arrived_next: + arrived_next = 0.0 + else: + arrived_next += arrived_at + + start = datetime.now().timestamp() + latency = simulator.execute(Request(arrived_at, input_tokens, output_tokens, arrived_next=arrived_next)) + + # Simulated response + response = { + "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", + "object": "text_completion", + "created": int(arrived_at), + 
"model": model, + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "text": f"This is simulated message from {model}!", + "index": 0, + "logprobs": None, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": input_tokens, + "completion_tokens": output_tokens, + "total_tokens": input_tokens + output_tokens, + "time": latency + } + } + overhead = datetime.now().timestamp()-start + if latency > overhead: + time.sleep(latency-overhead) + else: + logger.warning(f"Latency is less than overhead: L{latency} - O{overhead}") + + return jsonify(response), 200 + except Exception as e: + import traceback + traceback.print_exc() + + +@app.route('/v1/chat/completions', methods=['POST']) +def chat_completions(): + messages = request.json.get('messages') + model = request.json.get('model') + max_tokens = request.json.get('max_tokens') + if not messages or not model: + return jsonify({"status": "error", "message": "Messages and model are required"}), 400 + + arrived_at = datetime.now().timestamp() + input_tokens = sum(get_token_count(message["content"]) for message in messages) + output_tokens = max_tokens if max_tokens else randint(10, 500) + arrived_next = request.json.get('next_in') + if not arrived_next: + arrived_next = 0.0 + else: + arrived_next += arrived_at + + start = datetime.now().timestamp() + latency = simulator.execute(Request(arrived_at, input_tokens, output_tokens, arrived_next=arrived_next)) + + # Simulated response + response = { + "id": "chatcmpl-abc123", + "object": "chat.completion", + "created": int(arrived_at), + "model": model, + "usage": { + "prompt_tokens": input_tokens, + "completion_tokens": output_tokens, + "total_tokens": input_tokens + output_tokens, + "time": latency + }, + "choices": [ + { + "message": { + "role": "assistant", + "content": f"\n\nThis is simulated message from {model}!" 
+ }, + "logprobs": None, + "finish_reason": "stop", + "index": 0 + } + ] + } + overhead = datetime.now().timestamp()-start + if latency > overhead: + time.sleep(latency-overhead) + else: + logger.warning(f"Latency is less than overhead: L{latency} - O{overhead}") + return jsonify(response), 200 + +@app.route('/set_metrics', methods=['POST']) +def set_metrics(): + global overrides + # Get JSON data from the request + data = request.json + if data: + # Update overrides with new key-value pairs + overrides.update(data) + return {"status": "success", "message": "Overrides updated"}, 200 + else: + return {"status": "error", "message": "No data provided"}, 400 + +@app.route('/metrics') +def metrics(): + # get deployment information + try: + apps_v1 = client.AppsV1Api() + resp = apps_v1.read_namespaced_deployment(DEPLOYMENT_NAME, NAMESPACE) + replicas = resp.spec.replicas if resp.spec.replicas is not None else 1 + except Exception as e: + print(f"Failed to get deployment information: {DEPLOYMENT_NAME=} {NAMESPACE=} {e=}, set replicas to {DEFAULT_REPLICAS}") + replicas = DEFAULT_REPLICAS + + # a reasonable mock total value + total = overrides.get("total", 0) + model_name = overrides.get("model_name", MODEL_NAME) + # calculate metrics with potential overrides + success_total = overrides.get("success_total", total / replicas) + avg_prompt_throughput = overrides.get("avg_prompt_throughput", total / replicas if replicas > 0 else 0) + avg_generation_throughput = overrides.get("avg_generation_throughput", total / replicas if replicas > 0 else 0) + running = overrides.get("running", 0) + waiting = overrides.get("waiting", 0) + swapped = overrides.get("swapped", 0) + max_running_capacity = 100 + gpu_cache_usage_perc = overrides.get("gpu_cache_usage_perc", min(100.0, (running / max_running_capacity) * 100)) + + # construct Prometheus-style Metrics + metrics_output = f"""# HELP vllm:request_success_total Count of successfully processed requests. +# TYPE vllm:request_success_total counter +vllm:request_success_total{{finished_reason="stop",model_name="{model_name}"}} {success_total} +# HELP vllm:num_requests_running Number of requests currently running on GPU. +# TYPE vllm:num_requests_running gauge +vllm:num_requests_running{{model_name="{model_name}"}} {running} +# HELP vllm:num_requests_swapped Number of requests swapped to CPU. +# TYPE vllm:num_requests_swapped gauge +vllm:num_requests_swapped{{model_name="{model_name}"}} {swapped} +# HELP vllm:num_requests_waiting Number of requests waiting to be processed. +# TYPE vllm:num_requests_waiting gauge +vllm:num_requests_waiting{{model_name="{model_name}"}} {waiting} +# HELP vllm:avg_prompt_throughput_toks_per_s Average prefill throughput in tokens/s. +# TYPE vllm:avg_prompt_throughput_toks_per_s gauge +vllm:avg_prompt_throughput_toks_per_s{{model_name="{model_name}"}} {avg_prompt_throughput} +# HELP vllm:avg_generation_throughput_toks_per_s Average generation throughput in tokens/s. +# TYPE vllm:avg_generation_throughput_toks_per_s gauge +vllm:avg_generation_throughput_toks_per_s{{model_name="{model_name}"}} {avg_generation_throughput} +# HELP vllm:gpu_cache_usage_perc GPU KV-cache usage. 1 means 100 percent usage. 
+# TYPE vllm:gpu_cache_usage_perc gauge
+vllm:gpu_cache_usage_perc{{model_name="{model_name}"}} {gpu_cache_usage_perc}
+"""
+    return Response(metrics_output, mimetype='text/plain')
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.DEBUG)
+    logging.getLogger("kubernetes.client.rest").setLevel(logging.ERROR)  # Suppress kubernetes logs
+
+    print(f"Starting app. DEPLOYMENT_NAME: {DEPLOYMENT_NAME}, NAMESPACE: {NAMESPACE}, MODEL: {MODEL_NAME}")
+
+    thread = simulator.start()
+
+    import sys
+    if '--time_limit' not in sys.argv:
+        try:
+            # config.load_kube_config()
+            config.load_incluster_config()
+        except Exception as e:
+            print(f"Failed to load k8s config: {e}")
+
+        # Perform profiling and skip actual run
+        app.run(host='0.0.0.0', port=8000)
+
+        # latency = simulator.execute(Request(0, 25, 100))
+        # print(f"request latency: {latency}")
+
+    simulator.stop()
+
+    thread.join()
diff --git a/docs/development/simulator/deployment-a100.yaml b/docs/development/simulator/deployment-a100.yaml
new file mode 100644
index 00000000..8436f5ef
--- /dev/null
+++ b/docs/development/simulator/deployment-a100.yaml
@@ -0,0 +1,161 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: simulator-llama2-7b-a100
+  namespace: aibrix-system
+  labels:
+    modeladapter.aibricks.ai/enabled: "true"
+    model.aibrix.ai/name: "llama2-7b"
+    model.aibrix.ai/port: "8000"
+    model.aibrix.ai/min_replicas: "1"  # minimum replicas for the gpu optimizer when there is no workload.
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      modeladapter.aibricks.ai/enabled: "true"
+      model.aibrix.ai/name: "llama2-7b"
+  template:
+    metadata:
+      labels:
+        modeladapter.aibricks.ai/enabled: "true"
+        model.aibrix.ai/name: "llama2-7b"
+        app: "simulator-llama2-7b-a100"
+    spec:
+      serviceAccountName: pod-autoscaler
+      automountServiceAccountToken: true  # Important!
+      containers:
+        - name: llmengine-simulator
+          image: aibrix/vllm-simulator:nightly
+          command: ["python", "app.py", "--replica_config_device", "a100"]
+          ports:
+            - containerPort: 8000
+          env:
+            - name: DEPLOYMENT_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.labels['app']
+            - name: NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: MY_POD_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.podIP
+            - name: MODEL_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.labels['model.aibrix.ai/name']
+---
+# Debug only: make sure the pod can be reached from a controller running outside the cluster (e.g. on a Mac).
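+# The Service below exposes the simulator on NodePort 30081; alternatively,
+# `kubectl -n aibrix-system port-forward svc/llama2-7b 8000:8000` works for local testing
+# (see the Makefile's `deploy` target).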
+apiVersion: v1 +kind: Service +metadata: + name: llama2-7b + namespace: aibrix-system +spec: + selector: + model.aibrix.ai/name: "llama2-7b" + ports: + - protocol: TCP + port: 8000 + targetPort: 8000 + nodePort: 30081 + type: NodePort +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pod-autoscaler + namespace: aibrix-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: pod-reader + namespace: aibrix-system +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: read-pods + namespace: aibrix-system +subjects: + - kind: ServiceAccount + name: pod-autoscaler + namespace: aibrix-system +roleRef: + kind: Role + name: pod-reader + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: aibrix-system + name: deployment-reader +rules: + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: deployment-reader-binding + namespace: aibrix-system +subjects: + - kind: ServiceAccount + name: pod-autoscaler + namespace: aibrix-system +roleRef: + kind: Role + name: deployment-reader + apiGroup: rbac.authorization.k8s.io +# --- +# for test-purpose, if need to create HTTPRoute object manually +# apiVersion: gateway.networking.k8s.io/v1 +# kind: HTTPRoute +# metadata: +# name: llama2-7b-router +# namespace: aibrix-system +# spec: +# parentRefs: +# - name: aibrix-eg +# rules: +# - matches: +# - headers: +# - type: Exact +# name: model +# value: llama2-7b +# backendRefs: +# - name: llama2-7b +# port: 8000 +--- +# Pod autoscaler works with gpu-optimizer +apiVersion: autoscaling.aibrix.ai/v1alpha1 +kind: PodAutoscaler +metadata: + name: podautoscaler-simulator-llama2-7b-a100 + labels: + app.kubernetes.io/name: aibrix + app.kubernetes.io/managed-by: kustomize + namespace: aibrix-system +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: simulator-llama2-7b-a100 + minReplicas: 0 + maxReplicas: 10 + targetMetric: "avg_prompt_throughput_toks_per_s" # Ignore if metricsSources is configured + metricsSources: + - endpoint: gpu-optimizer.aibrix-system.svc.cluster.local:8080 + path: /metrics/aibrix-system/simulator-llama2-7b-a100 + metric: "vllm:deployment_replicas" + targetValue: "1" + scalingStrategy: "KPA" \ No newline at end of file diff --git a/docs/development/simulator/deployment-a40.yaml b/docs/development/simulator/deployment-a40.yaml new file mode 100644 index 00000000..7d172142 --- /dev/null +++ b/docs/development/simulator/deployment-a40.yaml @@ -0,0 +1,160 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: simulator-llama2-7b-a40 + namespace: aibrix-system + labels: + modeladapter.aibricks.ai/enabled: "true" + model.aibrix.ai/name: "llama2-7b" + model.aibrix.ai/port: "8000" +spec: + replicas: 1 + selector: + matchLabels: + modeladapter.aibricks.ai/enabled: "true" + model.aibrix.ai/name: "llama2-7b" + template: + metadata: + labels: + modeladapter.aibricks.ai/enabled: "true" + model.aibrix.ai/name: "llama2-7b" + app: "simulator-llama2-7b-a40" + spec: + serviceAccountName: pod-autoscaler + automountServiceAccountToken: true # Important! 
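+      # The token is needed because the simulator's /metrics handler calls the Kubernetes API
+      # (read_namespaced_deployment) to report per-replica throughput, using the pod-autoscaler
+      # ServiceAccount and the deployment-reader Role defined below.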
+ containers: + - name: llmengine-simulator + image: aibrix/vllm-simulator-a40:nightly + command: ["python", "app.py", "--replica_config_device", "a40"] + ports: + - containerPort: 8000 + env: + - name: DEPLOYMENT_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app'] + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: MODEL_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['model.aibrix.ai/name'] +--- +# Debug only: Make sure pod can be visited from controller that deployed in mac. +apiVersion: v1 +kind: Service +metadata: + name: llama2-7b + namespace: aibrix-system +spec: + selector: + model.aibrix.ai/name: "llama2-7b" + ports: + - protocol: TCP + port: 8000 + targetPort: 8000 + nodePort: 30081 + type: NodePort +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pod-autoscaler + namespace: aibrix-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: pod-reader + namespace: aibrix-system +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: read-pods + namespace: aibrix-system +subjects: + - kind: ServiceAccount + name: pod-autoscaler + namespace: aibrix-system +roleRef: + kind: Role + name: pod-reader + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: aibrix-system + name: deployment-reader +rules: + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: deployment-reader-binding + namespace: aibrix-system +subjects: + - kind: ServiceAccount + name: pod-autoscaler + namespace: aibrix-system +roleRef: + kind: Role + name: deployment-reader + apiGroup: rbac.authorization.k8s.io +# --- +# for test-purpose, if need to create HTTPRoute object manually +# apiVersion: gateway.networking.k8s.io/v1 +# kind: HTTPRoute +# metadata: +# name: llama2-7b-router +# namespace: aibrix-system +# spec: +# parentRefs: +# - name: aibrix-eg +# rules: +# - matches: +# - headers: +# - type: Exact +# name: model +# value: llama2-7b +# backendRefs: +# - name: llama2-7b +# port: 8000 +--- +# Pod autoscaler works with gpu-optimizer +apiVersion: autoscaling.aibrix.ai/v1alpha1 +kind: PodAutoscaler +metadata: + name: podautoscaler-simulator-llama2-7b-a40 + labels: + app.kubernetes.io/name: aibrix + app.kubernetes.io/managed-by: kustomize + namespace: aibrix-system +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: simulator-llama2-7b-a40 + minReplicas: 0 + maxReplicas: 10 + targetMetric: "avg_prompt_throughput_toks_per_s" # Ignore if metricsSources is configured + metricsSources: + - endpoint: gpu-optimizer.aibrix-system.svc.cluster.local:8080 + path: /metrics/aibrix-system/simulator-llama2-7b-a40 + metric: "vllm:deployment_replicas" + targetValue: "1" + scalingStrategy: "KPA" \ No newline at end of file diff --git a/docs/development/simulator/environment.yml b/docs/development/simulator/environment.yml new file mode 100644 index 00000000..60f4b6d7 --- /dev/null +++ b/docs/development/simulator/environment.yml @@ -0,0 +1,29 @@ +name: simulator +channels: + - conda-forge + - plotly +dependencies: + - python>=3.10 + - setuptools + - pip + - numpy + - plotly_express + - jupyterlab + - matplotlib + - pyyaml + - snakeviz + - scikit-learn + - 
python-kaleido + - wandb + - fasteners + - ray-all + - streamlit + - randomname + - flask + - kubernetes + - transformers + - pip: + - kaleido + - ddsketch + - paretoset + - git+https://github.com/zhangjyr/vidur.git \ No newline at end of file diff --git a/docs/development/simulator/requirements.txt b/docs/development/simulator/requirements.txt new file mode 100644 index 00000000..7386688c --- /dev/null +++ b/docs/development/simulator/requirements.txt @@ -0,0 +1,14 @@ +flask +kubernetes +numpy +pandas +scikit-learn +wandb +kaleido +ddsketch +plotly_express +matplotlib +seaborn +fasteners +transformers +git+https://github.com/zhangjyr/vidur.git \ No newline at end of file diff --git a/docs/development/simulator/simulator.py b/docs/development/simulator/simulator.py new file mode 100644 index 00000000..fb84022e --- /dev/null +++ b/docs/development/simulator/simulator.py @@ -0,0 +1,219 @@ +import asyncio +import atexit +import heapq +import json +import sys +import threading +from typing import List + +from vidur.config import SimulationConfig +from vidur.entities import Cluster, Request +from vidur.events import BaseEvent, RequestArrivalEvent +from vidur.logger import init_logger +from vidur.metrics import MetricsStore +from vidur.request_generator import RequestGeneratorRegistry +from vidur.scheduler import BaseGlobalScheduler, GlobalSchedulerRegistry +from vidur.types import EventType +from vidur.utils.random import set_seeds + +logger = init_logger(__name__) + + +class Simulator: + def __init__(self, config: SimulationConfig) -> None: + self._config: SimulationConfig = config + set_seeds(config.seed) + + self._time = 0 + self._terminate = False + self._time_limit = self._config.time_limit + if not self._time_limit: + self._time_limit = float("inf") + + self._event_queue = [] + + self._event_trace = [] + self._event_chrome_trace = [] + + self._cluster = Cluster( + self._config.cluster_config, + self._config.metrics_config, + self._config.request_generator_config, + ) + self._metric_store = MetricsStore(self._config) + self._request_generator = RequestGeneratorRegistry.get( + self._config.request_generator_config.get_type(), + self._config.request_generator_config, + ) + self._scheduler = GlobalSchedulerRegistry.get( + self._config.cluster_config.global_scheduler_config.get_type(), + self._config, + self._cluster.replicas, + ) + + self._loop = None + self._expect_next_tick = sys.float_info.max + self._queue_buffer: List[Request] = [] + self._queue = None + + # self._init_event_queue() + atexit.register(self._write_output) + + @property + def scheduler(self) -> BaseGlobalScheduler: + return self._scheduler + + @property + def metric_store(self) -> MetricsStore: + return self._metric_store + + def start(self): + logger.info( + f"Starting simulation with cluster: {self._cluster}, model: {self._config.cluster_config.replica_config.model_name}, seed: {self._config.seed}" + ) + + # Start the event loop + self._loop = asyncio.new_event_loop() + self._queue = asyncio.Queue() + self._done = asyncio.Event() + + # Create and start a new thread to run the loop + t = threading.Thread(target=self._run) + t.start() + + asyncio.run_coroutine_threadsafe(self._serve(), self._loop) + + return t + + def _run(self): + asyncio.set_event_loop(self._loop) + self._loop.run_forever() + + def stop(self): + asyncio.run_coroutine_threadsafe(self._wait_done(asyncio.all_tasks(loop=self._loop)), self._loop).result() + self._loop.call_soon_threadsafe(self._loop.stop) + + async def _wait_done(self, pending): + # Graceful 
shutdown (if needed) + logger.info(f"pending:{len(pending)}") + for task in pending: + task.cancel() + await asyncio.gather(*pending, return_exceptions=True) + + async def _serve(self): + while True: + # Enqueue arrived requests. + if not self._queue.empty(): + request: Request = await self._queue.get() + self._serve_request(request) + self._queue.task_done() # Signal that the task is complete + continue + + # Drive events. + while self._event_queue and not self._terminate: + _, event = heapq.heappop(self._event_queue) + self._set_time(event._time) + new_events = event.handle_event(self._scheduler, self._metric_store) + self._add_events(new_events) + logger.debug("Executed event: %s", event) + + if self._config.metrics_config.write_json_trace: + self._event_trace.append(event.to_dict()) + + if self._config.metrics_config.enable_chrome_trace: + chrome_trace = event.to_chrome_trace() + if chrome_trace: + self._event_chrome_trace.append(chrome_trace) + + if event.event_type == EventType.REQUEST_END and event._request.response != None: + event._request.response.set_result(event._time - event._request.arrived_at) + + # Pause at the next request predicts. + if self._expect_next_tick > 0 and event._time >= self._expect_next_tick: + self._expect_next_tick = 0 + break + + # Reset expecting next request. + # if self._expect_next_tick == 0 and (not self._event_queue or self._terminate): + # return + + # Expecting next request + request: Request = await self._queue.get() + self._serve_request(request) + self._queue.task_done() # Signal that the task is complete + + + def _serve_request(self, request: Request): + # Update next expected request. + if self._expect_next_tick == 0: + self._expect_next_tick = request.arrived_next + else: + self._expect_next_tick = min(self._expect_next_tick, request.arrived_next) + + self._add_event(RequestArrivalEvent(request.arrived_at, request)) + + def execute(self, request: Request) -> float: + return asyncio.run_coroutine_threadsafe(self._execute(request), self._loop).result() + + async def _execute(self, request: Request) -> float: + if self._queue == None: + self._queue_buffer.append(request) + return 0.0 + + request.response = self._loop.create_future() + await self._queue.put(request) + + result = await request.response + return result + + + def _write_output(self) -> None: + logger.info("Writing output") + + self._metric_store.plot() + logger.info("Metrics written") + + if self._config.metrics_config.write_json_trace: + self._write_event_trace() + self._scheduler.write_batching_history() + logger.info("Json event trace written") + + if self._config.metrics_config.enable_chrome_trace: + self._write_chrome_trace() + logger.info("Chrome event trace written") + + def _add_event(self, event: BaseEvent, queue=None) -> None: + if queue == None: + queue = self._event_queue + heapq.heappush(queue, (event._priority_number, event)) + + def _add_events(self, events: List[BaseEvent], queue=None) -> None: + for event in events: + self._add_event(event, queue) + + def _init_event_queue(self) -> None: + requests = self._request_generator.generate() + + for request in requests: + self._add_event(RequestArrivalEvent(request.arrived_at, request)) + + def _set_time(self, time: float) -> None: + self._time = time + if self._time > self._time_limit: + logger.info( + f"Time limit reached: {self._time_limit}s terminating the simulation." 
+ ) + self._terminate = True + + def _write_event_trace(self) -> None: + trace_file = f"{self._config.metrics_config.output_dir}/event_trace.json" + with open(trace_file, "w") as f: + json.dump(self._event_trace, f) + + def _write_chrome_trace(self) -> None: + trace_file = f"{self._config.metrics_config.output_dir}/chrome_trace.json" + + chrome_trace = {"traceEvents": self._event_chrome_trace} + + with open(trace_file, "w") as f: + json.dump(chrome_trace, f) diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index 04a60698..53316c09 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -64,8 +64,13 @@ const ( modelIdentifier = "model.aibrix.ai/name" podPort = 8000 defaultPodMetricRefreshIntervalInMS = 50 - writeRequestTraceIntervalInSeconds = 10 expireWriteRequestTraceIntervalInMins = 10 + keyWriteRequestTraceIntervalInSeconds = "meta_interval_sec" + writeRequestTraceIntervalInSeconds = 10 + keyPrecisionRequestTrace = "meta_precision" + precisionRequestTrace = 0.1 + keyVersionRequestTrace = "meta_v" + versionRequestTrace = 2 ) var ( @@ -514,14 +519,17 @@ func (c *Cache) AddRequestTrace(modelName string, inputTokens, outputTokens int6 c.mu.Lock() defer c.mu.Unlock() - inputIndex := math.Trunc(math.Log2(float64(inputTokens))) - outputIndex := math.Trunc(math.Log2(float64(outputTokens))) + inputIndex := int64(math.Round(math.Log2(float64(inputTokens)) / precisionRequestTrace)) // Round to the nearest precision and convert to int + outputIndex := int64(math.Round(math.Log2(float64(outputTokens)) / precisionRequestTrace)) klog.V(5).Infof("inputTokens: %v, inputIndex: %v, outputTokens: %v, outputIndex: %v", inputTokens, inputIndex, outputTokens, outputIndex) if len(c.requestTrace[modelName]) == 0 { c.requestTrace[modelName] = map[string]int{} + c.requestTrace[modelName][keyWriteRequestTraceIntervalInSeconds] = writeRequestTraceIntervalInSeconds + c.requestTrace[modelName][keyPrecisionRequestTrace] = int(1 / precisionRequestTrace) + c.requestTrace[modelName][keyVersionRequestTrace] = versionRequestTrace } c.requestTrace[modelName][fmt.Sprintf("%v:%v", inputIndex, outputIndex)] += 1 diff --git a/python/aibrix/aibrix/gpu_optimizer/Makefile b/python/aibrix/aibrix/gpu_optimizer/Makefile new file mode 100644 index 00000000..e4a8ac3a --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/Makefile @@ -0,0 +1,64 @@ +all: build + +DP ?= profiling +DATASET ?= [set your DATASET path] + +.PHONY: deploy +deploy: + kubectl apply -f deployment.yaml + sleep 2 + kubectl -n aibrix-system port-forward svc/gpu-optimizer 8080:8080 1>/dev/null 2>&1 & + +.PHONY: clean +clean: + kubectl delete -f deployment.yaml + sleep 1 + curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b + +.PHONY: benchmark +benchmark: + optimizer/profiling/benchmark.sh $(DP) + +.PHONY: gen-profile +gen-profile: + python optimizer/profiling/gen_profile.py $(DP) -o "redis://localhost:6379/?model=llama2-7b" + +.PHONY: debug-init +debug-init: + kubectl -n aibrix-system port-forward svc/aibrix-redis-master 6379:6379 1>/dev/null 2>&1 & + +.PHONY: debug +debug: + python -m app --debug + +.PHONY: debug-init-simulator +debug-init-simulator: + curl http://localhost:8080/monitor/aibrix-system/simulator-llama2-7b \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any_key" \ + -d '{}' + +.PHONY: debug-scale-simulator +debug-scale-simulator: + curl http://localhost:8080/scale/aibrix-system/simulator-llama2-7b/2 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any_key" \ + -d '{}' + +.PHONY: 
debug-stop-simulator
+debug-stop-simulator:
+	curl -X DELETE http://localhost:8080/monitor/aibrix-system/simulator-llama2-7b \
+		-H "Content-Type: application/json" \
+		-H "Authorization: Bearer any_key"
+
+.PHONY: debug-metrics
+debug-metrics:
+	curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b
+
+.PHONY: debug-workload
+debug-workload:
+	python optimizer/profiling/gpu_benchmark.py --backend=vllm --port 8888 --request-rate=10 --num-prompts=100 --input_len 2000 --output_len 512 --model=llama2-7b --verbose
+
+.PHONY: visualizer
+visualizer:
+	python -m load_monitor.visualizer --dataset $(DATASET) --redisprofile "redis://localhost:6379/?model=llama2-7b"
\ No newline at end of file
diff --git a/python/aibrix/aibrix/gpu_optimizer/README.md b/python/aibrix/aibrix/gpu_optimizer/README.md
new file mode 100644
index 00000000..347a2603
--- /dev/null
+++ b/python/aibrix/aibrix/gpu_optimizer/README.md
@@ -0,0 +1,56 @@
+# GPU Optimizer: a vLLM Auto Scaler with Heterogeneous GPU support
+
+## Run in Kubernetes
+
+1. Make sure the Aibrix components are up-to-date.
+
+2. For now, build the GPU Optimizer image using the Dockerfile in this folder.
+```shell
+docker build -t aibrix/gpu-optimizer:nightly -f Dockerfile .
+
+# Or use make
+make build
+```
+
+3. Prepare the performance benchmark using optimizer/profiling/benchmark.sh (see optimizer/profiling/README.md). You may need to expose the pod interface first:
+```shell
+# Make sure the pod is accessible locally:
+kubectl -n aibrix-system port-forward [pod_name] 8010:8000 1>/dev/null 2>&1 &
+```
+
+If you are using the CPU-based vLLM simulator, sample profiles are included in optimizer/profiling/result.
+
+4. Generate a profile based on the SLO target using optimizer/profiling/gen_profile.py. If using the CPU-based vLLM simulator, execute:
+```shell
+# Make sure Redis is accessible locally:
+kubectl -n aibrix-system port-forward svc/aibrix-redis-master 6379:6379 1>/dev/null 2>&1 &
+# Or use make
+make debug-init
+
+python optimizer/profiling/gen_profile.py simulator-llama2-7b-a100 -o "redis://localhost:6379/?model=llama2-7b"
+# Or use make
+make DP=simulator-llama2-7b-a100 gen-profile
+```
+
+5. Deploy the GPU Optimizer:
+```shell
+kubectl apply -f deployment.yaml
+kubectl -n aibrix-system port-forward svc/gpu-optimizer 8080:8080 1>/dev/null 2>&1 &
+
+# Or use make
+make deploy
+```
+
+6. Deploy your vLLM model. If running locally, a CPU-based vLLM simulator is provided; see docs/development/simulator for details.
+
+7. Start a workload and watch the model scale. The benchmark toolkit can be used to generate load:
+```shell
+# Make sure the gateway is reachable locally; see docs/development/simulator/README.md for details.
+python optimizer/profiling/gpu_benchmark.py --backend=vllm --port 8888 --request-rate=10 --num-prompts=100 --input_len 2000 --output_len 128 --model=llama2-7b
+```
+
+8. Observability: visit http://localhost:8080/dash/llama2-7b for workload pattern visualization. An independent visualization demo can be started with:
+```shell
+python -m load_monitor.visualizer
+```
\ No newline at end of file
diff --git a/python/aibrix/aibrix/gpu_optimizer/__init__.py b/python/aibrix/aibrix/gpu_optimizer/__init__.py
new file mode 100644
index 00000000..6461ec1a
--- /dev/null
+++ b/python/aibrix/aibrix/gpu_optimizer/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 The Aibrix Team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/aibrix/aibrix/gpu_optimizer/app.py b/python/aibrix/aibrix/gpu_optimizer/app.py new file mode 100644 index 00000000..4368a2ac --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/app.py @@ -0,0 +1,364 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import threading +from typing import Dict, Optional, Tuple + +import redis +import uvicorn +from kubernetes import client, config, watch +from starlette.applications import Starlette +from starlette.responses import JSONResponse, PlainTextResponse + +from aibrix.gpu_optimizer.load_monitor.load_reader import GatewayLoadReader +from aibrix.gpu_optimizer.load_monitor.monitor import DeploymentStates, ModelMonitor +from aibrix.gpu_optimizer.load_monitor.profile_reader import RedisProfileReader +from aibrix.gpu_optimizer.load_monitor.visualizer import mount_to as mount_visulizer +from aibrix.gpu_optimizer.utils import ExcludePathsFilter + +NAMESPACE = os.getenv("NAMESPACE", "aibrix-system") +MODEL_LABEL = "model.aibrix.ai/name" +MIN_REPLICAS_LABEL = "model.aibrix.ai/min_replicas" +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + +routes = [] # type: ignore +model_monitors: Dict[str, ModelMonitor] = {} # Dictionary to store serving threads + +mount_visulizer( + routes, "/dash/{model_name}", lambda model_name: model_monitors.get(model_name) +) +app = Starlette(routes=routes) +redis_client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=0) # Default DB +debug = False + + +def validate_model(deployment) -> Tuple[str, Optional[ModelMonitor]]: + """Validate the deployment and return the model monitor if the deployment is valid.""" + labels = deployment.metadata.labels + if not labels: + raise Exception( + f'No labels found for this deployment, please specify "{MODEL_LABEL}" label in deployment.' + ) + + # Access model label + model_name = labels.get(MODEL_LABEL) + if not model_name: + raise Exception( + f'No "{MODEL_LABEL}" label found for this deployment, please specify "{MODEL_LABEL}" label in deployment.' 
+ ) + + if model_name in model_monitors: + return model_name, model_monitors[model_name] + else: + return model_name, None + + +def new_deployment(deployment): + """Return a new DeploymentStates object from the deployment.""" + min_replicas = 0 + labels = deployment.metadata.labels + if labels: + label = labels.get(MIN_REPLICAS_LABEL) + if label: + min_replicas = int(label) + + return DeploymentStates( + deployment.metadata.name, + deployment.spec.replicas if deployment.spec.replicas is not None else 0, + min_replicas, + ) + + +def start_serving_thread(watch_ver, deployment, watch_event: bool) -> bool: + """Start model monitor, returns True if a new server thread is created, False otherwise.""" + model_name, model_monitor = validate_model(deployment) + + # Get deployment specs + deployment_name = deployment.metadata.name + namespace = deployment.metadata.namespace + + # Update profile if key exists + if model_monitor is not None: + model_monitor.add_deployment( + watch_ver, deployment_name, namespace, lambda: new_deployment(deployment) + ) + logger.info( + f'Deployment "{deployment_name}" added to the model monitor for "{model_name}"' + ) + return False + + reader = GatewayLoadReader(redis_client, model_name) + profile = RedisProfileReader(redis_client, model_name) + model_monitor = ModelMonitor( + model_name, + watch_ver, + reader, + deployment=new_deployment(deployment), + namespace=namespace, + profile_reader=profile, + debug=debug, + ) + model_monitor.start() + model_monitors[model_name] = model_monitor + if watch_event: + logger.info( + f'New model monitor started for "{model_name}". Deployment "{deployment_name}" added.' + ) + else: + logger.info( + f'Model monitor started for existed "{model_name}". Deployment "{deployment_name}" added.' + ) + return True + + +def remove_deployment(deployment): + """Remove deployment from model monitor""" + model_name, model_monitor = validate_model(deployment) + + deployment_name = deployment.metadata.name + namespace = deployment.metadata.namespace + if model_monitor is None: + logger.warning( + f'Removing "{deployment_name}" from the model monitor, but "{model_name}" has not monitored.' + ) + return + + if model_monitor.remove_deployment(deployment_name, namespace) == 0: + model_monitor.stop() + del model_monitors[model_name] + logger.info( + f'Removing "{deployment_name}" from the model monitor, no deployment left in "{model_name}", stopping the model monitor.' + ) + return + + logger.info( + f'Removing "{deployment_name}" from the model monitor for "{model_name}".' 
+ ) + + +@app.route("/monitor/{namespace}/{deployment_name}", methods=["POST"]) +async def start_deployment_optimization(request): + namespace = request.path_params["namespace"] + deployment_name = request.path_params["deployment_name"] + try: + # Verify the deployment exists + apps_v1 = client.AppsV1Api() + deployment = apps_v1.read_namespaced_deployment(deployment_name, namespace) + + # Start the deployment optimization + if start_serving_thread(None, deployment, True): + return JSONResponse({"message": "Deployment optimization started"}) + else: + return JSONResponse({"message": "Deployment optimization already started"}) + except Exception as e: + return JSONResponse( + {"error": f"Error starting deployment optimization: {e}"}, status_code=500 + ) + + +@app.route("/monitor/{namespace}/{deployment_name}", methods=["DELETE"]) +async def stop_deployment_optimization(request): + namespace = request.path_params["namespace"] + deployment_name = request.path_params["deployment_name"] + try: + # Verify the deployment exists + apps_v1 = client.AppsV1Api() + deployment = apps_v1.read_namespaced_deployment(deployment_name, namespace) + + # Stop the deployment optimization + remove_deployment(deployment) + return JSONResponse({"message": "Deployment optimization stopped"}) + except Exception as e: + return JSONResponse( + {"error": f"Error stopping deployment optimization: {e}"}, status_code=500 + ) + + +@app.route("/scale/{namespace}/{deployment_name}/{replicas}", methods=["POST"]) +async def scale_deployment(request): + namespace = request.path_params["namespace"] + deployment_name = request.path_params["deployment_name"] + replicas = request.path_params["replicas"] + try: + # Verify the deployment exists + apps_v1 = client.AppsV1Api() + deployment = apps_v1.read_namespaced_deployment(deployment_name, namespace) + + model_name, monitor = validate_model(deployment) + if monitor is None: + raise Exception(f'Model "{model_name}" is not monitored.') + + # Set the scaling metrics (path parameters arrive as strings; convert to int) + monitor.update_deployment_num_replicas(deployment_name, namespace, int(replicas)) + + return JSONResponse({"message": f"Scaled to {replicas}"}) + except Exception as e: + return JSONResponse( + {"error": f"Error scaling deployment: {e}"}, status_code=500 + ) + + +@app.route("/metrics/{namespace}/{deployment_name}") +async def get_deployment_metrics(request): + namespace = request.path_params["namespace"] + deployment_name = request.path_params["deployment_name"] + # get deployment information + try: + apps_v1 = client.AppsV1Api() + deployment = apps_v1.read_namespaced_deployment(deployment_name, namespace) + + model_name, monitor = validate_model(deployment) + if monitor is None: + raise Exception(f'Model "{model_name}" is not monitored.') + + replicas = monitor.read_deployment_num_replicas(deployment_name, namespace) + + # construct Prometheus-style Metrics + metrics_output = f"""# HELP vllm:deployment_replicas Number of suggested replicas.
+# TYPE vllm:deployment_replicas gauge +vllm:deployment_replicas{{model_name="{model_name}"}} {replicas} +""" + return PlainTextResponse(metrics_output) + except Exception as e: + logger.error(f"Failed to read metrics: {e}") + return JSONResponse({"error": f"Failed to read metrics: {e}"}, status_code=404) + + +def main(signal, timeout): + logger.info(f"Starting GPU optimizer (debug={debug}) ...") + while not signal["done"]: + signal["watch"] = None + + # Mark all deployments as outdated + for model_name, model_monitor in model_monitors.items(): + model_monitor.mark_deployments_outdated() + + try: + apps_v1 = client.AppsV1Api() + + # List existing deployments + logger.info(f"Looking for deployments in {NAMESPACE} with {MODEL_LABEL}") + deployments = apps_v1.list_namespaced_deployment( + namespace=NAMESPACE, label_selector=MODEL_LABEL + ) + watch_version = deployments.metadata.resource_version + logger.debug(f"last watch version: {watch_version}") + for deployment in deployments.items: + try: + start_serving_thread(watch_version, deployment, False) + except Exception as e: + logger.warning( + f"Error on handle existing deployment {deployment.metadata.name}: {e}" + ) + except client.rest.ApiException as ae: + logger.error( + f"Error connecting to Kubernetes API: {ae}. Please manually initiate GPU optimizer by calling the /monitor/{{namespace}}/{{deployment_name}} endpoint" + ) + return + except Exception as e: + logger.warning(f"Unexpect error on exam exist deployment: {e}") + return + + for model_name, model_monitor in model_monitors.items(): + if model_monitor.clear_outdated_deployments() == 0: + logger.info( + f'No deployment exists any more in "{model_name}", stopping the model monitor.' + ) + model_monitor.stop() + del model_monitors[model_name] + + try: + # Watch future deployments + w = watch.Watch() + signal["watch"] = w + for event in w.stream( + apps_v1.list_namespaced_deployment, + namespace=NAMESPACE, + label_selector=MODEL_LABEL, + resource_version=watch_version, + timeout_seconds=timeout, + ): + if signal["done"]: + return + try: + deployment = event["object"] + if event["type"] == "ADDED": + start_serving_thread(watch_version, deployment, True) + elif event["type"] == "DELETED": + remove_deployment(deployment) + except Exception as e: + logger.warning( + f"Error on handle event {event['type']} {deployment.metadata.name}: {e}" + ) + except client.rest.ApiException as ae: + logger.warning(f"Error connecting to Kubernetes API: {ae}. 
Will retry.") + except Exception as e: + logger.warning(f"Unexpect error on watch deployment: {e}") + return + + +if __name__ == "__main__": + import sys + + if "--debug" in sys.argv: + debug = True + + # Setup default logger + logging.basicConfig(level=logging.DEBUG if debug else logging.INFO) + logging.getLogger("kubernetes.client.rest").setLevel( + logging.ERROR + ) # Suppress kubenetes logs + logging.getLogger("pulp.apis.core").setLevel(logging.INFO) # Suppress pulp logs + logger = logging.getLogger("aibrix.gpuoptimizer") + + timeout = 600 + try: + config.load_incluster_config() + except Exception: + # Local debug + config.load_kube_config(config_file="~/.kube/config") + signal = {"done": False, "watch": None} + threading.Thread( + target=main, + args=( + signal, + timeout, + ), + ).start() # Run Kubernetes informer in a separate thread + + uvicorn.run( + app, + host="0.0.0.0", + port=8080, + log_config={ + "version": 1, + "disable_existing_loggers": False, + "loggers": {"uvicorn.access": {"filters": ["dash_update_filter"]}}, + "filters": { + "dash_update_filter": { + "()": ExcludePathsFilter, + "exclude_paths": ["/dash/{model_name}/_dash"], # Paths to exclude + }, + }, + }, + ) + + signal["done"] = True + if signal["watch"] is not None: + signal["watch"].stop() # type: ignore + + # clean up + for model_name, model_monitor in model_monitors.items(): + model_monitor.stop() diff --git a/python/aibrix/aibrix/gpu_optimizer/deployment.yaml b/python/aibrix/aibrix/gpu_optimizer/deployment.yaml new file mode 100644 index 00000000..1e122b93 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/deployment.yaml @@ -0,0 +1,76 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pod-autoscaler + namespace: aibrix-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: aibrix-system + name: deployment-reader +rules: + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: deployment-reader-binding + namespace: aibrix-system +subjects: + - kind: ServiceAccount + name: pod-autoscaler + namespace: aibrix-system +roleRef: + kind: Role + name: deployment-reader + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gpu-optimizer + namespace: aibrix-system +spec: + replicas: 1 + selector: + matchLabels: + app: gpu-optimizer + template: + metadata: + labels: + app: gpu-optimizer + spec: + serviceAccountName: pod-autoscaler + automountServiceAccountToken: true # Important! + containers: + - name: gpu-optimizer + image: aibrix/runtime:nightly + command: ["python", "-m", "aibrix.gpu_optimizer.app", "--debug"] + ports: + - containerPort: 8080 + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: REDIS_HOST + value: aibrix-redis-master.aibrix-system.svc.cluster.local +--- +# Debug only: Make sure pod can be visited from controller that deployed in mac. 
+apiVersion: v1 +kind: Service +metadata: + name: gpu-optimizer + namespace: aibrix-system +spec: + selector: + app: gpu-optimizer + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 + nodePort: 30008 + type: NodePort \ No newline at end of file diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/__init__.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/__init__.py new file mode 100644 index 00000000..6461ec1a --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/clusterer.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/clusterer.py new file mode 100644 index 00000000..5fc887d9 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/clusterer.py @@ -0,0 +1,189 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
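As a usage note for the HTTP endpoints and the debug Service defined above: once `svc/gpu-optimizer` is port-forwarded to localhost:8080 (as the Makefile debug targets do), the optimizer can be driven from Python. A minimal sketch, assuming the `requests` package is available and reusing the `simulator-llama2-7b` deployment name from the debug targets:

```python
# Illustrative client only; the endpoint paths mirror app.py above, while the
# deployment name and namespace are assumptions taken from the Makefile debug targets.
import requests

BASE = "http://localhost:8080"
NS, DEPLOY = "aibrix-system", "simulator-llama2-7b"

# Start monitoring/optimizing the deployment (normally triggered by the k8s watcher).
print(requests.post(f"{BASE}/monitor/{NS}/{DEPLOY}").json())

# Read the suggested replica count in Prometheus text format.
print(requests.get(f"{BASE}/metrics/{NS}/{DEPLOY}").text)

# Manually override the recorded replica count, then stop monitoring.
print(requests.post(f"{BASE}/scale/{NS}/{DEPLOY}/2").json())
print(requests.delete(f"{BASE}/monitor/{NS}/{DEPLOY}").json())
```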
+ +import logging +import math +import sys +from datetime import datetime +from typing import Callable, Iterable, List, Optional, Protocol, Tuple, Union + +from incdbscan import IncrementalDBSCAN + +from aibrix.gpu_optimizer.utils import DelayedLog + +from .helpers import Centeroid, DataPoints + +logger = logging.getLogger("aibrix.gpuoptimizer.clusterer") + + +class Clusterer(Protocol): + def insert(self, points): + """Pass in a list of data points to be clustered""" + + def reset(self): + """Reset the clusterer""" + + def get_cluster_labels( + self, points: DataPoints, uncategorized: Optional[List] = None + ) -> Tuple[Iterable[int], Iterable[Centeroid]]: + """Get the cluster labels for the given data points""" + + @property + def length(self): + """Get the number of data points in the clusterer""" + + +class DBSCANClusterer: + def __init__(self, eps: float, min_pts: int): + self.eps = eps + self.min_pts = min_pts + self.reset() + + def insert(self, points: DataPoints): + self.clusterer.insert(points.signatures) + self._length += len(points) + + def reset(self): + self.clusterer = IncrementalDBSCAN(eps=self.eps, min_pts=self.min_pts) + self._length = 0 + self.created = datetime.now().timestamp() + + def clone(self): + return DBSCANClusterer(self.eps, self.min_pts) + + def get_cluster_labels( + self, points: DataPoints, uncategorized: Optional[List] = None + ) -> Tuple[Iterable[int], Iterable[Centeroid]]: + labels = self.clusterer.get_cluster_labels(points.signatures) + centers = {} + start_label = sys.maxsize + for i, label in enumerate(labels): + if math.isnan(label): + continue + if label < 0: + if uncategorized is not None: + uncategorized.append(points.datapoint(i)) + continue + start_label = min(start_label, label) + if label not in centers: + centers[label] = Centeroid() + if len(centers) > 10: + print(f"unepxected label:{label}") + centers[label].add(points.datapoint(i)) + # Try fixing label index. + if start_label == sys.maxsize: + start_label = 0 + if start_label != 0: + for i, label in enumerate(labels): + if label >= 0: + labels[i] -= start_label + return labels, centers.values() + + @property + def length(self): + return self._length + + +class MovingDBSCANClusterer: + """MovingCluster uses extra buffer space to store a moving DBSCAN cluster""" + + def __init__( + self, + eps: float, + min_pts: int, + buffer_size=4, + window: Union[int, float, Callable[[DBSCANClusterer], bool]] = 4000, + ): + if isinstance(window, int): + self.window_cb = self._get_points_window_cb(window) + elif isinstance(window, float): + self.window_cb = self._get_time_window_cb(window) + else: + self.window_cb = window + + self.buffer_size = buffer_size + self.frontier = 0 + self.clusterers = [DBSCANClusterer(eps, min_pts)] + self._reason = None + + def validate(self) -> bool: + """Do necessary window rotating and return if data refreshing is necessary""" + current = (self.frontier + 1) % len(self.clusterers) + if not self.window_cb(self.clusterers[self.frontier]): + return False + + # Reset next slot in buffer. 
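+ # The buffer keeps up to `buffer_size` staggered clusterers: `frontier` points to the
+ # newest one, and the slot right after it (cyclically) is the oldest, which serves
+ # queries through the `clusterer` property. Until the buffer is full, rotation simply
+ # appends a fresh clusterer; afterwards the oldest slot is reset and reused, and
+ # validate() returns True so the caller can trim its own data buffer to match.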
+ if len(self.clusterers) < self.buffer_size: + self.clusterers.append(self.clusterers[current].clone()) + self.frontier = len(self.clusterers) - 1 + logger.debug("test") + logger.debug( + "moving buffer created: %s, buffers: %s", + self._reason, + DelayedLog(lambda: [cluster.length for cluster in self.clusterers]), + ) + return False + else: + self.clusterers[current].reset() + self.frontier = current + current = (current + 1) % self.buffer_size + logger.debug( + "moving buffer created: %s, now available: %s, buffers: %s", + self._reason, + self.clusterer.length, + DelayedLog(lambda: [cluster.length for cluster in self.clusterers]), + ) + return True + # data.trim_head(-cluster['clusterers'][0][current].length) + + def insert(self, points: DataPoints): + for clusterer in self.clusterers: + clusterer.insert(points) + + def reset(self): + self.clusterers = [self.clusterers[0].clone()] + self.frontier = 0 + + def get_cluster_labels( + self, points: DataPoints, uncategorized: Optional[List] = None + ) -> Tuple[Iterable[int], Iterable[Centeroid]]: + return self.clusterer.get_cluster_labels(points, uncategorized=uncategorized) + + @property + def length(self): + return self.clusterer.length + + @property + def clusterer(self): + return self.clusterers[(self.frontier + 1) % len(self.clusterers)] + + def _get_points_window_cb(self, window: int) -> Callable[[DBSCANClusterer], bool]: + return ( + lambda clusterer: clusterer.length >= window / self.buffer_size + and self.reason(f"reached {round(window / self.buffer_size)} points") + ) + + def _get_time_window_cb(self, window: float) -> Callable[[DBSCANClusterer], bool]: + return ( + lambda clusterer: datetime.now().timestamp() - clusterer.created + >= window / self.buffer_size + and self.reason( + f"timeout after {round(window / self.buffer_size, 2)} seconds" + ) + ) + + def reason(self, msg) -> bool: + """Provide a reason for the window to be rotated, always return True.""" + self._reason = msg + return True diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/helpers.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/helpers.py new file mode 100644 index 00000000..03739aa9 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/helpers.py @@ -0,0 +1,292 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
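To make the interplay between the clusterer above and the buffer/centroid helpers defined below concrete, here is a small illustrative sketch (not part of the package; it assumes the package is importable as `aibrix` and borrows the eps/min_pts values used by the load monitor):

```python
# Sketch: cluster synthetic (log2 input, log2 output) token signatures.
import numpy as np

from aibrix.gpu_optimizer.load_monitor.clusterer import MovingDBSCANClusterer
from aibrix.gpu_optimizer.load_monitor.helpers import DataBuffer, DataPoint

clusterer = MovingDBSCANClusterer(eps=0.5, min_pts=10, buffer_size=4, window=4000)
buffer = DataBuffer(4000)

# Two synthetic traffic patterns in (log2 input tokens, log2 output tokens) space.
rng = np.random.default_rng(0)
patterns = [np.array([9.0, 7.0])] * 50 + [np.array([11.0, 5.0])] * 50
points = [DataPoint(*(p + rng.normal(0, 0.1, 2)), age=i) for i, p in enumerate(patterns)]

dps = buffer.append(points)   # stage the points in the shared buffer
clusterer.insert(dps)         # feed the same view to the moving clusterer
buffer.commit()

if clusterer.validate():      # rotate the moving window when it is due
    buffer.trim_head(-clusterer.length)

labels, centers = clusterer.get_cluster_labels(buffer.datapoints)
for center in centers:
    print(center)             # e.g. Centeroid(center=(..., ...), rps=...)
```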
+ +from typing import Callable, List, Optional, Tuple, Union + +import numpy as np +from typing_extensions import TypeAlias + +DataSignatures: TypeAlias = np.ndarray +"""ndarray of shape(n, 2)""" + +DataSignature: TypeAlias = np.ndarray +"""ndarray of shape(2,)""" + + +class DataPoint(np.ndarray): + def __new__( + cls, + *args, + age: Union[int, float] = 0, + ndarray: Optional[np.ndarray] = None, + **kwargs, + ): + if ndarray is not None: + return ndarray.view(cls) + + ins = np.empty((3,)) + if len(args) > 0: + ins[0] = args[0] + if len(args) > 1: + ins[1] = args[1] + ins[2] = age + return ins + + @property + def age(self): + return self[2] + + @property + def signature(self) -> DataSignature: + return self[:2] + + +class DataPoints(np.ndarray): + def __new__(cls, ndarray: np.ndarray): + return ndarray.view(cls) + + @property + def signatures(self) -> DataSignatures: + return self[:, :2] + + def datapoint(self, idx): + return DataPoint(ndarray=self[idx]) + + +class DataBuffer: + def __init__(self, cap: int): + self._xy = np.empty((cap, 3), dtype=float) + self._commited = 0 + """The length of data that has been processed and ready to read""" + self._head = 0 + """All length of all data that includes processing data points.""" + + def reconcile(self, cap: int): + if cap < self._xy.shape[0]: + # We do not shrink + return + + new_cap = self._xy.shape[0] * 2 + while new_cap < cap: + new_cap *= 2 + self._xy = np.resize(self._xy, (new_cap, 3)) + + def append(self, tokens: List[DataPoint], commit: bool = False) -> DataPoints: + """Append data points to the buffer. If commit is True, the data points will be committed immediately and could lead to data inconsistent if it takes a long time to process new data.""" + size_gap = self._commited + len(tokens) - self._xy.shape[0] + # Check buffer size. + if size_gap > 0: + # We do not expand the buffer automatically, simply evicting some records to make room for new data. + self.trim_head(size_gap) + + # Check tokens size. If tokens is too large, buffer has been cleared at this point. + if len(tokens) > self._xy.shape[0]: + tokens = tokens[-self._xy.shape[0] :] + + self._xy[self._commited : self._commited + len(tokens)] = tokens + ret = DataPoints(self._xy[self._commited : self._commited + len(tokens)]) + self._head += len(tokens) + if commit: + self.commit() + return ret + + def commit(self): + self._commited = self._head + + def trim_head(self, start): + if self._head != self._commited: + raise Exception("Cannot trim head when there are uncommited data points.") + + if start >= self._commited: + # empty(), skip data moving + self._head = self._commited = 0 + return + elif start <= -self._commited: + # keep all data + return + + # Do compacting. + # implement self._xy[:-start] = self._xy[start:] with variable buffer length. 
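+ # A positive `start` evicts the oldest `start` committed rows; a negative `start` keeps
+ # only the newest `-start` rows. Either way the surviving rows are compacted to the front
+ # of the buffer and the committed length is updated accordingly.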
+ if start > 0: + self._xy[: self._commited - start] = self._xy[start : self._commited] + self._commited -= start + else: + self._xy[:-start] = self._xy[self._commited + start : self._commited] + self._commited = -start + self._head = self._commited + + def clear(self): + self._commited = 0 + self._head = 0 + + @property + def x(self): + return self._xy[: self._commited, 0] + + @property + def y(self): + return self._xy[: self._commited, 1] + + @property + def datapoints(self) -> DataPoints: + return DataPoints(self._xy[: self._commited]) + + @property + def len(self): + return self._commited + + @property + def cap(self): + return self._xy.shape[0] + + +class Centeroid: + def __init__(self): + """Centeroid calculates the mass center, radius, and size of data points.""" + self._sum_center = None + self._range_max = None + self._range_min = None + self._span_max = 0 + self._span_min = 0 + self._size = 0 + self._signature = None + self._up_to_date = False # Whether the signature is up to date. + + def add(self, point: DataPoint): + if self._sum_center is None: + self._sum_center = list(point.signature) + self._range_min = list(point.signature) + self._range_max = list(point.signature) + self._span_max = point.age + self._span_min = point.age + self._signature = np.zeros_like(self._sum_center, dtype=int) + else: + for i, val in enumerate(point.signature): # type: ignore + self._sum_center[i] += val + self._range_min[i] = min(self._range_min[i], val) + self._range_max[i] = max(self._range_max[i], val) + self._span_min = min(self._span_min, point.age) + self._span_max = max(self._span_max, point.age) + self._up_to_date = False + + self._size += 1 + + def get_signature( + self, + indexes: List[List[float]], + error_suppressor: Optional[ + Callable[[int, float, float, float, float], None] + ] = None, + ) -> Tuple[int]: + """Generate the index signature of the centroid within the indexes' range. + + Args: + indexes: A list of list of float, each list is a range of values. + error_suppressor: A function to handle the error with parameters(value, index assigned, value of index, offset). If None, raise an exception. + """ + if len(self._signature) != len(indexes): + raise Exception( + f"Indexes and centeroid signature size mismatch, {len(self._signature)}:{len(indexes)}" + ) + + if self._up_to_date: + return self.signature + + for i, value in enumerate(self.center): + if len(indexes[i]) == 0: + raise Exception("Indexes size mismatch, at least 1.") + elif len(indexes[i]) == 1: + self._signature[i] = 0 + continue + + # Assuming indexes are ascending ordered. + distance = (indexes[i][-1] - indexes[i][0]) / (len(indexes[i]) - 1) + if value < indexes[i][0] - distance / 2: + if error_suppressor is not None: + self._signature[i] = 0 + error_suppressor(i, value, 0, indexes[i][0], -distance / 2) + else: + raise Exception( + f"Centeroid is out of range: {i}:{value} and accepted minimum {indexes[i][0]} (with offset {- distance / 2})." + ) + elif value > indexes[i][-1] + distance / 2: + if error_suppressor is not None: + self._signature[i] = len(indexes[i]) - 1 + error_suppressor( + i, value, len(indexes[i]) - 1, indexes[i][-1], distance / 2 + ) + else: + raise Exception( + f"Centeroid is out of range: {i}:{value} and accepted maximum {indexes[i][-1]} (with offset {distance / 2})." + ) + else: + # Find the index using binary search. 
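+ # `indexes[i]` is assumed to be ascending; narrow [left, right] until the two bounds are
+ # adjacent and the centroid value lies between them, then pick one of the two neighbouring
+ # positions as the signature entry (exact matches break out early).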
+ left, right = 0, len(indexes[i]) - 1 + found = False + while left < right - 1: + mid = (left + right) // 2 + if value < indexes[i][mid]: + right = mid + elif value > indexes[i][mid]: + left = mid + else: + self._signature[i] = mid + found = True + break + if not found: + self._signature[i] = ( + left + if value < (indexes[i][left] + indexes[i][left]) / 2 + else right + ) + + self._up_to_date = True + return self.signature + + @property + def center(self): + return tuple(val / self._size for val in self._sum_center) + + @property + def radius(self): + return max( + (val - self._range_min[i]) / 2 for i, val in enumerate(self._range_max) + ) + + @property + def size(self): + return self._size + + @property + def span(self): + return self._span_max - self._span_min + 1 + + @property + def signature(self) -> Tuple[int]: + if not self._up_to_date: + raise Exception("Signature is not up to date.") + return tuple(self._signature.tolist()) + + @property + def rate(self): + return self._size / self.span + + def to_array(self): + ret = list(self.center) + ret.append(self.radius) + ret.append(self.rate) + return ret + + def __str__(self) -> str: + return f"Centeroid(center={self.center}, rps={self.rate})" diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/load_reader.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/load_reader.py new file mode 100644 index 00000000..ea645040 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/load_reader.py @@ -0,0 +1,293 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging +import re +from datetime import datetime +from typing import Any, List, Optional, Protocol, Union + +import numpy as np +import pandas as pd +from redis import Redis + +logger = logging.getLogger("aibrix.gpu_optimizer.load_reader") + +unittest_filepath = "unittest_694cb6cf-f5b3-42ca-b3c1-55ff0b358bdb" + + +class LoadRecord(tuple): + """LoadRecord models a tuple with the following fields: ts, input tokens, output tokens, and frequency.""" + + def __new__(cls, *args: Any, **kwargs: Any) -> "LoadRecord": + return super(LoadRecord, cls).__new__(cls, args) + + @property + def ts(self) -> float: + return self[0] + + @property + def input_tokens(self) -> float: + return self[1] + + @property + def output_tokens(self) -> float: + return self[2] + + @property + def freq(self) -> int: + if len(self) < 4: + return 1 + return self[3] + + +class LoadReader(Protocol): + def read(self, ts: float = 0.0) -> List[LoadRecord]: + """Read the next batch of records from the data source.""" + + def progress(self) -> str: + """Return the progress description of the data source.""" + + def next_available(self) -> float: + """Return the timestamp next batch of data will be available.""" + + +class DatasetLoadReader: + """DatasetLoadReader reads the load records from a dataset. + To match the behavior of the gateway, the input and output tokens are rounded to the nearest integer of log2. 
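+ (With the aggregation used here, values are rounded to one decimal place of log2: for example, an input of 1000 tokens is stored as round(log2(1000), 1) = 10.0.)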
+ """ + + def __init__( + self, filepath, rps: int = 10, scale: float = 1.0, interval: int = 10 + ) -> None: + if filepath != unittest_filepath: + self.df = pd.read_csv(filepath) + self.df["input_tokens"] = self.log2_aggregate( + self.df["input_tokens"] * scale, 1 + ) + self.df["output_tokens"] = self.log2_aggregate( + self.df["output_tokens"] * scale, 1 + ) + # self.df['input_tokens'] = self.stair_aggregate(self.df['input_tokens'] * scale) + # self.df['output_tokens'] = self.stair_aggregate(self.df['output_tokens'] * scale) + + self.rps = rps + self.interval = 10 + self.n_read = 0 + self.n = 0 + + def log2_aggregate(self, series: pd.Series, precision: int = 0) -> List: + return np.round(np.log2(series), precision) + + def stair_aggregate(self, series: List, skip_log2: bool = False) -> List: + BaseBucketBits = 3 + ScalingBits = 4 + + scale = ( + np.maximum(np.floor(np.log2(series)) - BaseBucketBits, 0) // ScalingBits + 1 + ) + bucketbits = np.maximum( + (scale - 1) * ScalingBits + BaseBucketBits - 1, BaseBucketBits + ) + aggregated = np.maximum(series - np.mod(series, 2**bucketbits), 1) + return aggregated if skip_log2 else np.log2(aggregated) + + def read(self, ts: float = 0.0) -> List[LoadRecord]: + """Read the next batch of records from the data source. + + args: + ts: float, ignored. + """ + records = [] + # Simulate the arrival of requests using Poisson distribution + n_batch = np.random.poisson(self.rps * self.interval) + self.last_ts = ts + end = self.n_read + n_batch + if end > len(self.df): + end = len(self.df) + + chunk = self.df.iloc[self.n_read : end] + self.n_read = end + for _, row in chunk.iterrows(): + records.append( + LoadRecord( + self.n * self.interval, row["input_tokens"], row["output_tokens"] + ) + ) + self.n += 1 + + return records + + def progress(self) -> str: + return f"{round(self.n_read / len(self.df) * 100, 2)}%" + + def next_available(self) -> float: + """Dataset is available to read anytime.""" + return datetime.now().timestamp() + + +class GatewayLoadReader: + """GatewayLoadReader reads the load records from gateway generated statistics stored in Redis. + Currently, gateway will aggregate the load records into a single key per interval(e.g., 10s) with the following format: + + aibrix:{model_name}_request_trace_{ts} + + The value of the key is a json object with the following format: + + { + "{round(log2(input_tokens))}-{round(log2(output_tokens))}: {frequency} + } + """ + + def __init__( + self, redis_client: Redis, model_name: str, key_ts_alignment: int = 10 + ) -> None: + self.client: Redis = redis_client + self.start = 0.0 + self.last_ts = 0.0 + self.prefix = f"aibrix:{model_name}_request_trace_" + self.key_ts_alignment = key_ts_alignment + + def read(self, ts: float = 0.0) -> List[LoadRecord]: + """Read the next batch of records from the data source.""" + try: + if self.start == 0: + self.start = ts + return self.read_first() + + # Align the ts according to key_ts_alignment + ts = ts - ts % self.key_ts_alignment + if ts <= self.last_ts: + # Seen + return [] + + # TODO: Now profile seems to be have a interval delay. Further investigation is needed. 
+ profiles = self.read_key( + f"{self.prefix}{int(ts - self.key_ts_alignment)}", True + ) + self.last_ts = ts + + if profiles is None or len(profiles) == 0: + return [] + + return self._parse_profiles(profiles, ts) + + except Exception as e: + logger.warning(f"Failed to read from Redis: {e}") + return [] + + def read_first(self) -> List[LoadRecord]: + """Read the first batch of records from the data source.""" + cursor = 0 + matching_keys = [] + while True: + cursor, keys = self.client.scan(cursor=cursor, match=f"{self.prefix}*") # type: ignore + for key in keys: + # Decode the key from bytes to string + strkey = key.decode() + match = re.search(r"(?:.*?)_(\d+)$", strkey) + if match is None: + logger.warning(f"Unexpected {strkey} from Redis") + continue + matching_keys.append((key, int(match.group(1)))) + if cursor == 0: + break + if len(matching_keys) == 0: + self.last_ts = datetime.now().timestamp() + logger.info( + f"No pre-existed load profile matching {self.prefix}* found in Redis" + ) + return [] + + # Sort by ts to ensure profiles are processed by time order. + matching_keys = sorted(matching_keys, key=lambda k: k[1]) + + # Retrieve the objects associated with the keys + records: List[LoadRecord] = [] + for key in matching_keys: + try: + # Deserialize by json: dict[string]int + self.last_ts = key[1] + profiles = self.read_key(key[0], False) + if profiles is None or len(profiles) == 0: + continue + + self._parse_profiles(profiles, key[1], records) + except Exception as e: + logger.warning(f"Failed to parse {key[0].decode()} from Redis: {e}") + continue + + return records + + def read_key(self, key: Union[str, bytes], optional: bool) -> Optional[dict]: + logging_key = key.decode() if isinstance(key, bytes) else key + logger.debug( + f"Loading profile {logging_key} at {datetime.now().timestamp()}..." + ) + profile_data = self.client.get(key) + if profile_data is None: + if optional: + logger.debug(f"No load profile for {logging_key}") + else: + logger.warning(f"Failed to retrieve {logging_key} from Redis") + return None + + # Deserialize by json: dict[string]int + try: + profile = json.loads(profile_data.decode()) + if not isinstance(profile, dict): + raise Exception("Load profile is not a dictionary") + + return profile + except Exception as e: + raise Exception(f"{e}, raw: {profile_data.decode()}") + + def progress(self) -> str: + return "" + + def next_available(self) -> float: + """Dataset is available to read anytime.""" + return ( + self.last_ts + self.key_ts_alignment + 2 + ) # Add 1 second to tolerate possible delay + + def _parse_profiles( + self, profiles: dict, ts: float, out_records: List[LoadRecord] = [] + ) -> List[LoadRecord]: + # Load metainfo. + version = profiles.get("meta_version", 1) + precision = profiles.get("meta_precision", 1) + if version >= 2: + self.key_ts_alignment = profiles.get("meta_interval_sec", 10) + + # Parse load profile entries. + for k, v in profiles.items(): + # skip metainfos. 
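+ # Each remaining key is an integer pair "in:out" (the log2 token buckets multiplied by
+ # meta_precision); its value is the request count observed for that bucket.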
+ if re.match(r"^meta_", k): + continue + + # parse key: log2(input_tokens)-log2(output_tokens) + match = re.search(r"^(\d+):(\d+)$", k) + if match is None: + raise Exception(f'Unexpected load profile key {k}, expect "int:int".') + + value = int(v) + if value == 0 and v != "0": + raise Exception(f"Load profile value is not an integer: {v}") + + input_tokens = int(match.group(1)) / precision + output_tokens = int(match.group(2)) / precision + out_records.append(LoadRecord(ts, input_tokens, output_tokens, value)) + + return out_records diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py new file mode 100644 index 00000000..091fc152 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py @@ -0,0 +1,497 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import threading +import time +from datetime import datetime +from functools import reduce +from typing import Callable, Dict, Iterable, List, Optional, Union + +import numpy as np +import pandas as pd + +from aibrix.gpu_optimizer.optimizer import GPUProfile, Optimizer +from aibrix.gpu_optimizer.utils import DelayedLog + +from .clusterer import Clusterer, MovingDBSCANClusterer +from .helpers import Centeroid, DataBuffer, DataPoint +from .load_reader import LoadReader, LoadRecord +from .profile_reader import ProfileReader + +Empty_Array: Iterable = [] + +logger = logging.getLogger("aibrix.gpuoptimizer.loadmonitor") + +debug_gpu_profile = GPUProfile( + gpu="default", cost=1.0, tputs=[[100]], indexes=[[10], [10]] +) + + +class DeploymentStates: + """States of a deployment with resource version.""" + + def __init__(self, name: str, replicas: int = 1, min_replicas: int = 0): + self.name = name + self.replicas = replicas + """The replicas output, ignore min_replicas in the normal mode.""" + self.min_replicas = min_replicas + """The replicas for minimum mode. Ignore in normal optimization mode.""" + self.profile: Optional[GPUProfile] = None + self.watch_ver: Optional[str] = None + + @property + def cost(self): + return 0.0 if self.profile is None else self.profile.cost * self.replicas + + def minimize(self): + """Set replica to minimum mode.""" + self.replicas = max(0, self.min_replicas) + + def __repr__(self): + return f"{self.name}: {self.replicas}(${self.cost})" + + +class ModelMonitor: + def __init__( + self, + model_name: str, + watch_ver: str, + load_reader: LoadReader, + window: int = 240, + deployment: Optional[DeploymentStates] = None, + namespace: Optional[str] = None, + profile_reader: Optional[ProfileReader] = None, + debug: bool = False, + ): + """Initialize the model monitor. + + Args: + model_name: The name of the model to monitor. This should be a unique identifier for the model. + watch_ver: The k8s resource version of the deployment watching. This is used to keep track of the deployment's state. 
+ load_reader: An instance of the LoadReader class, used to retrieve workload information for the model. + deployment_name: (optional) The name of the deployment associated with the model. Each deployment is designated to a specific GPU model. + namespace: (optional) The Kubernetes namespace where the model deployment resides. + replicas: (optional) The initial number of replicas for the model deployment. + interval: (optional) The interval (in seconds) at which to monitor the model. Defaults to 10 seconds. + window: (optional) The window (in seconds) to consider for clustering. Defaults to 300 seconds. + debug: (optional) Whether to enable debugging behavior. Defaults to False. + """ + self.model_name = model_name + self.deployments: Dict[str, DeploymentStates] = {} + self.thread = None + self.outdated_watch_version = None + self.last_watch_version = None + self.debug = debug + self.done = False + self.window = float(window) + self._lock = threading.Lock() + + # Load reader + self._load_reader: LoadReader = load_reader + + # Profile reader + self._profile_reader: Optional[ProfileReader] = profile_reader + + # Optimizer + self._profiles: Dict[str, GPUProfile] = {} + self._optimizer = Optimizer() + + # Monitor states + self._centers: Iterable[Centeroid] = Empty_Array + self._labels: Iterable[int] = Empty_Array + self._data: Optional[DataBuffer] = None + self._progress: float = 0.0 + self._cost = 0.0 + + if profile_reader is not None: + self.load_profiles(profile_reader) + elif self.debug: + # Add debug_gpu_profile anyway if debugging + self._optimizer.set_profile(debug_gpu_profile) + + # Add first deployment + if deployment is not None: + self.add_deployment(watch_ver, deployment.name, namespace, deployment) + + def add_deployment( + self, + watch_ver: str, + deployment_name: str, + namespace: Optional[str], + deployment: Union[DeploymentStates, Callable[[], DeploymentStates]], + ): + # Update optimizer + key = self._deployment_entry_point(deployment_name, namespace) + profile = self._match_profile(key, deployment_name) + if profile is not None: + # No lock required here since the deployment has not been added to deployments. + self._optimizer.set_profile(profile) + else: + logger.warning( + f"No GPU profile found for {key}. Optimizer will skip the GPU." 
+ ) + + # add to deployment registry + self._lock.acquire(blocking=True) + if key not in self.deployments: + self.deployments[key] = deployment() if callable(deployment) else deployment + + old_cost = self.deployments[key].cost + self.deployments[key].profile = profile + self.deployments[key].watch_ver = watch_ver + self._cost += self.deployments[key].cost - old_cost + self.last_watch_version = watch_ver + self._lock.release() + + def remove_deployment(self, deployment_name: str, namespace: str) -> int: + """Remove a deployment from the monitor and return the number of deployments left.""" + key = self._deployment_entry_point(deployment_name, namespace) + + self._lock.acquire(blocking=True) + self._optimizer.delete_profile(key) + del self.deployments[key] + self._lock.release() + + return len(self.deployments) + + def read_deployment_num_replicas(self, deployment_name: str, namespace: str) -> int: + key = self._deployment_entry_point(deployment_name, namespace) + if key not in self.deployments: + raise Exception( + f"Deployment {namespace}:{deployment_name} of model {self.model_name} is not monitored" + ) + return self.deployments[key].replicas + + def update_deployment_num_replicas( + self, deployment_name: str, namespace: str, replicas: int + ): + key = self._deployment_entry_point(deployment_name, namespace) + if key not in self.deployments: + raise Exception( + f"Deployment {namespace}:{deployment_name} of model {self.model_name} is not monitored" + ) + + self.deployments[key].replicas = replicas + + def mark_deployments_outdated(self): + """Save the last watch version and start the validation.""" + self.outdated_watch_version = self.last_watch_version + + def clear_outdated_deployments(self) -> int: + """Remove outdated deployments from the monitor. + Return the number of deployments left.""" + # Iterate over a copy so entries can be deleted while scanning. + for key, states in list(self.deployments.items()): + if states.watch_ver == self.outdated_watch_version: + del self.deployments[key] + return len(self.deployments) + + def load_profiles(self, profile_reader: Optional[ProfileReader] = None): + """Load profiles from the configured profile reader.""" + try: + if profile_reader is None: + if self._profile_reader is None: + return + profile_reader = self._profile_reader + else: + self._profile_reader = profile_reader + + profiles = profile_reader.read() + for profile in profiles: + if self._update_profile(profile): + logger.debug(f"Profile of {profile.gpu} updated.") + except Exception as e: + logger.error(f"Failed to load profiles: {e}") + + def _update_profile(self, profile: GPUProfile) -> bool: + """Update a profile; the formal alias copy is updated, too.""" + key = profile.gpu + cost_diff = profile.cost + log_event = ( + True # log event if the profile is added to non-profile deployments. + ) + if key in self._profiles: + # profile already exists, check if it is updated + if profile.created <= self._profiles[key].created: + return False + + cost_diff -= self._profiles[ + key + ].cost # We can safely assume the existing profile has been added to the optimizer if any deployments match it. + log_event = False + + if self._profiles[key].gpu != key: + # key is an abbreviation copy, update the formal copy + profile.gpu = self._profiles[key].gpu + if profile.gpu in self._profiles: + self._profiles[profile.gpu] = profile + + # update the profile of key, note that the profile.gpu is already formalized. + self._profiles[key] = profile + + # apply update to optimizer for existing deployments.
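+ # Fast path: profile.gpu already equals the "namespace/deployment" key of a monitored
+ # deployment. Slow path: fall back to matching by bare deployment name and rewrite
+ # profile.gpu to the formal key so later lookups take the fast path.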
+ deployment_key: Optional[str] = ( + profile.gpu + ) # Fast path, note that the profile.gpu is already formalized if it match any deployments. + if profile.gpu not in self.deployments: + deployment_key = None + # slow path, find deployment by deployment_name + # noted that the profile.gpu is not formalized if the code reaches here. + for key, states in self.deployments.items(): + if states.name != profile.gpu: + continue + + deployment_key = profile.gpu = key # formalize the gpu field + break + # deployment existed + if deployment_key is not None: + self._lock.acquire(blocking=True) + if profile.gpu in self.deployments: # double check + self._optimizer.set_profile(profile) + self._cost += cost_diff * self.deployments[key].replicas + else: + log_event = False + self._lock.release() + if log_event: + logger.info( + f"Profile added to {profile.gpu}. Optimizer will consider corresponding GPU." + ) + + return True + + def start(self): + """Start the model monitor thread""" + self.thread = threading.Thread(target=self._run) + self.thread.daemon = True + self.thread.start() + + def _run(self): + """Monitor the model""" + logger.debug(f"{self.model_name} started") + try: + next(self._run_yieldable(False)) + except StopIteration: + pass + except Exception as e: + logger.error(f"Unexpected error on monitoring {self.model_name}: {e}") + logger.info(f"{self.model_name} stopped") + return + + def _run_yieldable(self, yieldable: bool, window_scaling: float = 1.0): + """_run implementation. Using a separate yieldable implementation for _run being accepted by Threading""" + # Define clusterer + clusterers: List[Clusterer] = [ + MovingDBSCANClusterer(0.5, 10, 4, self.window * window_scaling) + # MovingDBSCANClusterer(0.8, 100, 4, self.window * window_scaling), + # DBSCANClusterer(0.5, 10), + ] + self._data = DataBuffer( + int(self.window) * 10 + ) # Assume 10 RPS, will expand according to the actual RPS + # lvl2data = DataBuffer(window) + + logger.debug(f"{self.model_name} initialized") + + n = 0 + while not self.done: + start = datetime.now().timestamp() + + # Keep window rotating + movingCluster: MovingDBSCANClusterer = clusterers[0] # type: ignore + if movingCluster.validate(): + # Data refreshing + self._data.trim_head(-movingCluster.length) + + # Read new tokens + tokens = list( + self._expand_records(self._load_reader.read(datetime.now().timestamp())) + ) # read data + if len(tokens) > 0: + self._data.reconcile( + movingCluster.length + len(tokens) + ) # since databuffer.append will not expand the buffer automatically, we need to reconcile ourself. 
+ dps = self._data.append(tokens) + movingCluster.insert(dps) + self._data.commit() + + # track domanent token patterns + if self._data.len > 0: + uncategorized = None # Set to [] for further analysis + self._labels, self._centers = movingCluster.get_cluster_labels( + self._data.datapoints, uncategorized=uncategorized + ) + else: + self._labels, self._centers = Empty_Array, Empty_Array + + n += 1 + duration = (datetime.now().timestamp() - start) * 1000 + centers = list(self._centers) + logger.debug( + "%s batch %d took %d ms: %d centers: %s", + self.model_name, + n, + round(duration), + len(centers), + DelayedLog(lambda: str([str(center) for center in self._centers])), + ) + + if len(centers) > 0: + # Optimize + self._optimize(centers, self._data.len) + elif self._data.len == 0: + self._minimize() + else: + logger.info("Skip optimization, insufficient data") + + if yieldable: + # If yieldable, return the _run it self for further processing + # This allows caller controls the progress. + yield + else: + wait = self._load_reader.next_available() - datetime.now().timestamp() + if wait > 0: + time.sleep(wait) + # Validate time elapsed + while ( + datetime.now().timestamp() < self._load_reader.next_available() + ): + time.sleep(1) + + def stop(self): + """Stop the model monitor thread""" + self.done = True + logger.debug(f"Model monitor {self.model_name} stop signaled") + pass + + def _expand_records(self, records: Iterable[LoadRecord]): + for record in records: + for i in range(record.freq): + yield DataPoint( + record.input_tokens, record.output_tokens, age=record.ts + ) + + def _deployment_entry_point(self, deployment_name: str, namespace: Optional[str]): + """Entry point for each deployment""" + if namespace is None: + return deployment_name + + return f"{namespace}/{deployment_name}" + + def _match_profile(self, key, deployment_name) -> Optional[GPUProfile]: + if key in self._profiles: + return self._profiles[key] + elif deployment_name in self._profiles: + # Update the gpu name to formalized key. + profile: GPUProfile = self._profiles[deployment_name] + profile.gpu = key + return profile + elif self.debug: + # Copy the debug profile and override the gpu name with given key + copy = GPUProfile(**debug_gpu_profile.__dict__) + copy.gpu = key + return copy + + return None + + def _optimize(self, centers: Iterable[Centeroid], total_request_rate: int): + # Update profiles. + self.load_profiles() + + if not self._optimizer.set_workload_distribution(centers, total_request_rate): + return + + start = datetime.now().timestamp() + result = self._optimizer.run() + duration = (datetime.now().timestamp() - start) * 1000 + if result is None: + logger.info( + f"{self.model_name} optimization took {duration} ms, unexpected void rsult, skip." + ) + return + + cost = result["cost"] + del result["cost"] + + # Update deployment + self._lock.acquire(blocking=True) + # Insure all deployments are up to date. + for key, replicas in result.items(): + if key not in self.deployments and replicas > 0: + logger.warning( + f"Not all deployments in optimization result available: {key}, discard result" + ) + self._lock.release() + return + # Reset replicas of all deployments. 
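+ # Deployments missing from the optimizer result are scaled down to 0 replicas.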
+ for key, states in self.deployments.items(): + states.replicas = result[key] if key in result else 0 + self._cost = cost + self._lock.release() + + logger.info( + f"{self.model_name} optimization took {duration} ms, cost ${self._cost}, coverage: {self.coverage}%: {list(self.deployments.values())}" + ) + + def _minimize(self): + # Update deployment + self._lock.acquire(blocking=True) + # Reset replicas of all deployments. + cost = 0.0 + for states in self.deployments.values(): + states.minimize() # Will apply min_replicas obligation. + cost += states.cost + self._cost = cost + self._lock.release() + logger.info( + f"{self.model_name} scaled to minimum, cost ${self._cost}: {list(self.deployments.values())}" + ) + + @property + def centers(self): + return self._centers + + @property + def dataframe(self): + if self._data is None: + return None + + df = pd.DataFrame( + data=np.array([self._data.x, self._data.y, self._labels]).transpose(), + columns=["input_tokens", "output_tokens", "label"], + ) + return df + + @property + def labeled(self): + return reduce(lambda cnt, center: cnt + center.size, self._centers, 0) + + @property + def progress(self) -> str: + """A progress indicator of the data source. + For dataset, it is the percentage of the data read. + For stream, it is the time elapsed since the start of the monitor.""" + return self._load_reader.progress() + + @property + def cost(self) -> float: + """The total cost of the model.""" + return self._cost + + @property + def coverage(self) -> float: + """The coverage of the model.""" + if self._data is None: + return 0.0 + + return self.labeled / self._data.len * 100 diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/profile_reader.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/profile_reader.py new file mode 100644 index 00000000..0dc0f52a --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/profile_reader.py @@ -0,0 +1,94 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json +import logging +from typing import List, Protocol + +from aibrix.gpu_optimizer.optimizer import GPUProfile + +logger = logging.getLogger("aibrix.gpuoptimizer.profile_reader") + + +class ProfileReader(Protocol): + def read(self) -> List[GPUProfile]: + """Read the next batch of records from the data source.""" + + +class FileProfileReader: + def __init__(self, filepath: str) -> None: + self.filepath = filepath + + def read(self) -> List[GPUProfile]: + """Read the next batch of records from the data source.""" + with open(self.filepath, "r") as f: + try: + # Try parse as singal json + profiles = json.load(f) + except Exception: + try: + # Try parse as list of json (jsonl) + profiles = [] + for line in f: + if line.strip() == "": + continue + profiles.append(json.loads(line)) + except Exception as e: + logger.warning( + f"Invalid profile file format, expected list or dict: {e}" + ) + + if isinstance(profiles, dict): + profiles = [profiles] + elif not isinstance(profiles, list): + logger.warning("Invalid profile file format, expected list or dict.") + + return [GPUProfile(**profile) for profile in profiles] + + +class RedisProfileReader: + def __init__( + self, redis_client, model_name: str, key_prefix: str = "aibrix:profile_%s_" + ) -> None: + self.client = redis_client + self.key_prefix = key_prefix % (model_name) + + def read(self) -> List[GPUProfile]: + """Read the next batch of records from the data source.""" + cursor = 0 + matching_keys = [] + while True: + cursor, keys = self.client.scan(cursor=cursor, match=f"{self.key_prefix}*") + for key in keys: + matching_keys.append(key) + if cursor == 0: + break + if len(matching_keys) == 0: + logger.warning(f"No profiles matching {self.key_prefix}* found in Redis") + + # Retrieve the objects associated with the keys + records = [] + for key in matching_keys: + # Deserialize by json: dict[string]int + profile_data = self.client.get(key) + if profile_data is None: + raise Exception(f"Failed to retrieve {key.decode()} from Redis.") + + # Deserialize by json: dict[string]int + profile = json.loads(profile_data) + if not isinstance(profile, dict): + raise Exception("Profile is not a dictionary") + + records.append(GPUProfile(**profile)) + + return records diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/visualizer.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/visualizer.py new file mode 100644 index 00000000..c29a5c9e --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/visualizer.py @@ -0,0 +1,374 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
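As context for the readers above: `RedisProfileReader` scans keys of the form `aibrix:profile_{model}_*` and expects each value to be a JSON object matching the `GPUProfile` fields. A hedged sketch of publishing one such profile so the reader can pick it up (the concrete field values, the `a100` key suffix, and the `created` timestamp field are assumptions based on the debug profile and simulator names used elsewhere in this change):

```python
# Illustration only: store one GPUProfile document where RedisProfileReader will find it.
import json
import time

import redis

from aibrix.gpu_optimizer.load_monitor.profile_reader import RedisProfileReader

r = redis.Redis(host="localhost", port=6379, db=0)

profile = {
    "gpu": "a100",              # assumed GPU/deployment identifier
    "cost": 1.0,                # relative cost per replica
    "tputs": [[100]],           # throughput table indexed by the token buckets below
    "indexes": [[10], [10]],    # log2 input/output token bucket boundaries
    "created": time.time(),     # newer profiles supersede older ones in the monitor
}
r.set("aibrix:profile_llama2-7b_a100", json.dumps(profile))

reader = RedisProfileReader(r, "llama2-7b")
for p in reader.read():
    print(p.gpu, p.cost)
```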
+ +import logging +import os +import threading +from datetime import datetime +from typing import Any, Callable, List, Optional, Tuple + +import dash +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objs as go +from dash import dcc, html +from dash.dependencies import Input, Output +from starlette.middleware.wsgi import WSGIMiddleware +from starlette.routing import Mount + +from .load_reader import DatasetLoadReader, LoadReader +from .monitor import ModelMonitor +from .profile_reader import FileProfileReader, ProfileReader, RedisProfileReader + +canvas_size = 1000 +scale = 16 +interval = 1000 # in milliseconds +reader_interval = 10 # in seconds +interval_scaling = interval / 1000 / reader_interval + +# window = 4000 +# # Load dataset reader +# directory = os.path.dirname(os.path.abspath(__file__)) +# reader: LoadReader = DatasetLoadReader(directory + '/data/sharegpt.csv', n_batch=100) +# span = window / reader.n_batch +# # Define clusterer +# clusterers: List[Clusterer] = [MovingDBSCANClusterer(0.8, 100, 4, window), DBSCANClusterer(0.5, 10)] +# Simulated data source for display +# data = DataBuffer(window) +# lvl2data = DataBuffer(window) + +colors = [ + "red", + "green", + "pink", + "blue", + "navy", + "orange", + "purple", + "cyan", + "magenta", + "yellow", + "black", + "gray", + "brown", + "olive", + "teal", + "maroon", +] + + +def default_datasource(_: str) -> Optional[ModelMonitor]: + return get_debug_model_montior(None) + + +debug_monitor = None +figure = type("", (), {})() # create a empty object +figure.__dict__ = { + "debug_run": True, + "debug_driver": None, + "datasource": default_datasource, + "last": dash.no_update, + "lock": threading.Lock(), +} + +logger = logging.getLogger("aibrix.gpuoptimizer.loadmonitor.visualizer") + + +def get_debug_model_montior( + path: Optional[str], + scale: float = 1.0, + profile: Optional[str] = None, + redisprofile: Optional[str] = None, +) -> Optional[ModelMonitor]: + global debug_monitor + + if debug_monitor is None: + if path is None: + directory = os.path.dirname(os.path.abspath(__file__)) + path = directory + "/data/sharegpt.csv" + loadReader: LoadReader = DatasetLoadReader( + path, rps=10, scale=scale, interval=reader_interval + ) + + profile_reader: Optional[ProfileReader] = None + if profile is not None: + profile_reader = FileProfileReader(profile) + elif redisprofile is not None: + profile_reader = RedisProfileReader( + *parse_redis_connection_str(redisprofile) + ) + + debug_monitor = ModelMonitor( + "sharegpt", "0", loadReader, profile_reader=profile_reader, debug=True + ) + + return debug_monitor + + +def parse_redis_connection_str(connection_str: str) -> Tuple[Any, str]: + from urllib.parse import parse_qs, urlparse + + import redis + + # Parse the Redis URL + url = urlparse(connection_str) + + # Connect to the Redis server + db_name = str(url.path).strip("/") + if db_name == "": + db_name = "0" + redis_client = redis.Redis( + host=str(url.hostname), + port=6379 if url.port is None else int(url.port), + db=int(db_name), + username=url.username, + password=url.password, + ) + + # Store the result in Redis + query_params = parse_qs(url.query) + model_name = query_params.get("model", [""])[0] + if model_name == "": + raise Exception('"model" in Redic connection arguments is not provided.') + + return redis_client, model_name + + +def make_color(color, alpha=1): + rgb = plt.matplotlib.colors.to_rgb(color) + return f"rgba({rgb[0]*255}, {rgb[1]*255}, {rgb[2]*255}, {alpha})" + + +def 
update_graph(n, model_name): + # Reset initial figure + if n == 0: + figure.last = dash.no_update + + # Acquire the lock at the beginning of the callback + if not figure.lock.acquire(blocking=False): + # If the lock is already acquired, skip this execution + return figure.last + + try: + start = datetime.now().timestamp() + + monitor: Optional[ModelMonitor] = figure.datasource(model_name) + if monitor is None: + figure.last = { + "data": [], + "layout": go.Layout( + title=f"Live data update of {model_name} is unavailable: model not monitored", + xaxis=dict(range=[0, scale], title="input_tokens(log2)"), + yaxis=dict(range=[0, scale], title="output_tokens(log2)"), + ), + } + return figure.last + + if figure.debug_run: + # Drive the monitor progress for debugging + if figure.debug_driver is None: + figure.debug_driver = monitor._run_yieldable( + True, window_scaling=interval_scaling + ) + + next(figure.debug_driver) + + data_df = monitor.dataframe + if data_df is None or len(data_df) == 0: + figure.last = { + "data": [], + "layout": go.Layout( + title=f"Live data update of {model_name} is unavailable: insufficient data", + xaxis=dict(range=[0, scale], title="input_tokens(log2)"), + yaxis=dict(range=[0, scale], title="output_tokens(log2)"), + ), + } + return figure.last + centers = monitor.centers + labeled = monitor.labeled + data_colors = [ + colors[int(label) % len(colors)] if label >= 0 else "black" + for label in data_df["label"] + ] + # label_seen = len(centers) + + # recluster level 2 + # lvl2data.clear() + # lvl2data.append(uncategorized) + # clusterers[1].reset() + # clusterers[1].insert(uncategorized) + # lvl2labels, lvl2centers = clusterers[1].get_cluster_labels(lvl2data.xy) + # labeled = reduce(lambda cnt, center: cnt+center.size, lvl2centers, labeled) + # for i, label in enumerate(lvl2labels): + # if label < 0: + # continue + # lvl2data._color[i] = colors[(int(label)+label_seen) % len(colors)] + # label_seen += len(lvl2centers) + # if len(lvl2centers) > 0: + # center_df = pd.concat([center_df, pd.DataFrame(data=np.array([center.to_array(span) for center in lvl2centers]), columns=['x', 'y', 'radius', 'size'])], ignore_index=True) + + duration = (datetime.now().timestamp() - start) * 1000 + plotdata = [ + go.Scatter( + x=data_df["input_tokens"], + y=data_df["output_tokens"], + mode="markers", + name="major patterns", + marker=dict( + color=data_colors, # Specify the color of the marker + size=3, # Set the size of the marker + ), + ), + # go.Scatter( + # x=lvl2data.x, + # y=lvl2data.y, + # mode='markers', + # name='minor patterns', + # marker=dict( + # symbol='square', + # color=lvl2data.color, # Specify the color of the marker + # size=3, # Set the size of the marker + # ) + # ), + ] + if len(centers) > 0: + center_df = pd.DataFrame( + data=np.array([center.to_array() for center in centers]), + columns=["x", "y", "radius", "size"], + ) + # assign color to center_df + center_colors = [ + make_color(colors[int(idx) % len(colors)], alpha=0.5) + for idx in center_df.index + ] + # print(center_df['size']) + plotdata.append( + go.Scatter( + x=center_df["x"], + y=center_df["y"], + mode="markers", + name="RPS", + marker=dict( + sizeref=1, # Adjust this value to control size + sizemode="diameter", + size=np.maximum( + (canvas_size / (scale + 2)) + * (np.log2(center_df["size"]) + 1), + 10, + ), # Assuming you have a column with size values + color=center_colors, + symbol="circle", + ), + ) + ) + + figure.last = { + "data": plotdata, + "layout": go.Layout( + title=f"Live Data 
Update({n}:{round(duration)}ms) of {model_name}, labeled: {round(labeled/len(data_df)*100, 2)}%, processed: {monitor.progress}",
+                # xaxis=dict(range=[0, max(data['x']) + 1]),
+                # yaxis=dict(range=[0, max(data['y']) + 1])
+                xaxis=dict(range=[0, scale], title="input_tokens(log2)"),
+                yaxis=dict(range=[0, scale], title="output_tokens(log2)"),
+            ),
+        }
+        return figure.last
+    except Exception as e:
+        logger.error(f"Failed to prepare figure: {e}")
+        import traceback
+
+        traceback.print_exc()
+    finally:
+        # Release the lock at the end of the callback
+        figure.lock.release()
+
+
+def store_model_name(pathname):
+    # Extract model_name from pathname (e.g., /dash/model_name/)
+    try:
+        model_name = pathname.strip("/").split("/")[-1]
+    except IndexError:
+        model_name = None  # Handle cases where model_name is not present
+    return model_name
+
+
+def init(prefix=""):
+    app = dash.Dash(__name__, requests_pathname_prefix=prefix + "/")
+
+    app.layout = html.Div(
+        [
+            dcc.Location(id="url", refresh=False),  # To access the URL
+            dcc.Input(id="model-name-input", type="hidden", value=""),
+            html.Div(id="model-info"),
+            dcc.Interval(
+                id="interval-component",
+                interval=1000,  # in milliseconds
+                n_intervals=0,  # start at 0
+            ),
+            dcc.Graph(
+                id="live-graph",
+                style={"width": f"{canvas_size}px", "height": f"{canvas_size}px"},
+            ),
+        ]
+    )
+
+    app.callback(Output("model-name-input", "value"), Input("url", "pathname"))(
+        store_model_name
+    )
+    app.callback(
+        Output("live-graph", "figure"),
+        [
+            Input("interval-component", "n_intervals"),
+            Input("model-name-input", "value"),
+        ],
+    )(update_graph)
+
+    return app
+
+
+def mount_to(
+    routes: List, prefix: str, datasrc: Callable[[str], Optional[ModelMonitor]]
+):
+    figure.datasource = datasrc
+    figure.debug_run = False
+    routes.append(Mount(prefix, WSGIMiddleware(init(prefix).server)))
+    return routes
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG)
+    logging.getLogger("pulp.apis.core").setLevel(logging.INFO)  # Suppress pulp logs
+
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Please provide dataset path:")
+    parser.add_argument("--dataset", type=str, default=None, help="Dataset path.")
+    parser.add_argument("--scaledata", type=float, default=1, help="Scale factor applied to the dataset load.")
+    parser.add_argument("--profile", type=str, default=None, help="Profile path.")
+    parser.add_argument(
+        "--redisprofile",
+        type=str,
+        default=None,
+        help="Redis connection string for profiles.",
+    )
+    args = parser.parse_args()
+    if args.dataset is not None:
+        figure.datasource = lambda _: get_debug_model_montior(
+            args.dataset,
+            args.scaledata,
+            profile=args.profile,
+            redisprofile=args.redisprofile,
+        )
+    init().run_server(debug=True)
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/__init__.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/__init__.py
new file mode 100644
index 00000000..d80bfd18
--- /dev/null
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2024 The Aibrix Team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .optimizer import Optimizer as Optimizer
+from .types import GPUProfile as GPUProfile
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/optimizer.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/optimizer.py
new file mode 100644
index 00000000..942ef338
--- /dev/null
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/optimizer.py
@@ -0,0 +1,127 @@
+# Copyright 2024 The Aibrix Team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from functools import reduce
+from typing import Iterable, Optional, Tuple
+
+import numpy as np
+
+from .solver.melange import Config as MelangConfig
+from .solver.melange import SolverRunner
+from .types import GPUProfile, WorkloadProfile
+
+logger = logging.getLogger("aibrix.gpuoptimizer.optimizer")
+
+
+class Optimizer:
+    def __init__(self, profiles: Optional[Iterable[GPUProfile]] = None):
+        self._config = MelangConfig()
+        self._workload_distribution_template: Optional[np.ndarray] = None
+        self._indexes: Optional[list] = None  # Value ticks of tputs columns and rows
+        if profiles is not None:
+            for profile in profiles:
+                self.set_profile(profile)
+
+    def set_profile(self, profile: GPUProfile):
+        if self._workload_distribution_template is None:
+            self._workload_distribution_template = np.zeros_like(profile.tputs)
+            self._indexes = profile.indexes
+        elif (
+            self._workload_distribution_template.shape != np.shape(profile.tputs)
+            or self._indexes != profile.indexes
+        ):
+            raise Exception(
+                f"Profile({profile.gpu}) applied should keep the same shape and value ticks. shapes: {self._workload_distribution_template.shape} vs {np.shape(profile.tputs)}, indexes: {self._indexes} vs {profile.indexes}"
+            )
+
+        logger.debug(
+            "Applied profile for %s, tputs: %s, indexes: %s",
+            profile.gpu,
+            profile.tputs,
+            self._indexes,
+        )
+        self._config.gpu_info[profile.gpu] = profile.__dict__
+
+    def delete_profile(self, gpu):
+        if gpu in self._config.gpu_info:
+            del self._config.gpu_info[gpu]
+
+    def set_workload_distribution(
+        self, profiles: Iterable[WorkloadProfile], total_request_rate: int
+    ) -> bool:
+        """Update workload distribution and return success or failure."""
+        if self._workload_distribution_template is None:
+            return False
+
+        # Maintain the overall request scale even if some requests are not covered.
+        self._config.total_request_rate = total_request_rate
+        # covered_request_rate is used to calculate the workload distribution.
+        covered_request_rate = reduce(
+            lambda cnt, center: cnt + center.rate, profiles, 0.0
+        )
+        success = True
+        for profile in profiles:
+            try:
+                self._workload_distribution_template[
+                    self._validate_workload_signature(profile)
+                ] = profile.rate / covered_request_rate  # type: ignore
+            except Exception as e:
+                logger.error(
+                    f"Failed to set workload distribution: {profile.signature}: {e}"
+                )
+                success = False
+        self._config.workload_distribution = (
+            self._workload_distribution_template.tolist()
+        )
+        return success
+
+    def run(self) -> Optional[dict]:
+        """Run the solver and return the result.
+        Return None if no profiles are added.
+        The result is a dict with the following format:
+
+        {
+            "gpu1": replicas1,
+            "gpu2": replicas2,
+            "cost": cost,
+        }
+        """
+        logger.debug(f"Starting solver for {self._config.gpu_info.keys()}")
+        if len(self._config.gpu_info) == 0:
+            return None
+
+        runner = SolverRunner(self._config)
+        ret = runner.run()
+        logger.debug(f"Done solver: {ret}")
+        return ret
+
+    def _validate_workload_signature(self, profile: WorkloadProfile) -> Tuple[int, ...]:
+        """Validate the workload's signature by regarding each element of the signature tuple as an index.
+        Return a valid index tuple for accessing self._workload_distribution_template."""
+        if self._workload_distribution_template is None or self._indexes is None:
+            raise Exception("Load profile not set.")
+
+        signature = profile.get_signature(self._indexes, self._log_signature_error)
+        if len(signature) != self._workload_distribution_template.ndim:
+            raise Exception(
+                f"Unmatched workload profile, expected a signature of length {self._workload_distribution_template.ndim}, got {len(signature)}."
+            )
+
+        # No validation on the shape. Leave the set function to throw an error.
+        return signature
+
+    def _log_signature_error(self, dimension, value, index, index_value, offset):
+        logger.warning(
+            f"Signature item {dimension}:{value} is out of range, counted as {index_value} (reference offset: {offset})"
+        )
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/README.md b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/README.md
new file mode 100644
index 00000000..9afe3066
--- /dev/null
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/README.md
@@ -0,0 +1,23 @@
+# GPU Profiling
+
+## About
+GPU performance profiling toolkit. First, use `benchmark.sh` to measure throughput, latency, and other SLO metrics for different input and output lengths. Then use `gen_profile.py` to create a GPU profile that matches the specified SLOs.
+
+## Help on `benchmark.sh`
+First, deploy your model of choice on the GPU you wish to profile. We support [vLLM](https://github.com/vllm-project/vllm/tree/main)-like inference engines.
+
+Once your model is up and running, modify `benchmark.sh` to configure the following parameters for the profiling:
+* input_start: The starting input length for profiling
+* input_limit: The ending input length for profiling
+* output_start: The starting output length for profiling
+* output_limit: The ending output length for profiling
+* rate_start: The starting request rate for profiling
+* rate_limit: The ending request rate for profiling
+
+Run `pip install -r requirements.txt` to install the dependencies.
+
+Finally, run `benchmark.sh [your deployment name]`; the results will be written to the `result` directory.
+
+## Help on `gen_profile.py`
+
+Run `python gen_profile.py -h` to see the help message.
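+
+For example, assuming the model was benchmarked under the (hypothetical) deployment name `llama2-7b-a100`, a typical end-to-end run might look like the sketch below; adjust the deployment name, SLO targets, and Redis endpoint to your environment:
+
+```bash
+# Sweep input/output lengths and request rates against the deployment.
+# Results are appended to result/llama2-7b-a100.jsonl
+bash benchmark.sh llama2-7b-a100
+
+# Generate a profile that satisfies the P99 SLO targets and write it to a JSON file.
+python gen_profile.py llama2-7b-a100 --percentile 99 --e2e 300 --ttft 60 --TPOT 1 \
+    -o result/llama2-7b-a100.json
+
+# Alternatively, publish the profile to Redis so the GPU optimizer can pick it up.
+python gen_profile.py llama2-7b-a100 --percentile 99 \
+    -o "redis://localhost:6379/0?model=llama2-7b"
+```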
\ No newline at end of file diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh new file mode 100755 index 00000000..256560cd --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Result files will be added to 'PATH_PREFIX' directory. +PATH_PREFIX=`dirname "$0"` +FILE_NAME="result" +TOTAL=100 + +if [ -n "$1" ]; then + # If first argument is provided, use the first argument as filename + FILE_NAME="$1" +fi + +# Make sure the directory exists and clear output file +OUTPUT_FILE="${PATH_PREFIX}/result/${FILE_NAME}.jsonl" +mkdir -p `dirname "$OUTPUT_FILE"` +# echo "" > ${OUTPUT_FILE} + +# TODO: Set your preferred request sizes and rates here. +input_start=128 +input_limit=$((2**11)) # 2K +output_start=4 +output_limit=$((2**9)) # 512 +rate_start=1 +rate_limit=$((2**6)) # 32 + +input_len=$input_start +while [[ $input_len -le $input_limit ]]; do + output_len=$output_start + while [[ $output_len -le $output_limit ]]; do + req_rate=$rate_start + while [[ $req_rate -le $rate_limit ]]; do + python $PATH_PREFIX/gpu_benchmark.py --backend=vllm --port 8010 --model=llama2-7b --request-rate=$req_rate --num-prompts=$TOTAL --input_len $input_len --output_len $output_len >> ${OUTPUT_FILE} + req_rate=$((req_rate * 2)) + done + output_len=$((output_len * 2)) + done + input_len=$((input_len * 2)) +done + +echo "Profiling finished." \ No newline at end of file diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_profile.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_profile.py new file mode 100644 index 00000000..d2076765 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_profile.py @@ -0,0 +1,231 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import argparse
+import json
+import os
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+
+REDIS_PROFILE_KEY = "aibrix:profile_%s_%s"
+
+
+def main(args):
+    # Init dataframe and load benchmark results
+    benchmark = os.path.dirname(__file__) + f"/result/{args.deployment}.jsonl"
+    if args.benchmark is not None:
+        benchmark = args.benchmark
+
+    benchmark_results = []
+    with open(benchmark, "r") as f:
+        for line in f:
+            if line == "\n":
+                continue
+            benchmark_results.append(json.loads(line))
+    benchmark_df = pd.DataFrame(
+        benchmark_results,
+        columns=[
+            "input_tokens",
+            "output_tokens",
+            "request_rate",
+            "seed",
+            "model",
+            "samples",
+            "metric",
+            "mean",
+            "P50",
+            "P90",
+            "P99",
+        ],
+    )
+
+    # Construct matrix indexes based on unique input and output tokens
+    input_tokens = benchmark_df["input_tokens"].unique()
+    output_tokens = benchmark_df["output_tokens"].unique()
+    input_tokens.sort()
+    output_tokens.sort()
+    slo_tputs = np.zeros((len(output_tokens), len(input_tokens)), dtype=float)
+
+    # Decide the percentile to use for SLO calculation
+    percentile_field = "mean"
+    if args.percentile > 0:
+        percentile_field = f"P{args.percentile}"
+
+    # Iterate over slo_tputs and fill in the matrix with the throughput values that match the SLOs
+    for i in range(len(output_tokens)):
+        for j in range(len(input_tokens)):
+            filtered_df = benchmark_df.loc[
+                (benchmark_df["input_tokens"] == input_tokens[j])
+                & (benchmark_df["output_tokens"] == output_tokens[i])
+            ]
+
+            # Filter the benchmarks by throughput SLO
+            tput_df = filtered_df.loc[
+                (filtered_df["metric"] == "TPUT") & (filtered_df["mean"] >= args.tput)
+            ]
+            if len(tput_df) == 0:
+                continue
+            filtered_df = filtered_df.loc[
+                filtered_df["request_rate"].isin(tput_df["request_rate"])
+            ]
+
+            # Filter the benchmarks by token throughput SLO
+            tt_df = filtered_df.loc[
+                (filtered_df["metric"] == "TT") & (filtered_df["mean"] >= args.tt)
+            ]
+            if len(tt_df) == 0:
+                continue
+            filtered_df = filtered_df.loc[
+                filtered_df["request_rate"].isin(tt_df["request_rate"])
+            ]
+
+            # Filter the benchmarks by E2E latency SLO
+            e2e_df = filtered_df.loc[
+                (filtered_df["metric"] == "E2E")
+                & (filtered_df[percentile_field] <= args.e2e)
+            ]
+            if len(e2e_df) == 0:
+                continue
+            filtered_df = filtered_df.loc[
+                filtered_df["request_rate"].isin(e2e_df["request_rate"])
+            ]
+
+            # Filter the benchmarks by TTFT SLO
+            ttft_df = filtered_df.loc[
+                (filtered_df["metric"] == "TTFT")
+                & (filtered_df[percentile_field] <= args.ttft)
+            ]
+            if len(ttft_df) == 0:
+                continue
+            filtered_df = filtered_df.loc[
+                filtered_df["request_rate"].isin(ttft_df["request_rate"])
+            ]
+
+            # Filter the benchmarks by TPOT SLO
+            tpot_df = filtered_df.loc[
+                (filtered_df["metric"] == "TPOT")
+                & (filtered_df[percentile_field] <= args.TPOT)
+            ]
+            if len(tpot_df) == 0:
+                continue
+            filtered_df = filtered_df.loc[
+                filtered_df["request_rate"].isin(tpot_df["request_rate"])
+            ]
+
+            # Conclude: take the highest throughput that satisfies all SLOs
+            slo_tputs[i, j] = np.max(
+                filtered_df.loc[filtered_df["metric"] == "TPUT", "mean"]
+            )
+
+    # Assemble the profile and output it
+    filename = os.path.splitext(os.path.basename(benchmark))[0]
+    result = {
+        "gpu": filename,
+        "cost": args.cost,
+        "tputs": slo_tputs.tolist(),
+        "indexes": [output_tokens.tolist(), input_tokens.tolist()],
+        "created": datetime.now().timestamp(),
+    }
+    if args.o is not None:
+        if _try_store_redis(args, result):
+            return
+
+        with open(args.o, "w") as f:
+            json.dump(result, f)
+    else:
+        print(json.dumps(result))
+
+
+def _try_store_redis(args, result) -> bool:
+    import json
+    import sys
+    from urllib.parse import parse_qs, urlparse
+
+    import redis
+
+    # Parse the Redis URL
+    url = urlparse(args.o)
+
+    # Only proceed if the output target is a Redis URL
+    if url.scheme != "redis":
+        return False
+
+    # Connect to the Redis server
+    db_name = str(url.path).strip("/")
+    if db_name == "":
+        db_name = "0"
+    redis_client = redis.Redis(
+        host=str(url.hostname),
+        port=6379 if url.port is None else int(url.port),
+        db=int(db_name),
+        username=url.username,
+        password=url.password,
+    )
+
+    # Store the result in Redis
+    query_params = parse_qs(url.query)
+    model_name = query_params.get("model", [""])[0]
+    if model_name == "":
+        print('"model" in Redis connection arguments is not provided.', file=sys.stderr)
+        return True
+
+    redis_key = REDIS_PROFILE_KEY % (model_name, args.deployment)
+    redis_client.set(redis_key, json.dumps(result))
+    print(f"Result stored in Redis: {redis_key}.")
+    return True
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generate a GPU profile from benchmark results."
+    )
+    parser.add_argument(
+        "deployment", type=str, help="Target deployment", default="result"
+    )
+    parser.add_argument(
+        "--benchmark", type=str, default=None, help="Benchmark result file."
+    )
+    parser.add_argument(
+        "--tput", type=float, default=0, help="Throughput SLO target as RPS."
+    )
+    parser.add_argument(
+        "--tt", type=float, default=0, help="Token Throughput SLO target."
+    )
+    parser.add_argument(
+        "--e2e", type=float, default=300, help="E2E latency SLO target."
+    )
+    parser.add_argument(
+        "--ttft", type=float, default=60, help="Time To First Token SLO target."
+    )
+    parser.add_argument(
+        "--TPOT", type=float, default=1, help="Time Per Output Token SLO target."
+    )
+    parser.add_argument(
+        "--percentile",
+        type=int,
+        default=0,
+        help="Percentile to use for SLO calculation. Default to ignore percentile and use mean.",
+        choices=[0, 50, 90, 99],
+    )
+    parser.add_argument("--cost", type=float, default=1.0, help="Cost of the GPU.")
+    parser.add_argument(
+        "-o",
+        type=str,
+        default=None,
+        help="Output file name. Supports redis as: redis://[username:password@]hostname:port[/db_name]?model=[model_name]",
+    )
+    args = parser.parse_args()
+    main(args)
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
new file mode 100644
index 00000000..39b6f555
--- /dev/null
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
@@ -0,0 +1,359 @@
+# Copyright 2024 The Aibrix Team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Benchmark online serving throughput.
+ +Adapted from https://github.com/tyler-griggs/melange-release/blob/main/melange/profiling/gpu-benchmark.py, which is adapted from +https://github.com/vllm-project/vllm/blob/main/benchmarks/benchmark_serving.py + +""" + +import argparse +import asyncio +import json +import random +import time +from typing import AsyncGenerator, List, Tuple + +import aiohttp +import numpy as np + +# (prompt len, output len, request latency) +REQUEST_LATENCY: List[Tuple[int, int, float]] = [] +# (prompt len, output len, [per-token latencies]) +TOKEN_LATENCY: List[Tuple[int, int, List[float]]] = [] +TIME_TO_FIRST_TOKEN: List[float] = [] +TEMPERATURE = 0.0 + + +def sample_requests( + num_requests: int, + config_input_len: int, + config_output_len: int, +) -> List[Tuple[str, int, int]]: + return [ + ("hi " * config_input_len, config_input_len, config_output_len) + for _ in range(num_requests) + ] + + +async def get_request( + input_requests: List[Tuple[str, int, int]], + request_rate: float, + num_requests: int, +) -> AsyncGenerator[Tuple[str, int, int, float], None]: + requests = iter(input_requests) + for i, request in enumerate(requests): + interval = 0.0 + if i < num_requests - 1 and request_rate != float("inf"): + # Sample the request interval from the exponential distribution. + interval = np.random.exponential(1.0 / request_rate) + + request_with_next = (request[0], request[1], request[2], interval) + yield request_with_next + + if request_rate == float("inf"): + # If the request rate is infinity, then we don't need to wait. + continue + + # The next request will be sent after the interval. + await asyncio.sleep(interval) + + +async def send_request( + idx: int, + backend: str, + api_url: str, + model: str, + prompt: str, + prompt_len: int, + output_len: int, + next_in: float, + best_of: int, + use_beam_search: bool, + log_error: bool, +) -> None: + headers = { + "User-Agent": "Benchmark Client", + "user": "your-user-name", + "model": model, + } + streaming = True + if backend == "vllm": + pload = { + "model": model, + "prompt": prompt, + # "n": 1, + # "best_of": best_of, + # "use_beam_search": use_beam_search, + "temperature": 0.0 if use_beam_search else TEMPERATURE, + # "top_p": 1.0, + "max_tokens": output_len, + # "ignore_eos": True, + # "stream": stream, + } + if next_in > 0.0: + pload["next_in"] = next_in + else: + raise ValueError(f"Unknown backend: {backend}") + + request_start_time = time.perf_counter() + timeout = aiohttp.ClientTimeout(total=3 * 3600) + async with aiohttp.ClientSession(timeout=timeout) as session: + while True: + # print(f"Sending request: {api_url}:{pload}") + async with session.post(api_url, headers=headers, json=pload) as response: + chunks = [] + token_latencies = [] + previous_token_time = time.perf_counter() + first = True + try: + if streaming: + async for chunk, _ in response.content.iter_chunks(): + # Stream on: Each chunk in the response is the full response so far + chunks = [chunk] + + now_time = time.perf_counter() + if first: + time_to_first = now_time - previous_token_time + first = False + else: + token_latencies.append(now_time - previous_token_time) + previous_token_time = now_time + + # Stream off: Chunks are full response. 
+                            # chunks.append(chunk)
+
+                        output = b"".join(chunks).decode("utf-8")
+                        sanitized = output[:-1]  # Get rid of EOF
+                    else:
+                        time_to_first = time.perf_counter() - previous_token_time
+                        output = await response.text()
+                        sanitized = output
+                except Exception as e:
+                    if log_error:
+                        print(f"Failed to read response for request {idx}: {e}")
+                    break
+                try:
+                    ret = json.loads(sanitized)
+
+                    # Re-send the request if it failed.
+                    if "error" not in ret:
+                        break
+                except Exception:
+                    # Give up on responses that cannot be parsed.
+                    if log_error:
+                        print(f"Invalid response for request {idx}: {output}")
+                    break
+
+    request_end_time = time.perf_counter()
+    request_latency = request_end_time - request_start_time
+    if len(token_latencies) == 0:
+        token_latencies = [0]
+    REQUEST_LATENCY.append((prompt_len, output_len, request_latency))
+    TOKEN_LATENCY.append((prompt_len, output_len, token_latencies))
+    TIME_TO_FIRST_TOKEN.append(time_to_first)
+
+
+async def benchmark(
+    backend: str,
+    api_url: str,
+    model: str,
+    input_requests: List[Tuple[str, int, int]],
+    best_of: int,
+    use_beam_search: bool,
+    request_rate: float,
+    num_requests: int,
+    log_error: bool,
+) -> None:
+    tasks: List[asyncio.Task] = []
+
+    async for request in get_request(input_requests, request_rate, num_requests):
+        prompt, prompt_len, output_len, next_in = request
+        task = asyncio.create_task(
+            send_request(
+                len(tasks),
+                backend,
+                api_url,
+                model,
+                prompt,
+                prompt_len,
+                output_len,
+                next_in,
+                best_of,
+                use_beam_search,
+                log_error,
+            )
+        )
+        tasks.append(task)
+
+    await asyncio.gather(*tasks)
+
+
+def main(args: argparse.Namespace):
+    result = {}
+    if args.verbose:
+        print(args)
+    else:
+        result["input_tokens"] = args.input_len
+        result["output_tokens"] = args.output_len
+        result["request_rate"] = args.request_rate
+        result["seed"] = args.seed
+        result["model"] = args.model
+        result["samples"] = args.num_prompts
+
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+
+    api_url = f"http://{args.host}:{args.port}/v1/completions"
+    input_requests = sample_requests(args.num_prompts, args.input_len, args.output_len)
+
+    benchmark_start_time = time.perf_counter()
+    asyncio.run(
+        benchmark(
+            args.backend,
+            api_url,
+            args.model,
+            input_requests,
+            args.best_of,
+            args.use_beam_search,
+            args.request_rate,
+            args.num_prompts,
+            args.verbose,
+        )
+    )
+    benchmark_end_time = time.perf_counter()
+    benchmark_time = benchmark_end_time - benchmark_start_time
+
+    if args.verbose:
+        print()
+        print("RESULT SUMMARY")
+        print(f"Request rate: {args.request_rate} req/s")
+        print(f"Prompt count: {len(REQUEST_LATENCY)}")
+        print(f"Total time: {benchmark_time:.2f} s")
+        print(
+            f"Request Throughput: {len(REQUEST_LATENCY) / benchmark_time:.2f} requests/s"
+        )
+        print(
+            f"Output Token Throughput: {sum([output for _, output, _ in REQUEST_LATENCY]) / benchmark_time:.2f} tokens/s"
+        )
+        print()
+    else:
+        result["metric"] = "TPUT"  # Throughput
+        result["mean"] = len(REQUEST_LATENCY) / benchmark_time
+        print(json.dumps(result))
+        result["metric"] = "TT"  # Token throughput
+        result["mean"] = (
+            sum([output for _, output, _ in REQUEST_LATENCY]) / benchmark_time
+        )
+        print(json.dumps(result))
+
+    # Compute the latency statistics.
+ avg_latency = np.mean([latency for _, _, latency in REQUEST_LATENCY]) + if args.verbose: + print("REQUEST LATENCIES") + print(f"Avg: {avg_latency:.2f} s") + print( + f"50p: {np.percentile([latency for _, _, latency in REQUEST_LATENCY], 50)} s" + ) + print( + f"90p: {np.percentile([latency for _, _, latency in REQUEST_LATENCY], 90)} s" + ) + print( + f"99p: {np.percentile([latency for _, _, latency in REQUEST_LATENCY], 99)} s" + ) + print() + else: + result["metric"] = "E2E" # Request latency + result["mean"] = avg_latency + result["P50"] = np.percentile( + [latency for _, _, latency in REQUEST_LATENCY], 50 + ) + result["P90"] = np.percentile( + [latency for _, _, latency in REQUEST_LATENCY], 90 + ) + result["P99"] = np.percentile( + [latency for _, _, latency in REQUEST_LATENCY], 99 + ) + print(json.dumps(result)) + + all_token_latencies = np.array( + [token_latencies for _, _, token_latencies in TOKEN_LATENCY] + ) + if args.verbose: + print("TOKEN LATENCIES") + print("TTFT") + print(f"Avg: {np.mean(TIME_TO_FIRST_TOKEN)}") + print(f"50p: {np.percentile(TIME_TO_FIRST_TOKEN, 50)}") + print(f"90p: {np.percentile(TIME_TO_FIRST_TOKEN, 90)}") + print(f"99p: {np.percentile(TIME_TO_FIRST_TOKEN, 99)}") + print("TPOT") + print(f"Avg: {np.mean(all_token_latencies)}") + print(f"50p: {np.percentile(all_token_latencies, 50)}") + print(f"90p: {np.percentile(all_token_latencies, 90)}") + print(f"99p: {np.percentile(all_token_latencies, 99)}") + print() + else: + result["metric"] = "TTFT" # Time to first token + result["mean"] = np.mean(TIME_TO_FIRST_TOKEN) + result["P50"] = np.percentile(TIME_TO_FIRST_TOKEN, 50) + result["P90"] = np.percentile(TIME_TO_FIRST_TOKEN, 90) + result["P99"] = np.percentile(TIME_TO_FIRST_TOKEN, 99) + print(json.dumps(result)) + result["metric"] = "TPOT" # Token latency + result["mean"] = np.mean(all_token_latencies) + result["P50"] = np.percentile(all_token_latencies, 50) + result["P90"] = np.percentile(all_token_latencies, 90) + result["P99"] = np.percentile(all_token_latencies, 99) + print(json.dumps(result)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Benchmark the online serving throughput." + ) + parser.add_argument("--backend", type=str, default="vllm", choices=["vllm"]) + parser.add_argument("--host", type=str, default="localhost") + parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--model", type=str, default="llama2-7b") + parser.add_argument( + "--best-of", + type=int, + default=1, + help="Generates `best_of` sequences per prompt and " "returns the best one.", + ) + parser.add_argument("--use-beam-search", action="store_true") + parser.add_argument( + "--num-prompts", type=int, default=1000, help="Number of prompts to process." + ) + parser.add_argument( + "--request-rate", + type=float, + default=float("inf"), + help="Number of requests per second. If this is inf, " + "then all the requests are sent at time 0. 
" + "Otherwise, we use Poisson process to synthesize " + "the request arrival times.", + ) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument( + "--trust-remote-code", + action="store_true", + help="trust remote code from huggingface", + ) + parser.add_argument("--input_len", type=int, default=0) + parser.add_argument("--output_len", type=int, default=0) + parser.add_argument("--verbose", action="store_true") + args = parser.parse_args() + main(args) diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a100.json b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a100.json new file mode 100644 index 00000000..c3db63ef --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a100.json @@ -0,0 +1 @@ +{"gpu": "simulator-llama2-7b-a100", "cost": 1.0, "tputs": [[62.4770592037908, 54.41609758859857, 27.479217348298977, 13.233761409619609, 6.364180103253477], [60.316466916176516, 50.191932920882834, 26.125096038397714, 12.725834514073275, 6.189499703439544], [56.75260595422426, 47.022895270218406, 24.2563402721543, 11.908798077103592, 5.906818680155886], [50.55276394015143, 40.282564741237664, 21.527138429115244, 11.091621232153178, 5.340385383428255], [39.156908682972, 31.064483545526286, 17.981327336506435, 9.485761082753623, 4.766802405552006], [25.48358337772967, 20.84102605997694, 13.326249617862985, 7.462876735738044, 3.8320375960764395], [14.400357243268942, 12.221727123400482, 8.517260437465913, 5.099627101138905, 2.732587694175802], [7.062882339740163, 6.174371506110377, 4.67281574035367, 2.662288521332232, 1.6317610529300335]], "indexes": [[4, 8, 16, 32, 64, 128, 256, 512], [128, 256, 512, 1024, 2048]]} \ No newline at end of file diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a100.jsonl b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a100.jsonl new file mode 100644 index 00000000..6e2132cc --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a100.jsonl @@ -0,0 +1,1401 @@ + +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.086357362685611} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.345429450742444} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5771615283563734, "P50": 0.07662364543648437, "P90": 1.549518520978747, "P99": 3.8811742244416414} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7528682947158812e-05, "P50": 1.0521034710109234e-05, "P90": 2.0274636335670964e-05, "P99": 0.00016197128919884568} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1682014960177365} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.672805984070946} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.19920351037057116, "P50": 0.07027070800540969, "P90": 0.44559421732556087, "P99": 1.6741827056091292} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0985367698594927e-05, "P50": 9.416020475327969e-06, "P90": 1.2779200915247215e-05, "P99": 4.8935888335108806e-05} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.318810254169183} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.27524101667673} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.13517433913191781, "P50": 0.07053945795632899, "P90": 0.2768572124303319, "P99": 0.7365042199345784} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4327096287161113e-05, "P50": 9.874987881630659e-06, "P90": 1.4874746557325129e-05, "P99": 0.00010921860230155327} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.566511271549214} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.266045086196854} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.10041515829390846, "P50": 0.0754016874707304, "P90": 0.16009159610839568, "P99": 0.35752390045905563} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.687670242972672e-05, "P50": 1.0895950254052877e-05, "P90": 2.0363123621791604e-05, "P99": 0.0001617666915990417} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.84719843738847} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.38879374955388} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.07889864373020827, "P50": 0.07035477104363963, "P90": 0.09365727554541083, "P99": 0.22760147962369973} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.190663876011968e-05, "P50": 9.333016350865364e-06, "P90": 1.6449461691081526e-05, "P99": 0.00048634295118973083} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, 
"P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 33.02580437455391} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 132.10321749821563} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.08276698998641223, "P50": 0.0773780204472132, "P90": 0.10706411215942355, "P99": 0.12116073743905877} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.206206205300987e-05, "P50": 9.458570275455713e-06, "P90": 1.2412201613187807e-05, "P99": 0.0006101573863998062} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 62.4770592037908} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 249.9082368151632} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.09952875833841972, "P50": 0.09836156200617552, "P90": 0.12406870770500976, "P99": 0.14246371580287817} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.9593348521739244e-05, "P50": 7.415947038680315e-06, "P90": 1.3582967221736918e-05, "P99": 0.000831242722924801} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0848212250839921} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.678569800671937} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4038926017086487, "P50": 0.1173883544979617, "P90": 0.9488833664567211, "P99": 2.915633083698345} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.978917163796723e-05, "P50": 1.587503356859088e-05, "P90": 2.3733917623758318e-05, "P99": 0.00018421690445393334} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.161653887225289} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.293231097802312} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.25576053583994507, "P50": 0.11605822946876287, "P90": 0.5508630291908051, "P99": 
1.4720924447826134} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.283498761244118e-05, "P50": 1.9812490791082382e-05, "P90": 2.549228956922889e-05, "P99": 5.747760296799378e-05} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.295218545839691} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.36174836671753} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.17068605831707828, "P50": 0.11447974992915988, "P90": 0.3068012208561413, "P99": 0.751132020846709} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.995875314809382e-05, "P50": 1.4021527022123337e-05, "P90": 2.0787480752915148e-05, "P99": 4.651532392018326e-05} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.508878860853818} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.07103088683054} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.13067865751800128, "P50": 0.11227306252112612, "P90": 0.14098229573573923, "P99": 0.44679273651330764} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.2169125732034444e-05, "P50": 1.3666518498212099e-05, "P90": 2.1383829880505805e-05, "P99": 0.00014777324628085153} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.705719069411668} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.64575255529334} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.12004649792099371, "P50": 0.1149146250099875, "P90": 0.13718223348259928, "P99": 0.24815340642002423} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.9990850016474724e-05, "P50": 9.750016033649445e-06, "P90": 2.0545220468193293e-05, "P99": 0.0005513804371003088} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPUT", "mean": 32.29420450222575} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 258.353636017806} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.1318141533760354, "P50": 0.127228063007351, "P90": 0.1676196794607677, "P99": 0.18667740105302075} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0130070149898529e-05, "P50": 8.333532605320215e-06, "P90": 1.0050390847027303e-05, "P99": 2.207825193181751e-05} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 60.316466916176516} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 482.53173532941213} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.17259948405786418, "P50": 0.17381235351786017, "P90": 0.21828428328735755, "P99": 0.24213344969437461} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.9953820891678336e-05, "P50": 7.68748577684164e-06, "P90": 0.00018046253826469185, "P99": 0.00038570118020289694} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0838613392072483} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.341781427315972} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.47775447918334973, "P50": 0.19243758398806676, "P90": 1.0760029837489133, "P99": 2.9227947705681485} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.769251306541264e-05, "P50": 1.95419997908175e-05, "P90": 2.2871547844260935e-05, "P99": 2.8598294593393916e-05} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1590861743178604} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.54537878908577} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.30894945496111176, "P50": 0.19140570849413052, "P90": 0.5684892293764279, "P99": 1.4819532390998236} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", 
"mean": 1.9241607515141367e-05, "P50": 1.9020983017981052e-05, "P90": 2.1475658286362886e-05, "P99": 4.056974430568586e-05} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.282746999738656} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.52395199581849} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.23515985541860573, "P50": 0.19270591653184965, "P90": 0.27263510028133203, "P99": 0.8623110938456382} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5940004959702492e-05, "P50": 1.593749038875103e-05, "P90": 2.3020745720714332e-05, "P99": 2.5741837453097123e-05} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.446410236647264} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 135.14256378635622} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.20224567544762975, "P50": 0.19472839549416676, "P90": 0.21129165841266517, "P99": 0.4319290520134386} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.847798496484756e-05, "P50": 1.2499978765845299e-05, "P90": 2.209648955613375e-05, "P99": 5.8233310701342987e-05} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.420122517245584} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 262.72196027592935} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.21016802792903036, "P50": 0.20612227049423382, "P90": 0.24052918772213164, "P99": 0.2596136144921184} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4541749153286217e-05, "P50": 1.0145478881895542e-05, "P90": 2.0549935288727288e-05, "P99": 0.0003329421230591852} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 31.390153356402557} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TT", "mean": 502.2424537024409} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2401115512580145, "P50": 0.234509312431328, "P90": 0.2886574381846003, "P99": 0.30181776609504596} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.0110431145876644e-05, "P50": 8.167000487446785e-06, "P90": 1.4225556515157223e-05, "P99": 0.0005764270096551654} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 56.75260595422426} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 908.0416952675881} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.32703391212853605, "P50": 0.34461652097525075, "P90": 0.37702022021403536, "P99": 0.39075285891187383} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.831577862612903e-05, "P50": 7.791444659233093e-06, "P90": 0.00018998327432200366, "P99": 0.0011166604037862284} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.082283403848549} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.633068923153566} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.587237141279038, "P50": 0.3461519375559874, "P90": 1.1370888003031725, "P99": 2.925335025603422} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1342841209843753e-05, "P50": 1.9437400624155998e-05, "P90": 2.3545336443930864e-05, "P99": 0.00014682390843518116} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1524319056176275} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.87782097976408} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.423451142932754, "P50": 0.3473209374351427, "P90": 0.45898117935285004, "P99": 1.7005480199877652} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.985002520494163e-05, "P50": 1.9458006136119366e-05, "P90": 2.2675294894725086e-05, "P99": 
0.00020690952776932146} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.256613355570663} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.21162737826123} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.36586510961526075, "P50": 0.35069879150250927, "P90": 0.37691783281043173, "P99": 0.544881962051151} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.211330574937165e-05, "P50": 1.8125458154827356e-05, "P90": 2.29000230319798e-05, "P99": 0.00020807203603908435} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.324839684603202} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 266.39486990730245} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.36615637212409635, "P50": 0.36275454150745645, "P90": 0.39060438718879603, "P99": 0.42966384326689894} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5092898393049836e-05, "P50": 1.368753146380186e-05, "P90": 2.2670254111289978e-05, "P99": 3.198199672624472e-05} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.020796167558455} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 512.6654773618706} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.396078654108569, "P50": 0.38884189596865326, "P90": 0.4471976249944419, "P99": 0.47633762589190154} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.598957042209804e-05, "P50": 1.1729018297046423e-05, "P90": 2.029588213190437e-05, "P99": 0.0002238259126897939} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 29.659845691502795} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 949.1150621280894} +{"input_tokens": 128, "output_tokens": 
32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.48604107543011194, "P50": 0.49269695801194757, "P90": 0.5577930417377501, "P99": 0.5698050788254478} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1557082179933787e-05, "P50": 8.93743708729744e-06, "P90": 1.4817167539149531e-05, "P99": 3.5090878373012954e-05} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 50.55276394015143} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1617.6884460848457} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6444297766697127, "P50": 0.662377624947112, "P90": 0.7578444083803333, "P99": 0.7731921944150236} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.3900426719337703e-05, "P50": 8.645467460155487e-06, "P90": 0.00018765870481729564, "P99": 0.0005236325168516495} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0787484610079943} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 69.03990150451163} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8056521712243557, "P50": 0.6502022710628808, "P90": 0.8679594916524376, "P99": 3.3431883729994323} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0910429302603006e-05, "P50": 1.870852429419756e-05, "P90": 2.279612235724926e-05, "P99": 7.351448060944829e-05} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.13820603502695} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.8451862417248} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6810257883556187, "P50": 0.6552358124754392, "P90": 0.6790873624500818, "P99": 1.7024451811308985} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0059965318068863e-05, "P50": 1.8437509424984455e-05, "P90": 2.213336993008852e-05, "P99": 0.0001088576007168744} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.197434759786384} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 268.6358246263286} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6750569645350333, "P50": 0.6689727289485745, "P90": 0.7130743202986196, "P99": 0.7979935437906533} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7592562362551688e-05, "P50": 1.82289513759315e-05, "P90": 2.2641674149781466e-05, "P99": 7.397647364996381e-05} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.122701089852669} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 519.8528697505708} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7175694587652106, "P50": 0.7059098544996232, "P90": 0.7758956332923844, "P99": 0.8204708795587071} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4952084058895706e-05, "P50": 1.4166987966746092e-05, "P90": 2.4716684129089125e-05, "P99": 0.00020380698493682164} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 15.195016931044663} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 972.4810835868584} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8164577254245523, "P50": 0.8123938749777153, "P90": 0.8952106539509259, "P99": 0.9224257393123116} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5271628508344294e-05, "P50": 1.1416501365602016e-05, "P90": 2.225849311798811e-05, "P99": 3.479701234027829e-05} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 26.437457148837527} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1691.9972575256018} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.0289503704279195, "P50": 
1.0502058129641227, "P90": 1.1632120788097382, "P99": 1.1988216087420005} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.498255973681808e-05, "P50": 9.104027412831783e-06, "P90": 1.7854163888841875e-05, "P99": 0.000551900434074925} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 39.156908682972} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2506.042155710208} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.2972781229345127, "P50": 1.310415166488383, "P90": 1.4566859166370705, "P99": 1.4743735903303605} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.215879111550748e-05, "P50": 8.125032763928175e-06, "P90": 0.0002047669491730631, "P99": 0.0008925050136167557} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.071665740629028} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 137.17321480051558} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.3211867283086758, "P50": 1.2702567080268636, "P90": 1.3005923797492869, "P99": 3.3733571592555407} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8947493517771364e-05, "P50": 1.9104452803730965e-05, "P90": 2.3212283849716187e-05, "P99": 4.5569501817227194e-05} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.109996730112899} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 270.07958145445104} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.2965419499750714, "P50": 1.2903598544653505, "P90": 1.344870241661556, "P99": 1.3767585446976591} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.936998683959246e-05, "P50": 1.9083498045802116e-05, "P90": 2.297976752743125e-05, "P99": 3.2929203007371156e-05} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, 
"request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.092633645464883} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 523.857106619505} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.366081199174514, "P50": 1.3527686669840477, "P90": 1.4612741496879607, "P99": 1.4814240517199506} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.789877756498754e-05, "P50": 1.9146478734910488e-05, "P90": 2.3837201297283173e-05, "P99": 0.00014181922189894917} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.6749238663857575} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 982.390254897377} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.51887807746185, "P50": 1.522552208043635, "P90": 1.6183976580854504, "P99": 1.642096971906722} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.061041934415698e-05, "P50": 1.4417048078030348e-05, "P90": 2.4883321020752202e-05, "P99": 0.00042688084417022806} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 13.412244571079286} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1716.7673050981487} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.8140000416431576, "P50": 1.8535969374352135, "P90": 1.9932838034583256, "P99": 2.025360436635092} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.831129150465131e-05, "P50": 1.2896023690700531e-05, "P90": 2.162504242733122e-05, "P99": 0.0010160666971933128} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 20.16729900274998} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2581.4142723519976} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.27416826458415, "P50": 2.3008645209483802, "P90": 2.4679352839360944, "P99": 2.488999500090722} +{"input_tokens": 128, "output_tokens": 128, 
"request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.27266603987664e-05, "P50": 1.1103518772870302e-05, "P90": 2.2641255054622895e-05, "P99": 0.0010583138209767687} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 25.48358337772967} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3261.8986723493977} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.6427507874695584, "P50": 2.6775518539943732, "P90": 2.7731099204858767, "P99": 2.7969747928739523} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.6861704429611564e-05, "P50": 8.417060598731041e-06, "P90": 2.1467031911015557e-05, "P99": 0.0005505002534482636} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0578387750070264} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 270.80672640179876} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.5522095283912494, "P50": 2.5381988955195993, "P90": 2.6451381584513003, "P99": 2.7053999315598047} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0924545824527742e-05, "P50": 2.0541483536362648e-05, "P90": 2.780823269858957e-05, "P99": 5.079204449430213e-05} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0539029754410136} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 525.7991617128995} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.6874480787478388, "P50": 2.6767549790092744, "P90": 2.868166512728203, "P99": 2.887389548289357} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9700394477695227e-05, "P50": 1.9000028260052204e-05, "P90": 2.2812827955931426e-05, "P99": 6.018881569616589e-05} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.8569367968050043} 
+{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 987.3758199820811} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.9778688216709996, "P50": 2.991946000023745, "P90": 3.145292520755902, "P99": 3.1722968020895497} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.040127292275429e-05, "P50": 1.8895952962338924e-05, "P90": 2.3445731494575744e-05, "P99": 0.00010863741394132453} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.733075967661567} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1723.6674477213612} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.5372579934296664, "P50": 3.6115806460147724, "P90": 3.8503064579796047, "P99": 3.8832920228142758} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.724792209453881e-05, "P50": 1.672899816185236e-05, "P90": 2.3795804008841517e-05, "P99": 0.001797456034692027} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 10.069568467454008} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2577.809527668226} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.446349104582332, "P50": 4.58973133348627, "P90": 4.792359408375342, "P99": 4.817792727851774} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.266415908001363e-05, "P50": 1.0999967344105244e-05, "P90": 2.2062764037400485e-05, "P99": 0.0007490627979859729} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.49178066748219} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3197.895850875441} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.29856045031338, "P50": 5.3649150835117325, "P90": 5.495641728816554, "P99": 5.533529489948414} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.4528286196291443e-05, 
"P50": 1.0874529834836721e-05, "P90": 2.1996442228555693e-05, "P99": 0.0006225476588588224} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 14.400357243268942} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3686.4914542768493} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.623559730021515, "P50": 5.651662062504329, "P90": 5.756198083690833, "P99": 5.764771016449668} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.930795567110181e-05, "P50": 8.500006515532732e-06, "P90": 0.000198095536325127, "P99": 0.0007137205544859177} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0276552523330384} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 526.1594891945157} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.479355860430514, "P50": 5.489068562514149, "P90": 5.870515241660178, "P99": 5.910998695405433} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7998770345002414e-05, "P50": 1.9312486983835697e-05, "P90": 2.3020850494503976e-05, "P99": 4.567282274365438e-05} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.9291060162397229} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 987.7022803147381} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.132683566295309, "P50": 6.180763042008039, "P90": 6.494049708324019, "P99": 6.563638061523671} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4726239498704672e-05, "P50": 1.9021041225641966e-05, "P90": 2.305436646565795e-05, "P99": 5.3718753624711155e-05} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.3143843168668763} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", 
"mean": 1696.9647702358407} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.470833628680557, "P50": 7.6403723754920065, "P90": 8.185349625337404, "P99": 8.28930053969496} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.198627917096019e-05, "P50": 1.9291997887194157e-05, "P90": 2.2808299399912363e-05, "P99": 0.00010356292361393766} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.795936192082975} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2455.519330346483} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.554453217975096, "P50": 9.976080396038014, "P90": 10.378577524621505, "P99": 10.414448448014445} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8312917090952397e-05, "P50": 1.739547587931156e-05, "P90": 2.2221193648874764e-05, "P99": 0.00013061723555438223} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.848074295800323} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2994.2140394497656} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.30363092791871, "P50": 11.583069666463416, "P90": 11.848537462181412, "P99": 11.8886851079599} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7619647551327943e-05, "P50": 1.3104465324431658e-05, "P90": 2.1629268303513527e-05, "P99": 0.00011157787754200478} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.584831586889207} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3371.433772487274} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 12.266573041288648, "P50": 12.403535083460156, "P90": 12.539860474993475, "P99": 12.576047049037879} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.693169914186001e-05, "P50": 1.0958057828247547e-05, "P90": 2.4299381766468287e-05, "P99": 0.000633028440643102} +{"input_tokens": 128, 
"output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.062882339740163} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3616.1957579469636} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 12.71862816042616, "P50": 12.734790687973145, "P90": 12.821993129188195, "P99": 12.838499542864739} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.664961714297533e-05, "P50": 8.916540537029505e-06, "P90": 2.2175244521349732e-05, "P99": 0.0007496873056516066} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0858991608700372} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.343596643480149} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5765578621206805, "P50": 0.08346270851325244, "P90": 1.5433203752851132, "P99": 3.871423469659643} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1242945911362767e-05, "P50": 9.47900116443634e-06, "P90": 1.4924642164260157e-05, "P99": 5.668285069987179e-05} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.163464015945299} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.653856063781197} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.22893346664030104, "P50": 0.08304983307607472, "P90": 0.5502983874757776, "P99": 1.355692834298828} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0170494681224228e-05, "P50": 1.6707985196262598e-05, "P90": 2.1187460515648133e-05, "P99": 0.00013212230405770299} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.304508314891276} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.218033259565104} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", 
"samples": 100, "metric": "E2E", "mean": 0.15099859419162384, "P50": 0.08782633295049891, "P90": 0.30420143281808126, "P99": 0.7446962890564477} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.320911968126893e-05, "P50": 1.6979000065475702e-05, "P90": 2.3224682081490755e-05, "P99": 0.00018635206855833556} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.525861768181803} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.103447072727214} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.12295378495473415, "P50": 0.08494802098721266, "P90": 0.23066320416983235, "P99": 0.5013619510713034} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.7920853355899456e-05, "P50": 1.208350295200944e-05, "P90": 2.0254089031368496e-05, "P99": 0.0005231851106509572} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.76634409667255} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.0653763866902} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.09441445502103306, "P50": 0.08633639599429443, "P90": 0.12368540446041158, "P99": 0.18793641378637427} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.325835986994207e-05, "P50": 9.56258736550808e-06, "P90": 1.904941163957119e-05, "P99": 0.0003305008669849508} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 32.3132063155413} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 129.2528252621652} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.12239365121233277, "P50": 0.11566745844902471, "P90": 0.17721074547152968, "P99": 0.22005603091791276} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.171617048792541e-05, "P50": 8.6455256678164e-06, "P90": 1.6308575868606568e-05, "P99": 0.0006583392585162074} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} 
+{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 54.41609758859857} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 217.66439035439427} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2193210779619403, "P50": 0.20476914598839357, "P90": 0.32059574230806903, "P99": 0.34998332040268004} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2262030504643916e-05, "P50": 6.9785164669156075e-06, "P90": 1.090425066649914e-05, "P99": 0.00015965735074132694} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.08474402761641} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.67795222093128} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4085118675371632, "P50": 0.12456560449209064, "P90": 0.9619420624687338, "P99": 2.9055838279810318} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.263620379380882e-05, "P50": 1.8458522390574217e-05, "P90": 2.5145884137600664e-05, "P99": 0.00017170489532873146} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1627836604222628} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.302269283378102} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2611129966657609, "P50": 0.12403954192996025, "P90": 0.5433537039207298, "P99": 1.4724144953792009} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.750748953782022e-05, "P50": 1.71255087479949e-05, "P90": 2.461731201037765e-05, "P99": 3.5342444898561013e-05} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.29736097804135} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.3788878243308} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.17888113708700984, "P50": 0.1290393119561486, "P90": 0.29002419939497504, "P99": 0.7475321508094215} +{"input_tokens": 256, 
"output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.751749194227159e-05, "P50": 1.516647171229124e-05, "P90": 2.2041122429072856e-05, "P99": 3.691108780913132e-05} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.495714069272305} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.96571255417844} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.14458079370902852, "P50": 0.12405370845226571, "P90": 0.174541000055615, "P99": 0.4449405495566319} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.214871907606721e-05, "P50": 1.4104472938925028e-05, "P90": 2.4974741972982886e-05, "P99": 0.0006270113098435143} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.675448722071046} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.40358977656837} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.1446495067316573, "P50": 0.13588352093938738, "P90": 0.1976655750302598, "P99": 0.23097994384937934} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7694979906082153e-05, "P50": 9.146053344011307e-06, "P90": 1.1566898319870236e-05, "P99": 2.9251169180501854e-05} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 31.743326005665654} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 253.94660804532523} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.19343522920506076, "P50": 0.18544729199493304, "P90": 0.2831004670006223, "P99": 0.30610120845492933} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3172902399674057e-05, "P50": 8.834002073854208e-06, "P90": 1.0800024028867488e-05, "P99": 2.3369910195472455e-05} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 50.191932920882834} 
+{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 401.5354633670627} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3642905450053513, "P50": 0.35862472851295024, "P90": 0.45837252115597954, "P99": 0.5068265449348838} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.5240430627018216e-05, "P50": 8.47899354994297e-06, "P90": 0.0001076165470294658, "P99": 0.000669586401199924} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.084564027534048} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.35302444054477} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.47924435454886405, "P50": 0.1981679790187627, "P90": 1.0702562996535567, "P99": 2.904140042376244} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0906634852290153e-05, "P50": 9.979470632970333e-06, "P90": 1.388746313750744e-05, "P99": 2.351672155782581e-05} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1613659889561814} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.5818558232989} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3200889616936911, "P50": 0.20188506203703582, "P90": 0.6295414792490194, "P99": 1.3604823970841249} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.827503114938736e-05, "P50": 1.0165967978537083e-05, "P90": 2.1133653353899725e-05, "P99": 0.00019023792468944383} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.292095405326213} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.67352648521941} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2362159941403661, "P50": 0.19839270797092468, "P90": 0.24613456676015635, "P99": 0.8469354207802106} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.127123017795384e-05, "P50": 
9.645591489970684e-06, "P90": 1.7920893151313072e-05, "P99": 2.1712342277169228e-05} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.490544521006296} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 135.84871233610073} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.22126480214647018, "P50": 0.2111527500092052, "P90": 0.24576425004052002, "P99": 0.45748469296726424} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.142294241115451e-05, "P50": 9.541516192257404e-06, "P90": 1.4420982915908102e-05, "P99": 0.0006816491531208174} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.586837328466626} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 265.389397255466} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2569010691402946, "P50": 0.24417910404736176, "P90": 0.35069497864460586, "P99": 0.3948676114098635} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2650812277570366e-05, "P50": 9.041046723723412e-06, "P90": 1.1112168431282045e-05, "P99": 7.091202191077307e-05} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 31.44077673744191} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 503.0524277990706} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.34489422958809884, "P50": 0.34849243744974956, "P90": 0.4533042631112039, "P99": 0.506082383391913} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.417001989670098e-05, "P50": 8.875038474798203e-06, "P90": 1.190835610032082e-05, "P99": 0.0003449399047531198} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 47.022895270218406} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", 
"mean": 752.3663243234945} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6315249346243218, "P50": 0.632048396510072, "P90": 0.7682575619081036, "P99": 0.8171403134963475} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.379376488737762e-05, "P50": 8.895527571439743e-06, "P90": 3.22380685247483e-05, "P99": 0.0004042853391729319} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.082718338008157} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.64698681626103} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5940912608557847, "P50": 0.3556391460588202, "P90": 1.1372055505868048, "P99": 2.909663898278964} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.507456117309629e-05, "P50": 1.0936986654996872e-05, "P90": 2.076658420264721e-05, "P99": 0.000324073777301266} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.155156350726697} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.9650032232543} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.43360095455078407, "P50": 0.35531560401432216, "P90": 0.5001523664104753, "P99": 1.6918341012205942} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8644654192030428e-05, "P50": 1.4396035112440586e-05, "P90": 2.2104464005678898e-05, "P99": 0.0001711410633288326} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.260420155979824} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.33344499135436} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.37919944456894883, "P50": 0.3656423540087417, "P90": 0.3954117499990389, "P99": 0.859657835825347} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.467413268983364e-05, "P50": 1.004151999950409e-05, "P90": 1.9995518960058694e-05, "P99": 0.00010551412939094013} +{"input_tokens": 256, "output_tokens": 32, 
"request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.371612561082499} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 267.89160195463995} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.40052190453745423, "P50": 0.38955043751047924, "P90": 0.46579234191449365, "P99": 0.4970958173216791} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.967866137623787e-05, "P50": 9.89600084722042e-06, "P90": 1.5495915431529317e-05, "P99": 0.0008687480341177442} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.083903072480584} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 514.6848983193787} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.475287935041124, "P50": 0.4562642710516229, "P90": 0.5863971089245752, "P99": 0.6074958244978916} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.2750824466347695e-05, "P50": 9.72894486039877e-06, "P90": 1.9254675135016458e-05, "P99": 0.00040070739458315124} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 29.443006402919238} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 942.1762048934156} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6922839175269473, "P50": 0.7236537295393646, "P90": 0.8314497996703722, "P99": 0.8657554671715479} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.3338320897892117e-05, "P50": 9.604031220078468e-06, "P90": 1.7800286877900405e-05, "P99": 0.0003620444820262501} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 40.282564741237664} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1289.0420717196052} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", 
"samples": 100, "metric": "E2E", "mean": 1.134811123739928, "P50": 1.1242357494775206, "P90": 1.2637590463156811, "P99": 1.2862915969989264} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.5381698505952955e-05, "P50": 7.208494935184717e-06, "P90": 1.5308323781937378e-05, "P99": 0.0003398148843552918} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0793271122953128} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 69.07693518690002} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.817967022084631, "P50": 0.6647232500254177, "P90": 0.8892593753058471, "P99": 3.361503370036374} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4715873403474688e-05, "P50": 1.2000033166259527e-05, "P90": 2.1774985361844303e-05, "P99": 2.996671944856644e-05} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.140994976141398} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 137.02367847304947} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7002016733144409, "P50": 0.6741890835110098, "P90": 0.7083513953257352, "P99": 1.6932028951798581} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.34079219121486e-05, "P50": 1.1312018614262342e-05, "P90": 2.266615629196167e-05, "P99": 7.27231404744234e-05} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.212901764079116} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 269.6257129010634} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7078833233122714, "P50": 0.6972402919782326, "P90": 0.7671542707947083, "P99": 0.8179272063612008} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.398540339432657e-05, "P50": 9.916489943861961e-06, "P90": 1.9508227705955505e-05, "P99": 0.0005129866511561188} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} 
+{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.142376233317094} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 521.112078932294} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7880544170713983, "P50": 0.7763510000077076, "P90": 0.8861776502337307, "P99": 0.9353437683999073} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6681220149621367e-05, "P50": 9.208510164171457e-06, "P90": 1.3761979062110204e-05, "P99": 7.102979812771382e-05} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 15.141240615344003} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 969.0393993820162} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9879068507999181, "P50": 1.0033135000849143, "P90": 1.142919736832846, "P99": 1.163783793596085} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0186288747936487e-05, "P50": 9.604031220078468e-06, "P90": 1.3088108971714984e-05, "P99": 0.00010864900657907516} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 25.145491291404674} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1609.3114426498992} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.485665160829667, "P50": 1.5225230835494585, "P90": 1.7970578750246207, "P99": 1.8687943149730564} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.3204610915854573e-05, "P50": 9.624985978007317e-06, "P90": 1.4975073281675585e-05, "P99": 0.0002808860375080292} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 31.064483545526286} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1988.1269469136823} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.9781895812531003, "P50": 1.9915550419827923, "P90": 2.1995081339613534, "P99": 2.2318708943005188} 
+{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.3821655670180917e-05, "P50": 9.145529475063086e-06, "P90": 3.3220741897821765e-05, "P99": 0.0004436560953035957} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0722246288080135} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 137.24475248742573} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.3472194529499393, "P50": 1.2936318130232394, "P90": 1.3407184826792218, "P99": 3.3670402868068803} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6509974375367165e-05, "P50": 1.2666510883718729e-05, "P90": 2.3920834064483645e-05, "P99": 6.883035995997505e-05} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.112754743406063} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 270.4326071559761} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.350550255830167, "P50": 1.333793104509823, "P90": 1.444447854324244, "P99": 1.4946413125062836} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.736534044146538e-05, "P50": 1.1520518455654383e-05, "P90": 2.5279738474637275e-05, "P99": 0.002360447916435081} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.0877555756688215} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 523.2327136856092} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4581953858351335, "P50": 1.4457659379695542, "P90": 1.5874719165381976, "P99": 1.6668392648384907} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0645371405407788e-05, "P50": 1.0187504813075066e-05, "P90": 2.0507688168436297e-05, "P99": 0.00023017956526018816} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TPUT", "mean": 7.646377471511056} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 978.7363163534152} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.7140269558737053, "P50": 1.7305274170357734, "P90": 1.8700695788604207, "P99": 1.908657256292645} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0431591654196382e-05, "P50": 9.875046089291573e-06, "P90": 1.4941894914954903e-05, "P99": 2.0512539194896823e-05} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 13.038678256697022} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1668.9508168572188} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.283179170031799, "P50": 2.344745291979052, "P90": 2.6471125167096035, "P99": 2.684625677795848} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7220425652340053e-05, "P50": 9.374984074383974e-06, "P90": 1.375444699078799e-05, "P99": 0.00024897904484532773} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 18.03851095908702} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2308.9294027631386} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.138036660423968, "P50": 3.1811152914888225, "P90": 3.4244643034995534, "P99": 3.474593295444502} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.9535462381318212e-05, "P50": 8.85402550920844e-06, "P90": 1.2166087981313466e-05, "P99": 0.00044112209347076965} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 20.84102605997694} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2667.6513356770483} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.529533728362294, "P50": 3.531091375509277, "P90": 3.725023891939782, "P99": 3.762186046538409} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 
100, "metric": "TTFT", "mean": 2.5944968219846486e-05, "P50": 6.66698906570673e-06, "P90": 1.0941841173917068e-05, "P99": 0.0005420935293659596} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0582229589376073} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 270.9050774880275} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.6342328229057603, "P50": 2.608069520967547, "P90": 2.799116020859219, "P99": 2.863766221321421} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1779611706733704e-05, "P50": 1.2791540939360857e-05, "P90": 2.2691453341394674e-05, "P99": 0.00020377425593324047} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0532183906249} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 525.6239079999744} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.8324651416938287, "P50": 2.82581064599799, "P90": 3.0670187246403655, "P99": 3.094458782011643} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0284107886254788e-05, "P50": 1.0833493433892727e-05, "P90": 2.091671340167523e-05, "P99": 7.319516735151706e-05} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.8474025810402654} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 984.935060746308} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.245595489592524, "P50": 3.2797237500199117, "P90": 3.467827195231803, "P99": 3.5047620676760562} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2650018325075507e-05, "P50": 9.938084986060858e-06, "P90": 1.7762463539838794e-05, "P99": 4.047763533890274e-05} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.593161885347587} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, 
"model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1687.8494426489822} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.136487186633749, "P50": 4.245379353989847, "P90": 4.626938991062343, "P99": 4.736760062355315} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.190252718515694e-05, "P50": 9.854556992650032e-06, "P90": 1.538757933303714e-05, "P99": 3.849240834824768e-05} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 9.295145945831312} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2379.557362132816} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.661479999246076, "P50": 5.787344875512645, "P90": 6.132600600260775, "P99": 6.157418797396822} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2024965835735202e-05, "P50": 9.457929991185665e-06, "P90": 1.4471088070422416e-05, "P99": 5.984054878354139e-05} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.165376184240026} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2858.3363031654467} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.497012099531712, "P50": 6.562527354457416, "P90": 6.784898883616552, "P99": 6.800261413507396} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6600041417405008e-05, "P50": 9.124982170760632e-06, "P90": 1.2421153951436292e-05, "P99": 0.00026984214549884307} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.221727123400482} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3128.7621435905235} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.873195062942105, "P50": 6.89473168796394, "P90": 6.99461697100196, "P99": 7.008639948905912} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.6089977473020555e-05, "P50": 9.312527254223824e-06, "P90": 1.3441964983940137e-05, "P99": 
0.00042991193477064684} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0265539448969656} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 525.5956197872464} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.721908029631013, "P50": 5.762606895528734, "P90": 6.175257675314787, "P99": 6.229448454482481} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9243741407990457e-05, "P50": 1.1208525393158197e-05, "P90": 2.6104180142283448e-05, "P99": 0.00020167805952951327} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.9183577369235485} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 982.1991613048568} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.582034687490668, "P50": 6.686537728994153, "P90": 6.985474583378528, "P99": 7.117095041297143} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.3006683913990856e-05, "P50": 1.252046786248684e-05, "P90": 2.3087149020284415e-05, "P99": 9.933170396834904e-05} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.2494035898757923} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1663.6946380164056} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.431422060885234, "P50": 8.63577237498248, "P90": 9.447434616251849, "P99": 9.52179555294686} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5707049751654267e-05, "P50": 1.0292045772075653e-05, "P90": 1.9829394295811658e-05, "P99": 0.00011714723659679362} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.478650042974047} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2293.068822002712} +{"input_tokens": 256, "output_tokens": 512, 
"request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.355362017943989, "P50": 11.725737541972194, "P90": 12.388852799753659, "P99": 12.418326241929316} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4318728353828192e-05, "P50": 9.958050213754177e-06, "P90": 1.946735428646207e-05, "P99": 8.250533370301189e-05} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.331616090870169} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2729.7874385255263} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 13.333894744606223, "P50": 13.517764271004125, "P90": 13.878819721494802, "P99": 13.906422782826704} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2285070260986685e-05, "P50": 1.0000017937272787e-05, "P90": 1.1491449549794199e-05, "P99": 3.375266562216097e-05} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.928119432352029} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3035.197149364239} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.300957123341504, "P50": 14.35390995797934, "P90": 14.558064133103471, "P99": 14.575903834614437} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.8432928267866376e-05, "P50": 9.312003385275602e-06, "P90": 1.0791944805532695e-05, "P99": 0.00028738484485076136} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.174371506110377} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 3161.2782111285132} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.82087841957691, "P50": 14.844757520942949, "P90": 14.89970308688935, "P99": 14.928390535311774} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.562582328915596e-05, "P50": 8.666480425745249e-06, "P90": 1.5287450514733796e-05, "P99": 0.0002344709180761139} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0853241548796302} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.341296619518521} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.39963760920218194, "P50": 0.1203714165603742, "P90": 0.9526600709301422, "P99": 2.9231106685858737} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6821272438392044e-05, "P50": 1.0853982530534267e-05, "P90": 1.9229552708566206e-05, "P99": 0.00016254590358585162} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.16535498393855} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.6614199357542} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2524859571002889, "P50": 0.11601018795045093, "P90": 0.5500460784998725, "P99": 1.4613116896699674} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.911210943944752e-05, "P50": 1.031247666105628e-05, "P90": 2.1453993394970903e-05, "P99": 0.0001484577124938381} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.314128833104234} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.256515332416935} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.1833609674940817, "P50": 0.12072152103064582, "P90": 0.3069796048686841, "P99": 0.9812813914811712} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5477073611691593e-05, "P50": 9.874987881630659e-06, "P90": 1.960056833922863e-05, "P99": 7.230461691506331e-05} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.530691533059661} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.122766132238645} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.14948496914701537, "P50": 
0.12429747899295762, "P90": 0.22903599580749884, "P99": 0.44002392294234627} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3858780730515719e-05, "P50": 9.541516192257404e-06, "P90": 1.3124977704137574e-05, "P99": 0.00012249372201040393} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.794872306236712} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.17948922494685} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.21428067207685672, "P50": 0.21031452098395675, "P90": 0.3594025249592961, "P99": 0.3971601021976677} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1872831964865328e-05, "P50": 9.166018571704626e-06, "P90": 9.904033504426481e-06, "P99": 0.00011241833912208715} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 25.957487622288475} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 103.8299504891539} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4710971937573049, "P50": 0.4080684999935329, "P90": 0.8388939250609838, "P99": 0.9066706329328009} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.461999353021383e-05, "P50": 9.312527254223824e-06, "P90": 1.1337222531437874e-05, "P99": 0.00032796134008095367} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 27.479217348298977} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 109.91686939319591} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.0949444357992615, "P50": 1.0369808745454066, "P90": 1.9708821167121642, "P99": 2.125427024040837} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0071706492453814e-05, "P50": 9.666953701525927e-06, "P90": 1.0682782158255578e-05, "P99": 2.8082060161978047e-05} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 
1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.085394783651674} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.683158269213392} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.44662249618326316, "P50": 0.15150356251979247, "P90": 1.1229449583566748, "P99": 2.90671318295063} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.587324193678796e-05, "P50": 9.562005288898945e-06, "P90": 1.6374990809708834e-05, "P99": 7.629990926943886e-05} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.162793795355247} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.302350362841977} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2849906612641644, "P50": 0.16304879152448848, "P90": 0.578767899691593, "P99": 1.4753983842546612} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3267077738419175e-05, "P50": 1.0040937922894955e-05, "P90": 2.083812141790986e-05, "P99": 5.858580931089823e-05} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.299446187205345} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.39556949764276} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.20510351672885008, "P50": 0.16317314602201805, "P90": 0.2620076284860264, "P99": 0.7467090870591359} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.273617730475962e-05, "P50": 9.729003068059683e-06, "P90": 1.4932861085981133e-05, "P99": 5.5093717528507716e-05} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.524247833391248} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.19398266712999} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.19602585462038405, "P50": 0.17045127099845558, "P90": 0.26605992496479325, "P99": 0.43072046905872446} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1172948870807885e-05, "P50": 9.62504418566823e-06, "P90": 1.0950269643217328e-05, "P99": 5.3938331548124646e-05} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.648846618248662} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.1907729459893} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3083386933861766, "P50": 0.2682743959594518, "P90": 0.5048838833812624, "P99": 0.5485062028910034} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.248549553565681e-05, "P50": 9.25001222640276e-06, "P90": 1.2041581794619563e-05, "P99": 6.92185969091957e-05} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 24.793860484748553} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 198.35088387798842} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6870286045770626, "P50": 0.6115455830004066, "P90": 1.0900208118138834, "P99": 1.1421177928405815} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.8224963461980223e-05, "P50": 9.500014130026102e-06, "P90": 1.3012241106480376e-05, "P99": 0.00045479775522835983} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 26.125096038397714} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 209.00076830718172} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.2987456187675708, "P50": 1.2242275000317022, "P90": 2.2646263000788167, "P99": 2.3089365715906025} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1867977445945143e-05, "P50": 9.833485819399357e-06, "P90": 1.1150294449180373e-05, "P99": 6.33249594829978e-05} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0843900621859033} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.350240994974452} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5018717679183464, "P50": 0.2385536669753492, "P90": 1.063537195534446, "P99": 2.909070706211501} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3546659611165523e-05, "P50": 9.770505130290985e-06, "P90": 1.8603925127536062e-05, "P99": 7.089018588885714e-05} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.161747587681536} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.587961402904575} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.35619536541868, "P50": 0.2426312500028871, "P90": 0.580104450217914, "P99": 1.9483272375387621} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5441671712324025e-05, "P50": 1.00834877230227e-05, "P90": 2.11667618714273e-05, "P99": 0.00011646452359855209} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.289590681645076} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.63345090632122} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2787030562036671, "P50": 0.24022550001973286, "P90": 0.3391672171768734, "P99": 0.8606667644192937} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4389186399057507e-05, "P50": 1.0021030902862549e-05, "P90": 1.7766666132956743e-05, "P99": 0.00017080090707167984} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.469826618481418} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 135.5172258957027} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3105983008199837, "P50": 0.2820404789526947, "P90": 0.4392833708086982, "P99": 0.5038204266352113} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3464543735608459e-05, "P50": 9.68750100582838e-06, "P90": 1.487475819885731e-05, "P99": 
7.530915201641686e-05} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.35548438044074} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 261.6877500870518} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.47929574372596107, "P50": 0.4132692499551922, "P90": 0.7390338796307333, "P99": 0.8330001516919584} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0221645934507251e-05, "P50": 9.2705013230443e-06, "P90": 1.4208024367690086e-05, "P99": 2.392726368270815e-05} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 23.52159521767381} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 376.34552348278095} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.012398488278268, "P50": 0.9647991039091721, "P90": 1.385409437888302, "P99": 1.4936023400363048} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3198357773944735e-05, "P50": 9.89600084722042e-06, "P90": 1.4891568571329117e-05, "P99": 0.00010590247926302285} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 24.2563402721543} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 388.1014443544688} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.6698636929597706, "P50": 1.654417833487969, "P90": 2.5981183917494493, "P99": 2.6219213143514937} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4357474865391851e-05, "P50": 9.416020475327969e-06, "P90": 1.1304742656648167e-05, "P99": 5.78641064930727e-05} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0826645394182621} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.64526526138439} +{"input_tokens": 512, "output_tokens": 32, 
"request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6761564442119561, "P50": 0.3996112289605662, "P90": 1.2269341497099966, "P99": 3.88155975276721} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3817474246025085e-05, "P50": 1.1104508303105831e-05, "P90": 2.064143773168326e-05, "P99": 2.7653073193505443e-05} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.153694516353925} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.9182245233256} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4681118050310761, "P50": 0.39666131255216897, "P90": 0.48317511639324967, "P99": 1.6936822388321173} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9055057782679798e-05, "P50": 1.0124989785254002e-05, "P90": 2.0675279665738346e-05, "P99": 0.0003394294250756503} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.266155001103381} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.5169600353082} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.44546285588410683, "P50": 0.42691791692050174, "P90": 0.5188306455966086, "P99": 0.8576424438215333} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.784166321158409e-05, "P50": 1.0000017937272787e-05, "P90": 1.916696783155203e-05, "P99": 4.726091632619769e-05} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.323197008288682} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 266.34230426523783} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5448914375097956, "P50": 0.5143633539555594, "P90": 0.7475221786065959, "P99": 0.8348009220149836} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8829567125067115e-05, "P50": 9.916024282574654e-06, "P90": 1.9775063265115036e-05, "P99": 0.0003314582211896782} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 15.737020124678372} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 503.5846439897079} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.827185874566203, "P50": 0.8275362920248881, "P90": 1.1217277327319608, "P99": 1.1746486150298734} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4245003694668412e-05, "P50": 9.124982170760632e-06, "P90": 1.4321168418973687e-05, "P99": 0.00017175912507809725} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 21.18948292238883} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 678.0634535164426} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.684050167093519, "P50": 1.7344680834794417, "P90": 2.0251322750002148, "P99": 2.1845607652934267} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0022138012573122e-05, "P50": 9.770970791578293e-06, "P90": 1.2283329851925376e-05, "P99": 1.5021667350083601e-05} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 21.527138429115244} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 688.8684297316878} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.375194926239783, "P50": 2.491477583476808, "P90": 3.1235096166958103, "P99": 3.1367141312139575} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6624636482447387e-05, "P50": 9.624985978007317e-06, "P90": 1.1500692926347259e-05, "P99": 0.00014386707334779393} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0791045853533707} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 69.06269346261573} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8555678475368768, "P50": 0.7085971669876017, 
"P90": 0.8787690293625936, "P99": 3.3650370262400275} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4818764757364988e-05, "P50": 1.0916555766016245e-05, "P90": 2.2145872935652734e-05, "P99": 5.208754562772815e-05} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1391320430516005} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.90445075530243} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7676465124811511, "P50": 0.7423416454694234, "P90": 0.8394255415652879, "P99": 1.7168188383360405} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7104605212807656e-05, "P50": 9.792041964828968e-06, "P90": 2.115427050739527e-05, "P99": 0.0001018375542480512} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.2029147193765075} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 268.9865420400965} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8367153870814945, "P50": 0.8209761040052399, "P90": 0.9849633715464734, "P99": 1.0838123386877123} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4097032835707068e-05, "P50": 9.895535185933113e-06, "P90": 1.8929515499621632e-05, "P99": 0.000108546882402152} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.084202765314256} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 517.3889769801124} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.041013903748244, "P50": 1.0140293749282137, "P90": 1.289649792236742, "P99": 1.3931722254841588} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.348959980532527e-05, "P50": 9.812472853809595e-06, "P90": 1.8754787743091586e-05, "P99": 0.00012705833534710118} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TPUT", "mean": 14.209913194792176} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 909.4344444666992} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.8330590513010976, "P50": 1.8146581249893643, "P90": 2.374987591779791, "P99": 2.474742190254619} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1663362383842469e-05, "P50": 9.562470950186253e-06, "P90": 1.4662521425634625e-05, "P99": 4.809073172509686e-05} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 17.455680137061332} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1117.1635287719253} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.970888859987026, "P50": 2.935387938050553, "P90": 3.203610887203831, "P99": 3.239047703183023} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1418721405789257e-05, "P50": 9.25001222640276e-06, "P90": 1.1671124957501888e-05, "P99": 3.5475435433910054e-05} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 17.981327336506435} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1150.8049495364119} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.6095944599772336, "P50": 3.859713853977155, "P90": 4.032191053801216, "P99": 4.047412172476761} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1543730506673455e-05, "P50": 9.541516192257404e-06, "P90": 1.225003506988287e-05, "P99": 4.431516164913817e-05} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.072029297403856} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 137.21975006769358} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4215292874013539, "P50": 1.363378207955975, "P90": 1.4710476207430476, "P99": 3.3722765638609444} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5246657421812414e-05, "P50": 1.1208001524209976e-05, "P90": 2.0662241149693733e-05, "P99": 6.264385068789154e-05} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.110176476143976} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 270.1025889464289} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4896124074771069, "P50": 1.4579396460321732, "P90": 1.6797418454661965, "P99": 1.7505321681429633} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.5953780859708786e-05, "P50": 1.0624993592500687e-05, "P90": 2.2921035997569562e-05, "P99": 0.00016499321674928453} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.076586788374159} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 521.8031089118923} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.7197102995798923, "P50": 1.7119940624688752, "P90": 1.949200586916413, "P99": 2.0305882126220967} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2563751079142094e-05, "P50": 9.979004971683025e-06, "P90": 2.096726093441248e-05, "P99": 3.697022213600591e-05} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.469959534415053} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 956.1548204051268} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.342019975812873, "P50": 2.367041292018257, "P90": 2.768015795585234, "P99": 2.8600424846983517} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.900854201987386e-06, "P50": 9.354494977742434e-06, "P90": 1.196719240397215e-05, "P99": 2.1777956280857467e-05} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.41342358751579} +{"input_tokens": 512, "output_tokens": 128, 
"request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1460.9182192020212} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.9562007754202932, "P50": 4.178131186985411, "P90": 4.753968679590616, "P99": 4.814964109440334} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1312146671116353e-05, "P50": 9.708513971418142e-06, "P90": 1.32378307171166e-05, "P99": 3.486038069240791e-05} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.91762611100206} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1653.4561422082636} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.158058143334929, "P50": 5.170413729560096, "P90": 5.441997204220388, "P99": 5.481457641128218} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.712903993204237e-06, "P50": 9.62504418566823e-06, "P90": 1.0833644773811106e-05, "P99": 1.7842815723270204e-05} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 13.326249617862985} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1705.759951086462} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.727120938351145, "P50": 5.756192083004862, "P90": 5.978621029353235, "P99": 5.996542964992114} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.829154005274176e-06, "P50": 9.20856837183237e-06, "P90": 1.031683059409261e-05, "P99": 3.643950098194189e-05} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0571981975744853} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 270.64273857906824} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.8292019816802347, "P50": 2.789546500018332, "P90": 3.145806662796531, "P99": 3.203061231202446} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.958497567102313e-05, "P50": 1.1478958185762167e-05, "P90": 
2.2975006140768547e-05, "P99": 0.000114136527990923} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0451159881512075} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 523.5496929667091} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.1781493575382047, "P50": 3.1685657710186206, "P90": 3.50332374147838, "P99": 3.5805413016490637} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2985854409635067e-05, "P50": 1.0104035027325153e-05, "P90": 1.9758578855544338e-05, "P99": 3.788499627262361e-05} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.788259676585335} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 969.7944772058457} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.9633901528839486, "P50": 4.056622874515597, "P90": 4.38472432049457, "P99": 4.455863529234193} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.340037677437067e-05, "P50": 1.0167015716433525e-05, "P90": 1.9562849774956704e-05, "P99": 7.520840503275411e-05} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.115295846298667} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1565.5157366524588} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.018960616713157, "P50": 6.1123060419922695, "P90": 6.9935738711967135, "P99": 7.074975113404216} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.124581671319902e-05, "P50": 9.603973012417555e-06, "P90": 1.8587743397802118e-05, "P99": 3.846860956400635e-05} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.704883148017656} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1972.4500858925198} 
+{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.240362638332881, "P50": 8.403064062469639, "P90": 8.840508841886185, "P99": 8.893440094406252} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.779994143173098e-06, "P50": 9.395997039973736e-06, "P90": 9.975582361221314e-06, "P99": 2.335834200493997e-05} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.403653265526522} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2151.3352359747896} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.250544152477524, "P50": 9.277617249987088, "P90": 9.446275887521915, "P99": 9.489515680031618} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2886228505522013e-05, "P50": 9.228999260812998e-06, "P90": 1.4208909124135992e-05, "P99": 3.6164312623442414e-05} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.517260437465913} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2180.4186719912736} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.886163354595192, "P50": 9.927259354502894, "P90": 10.19002681254642, "P99": 10.218074534691404} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.49798384681344e-06, "P50": 9.209034033119678e-06, "P90": 1.0345515329390765e-05, "P99": 2.351418137550357e-05} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0234988320883922} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 524.0314020292568} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.244554732975084, "P50": 6.264284791483078, "P90": 6.863005396141671, "P99": 6.921067132966127} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6097093466669322e-05, "P50": 1.2145494110882282e-05, "P90": 2.217014553025365e-05, "P99": 3.810332273133143e-05} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.8920203624397653} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 968.7144255691599} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.671305700450903, "P50": 7.948975292034447, "P90": 8.36609931247076, "P99": 8.412384276775411} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7414935864508152e-05, "P50": 1.287501072511077e-05, "P90": 2.068724716082216e-05, "P99": 9.74344636779286e-05} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0432583038337726} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1558.1482515628916} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.146890860842541, "P50": 11.6416577710188, "P90": 12.745596000319347, "P99": 12.856521723348415} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0415812032297253e-05, "P50": 1.0958581697195768e-05, "P90": 2.5437865406274796e-05, "P99": 0.00015291948569938618} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.8935065037744963} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1993.475329932542} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 15.365222542970441, "P50": 15.742177229491062, "P90": 16.51752832918428, "P99": 16.589490385081152} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1519608087837695e-05, "P50": 9.416951797902584e-06, "P90": 1.923809759318829e-05, "P99": 2.5463529163971864e-05} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.417024204459922} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2261.5163926834803} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"E2E", "mean": 17.687428593297955, "P50": 17.84090816648677, "P90": 18.267255395220126, "P99": 18.376761466348544} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5802925918251277e-05, "P50": 9.624985978007317e-06, "P90": 1.393326092511416e-05, "P99": 0.00017205995391123075} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.660174312606765} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2386.0092480546637} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 18.660517739955104, "P50": 18.66968716646079, "P90": 18.74806161718443, "P99": 18.792115757793653} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.701234521344304e-06, "P50": 9.437440894544125e-06, "P90": 1.0415923316031695e-05, "P99": 2.2108835401013634e-05} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.67281574035367} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 2392.481659061079} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 19.417977633817355, "P50": 19.49491033348022, "P90": 19.847440050018484, "P99": 19.86775625674287} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4022516552358866e-05, "P50": 8.978997357189655e-06, "P90": 1.0141683742403985e-05, "P99": 3.684334107674906e-05} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.085485647391279} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.341942589565116} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.45760101166786626, "P50": 0.1659188959747553, "P90": 1.1304090452147648, "P99": 2.9049094170646326} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.472166390158236e-05, "P50": 1.0583549737930298e-05, "P90": 2.026248257607222e-05, "P99": 3.0389163875953223e-05} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} 
+{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.162998558623766} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.651994234495064} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3033832325169351, "P50": 0.19801997946342453, "P90": 0.5819727919995785, "P99": 1.4639358428644507} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.607216010801494e-05, "P50": 1.0416493751108646e-05, "P90": 2.397977514192462e-05, "P99": 0.0002628553938120636} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.306859991595581} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.227439966382324} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.24030142383067868, "P50": 0.20040295843500644, "P90": 0.3358201292343439, "P99": 0.7441975062293945} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4822875382378697e-05, "P50": 9.833485819399357e-06, "P90": 1.7995596863329414e-05, "P99": 9.06591827515518e-05} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.506609663869995} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.02643865547998} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.31731165578588844, "P50": 0.29627424996579066, "P90": 0.5588125793379731, "P99": 0.6309169633127749} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2945483904331923e-05, "P50": 9.624985978007317e-06, "P90": 1.5087169595062743e-05, "P99": 2.641188213601834e-05} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.722840011626383} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 50.89136004650553} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9461505012842827, "P50": 0.8250248959520832, "P90": 1.8511670701671392, "P99": 2.0549534126499203} 
+{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.885410545393824e-06, "P50": 9.375042282044888e-06, "P90": 1.0420114267617465e-05, "P99": 1.9531252328306517e-05} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 13.057429058267578} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 52.22971623307031} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.3663575387804303, "P50": 2.2919036670355126, "P90": 4.290557700069622, "P99": 4.6934531922405585} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.889252151362598e-05, "P50": 9.707990102469921e-06, "P90": 1.0750023648142815e-05, "P99": 0.0008242380910087392} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 13.233761409619609} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 52.935045638478435} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.0319823757745326, "P50": 2.9905312500195578, "P90": 5.494011595298071, "P99": 6.0303201072954105} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1748719261959195e-05, "P50": 9.707990102469921e-06, "P90": 1.058327034115792e-05, "P99": 6.0427825665101566e-05} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0848258039756618} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.678606431805294} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4879608687502332, "P50": 0.22909075050847605, "P90": 1.073125075246208, "P99": 2.923190919193444} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5840816777199505e-05, "P50": 9.999959729611874e-06, "P90": 2.1804508287459614e-05, "P99": 3.704049973748741e-05} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPUT", "mean": 2.1628842888059094} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.303074310447276} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.33109991370001807, "P50": 0.2326534585445188, "P90": 0.5647629790939391, "P99": 1.4686654461280002} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.763584092259407e-05, "P50": 9.791459888219833e-06, "P90": 1.956282649189235e-05, "P99": 0.00011561882798560066} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.30326174981921} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.42609399855368} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2843221721064765, "P50": 0.23393004201352596, "P90": 0.4354692369117405, "P99": 0.8546073282172444} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8444162560626864e-05, "P50": 9.292038157582283e-06, "P90": 1.0425271466374398e-05, "P99": 0.00010838786954991882} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.456487777894496} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.65190222315597} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.43137065838091077, "P50": 0.42327397945337, "P90": 0.7369023247156292, "P99": 0.8051316710980609} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0081251384690403e-05, "P50": 9.895535185933113e-06, "P90": 1.0754121467471122e-05, "P99": 1.5053476672619587e-05} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.30578472167344} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 98.44627777338752} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.1793478961405344, "P50": 1.042463708552532, "P90": 2.244974278472364, "P99": 2.376851667080773} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 
1.0111626470461489e-05, "P50": 9.500072337687016e-06, "P90": 1.091696321964264e-05, "P99": 2.4409504840150494e-05} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.604277948576891} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 100.83422358861513} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.6185149938147516, "P50": 2.5330845415010117, "P90": 4.6574927330715585, "P99": 4.976031968375901} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0898754699155688e-05, "P50": 9.666022378951311e-06, "P90": 1.1666922364383937e-05, "P99": 2.1602039923891776e-05} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.725834514073275} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 101.8066761125862} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.340833688332932, "P50": 3.275162541482132, "P90": 5.933256200316828, "P99": 6.34355123998248} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2675009202212095e-05, "P50": 1.0083080269396305e-05, "P90": 1.448304392397405e-05, "P99": 4.6938339946791905e-05} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0839120773172688} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.3425932370763} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5496738887159154, "P50": 0.3118618124863133, "P90": 1.1214591371477585, "P99": 2.921242369117686} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7134540248662232e-05, "P50": 1.0562478564679623e-05, "P90": 1.9695749506354337e-05, "P99": 0.00014288590522482995} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1575944763409676} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", 
"samples": 100, "metric": "TT", "mean": 34.52151162145548} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3941471233172342, "P50": 0.32189758348977193, "P90": 0.5022326208301824, "P99": 1.4635692386387407} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.280586213804782e-05, "P50": 1.0124989785254002e-05, "P90": 1.9020820036530495e-05, "P99": 0.0005531728547066502} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.284918025391899} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.55868840627038} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.39349940039217474, "P50": 0.3406774794566445, "P90": 0.5792226167395712, "P99": 0.8578326604631736} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.207592897117138e-05, "P50": 1.025001984089613e-05, "P90": 1.7408176790922888e-05, "P99": 2.4374950444325996e-05} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.319937915785582} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.1190066525693} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6583689095533919, "P50": 0.6003236669930629, "P90": 1.0513028535293418, "P99": 1.1693844669940883} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3719581766054034e-05, "P50": 9.771028999239206e-06, "P90": 1.553307520225645e-05, "P99": 9.541451465338509e-05} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.662821871758768} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 186.60514994814028} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.6521119424235076, "P50": 1.5180227915407158, "P90": 2.8123575999983585, "P99": 2.8993421592202506} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4801238905638455e-05, "P50": 9.624985978007317e-06, "P90": 1.4242355246096852e-05, "P99": 0.00010864385520108642} 
+{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.71425899925196} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 187.42814398803137} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.1868224687490145, "P50": 3.1421753955073655, "P90": 5.438030425575562, "P99": 5.572460758396192} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1322512291371822e-05, "P50": 9.792041964828968e-06, "P90": 1.2383353896439077e-05, "P99": 4.2594405822455955e-05} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.908798077103592} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 190.54076923365747} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.849420009604655, "P50": 3.828638458508067, "P90": 6.641030599735678, "P99": 6.875907020384911} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7486620927229523e-05, "P50": 9.708572179079056e-06, "P90": 1.1108920443803075e-05, "P99": 7.81916652340472e-05} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0823278369345182} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.63449078190458} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7039809687796514, "P50": 0.4738154785009101, "P90": 1.140290812263266, "P99": 3.876727922754365} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1927492450922728e-05, "P50": 1.0124989785254002e-05, "P90": 1.6379286535084248e-05, "P99": 3.145575290545823e-05} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.152420620571863} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.87745985829962} +{"input_tokens": 1024, "output_tokens": 32, 
"request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5459730337641667, "P50": 0.4775873539620079, "P90": 0.6581056292983705, "P99": 1.697644401530271} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2982068583369255e-05, "P50": 1.0270916391164064e-05, "P90": 2.0004226826131344e-05, "P99": 3.5595126682892706e-05} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.252430044881791} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.07776143621732} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6575726899667643, "P50": 0.6027270205086097, "P90": 0.983001333323773, "P99": 1.0756354621320499} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.135036513209343e-05, "P50": 9.917013812810183e-06, "P90": 1.937530469149352e-05, "P99": 4.0462093893443415e-05} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.09950347977996} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 259.1841113529587} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.0668736191641073, "P50": 0.9679012084961869, "P90": 1.6406559829832987, "P99": 1.8095920241868593} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1087058810517192e-05, "P50": 9.624985978007317e-06, "P90": 1.4725024811923512e-05, "P99": 2.2844944614917065e-05} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 10.720295922983665} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 343.0494695354773} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.5809446599520744, "P50": 2.4876106459414586, "P90": 3.6672112369444223, "P99": 3.8334691118204502} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.114832004532218e-05, "P50": 9.771028999239206e-06, "P90": 1.2737826909869915e-05, "P99": 3.388581797480594e-05} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", 
"samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 10.844088486097938} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 347.010831555134} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.044570271695266, "P50": 4.05053000053158, "P90": 6.248816245829221, "P99": 6.289776078158757} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0447072563692927e-05, "P50": 9.770505130290985e-06, "P90": 1.1878937948495153e-05, "P99": 1.6552320448681825e-05} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.091621232153178} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 354.9318794289017} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.624787917060312, "P50": 4.652673187491018, "P90": 7.377466033573728, "P99": 7.479527404743712} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0999167570844293e-05, "P50": 9.875569958239794e-06, "P90": 1.147052971646191e-05, "P99": 2.7676172321662376e-05} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0778722442461472} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.98382363175342} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9409613050776534, "P50": 0.7967628334881738, "P90": 1.0006340205669404, "P99": 3.3586936112435097} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4582930598407984e-05, "P50": 1.1395430192351341e-05, "P90": 1.9803666509687902e-05, "P99": 6.622522021643847e-05} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.134969161914277} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.63802636251373} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 
0.9157413303491194, "P50": 0.8758650625240989, "P90": 1.1152209872263485, "P99": 1.3453066207619868} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.326462185010314e-05, "P50": 1.0436982847750187e-05, "P90": 2.540843561291695e-05, "P99": 0.0001647323404904482} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.183414479861363} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 267.73852671112724} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.187691234233789, "P50": 1.1205975624616258, "P90": 1.6286135628004559, "P99": 1.8241353680042085} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.253714202903211e-05, "P50": 9.68750100582838e-06, "P90": 2.15042382478714e-05, "P99": 2.5441005127504543e-05} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.635497267355755} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 488.67182511076834} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.1203144049376714, "P50": 2.286113291454967, "P90": 2.683965341548901, "P99": 2.862120393359801} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.168369664810598e-05, "P50": 9.604031220078468e-06, "P90": 1.8712773453444246e-05, "P99": 3.087872406467796e-05} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 9.326684422379364} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 596.9078030322793} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.2611872992047575, "P50": 4.413974499970209, "P90": 5.212681762257126, "P99": 5.497934295912273} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6172946197912096e-05, "P50": 9.62504418566823e-06, "P90": 1.2108078226447108e-05, "P99": 0.00013407592428848206} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, 
"output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 9.420884490956778} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 602.9366074212338} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.745772592519643, "P50": 5.994263645960018, "P90": 7.634459075622726, "P99": 7.672258369355696} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.48729349207133e-05, "P50": 9.374984074383974e-06, "P90": 1.0491348803043369e-05, "P99": 8.479207870551671e-05} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 9.485761082753623} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 607.0887092962319} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.425438877099659, "P50": 6.693967458035331, "P90": 8.855311529210303, "P99": 8.950944676935906} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0562528623268007e-05, "P50": 9.583542123436928e-06, "P90": 1.1732999701052905e-05, "P99": 2.5392097886651828e-05} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0713041762704276} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 137.12693456261474} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.5979818066675215, "P50": 1.5178970204433426, "P90": 1.7795944167766722, "P99": 3.3760571612720405} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.063209587708116e-05, "P50": 1.1166499461978674e-05, "P90": 2.1759001538157466e-05, "P99": 0.00018478411482647633} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1043043482333257} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 269.3509565738657} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.8311890667292756, "P50": 1.786955499497708, "P90": 2.241478658653796, "P99": 2.4343469438934697} +{"input_tokens": 
1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5502508031204343e-05, "P50": 1.1896074283868074e-05, "P90": 2.1337810903787616e-05, "P99": 4.864037153311104e-05} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.021491925138638} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 514.7509664177456} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.50050834288937, "P50": 2.387215479509905, "P90": 3.128021683602128, "P99": 3.4197787638159935} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3525045942515134e-05, "P50": 1.0104500688612461e-05, "P90": 1.8483004532754423e-05, "P99": 5.4824501276016575e-05} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.539975407575194} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 837.1168521696248} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.980409730840474, "P50": 4.920209416013677, "P90": 6.360344717360567, "P99": 6.45702915608068} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1715912260115146e-05, "P50": 1.004151999950409e-05, "P90": 1.4404172543436292e-05, "P99": 4.372069379314787e-05} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.329876363931667} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 938.2241745832533} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.637681170416763, "P50": 7.943099646014161, "P90": 8.277895549684763, "P99": 8.433866045483155} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0493751615285873e-05, "P50": 9.312527254223824e-06, "P90": 1.0083068627864122e-05, "P99": 1.8271591980010708e-05} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPUT", "mean": 7.462876735738044} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 955.2482221744697} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.969755875036354, "P50": 9.62007118749898, "P90": 10.398176417232026, "P99": 10.438378122834257} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.055956818163395e-05, "P50": 9.584007784724236e-06, "P90": 1.0375049896538258e-05, "P99": 3.195721190422786e-05} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.395412413877767} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 946.6127889763542} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.8887797754223, "P50": 10.561784187448211, "P90": 11.856760495563503, "P99": 11.982770028223749} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2332068290561438e-05, "P50": 9.708048310130835e-06, "P90": 1.1274695862084633e-05, "P99": 5.6361618917435956e-05} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0555344468179126} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 270.21681838538564} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.2570806545601227, "P50": 3.167490979016293, "P90": 3.8719704458257187, "P99": 3.969902479170123} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.618127222172916e-05, "P50": 1.2666510883718729e-05, "P90": 2.1470512729138138e-05, "P99": 3.440626896917892e-05} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0288591107923666} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 519.3879323628458} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.021366707490524, "P50": 4.03012143750675, "P90": 4.6758762875455435, "P99": 4.8642915016424375} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 
100, "metric": "TTFT", "mean": 2.213246305473149e-05, "P50": 1.2249976862221956e-05, "P90": 2.1941948216408494e-05, "P99": 0.0003218263015151032} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.5803006737425123} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 916.5569724780831} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.54757571542752, "P50": 6.854474791500252, "P90": 8.160284187796062, "P99": 8.638129017313476} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1220737835392356e-05, "P50": 9.874987881630659e-06, "P90": 1.6692152712494145e-05, "P99": 0.0004472387733403597} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.70120429516809} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1203.508299563031} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.413529436164536, "P50": 11.97548431251198, "P90": 12.816594370722305, "P99": 12.945375449244166} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3006736990064383e-05, "P50": 9.812472853809595e-06, "P90": 1.3516459148377185e-05, "P99": 8.100702194497045e-05} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.082140326010609} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1301.0279234587158} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 13.94038208289072, "P50": 13.935025916493032, "P90": 14.165007249987685, "P99": 14.356433224542998} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0371595853939653e-05, "P50": 9.790994226932526e-06, "P90": 1.0754226241260768e-05, "P99": 2.380581689067194e-05} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.0811881774073715} +{"input_tokens": 1024, "output_tokens": 256, 
"request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1300.784173416287} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 15.527940833293833, "P50": 15.74263160402188, "P90": 16.642466087487993, "P99": 16.718864456781883} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.176087069325149e-05, "P50": 9.791459888219833e-06, "P90": 1.054608728736639e-05, "P99": 3.638559952378346e-05} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.099627101138905} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1305.5045378915597} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.22480244957027, "P50": 16.460403333534487, "P90": 17.905245845625178, "P99": 18.063639151982496} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.082063334062696e-06, "P50": 9.47900116443634e-06, "P90": 1.0375049896538258e-05, "P99": 1.611827523447592e-05} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0162825805454194} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 520.3366812392547} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.508401151361177, "P50": 7.526904458471108, "P90": 8.534081232722382, "P99": 8.620630243399646} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.1115388050675395e-05, "P50": 1.316593261435628e-05, "P90": 2.1453597582876686e-05, "P99": 0.0003530213423073434} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.8113469556800694} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 927.4096413081955} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 10.941447713261004, "P50": 11.494620562531054, "P90": 13.119602270727047, "P99": 13.243544810669263} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1803760901093484e-05, "P50": 
1.1020514648407698e-05, "P90": 2.0242307800799612e-05, "P99": 5.899590207264205e-05} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.5287534625209878} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1294.7217728107457} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 18.880496942040043, "P50": 19.877285875030793, "P90": 21.49375434156973, "P99": 21.599627774388065} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6004648059606553e-05, "P50": 1.0146002750843763e-05, "P90": 2.065006410703063e-05, "P99": 0.00013716630521230436} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.6308368898492116} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1346.9884876027963} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 24.98549484791816, "P50": 25.431526229076553, "P90": 26.054551507672294, "P99": 26.25598886579275} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1169537901878358e-05, "P50": 9.499955922365189e-06, "P90": 1.6275024972856058e-05, "P99": 2.512052771635368e-05} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.6528456327366845} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1358.2569639611825} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 27.60679464243818, "P50": 27.435029957967345, "P90": 30.275461679510773, "P99": 31.802262439199257} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4689974961802362e-05, "P50": 9.833020158112049e-06, "P90": 1.6087829135358336e-05, "P99": 9.972395258956287e-05} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.656434864664895} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", 
"samples": 100, "metric": "TT", "mean": 1360.0946507084263} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 29.149524212500545, "P50": 29.129850708530284, "P90": 32.867283529159614, "P99": 34.59052716854727} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.1726652737706899e-05, "P50": 9.999959729611874e-06, "P90": 1.488327980041504e-05, "P99": 2.1885788301006348e-05} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.662288521332232} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1363.0917229221027} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 29.85792608671123, "P50": 29.844756854057778, "P90": 34.0721852837014, "P99": 35.913789553269744} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3388298684731126e-05, "P50": 9.770505130290985e-06, "P90": 1.65082747116685e-05, "P99": 7.27330904919659e-05} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0840985380031156} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.3363941520124625} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5431232321227435, "P50": 0.3347180000273511, "P90": 1.0655437545734463, "P99": 2.9217998498678255} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5411763451993466e-05, "P50": 1.0354502592235804e-05, "P90": 2.2191891912370923e-05, "P99": 5.156446248293005e-05} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1600529561041886} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.640211824416754} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4203798433532938, "P50": 0.34634614596143365, "P90": 0.6117654461064377, "P99": 1.4806152582541132} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.3197539374232292e-05, "P50": 1.0167015716433525e-05, "P90": 2.013723133131862e-05, "P99": 2.538042725063867e-05} 
+{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.25505318548248} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.02021274192992} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5534239103773143, "P50": 0.48803345806663856, "P90": 0.9918643253389746, "P99": 1.1481125494977462} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.190909999422729e-05, "P50": 9.374984074383974e-06, "P90": 1.8082698807120328e-05, "P99": 3.2626044703647924e-05} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.227698837766686} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 24.910795351066746} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.9488248146360274, "P50": 1.7117452084785327, "P90": 4.027362787211314, "P99": 4.465676301306813} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0195029899477959e-05, "P50": 9.416486136615276e-06, "P90": 1.0675319936126471e-05, "P99": 1.746986294165269e-05} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.278096821098191} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 25.112387284392764} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.999760984561871, "P50": 4.923311083519366, "P90": 9.135607029055246, "P99": 10.073529960522428} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4000348746776581e-05, "P50": 9.437499102205038e-06, "P90": 1.0811910033226016e-05, "P99": 9.10293566994378e-05} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.305122533763865} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 25.22049013505546} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, 
"model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.514300874545006, "P50": 6.465518562938087, "P90": 11.67464853336569, "P99": 12.84855179672246} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.293211244046688e-05, "P50": 9.666488040238619e-06, "P90": 1.0804447811096908e-05, "P99": 0.00010427919216454056} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.364180103253477} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 25.456720413013908} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.170504124554572, "P50": 7.151725895993877, "P90": 12.860142958944198, "P99": 14.156447019048501} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0823752963915467e-05, "P50": 9.66701190918684e-06, "P90": 1.09252636320889e-05, "P99": 1.732899923808913e-05} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.084003812192082} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.672030497536657} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5811438600311521, "P50": 0.3800810419488698, "P90": 1.1223832371993923, "P99": 2.9128523031366105} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0649175383150578e-05, "P50": 9.792041964828968e-06, "P90": 1.1499982792884112e-05, "P99": 2.457501483149828e-05} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1552859843569427} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.242287874855542} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4813788874913007, "P50": 0.3982332499581389, "P90": 0.6684493336477323, "P99": 1.483301521741089} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.543044578284025e-05, "P50": 1.4521530829370022e-05, "P90": 2.153805689886213e-05, "P99": 3.92862572334707e-05} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, 
"P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.232097897906285} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 33.85678318325028} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6822485379409045, "P50": 0.6491517705144361, "P90": 1.1950028753606603, "P99": 1.344794329206925} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.946211443282664e-05, "P50": 1.9645493011921644e-05, "P90": 2.3425358813256027e-05, "P99": 8.07919702492655e-05} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.082103793899009} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 48.65683035119207} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.2002546467620414, "P50": 1.9184614999685436, "P90": 4.43900754195638, "P99": 4.911649462581845} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0237956196069718e-05, "P50": 2.039596438407898e-05, "P90": 2.3375544697046283e-05, "P99": 4.3637613998726674e-05} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.137646999045881} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 49.10117599236705} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.291160557479597, "P50": 5.13527814601548, "P90": 9.514361687505152, "P99": 10.4663907312823} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.524001618847251e-05, "P50": 2.0208535715937614e-05, "P90": 3.15503450110555e-05, "P99": 0.0007843713078182214} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.141855126640351} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 49.13484101312281} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.8332920870860105, "P50": 6.74559720850084, "P90": 12.17526537058875, "P99": 13.262936198266688} 
+{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6818761359900236e-05, "P50": 1.933303428813815e-05, "P90": 2.294194418936968e-05, "P99": 2.5173086905852006e-05} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.189499703439544} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 49.51599762751635} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.437689197877189, "P50": 7.377283979498316, "P90": 13.251219378889074, "P99": 14.456432717553108} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8317532958462835e-05, "P50": 2.016650978475809e-05, "P90": 2.2542080841958523e-05, "P99": 2.476664027199182e-05} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0821240907737097} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.313985452379356} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6621256383485161, "P50": 0.4787798125180416, "P90": 1.1368777660420166, "P99": 2.9202613584976693} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8012489890679715e-05, "P50": 1.8937571439892054e-05, "P90": 2.3837212938815356e-05, "P99": 2.8435215353965812e-05} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.151869875190017} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.42991800304027} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.576265311705647, "P50": 0.47611297899857163, "P90": 0.8562046881299467, "P99": 1.6992274472117437} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.7523322496563197e-05, "P50": 1.9353989046067e-05, "P90": 2.2046267986297607e-05, "P99": 7.163667469285881e-05} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPUT", "mean": 4.195196435104038} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.12314296166461} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.940295041234931, "P50": 0.8641508750151843, "P90": 1.5588079122710043, "P99": 1.7364127847517377} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.3626620424911377e-05, "P50": 1.9103928934782743e-05, "P90": 2.7124746702611462e-05, "P99": 8.67654266767271e-05} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.84375105530597} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 93.50001688489552} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.7855121683247854, "P50": 2.483734187495429, "P90": 5.262884442310315, "P99": 5.643083753935062} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4755461597815156e-05, "P50": 9.979470632970333e-06, "P90": 2.101254649460316e-05, "P99": 3.965842421166667e-05} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.831848702601632} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 93.30957924162611} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.954265717844246, "P50": 5.855879103997722, "P90": 10.532833524735178, "P99": 11.270426398172276} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2599993497133254e-05, "P50": 9.833951480686665e-06, "P90": 1.9921117927879096e-05, "P99": 3.627164522185923e-05} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.873173832406772} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 93.97078131850836} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.44377578841988, "P50": 7.386338145995978, "P90": 13.057207083876712, "P99": 14.004255186957307} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TTFT", "mean": 1.1595826363191009e-05, "P50": 9.68750100582838e-06, "P90": 1.0566983837634326e-05, "P99": 8.675919030793011e-05} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.906818680155886} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 94.50909888249417} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.113860762538389, "P50": 8.081869271001779, "P90": 14.248282441915945, "P99": 15.340886845717906} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0478299809619785e-05, "P50": 9.83354402706027e-06, "P90": 1.1124997399747372e-05, "P99": 2.2693877108395115e-05} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.080198593373196} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.56635498794227} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8010242208419368, "P50": 0.655614854011219, "P90": 1.063813887513243, "P99": 2.929307807239243} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8759957747533917e-05, "P50": 1.9291997887194157e-05, "P90": 2.2887531667947772e-05, "P99": 3.581467899493927e-05} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.144252938565845} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.61609403410704} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8416300095885526, "P50": 0.7364843540126458, "P90": 1.3259295336087236, "P99": 1.6844695390120614} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8667096737772228e-05, "P50": 1.87290133908391e-05, "P90": 2.4158379528671506e-05, "P99": 6.938327918760483e-05} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.097482292889981} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TT", "mean": 131.1194333724794} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4869349837116896, "P50": 1.3608307705144398, "P90": 2.424116040975787, "P99": 2.681176949770889} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.093120525591076e-05, "P50": 1.9250030163675547e-05, "P90": 2.2696086671203378e-05, "P99": 0.0001934938563499643} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.282771144064563} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 169.04867661006602} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.299775737923337, "P50": 4.057559208420571, "P90": 7.399769825278781, "P99": 7.53915718558128} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9536613253876568e-05, "P50": 1.956248888745904e-05, "P90": 2.363778185099364e-05, "P99": 3.8140561664477424e-05} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.30252146506577} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 169.68068688210465} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.395289485355606, "P50": 7.333979104005266, "P90": 12.686728346021846, "P99": 12.970041925815167} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9849191885441543e-05, "P50": 1.9625003915280104e-05, "P90": 2.4808407761156563e-05, "P99": 6.623836234211959e-05} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.311759668459976} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 169.97630939071922} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.946781686341856, "P50": 8.938193458481692, "P90": 15.264622382819654, "P99": 15.804821062370902} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9050874980166553e-05, "P50": 2.0645500626415014e-05, "P90": 2.35880957916379e-05, "P99": 
2.4751978926360608e-05} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.340385383428255} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 170.89233226970416} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.641908357063075, "P50": 9.658034958527423, "P90": 16.504334233305418, "P99": 17.143276155967033} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.5252557825297118e-05, "P50": 2.022896660491824e-05, "P90": 2.398292999714613e-05, "P99": 7.041261880658866e-05} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0764236828564968} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.8911157028158} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.1181416534190065, "P50": 0.9986447294941172, "P90": 1.3503278585500094, "P99": 3.36554916666006} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.846877159550786e-05, "P50": 1.875002635642886e-05, "P90": 2.2238108795136216e-05, "P99": 6.799412891268775e-05} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.129120296317119} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.26369896429563} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4773646287689917, "P50": 1.333727374963928, "P90": 2.3531906667514706, "P99": 2.561694954453269} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.2051219129934907e-05, "P50": 1.929153222590685e-05, "P90": 2.477016532793642e-05, "P99": 0.00017914602998644126} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.9453497193741334} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 252.50238203994454} +{"input_tokens": 2048, 
"output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.666910813357681, "P50": 2.640511437493842, "P90": 3.8058204833185303, "P99": 4.176948367503938} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.972082769498229e-05, "P50": 1.98955531232059e-05, "P90": 2.6049348525702955e-05, "P99": 4.5199907617643516e-05} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.721779900823648} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 302.1939136527135} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.615470693387324, "P50": 6.531417520542163, "P90": 9.785515021439641, "P99": 10.074993142265594} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8370787147432564e-05, "P50": 1.904141390696168e-05, "P90": 2.2932700812816623e-05, "P99": 4.83730842825027e-05} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.742143131647432} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 303.49716042543565} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.691497012873878, "P50": 9.762932666460983, "P90": 15.114638803550042, "P99": 15.207373834996252} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1300847874954344e-05, "P50": 1.8854043446481228e-05, "P90": 2.2258295211941006e-05, "P99": 0.0002088446076959372} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.755133706255921} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 304.32855720037895} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.233730937949149, "P50": 11.357760812505148, "P90": 17.747714179381727, "P99": 18.007674868716858} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1120345918461682e-05, "P50": 2.0666513592004776e-05, "P90": 2.4333305191248662e-05, "P99": 3.771861898712849e-05} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.766802405552006} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 305.0753539553284} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.959337320466293, "P50": 12.10546820802847, "P90": 19.013545508740936, "P99": 19.38628337407019} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.990663935430348e-05, "P50": 1.9749975763261318e-05, "P90": 2.505364827811718e-05, "P99": 4.458069684915259e-05} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0687623543169664} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.8015813525717} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.0432005157659296, "P50": 1.9271513124695048, "P90": 2.6408802831312648, "P99": 3.3661332724813864} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.118662051856518e-05, "P50": 1.931202132254839e-05, "P90": 2.3345509544014933e-05, "P99": 0.00020544505212459106} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0825276518486944} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 266.5635394366329} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.8703157891728917, "P50": 2.68870058353059, "P90": 4.112924121040851, "P99": 4.250954759339802} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.748669892549515e-05, "P50": 1.895846799015999e-05, "P90": 2.2325315512716774e-05, "P99": 3.374570165760819e-05} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.479396917944604} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 445.3628054969093} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", 
"mean": 6.484132932026405, "P50": 7.057311041513458, "P90": 8.66958600380458, "P99": 9.505718903953676} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1239558700472118e-05, "P50": 2.0187522750347853e-05, "P90": 2.385390689596534e-05, "P99": 0.00012587037519551847} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.8128911793966602} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 488.0500709627725} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.671794283353956, "P50": 12.137564229546115, "P90": 15.055289795831778, "P99": 15.182136849433883} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.641711013391614e-05, "P50": 1.9042054191231728e-05, "P90": 2.2420927416533237e-05, "P99": 0.0001644769043196129} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.817975808019255} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 488.70090342646466} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.785948109927121, "P50": 15.340018416987732, "P90": 20.202830779436045, "P99": 20.315408678812673} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0608287304639816e-05, "P50": 2.11455044336617e-05, "P90": 2.35871528275311e-05, "P99": 3.6839129170402995e-05} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.8300214764675853} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 490.2427489878509} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.30689197040978, "P50": 16.915502917021513, "P90": 22.820975974947213, "P99": 23.094627902293578} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.8700745422393083e-05, "P50": 2.0312436390668154e-05, "P90": 2.312498399987817e-05, "P99": 3.526595653966079e-05} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, 
"P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.8320375960764395} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 490.50081229778425} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 17.041604349559638, "P50": 17.671884729061276, "P90": 24.071453746117186, "P99": 24.49408878996386} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.778127858415246e-05, "P50": 1.9353930838406086e-05, "P90": 2.4046085309237244e-05, "P99": 4.043183289468296e-05} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0491624450989319} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 268.58558594532656} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.36835270334268, "P50": 4.125994875037577, "P90": 5.592689237231389, "P99": 6.070587427061983} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.786750159226358e-05, "P50": 1.9625003915280104e-05, "P90": 2.2583152167499064e-05, "P99": 2.8894214192405418e-05} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.9483843880012008} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 498.7864033283074} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.196406884208554, "P50": 7.542222874471918, "P90": 8.967981229792349, "P99": 9.258459672953467} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.6787914102897047e-05, "P50": 1.9916973542422056e-05, "P90": 2.2712175268679858e-05, "P99": 4.028367111459777e-05} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.600985063032508} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 665.8521761363221} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.010170447503913, "P50": 17.04011962498771, "P90": 18.386644983442967, 
"P99": 18.69856051309267} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.239040099084377e-05, "P50": 1.829152461141348e-05, "P90": 2.24664225243032e-05, "P99": 0.00015587836038321397} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.711484023839452} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 694.1399101028998} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 21.655049752001652, "P50": 22.77945033297874, "P90": 25.61928959575016, "P99": 25.717514910389436} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9352892413735388e-05, "P50": 2.02084775082767e-05, "P90": 2.3092294577509167e-05, "P99": 3.989360760897422e-05} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.7201817389913945} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 696.366525181797} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 24.729386238327717, "P50": 25.95443362504011, "P90": 30.766505637124645, "P99": 30.880341326645574} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.996625098399818e-05, "P50": 2.1041487343609333e-05, "P90": 2.445445861667395e-05, "P99": 3.44919611234218e-05} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.7278002404218347} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 698.3168615479897} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 26.212630359114847, "P50": 27.463846104510594, "P90": 33.30425608265214, "P99": 33.63758272471721} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.03500606585294e-05, "P50": 2.0083447452634573e-05, "P90": 2.265427028760314e-05, "P99": 4.247743519954476e-05} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, 
"model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.732587694175802} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 699.5424497090053} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 26.932800787979502, "P50": 28.219300792028662, "P90": 34.57999514573021, "P99": 35.033603182592195} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9622958498075604e-05, "P50": 1.9875005818903446e-05, "P90": 2.3549434263259173e-05, "P99": 3.524445695802635e-05} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9898751535559183} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 506.81607862063015} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.322325259115313, "P50": 11.692786541942041, "P90": 13.683243379869964, "P99": 14.018329347707331} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.92992202937603e-05, "P50": 1.9520986825227737e-05, "P90": 2.40250607021153e-05, "P99": 6.044794106856005e-05} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.4494567597324282} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 742.1218609830032} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 23.658336499104042, "P50": 25.382544021005742, "P90": 28.437182141153606, "P99": 28.721060635779285} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7733697313815354e-05, "P50": 1.956248888745904e-05, "P90": 2.352495212107897e-05, "P99": 3.485115594230621e-05} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.5867557905326144} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 812.4189647526986} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 34.10377774747787, "P50": 31.754771228996105, "P90": 41.82843220824143, "P99": 42.49149738608976} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 
4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9041686318814754e-05, "P50": 1.8874998204410076e-05, "P90": 2.2082950454205275e-05, "P99": 3.27737850602721e-05} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.6259044979477075} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 832.4631029492263} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 39.530627734145384, "P50": 40.23347193747759, "P90": 50.18058092924766, "P99": 50.33096730735851} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.043131691403687e-05, "P50": 9.479466825723648e-06, "P90": 1.4008465223014367e-05, "P99": 2.0050685852766042e-05} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.6265715745564209} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 832.8046461728875} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 42.69044157248456, "P50": 43.502237104519736, "P90": 55.515269737516064, "P99": 55.658371908322444} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.765464199706912e-05, "P50": 1.9604573026299477e-05, "P90": 2.2496469318866734e-05, "P99": 3.0501076253131266e-05} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.6275005980271806} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 833.2803061899165} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 44.19042098458973, "P50": 45.04572814545827, "P90": 58.04768201245461, "P99": 58.41739923917223} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6318276757374405e-05, "P50": 1.916650217026472e-05, "P90": 2.304596127942205e-05, "P99": 2.956418320536621e-05} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.6317610529300335} 
+{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 835.4616591001771} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 44.85482002334902, "P50": 45.715578811999876, "P90": 59.29269957044162, "P99": 59.71936456835712} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.0282477596774697e-05, "P50": 9.790994226932526e-06, "P90": 1.0920851491391663e-05, "P99": 1.79933302570135e-05} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a40.json b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a40.json new file mode 100644 index 00000000..1bd7efd4 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a40.json @@ -0,0 +1 @@ +{"gpu": "simulator-llama2-7b-a40", "cost": 0.3, "tputs": [[53.46768813782049, 26.94702168379827, 13.351384925561684, 6.449112831829054, 3.0687269478197807], [49.18066045134238, 25.769051024042447, 12.750500448630419, 6.217046222817317, 3.908221660884725], [41.108372151595, 22.75667452657716, 11.871417823932267, 5.829879694384015, 2.8224716954769615], [26.893000851172808, 19.116530652345308, 10.259072704641495, 5.2228653781964365, 2.543157981660752], [22.158835440491103, 14.4763730301464, 8.033579290082939, 4.2722041267679, 2.164619049620232], [12.63449771529047, 9.105625644461483, 5.341921516909921, 3.0724262148293473, 1.605589539944446], [6.44070093597801, 4.908506391879229, 2.825722541489097, 1.7049267539064084, 0.975843626543094], [2.6864554196635524, 2.0460122075598965, 1.365640744818924, 1.2238675036166577, 0.5042209229511221]], "indexes": [[4, 8, 16, 32, 64, 128, 256, 512], [128, 256, 512, 1024, 2048]], "created": 1732598726.235767} \ No newline at end of file diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a40.jsonl b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a40.jsonl new file mode 100644 index 00000000..d020699b --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a40.jsonl @@ -0,0 +1,1400 @@ +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0839043805241204} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.335617522096482} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.438547724969103, "P50": 0.13613435449951794, "P90": 1.1207229958032259, "P99": 2.903862332510185} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.9262420361628756e-05, "P50": 9.145500371232629e-06, "P90": 1.5387797611765572e-05, "P99": 0.0014663791733619293} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} 
+{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1622589787693403} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.649035915077361} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2688347645504109, "P50": 0.13617285349755548, "P90": 0.5641376333034714, "P99": 1.4619319694067259} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.6277890137862416e-05, "P50": 9.41600592341274e-06, "P90": 1.7587500042282073e-05, "P99": 0.00027396732548368575} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.298054237296266} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.192216949185063} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.18707339040003718, "P50": 0.1413101460057078, "P90": 0.2852682829936385, "P99": 0.7420618833374477} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.658087029587477e-05, "P50": 9.520495950710028e-06, "P90": 1.7758403555490076e-05, "P99": 0.0015352545697533099} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.463529183739894} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 33.854116734959575} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.15810109749043477, "P50": 0.14602668750740122, "P90": 0.18148903750552567, "P99": 0.43437789975025254} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.663545049377717e-05, "P50": 9.333503840025514e-06, "P90": 1.7050605674739937e-05, "P99": 0.001594731580989908} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.473724008648674} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 65.8948960345947} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.17584271837011328, "P50": 0.16567710399976932, "P90": 0.22817004199605437, "P99": 0.2585759596631397} +{"input_tokens": 128, 
"output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00022817787888925522, "P50": 9.290997695643455e-06, "P90": 0.00037267089355736994, "P99": 0.0036314010646310755} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 31.27979562792885} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 125.1191825117154} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.22268811204005032, "P50": 0.21727316649776185, "P90": 0.2874093369042384, "P99": 0.3208939269041003} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00015957123061525636, "P50": 8.83299799170345e-06, "P90": 0.00032532529876334756, "P99": 0.0028857170867559096} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 53.46768813782049} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 213.87075255128195} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3075376753995079, "P50": 0.2900425414991332, "P90": 0.37703785410412827, "P99": 0.41696699601248843} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00019355702941538765, "P50": 5.562498699873686e-06, "P90": 0.0007956705012475146, "P99": 0.0017631298243941309} +{"input_tokens": 128, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0837843531685567} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.670274825348454} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.49746841084954213, "P50": 0.2343688335022307, "P90": 1.062250779397437, "P99": 2.9082634011695387} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00012030415993649512, "P50": 9.458002750761807e-06, "P90": 2.1257801563479088e-05, "P99": 0.0032353440720180537} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1580304475998564} 
+{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.26424358079885} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3373743758203636, "P50": 0.2387285204968066, "P90": 0.5417081672916542, "P99": 1.464953744577654} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.9464619912905616e-05, "P50": 9.354007488582283e-06, "P90": 1.5392010391224185e-05, "P99": 0.0013131320866523316} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.278736354374212} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.2298908349937} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2884601691304124, "P50": 0.2433021254983032, "P90": 0.2889888874968166, "P99": 0.8608414796782023} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.368699967628344e-05, "P50": 9.499999578110874e-06, "P90": 1.628339232411237e-05, "P99": 0.0020393284207966666} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.397424606678252} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.17939685342601} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.26857761001971087, "P50": 0.25992191600380465, "P90": 0.295953308895696, "P99": 0.42689800592270366} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.5771159707801418e-05, "P50": 8.874994819052517e-06, "P90": 1.2533007247839171e-05, "P99": 0.0006139479180274072} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.216537260701124} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 129.732298085609} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.306746664068487, "P50": 0.29768141649401514, "P90": 0.37721112560684567, "P99": 0.4176467741586385} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4632949935039504e-05, "P50": 9.520510502625257e-06, 
"P90": 2.2325299505610096e-05, "P99": 0.0002868567599216492} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 30.322196458076718} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 242.57757166461374} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3847017724797479, "P50": 0.3948703749993001, "P90": 0.44696010770130673, "P99": 0.48129873948768365} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.952746920636855e-05, "P50": 6.604001100640744e-06, "P90": 5.5032996169758944e-05, "P99": 0.0013416425058676421} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 49.18066045134238} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 393.44528361073907} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5296553861792199, "P50": 0.5208468124983483, "P90": 0.6009611752990169, "P99": 0.6677521699049976} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00024032871006056665, "P50": 5.895999493077397e-06, "P90": 0.0010343750022002512, "P99": 0.0026138429103593885} +{"input_tokens": 128, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0814234777701306} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.30277564432209} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.685500749170169, "P50": 0.43545956299931277, "P90": 1.1459870042090194, "P99": 3.876163432001343} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.88437507476192e-05, "P50": 9.000003046821803e-06, "P90": 1.3728803605772578e-05, "P99": 0.0005885375056823617} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.148216442238347} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.37146307581355} +{"input_tokens": 
128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5026723037715419, "P50": 0.43682316649938, "P90": 0.49894882089429304, "P99": 1.6863156100930077} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013966825950774365, "P50": 9.12499672267586e-06, "P90": 1.9621397950686517e-05, "P99": 0.0032553300748986835} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.242365739560298} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.87785183296477} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.46979074252943975, "P50": 0.46228279199567623, "P90": 0.4978863339027157, "P99": 0.8535931108299708} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013241913984529674, "P50": 9.292001777794212e-06, "P90": 1.7616292461752894e-05, "P99": 0.0010332132517942708} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.25015861468023} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 132.00253783488367} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.49951786167017415, "P50": 0.4951680834928993, "P90": 0.559215666304226, "P99": 0.5942116368286952} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.725623060949147e-05, "P50": 9.499999578110874e-06, "P90": 1.8520808953326206e-05, "P99": 0.0010750949202338266} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 15.65481089870355} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 250.4769743792568} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5747115070786094, "P50": 0.5691322499988019, "P90": 0.6643954214028781, "P99": 0.7042597033322091} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.348463040310889e-05, "P50": 9.12499672267586e-06, "P90": 2.022920234594499e-05, "P99": 0.00043028883563238265} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", 
"samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 28.15251413377608} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 450.4402261404173} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7328853666898795, "P50": 0.7395608335064026, "P90": 0.8390067044078023, "P99": 0.8689291221657186} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00017146451034932397, "P50": 9.04199987417087e-06, "P90": 0.0005498005892150106, "P99": 0.0031281883298652255} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 41.108372151595} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 657.73395442552} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.0023432788297941, "P50": 0.9879536459993687, "P90": 1.0882979247049662, "P99": 1.1127979411699926} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00017871834977995605, "P50": 5.583999154623598e-06, "P90": 0.0006454869013396095, "P99": 0.001975332417641774} +{"input_tokens": 128, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0771349361079334} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.46831795545387} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9652029175098868, "P50": 0.8294558960042195, "P90": 0.9066838205078972, "P99": 3.3469306224974598} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.872372036217712e-05, "P50": 9.208997653331608e-06, "P90": 7.634579233126755e-05, "P99": 0.001120664762711395} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1316915518860466} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.21412966035349} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8748390454804758, "P50": 
0.8542407709974214, "P90": 0.8959668253024575, "P99": 1.687360376334838} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.823539024684578e-05, "P50": 9.270494047086686e-06, "P90": 2.381669328315184e-05, "P99": 0.0014054516717442382} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.1729793991529185} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.5353407728934} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8953692429402145, "P50": 0.8950633545100573, "P90": 0.9521407503081719, "P99": 0.9810975263219734} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.393251070519909e-05, "P50": 9.249997674487531e-06, "P90": 1.8400006229057932e-05, "P99": 0.0007163904210028713} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.975811569935071} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 255.22597023792227} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9733052682809649, "P50": 0.9636108125050669, "P90": 1.0524579037941293, "P99": 1.0782787544825987} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.672505965572782e-05, "P50": 9.292001777794212e-06, "P90": 1.868750987341628e-05, "P99": 0.0005536586654489005} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 14.53647197417589} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 465.1671031736285} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.179512752101, "P50": 1.196477750003396, "P90": 1.3020309496976552, "P99": 1.3442222666546877} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.4587489576078954e-05, "P50": 9.479495929554105e-06, "P90": 4.0595799509903066e-05, "P99": 0.0007672111570718702} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 23.81474722850987} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 762.0719113123158} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.5852693962199556, "P50": 1.6286043540021637, "P90": 1.7961095538921654, "P99": 1.8491501982505725} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00011927958970773034, "P50": 6.72949681757018e-06, "P90": 0.0004662045030272568, "P99": 0.0011547762507689227} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 26.893000851172808} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 860.5760272375298} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.9661889604599856, "P50": 2.218192729007569, "P90": 2.384756549401209, "P99": 2.4217460091730754} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00037541373050771655, "P50": 7.437491149175912e-06, "P90": 0.0013304752908879956, "P99": 0.0032889699286897667} +{"input_tokens": 128, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0681013047358747} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.35848350309598} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.6903086520994839, "P50": 1.6515243960020598, "P90": 1.7028188294003486, "P99": 3.366735217835524} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00010688581940485165, "P50": 8.874994819052517e-06, "P90": 3.814580268226577e-05, "P99": 0.00142789607518355} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0940986619486193} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 134.02231436471163} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.7052669782903103, "P50": 1.6970655830009491, "P90": 1.777015145304904, "P99": 1.8211547539138702} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.07158602704294e-05, "P50": 8.95900302566588e-06, "P90": 1.6592201427556576e-05, "P99": 0.001700708839343864} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.030333898280051} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 257.94136948992326} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.8054828645697854, "P50": 1.8076308130039251, "P90": 1.9006877083957079, "P99": 1.9244921405040079} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.910747931106016e-05, "P50": 8.916002116166055e-06, "P90": 1.6186898574233068e-05, "P99": 0.0004536255786661065} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.429433614342273} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 475.48375131790544} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.0662712329102213, "P50": 2.0926852289994713, "P90": 2.2094433833059157, "P99": 2.2537208637547157} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.385374977369793e-05, "P50": 8.916002116166055e-06, "P90": 1.7912503972183925e-05, "P99": 0.0003784817461564706} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.391658056095213} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 793.0661155900937} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.6571099188101654, "P50": 2.714129750500433, "P90": 3.0194359544999316, "P99": 3.069299617416982} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00012825245037674903, "P50": 7.957998604979366e-06, "P90": 7.970350707182688e-05, "P99": 0.001000008094270046} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 17.611739774389015} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1127.151345560897} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.1301586295792365, "P50": 3.2262498540003435, "P90": 3.3566074875008782, "P99": 3.391537908088503} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.837044082814827e-05, "P50": 5.395500920712948e-06, "P90": 0.0002735083064180803, "P99": 0.000877795500564405} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 22.158835440491103} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1418.1654681914306} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.1383686520985794, "P50": 3.1500388540007407, "P90": 3.262924166704761, "P99": 3.304704604576109} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.371876003569924e-05, "P50": 5.854504706803709e-06, "P90": 0.0003303750010672963, "P99": 0.0011334500834345855} +{"input_tokens": 128, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.049107209116777} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 134.28572276694746} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.3508307762310143, "P50": 3.3325255410018144, "P90": 3.49756731280504, "P99": 3.5169898050896884} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.174918948090636e-05, "P50": 8.812501619104296e-06, "P90": 1.9999696814920756e-05, "P99": 0.0008580040726519834} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.021903184513788} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 258.80360761776484} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.536135887091514, "P50": 3.5176012084994, "P90": 3.7046800165990135, "P99": 4.000717985013471} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.534750005812384e-05, "P50": 8.791001164354384e-06, "P90": 3.295360656920824e-05, "P99": 
0.000919061576860271} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.7285408143619883} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 477.2532242383345} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.9436233891507437, "P50": 4.0212018544989405, "P90": 4.150104408311018, "P99": 4.191839684768056} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9694969669217244e-05, "P50": 8.874994819052517e-06, "P90": 1.8020495190285153e-05, "P99": 0.0003094908298226093} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.234021391195232} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 797.9547380729897} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.961230069230369, "P50": 5.038317062506394, "P90": 5.440100304096996, "P99": 6.088489928843628} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.850208072341047e-05, "P50": 8.332994184456766e-06, "P90": 2.0012802269775428e-05, "P99": 0.0011802917446766595} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.864603191896709} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1134.6692085627787} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.9398164958000415, "P50": 6.022108978999313, "P90": 6.304794012497586, "P99": 6.363069552255911} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.60917092661839e-05, "P50": 8.978997357189655e-06, "P90": 2.049190516117962e-05, "P99": 0.0012431364247458994} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.073947966690483} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1417.4653397363818} +{"input_tokens": 128, "output_tokens": 
128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.453705586279829, "P50": 6.500367541506421, "P90": 6.656403121195035, "P99": 6.702381524502853} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.7011300814338025e-05, "P50": 6.333502824418247e-06, "P90": 0.00013868750393157943, "P99": 0.0008861827479267962} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.63449771529047} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1617.2157075571802} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.507585805450072, "P50": 6.527113479001855, "P90": 6.595106816994667, "P99": 6.621519518517162} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.862962967716158e-05, "P50": 5.875001079402864e-06, "P90": 0.00018195449374616154, "P99": 0.0008217504287313247} +{"input_tokens": 128, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0119313545638546} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 259.0544267683468} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.050457380880689, "P50": 7.021310104501026, "P90": 7.37658635439293, "P99": 7.420429478164879} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.373915897100233e-05, "P50": 9.063005563803017e-06, "P90": 1.7220502195414158e-05, "P99": 0.0007128600928990647} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.8673367853697465} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 478.0382170546551} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.946024355398986, "P50": 8.110466958503821, "P90": 8.398590766594861, "P99": 8.425653376442789} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.191462086164393e-05, "P50": 9.39600431593135e-06, "P90": 2.5491100677754853e-05, "P99": 0.0011920279312471521} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0949590260694135} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 792.3095106737699} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.908220982039056, "P50": 10.146661770995706, "P90": 10.949943579699902, "P99": 10.983012926739029} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.132675047614611e-05, "P50": 9.062503522727638e-06, "P90": 1.8246396211907322e-05, "P99": 0.0009483090748835961} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.309038317695634} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1103.1138093300824} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 12.21257603456921, "P50": 12.464256603998365, "P90": 13.010536446102197, "P99": 13.05442974842037} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.86628396215383e-05, "P50": 7.687500328756869e-06, "P90": 3.0708592385053864e-05, "P99": 0.0016952819077414468} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.278352245072307} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1351.2581747385107} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 13.395503556270123, "P50": 13.532345833002182, "P90": 13.814602800598369, "P99": 13.836165568919968} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.138962890370749e-05, "P50": 7.81250128056854e-06, "P90": 0.0001849079912062737, "P99": 0.00088438441540348} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.996499288926428} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1535.1038179651655} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 13.986183834590484, "P50": 
14.021719395997934, "P90": 14.174480612503249, "P99": 14.192941160081974} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.244452972779982e-05, "P50": 6.3539991970174015e-06, "P90": 0.000174141697061714, "P99": 0.0011119820772728555} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.44070093597801} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1648.8194396103706} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.061878057859722, "P50": 14.073760541497904, "P90": 14.116623153799447, "P99": 14.129665916254599} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00016358298948034643, "P50": 6.250003934837878e-06, "P90": 0.0004365292057627814, "P99": 0.0034057470911648146} +{"input_tokens": 128, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9250080438699301} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 473.6041184614042} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.95308339631156, "P50": 17.372387437004363, "P90": 18.29915238749527, "P99": 18.838645580825396} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00017180004011606797, "P50": 9.499999578110874e-06, "P90": 0.00014410000003408648, "P99": 0.0030960367605439515} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.4855170076261695} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 760.5847079045988} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 22.14776523375069, "P50": 23.240092146006646, "P90": 24.802224616397872, "P99": 24.85988209667994} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00010979705053614453, "P50": 9.188006515614688e-06, "P90": 2.0008292631246162e-05, "P99": 0.0026563987442932667} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, 
"request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.9902988026602257} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1019.0329869620356} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 27.207949302949565, "P50": 28.298728479501733, "P90": 29.567018729494883, "P99": 29.59367785773502} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.4886239629704504e-05, "P50": 8.937495294958353e-06, "P90": 1.4904803538229332e-05, "P99": 0.0006081055098911842} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.3590426854013353} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1207.8298549254837} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 30.441667088340182, "P50": 31.29290656249941, "P90": 31.850428383291, "P99": 31.887671705002866} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0004088745397166349, "P50": 6.791000487282872e-06, "P90": 1.913280430017041e-05, "P99": 0.018450552823342153} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.620574150760402} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1341.7339651893258} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 32.16053733286884, "P50": 32.56079279149708, "P90": 32.862949075303916, "P99": 32.91251508299611} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00016800080920802428, "P50": 8.291492122225463e-06, "P90": 0.000612012490455527, "P99": 0.002388109758467183} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.667231864976756} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1365.6227148680991} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 33.35700971165017, "P50": 33.463258854499145, "P90": 33.60870834610105, "P99": 34.31767003908011} +{"input_tokens": 128, "output_tokens": 512, 
"request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.834331976482645e-05, "P50": 8.957998943515122e-06, "P90": 1.821309851948175e-05, "P99": 0.0017124389250238973} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.6864554196635524} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1375.4651748677388} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 33.647806842520225, "P50": 33.683721166504256, "P90": 33.802841262199216, "P99": 35.48582025158088} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00023435954994056375, "P50": 8.416500349994749e-06, "P90": 3.745029098354378e-05, "P99": 0.00388623508828463} +{"input_tokens": 128, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0855913827165853} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.342365530866341} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4470335070705914, "P50": 0.1630050830062828, "P90": 0.9820420211937758, "P99": 3.341773287335675} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.1756249954923986e-05, "P50": 9.458002750761807e-06, "P90": 1.7441403178963842e-05, "P99": 0.0006338007510930782} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1632187093298607} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.652874837319443} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.29175405083035, "P50": 0.1671717289937078, "P90": 0.5661679548094981, "P99": 1.4619446063498642} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.102539031417109e-05, "P50": 9.978990419767797e-06, "P90": 1.451249554520473e-05, "P99": 0.0018230000055336894} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.265503317997372} +{"input_tokens": 256, 
"output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.06201327198949} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2587688753897964, "P50": 0.2161537914944347, "P90": 0.3892671375040664, "P99": 0.9916335700011412} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0006240833811170887, "P50": 9.750001481734216e-06, "P90": 8.332529687322685e-05, "P99": 0.002444899673719445} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.476172706596419} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 33.904690826385675} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.21652384912988054, "P50": 0.20386872900417075, "P90": 0.2982806798027014, "P99": 0.46552549664877024} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00021848123942618257, "P50": 9.43699706112966e-06, "P90": 1.2445608444977571e-05, "P99": 0.0059130399207060974} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.437690887352556} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 65.75076354941022} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.2860537079401547, "P50": 0.27439789599884534, "P90": 0.4032988081060466, "P99": 0.4621370374216349} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.536416022456252e-05, "P50": 9.437499102205038e-06, "P90": 1.3608302106149503e-05, "P99": 0.0014640517566295056} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 25.73611012168761} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 102.94444048675044} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5615111045901722, "P50": 0.5248488540019025, "P90": 0.8579790169082118, "P99": 0.922583863661421} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0006849258208239916, "P50": 5.6249991757795215e-06, "P90": 
4.3378501140979196e-05, "P99": 0.019842560247634623} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 26.94702168379827} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 107.78808673519308} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.114192920420901, "P50": 1.0030277084952104, "P90": 1.9121259580962944, "P99": 2.035314235332917} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0005276737807434984, "P50": 9.083501936402172e-06, "P90": 0.00021627530368277992, "P99": 0.010578954763623258} +{"input_tokens": 256, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0838856092106033} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.671084873684826} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5278855978799402, "P50": 0.2658776669995859, "P90": 1.1184096744051204, "P99": 2.906709703755628} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.634202047483996e-05, "P50": 9.541501640342176e-06, "P90": 1.923719682963574e-05, "P99": 0.0026221366650133865} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.159698261644992} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.277586093159936} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3573881629008974, "P50": 0.2680125625047367, "P90": 0.49689750410470923, "P99": 1.4682245661561344} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.4424600016791373e-05, "P50": 1.0167001164518297e-05, "P90": 1.6062801296357066e-05, "P99": 0.0005957580085669202} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.285107514365901} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.28086011492721} +{"input_tokens": 256, "output_tokens": 8, 
"request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3112557458001538, "P50": 0.27465779149497394, "P90": 0.37113202470209217, "P99": 0.8506574100068254} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.491289994097315e-05, "P50": 9.43699706112966e-06, "P90": 1.4524994185194375e-05, "P99": 0.0010476675097015702} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.42141105817977} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.37128846543816} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.34824806794000324, "P50": 0.33564012499846285, "P90": 0.4647439374937676, "P99": 0.4963470540786512} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.7582030413905158e-05, "P50": 9.417002729605883e-06, "P90": 1.597049558768049e-05, "P99": 0.00024016510084038712} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 16.143744111958988} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 129.1499528956719} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4800296775203606, "P50": 0.4459682709930348, "P90": 0.6728211915979045, "P99": 0.7084507116547322} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.792829022742807e-05, "P50": 9.457988198846579e-06, "P90": 5.9437500021886086e-05, "P99": 0.0012530459176923634} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 24.416308311543894} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 195.33046649235115} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8389532533098827, "P50": 0.7984342709969496, "P90": 1.104933150301804, "P99": 1.2033286179949938} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.129042976885103e-05, "P50": 8.291499398183078e-06, "P90": 0.00016490890993736684, "P99": 0.001199981663812651} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", 
"mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 25.769051024042447} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 206.15240819233958} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4201583428797313, "P50": 1.3574877084975014, "P90": 2.3145735205020173, "P99": 2.337367919751123} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001012662093853578, "P50": 9.12499672267586e-06, "P90": 0.000192975302343257, "P99": 0.001544496658898437} +{"input_tokens": 256, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0815111337530035} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.304178140048055} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7119307108294742, "P50": 0.4677337295070174, "P90": 1.1423192500034935, "P99": 3.881926620000307} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.074910924420692e-05, "P50": 9.458504791837186e-06, "P90": 1.2008995690848694e-05, "P99": 0.00040285592040163466} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1508264946619415} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.413223914591065} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5187005629103806, "P50": 0.46619081199605716, "P90": 0.5583176493964856, "P99": 1.6881514286591859} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.4640850683208554e-05, "P50": 9.854498784989119e-06, "P90": 1.487950648879633e-05, "P99": 0.0008283878248767031} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.247188321131562} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.95501313810499} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.534129710400448, "P50": 0.5196304370037979, "P90": 0.6139035040963791, 
"P99": 0.851734846668697} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013711089050048031, "P50": 9.70800465438515e-06, "P90": 1.715419930405915e-05, "P99": 0.0022600910899928184} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.263433096015662} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 132.2149295362506} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6666295053504291, "P50": 0.6514733750009327, "P90": 0.8350067541032331, "P99": 0.9545008317510658} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00023895959966466763, "P50": 9.811992640607059e-06, "P90": 1.786190696293494e-05, "P99": 0.002555538336164396} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 14.791607877745344} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 236.6657260439255} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4465141050005332, "P50": 1.4595690210044268, "P90": 2.1388085580110783, "P99": 2.214317126993847} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0006152632308658213, "P50": 9.166506060864776e-06, "P90": 0.001274037499388215, "P99": 0.0110281976708211} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 21.911344475828685} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 350.58151161325895} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4937977466903976, "P50": 1.4643404585003736, "P90": 1.7499353210994744, "P99": 1.8836920838343216} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.805210981634445e-05, "P50": 5.9375015553087e-06, "P90": 4.4074702600483854e-05, "P99": 0.0009213069132238164} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TPUT", "mean": 22.75667452657716} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 364.1067924252346} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.085479760780436, "P50": 2.144062416504312, "P90": 2.7972313958016457, "P99": 2.8302094671757367} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0003477483296592254, "P50": 9.47900116443634e-06, "P90": 0.0009107292062253706, "P99": 0.004484832098096396} +{"input_tokens": 256, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0770915756062223} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.46693041939911} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.0061597908691329, "P50": 0.8786750625004061, "P90": 0.986802633892512, "P99": 3.3483420908478756} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00014473336006631143, "P50": 9.520503226667643e-06, "P90": 2.5445196661166858e-05, "P99": 0.003245991675648842} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1320114653480817} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.22436689113862} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9427082674395934, "P50": 0.9221798754952033, "P90": 1.012932499393355, "P99": 1.6910644455027082} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.7735799453221264e-05, "P50": 9.54199640545994e-06, "P90": 2.0425302500370932e-05, "P99": 0.0007410571708169388} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.1656120725452705} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.29958632144866} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.020137873749627, "P50": 1.0045162919996073, "P90": 1.1821907958117663, "P99": 1.2282681317499373} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 
8.143625062075444e-05, "P50": 9.854004019871354e-06, "P90": 1.8270805594511353e-05, "P99": 0.001215850003645777} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.988279383679795} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 255.62494027775344} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.232087589950388, "P50": 1.215403708498343, "P90": 1.4683988246964874, "P99": 1.5480410192413547} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.1675879834219815e-05, "P50": 9.499999578110874e-06, "P90": 1.4795303286518902e-05, "P99": 0.00025756300019566135} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 14.114801099104234} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 451.6736351713355} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.8919600041706872, "P50": 1.87945995850896, "P90": 2.322500091705297, "P99": 2.395973492158082} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.9199629758950324e-05, "P50": 9.41699545364827e-06, "P90": 4.1458000487183395e-05, "P99": 0.0010129008462536164} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 18.25210579676998} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 584.0673854966393} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.610583542110544, "P50": 2.6300893540028483, "P90": 2.8275201503027345, "P99": 2.869738736087311} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00012493877045926638, "P50": 7.83299037721008e-06, "P90": 0.00015492109960178011, "P99": 0.0014671233244007611} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 19.116530652345308} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TT", "mean": 611.7289808750498} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.1538232595290174, "P50": 3.3842627289996017, "P90": 3.6440314288949596, "P99": 3.6703826959895376} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.281335049308836e-05, "P50": 5.895999493077397e-06, "P90": 8.661669999128294e-05, "P99": 0.002577518510079247} +{"input_tokens": 256, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0682436116806113} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.36759114755912} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.7611805974988965, "P50": 1.717203520493058, "P90": 1.8174894371943082, "P99": 3.3714379162526193} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.305208978825249e-05, "P50": 9.625000529922545e-06, "P90": 1.4016589557286365e-05, "P99": 0.0015047800941101754} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0942820032977214} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 134.03404821105417} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.829918367840437, "P50": 1.811133395996876, "P90": 1.9966411786022946, "P99": 2.0665458817429316} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.8835379818920046e-05, "P50": 9.437506378162652e-06, "P90": 1.4258305600378673e-05, "P99": 0.0006883500824915313} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.025370352965046} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 257.62370258976296} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.0546014950105747, "P50": 2.0379821044989512, "P90": 2.27765166660829, "P99": 2.376797155669774} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.686000046902336e-05, "P50": 9.541006875224411e-06, "P90": 1.1863096733577552e-05, "P99": 0.0002647683479881389} +{"input_tokens": 256, "output_tokens": 64, 
"request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.316096204590208} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 468.2301570937733} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.6789876145502785, "P50": 2.7974970414943527, "P90": 3.067053262202535, "P99": 3.2147821837407538} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.267294130637311e-05, "P50": 9.500006854068488e-06, "P90": 1.3558400678448395e-05, "P99": 0.00036622875719331805} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.3058176515962} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 723.5723297021568} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.965859421620116, "P50": 4.121599854501255, "P90": 4.64337827150739, "P99": 4.681690526325983} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.799291987088509e-05, "P50": 9.395494998898357e-06, "P90": 2.161249867640442e-05, "P99": 0.0019673619074456} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 13.483485276144064} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 862.9430576732201} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.690168418689864, "P50": 4.695508874501684, "P90": 4.931913250002253, "P99": 5.01033573467561} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.428165918099694e-05, "P50": 9.292001777794212e-06, "P90": 1.563300029374666e-05, "P99": 0.0015763815749960525} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 14.4763730301464} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 926.4878739293696} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", 
"mean": 5.024298177500314, "P50": 5.03795129199716, "P90": 5.355767683293379, "P99": 5.392100778925087} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.887838986585848e-05, "P50": 9.290997695643455e-06, "P90": 5.7870497403200835e-05, "P99": 0.0009769635788688968} +{"input_tokens": 256, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0491204433860253} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 134.28741675341124} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.5102788070900717, "P50": 3.4696215209987713, "P90": 3.773407812806545, "P99": 3.8091555132425854} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.4517839376349e-05, "P50": 9.437499102205038e-06, "P90": 1.452500728191808e-05, "P99": 0.0006413094837625994} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.018186239477635} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 258.3278386531373} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.825506552899751, "P50": 3.8308955624961527, "P90": 4.111452938099683, "P99": 4.135950386837212} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.904795954236761e-05, "P50": 9.915995178744197e-06, "P90": 2.8149696299806263e-05, "P99": 0.00034108249819837766} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.691764974548785} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 472.54591674224446} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.675140423779958, "P50": 4.734400792003726, "P90": 5.16058690410282, "P99": 5.17946376500011} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.5119599817553536e-05, "P50": 9.750001481734216e-06, "P90": 1.4629094221163552e-05, "P99": 0.0004619945783633845} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, 
"output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.904968725470994} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 755.8359968602872} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.4420135920986645, "P50": 6.60238335399481, "P90": 7.250533028603241, "P99": 7.289552429178293} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.076993059949018e-05, "P50": 9.666509868111461e-06, "P90": 1.5137808804865934e-05, "P99": 0.0005092603334924235} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.769650177141936} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 994.5152226741678} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.947457896640117, "P50": 8.058163625006273, "P90": 8.43392270829936, "P99": 8.502774239083083} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.232670937199145e-05, "P50": 9.3334965640679e-06, "P90": 1.3920803030487154e-05, "P99": 0.0011314246678375652} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.722462526814395} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1116.4752034322426} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.699817051249847, "P50": 8.723318666503474, "P90": 8.850563679797052, "P99": 8.917725134990324} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.71783499699086e-05, "P50": 9.415991371497512e-06, "P90": 2.995379763888229e-05, "P99": 0.001963370740704713} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 9.105625644461483} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1165.5200824910698} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 9.040247739180195, "P50": 9.068945000508393, "P90": 9.419319766301488, "P99": 9.454236619174335} +{"input_tokens": 256, 
"output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001497828205174301, "P50": 9.583498467691243e-06, "P90": 4.356700374046345e-05, "P99": 0.003568443160038457} +{"input_tokens": 256, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0103397781387096} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 258.64698320350965} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.4949188571107515, "P50": 7.490235645498615, "P90": 7.969064295897261, "P99": 8.018734128834767} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.5733780096052213e-05, "P50": 9.750001481734216e-06, "P90": 1.5471395454369488e-05, "P99": 0.0004839833280129857} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.8469738542167033} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 472.82530667947606} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.984567979139683, "P50": 9.18899158349086, "P90": 9.746725299999525, "P99": 9.817838396741427} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.94733695895411e-05, "P50": 9.750001481734216e-06, "P90": 1.4533293142449125e-05, "P99": 0.001882186165166792} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.9457778622496296} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 754.1191327359052} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 12.199923648720869, "P50": 12.614711020498362, "P90": 13.65182237889967, "P99": 13.715046312073536} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00018026870995527133, "P50": 9.70800465438515e-06, "P90": 3.465420013526456e-05, "P99": 0.003421249750244914} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.891010626462052} 
+{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 996.0987203742853} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 15.38124160506035, "P50": 15.628820103993348, "P90": 16.295589583701805, "P99": 16.374210898167075} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0003198561801400501, "P50": 9.665993275120854e-06, "P90": 0.0003869417079840845, "P99": 0.005450867409526836} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.550448231546603} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1164.9147472759303} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.6799317066402, "P50": 16.716528167002252, "P90": 17.168521079403583, "P99": 17.46700000833269} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.261252012336627e-05, "P50": 9.499999578110874e-06, "P90": 1.4833908062428353e-05, "P99": 0.0016321757610421668} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.908506391879229} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1256.5776363210825} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 17.486064236271048, "P50": 17.480763833496894, "P90": 17.57595694220654, "P99": 17.637690087077498} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.340418102103286e-05, "P50": 9.458002750761807e-06, "P90": 3.66336942533963e-05, "P99": 0.0014181649075180856} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.8400386487527385} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1239.049894080701} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 18.269778575848903, "P50": 18.650804103992414, "P90": 19.046562145499045, "P99": 19.10580675499339} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 
4.476788977626711e-05, "P50": 9.395997039973736e-06, "P90": 1.1495899525471041e-05, "P99": 0.0003010651767545034} +{"input_tokens": 256, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9136943737263855} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 467.8115193479094} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 18.759461743351856, "P50": 19.4933364374956, "P90": 20.61606707470928, "P99": 20.692808674180853} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.6658369790529835e-05, "P50": 9.729497833177447e-06, "P90": 1.7303593631368132e-05, "P99": 0.0004450333422573766} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.4150406798717987} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 724.5008280943609} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 26.13497965757997, "P50": 27.397804334003013, "P90": 29.475873437502013, "P99": 29.543713913993706} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001256483896577265, "P50": 9.83300560619682e-06, "P90": 1.428339310223237e-05, "P99": 0.0009262806635525091} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.8197637981431107} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 931.7190646492727} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 32.611780099589666, "P50": 33.78881254150474, "P90": 34.9348111164014, "P99": 35.09352941018471} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.9474539593793455e-05, "P50": 9.541501640342176e-06, "P90": 1.5683402307331578e-05, "P99": 0.0012816887472581642} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.956379129698578} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TT", "mean": 1001.6661144056719} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 36.980761348720115, "P50": 37.54505649999919, "P90": 38.154167445797064, "P99": 39.45866090049734} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.174458020017482e-05, "P50": 9.687493729870766e-06, "P90": 1.7133296933025155e-05, "P99": 0.002037958664295731} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0093053189788774} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1028.7643233171852} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 39.1598525270808, "P50": 39.02792381250765, "P90": 42.1182618207924, "P99": 43.812934160084694} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.484874869580381e-05, "P50": 1.0041498171631247e-05, "P90": 1.5820194676052803e-05, "P99": 0.0009961016726447347} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0324686356429558} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1040.6239414491934} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 39.72219719874949, "P50": 39.060307458501484, "P90": 44.23160005030223, "P99": 46.05099424833461} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.691998855676502e-05, "P50": 9.58298915065825e-06, "P90": 1.2858593254350132e-05, "P99": 8.175408409443071e-05} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0460122075598965} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 1047.558250270667} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 40.06301202874936, "P50": 39.230475728989404, "P90": 45.30977201729111, "P99": 47.17306271774156} +{"input_tokens": 256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.526830009534023e-05, "P50": 9.229501301888376e-06, "P90": 1.7383589874953126e-05, "P99": 0.0011277366815193172} +{"input_tokens": 
256, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0840175767869982} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.336070307147993} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5043306929302344, "P50": 0.24560479149658931, "P90": 1.1041465000089377, "P99": 2.915753617424203} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00011814915007562376, "P50": 1.0020499757956713e-05, "P90": 1.5657707990612845e-05, "P99": 0.0030632599240925634} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1602671757954983} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.641068703181993} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.3685514982999302, "P50": 0.24914006250037346, "P90": 0.6188258661073632, "P99": 1.945439464081575} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00012716955083305948, "P50": 9.437506378162652e-06, "P90": 1.4854493201710289e-05, "P99": 0.0028839241625974003} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.290312704221436} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.161250816885744} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.29619134163993294, "P50": 0.251105812501919, "P90": 0.4173476042036782, "P99": 0.8476790786508361} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001805908902315423, "P50": 9.68800304690376e-06, "P90": 1.498310157330707e-05, "P99": 0.0007586498335877742} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.388167072977168} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 33.552668291908674} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "E2E", "mean": 0.45242629456042777, "P50": 0.4342285619932227, "P90": 0.7364549125049963, "P99": 0.8269102721587115} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.358119065524079e-05, "P50": 9.604002116248012e-06, "P90": 1.6466397210024297e-05, "P99": 0.00044163750280859135} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.529377540679512} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 50.11751016271805} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.0800142016993777, "P50": 0.9107314789944212, "P90": 1.9972152125032154, "P99": 2.180641507585533} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.803248110576532e-05, "P50": 9.77099989540875e-06, "P90": 1.758329599397265e-05, "P99": 0.0024527380934159754} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.970671346003343} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 51.88268538401337} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.4023948796292824, "P50": 2.3054093754981295, "P90": 4.360357291100081, "P99": 4.7177483230932555} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.284626877051778e-05, "P50": 9.645998943597078e-06, "P90": 1.468339614802972e-05, "P99": 0.0003188533346110609} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 13.351384925561684} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 53.405539702246735} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.9558799558696047, "P50": 2.8853606039992883, "P90": 5.4463075375068, "P99": 5.918200403665542} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.40491999790538e-05, "P50": 9.770497854333371e-06, "P90": 1.2645497918128972e-05, "P99": 0.0017023153381887921} +{"input_tokens": 512, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, 
"output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.083401144117928} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.667209152943425} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6246680400198966, "P50": 0.34864960450067883, "P90": 1.223030962196936, "P99": 2.904660807918122} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00011916338888113388, "P50": 9.812501957640052e-06, "P90": 2.6000609796028653e-05, "P99": 0.002530962489399837} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1559831571339423} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.24786525707154} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4305102291806543, "P50": 0.3517126875012764, "P90": 0.4949511124956191, "P99": 1.6852646779138025} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.186163981212303e-05, "P50": 9.354000212624669e-06, "P90": 1.567019644426182e-05, "P99": 0.0014885951741598622} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.2766335950490255} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.213068760392204} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.43348277759869236, "P50": 0.4028387500002282, "P90": 0.6082976502919338, "P99": 0.7666651046603514} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.175582878408022e-05, "P50": 9.458497515879571e-06, "P90": 1.4799402561038734e-05, "P99": 0.00035796283060335554} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.30749664354739} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 66.45997314837912} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6682206691901956, "P50": 0.6288884374953341, "P90": 1.021337725000922, "P99": 1.136107314079127} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.6777928984956813e-05, "P50": 9.77099989540875e-06, "P90": 1.464579545427114e-05, "P99": 0.0011554123308451449} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.041695575586099} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 96.33356460468879} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4799905074790878, "P50": 1.298962062508508, "P90": 2.5151716631022283, "P99": 2.5883393824874656} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00014463583996985108, "P50": 9.583993232809007e-06, "P90": 1.5903888561297278e-05, "P99": 0.004546230411942823} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.422943330481392} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 99.38354664385113} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.8015157495604943, "P50": 2.6656929580130964, "P90": 4.898743538104464, "P99": 5.0489887549891135} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4143779737642034e-05, "P50": 9.791998309083283e-06, "P90": 1.1970492778345944e-05, "P99": 0.00043233158503426495} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 12.750500448630419} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 102.00400358904335} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.373726988270355, "P50": 3.285824145998049, "P90": 6.035012236899639, "P99": 6.2774265000104785} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.6998290452174845e-05, "P50": 9.3334965640679e-06, "P90": 1.434939913451672e-05, "P99": 9.136458422291453e-05} +{"input_tokens": 512, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0811750990039635} +{"input_tokens": 512, "output_tokens": 16, 
"request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.298801584063416} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7210806762996071, "P50": 0.5578815000044415, "P90": 0.9832681496947778, "P99": 2.9146445604256637} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.629543898976408e-05, "P50": 9.479495929554105e-06, "P90": 1.2841398711316289e-05, "P99": 0.0017035629146266785} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.148455080669325} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.3752812907092} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6125793932696979, "P50": 0.5660793329952867, "P90": 0.7230151035037125, "P99": 1.6859643712577248} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001386904704850167, "P50": 9.666502592153847e-06, "P90": 0.00010124550317414137, "P99": 0.0024456354210269653} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.2255038588517735} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.60806174162838} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7693805836794491, "P50": 0.7428868120041443, "P90": 1.0594115250045435, "P99": 1.1428596337469936} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.9527899130480364e-05, "P50": 9.437499102205038e-06, "P90": 1.4562792785000058e-05, "P99": 0.0003543594101211065} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.099678750319264} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 129.59486000510822} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.1159213567098778, "P50": 1.034467562501959, "P90": 1.625783904195123, "P99": 1.7847017389982651} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00012119877050281502, "P50": 9.583491191733629e-06, "P90": 2.5237802765332276e-05, 
"P99": 0.002395294576417665} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.177992246803685} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 178.84787594885896} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.258712281260523, "P50": 2.114943624997977, "P90": 3.1848407038982263, "P99": 3.3739348762505688} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.8696669730125e-05, "P50": 9.416500688530505e-06, "P90": 2.4629107792861802e-05, "P99": 0.0009454905804887026} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.579844227352995} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 185.27750763764791} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.5136313299988977, "P50": 3.477355520997662, "P90": 5.598867333601811, "P99": 5.664698176993116} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.14854001509957e-05, "P50": 9.625000529922545e-06, "P90": 1.5899693244136875e-05, "P99": 0.0010160548382555327} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 11.871417823932267} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 189.94268518291628} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.061888720010611, "P50": 4.084490000001097, "P90": 6.726550616297755, "P99": 6.885424899477366} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.970702030230313e-05, "P50": 9.791496268007904e-06, "P90": 1.3921200297772884e-05, "P99": 0.0016937046642124304} +{"input_tokens": 512, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0763769014029683} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.444060844894985} +{"input_tokens": 512, "output_tokens": 32, 
"request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.0512557400095102, "P50": 0.9721583959981217, "P90": 1.1154718836987743, "P99": 3.5209670372599815} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.55075403524097e-05, "P50": 9.479503205511719e-06, "P90": 4.662060819100589e-05, "P99": 0.002392752253217624} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1298968762884773} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.15670004123128} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.1085284683703502, "P50": 1.0766712710101274, "P90": 1.2707552713967745, "P99": 1.690062099004282} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.2042499592062084e-05, "P50": 9.70800465438515e-06, "P90": 1.4346103125717495e-05, "P99": 0.0007716654025716798} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.154727415360534} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 132.95127729153708} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.3411474629104487, "P50": 1.2877482920011971, "P90": 1.7414398584121957, "P99": 1.879550425500347} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.550252975197509e-05, "P50": 9.56250005401671e-06, "P90": 1.2762495316565038e-05, "P99": 0.0003653316640702577} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.575132651417761} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 242.40424484536834} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.1285525582800617, "P50": 2.2613652289946913, "P90": 2.599000700304168, "P99": 2.6427926112482965} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00038914369040867315, "P50": 9.499490261077881e-06, "P90": 7.5383407238406e-05, "P99": 0.0061277641725611} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 
0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 9.691938777666708} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 310.14204088533467} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.8635772262993853, "P50": 3.9549208960088436, "P90": 4.720204674996785, "P99": 4.967476036077423} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.5971259280340745e-05, "P50": 9.666502592153847e-06, "P90": 1.2349695316515864e-05, "P99": 0.000647378090507131} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 9.993167213888288} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 319.7813508444252} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.073788285451156, "P50": 5.2899838334997185, "P90": 6.891355574704358, "P99": 6.940098252088937} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00010325250899768434, "P50": 9.56250005401671e-06, "P90": 1.4149992784950884e-05, "P99": 0.0030315488362975913} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 10.259072704641495} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 328.29032654852784} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.645639258309675, "P50": 5.904219771000498, "P90": 8.02718492920394, "P99": 8.181088597006601} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2314219929976389e-05, "P50": 9.70800465438515e-06, "P90": 1.2500301818363386e-05, "P99": 7.759933985653343e-05} +{"input_tokens": 512, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.067235142517035} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.30304912109024} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.955742116290203, "P50": 1.890505437993852, "P90": 2.1360586207913004, 
"P99": 3.3705459567482494} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.8631650141906e-05, "P50": 9.791496268007904e-06, "P90": 1.9579504441935678e-05, "P99": 0.0022522940786438997} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0889778984963376} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.6945855037656} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.1474063337106784, "P50": 2.090626874996815, "P90": 2.5070058003097078, "P99": 2.6728208883300253} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.791036121081561e-05, "P50": 9.70800465438515e-06, "P90": 1.5128604718483992e-05, "P99": 0.001568662506178957} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.981604601798121} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 254.82269451507975} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.7544392175000394, "P50": 2.6925307709971094, "P90": 3.3784057868964736, "P99": 3.552580571917206} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.062500135158189e-05, "P50": 9.645504178479314e-06, "P90": 1.4366397226694974e-05, "P99": 0.00028487998730272534} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.5934189104968} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 421.9788102717952} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.882827863340935, "P50": 4.869569271002547, "P90": 6.051617146402714, "P99": 6.156076234679494} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.927674024249427e-05, "P50": 9.41600592341274e-06, "P90": 1.2696205521933737e-05, "P99": 0.0007181683307862848} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPUT", "mean": 7.674385247651147} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 491.1606558496734} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.974408108779753, "P50": 7.224172541995358, "P90": 7.549805933097377, "P99": 7.670008941166307} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.194004996563308e-05, "P50": 9.70800465438515e-06, "P90": 1.2079697626177232e-05, "P99": 0.0015190141636412682} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 7.792992834829126} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 498.75154142906405} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.363191072850604, "P50": 8.894706250503077, "P90": 9.751978833597969, "P99": 9.827687129750702} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.349328053649515e-05, "P50": 9.666495316196233e-06, "P90": 1.3024700456298904e-05, "P99": 0.0009264795000490312} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 8.033579290082939} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 514.1490745653081} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.791402581670118, "P50": 9.350583062005171, "P90": 10.714973495794402, "P99": 10.908325428430981} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.4288350794231519e-05, "P50": 9.625000529922545e-06, "P90": 1.2471202353481208e-05, "P99": 0.0001802030100952839} +{"input_tokens": 512, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0474780818844678} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 134.07719448121188} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.8791308108599334, "P50": 3.8198611455009086, "P90": 4.367031545301143, "P99": 4.4849807383328155} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 
6.351956966682337e-05, "P50": 9.395494998898357e-06, "P90": 3.4387194318696965e-05, "P99": 0.0010343204165110418} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.005226502565236} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 256.6689923283502} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.562571197449579, "P50": 4.52039820850041, "P90": 5.1554417829916925, "P99": 5.305987661238905} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4529180082026868e-05, "P50": 9.292001777794212e-06, "P90": 1.3857994053978474e-05, "P99": 0.00036426791411941237} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.5402628264146743} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 453.1536417810783} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.80617597501041, "P50": 7.0981574164907215, "P90": 8.205290545809842, "P99": 8.63211371774829} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.898624989436939e-05, "P50": 9.707997378427535e-06, "P90": 1.5866699686739645e-05, "P99": 0.0014074541657464587} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.920257902945506} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 629.7930115770248} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 10.507685880418721, "P50": 10.912392312493466, "P90": 11.684724416401878, "P99": 11.799012876257766} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4713708990020676e-05, "P50": 9.749994205776602e-06, "P90": 1.7120291886385537e-05, "P99": 0.00028536791869555834} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.1878349679638625} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 
100, "metric": "TT", "mean": 664.0428758993744} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 12.803190416249418, "P50": 12.78597064600035, "P90": 13.097242504489259, "P99": 13.40384333441194} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.588000006857328e-05, "P50": 9.583003702573478e-06, "P90": 1.5642293146811462e-05, "P99": 0.0006521025004622092} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.276202495368075} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 675.3539194071136} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.041861816690362, "P50": 14.162251770503644, "P90": 15.17195276249986, "P99": 15.920454281578422} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.3059121089754626e-05, "P50": 9.416507964488119e-06, "P90": 2.5891992845572666e-05, "P99": 0.0006950441779918078} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.341921516909921} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 683.7659541644699} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.626481386259984, "P50": 14.795570166505058, "P90": 16.321942574993592, "P99": 17.172211008749873} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.432837923057377e-05, "P50": 9.562507329974324e-06, "P90": 2.3291700927075002e-05, "P99": 0.00028117074834883404} +{"input_tokens": 512, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.004669676192561} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 257.19543710529564} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.509360085839434, "P50": 8.573184125503758, "P90": 9.387828833695675, "P99": 9.496391045408965} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.400202898774296e-05, "P50": 9.665993275120854e-06, "P90": 1.587949664099146e-05, "P99": 0.00019680675424752796} 
+{"input_tokens": 512, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.7897889600104842} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 458.18597376268394} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 11.729421083759952, "P50": 12.140330437505327, "P90": 13.588960074695933, "P99": 13.664375322409178} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.0906759891659023e-05, "P50": 9.729497833177447e-06, "P90": 1.8195198208559328e-05, "P99": 0.00019356741453521026} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.6019688640660155} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 666.1040292009} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 17.876104359179006, "P50": 18.64728922899667, "P90": 19.987393870801316, "P99": 20.12275059876294} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.0846279987599704e-05, "P50": 9.666495316196233e-06, "P90": 1.5692294982727638e-05, "P99": 0.0005374979948101117} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.7603787908215622} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 706.6569704503199} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 22.34057003090973, "P50": 22.478856582994922, "P90": 23.0277244752986, "P99": 24.79735728000407} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.3454931108281014e-05, "P50": 9.458002750761807e-06, "P90": 1.2999700265936557e-05, "P99": 0.0007600687515514483} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.775390810003865} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 710.5000473609895} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 
16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 24.71892776337976, "P50": 24.1579325000057, "P90": 28.410541370508145, "P99": 30.07876927190242} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.078379978542216e-05, "P50": 9.437506378162652e-06, "P90": 1.452150172553957e-05, "P99": 0.000837630581081631} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.7973564333396985} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 716.1232469349628} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 26.046227481721143, "P50": 25.624842166507733, "P90": 30.814543379200042, "P99": 32.73981134275033} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.090247905580327e-05, "P50": 9.584000508766621e-06, "P90": 1.556279748911039e-05, "P99": 0.0013670434198866167} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.825722541489097} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 723.3849706212088} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 26.501606436269796, "P50": 26.111554146002163, "P90": 31.776966633301345, "P99": 33.80532060875252} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.418006003717892e-05, "P50": 9.41699545364827e-06, "P90": 2.0783704530913434e-05, "P99": 0.0012988044967642142} +{"input_tokens": 512, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.885877990068922} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 453.56953091528806} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 23.423177411239593, "P50": 24.753849124994304, "P90": 27.086326971199014, "P99": 27.242498011743155} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00014433833013754337, "P50": 9.499999578110874e-06, "P90": 1.621699047973396e-05, "P99": 0.0036788383369275885} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.21791730517106} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 623.5736602475827} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 35.79751576251991, "P50": 37.6465638544978, "P90": 40.19314374610112, "P99": 40.46651157408269} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00035810955057968385, "P50": 9.666502592153847e-06, "P90": 2.8937200841028454e-05, "P99": 0.003282180245441968} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.3059038161617067} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 668.6227538747938} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 44.03982341750976, "P50": 43.25448343750759, "P90": 53.91940819589945, "P99": 54.89309707442124} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.9816689202561975e-05, "P50": 9.832998330239207e-06, "P90": 1.4508394815493376e-05, "P99": 0.0007694040787464477} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.34678451652938} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 689.5536724630425} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 49.02979374458024, "P50": 45.00604666650179, "P90": 61.660366312800036, "P99": 62.812962592925615} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.764963047928176e-05, "P50": 9.625000529922545e-06, "P90": 1.4428500435315074e-05, "P99": 0.0040466099065088225} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.3545310863705176} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 693.519916221705} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 51.24418909619024, "P50": 45.4097079169951, "P90": 
66.24906456610333, "P99": 67.93611561107303} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.588876047520898e-05, "P50": 9.18799196369946e-06, "P90": 1.1141400318592791e-05, "P99": 0.00251153667530162} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.3596808835374523} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 696.1566123711756} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 52.54736686158998, "P50": 46.705684895998274, "P90": 68.6326671332994, "P99": 70.59494428717967} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.00591599161271e-05, "P50": 9.708499419502914e-06, "P90": 1.4333096623886377e-05, "P99": 0.0018204622539633505} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.365640744818924} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 699.2080613472891} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 52.97522714710038, "P50": 47.17740047950065, "P90": 69.56555400000944, "P99": 71.69316816667546} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.129005025490187e-05, "P50": 9.645504178479314e-06, "P90": 1.660420093685399e-05, "P99": 0.0013096512501942956} +{"input_tokens": 512, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0836964609397537} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.334785843759015} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.5830403766705422, "P50": 0.38696854149748106, "P90": 1.1208281000013813, "P99": 2.914266903521789} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.409041915205307e-05, "P50": 1.0000003385357559e-05, "P90": 2.8662489785347253e-05, "P99": 0.0017563004216936088} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": 
"llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.158126684992968} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.632506739971872} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.4804411595202691, "P50": 0.3925832290042308, "P90": 0.6768725165995424, "P99": 1.4610129695922685} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.205743996659294e-05, "P50": 9.271003364119679e-06, "P90": 1.1158606503158818e-05, "P99": 0.0006825813234900162} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.240887079416033} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 16.96354831766413} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6809599303502182, "P50": 0.6307859584921971, "P90": 1.1818091336012004, "P99": 1.3364400126841793} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.6315429713577033e-05, "P50": 9.291994501836598e-06, "P90": 1.4941109111532656e-05, "P99": 0.0005166324973106389} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.121079372216443} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 24.484317488865774} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.1853454462504307, "P50": 1.9315709590009646, "P90": 4.363074737209535, "P99": 4.821092804672226} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.927626055083238e-05, "P50": 9.458002750761807e-06, "P90": 1.8358408124186257e-05, "P99": 0.002063175176590455} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.214563072585733} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 24.858252290342932} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.158113082128693, "P50": 5.0409958954987815, "P90": 9.364563903903763, "P99": 10.25399498499828} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 
100, "metric": "TTFT", "mean": 4.6525470388587564e-05, "P50": 9.166506060864776e-06, "P90": 1.1691699910443303e-05, "P99": 0.0009176370034401787} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.2981968696691295} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 25.192787478676518} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.52791216247977, "P50": 6.450734062498668, "P90": 11.785176467300335, "P99": 12.932453730840061} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.6155779924010858e-05, "P50": 9.146009688265622e-06, "P90": 1.1933701171074064e-05, "P99": 0.0006083634280366823} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.449112831829054} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 25.796451327316216} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.941278556210891, "P50": 6.8920621249999385, "P90": 12.710973207800999, "P99": 13.973120912088344} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013985621917527168, "P50": 9.520495950710028e-06, "P90": 1.919579954119402e-05, "P99": 0.002501877912291131} +{"input_tokens": 1024, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0824123468601983} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.659298774881586} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.7241457403899403, "P50": 0.4946097084975918, "P90": 1.1362902209075407, "P99": 3.8738686903398807} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4195809819502755e-05, "P50": 9.812996722757816e-06, "P90": 1.8007810285780586e-05, "P99": 0.0004847308383614308} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1485492444935352} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, 
"model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.188393955948282} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.6040875949700422, "P50": 0.5070369585009757, "P90": 0.8460863500033157, "P99": 1.6793807283334914} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4198780301958323e-05, "P50": 9.000003046821803e-06, "P90": 1.8845291924662893e-05, "P99": 0.0004791964235482744} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.201412045104191} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 33.61129636083353} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9494686328893295, "P50": 0.8685015209921403, "P90": 1.533937424697797, "P99": 1.7252161905828691} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.571712041273713e-05, "P50": 9.6454969025217e-06, "P90": 1.4999999257270246e-05, "P99": 0.0011330517508031412} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.929115087130496} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 47.43292069704397} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.640736916259775, "P50": 2.358628874993883, "P90": 5.060240075607726, "P99": 5.40264176392986} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.6620831049513073e-05, "P50": 9.56250005401671e-06, "P90": 1.6295509703923026e-05, "P99": 0.00020954017687472968} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.979747566821098} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 47.837980534568786} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.688867549559655, "P50": 5.508969333001005, "P90": 10.140793633306748, "P99": 10.835811243742064} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.487083912361413e-05, "P50": 9.666495316196233e-06, "P90": 1.6016597510315555e-05, "P99": 0.0011356960760895245} 
+{"input_tokens": 1024, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.092465655634499} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 48.739725245075995} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.980547208739445, "P50": 6.8435043539939215, "P90": 12.4819816255942, "P99": 13.428931715748186} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00010510997904930263, "P50": 9.895498806145042e-06, "P90": 4.8187804350163885e-05, "P99": 0.0027972493445850006} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 6.217046222817317} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 49.73636978253854} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.435889446209476, "P50": 7.328078708495013, "P90": 13.45552456729056, "P99": 14.521477456428839} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.8762429719790818e-05, "P50": 9.499999578110874e-06, "P90": 1.4891596219968052e-05, "P99": 0.0004523591678298582} +{"input_tokens": 1024, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0799727138413167} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.279563421461066} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8543551782808209, "P50": 0.7055879585022922, "P90": 0.9975132163017412, "P99": 3.3414737941752564} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00023079875943949447, "P50": 9.604002116248012e-06, "P90": 1.865470403572559e-05, "P99": 0.004637498097290533} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.143561538216054} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.296984611456864} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8955513896406047, "P50": 0.8142986460006796, "P90": 1.292449233295338, "P99": 1.5806631049167486} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.315297002904117e-05, "P50": 9.499999578110874e-06, "P90": 1.4158095291350044e-05, "P99": 0.00046828158985591354} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.132450258328595} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 66.11920413325753} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4637445753706562, "P50": 1.3746780624933308, "P90": 2.2808138003005296, "P99": 2.4646267637502755} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00019433248991845175, "P50": 9.56250005401671e-06, "P90": 1.5087511565070606e-05, "P99": 0.00792338458792074} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.565527073567807} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 89.04843317708492} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.7314119995606596, "P50": 3.46072166650265, "P90": 6.398549313109834, "P99": 6.541338161911263} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.8361700319219384e-05, "P50": 9.85399674391374e-06, "P90": 2.5345192989334524e-05, "P99": 0.0008640462494804525} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.607707206720797} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 89.72331530753276} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.778170889589383, "P50": 6.6967904579942115, "P90": 11.599950158709543, "P99": 11.953086567003629} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.452582034398802e-05, "P50": 9.83300560619682e-06, "P90": 1.4299400208983571e-05, "P99": 0.0011361402283364508} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", 
"mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.720125403711825} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 91.5220064593892} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.017219389970705, "P50": 7.981550812502974, "P90": 13.8899465579947, "P99": 14.502704323511717} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.0008360772626476e-05, "P50": 9.54199640545994e-06, "P90": 1.4578796981368225e-05, "P99": 0.00012903648865177257} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.829879694384015} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 93.27807511014424} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.456241048370284, "P50": 8.449645542001235, "P90": 14.844965021204553, "P99": 15.559784836670296} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.260753024136647e-05, "P50": 9.666997357271612e-06, "P90": 1.696279796306057e-05, "P99": 0.0024981628240493604} +{"input_tokens": 1024, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0749156371088826} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.39730038748424} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.2735694008493739, "P50": 1.1378348959988216, "P90": 1.5425591663981326, "P99": 3.3584867270827843} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.673124048276805e-05, "P50": 9.291004971601069e-06, "P90": 9.514170087641116e-05, "P99": 0.0019344834104413266} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.121663992765719} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.893247768503} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.5873964033302037, "P50": 1.4421865835029166, "P90": 
2.3296179535929697, "P99": 2.53932782874821} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.3099589383928105e-05, "P50": 9.625000529922545e-06, "P90": 1.2174398580100388e-05, "P99": 0.00020119575536229218} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.9798869910947983} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 127.35638371503354} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.5417251283102087, "P50": 2.485552166501293, "P90": 3.5657896835968135, "P99": 3.903840597749805} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013319541059900075, "P50": 9.417002729605883e-06, "P90": 1.2238111230544749e-05, "P99": 0.002835743009200118} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.010862014272354} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 160.34758445671534} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.830329449989949, "P50": 5.656717646001198, "P90": 8.570625491398095, "P99": 8.84276186058356} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.212457988411188e-05, "P50": 9.458497515879571e-06, "P90": 1.4424993423745041e-05, "P99": 0.0006869808449118895} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.048802216757137} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 161.56167093622838} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 8.848449344589753, "P50": 8.84988052049448, "P90": 13.837958670202351, "P99": 13.952316732913605} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.434751986991614e-05, "P50": 9.47900116443634e-06, "P90": 2.2154208272695543e-05, "P99": 0.001286019170511284} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, 
"model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.122750854456335} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 163.92802734260272} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 10.114799861690116, "P50": 10.188547000005201, "P90": 16.17895174970181, "P99": 16.499328135003307} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.0414180364459754e-05, "P50": 9.250004950445145e-06, "P90": 1.4258392911870033e-05, "P99": 0.0005611286721250532} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 5.2228653781964365} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 167.13169210228597} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 10.549047631259455, "P50": 10.640048604000185, "P90": 17.19251401190122, "P99": 17.631971468750418} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00011414449967560358, "P50": 7.353497494477779e-06, "P90": 2.6995896769222257e-05, "P99": 0.0031013199983863284} +{"input_tokens": 1024, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0658488692518666} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 68.21432763211946} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.325130251650262, "P50": 2.252730791493377, "P90": 2.818357812803879, "P99": 3.367166434919639} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.604919061530382e-05, "P50": 7.499998901039362e-06, "P90": 2.211219980381443e-05, "P99": 0.0018508566574018995} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0772145516687512} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 132.94173130680008} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.9772184729200672, "P50": 2.822229770499689, "P90": 3.9782220294888253, "P99": 4.092676757500303} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, 
"model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001261558895930648, "P50": 8.895498467609286e-06, "P90": 1.874580921139569e-05, "P99": 0.0035069799993652853} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.620657880777943} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 231.72210436978835} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.7370204795802415, "P50": 6.118753436996485, "P90": 7.552988245498274, "P99": 8.342213170094183} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.498750983155332e-05, "P50": 9.020499419420958e-06, "P90": 1.9179101218469445e-05, "P99": 0.0008012080022308611} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.146394547740415} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 265.36925105538654} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 10.145008522090212, "P50": 10.479509645498183, "P90": 12.93145788690308, "P99": 13.083431481348526} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.881412052782252e-05, "P50": 9.312505426350981e-06, "P90": 2.0479108206927784e-05, "P99": 0.0009398949894239229} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.153929967257427} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 265.8515179044753} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 13.22326086456087, "P50": 13.704207812501409, "P90": 18.027199212199776, "P99": 18.174171088919977} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00014650294964667408, "P50": 6.374997610691935e-06, "P90": 3.357979148859204e-05, "P99": 0.0015572849051387005} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.195079378554554} +{"input_tokens": 1024, "output_tokens": 64, 
"request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 268.48508022749144} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.53667343006964, "P50": 15.067443583502609, "P90": 20.445016483090875, "P99": 20.80906494333816} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00010913039033766836, "P50": 8.958493708632886e-06, "P90": 1.747939677443358e-05, "P99": 0.0019234891734959299} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 4.2722041267679} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 273.4210641131456} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.953426504969539, "P50": 15.519060312501097, "P90": 21.406911766697885, "P99": 21.849596778091655} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.784916967968456e-05, "P50": 8.937502570915967e-06, "P90": 1.7316998855676503e-05, "P99": 0.0007883445844345407} +{"input_tokens": 1024, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.043592070490661} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 133.5797850228046} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.771156022060459, "P50": 4.623608520501875, "P90": 5.72784761669609, "P99": 6.285249484756496} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.723001963109709e-05, "P50": 7.770999218337238e-06, "P90": 1.1150604404974735e-05, "P99": 0.0013311849128513143} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.9526880463090428} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 249.94406992755748} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.5974074504106826, "P50": 6.48247181300394, "P90": 8.326795746097924, "P99": 8.66613944426761} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013821248037857004, "P50": 8.02100112196058e-06, "P90": 
1.127919967984781e-05, "P99": 0.005035456661862563} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.797988911480238} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 358.1425806694705} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 13.68652719213962, "P50": 14.288362125000276, "P90": 15.658237479395757, "P99": 16.117421394497796} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001410254495567642, "P50": 7.645998266525567e-06, "P90": 5.3042007493786826e-05, "P99": 0.004748784069815886} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.999009899315326} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 383.8732671123617} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 18.605167244619953, "P50": 19.125042521001888, "P90": 22.06639691610035, "P99": 22.21313723716943} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00010401126943179406, "P50": 9.437491826247424e-06, "P90": 3.616729954956107e-05, "P99": 0.0018417719965509631} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.030949074067694} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 387.9614814806648} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 21.40738069785919, "P50": 22.009431354003027, "P90": 26.911933508694347, "P99": 27.079540675431925} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.300749053480104e-05, "P50": 8.9585009845905e-06, "P90": 1.4212503447197407e-05, "P99": 0.000507824587257361} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0319582727955403} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 
388.09065891782916} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 23.004175248739486, "P50": 23.672311854490545, "P90": 29.59442843690631, "P99": 29.97740366522703} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.586128066875972e-05, "P50": 6.208007107488811e-06, "P90": 1.2345596041996032e-05, "P99": 0.0007983129273634445} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0724262148293473} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 393.27055549815645} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 23.386972827549034, "P50": 24.069225500003085, "P90": 30.502798808306398, "P99": 30.997216475836613} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00017224378083483316, "P50": 8.874507329892367e-06, "P90": 2.877050719689656e-05, "P99": 0.004699284999514931} +{"input_tokens": 1024, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9876545229861823} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 252.83955788446266} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 10.589533677510044, "P50": 11.299824207999336, "P90": 13.417536462489807, "P99": 13.698215856437164} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.5601700579281896e-05, "P50": 8.10399797046557e-06, "P90": 1.1963097495026887e-05, "P99": 0.00015377040865132706} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.5437993106746755} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 395.2126235327169} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 20.512625929619244, "P50": 21.351363978996233, "P90": 24.50134419969836, "P99": 24.918897587512767} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.890958004281856e-05, "P50": 9.208000847138464e-06, "P90": 1.9403507758397618e-05, "P99": 0.0017095649197290228} +{"input_tokens": 1024, 
"output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.7049267539064084} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 436.46124900004054} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 29.99300650915029, "P50": 30.777252062507614, "P90": 36.709788096100965, "P99": 37.578096175006216} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.796048993943259e-05, "P50": 6.2920007621869445e-06, "P90": 1.1791997530963282e-05, "P99": 0.0014726086622977234} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.663798750195332} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 425.932480050005} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 35.13940919666973, "P50": 38.91721166700154, "P90": 44.55932027469389, "P99": 48.503628504492376} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00010067210969282314, "P50": 9.04199987417087e-06, "P90": 0.00021769970335299162, "P99": 0.0012803157417511104} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.6688626558359618} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 427.2288398940062} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 38.10100109794017, "P50": 41.93828154150833, "P90": 49.572061041394775, "P99": 54.05635076749517} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00015383370133349673, "P50": 8.104507287498564e-06, "P90": 1.1295205331407487e-05, "P99": 0.0005742714850931146} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.676123070182883} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 429.08750596681807} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 39.40821032630018, "P50": 43.32257347900304, "P90": 51.94331675360736, "P99": 56.619846178340524} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.20779306255281e-05, "P50": 6.499991286545992e-06, "P90": 1.0462210047990083e-05, "P99": 6.494465895230634e-05} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.682003855273893} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 430.5929869501166} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 40.04043917293006, "P50": 43.98279312499653, "P90": 53.10738668360136, "P99": 57.88155433283682} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.529537829104811e-05, "P50": 8.937502570915967e-06, "P90": 1.198750251205639e-05, "P99": 0.00039155707592726996} +{"input_tokens": 1024, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.8303373168624176} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 425.1327062335578} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 25.09061423669016, "P50": 27.39562341650162, "P90": 35.564758050002276, "P99": 36.21596755458857} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0003277329589764122, "P50": 8.083501597866416e-06, "P90": 0.000375374704890419, "P99": 0.005202050334919484} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.8466889582159969} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 433.5047466065904} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 54.55764193707946, "P50": 55.34461891650426, "P90": 69.90821816220124, "P99": 72.31821926658566} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.134247942711226e-05, "P50": 9.125011274591088e-06, "P90": 1.241660647792742e-05, "P99": 0.002263068566535371} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.8560640922597019} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 438.3048152369674} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 65.59699930251037, "P50": 76.11246368799766, "P90": 93.73926998339884, "P99": 94.38666108875564} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00011110340084997006, "P50": 8.479495591018349e-06, "P90": 1.3491699064616144e-05, "P99": 0.0026930525881471126} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.8625523458600032} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 441.6268010803216} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 70.55006926912917, "P50": 81.28068239599816, "P90": 103.22441809608864, "P99": 104.46145998833134} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.797495999606326e-05, "P50": 9.229494025930762e-06, "P90": 1.2146496737841522e-05, "P99": 0.0008907429019745912} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.863391898990016} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 442.0566522828882} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 73.6212814108096, "P50": 84.44627785400371, "P90": 108.35512865798957, "P99": 109.9277401894104} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00014032592865987682, "P50": 9.209004929289222e-06, "P90": 2.2466694645117985e-05, "P99": 0.0031864154154027362} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.2238675036166577} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 626.6201618517288} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 41.62159011658936, "P50": 
47.22161516699998, "P90": 78.29661306279885, "P99": 78.70409110184163} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00028080957956262866, "P50": 8.83299799170345e-06, "P90": 0.00048520809650654174, "P99": 0.0028704147445387657} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.8679383568748511} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 444.38443871992376} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 75.41083033045055, "P50": 86.2729479375048, "P90": 111.74101779170014, "P99": 113.6443698175969} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00012548126091132872, "P50": 8.188006177078933e-06, "P90": 2.614549885038287e-05, "P99": 0.0028406976710539386} +{"input_tokens": 1024, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0819343744516552} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 4.327737497806621} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.8432267062293249, "P50": 0.6799055624942412, "P90": 1.2207975207929853, "P99": 2.906939591761043} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00015740666014607996, "P50": 9.021001460496336e-06, "P90": 4.379590536700586e-05, "P99": 0.0038391501709702435} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1188288031826015} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.475315212730406} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.1768343095696765, "P50": 0.9913993335067062, "P90": 2.141673779707344, "P99": 2.485549306833564} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.386334032868035e-05, "P50": 9.270494047086686e-06, "P90": 2.4749700969550782e-05, "P99": 0.0012471241687308072} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, 
"request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.998144444655141} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 11.992577778620564} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 4.477737895409081, "P50": 3.922580749502231, "P90": 9.232654934002493, "P99": 10.27444690199991} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.716422001365572e-05, "P50": 9.208000847138464e-06, "P90": 1.235419331351296e-05, "P99": 0.000511602163896905} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0061878996552367} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 12.024751598620947} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 10.730847311239632, "P50": 10.548595208500046, "P90": 19.611121521091263, "P99": 21.64853427332346} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.183376026456245e-05, "P50": 9.271003364119679e-06, "P90": 1.4891297905705896e-05, "P99": 0.000570720519608602} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0205365069190155} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 12.082146027676062} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 13.699989506239508, "P50": 13.606573853990994, "P90": 24.674032625001566, "P99": 27.188012529837287} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.353714041528291e-05, "P50": 9.020994184538722e-06, "P90": 1.3203900016378643e-05, "P99": 0.002233547747746348} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0368081071956006} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 12.147232428782402} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 15.111542934950267, "P50": 15.080571624996082, "P90": 27.124246962503822, "P99": 29.864587276163512} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 
32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013600117978057824, "P50": 8.896000508684665e-06, "P90": 1.5158409951254739e-05, "P99": 0.0034984634947613827} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0687269478197807} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 12.274907791279123} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 15.554444732949486, "P50": 15.545627375002368, "P90": 28.108642454193618, "P99": 30.91326184615624} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 9.896833056700415e-05, "P50": 9.062496246770024e-06, "P90": 1.272500812774526e-05, "P99": 0.0008838408258452886} +{"input_tokens": 2048, "output_tokens": 4, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0803322706661582} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 8.642658165329266} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 0.9675238049792825, "P50": 0.7918811249983264, "P90": 1.549970387495705, "P99": 2.910091120929579} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.6541250232839955e-05, "P50": 6.958507583476603e-06, "P90": 1.0649696923792369e-05, "P99": 0.0009265675049391678} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0969217209808613} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 16.77537376784689} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.4782728933609905, "P50": 1.3243677295031375, "P90": 2.549816575611476, "P99": 2.920038535512431} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.885499063879252e-05, "P50": 8.812006853986531e-06, "P90": 1.1416699271649125e-05, "P99": 0.0008603923319606194} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.0474244497043275} +{"input_tokens": 2048, 
"output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 24.37939559763462} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.845869981639844, "P50": 2.7490916250026203, "P90": 8.367891166394111, "P99": 9.559588517499506} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00012729912035865708, "P50": 8.95801349543035e-06, "P90": 1.9470503320917508e-05, "P99": 0.0034982394857797777} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 3.908221660884725} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 31.2657732870778} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 5.200771298750478, "P50": 3.6572632289971807, "P90": 11.867435433900392, "P99": 13.749346833417658} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00030740045011043546, "P50": 9.375500667374581e-06, "P90": 0.00011850059527205319, "P99": 0.007375505660020299} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.924957919509632} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 23.399663356077056} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 14.480570888749499, "P50": 14.307847146003041, "P90": 25.88393039218936, "P99": 28.27122377208754} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.9498679359676317e-05, "P50": 9.6454969025217e-06, "P90": 1.5182692732196302e-05, "P99": 0.000544567076576643} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.943644882705703} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 23.549159061645625} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 15.891409101698374, "P50": 15.77574620849191, "P90": 28.31515923390689, "P99": 30.91423552300534} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.528498928062618e-05, "P50": 1.0624993592500687e-05, "P90": 
1.487050030846149e-05, "P99": 0.00019147865939886423} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.9857167193280616} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 23.885733754624493} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.253497385009833, "P50": 16.124356728498242, "P90": 29.198255241106384, "P99": 31.88877741633405} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001883000001544133, "P50": 9.56250005401671e-06, "P90": 2.0212196977809073e-05, "P99": 0.003430760078481422} +{"input_tokens": 2048, "output_tokens": 8, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0780296661557893} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 17.24847465849263} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.2658278405005694, "P50": 1.030598020501202, "P90": 1.9019467292047925, "P99": 3.348239177829385} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.314790923264809e-05, "P50": 9.625000529922545e-06, "P90": 1.4366308460012081e-05, "P99": 0.00034669591404963584} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.080416351227587} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 33.28666161964139} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 2.118617503380083, "P50": 1.9417331869990448, "P90": 3.4133737586002098, "P99": 3.8345599739988394} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.871415992965922e-05, "P50": 9.520997991785407e-06, "P90": 1.93083003978245e-05, "P99": 0.00032576365498245257} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.753581837480389} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 44.057309399686226} 
+{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.752787658790039, "P50": 6.186934395504068, "P90": 12.53850789579883, "P99": 13.515922824833979} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 8.125548047246411e-05, "P50": 9.938004950527102e-06, "P90": 2.2454500140156675e-05, "P99": 0.0024638916646654305} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.758992306891269} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 44.1438769102603} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 12.982034900469152, "P50": 12.805779770998925, "P90": 22.934863091094304, "P99": 24.557750609599754} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.361292999121361e-05, "P50": 9.666997357271612e-06, "P90": 1.3558004866354176e-05, "P99": 0.002128448657749691} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.763472718821207} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 44.215563501139314} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.09542988033965, "P50": 15.995505895501992, "P90": 28.12231409580272, "P99": 30.273160877595657} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.270000132848508e-05, "P50": 9.563002095092088e-06, "P90": 1.5524700575042554e-05, "P99": 0.0013371356597053957} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.7899221895673967} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 44.63875503307835} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 17.30443574668927, "P50": 17.251822458492825, "P90": 30.377343383299014, "P99": 32.74265506884811} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.661831062752754e-05, "P50": 9.438001143280417e-06, "P90": 1.0984002437908204e-05, "P99": 0.0012394938513170988} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 32.0, 
"seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.8224716954769615} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 45.159547127631384} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 17.692291718799243, "P50": 17.661902853993524, "P90": 31.29056559220626, "P99": 33.779362034905354} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6791649541119115e-05, "P50": 9.354502253700048e-06, "P90": 1.0520806245040151e-05, "P99": 0.00011270107454038506} +{"input_tokens": 2048, "output_tokens": 16, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0723851384895906} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 34.3163244316669} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 1.9745710905105807, "P50": 1.7285046670003794, "P90": 3.0771269920034685, "P99": 3.5976804666643147} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.6552059686509895e-05, "P50": 9.687508281785995e-06, "P90": 1.3637192023452376e-05, "P99": 0.0018312522643827856} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.0232557022945326} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 64.74418247342504} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.41060353501045, "P50": 3.2854785000017728, "P90": 5.20003614549496, "P99": 5.749356494666572} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00013404294950305484, "P50": 9.479495929554105e-06, "P90": 2.0325006335042435e-05, "P99": 0.003209790400578664} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.4884395549022202} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 79.63006575687105} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": 
"E2E", "mean": 9.896002120448829, "P50": 9.456290124493535, "P90": 17.03221277920093, "P99": 17.356317833658103} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.376126085640863e-05, "P50": 9.645998943597078e-06, "P90": 1.3213105557952077e-05, "P99": 0.000639109163021207} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.4949376882866283} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 79.8380060251721} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 16.12278186622003, "P50": 16.000820478991955, "P90": 27.60363897109346, "P99": 28.430273796754193} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.367672016494907e-05, "P50": 9.39600431593135e-06, "P90": 1.2687507842201755e-05, "P99": 0.0007883424891042721} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.502398614196601} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 80.07675565429123} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 19.14818044456886, "P50": 19.135511458494875, "P90": 32.71187117889786, "P99": 34.035383325829756} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00029665623980690726, "P50": 9.395502274855971e-06, "P90": 3.7962805072311517e-05, "P99": 0.0028808015711548436} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.523219037416119} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 80.74300919731581} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 20.393421348360718, "P50": 20.436356479505775, "P90": 35.007685137511004, "P99": 36.59176593065509} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.2732538782292976e-05, "P50": 9.833493095356971e-06, "P90": 1.4920896501280372e-05, "P99": 0.00031999258033465756} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} 
+{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.543157981660752} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 81.38105541314407} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 20.873880005369866, "P50": 20.93182331249409, "P90": 35.98301607949252, "P99": 37.669968074484494} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.6565799596719445e-05, "P50": 9.374998626299202e-06, "P90": 1.3562501408159738e-05, "P99": 0.0002016808387998032} +{"input_tokens": 2048, "output_tokens": 32, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.0602741245924623} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 67.85754397391759} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 3.636939855020464, "P50": 3.337759478999942, "P90": 5.727733233598701, "P99": 6.170053745254991} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.540912949480116e-05, "P50": 9.70800465438515e-06, "P90": 1.812120317481459e-05, "P99": 0.002187055317190244} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.8983937489697753} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 121.49719993406562} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 6.968217897501018, "P50": 7.207733188006387, "P90": 9.703274554098607, "P99": 10.327048977498343} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.51363208796829e-05, "P50": 9.271010640077293e-06, "P90": 1.567529980093241e-05, "P99": 0.0014022074962849757} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.124133768528409} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 135.94456118581817} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 15.913780462070427, "P50": 15.822123333004129, "P90": 24.21029214641021, "P99": 24.6596388512502} 
+{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.980214071110822e-05, "P50": 9.395502274855971e-06, "P90": 1.4629194629378616e-05, "P99": 0.0008950790789094833} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1274789594259675} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 136.15865340326192} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 22.164674359639758, "P50": 22.322744937002426, "P90": 34.89871281670203, "P99": 35.326739747689714} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.136377047165297e-05, "P50": 9.16700082598254e-06, "P90": 1.4050003665033732e-05, "P99": 0.0008158540866861666} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.1537373470351433} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 137.83919021024917} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 24.745029333361018, "P50": 25.002530354002374, "P90": 39.5933618668947, "P99": 40.46097486983388} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.45391093287617e-05, "P50": 9.499999578110874e-06, "P90": 1.3366104394663131e-05, "P99": 0.0002885775065806312} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 2.149675604875031} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 137.579238712002} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 26.430767290489165, "P50": 26.72445968750253, "P90": 42.35053963719402, "P99": 43.448966124589205} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.382915998576209e-05, "P50": 9.874995157588273e-06, "P90": 1.3049997505731886e-05, "P99": 0.0006042824946052953} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TPUT", "mean": 2.164619049620232} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 138.53561917569485} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 26.888243417900522, "P50": 27.209378687504795, "P90": 43.30519847890391, "P99": 44.54493936341489} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.255751985008829e-05, "P50": 9.541501640342176e-06, "P90": 2.064159198198474e-05, "P99": 0.0015073128409858374} +{"input_tokens": 2048, "output_tokens": 64, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.026497100534482} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 131.3916288684137} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 7.766778960449738, "P50": 7.458155708001868, "P90": 10.778533796092962, "P99": 11.54499569316089} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.745544873410835e-05, "P50": 9.604489605408162e-06, "P90": 1.5066991909407124e-05, "P99": 0.002009346840204672} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.5141191267897447} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 193.80724822908732} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 17.5639353733194, "P50": 18.72450758350169, "P90": 22.815525421200437, "P99": 23.22648524250646} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.759839041274972e-05, "P50": 9.41699545364827e-06, "P90": 1.3741700968239477e-05, "P99": 0.001243022077396747} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.5840591125871033} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 202.75956641114922} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 28.373930487499603, "P50": 28.26346822949563, "P90": 40.270215129801365, "P99": 40.6487920645783} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, 
"metric": "TTFT", "mean": 3.347501900861971e-05, "P50": 9.479503205511719e-06, "P90": 1.3387203216552739e-05, "P99": 0.00045544416876510313} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.5867623535946618} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 203.1055812601167} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 34.57332735707983, "P50": 34.627305875001184, "P90": 50.89567168329813, "P99": 51.36618228092964} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 5.72278605250176e-05, "P50": 9.583003702573478e-06, "P90": 2.9991003975737984e-05, "P99": 0.0010816371650435096} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.5909088194386338} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 203.63632888814513} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 37.570006407070906, "P50": 37.73529529199732, "P90": 55.94339099550562, "P99": 56.890894949830106} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.172825978253968e-05, "P50": 9.374998626299202e-06, "P90": 1.0379092418588699e-05, "P99": 0.00021080707709189502} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.597170200919318} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 204.4377857176727} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 38.950297816260424, "P50": 39.15535597950657, "P90": 58.428336625301746, "P99": 59.53392046667533} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.15500000922475e-05, "P50": 9.312505426350981e-06, "P90": 1.0599696543067695e-05, "P99": 0.0003302891699422644} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 1.605589539944446} +{"input_tokens": 2048, "output_tokens": 128, 
"request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 205.51546111288908} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 39.40510724412961, "P50": 39.63606014550169, "P90": 59.378369250605466, "P99": 60.596090362676335} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 7.922128948848694e-05, "P50": 9.56250005401671e-06, "P90": 1.1658895527943975e-05, "P99": 0.0006881228293059647} +{"input_tokens": 2048, "output_tokens": 128, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.8583681691181628} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 219.74225129424968} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 23.384191956270662, "P50": 25.356584062501497, "P90": 30.036968945499392, "P99": 33.742752967173146} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 1.2172040587756783e-05, "P50": 9.333001798950136e-06, "P90": 1.1171094956807793e-05, "P99": 8.806490106508182e-05} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9456949450327189} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 242.09790592837604} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 41.86012358709078, "P50": 40.68452283349325, "P90": 60.343550766403496, "P99": 61.75208656491464} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.040039006620646e-05, "P50": 9.645998943597078e-06, "P90": 2.0058304653503017e-05, "P99": 0.001389278244896565} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9681647795025865} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 247.85018355266214} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 53.127021752449366, "P50": 48.36226441649342, "P90": 80.4317295747067, "P99": 80.80952615925096} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.4107060162350536e-05, "P50": 9.750001481734216e-06, 
"P90": 1.3912798021920047e-05, "P99": 0.00040086925175274044} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9693058283596162} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 248.14229206006175} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 59.282369117899215, "P50": 54.70588958299777, "P90": 90.95630766669201, "P99": 91.51408791582843} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.180502939154394e-05, "P50": 9.562507329974324e-06, "P90": 2.2162494133226577e-05, "P99": 0.00022631083033047755} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9702301611520325} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 248.37892125492033} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 62.34925189959962, "P50": 57.8473809165007, "P90": 96.21696675859421, "P99": 97.15481438448391} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 3.135627048322931e-05, "P50": 9.770497854333371e-06, "P90": 2.195840643253178e-05, "P99": 0.0004483671688649347} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.9728526627789189} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 249.05028167140324} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 63.64547421083029, "P50": 59.2060083545075, "P90": 98.48637951640121, "P99": 99.61867034325464} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001089999990654178, "P50": 9.395997039973736e-06, "P90": 2.3671393864788177e-05, "P99": 0.001401421016344125} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.975843626543094} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 
249.81596839503206} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 64.17319886877071, "P50": 59.75206812550459, "P90": 99.58659053390438, "P99": 100.85282674274146} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.00015512211932218632, "P50": 9.249997674487531e-06, "P90": 1.6342003073077664e-05, "P99": 0.0008811555159628105} +{"input_tokens": 2048, "output_tokens": 256, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.5042209229511221} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 258.1611125509745} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 71.56667676835976, "P50": 70.2347417705023, "P90": 106.29406142090156, "P99": 107.97179660749127} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 0.0001569499798642937, "P50": 9.625000529922545e-06, "P90": 1.2904108734801435e-05, "P99": 0.0009842799960461607} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 1.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.5011951011797522} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 256.6118918040331} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 94.53904749545065, "P50": 105.34003470848984, "P90": 143.8883242627926, "P99": 153.4314080641602} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.650460098171606e-05, "P50": 9.45899955695495e-06, "P90": 1.9775299006141752e-05, "P99": 0.0019434191701293475} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 2.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.49880226457225735} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 255.38675946099576} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 106.53222002330033, "P50": 121.59381510350795, "P90": 164.34122652440564, "P99": 177.3525155163344} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.213250002474524e-05, "P50": 9.582996426615864e-06, "P90": 2.267949457745999e-05, "P99": 0.001988187413808193} +{"input_tokens": 2048, 
"output_tokens": 512, "request_rate": 4.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.4991552237718333} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 255.56747457117865} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 112.7004303749805, "P50": 127.88753860399447, "P90": 174.852682420802, "P99": 188.68298036167167} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.77316694741603e-05, "P50": 9.957999282050878e-06, "P90": 1.6029091784730558e-05, "P99": 0.0010152232526161318} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 8.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.49954703427047853} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 255.768081546485} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 115.70977242704001, "P50": 131.0190455835036, "P90": 179.9692558083887, "P99": 194.28163630950294} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 2.5144120590994135e-05, "P50": 1.1208001524209976e-05, "P90": 2.3095504730008543e-05, "P99": 0.00019753351443796763} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 16.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.5002447111660434} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 256.1252921170142} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 117.05798370210046, "P50": 132.40766670799349, "P90": 182.36069460829748, "P99": 196.87868904517003} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 4.839999950490892e-05, "P50": 9.937502909451723e-06, "P90": 1.8900308350566785e-05, "P99": 0.0010154072451405288} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 32.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPUT", "mean": 0.5010599656332514} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TT", "mean": 256.5427024042247} +{"input_tokens": 2048, "output_tokens": 512, 
"request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "E2E", "mean": 117.52386768041033, "P50": 132.87766843750433, "P90": 183.349978103803, "P99": 197.97255932023762} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TTFT", "mean": 6.056746089598164e-05, "P50": 1.0541509254835546e-05, "P90": 2.4979397130664443e-05, "P99": 0.0008379666662949643} +{"input_tokens": 2048, "output_tokens": 512, "request_rate": 64.0, "seed": 0, "model": "llama2-7b", "samples": 100, "metric": "TPOT", "mean": 0.0, "P50": 0.0, "P90": 0.0, "P99": 0.0} diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/__init__.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/__init__.py new file mode 100644 index 00000000..6461ec1a --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/README.md b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/README.md new file mode 100644 index 00000000..f998fae8 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/README.md @@ -0,0 +1,96 @@ +# Mélange: Cost Efficient Large Language Model Serving by Exploiting GPU Heterogeneity + +## About +Here we provide the implementation of the Mélange solver and other related scripts used in our [paper](https://arxiv.org/pdf/2404.14527). + +## Getting Started +```bash +# Tested on Python 3.9.18 + +# 1. Install the necessary dependencies +pip install -r requirements.txt + +# See the melange/profiling/profiling-instructions.md for instructions on how to obtain the GPU information needed as the solver's input. + +# 2. Execute the solver with your own input configuration +python -m melange.main -c melange/config/example.json + +# 3. By default, the solver will save the output in a JSON file named as "melange_result.json" at the root directory +``` + + +## Explanation of Inputs and Outputs +### Inputs +The solver requires a json file with the following inputs: +1. `workload_distribution`: A 2D matrix representing the distribution of input and output lengths that the LLM service expects. Each row refers to one input size, each column refer to one output size, and each cell correspond to the proportion of requests that are within the cell's input and output size range (i.e., a bucket). The request size boundaries between buckets can be tuned to reach a desired balance of granularity and solver complexity. An example for the range of input and output sizes could be as follows: + - Input/Output size: 1-25, 25-100, 100-250, ... + - The cell at (0, 0) represents the request rate for requests with input and output sizes of 1-25 tokens. + - The cell at (0, 1) represents the request rate for requests with input size 1-25 tokens and output size 25-100 tokens. + - And so on ... +2. 
`gpu_info`: A list of dictionaries, where each dictionary contains the following keys: + - `name`: The name of the GPU. + - `cost`: The hourly rental cost of the GPU. + - `tputs`: A 2D matrix where each cell represents the GPU's profiled maximum throughput for requests in the size range of the corresponding cell in the `workload_distribution` matrix. +3. `total_request_rate`: A float value representing the total request rate of the workload (in requests/s). +4. `slice_factor`: An integer multiplier for the number of slices each bucket is split into. + +Please refer to [example.json](melange/config/example.json) for an example of the inputs and check out our paper for more details on our methodology. We have also provided the profiling scripts we used to obtain the GPU information in the [profiling](melange/profiling) directory. See the [profiling instructions](melange/profiling/profiling-instructions.md) for more details on how to use these scripts. + +### Outputs +#### Solver Output +The solver returns a dictionary containing the following: +1. The name of each GPU and the number of that GPU type to use. +2. The total cost for one hour. + +An example of the solver output is as follows: +```json +{ + "A10G": 3, + "A100-80GB": 1, + "cost": 6.7 +} +``` +In this case, the solver recommends using 3 A10G GPUs and 1 A100-80GB GPU, which results in a total cost of $6.7/hr. + +#### Output Formats +Mélange currently supports the following output formats: +* **JSON**: + * Default output format. + * The solver output is saved as a JSON file at the root directory with the name `melange_result.json`. + +## Run with Your Own Dataset or GPU Information +The toy example at [script_code](melange/main.py) and [example_config](melange/config/example.json) includes examples of the four inputs to Mélange, which should be replaced to fit your setting's needs. + +### Workload Distribution + 1. Determine the distribution of request sizes your LLM service expects. For example, you can use historical data of requests served by your service. In our evaluations, we used publicly available datasets (such as [Chatbot Arena](https://huggingface.co/datasets/lmsys/lmsys-chat-1m)) to determine a reasonable distribution of request sizes. + 2. Populate the `workload_distribution` based on the determined distribution. As mentioned, each row refers to a single input size, each column refers to a single output size, and each cell corresponds to the proportion of requests that fall into the given bucket. For example, a cell value of 0.1 indicates that 10% of requests fall in that bucket's size range. + +### GPU Information +For each GPU instance of interest, provide the following information: + 1. The name of the instance. + 2. The hourly rental cost of the instance. + 3. Results from profiling the GPU's maximum throughput (in requests/s) for requests within each bucket's size range, as defined by the buckets in `workload_distribution`. + +### Overall Rate and Slice Factor + 1. Determine the service's overall request rate across all request sizes, and provide it as the `total_request_rate`. + 2. Decide on the slice factor. We empirically find that the solver's output is not very sensitive to this choice and that 4 is sufficient for most cases.
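+
+### Running the Solver from Python
+In this repository the solver is wrapped by the `Config` and `SolverRunner` helpers in `runner.py` (re-exported from the package `__init__.py`). The sketch below is illustrative only: it assumes the package import path matches this directory layout, and the numbers simply mirror `config_example.json`.
+```python
+from aibrix.gpu_optimizer.optimizer.solver.melange import Config, SolverRunner
+
+# Two GPU types and a 2x2 bucket grid; values are illustrative and mirror
+# config_example.json in this directory.
+config = Config(
+    gpu_info={
+        "A10G": {"cost": 1.01, "tputs": [[2, 1], [5, 2]]},
+        "A100-80GB": {"cost": 3.67, "tputs": [[20, 20], [40, 20]]},
+    },
+    workload_distribution=[[0.2, 0.1], [0.5, 0.2]],  # proportion of requests per bucket
+    total_request_rate=30.0,                         # requests per second
+    slice_factor=4,
+)
+
+runner = SolverRunner(config)         # a path to a JSON config file also works
+print(runner.run())                   # e.g. {"A10G": ..., "A100-80GB": ..., "cost": ...}
+runner.export("melange_result.json")  # persist the recommended allocation
+```
+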
If you experience this issue, it is likely because the default ILP solver used by the PuLP library is not compatible with your architecture, and the following additional steps are required. +1. Install the COIN CBC ILP solver using Homebrew: `brew install coin-or-tools/coinor/cbc` +2. In [melange/solver.py](melange/solver.py), uncomment the following code to use the CBC solver. Note that your `path` may differ based on where the library was installed. +``` +solver = pulp.getSolver('COIN_CMD', path='/opt/homebrew/opt/cbc/bin/cbc', msg=0) +problem.solve(solver) +``` + +## Citation +If you use Mélange in your research, please cite our [paper](https://arxiv.org/abs/2404.14527): +``` +@article{griggs2024m, + title={M$\backslash$'elange: Cost Efficient Large Language Model Serving by Exploiting GPU Heterogeneity}, + author={Griggs, Tyler and Liu, Xiaoxuan and Yu, Jiaxiang and Kim, Doyoung and Chiang, Wei-Lin and Cheung, Alvin and Stoica, Ion}, + journal={arXiv preprint arXiv:2404.14527}, + year={2024} +} +``` diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/__init__.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/__init__.py new file mode 100644 index 00000000..95238207 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/__init__.py @@ -0,0 +1,2 @@ +from .runner import Config as Config +from .runner import SolverRunner as SolverRunner diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/config_example.json b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/config_example.json new file mode 100644 index 00000000..116803d9 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/config_example.json @@ -0,0 +1,15 @@ +{ + "gpu_info": { + "A10G": { + "cost": 1.01, + "tputs": [[2, 1], [5, 2]] + }, + "A100-80GB": { + "cost": 3.67, + "tputs": [[20, 20], [40, 20]] + } + }, + "workload_distribution": [[0.2, 0.1], [0.5, 0.2]], + "total_request_rate": 30.0, + "slice_factor": 1 +} \ No newline at end of file diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/example.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/example.py new file mode 100644 index 00000000..981c04f2 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/example.py @@ -0,0 +1,23 @@ +import argparse + +from .runner import SolverRunner + + +def main(config_path: str): + runner = SolverRunner(config_path) + print(runner.run()) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # Input arguments + parser.add_argument( + "--config", + "-c", + type=str, + default="melange/config/example.json", + help="Path to the input configuration file, in json", + ) + args = parser.parse_args() + + main(args.config) diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/requirements.txt b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/requirements.txt new file mode 100644 index 00000000..1bc0a423 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/requirements.txt @@ -0,0 +1,5 @@ +# used for the solver +numpy +pulp==2.8.0 +pandas +ruamel.yaml==0.18.6 \ No newline at end of file diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/runner.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/runner.py new file mode 100644 index 00000000..ca32b350 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/runner.py @@ -0,0 +1,41 @@ +import json +from dataclasses
import dataclass, field +from pathlib import Path +from typing import Union + +from .solver import MelangeSolver, Solver + +PROJECT_DIR = Path(__file__).parent.parent.parent + + +@dataclass +class Config: + gpu_info: dict = field(default_factory=dict) + workload_distribution: list = field(default_factory=list) + total_request_rate: float = 0 # units: requests per second + slice_factor: int = 4 + + +# This class is adapted from code originally written by Tyler Griggs +# found at https://github.com/tyler-griggs/melange-release +# See: https://tyler-griggs.github.io/blogs/melange +class SolverRunner: + def __init__(self, config: Union[str, Config]): + if isinstance(config, str): + config = Config(**json.load(open(config))) + self.config: Config = config + self.solver: Solver = MelangeSolver( + workload_distribution=self.config.workload_distribution, + total_request_rate=self.config.total_request_rate, + gpu_info=self.config.gpu_info, + slice_factor=self.config.slice_factor, + ) + self.execution_result = {} # type: ignore + + def run(self): + self.execution_result = self.solver.run() + return self.execution_result + + def export(self, path): + with open(path, "w") as f: + json.dump(self.execution_result, f, indent=4) diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/solver.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/solver.py new file mode 100644 index 00000000..4769b293 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/solver.py @@ -0,0 +1,138 @@ +import pulp +from pulp import LpInteger, LpMinimize, LpProblem, LpVariable + +from .util import tputs_to_loads_2d + + +# This class is adapted from code originally written by Tyler Griggs +# found at https://github.com/tyler-griggs/melange-release +# See: https://tyler-griggs.github.io/blogs/melange +class Solver: + def __init__( + self, workload_distribution: list, total_request_rate: float, gpu_info: dict + ): + self.workload_distribution = workload_distribution + self.overall_rate = total_request_rate + self.gpu_info = gpu_info + + def run(self, logs=False): + raise NotImplementedError + + +class MelangeSolver(Solver): + def __init__( + self, + workload_distribution: list, + total_request_rate: float, + gpu_info: dict, + slice_factor: int, + ): + super().__init__(workload_distribution, total_request_rate, gpu_info) + self.slice_factor = slice_factor + + def run(self, logs=False): + # Multiply overall rate across distribution. + request_rate_histogram = [] + for i in range(len(self.workload_distribution)): + request_rate_histogram.append([]) + for j in range(len(self.workload_distribution[0])): + request_rate_histogram[-1].append( + self.workload_distribution[i][j] * self.overall_rate + ) + + # Convert the profiled max throughputs into mapping from request size to load + for gpu in self.gpu_info: + self.gpu_info[gpu]["loads"] = tputs_to_loads_2d(self.gpu_info[gpu]["tputs"]) + + gpu_types = list(self.gpu_info.keys()) + cost_vector = [self.gpu_info[gpu]["cost"] for gpu in gpu_types] + + # Create slices, which is a single dimension. + slices = [] + for i in range(len(request_rate_histogram)): + for j in range(len(request_rate_histogram[i])): + for _ in range(self.slice_factor): + slices.append(request_rate_histogram[i][j] / self.slice_factor) + + # Create slice-to-load mapping, which is a single dimension. 
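+ # `loads[i][j]` is 1 / profiled max throughput, i.e. the fraction of one GPU consumed per unit request rate of bucket (i, j). + # Expanding it into `slice_loads` keeps it index-aligned with `slices`, so assigning slice k to GPU type g consumes slice_loads[k] * slices[k] of one GPU of that type.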
+ for gpu in gpu_types: + slice_loads = [] + for i in range(len(self.gpu_info[gpu]["loads"])): + for j in range(len(self.gpu_info[gpu]["loads"][i])): + for _ in range(self.slice_factor): + slice_loads.append(self.gpu_info[gpu]["loads"][i][j]) + assert len(slices) == len(slice_loads) + self.gpu_info[gpu]["slice_loads"] = slice_loads + + # Decision matrix value is binary. The slice is assigned to a GPU, or it isn't. + matrix_rows = len(slices) + matrix_cols = len(gpu_types) + + # Vector value is non-negative integer of how many of each GPU type are needed + vector_length = matrix_cols + + decision_matrix = [ + [ + LpVariable(f"x_{i}_{j}", cat=LpInteger, lowBound=0, upBound=1) + for j in range(matrix_cols) + ] + for i in range(matrix_rows) + ] + decision_vector = [ + LpVariable(f"y_{i}", cat=LpInteger, lowBound=0) + for i in range(vector_length) + ] + + # Objective: minimize cost + problem = LpProblem("GpuAllocation", LpMinimize) + problem += pulp.lpSum( + [decision_vector[i] * cost_vector[i] for i in range(len(decision_vector))] + ) + + # C1: Each row of decision matrix must sum to exactly 1 (ie, each slice assigned to one GPU) + for i in range(len(decision_matrix)): + problem += pulp.lpSum(decision_matrix[i]) == 1 + + # C2: Load of column of decision matrix must fit in decision vector capacity + for j in range(len(decision_matrix[0])): + # j is idx of GPU type, i is slice + problem += ( + pulp.lpSum( + [ + decision_matrix[i][j] + * self.gpu_info[gpu_types[j]]["slice_loads"][i] + * slices[i] + for i in range(len(decision_matrix)) + ] + ) + <= decision_vector[j] + ) + + # Solve the problem + problem.solve(pulp.PULP_CBC_CMD(msg=0)) + + # For Arm-based Mac platforms. + # solver= pulp.getSolver('COIN_CMD', path='/opt/homebrew/opt/cbc/bin/cbc', msg=0) + # problem.solve(solver) + + # Print the results if needed + if logs: + print("Decision Matrix:") + for row in decision_matrix: + print([var.value() for var in row]) + print("Decision Vector:") + print(f"{[var.value() for var in decision_vector]}") + + if pulp.LpStatus[problem.status] != "Optimal": + return None + + solution_dict = {} + for i in range(len(decision_vector)): + solution_dict[gpu_types[i]] = int(decision_vector[i].value()) + + total_cost = 0 + for gpu in solution_dict: + total_cost += solution_dict[gpu] * self.gpu_info[gpu]["cost"] + solution_dict["cost"] = total_cost + + return solution_dict diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/util.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/util.py new file mode 100644 index 00000000..1de80511 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/util.py @@ -0,0 +1,12 @@ +from typing import List + + +# Convert max throughput profiling to a mapping from request size to load +def tputs_to_loads_2d(max_tputs: List[List[float]]): + loads: List[List[float]] = [] + for i in range(len(max_tputs)): + loads.append([]) + for j in range(len(max_tputs[0])): + load = 1 / max_tputs[i][j] + loads[-1].append(load) + return loads diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/types.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/types.py new file mode 100644 index 00000000..ca343f1b --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/types.py @@ -0,0 +1,67 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass, field +from typing import Callable, List, Optional, Protocol, Tuple + + +@dataclass +class GPUProfile: + """Supports JSON input like: + { + "gpu": "A10", + "cost": 1.01, + "tputs": [[3, 2, 1], [5, 2, 1]], + "indexes": [[512, 1024], [32, 64, 128]] + } + where tputs is formulated as: + + | RPS | # OUTs 1 | # OUTs 2 | + |---|---|---| + | # INs 1 | 2 | 1 | + | # INs 2 | 5 | 2 | + """ + + gpu: str = "" + cost: float = 0.0 + tputs: list = field(default_factory=list) # units: requests per second + indexes: list = field(default_factory=list) # value ticks of tputs columns and rows + created: float = 0.0 + + +WorkloadSignatureErrorHandler = Callable[[int, float, float, float, float], None] +"""A function to handle the error with parameters (dimension, value, index assigned, value of index, value offset).""" + + +class WorkloadProfile(Protocol): + """Description of workload characteristics""" + + def get_signature( + self, + indexes: List[List[float]], + error_suppressor: Optional[WorkloadSignatureErrorHandler] = None, + ) -> Tuple[int]: + """Generate the index signature of the WorkloadProfile within the indexes' range. + + Args: + indexes: A list of lists of floats; each list is a range of values. + error_suppressor: A callback to suppress possible errors. If None, an exception is raised. + """ + + @property + def signature(self) -> Tuple[int]: + """The signature of the workload""" + + @property + def rate(self) -> float: + """The request rate of the workload in RPS""" diff --git a/python/aibrix/aibrix/gpu_optimizer/tests/__init__.py b/python/aibrix/aibrix/gpu_optimizer/tests/__init__.py new file mode 100644 index 00000000..6461ec1a --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/aibrix/aibrix/gpu_optimizer/tests/test_datasetloadreader.py b/python/aibrix/aibrix/gpu_optimizer/tests/test_datasetloadreader.py new file mode 100644 index 00000000..cb9c3816 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/tests/test_datasetloadreader.py @@ -0,0 +1,45 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest + +import numpy as np + +from aibrix.gpu_optimizer.load_monitor.load_reader import ( + DatasetLoadReader, + unittest_filepath, +) + + +class TestDatasetLoadReader(unittest.TestCase): + def __init__(self, methodName: str = "runTest") -> None: + super().__init__(methodName) + + self.reader = DatasetLoadReader(unittest_filepath) + + def test_stair_agggregate(self): + series = np.array( + [7, 8, 59, 127, 128, 341, 1023, 1024, 2047, 2048, 3100, 4100, 9000, 10150], + dtype=float, + ) + expected = np.array( + [1, 8, 56, 120, 128, 320, 960, 1024, 1984, 2048, 3072, 4096, 8192, 9216], + dtype=float, + ) + np.testing.assert_array_equal( + self.reader.stair_aggregate(series, skip_log2=True), expected + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/aibrix/aibrix/gpu_optimizer/utils/__init__.py b/python/aibrix/aibrix/gpu_optimizer/utils/__init__.py new file mode 100644 index 00000000..4c2181d4 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/utils/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .logging import DelayedLog as DelayedLog +from .logging import ExcludePathsFilter as ExcludePathsFilter diff --git a/python/aibrix/aibrix/gpu_optimizer/utils/logging.py b/python/aibrix/aibrix/gpu_optimizer/utils/logging.py new file mode 100644 index 00000000..12468d21 --- /dev/null +++ b/python/aibrix/aibrix/gpu_optimizer/utils/logging.py @@ -0,0 +1,41 @@ +# Copyright 2024 The Aibrix Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
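+# DelayedLog wraps a zero-argument callable so an expensive log argument is only evaluated when the record is actually formatted. +# ExcludePathsFilter drops access-log records whose request path starts with one of the (URL-quoted) excluded path prefixes.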
+ +import logging +from typing import Any, Callable +from urllib.parse import quote + + +class DelayedLog: + def __init__(self, expr: Callable[[], Any]): + self._expr = expr + + def __str__(self) -> str: + return str(self._expr()) + + def __expr__(self) -> str: + return self.__str__() + + +class ExcludePathsFilter(logging.Filter): + def __init__(self, exclude_paths): + super().__init__() + self.exclude_paths = [quote(exclude_path) for exclude_path in exclude_paths] + + def filter(self, record): + # Check if the record is an access log and extract the path + if hasattr(record, "args") and len(record.args) >= 3: + request_path = record.args[2] + return not any(request_path.startswith(path) for path in self.exclude_paths) + return True # Allow other log records diff --git a/python/aibrix/poetry.lock b/python/aibrix/poetry.lock index 7ac9f853..f773be6f 100644 --- a/python/aibrix/poetry.lock +++ b/python/aibrix/poetry.lock @@ -1,4 +1,127 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. + +[[package]] +name = "aiohappyeyeballs" +version = "2.4.3" +description = "Happy Eyeballs for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"}, + {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"}, +] + +[[package]] +name = "aiohttp" +version = "3.11.7" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "aiohttp-3.11.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8bedb1f6cb919af3b6353921c71281b1491f948ca64408871465d889b4ee1b66"}, + {file = "aiohttp-3.11.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f5022504adab881e2d801a88b748ea63f2a9d130e0b2c430824682a96f6534be"}, + {file = "aiohttp-3.11.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e22d1721c978a6494adc824e0916f9d187fa57baeda34b55140315fa2f740184"}, + {file = "aiohttp-3.11.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e993676c71288618eb07e20622572b1250d8713e7e00ab3aabae28cb70f3640d"}, + {file = "aiohttp-3.11.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e13a05db87d3b241c186d0936808d0e4e12decc267c617d54e9c643807e968b6"}, + {file = "aiohttp-3.11.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ba8d043fed7ffa117024d7ba66fdea011c0e7602327c6d73cacaea38abe4491"}, + {file = "aiohttp-3.11.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dda3ed0a7869d2fa16aa41f9961ade73aa2c2e3b2fcb0a352524e7b744881889"}, + {file = "aiohttp-3.11.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43bfd25113c1e98aec6c70e26d5f4331efbf4aa9037ba9ad88f090853bf64d7f"}, + {file = "aiohttp-3.11.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3dd3e7e7c9ef3e7214f014f1ae260892286647b3cf7c7f1b644a568fd410f8ca"}, + {file = "aiohttp-3.11.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:78c657ece7a73b976905ab9ec8be9ef2df12ed8984c24598a1791c58ce3b4ce4"}, + {file = "aiohttp-3.11.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:db70a47987e34494b451a334605bee57a126fe8d290511349e86810b4be53b01"}, + {file = 
"aiohttp-3.11.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9e67531370a3b07e49b280c1f8c2df67985c790ad2834d1b288a2f13cd341c5f"}, + {file = "aiohttp-3.11.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9202f184cc0582b1db15056f2225ab4c1e3dac4d9ade50dd0613ac3c46352ac2"}, + {file = "aiohttp-3.11.7-cp310-cp310-win32.whl", hash = "sha256:2257bdd5cf54a4039a4337162cd8048f05a724380a2283df34620f55d4e29341"}, + {file = "aiohttp-3.11.7-cp310-cp310-win_amd64.whl", hash = "sha256:b7215bf2b53bc6cb35808149980c2ae80a4ae4e273890ac85459c014d5aa60ac"}, + {file = "aiohttp-3.11.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cea52d11e02123f125f9055dfe0ccf1c3857225fb879e4a944fae12989e2aef2"}, + {file = "aiohttp-3.11.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3ce18f703b7298e7f7633efd6a90138d99a3f9a656cb52c1201e76cb5d79cf08"}, + {file = "aiohttp-3.11.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:670847ee6aeb3a569cd7cdfbe0c3bec1d44828bbfbe78c5d305f7f804870ef9e"}, + {file = "aiohttp-3.11.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4dda726f89bfa5c465ba45b76515135a3ece0088dfa2da49b8bb278f3bdeea12"}, + {file = "aiohttp-3.11.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25b74a811dba37c7ea6a14d99eb9402d89c8d739d50748a75f3cf994cf19c43"}, + {file = "aiohttp-3.11.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5522ee72f95661e79db691310290c4618b86dff2d9b90baedf343fd7a08bf79"}, + {file = "aiohttp-3.11.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fbf41a6bbc319a7816ae0f0177c265b62f2a59ad301a0e49b395746eb2a9884"}, + {file = "aiohttp-3.11.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59ee1925b5a5efdf6c4e7be51deee93984d0ac14a6897bd521b498b9916f1544"}, + {file = "aiohttp-3.11.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:24054fce8c6d6f33a3e35d1c603ef1b91bbcba73e3f04a22b4f2f27dac59b347"}, + {file = "aiohttp-3.11.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:351849aca2c6f814575c1a485c01c17a4240413f960df1bf9f5deb0003c61a53"}, + {file = "aiohttp-3.11.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:12724f3a211fa243570e601f65a8831372caf1a149d2f1859f68479f07efec3d"}, + {file = "aiohttp-3.11.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:7ea4490360b605804bea8173d2d086b6c379d6bb22ac434de605a9cbce006e7d"}, + {file = "aiohttp-3.11.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e0bf378db07df0a713a1e32381a1b277e62ad106d0dbe17b5479e76ec706d720"}, + {file = "aiohttp-3.11.7-cp311-cp311-win32.whl", hash = "sha256:cd8d62cab363dfe713067027a5adb4907515861f1e4ce63e7be810b83668b847"}, + {file = "aiohttp-3.11.7-cp311-cp311-win_amd64.whl", hash = "sha256:bf0e6cce113596377cadda4e3ac5fb89f095bd492226e46d91b4baef1dd16f60"}, + {file = "aiohttp-3.11.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4bb7493c3e3a36d3012b8564bd0e2783259ddd7ef3a81a74f0dbfa000fce48b7"}, + {file = "aiohttp-3.11.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e143b0ef9cb1a2b4f74f56d4fbe50caa7c2bb93390aff52f9398d21d89bc73ea"}, + {file = "aiohttp-3.11.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f7c58a240260822dc07f6ae32a0293dd5bccd618bb2d0f36d51c5dbd526f89c0"}, + {file = "aiohttp-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d20cfe63a1c135d26bde8c1d0ea46fd1200884afbc523466d2f1cf517d1fe33"}, + {file = 
"aiohttp-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12e4d45847a174f77b2b9919719203769f220058f642b08504cf8b1cf185dacf"}, + {file = "aiohttp-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf4efa2d01f697a7dbd0509891a286a4af0d86902fc594e20e3b1712c28c0106"}, + {file = "aiohttp-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee6a4cdcbf54b8083dc9723cdf5f41f722c00db40ccf9ec2616e27869151129"}, + {file = "aiohttp-3.11.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6095aaf852c34f42e1bd0cf0dc32d1e4b48a90bfb5054abdbb9d64b36acadcb"}, + {file = "aiohttp-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1cf03d27885f8c5ebf3993a220cc84fc66375e1e6e812731f51aab2b2748f4a6"}, + {file = "aiohttp-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:1a17f6a230f81eb53282503823f59d61dff14fb2a93847bf0399dc8e87817307"}, + {file = "aiohttp-3.11.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:481f10a1a45c5f4c4a578bbd74cff22eb64460a6549819242a87a80788461fba"}, + {file = "aiohttp-3.11.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:db37248535d1ae40735d15bdf26ad43be19e3d93ab3f3dad8507eb0f85bb8124"}, + {file = "aiohttp-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d18a8b44ec8502a7fde91446cd9c9b95ce7c49f1eacc1fb2358b8907d4369fd"}, + {file = "aiohttp-3.11.7-cp312-cp312-win32.whl", hash = "sha256:3d1c9c15d3999107cbb9b2d76ca6172e6710a12fda22434ee8bd3f432b7b17e8"}, + {file = "aiohttp-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:018f1b04883a12e77e7fc161934c0f298865d3a484aea536a6a2ca8d909f0ba0"}, + {file = "aiohttp-3.11.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:241a6ca732d2766836d62c58c49ca7a93d08251daef0c1e3c850df1d1ca0cbc4"}, + {file = "aiohttp-3.11.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:aa3705a8d14de39898da0fbad920b2a37b7547c3afd2a18b9b81f0223b7d0f68"}, + {file = "aiohttp-3.11.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9acfc7f652b31853eed3b92095b0acf06fd5597eeea42e939bd23a17137679d5"}, + {file = "aiohttp-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcefcf2915a2dbdbce37e2fc1622129a1918abfe3d06721ce9f6cdac9b6d2eaa"}, + {file = "aiohttp-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c1f6490dd1862af5aae6cfcf2a274bffa9a5b32a8f5acb519a7ecf5a99a88866"}, + {file = "aiohttp-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac5462582d6561c1c1708853a9faf612ff4e5ea5e679e99be36143d6eabd8e"}, + {file = "aiohttp-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c1a6309005acc4b2bcc577ba3b9169fea52638709ffacbd071f3503264620da"}, + {file = "aiohttp-3.11.7-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5b973cce96793725ef63eb449adfb74f99c043c718acb76e0d2a447ae369962"}, + {file = "aiohttp-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ce91a24aac80de6be8512fb1c4838a9881aa713f44f4e91dd7bb3b34061b497d"}, + {file = "aiohttp-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:875f7100ce0e74af51d4139495eec4025affa1a605280f23990b6434b81df1bd"}, + {file = "aiohttp-3.11.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c171fc35d3174bbf4787381716564042a4cbc008824d8195eede3d9b938e29a8"}, + {file = "aiohttp-3.11.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:ee9afa1b0d2293c46954f47f33e150798ad68b78925e3710044e0d67a9487791"}, + {file = "aiohttp-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8360c7cc620abb320e1b8d603c39095101391a82b1d0be05fb2225471c9c5c52"}, + {file = "aiohttp-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7a9318da4b4ada9a67c1dd84d1c0834123081e746bee311a16bb449f363d965e"}, + {file = "aiohttp-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:fc6da202068e0a268e298d7cd09b6e9f3997736cd9b060e2750963754552a0a9"}, + {file = "aiohttp-3.11.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:17829f37c0d31d89aa6b8b010475a10233774771f9b6dc2cc352ea4f8ce95d9a"}, + {file = "aiohttp-3.11.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d6177077a31b1aecfc3c9070bd2f11419dbb4a70f30f4c65b124714f525c2e48"}, + {file = "aiohttp-3.11.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:badda65ac99555791eed75e234afb94686ed2317670c68bff8a4498acdaee935"}, + {file = "aiohttp-3.11.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0de6466b9d742b4ee56fe1b2440706e225eb48c77c63152b1584864a236e7a50"}, + {file = "aiohttp-3.11.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04b0cc74d5a882c9dacaeeccc1444f0233212b6f5be8bc90833feef1e1ce14b9"}, + {file = "aiohttp-3.11.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c7af3e50e5903d21d7b935aceed901cc2475463bc16ddd5587653548661fdb"}, + {file = "aiohttp-3.11.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c63f898f683d1379b9be5afc3dd139e20b30b0b1e0bf69a3fc3681f364cf1629"}, + {file = "aiohttp-3.11.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fdadc3f6a32d6eca45f9a900a254757fd7855dfb2d8f8dcf0e88f0fae3ff8eb1"}, + {file = "aiohttp-3.11.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d329300fb23e14ed1f8c6d688dfd867d1dcc3b1d7cd49b7f8c5b44e797ce0932"}, + {file = "aiohttp-3.11.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:5578cf40440eafcb054cf859964bc120ab52ebe0e0562d2b898126d868749629"}, + {file = "aiohttp-3.11.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:7b2f8107a3c329789f3c00b2daad0e35f548d0a55cda6291579136622099a46e"}, + {file = "aiohttp-3.11.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:43dd89a6194f6ab02a3fe36b09e42e2df19c211fc2050ce37374d96f39604997"}, + {file = "aiohttp-3.11.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d2fa6fc7cc865d26ff42480ac9b52b8c9b7da30a10a6442a9cdf429de840e949"}, + {file = "aiohttp-3.11.7-cp39-cp39-win32.whl", hash = "sha256:a7d9a606355655617fee25dd7e54d3af50804d002f1fd3118dd6312d26692d70"}, + {file = "aiohttp-3.11.7-cp39-cp39-win_amd64.whl", hash = "sha256:53c921b58fdc6485d6b2603e0132bb01cd59b8f0620ffc0907f525e0ba071687"}, + {file = "aiohttp-3.11.7.tar.gz", hash = "sha256:01a8aca4af3da85cea5c90141d23f4b0eee3cbecfd33b029a45a80f28c66c668"}, +] + +[package.dependencies] +aiohappyeyeballs = ">=2.3.0" +aiosignal = ">=1.1.2" +async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} +attrs = ">=17.3.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +propcache = ">=0.2.0" +yarl = ">=1.17.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = 
"sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" [[package]] name = "annotated-types" @@ -11,9 +134,6 @@ files = [ {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, ] -[package.dependencies] -typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} - [[package]] name = "anyio" version = "4.5.2" @@ -36,6 +156,47 @@ doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] trio = ["trio (>=0.26.1)"] +[[package]] +name = "async-timeout" +version = "5.0.1" +description = "Timeout context manager for asyncio programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, + {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, +] + +[[package]] +name = "attrs" +version = "24.2.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, + {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, +] + +[package.extras] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] + +[[package]] +name = "blinker" +version = "1.9.0" +description = "Fast, simple object-to-object and broadcast signaling" +optional = false +python-versions = ">=3.9" +files = [ + {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, + {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, +] + [[package]] name = "boto3" version = "1.35.57" @@ -69,14 +230,22 @@ files = [ [package.dependencies] jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" -urllib3 = [ - {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, - {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, -] +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} [package.extras] crt = ["awscrt (==0.22.0)"] +[[package]] +name = "cachetools" +version = "5.5.0" +description = "Extensible 
memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292"}, + {file = "cachetools-5.5.0.tar.gz", hash = "sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a"}, +] + [[package]] name = "certifi" version = "2024.8.30" @@ -88,6 +257,85 @@ files = [ {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, ] +[[package]] +name = "cffi" +version = "1.17.1" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, + {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, + {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, + {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, + {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, + {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, + {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, + {file = 
"cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, + {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, + {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, + {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, + {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, + {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, + {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, + {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "charset-normalizer" version = "3.4.0" @@ -227,6 +475,79 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "contourpy" +version = "1.3.1" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.10" +files = [ + {file = "contourpy-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a045f341a77b77e1c5de31e74e966537bba9f3c4099b35bf4c2e3939dd54cdab"}, + {file = "contourpy-1.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:500360b77259914f7805af7462e41f9cb7ca92ad38e9f94d6c8641b089338124"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2f926efda994cdf3c8d3fdb40b9962f86edbc4457e739277b961eced3d0b4c1"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adce39d67c0edf383647a3a007de0a45fd1b08dedaa5318404f1a73059c2512b"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abbb49fb7dac584e5abc6636b7b2a7227111c4f771005853e7d25176daaf8453"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0cffcbede75c059f535725c1680dfb17b6ba8753f0c74b14e6a9c68c29d7ea3"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab29962927945d89d9b293eabd0d59aea28d887d4f3be6c22deaefbb938a7277"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:974d8145f8ca354498005b5b981165b74a195abfae9a8129df3e56771961d595"}, + {file = "contourpy-1.3.1-cp310-cp310-win32.whl", hash = "sha256:ac4578ac281983f63b400f7fe6c101bedc10651650eef012be1ccffcbacf3697"}, + {file = "contourpy-1.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:174e758c66bbc1c8576992cec9599ce8b6672b741b5d336b5c74e35ac382b18e"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8b974d8db2c5610fb4e76307e265de0edb655ae8169e8b21f41807ccbeec4b"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20914c8c973f41456337652a6eeca26d2148aa96dd7ac323b74516988bea89fc"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d40d37c1c3a4961b4619dd9d77b12124a453cc3d02bb31a07d58ef684d3d86"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:113231fe3825ebf6f15eaa8bc1f5b0ddc19d42b733345eae0934cb291beb88b6"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4dbbc03a40f916a8420e420d63e96a1258d3d1b58cbdfd8d1f07b49fcbd38e85"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3a04ecd68acbd77fa2d39723ceca4c3197cb2969633836ced1bea14e219d077c"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c414fc1ed8ee1dbd5da626cf3710c6013d3d27456651d156711fa24f24bd1291"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:31c1b55c1f34f80557d3830d3dd93ba722ce7e33a0b472cba0ec3b6535684d8f"}, + {file = "contourpy-1.3.1-cp311-cp311-win32.whl", hash = "sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375"}, + {file = "contourpy-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b2bdca22a27e35f16794cf585832e542123296b4687f9fd96822db6bae17bfc9"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0ffa84be8e0bd33410b17189f7164c3589c229ce5db85798076a3fa136d0e509"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805617228ba7e2cbbfb6c503858e626ab528ac2a32a04a2fe88ffaf6b02c32bc"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade08d343436a94e633db932e7e8407fe7de8083967962b46bdfc1b0ced39454"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47734d7073fb4590b4a40122b35917cd77be5722d80683b249dac1de266aac80"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ba94a401342fc0f8b948e57d977557fbf4d515f03c67682dd5c6191cb2d16ec"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf98051f1045b15c87868dbaea84f92408337d4f81d0e449ee41920ea121d3b"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61332c87493b00091423e747ea78200659dc09bdf7fd69edd5e98cef5d3e9a8d"}, + {file = "contourpy-1.3.1-cp312-cp312-win32.whl", hash = "sha256:e914a8cb05ce5c809dd0fe350cfbb4e881bde5e2a38dc04e3afe1b3e58bd158e"}, + {file = "contourpy-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:08d9d449a61cf53033612cb368f3a1b26cd7835d9b8cd326647efe43bca7568d"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a761d9ccfc5e2ecd1bf05534eda382aa14c3e4f9205ba5b1684ecfe400716ef2"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:523a8ee12edfa36f6d2a49407f705a6ef4c5098de4f498619787e272de93f2d5"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:573abb30e0e05bf31ed067d2f82500ecfdaec15627a59d63ea2d95714790f5c2"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fa36448e6a3a1a9a2ba23c02012c43ed88905ec80163f2ffe2421c7192a5d7"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ea9924d28fc5586bf0b42d15f590b10c224117e74409dd7a0be3b62b74a501c"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b75aa69cb4d6f137b36f7eb2ace9280cfb60c55dc5f61c731fdf6f037f958a3"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:041b640d4ec01922083645a94bb3b2e777e6b626788f4095cf21abbe266413c1"}, + {file = "contourpy-1.3.1-cp313-cp313-win32.whl", hash = 
"sha256:36987a15e8ace5f58d4d5da9dca82d498c2bbb28dff6e5d04fbfcc35a9cb3a82"}, + {file = "contourpy-1.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7895f46d47671fa7ceec40f31fae721da51ad34bdca0bee83e38870b1f47ffd"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ddeb796389dadcd884c7eb07bd14ef12408aaae358f0e2ae24114d797eede30"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:19c1555a6801c2f084c7ddc1c6e11f02eb6a6016ca1318dd5452ba3f613a1751"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841ad858cff65c2c04bf93875e384ccb82b654574a6d7f30453a04f04af71342"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4318af1c925fb9a4fb190559ef3eec206845f63e80fb603d47f2d6d67683901c"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:14c102b0eab282427b662cb590f2e9340a9d91a1c297f48729431f2dcd16e14f"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e806338bfeaa006acbdeba0ad681a10be63b26e1b17317bfac3c5d98f36cda"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4d76d5993a34ef3df5181ba3c92fabb93f1eaa5729504fb03423fcd9f3177242"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:89785bb2a1980c1bd87f0cb1517a71cde374776a5f150936b82580ae6ead44a1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win32.whl", hash = "sha256:8eb96e79b9f3dcadbad2a3891672f81cdcab7f95b27f28f1c67d75f045b6b4f1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:287ccc248c9e0d0566934e7d606201abd74761b5703d804ff3df8935f523d546"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b457d6430833cee8e4b8e9b6f07aa1c161e5e0d52e118dc102c8f9bd7dd060d6"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb76c1a154b83991a3cbbf0dfeb26ec2833ad56f95540b442c73950af2013750"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:44a29502ca9c7b5ba389e620d44f2fbe792b1fb5734e8b931ad307071ec58c53"}, + {file = "contourpy-1.3.1.tar.gz", hash = "sha256:dfd97abd83335045a913e3bcc4a09c0ceadbe66580cf573fe961f4a825efa699"}, +] + +[package.dependencies] +numpy = ">=1.23" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] + [[package]] name = "crcmod" version = "1.7" @@ -237,6 +558,136 @@ files = [ {file = "crcmod-1.7.tar.gz", hash = "sha256:dc7051a0db5f2bd48665a990d3ec1cc305a466a77358ca4492826f41f283601e"}, ] +[[package]] +name = "cryptography" +version = "43.0.3" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e1ce50266f4f70bf41a2c6dc4358afadae90e2a1e5342d3c08883df1675374f"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:443c4a81bb10daed9a8f334365fe52542771f25aedaf889fd323a853ce7377d6"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:74f57f24754fe349223792466a709f8e0c093205ff0dca557af51072ff47ab18"}, + {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9762ea51a8fc2a88b70cf2995e5675b38d93bf36bd67d91721c309df184f49bd"}, + {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:81ef806b1fef6b06dcebad789f988d3b37ccaee225695cf3e07648eee0fc6b73"}, + {file = "cryptography-43.0.3-cp37-abi3-win32.whl", hash = "sha256:cbeb489927bd7af4aa98d4b261af9a5bc025bd87f0e3547e11584be9e9427be2"}, + {file = "cryptography-43.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:f46304d6f0c6ab8e52770addfa2fc41e6629495548862279641972b6215451cd"}, + {file = "cryptography-43.0.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8ac43ae87929a5982f5948ceda07001ee5e83227fd69cf55b109144938d96984"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:846da004a5804145a5f441b8530b4bf35afbf7da70f82409f151695b127213d5"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f996e7268af62598f2fc1204afa98a3b5712313a55c4c9d434aef49cadc91d4"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f7b178f11ed3664fd0e995a47ed2b5ff0a12d893e41dd0494f406d1cf555cab7"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c2e6fc39c4ab499049df3bdf567f768a723a5e8464816e8f009f121a5a9f4405"}, + {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e1be4655c7ef6e1bbe6b5d0403526601323420bcf414598955968c9ef3eb7d16"}, + {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:df6b6c6d742395dd77a23ea3728ab62f98379eff8fb61be2744d4679ab678f73"}, + {file = "cryptography-43.0.3-cp39-abi3-win32.whl", hash = "sha256:d56e96520b1020449bbace2b78b603442e7e378a9b3bd68de65c782db1507995"}, + {file = "cryptography-43.0.3-cp39-abi3-win_amd64.whl", hash = "sha256:0c580952eef9bf68c4747774cde7ec1d85a6e61de97281f2dba83c7d2c806362"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d03b5621a135bffecad2c73e9f4deb1a0f977b9a8ffe6f8e002bf6c9d07b918c"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a2a431ee15799d6db9fe80c82b055bae5a752bef645bba795e8e52687c69efe3"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:281c945d0e28c92ca5e5930664c1cefd85efe80e5c0d2bc58dd63383fda29f83"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f18c716be16bc1fea8e95def49edf46b82fccaa88587a45f8dc0ff6ab5d8e0a7"}, + {file = "cryptography-43.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:4a02ded6cd4f0a5562a8887df8b3bd14e822a90f97ac5e544c162899bc467664"}, + {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53a583b6637ab4c4e3591a15bc9db855b8d9dee9a669b550f311480acab6eb08"}, + {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1ec0bcf7e17c0c5669d881b1cd38c4972fade441b27bda1051665faaa89bdcaa"}, + {file = "cryptography-43.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ce6fae5bdad59577b44e4dfed356944fbf1d925269114c28be377692643b4ff"}, + {file = "cryptography-43.0.3.tar.gz", hash = "sha256:315b9001266a492a6ff443b61238f956b214dbec9910a081ba5b6646a055a805"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] +nox = ["nox"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + +[[package]] +name = "dash" +version = "2.18.2" +description = "A Python framework for building reactive web-apps. Developed by Plotly." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "dash-2.18.2-py3-none-any.whl", hash = "sha256:0ce0479d1bc958e934630e2de7023b8a4558f23ce1f9f5a4b34b65eb3903a869"}, + {file = "dash-2.18.2.tar.gz", hash = "sha256:20e8404f73d0fe88ce2eae33c25bbc513cbe52f30d23a401fa5f24dbb44296c8"}, +] + +[package.dependencies] +dash-core-components = "2.0.0" +dash-html-components = "2.0.0" +dash-table = "5.0.0" +Flask = ">=1.0.4,<3.1" +importlib-metadata = "*" +nest-asyncio = "*" +plotly = ">=5.0.0" +requests = "*" +retrying = "*" +setuptools = "*" +typing-extensions = ">=4.1.1" +Werkzeug = "<3.1" + +[package.extras] +celery = ["celery[redis] (>=5.1.2)", "redis (>=3.5.3)"] +ci = ["black (==22.3.0)", "dash-dangerously-set-inner-html", "dash-flow-example (==0.0.5)", "flake8 (==7.0.0)", "flaky (==3.8.1)", "flask-talisman (==1.0.0)", "jupyterlab (<4.0.0)", "mimesis (<=11.1.0)", "mock (==4.0.3)", "numpy (<=1.26.3)", "openpyxl", "orjson (==3.10.3)", "pandas (>=1.4.0)", "pyarrow", "pylint (==3.0.3)", "pytest-mock", "pytest-rerunfailures", "pytest-sugar (==0.9.6)", "pyzmq (==25.1.2)", "xlrd (>=2.0.1)"] +compress = ["flask-compress"] +dev = ["PyYAML (>=5.4.1)", "coloredlogs (>=15.0.1)", "fire (>=0.4.0)"] +diskcache = ["diskcache (>=5.2.1)", "multiprocess (>=0.70.12)", "psutil (>=5.8.0)"] +testing = ["beautifulsoup4 (>=4.8.2)", "cryptography", "dash-testing-stub (>=0.0.2)", "lxml (>=4.6.2)", "multiprocess (>=0.70.12)", "percy (>=2.0.2)", "psutil (>=5.8.0)", "pytest (>=6.0.2)", "requests[security] (>=2.21.0)", "selenium (>=3.141.0,<=4.2.0)", "waitress (>=1.4.4)"] + +[[package]] +name = "dash-core-components" +version = "2.0.0" +description = "Core component suite for Dash" +optional = false +python-versions = "*" +files = [ + {file = "dash_core_components-2.0.0-py3-none-any.whl", hash = "sha256:52b8e8cce13b18d0802ee3acbc5e888cb1248a04968f962d63d070400af2e346"}, + {file = "dash_core_components-2.0.0.tar.gz", hash = "sha256:c6733874af975e552f95a1398a16c2ee7df14ce43fa60bb3718a3c6e0b63ffee"}, +] + +[[package]] +name = "dash-html-components" +version = "2.0.0" +description = "Vanilla HTML components for Dash" +optional = false +python-versions = "*" +files = [ + {file = "dash_html_components-2.0.0-py3-none-any.whl", hash = "sha256:b42cc903713c9706af03b3f2548bda4be7307a7cf89b7d6eae3da872717d1b63"}, + {file = "dash_html_components-2.0.0.tar.gz", hash = "sha256:8703a601080f02619a6390998e0b3da4a5daabe97a1fd7a9cebc09d015f26e50"}, +] + +[[package]] +name = "dash-table" +version = "5.0.0" +description = "Dash table" +optional = false +python-versions = "*" +files = [ + {file = "dash_table-5.0.0-py3-none-any.whl", hash = "sha256:19036fa352bb1c11baf38068ec62d172f0515f73ca3276c79dee49b95ddc16c9"}, + {file = "dash_table-5.0.0.tar.gz", hash = "sha256:18624d693d4c8ef2ddec99a6f167593437a7ea0bf153aa20f318c170c5bc7308"}, +] + [[package]] name = "deprecated" version = "1.2.14" @@ -254,6 +705,17 @@ wrapt = ">=1.10,<2" [package.extras] dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] +[[package]] +name = "durationpy" +version = "0.9" +description = "Module for converting between datetime.timedelta and Go's Duration strings." 
+optional = false +python-versions = "*" +files = [ + {file = "durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38"}, + {file = "durationpy-0.9.tar.gz", hash = "sha256:fd3feb0a69a0057d582ef643c355c40d2fa1c942191f914d12203b1a01ac722a"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -304,6 +766,202 @@ docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2. testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] typing = ["typing-extensions (>=4.12.2)"] +[[package]] +name = "flask" +version = "3.0.3" +description = "A simple framework for building complex web applications." +optional = false +python-versions = ">=3.8" +files = [ + {file = "flask-3.0.3-py3-none-any.whl", hash = "sha256:34e815dfaa43340d1d15a5c3a02b8476004037eb4840b34910c6e21679d288f3"}, + {file = "flask-3.0.3.tar.gz", hash = "sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842"}, +] + +[package.dependencies] +blinker = ">=1.6.2" +click = ">=8.1.3" +itsdangerous = ">=2.1.2" +Jinja2 = ">=3.1.2" +Werkzeug = ">=3.0.0" + +[package.extras] +async = ["asgiref (>=3.2)"] +dotenv = ["python-dotenv"] + +[[package]] +name = "fonttools" +version = "4.55.0" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.55.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:51c029d4c0608a21a3d3d169dfc3fb776fde38f00b35ca11fdab63ba10a16f61"}, + {file = "fonttools-4.55.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bca35b4e411362feab28e576ea10f11268b1aeed883b9f22ed05675b1e06ac69"}, + {file = "fonttools-4.55.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ce4ba6981e10f7e0ccff6348e9775ce25ffadbee70c9fd1a3737e3e9f5fa74f"}, + {file = "fonttools-4.55.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31d00f9852a6051dac23294a4cf2df80ced85d1d173a61ba90a3d8f5abc63c60"}, + {file = "fonttools-4.55.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e198e494ca6e11f254bac37a680473a311a88cd40e58f9cc4dc4911dfb686ec6"}, + {file = "fonttools-4.55.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7208856f61770895e79732e1dcbe49d77bd5783adf73ae35f87fcc267df9db81"}, + {file = "fonttools-4.55.0-cp310-cp310-win32.whl", hash = "sha256:e7e6a352ff9e46e8ef8a3b1fe2c4478f8a553e1b5a479f2e899f9dc5f2055880"}, + {file = "fonttools-4.55.0-cp310-cp310-win_amd64.whl", hash = "sha256:636caaeefe586d7c84b5ee0734c1a5ab2dae619dc21c5cf336f304ddb8f6001b"}, + {file = "fonttools-4.55.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fa34aa175c91477485c44ddfbb51827d470011e558dfd5c7309eb31bef19ec51"}, + {file = "fonttools-4.55.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:37dbb3fdc2ef7302d3199fb12468481cbebaee849e4b04bc55b77c24e3c49189"}, + {file = "fonttools-4.55.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5263d8e7ef3c0ae87fbce7f3ec2f546dc898d44a337e95695af2cd5ea21a967"}, + {file = "fonttools-4.55.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f307f6b5bf9e86891213b293e538d292cd1677e06d9faaa4bf9c086ad5f132f6"}, + {file = "fonttools-4.55.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f0a4b52238e7b54f998d6a56b46a2c56b59c74d4f8a6747fb9d4042190f37cd3"}, + {file = 
"fonttools-4.55.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3e569711464f777a5d4ef522e781dc33f8095ab5efd7548958b36079a9f2f88c"}, + {file = "fonttools-4.55.0-cp311-cp311-win32.whl", hash = "sha256:2b3ab90ec0f7b76c983950ac601b58949f47aca14c3f21eed858b38d7ec42b05"}, + {file = "fonttools-4.55.0-cp311-cp311-win_amd64.whl", hash = "sha256:aa046f6a63bb2ad521004b2769095d4c9480c02c1efa7d7796b37826508980b6"}, + {file = "fonttools-4.55.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:838d2d8870f84fc785528a692e724f2379d5abd3fc9dad4d32f91cf99b41e4a7"}, + {file = "fonttools-4.55.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f46b863d74bab7bb0d395f3b68d3f52a03444964e67ce5c43ce43a75efce9246"}, + {file = "fonttools-4.55.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33b52a9cfe4e658e21b1f669f7309b4067910321757fec53802ca8f6eae96a5a"}, + {file = "fonttools-4.55.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:732a9a63d6ea4a81b1b25a1f2e5e143761b40c2e1b79bb2b68e4893f45139a40"}, + {file = "fonttools-4.55.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7dd91ac3fcb4c491bb4763b820bcab6c41c784111c24172616f02f4bc227c17d"}, + {file = "fonttools-4.55.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1f0e115281a32ff532118aa851ef497a1b7cda617f4621c1cdf81ace3e36fb0c"}, + {file = "fonttools-4.55.0-cp312-cp312-win32.whl", hash = "sha256:6c99b5205844f48a05cb58d4a8110a44d3038c67ed1d79eb733c4953c628b0f6"}, + {file = "fonttools-4.55.0-cp312-cp312-win_amd64.whl", hash = "sha256:f8c8c76037d05652510ae45be1cd8fb5dd2fd9afec92a25374ac82255993d57c"}, + {file = "fonttools-4.55.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8118dc571921dc9e4b288d9cb423ceaf886d195a2e5329cc427df82bba872cd9"}, + {file = "fonttools-4.55.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01124f2ca6c29fad4132d930da69158d3f49b2350e4a779e1efbe0e82bd63f6c"}, + {file = "fonttools-4.55.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81ffd58d2691f11f7c8438796e9f21c374828805d33e83ff4b76e4635633674c"}, + {file = "fonttools-4.55.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5435e5f1eb893c35c2bc2b9cd3c9596b0fcb0a59e7a14121562986dd4c47b8dd"}, + {file = "fonttools-4.55.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d12081729280c39d001edd0f4f06d696014c26e6e9a0a55488fabc37c28945e4"}, + {file = "fonttools-4.55.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a7ad1f1b98ab6cb927ab924a38a8649f1ffd7525c75fe5b594f5dab17af70e18"}, + {file = "fonttools-4.55.0-cp313-cp313-win32.whl", hash = "sha256:abe62987c37630dca69a104266277216de1023cf570c1643bb3a19a9509e7a1b"}, + {file = "fonttools-4.55.0-cp313-cp313-win_amd64.whl", hash = "sha256:2863555ba90b573e4201feaf87a7e71ca3b97c05aa4d63548a4b69ea16c9e998"}, + {file = "fonttools-4.55.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:00f7cf55ad58a57ba421b6a40945b85ac7cc73094fb4949c41171d3619a3a47e"}, + {file = "fonttools-4.55.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f27526042efd6f67bfb0cc2f1610fa20364396f8b1fc5edb9f45bb815fb090b2"}, + {file = "fonttools-4.55.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e67974326af6a8879dc2a4ec63ab2910a1c1a9680ccd63e4a690950fceddbe"}, + {file = "fonttools-4.55.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:61dc0a13451143c5e987dec5254d9d428f3c2789a549a7cf4f815b63b310c1cc"}, + {file = "fonttools-4.55.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:b2e526b325a903868c62155a6a7e24df53f6ce4c5c3160214d8fe1be2c41b478"}, + {file = "fonttools-4.55.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b7ef9068a1297714e6fefe5932c33b058aa1d45a2b8be32a4c6dee602ae22b5c"}, + {file = "fonttools-4.55.0-cp38-cp38-win32.whl", hash = "sha256:55718e8071be35dff098976bc249fc243b58efa263768c611be17fe55975d40a"}, + {file = "fonttools-4.55.0-cp38-cp38-win_amd64.whl", hash = "sha256:553bd4f8cc327f310c20158e345e8174c8eed49937fb047a8bda51daf2c353c8"}, + {file = "fonttools-4.55.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f901cef813f7c318b77d1c5c14cf7403bae5cb977cede023e22ba4316f0a8f6"}, + {file = "fonttools-4.55.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c9679fc0dd7e8a5351d321d8d29a498255e69387590a86b596a45659a39eb0d"}, + {file = "fonttools-4.55.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd2820a8b632f3307ebb0bf57948511c2208e34a4939cf978333bc0a3f11f838"}, + {file = "fonttools-4.55.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23bbbb49bec613a32ed1b43df0f2b172313cee690c2509f1af8fdedcf0a17438"}, + {file = "fonttools-4.55.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a656652e1f5d55b9728937a7e7d509b73d23109cddd4e89ee4f49bde03b736c6"}, + {file = "fonttools-4.55.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f50a1f455902208486fbca47ce33054208a4e437b38da49d6721ce2fef732fcf"}, + {file = "fonttools-4.55.0-cp39-cp39-win32.whl", hash = "sha256:161d1ac54c73d82a3cded44202d0218ab007fde8cf194a23d3dd83f7177a2f03"}, + {file = "fonttools-4.55.0-cp39-cp39-win_amd64.whl", hash = "sha256:ca7fd6987c68414fece41c96836e945e1f320cda56fc96ffdc16e54a44ec57a2"}, + {file = "fonttools-4.55.0-py3-none-any.whl", hash = "sha256:12db5888cd4dd3fcc9f0ee60c6edd3c7e1fd44b7dd0f31381ea03df68f8a153f"}, + {file = "fonttools-4.55.0.tar.gz", hash = "sha256:7636acc6ab733572d5e7eec922b254ead611f1cdad17be3f0be7418e8bfaca71"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + +[[package]] +name = "frozenlist" +version = "1.5.0" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5"}, + {file = "frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb"}, + {file = "frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a"}, + {file = 
"frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf"}, + {file = "frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942"}, + {file = "frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f"}, + {file = "frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8"}, + {file = "frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03"}, + {file = "frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c"}, + {file = "frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c"}, + {file = 
"frozenlist-1.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e"}, + {file = "frozenlist-1.5.0-cp38-cp38-win32.whl", hash = "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723"}, + {file = "frozenlist-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c"}, + {file = "frozenlist-1.5.0-cp39-cp39-win32.whl", hash = "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3"}, + {file = "frozenlist-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0"}, + {file = "frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3"}, + {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"}, +] + [[package]] name = "fsspec" version = "2024.10.0" @@ -343,6 +1001,29 @@ 
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] +[[package]] +name = "google-auth" +version = "2.36.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google_auth-2.36.0-py2.py3-none-any.whl", hash = "sha256:51a15d47028b66fd36e5c64a82d2d57480075bccc7da37cde257fc94177a61fb"}, + {file = "google_auth-2.36.0.tar.gz", hash = "sha256:545e9618f2df0bcbb7dcbc45a546485b1212624716975a1ea5ae8149ce769ab1"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography", "pyopenssl"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + [[package]] name = "gunicorn" version = "23.0.0" @@ -533,6 +1214,47 @@ files = [ [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +[[package]] +name = "importlib-metadata" +version = "8.5.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, + {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"}, +] + +[package.dependencies] +zipp = ">=3.20" + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +perf = ["ipython"] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +type = ["pytest-mypy"] + +[[package]] +name = "incdbscan" +version = "0.1.0" +description = "Implementation of IncrementalDBSCAN clustering." +optional = false +python-versions = ">=3.7.1,<4.0" +files = [ + {file = "incdbscan-0.1.0-py3-none-any.whl", hash = "sha256:b8e9fff2f6c22f11c66e75731023b95b3150c9425ce43b821da5d70731e46168"}, + {file = "incdbscan-0.1.0.tar.gz", hash = "sha256:1e970243cef1eada128e84366c58d4dcff5f3320efeb041f13006d68791207d7"}, +] + +[package.dependencies] +networkx = ">=2.5.1,<3.0.0" +numpy = ">=1.20.3,<2.0.0" +scikit-learn = ">=1.0,<2.0" +sortedcontainers = ">=2.4.0,<3.0.0" +xxhash = ">=2.0.0,<3.0.0" + [[package]] name = "iniconfig" version = "2.0.0" @@ -544,6 +1266,34 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "itsdangerous" +version = "2.2.0" +description = "Safely pass data to untrusted environments and back." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, + {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, +] + +[[package]] +name = "jinja2" +version = "3.1.4" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + [[package]] name = "jmespath" version = "1.0.1" @@ -555,6 +1305,404 @@ files = [ {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + +[[package]] +name = "kiwisolver" +version = "1.4.7" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = 
"kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = 
"kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file 
= "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + 
{file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +] + +[[package]] +name = "kubernetes" +version = "31.0.0" +description = "Kubernetes python client" +optional = false +python-versions = ">=3.6" +files = [ + {file = "kubernetes-31.0.0-py2.py3-none-any.whl", hash = "sha256:bf141e2d380c8520eada8b351f4e319ffee9636328c137aa432bc486ca1200e1"}, + {file = "kubernetes-31.0.0.tar.gz", hash = "sha256:28945de906c8c259c1ebe62703b56a03b714049372196f854105afe4e6d014c0"}, +] + +[package.dependencies] +certifi = ">=14.05.14" +durationpy = ">=0.7" +google-auth = ">=1.0.1" +oauthlib = ">=3.2.2" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4.1" +requests = "*" +requests-oauthlib = "*" +six = ">=1.9.0" +urllib3 = ">=1.24.2" +websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" + +[package.extras] +adal = ["adal (>=1.0.2)"] + +[[package]] +name = "markupsafe" +version = "3.0.2" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, + {file = 
"MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d"}, + {file = 
"MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, + {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, +] + +[[package]] +name = "matplotlib" +version = "3.9.2" +description = "Python plotting package" +optional = false +python-versions = ">=3.9" +files = [ + {file = "matplotlib-3.9.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9d78bbc0cbc891ad55b4f39a48c22182e9bdaea7fc0e5dbd364f49f729ca1bbb"}, + {file = "matplotlib-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c375cc72229614632c87355366bdf2570c2dac01ac66b8ad048d2dabadf2d0d4"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d94ff717eb2bd0b58fe66380bd8b14ac35f48a98e7c6765117fe67fb7684e64"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab68d50c06938ef28681073327795c5db99bb4666214d2d5f880ed11aeaded66"}, + {file = "matplotlib-3.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:65aacf95b62272d568044531e41de26285d54aec8cb859031f511f84bd8b495a"}, + {file = 
"matplotlib-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:3fd595f34aa8a55b7fc8bf9ebea8aa665a84c82d275190a61118d33fbc82ccae"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8dd059447824eec055e829258ab092b56bb0579fc3164fa09c64f3acd478772"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c797dac8bb9c7a3fd3382b16fe8f215b4cf0f22adccea36f1545a6d7be310b41"}, + {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d719465db13267bcef19ea8954a971db03b9f48b4647e3860e4bc8e6ed86610f"}, + {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447"}, + {file = "matplotlib-3.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7741f26a58a240f43bee74965c4882b6c93df3e7eb3de160126d8c8f53a6ae6e"}, + {file = "matplotlib-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:ae82a14dab96fbfad7965403c643cafe6515e386de723e498cf3eeb1e0b70cc7"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac43031375a65c3196bee99f6001e7fa5bdfb00ddf43379d3c0609bdca042df9"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be0fc24a5e4531ae4d8e858a1a548c1fe33b176bb13eff7f9d0d38ce5112a27d"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf81de2926c2db243c9b2cbc3917619a0fc85796c6ba4e58f541df814bbf83c7"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c"}, + {file = "matplotlib-3.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:306c8dfc73239f0e72ac50e5a9cf19cc4e8e331dd0c54f5e69ca8758550f1e1e"}, + {file = "matplotlib-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:5413401594cfaff0052f9d8b1aafc6d305b4bd7c4331dccd18f561ff7e1d3bd3"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:18128cc08f0d3cfff10b76baa2f296fc28c4607368a8402de61bb3f2eb33c7d9"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4876d7d40219e8ae8bb70f9263bcbe5714415acfdf781086601211335e24f8aa"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d9f07a80deab4bb0b82858a9e9ad53d1382fd122be8cde11080f4e7dfedb38b"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413"}, + {file = "matplotlib-3.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:909645cce2dc28b735674ce0931a4ac94e12f5b13f6bb0b5a5e65e7cea2c192b"}, + {file = "matplotlib-3.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:37e51dd1c2db16ede9cfd7b5cabdfc818b2c6397c83f8b10e0e797501c963a03"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b82c5045cebcecd8496a4d694d43f9cc84aeeb49fe2133e036b207abe73f4d30"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f053c40f94bc51bc03832a41b4f153d83f2062d88c72b5e79997072594e97e51"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:dbe196377a8248972f5cede786d4c5508ed5f5ca4a1e09b44bda889958b33f8c"}, + {file = "matplotlib-3.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5816b1e1fe8c192cbc013f8f3e3368ac56fbecf02fb41b8f8559303f24c5015e"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cef2a73d06601437be399908cf13aee74e86932a5ccc6ccdf173408ebc5f6bb2"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e0830e188029c14e891fadd99702fd90d317df294c3298aad682739c5533721a"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cd93b91ab47a3616b4d3c42b52f8363b88ca021e340804c6ab2536344fad9ca"}, + {file = "matplotlib-3.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6d1ce5ed2aefcdce11904fc5bbea7d9c21fff3d5f543841edf3dea84451a09ea"}, + {file = "matplotlib-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:b2696efdc08648536efd4e1601b5fd491fd47f4db97a5fbfd175549a7365c1b2"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d52a3b618cb1cbb769ce2ee1dcdb333c3ab6e823944e9a2d36e37253815f9556"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6758baae2ed64f2331d4fd19be38b7b4eae3ecec210049a26b6a4f3ae1c85dcc"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:050598c2b29e0b9832cde72bcf97627bf00262adbc4a54e2b856426bb2ef0697"}, + {file = "matplotlib-3.9.2.tar.gz", hash = "sha256:96ab43906269ca64a6366934106fa01534454a69e471b7bf3d79083981aaab92"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.23" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[package.extras] +dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"] + +[[package]] +name = "multidict" +version = "6.1.0" +description = "multidict implementation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, + {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, + {file = "multidict-6.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a114d03b938376557927ab23f1e950827c3b893ccb94b62fd95d430fd0e5cf53"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1c416351ee6271b2f49b56ad7f308072f6f44b37118d69c2cad94f3fa8a40d5"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b5d83030255983181005e6cfbac1617ce9746b219bc2aad52201ad121226581"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3e97b5e938051226dc025ec80980c285b053ffb1e25a3db2a3aa3bc046bf7f56"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d618649d4e70ac6efcbba75be98b26ef5078faad23592f9b51ca492953012429"}, + {file = 
"multidict-6.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10524ebd769727ac77ef2278390fb0068d83f3acb7773792a5080f2b0abf7748"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ff3827aef427c89a25cc96ded1759271a93603aba9fb977a6d264648ebf989db"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06809f4f0f7ab7ea2cabf9caca7d79c22c0758b58a71f9d32943ae13c7ace056"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f179dee3b863ab1c59580ff60f9d99f632f34ccb38bf67a33ec6b3ecadd0fd76"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:aaed8b0562be4a0876ee3b6946f6869b7bcdb571a5d1496683505944e268b160"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c8b88a2ccf5493b6c8da9076fb151ba106960a2df90c2633f342f120751a9e7"}, + {file = "multidict-6.1.0-cp310-cp310-win32.whl", hash = "sha256:4a9cb68166a34117d6646c0023c7b759bf197bee5ad4272f420a0141d7eb03a0"}, + {file = "multidict-6.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:20b9b5fbe0b88d0bdef2012ef7dee867f874b72528cf1d08f1d59b0e3850129d"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3efe2c2cb5763f2f1b275ad2bf7a287d3f7ebbef35648a9726e3b69284a4f3d6"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7053d3b0353a8b9de430a4f4b4268ac9a4fb3481af37dfe49825bf45ca24156"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27e5fc84ccef8dfaabb09d82b7d179c7cf1a3fbc8a966f8274fcb4ab2eb4cadb"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e2b90b43e696f25c62656389d32236e049568b39320e2735d51f08fd362761b"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d83a047959d38a7ff552ff94be767b7fd79b831ad1cd9920662db05fec24fe72"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1a9dd711d0877a1ece3d2e4fea11a8e75741ca21954c919406b44e7cf971304"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec2abea24d98246b94913b76a125e855eb5c434f7c46546046372fe60f666351"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4867cafcbc6585e4b678876c489b9273b13e9fff9f6d6d66add5e15d11d926cb"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b48204e8d955c47c55b72779802b219a39acc3ee3d0116d5080c388970b76e3"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8fff389528cad1618fb4b26b95550327495462cd745d879a8c7c2115248e399"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a7a9541cd308eed5e30318430a9c74d2132e9a8cb46b901326272d780bf2d423"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:da1758c76f50c39a2efd5e9859ce7d776317eb1dd34317c8152ac9251fc574a3"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c943a53e9186688b45b323602298ab727d8865d8c9ee0b17f8d62d14b56f0753"}, + {file = "multidict-6.1.0-cp311-cp311-win32.whl", hash = "sha256:90f8717cb649eea3504091e640a1b8568faad18bd4b9fcd692853a04475a4b80"}, + {file = "multidict-6.1.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:82176036e65644a6cc5bd619f65f6f19781e8ec2e5330f51aa9ada7504cc1926"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b04772ed465fa3cc947db808fa306d79b43e896beb677a56fb2347ca1a49c1fa"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6180c0ae073bddeb5a97a38c03f30c233e0a4d39cd86166251617d1bbd0af436"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:071120490b47aa997cca00666923a83f02c7fbb44f71cf7f136df753f7fa8761"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50b3a2710631848991d0bf7de077502e8994c804bb805aeb2925a981de58ec2e"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b58c621844d55e71c1b7f7c498ce5aa6985d743a1a59034c57a905b3f153c1ef"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55b6d90641869892caa9ca42ff913f7ff1c5ece06474fbd32fb2cf6834726c95"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b820514bfc0b98a30e3d85462084779900347e4d49267f747ff54060cc33925"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10a9b09aba0c5b48c53761b7c720aaaf7cf236d5fe394cd399c7ba662d5f9966"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e16bf3e5fc9f44632affb159d30a437bfe286ce9e02754759be5536b169b305"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76f364861c3bfc98cbbcbd402d83454ed9e01a5224bb3a28bf70002a230f73e2"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:820c661588bd01a0aa62a1283f20d2be4281b086f80dad9e955e690c75fb54a2"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:0e5f362e895bc5b9e67fe6e4ded2492d8124bdf817827f33c5b46c2fe3ffaca6"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ec660d19bbc671e3a6443325f07263be452c453ac9e512f5eb935e7d4ac28b3"}, + {file = "multidict-6.1.0-cp312-cp312-win32.whl", hash = "sha256:58130ecf8f7b8112cdb841486404f1282b9c86ccb30d3519faf301b2e5659133"}, + {file = "multidict-6.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:188215fc0aafb8e03341995e7c4797860181562380f81ed0a87ff455b70bf1f1"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d569388c381b24671589335a3be6e1d45546c2988c2ebe30fdcada8457a31008"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:052e10d2d37810b99cc170b785945421141bf7bb7d2f8799d431e7db229c385f"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f90c822a402cb865e396a504f9fc8173ef34212a342d92e362ca498cad308e28"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b225d95519a5bf73860323e633a664b0d85ad3d5bede6d30d95b35d4dfe8805b"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:23bfd518810af7de1116313ebd9092cb9aa629beb12f6ed631ad53356ed6b86c"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c09fcfdccdd0b57867577b719c69e347a436b86cd83747f179dbf0cc0d4c1f3"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bf6bea52ec97e95560af5ae576bdac3aa3aae0b6758c6efa115236d9e07dae44"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57feec87371dbb3520da6192213c7d6fc892d5589a93db548331954de8248fd2"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0c3f390dc53279cbc8ba976e5f8035eab997829066756d811616b652b00a23a3"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:59bfeae4b25ec05b34f1956eaa1cb38032282cd4dfabc5056d0a1ec4d696d3aa"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b2f59caeaf7632cc633b5cf6fc449372b83bbdf0da4ae04d5be36118e46cc0aa"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:37bb93b2178e02b7b618893990941900fd25b6b9ac0fa49931a40aecdf083fe4"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4e9f48f58c2c523d5a06faea47866cd35b32655c46b443f163d08c6d0ddb17d6"}, + {file = "multidict-6.1.0-cp313-cp313-win32.whl", hash = "sha256:3a37ffb35399029b45c6cc33640a92bef403c9fd388acce75cdc88f58bd19a81"}, + {file = "multidict-6.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:e9aa71e15d9d9beaad2c6b9319edcdc0a49a43ef5c0a4c8265ca9ee7d6c67774"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:db7457bac39421addd0c8449933ac32d8042aae84a14911a757ae6ca3eef1392"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d094ddec350a2fb899fec68d8353c78233debde9b7d8b4beeafa70825f1c281a"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5845c1fd4866bb5dd3125d89b90e57ed3138241540897de748cdf19de8a2fca2"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9079dfc6a70abe341f521f78405b8949f96db48da98aeb43f9907f342f627cdc"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3914f5aaa0f36d5d60e8ece6a308ee1c9784cd75ec8151062614657a114c4478"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c08be4f460903e5a9d0f76818db3250f12e9c344e79314d1d570fc69d7f4eae4"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d093be959277cb7dee84b801eb1af388b6ad3ca6a6b6bf1ed7585895789d027d"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3702ea6872c5a2a4eeefa6ffd36b042e9773f05b1f37ae3ef7264b1163c2dcf6"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2090f6a85cafc5b2db085124d752757c9d251548cedabe9bd31afe6363e0aff2"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f67f217af4b1ff66c68a87318012de788dd95fcfeb24cc889011f4e1c7454dfd"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:189f652a87e876098bbc67b4da1049afb5f5dfbaa310dd67c594b01c10388db6"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:6bb5992037f7a9eff7991ebe4273ea7f51f1c1c511e6a2ce511d0e7bdb754492"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f4c2b9e770c4e393876e35a7046879d195cd123b4f116d299d442b335bcd"}, + {file = "multidict-6.1.0-cp38-cp38-win32.whl", hash = "sha256:e27bbb6d14416713a8bd7aaa1313c0fc8d44ee48d74497a0ff4c3a1b6ccb5167"}, + {file = "multidict-6.1.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:22f3105d4fb15c8f57ff3959a58fcab6ce36814486500cd7485651230ad4d4ef"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4e18b656c5e844539d506a0a06432274d7bd52a7487e6828c63a63d69185626c"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a185f876e69897a6f3325c3f19f26a297fa058c5e456bfcff8015e9a27e83ae1"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab7c4ceb38d91570a650dba194e1ca87c2b543488fe9309b4212694174fd539c"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e617fb6b0b6953fffd762669610c1c4ffd05632c138d61ac7e14ad187870669c"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16e5f4bf4e603eb1fdd5d8180f1a25f30056f22e55ce51fb3d6ad4ab29f7d96f"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c035da3f544b1882bac24115f3e2e8760f10a0107614fc9839fd232200b875"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:957cf8e4b6e123a9eea554fa7ebc85674674b713551de587eb318a2df3e00255"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:483a6aea59cb89904e1ceabd2b47368b5600fb7de78a6e4a2c2987b2d256cf30"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:87701f25a2352e5bf7454caa64757642734da9f6b11384c1f9d1a8e699758057"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:682b987361e5fd7a139ed565e30d81fd81e9629acc7d925a205366877d8c8657"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce2186a7df133a9c895dea3331ddc5ddad42cdd0d1ea2f0a51e5d161e4762f28"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9f636b730f7e8cb19feb87094949ba54ee5357440b9658b2a32a5ce4bce53972"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:73eae06aa53af2ea5270cc066dcaf02cc60d2994bbb2c4ef5764949257d10f43"}, + {file = "multidict-6.1.0-cp39-cp39-win32.whl", hash = "sha256:1ca0083e80e791cffc6efce7660ad24af66c8d4079d2a750b29001b53ff59ada"}, + {file = "multidict-6.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:aa466da5b15ccea564bdab9c89175c762bc12825f4659c11227f515cee76fa4a"}, + {file = "multidict-6.1.0-py3-none-any.whl", hash = "sha256:48e171e52d1c4d33888e529b999e5900356b9ae588c2f09a52dcefb158b27506"}, + {file = "multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} + [[package]] name = "mypy" version = "1.11.1" @@ -613,6 +1761,96 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "nest-asyncio" +version = "1.6.0" +description = "Patch asyncio to allow nested event loops" +optional = false +python-versions = ">=3.5" +files = [ + {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, + {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, +] + +[[package]] +name = "networkx" +version = "2.8.8" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions 
= ">=3.8" +files = [ + {file = "networkx-2.8.8-py3-none-any.whl", hash = "sha256:e435dfa75b1d7195c7b8378c3859f0445cd88c6b0375c181ed66823a9ceb7524"}, + {file = "networkx-2.8.8.tar.gz", hash = "sha256:230d388117af870fce5647a3c52401fcf753e94720e6ea6b4197a5355648885e"}, +] + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.19)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=0.982)", "pre-commit (>=2.20)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.2)", "pydata-sphinx-theme (>=0.11)", "sphinx (>=5.2)", "sphinx-gallery (>=0.11)", "texext (>=0.6.6)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.9)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + [[package]] name = "packaging" version = "24.2" @@ -624,6 +1862,198 @@ files = [ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +[[package]] +name 
= "pandas" +version = "2.2.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", 
"matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + +[[package]] +name = "pillow" +version = "11.0.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, + {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97"}, + {file = "pillow-11.0.0-cp310-cp310-win32.whl", hash = "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50"}, + {file = "pillow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c"}, + {file = "pillow-11.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1"}, + {file = 
"pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9"}, + {file = "pillow-11.0.0-cp311-cp311-win32.whl", hash = "sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5"}, + {file = "pillow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291"}, + {file = "pillow-11.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, + {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = "sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, + {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, + {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38"}, + {file = 
"pillow-11.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb"}, + {file = "pillow-11.0.0-cp313-cp313-win32.whl", hash = "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798"}, + {file = "pillow-11.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de"}, + {file = "pillow-11.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a"}, + {file = "pillow-11.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8"}, + {file = "pillow-11.0.0-cp313-cp313t-win32.whl", hash = "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8"}, + {file = "pillow-11.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904"}, + {file = "pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f"}, + 
{file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae"}, + {file = "pillow-11.0.0-cp39-cp39-win32.whl", hash = "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4"}, + {file = "pillow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd"}, + {file = "pillow-11.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944"}, + {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + +[[package]] +name = "plotly" +version = "5.24.1" +description = "An open-source, interactive data visualization library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "plotly-5.24.1-py3-none-any.whl", hash = "sha256:f67073a1e637eb0dc3e46324d9d51e2fe76e9727c892dde64ddf1e1b51f29089"}, + {file = "plotly-5.24.1.tar.gz", hash = "sha256:dbc8ac8339d248a4bcc36e08a5659bacfe1b079390b8953533f4eb22169b4bae"}, +] + +[package.dependencies] +packaging = "*" +tenacity = ">=6.2.0" + [[package]] name = "pluggy" version = "1.5.0" @@ -653,6 +2083,160 @@ files = [ [package.extras] twisted = ["twisted"] +[[package]] +name = "propcache" +version = "0.2.0" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.8" +files = [ + {file = 
"propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, + {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, + {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, + {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, + {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, + {file = 
"propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, + {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, + {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, + {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = "sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, + {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, + {file = 
"propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, + {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = "sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, + {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = 
"sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, + {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, + {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, + {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, + {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, +] + +[[package]] +name = "pulp" +version = "2.8.0" +description = "PuLP is an LP modeler written in python. PuLP can generate MPS or LP files and call GLPK, COIN CLP/CBC, CPLEX, and GUROBI to solve linear problems." +optional = false +python-versions = ">=3.7" +files = [ + {file = "PuLP-2.8.0-py3-none-any.whl", hash = "sha256:4a19814a5b0a4392d788ac2315263435293579b0583c3469943fe0c6a586f263"}, + {file = "PuLP-2.8.0.tar.gz", hash = "sha256:4903bf96110bbab8ed2c68533f90565ebb76aa367d9e4df38e51bf727927c125"}, +] + +[[package]] +name = "pyasn1" +version = "0.6.1" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, + {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.1" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, + {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, +] + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.7.0" + +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + [[package]] name = "pydantic" version = "2.9.2" @@ -774,6 +2358,20 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pyparsing" +version = "3.2.0" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"}, + {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + 
[[package]] name = "pytest" version = "8.3.3" @@ -883,6 +2481,24 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "redis" +version = "5.2.0" +description = "Python client for Redis database and key-value store" +optional = false +python-versions = ">=3.8" +files = [ + {file = "redis-5.2.0-py3-none-any.whl", hash = "sha256:ae174f2bb3b1bf2b09d54bf3e51fbc1469cf6c10aa03e21141f51969801a7897"}, + {file = "redis-5.2.0.tar.gz", hash = "sha256:0b1087665a771b1ff2e003aa5bdd354f15a70c9e25d5a7dbf9c722c16528a7b0"}, +] + +[package.dependencies] +async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""} + +[package.extras] +hiredis = ["hiredis (>=3.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==23.2.1)", "requests (>=2.31.0)"] + [[package]] name = "requests" version = "2.32.3" @@ -904,6 +2520,52 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +description = "OAuthlib authentication support for Requests." +optional = false +python-versions = ">=3.4" +files = [ + {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"}, + {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "retrying" +version = "1.3.4" +description = "Retrying" +optional = false +python-versions = "*" +files = [ + {file = "retrying-1.3.4-py3-none-any.whl", hash = "sha256:8cc4d43cb8e1125e0ff3344e9de678fefd85db3b750b81b2240dc0183af37b35"}, + {file = "retrying-1.3.4.tar.gz", hash = "sha256:345da8c5765bd982b1d1915deb9102fd3d1f7ad16bd84a9700b85f64d24e8f3e"}, +] + +[package.dependencies] +six = ">=1.7.0" + +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + [[package]] name = "ruff" version = "0.6.1" @@ -948,6 +2610,126 @@ botocore = ">=1.33.2,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] +[[package]] +name = "scikit-learn" +version = "1.5.2" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"}, + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8"}, + {file = "scikit_learn-1.5.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1"}, + {file = "scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, + {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7"}, + {file = "scikit_learn-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe"}, + {file = "scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", 
"pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.14.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.10" +files = [ + {file = "scipy-1.14.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69"}, + {file = "scipy-1.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad"}, + {file = "scipy-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2"}, + {file = 
"scipy-1.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2"}, + {file = "scipy-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066"}, + {file = "scipy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1"}, + {file = "scipy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e"}, + {file = "scipy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06"}, + {file = "scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84"}, + {file = "scipy-1.14.1.tar.gz", hash = "sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "setuptools" +version = "75.6.0" +description = "Easily download, 
build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.9" +files = [ + {file = "setuptools-75.6.0-py3-none-any.whl", hash = "sha256:ce74b49e8f7110f9bf04883b730f4765b774ef3ef28f722cce7c273d253aaf7d"}, + {file = "setuptools-75.6.0.tar.gz", hash = "sha256:8199222558df7c86216af4f84c30e9b34a61d8ba19366cc914424cdbd28252f6"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.7.0)"] +core = ["importlib_metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] + [[package]] name = "six" version = "1.16.0" @@ -970,6 +2752,17 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = false +python-versions = "*" +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + [[package]] name = "starlette" version = "0.38.6" @@ -983,11 +2776,36 @@ files = [ [package.dependencies] anyio = ">=3.4.0,<5" -typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +[[package]] +name = "tenacity" +version = "9.0.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, + {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + [[package]] 
name = "tomli" version = "2.0.2" @@ -1038,18 +2856,48 @@ slack = ["slack-sdk"] telegram = ["requests"] [[package]] -name = "types-requests" -version = "2.31.0.6" -description = "Typing stubs for requests" +name = "types-cffi" +version = "1.16.0.20240331" +description = "Typing stubs for cffi" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0"}, - {file = "types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9"}, + {file = "types-cffi-1.16.0.20240331.tar.gz", hash = "sha256:b8b20d23a2b89cfed5f8c5bc53b0cb8677c3aac6d970dbc771e28b9c698f5dee"}, + {file = "types_cffi-1.16.0.20240331-py3-none-any.whl", hash = "sha256:a363e5ea54a4eb6a4a105d800685fde596bc318089b025b27dee09849fe41ff0"}, ] [package.dependencies] -types-urllib3 = "*" +types-setuptools = "*" + +[[package]] +name = "types-pyopenssl" +version = "24.1.0.20240722" +description = "Typing stubs for pyOpenSSL" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-pyOpenSSL-24.1.0.20240722.tar.gz", hash = "sha256:47913b4678a01d879f503a12044468221ed8576263c1540dcb0484ca21b08c39"}, + {file = "types_pyOpenSSL-24.1.0.20240722-py3-none-any.whl", hash = "sha256:6a7a5d2ec042537934cfb4c9d4deb0e16c4c6250b09358df1f083682fe6fda54"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-cffi = "*" + +[[package]] +name = "types-redis" +version = "4.6.0.20241004" +description = "Typing stubs for redis" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e"}, + {file = "types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-pyOpenSSL = "*" [[package]] name = "types-requests" @@ -1066,14 +2914,14 @@ files = [ urllib3 = ">=2" [[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" +name = "types-setuptools" +version = "75.5.0.20241122" +description = "Typing stubs for setuptools" optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, - {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, + {file = "types_setuptools-75.5.0.20241122-py3-none-any.whl", hash = "sha256:d69c445f7bdd5e49d1b2441aadcee1388febcc9ad9d9d5fd33648b555e0b1c31"}, + {file = "types_setuptools-75.5.0.20241122.tar.gz", hash = "sha256:196aaf1811cbc1c77ac1d4c4879d5308b6fdf426e56b73baadbca2a1827dadef"}, ] [[package]] @@ -1088,21 +2936,16 @@ files = [ ] [[package]] -name = "urllib3" -version = "1.26.20" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
+name = "tzdata" +version = "2024.2" +description = "Provider of IANA time zone data" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +python-versions = ">=2" files = [ - {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, - {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, + {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, + {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, ] -[package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - [[package]] name = "urllib3" version = "2.2.3" @@ -1139,6 +2982,39 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "websocket-client" +version = "1.8.0" +description = "WebSocket client for Python with low level API options" +optional = false +python-versions = ">=3.8" +files = [ + {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, + {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, +] + +[package.extras] +docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] +optional = ["python-socks", "wsaccel"] +test = ["websockets"] + +[[package]] +name = "werkzeug" +version = "3.0.6" +description = "The comprehensive WSGI web application library." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "werkzeug-3.0.6-py3-none-any.whl", hash = "sha256:1bc0c2310d2fbb07b1dd1105eba2f7af72f322e1e455f2f93c993bee8c8a5f17"}, + {file = "werkzeug-3.0.6.tar.gz", hash = "sha256:a8dd59d4de28ca70471a34cba79bed5f7ef2e036a76b3ab0835474246eb41f8d"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + [[package]] name = "wrapt" version = "1.16.0" @@ -1218,7 +3094,195 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] +[[package]] +name = "xxhash" +version = "2.0.2" +description = "Python binding for xxHash" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "xxhash-2.0.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:dac3b94881b943bbe418f5829128b9c48f69a66f816ef8b72ee0129d676dbd7c"}, + {file = "xxhash-2.0.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:43fd97f332bd581639bb99fe8f09f7e9113d49cad4d21bef0620867f92c802c6"}, + {file = "xxhash-2.0.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:6e5058c3fa5b42ded9a303f1a5a42d3ff732cb54c108424c63e993fc3379513c"}, + {file = "xxhash-2.0.2-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:dfacce97a3ccb46089e358ceaeca9300298511673bf87596da66882af386f6c7"}, + {file = "xxhash-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:1dfa115c8e07b3e1d94ebd60a6d6ee16ea692efb890e245addb0d33b47ee1dee"}, + {file = "xxhash-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:fb28b0313c7582225373f343635674231518452331a9bdea8261d0e27b48594f"}, + {file = "xxhash-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:427851234a87bfe6636c90b89bd65b7ca913befff3c7bcd92a3568e635fccc92"}, + {file = "xxhash-2.0.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:0b92a01dc8dcada8827de140a5df83c9e8e5c190ef8bf972c98ebbe0924ee044"}, + {file = "xxhash-2.0.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:676d6964b8a9bdaf737ae6836b886ab53b2863c6aa00d43952b130a6130d1bdc"}, + {file = "xxhash-2.0.2-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:8362693a1ce5c1373f48f047470e7797ed17dfe5babc37ba7bef50d6e6f83a72"}, + {file = "xxhash-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:515747159fccd23fc9d1b7afeaa8bd7fc36884188b47491713d22032c5f9e502"}, + {file = "xxhash-2.0.2-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:e1787b9cea43f256f8d06c8429999d386a9da9cb000c265a4dde48dd08242528"}, + {file = "xxhash-2.0.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:d47ab1245ee4c7e6fc424ad990e4d7cfe0f206d617efe990fea34000a9242102"}, + {file = "xxhash-2.0.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:81ec049f4936a49311e1fc58036d7d682b5c83d6d16ba1c852a981588c90e027"}, + {file = "xxhash-2.0.2-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:df71aeedee74eaf670d1243b6722c8c77626f3b6e6cf2cd79f2e336b151749cd"}, + {file = "xxhash-2.0.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:a922315c8e20dae0d35e54b49fd7ee348fe0a5e2fd8ec02f6a74140e063fcdb3"}, + {file = "xxhash-2.0.2-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:22ddd484cd92d138feeec556387894b8ec529bab7f2feb3a177eb84baadee8c1"}, + {file = "xxhash-2.0.2-cp35-cp35m-win32.whl", hash = "sha256:b4964e7ddca1ef9d7addef40a9f5eaa97aeda367c1d895e392533c0d2f9c3b8e"}, + {file = "xxhash-2.0.2-cp35-cp35m-win_amd64.whl", hash = "sha256:6077fdb44f68920c4ac8e2f34b2a107c9a218f00a698253c824a0c6c1b9622a3"}, + {file = "xxhash-2.0.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:04ae5706ddfe0fd2b46cd0b6487d3edae7e724e27d732b055ffd0f9539c4afc5"}, + {file = "xxhash-2.0.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c4a892bc47b6ea92bbb82499a81882548ce990d62c1862b3834f1f70e8cf4423"}, + {file = "xxhash-2.0.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:57d43ce9594676b503c0a0a383481cb4e5cf736f88970bd41849fe15a68a5d48"}, + {file = "xxhash-2.0.2-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:c2e44d162c3361392dbde736ee8ba3d1a414f63e32be6c71186f2b0654559d26"}, + {file = "xxhash-2.0.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:0beb79835ca47af257f8126fccd9d5e0ba56ba7d39dab6f6b5a7acea4d8ac4b5"}, + {file = "xxhash-2.0.2-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:f2bef10c417c4667310cc240d49e521e6b5fc90c4ff77a1ec78649869685e8d3"}, + {file = "xxhash-2.0.2-cp36-cp36m-win32.whl", hash = "sha256:9b6bb1bd34a6365c790c328a604ec5a628059fef6e4486380caa89bc12787a6e"}, + {file = "xxhash-2.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:4243dbeb1ce09d359289844f0c54676343857fdc6a092184aea159fecdf6d9f3"}, + {file = "xxhash-2.0.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:71b38300e1803ab32ee787f89cdbc032b46ac5834eca9109d8fb576ae1a31741"}, + {file = "xxhash-2.0.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:a8a68d117178f15c96cb9ae2613f53db94e0fdb34ffc69c7ab600c899c7a966c"}, + {file = "xxhash-2.0.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:dd9c72520f790ce6eaa535cdad1a53ded22deab43766cfa7cef42834a9a65561"}, + {file = "xxhash-2.0.2-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:f95adf6091fa13ce19fab21fadb8d07210822320568d24a6405d6b557afc0411"}, + {file = "xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:00aaf882036d2a0fa7652cf9aeaaf2ad077b784c09ef8d60f5d97ebf0d47ffa1"}, + {file = "xxhash-2.0.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:bb8c0efad20da40da1aa56f36b929b965d1adede8a1d5b37b702d378a683e0dd"}, + {file = "xxhash-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:6fc0b8c21a181b771e1f0c25eb8a0a241af0126f1fc19f4c3cde7233de91326f"}, + {file = "xxhash-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b232b47a3aa825e0df14b1bd3e051dd327c8539e382728ddb81997d26de5256a"}, + {file = "xxhash-2.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:dc328d3d635ec851d6befdf6ced2134d587d3be973dbbbc489da24c0c88ecb01"}, + {file = "xxhash-2.0.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9e6e5e095417060bed45119c510d5bc846b62e2a8218cb3e5a19b3ccf12e4c18"}, + {file = "xxhash-2.0.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:b4b7d4d19c125738c5fc48356505dfbd63b3cdf826dd868a1b80a73de48729b7"}, + {file = "xxhash-2.0.2-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:686fcf2aff041df65470eccc7dcea5e7e77cfad99efcaba0c6f58bbd81846e10"}, + {file = "xxhash-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:cb3a196fd1d55ce86b1123cbf3ef6603f80f4d0b46541412bb5056b0563ef384"}, + {file = "xxhash-2.0.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:68d067427f2c6f7b3014e28bf4794b0876ab5f6366b53e1d6f59d275b4f19a8d"}, + {file = "xxhash-2.0.2-cp38-cp38-win32.whl", hash = "sha256:73649555656dd17e809b9b3c54855f4f72144024b0e6395cd37b5395fa0f48c3"}, + {file = "xxhash-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:dafd1066c99d448a7a1226f10766b61ff752aaad8a4392e4cae30aafefa6fff5"}, + {file = "xxhash-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb1e9e347c9810a272154814cf5ce33a6c3ac7d0d7cbcb066e92dd5f9fa4db8f"}, + {file = "xxhash-2.0.2-cp39-cp39-manylinux1_i686.whl", hash = 
"sha256:ebff22f1783f641c6c2b313bfc44d6cc620c17409ec512e67c7c6de809155880"}, + {file = "xxhash-2.0.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:b7640e043ac6e0f503eadb108e6971d69b0c95c23fbcac3e5632578f9f906050"}, + {file = "xxhash-2.0.2-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:db2352d375e6594620c462c029d3c1a1b18ff7168e470657e354f1b8b332d9dd"}, + {file = "xxhash-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:f49dbd3b8e4cc13f2df92fb3db39204e3258105a212e23784cbb340e415ae8ed"}, + {file = "xxhash-2.0.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e70059c5cc8f0cecd16d8cb0263de8f317239cabee3fa4af35c0a1ddaed2110e"}, + {file = "xxhash-2.0.2-cp39-cp39-win32.whl", hash = "sha256:a0199a07a264be96ed658ba3b4e9ee58a3c678e51a18e134e2518cf1a8171e18"}, + {file = "xxhash-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:173d3f662dc88de734bd622e46a3bbac6fd00e957b3e098fa8b75b141aa4354e"}, + {file = "xxhash-2.0.2-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:e94fdff9b102ca7c0969230d209f7ce17020db17a89d026ac45d8ffb9e4929ec"}, + {file = "xxhash-2.0.2-pp27-pypy_73-manylinux1_x86_64.whl", hash = "sha256:d7175cd7f490aae742d18eb9b519e74180958f88fa8ff47091727b3efb57bfbf"}, + {file = "xxhash-2.0.2-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:d707d2a053a5d55ccd2e59d7a228636cafeebb44c9ac3ca1c088f4d384c8c3a9"}, + {file = "xxhash-2.0.2-pp27-pypy_73-win32.whl", hash = "sha256:dad190caa293abbb39d96b4a09f121fc971d81eb19c96e4e0db89a99a7d59b93"}, + {file = "xxhash-2.0.2-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5dc3da5fa855dd8e35f24d20fabfcd29c0b3ac85a14dc2c329c029971ae4eeb7"}, + {file = "xxhash-2.0.2-pp36-pypy36_pp73-manylinux1_x86_64.whl", hash = "sha256:17a3b0a2ff20879ed5c9d9c178349e9c6257db11b193e4103282d7a78ef9cb08"}, + {file = "xxhash-2.0.2-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:c75f8375c80c3815f49a744ef1a8303577757eb9a2dc53bed33d9318b760fec6"}, + {file = "xxhash-2.0.2-pp36-pypy36_pp73-win32.whl", hash = "sha256:eb2670ed6c435189aeb479bfff990e00b849ae0ff49945632db74b2a2a08d192"}, + {file = "xxhash-2.0.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ff518ec1bd7cc33218f8f3325848c56e9c73c5df30138a64a89dd65ab1e1ffb5"}, + {file = "xxhash-2.0.2-pp37-pypy37_pp73-manylinux1_x86_64.whl", hash = "sha256:c4a0806ffb33c9d892b5565fa010c252c7e0f4d01ded901a637dfede624e4d0c"}, + {file = "xxhash-2.0.2-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:fdfac2014301da79cebcd8f9535c875f63242fe404d741cec5f70f400cc6a561"}, + {file = "xxhash-2.0.2-pp37-pypy37_pp73-win32.whl", hash = "sha256:357f6a52bd18a80635cf4c83f648c42fa0609713b4183929ed019f7627af4b68"}, + {file = "xxhash-2.0.2.tar.gz", hash = "sha256:b7bead8cf6210eadf9cecf356e17af794f57c0939a3d420a00d87ea652f87b49"}, +] + +[[package]] +name = "yarl" +version = "1.18.0" +description = "Yet another URL library" +optional = false +python-versions = ">=3.9" +files = [ + {file = "yarl-1.18.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:074fee89caab89a97e18ef5f29060ef61ba3cae6cd77673acc54bfdd3214b7b7"}, + {file = "yarl-1.18.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b026cf2c32daf48d90c0c4e406815c3f8f4cfe0c6dfccb094a9add1ff6a0e41a"}, + {file = "yarl-1.18.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ae38bd86eae3ba3d2ce5636cc9e23c80c9db2e9cb557e40b98153ed102b5a736"}, + {file = "yarl-1.18.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:685cc37f3f307c6a8e879986c6d85328f4c637f002e219f50e2ef66f7e062c1d"}, + {file = 
"yarl-1.18.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8254dbfce84ee5d1e81051ee7a0f1536c108ba294c0fdb5933476398df0654f3"}, + {file = "yarl-1.18.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20de4a8b04de70c49698dc2390b7fd2d18d424d3b876371f9b775e2b462d4b41"}, + {file = "yarl-1.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0a2074a37285570d54b55820687de3d2f2b9ecf1b714e482e48c9e7c0402038"}, + {file = "yarl-1.18.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f576ed278860df2721a5d57da3381040176ef1d07def9688a385c8330db61a1"}, + {file = "yarl-1.18.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3a3709450a574d61be6ac53d582496014342ea34876af8dc17cc16da32826c9a"}, + {file = "yarl-1.18.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:bd80ed29761490c622edde5dd70537ca8c992c2952eb62ed46984f8eff66d6e8"}, + {file = "yarl-1.18.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:32141e13a1d5a48525e519c9197d3f4d9744d818d5c7d6547524cc9eccc8971e"}, + {file = "yarl-1.18.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8b8d3e4e014fb4274f1c5bf61511d2199e263909fb0b8bda2a7428b0894e8dc6"}, + {file = "yarl-1.18.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:701bb4a8f4de191c8c0cc9a1e6d5142f4df880e9d1210e333b829ca9425570ed"}, + {file = "yarl-1.18.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a45d94075ac0647621eaaf693c8751813a3eccac455d423f473ffed38c8ac5c9"}, + {file = "yarl-1.18.0-cp310-cp310-win32.whl", hash = "sha256:34176bfb082add67cb2a20abd85854165540891147f88b687a5ed0dc225750a0"}, + {file = "yarl-1.18.0-cp310-cp310-win_amd64.whl", hash = "sha256:73553bbeea7d6ec88c08ad8027f4e992798f0abc459361bf06641c71972794dc"}, + {file = "yarl-1.18.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b8e8c516dc4e1a51d86ac975b0350735007e554c962281c432eaa5822aa9765c"}, + {file = "yarl-1.18.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2e6b4466714a73f5251d84b471475850954f1fa6acce4d3f404da1d55d644c34"}, + {file = "yarl-1.18.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c893f8c1a6d48b25961e00922724732d00b39de8bb0b451307482dc87bddcd74"}, + {file = "yarl-1.18.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13aaf2bdbc8c86ddce48626b15f4987f22e80d898818d735b20bd58f17292ee8"}, + {file = "yarl-1.18.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd21c0128e301851de51bc607b0a6da50e82dc34e9601f4b508d08cc89ee7929"}, + {file = "yarl-1.18.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:205de377bd23365cd85562c9c6c33844050a93661640fda38e0567d2826b50df"}, + {file = "yarl-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed69af4fe2a0949b1ea1d012bf065c77b4c7822bad4737f17807af2adb15a73c"}, + {file = "yarl-1.18.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e1c18890091aa3cc8a77967943476b729dc2016f4cfe11e45d89b12519d4a93"}, + {file = "yarl-1.18.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:91b8fb9427e33f83ca2ba9501221ffaac1ecf0407f758c4d2f283c523da185ee"}, + {file = "yarl-1.18.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:536a7a8a53b75b2e98ff96edb2dfb91a26b81c4fed82782035767db5a465be46"}, + {file = "yarl-1.18.0-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:a64619a9c47c25582190af38e9eb382279ad42e1f06034f14d794670796016c0"}, + {file = "yarl-1.18.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c73a6bbc97ba1b5a0c3c992ae93d721c395bdbb120492759b94cc1ac71bc6350"}, + {file = "yarl-1.18.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a173401d7821a2a81c7b47d4e7d5c4021375a1441af0c58611c1957445055056"}, + {file = "yarl-1.18.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7520e799b1f84e095cce919bd6c23c9d49472deeef25fe1ef960b04cca51c3fc"}, + {file = "yarl-1.18.0-cp311-cp311-win32.whl", hash = "sha256:c4cb992d8090d5ae5f7afa6754d7211c578be0c45f54d3d94f7781c495d56716"}, + {file = "yarl-1.18.0-cp311-cp311-win_amd64.whl", hash = "sha256:52c136f348605974c9b1c878addd6b7a60e3bf2245833e370862009b86fa4689"}, + {file = "yarl-1.18.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ece25e2251c28bab737bdf0519c88189b3dd9492dc086a1d77336d940c28ced"}, + {file = "yarl-1.18.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:454902dc1830d935c90b5b53c863ba2a98dcde0fbaa31ca2ed1ad33b2a7171c6"}, + {file = "yarl-1.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:01be8688fc211dc237e628fcc209dda412d35de7642453059a0553747018d075"}, + {file = "yarl-1.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d26f1fa9fa2167bb238f6f4b20218eb4e88dd3ef21bb8f97439fa6b5313e30d"}, + {file = "yarl-1.18.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b234a4a9248a9f000b7a5dfe84b8cb6210ee5120ae70eb72a4dcbdb4c528f72f"}, + {file = "yarl-1.18.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe94d1de77c4cd8caff1bd5480e22342dbd54c93929f5943495d9c1e8abe9f42"}, + {file = "yarl-1.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b4c90c5363c6b0a54188122b61edb919c2cd1119684999d08cd5e538813a28e"}, + {file = "yarl-1.18.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a98ecadc5a241c9ba06de08127ee4796e1009555efd791bac514207862b43d"}, + {file = "yarl-1.18.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9106025c7f261f9f5144f9aa7681d43867eed06349a7cfb297a1bc804de2f0d1"}, + {file = "yarl-1.18.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:f275ede6199d0f1ed4ea5d55a7b7573ccd40d97aee7808559e1298fe6efc8dbd"}, + {file = "yarl-1.18.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f7edeb1dcc7f50a2c8e08b9dc13a413903b7817e72273f00878cb70e766bdb3b"}, + {file = "yarl-1.18.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c083f6dd6951b86e484ebfc9c3524b49bcaa9c420cb4b2a78ef9f7a512bfcc85"}, + {file = "yarl-1.18.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:80741ec5b471fbdfb997821b2842c59660a1c930ceb42f8a84ba8ca0f25a66aa"}, + {file = "yarl-1.18.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b1a3297b9cad594e1ff0c040d2881d7d3a74124a3c73e00c3c71526a1234a9f7"}, + {file = "yarl-1.18.0-cp312-cp312-win32.whl", hash = "sha256:cd6ab7d6776c186f544f893b45ee0c883542b35e8a493db74665d2e594d3ca75"}, + {file = "yarl-1.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:039c299a0864d1f43c3e31570045635034ea7021db41bf4842693a72aca8df3a"}, + {file = "yarl-1.18.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6fb64dd45453225f57d82c4764818d7a205ee31ce193e9f0086e493916bd4f72"}, + {file = "yarl-1.18.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3adaaf9c6b1b4fc258584f4443f24d775a2086aee82d1387e48a8b4f3d6aecf6"}, + {file = 
"yarl-1.18.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:da206d1ec78438a563c5429ab808a2b23ad7bc025c8adbf08540dde202be37d5"}, + {file = "yarl-1.18.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:576d258b21c1db4c6449b1c572c75d03f16a482eb380be8003682bdbe7db2f28"}, + {file = "yarl-1.18.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c60e547c0a375c4bfcdd60eef82e7e0e8698bf84c239d715f5c1278a73050393"}, + {file = "yarl-1.18.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e3818eabaefb90adeb5e0f62f047310079d426387991106d4fbf3519eec7d90a"}, + {file = "yarl-1.18.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5f72421246c21af6a92fbc8c13b6d4c5427dfd949049b937c3b731f2f9076bd"}, + {file = "yarl-1.18.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7fa7d37f2ada0f42e0723632993ed422f2a679af0e200874d9d861720a54f53e"}, + {file = "yarl-1.18.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:42ba84e2ac26a3f252715f8ec17e6fdc0cbf95b9617c5367579fafcd7fba50eb"}, + {file = "yarl-1.18.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:6a49ad0102c0f0ba839628d0bf45973c86ce7b590cdedf7540d5b1833ddc6f00"}, + {file = "yarl-1.18.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:96404e8d5e1bbe36bdaa84ef89dc36f0e75939e060ca5cd45451aba01db02902"}, + {file = "yarl-1.18.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a0509475d714df8f6d498935b3f307cd122c4ca76f7d426c7e1bb791bcd87eda"}, + {file = "yarl-1.18.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1ff116f0285b5c8b3b9a2680aeca29a858b3b9e0402fc79fd850b32c2bcb9f8b"}, + {file = "yarl-1.18.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e2580c1d7e66e6d29d6e11855e3b1c6381971e0edd9a5066e6c14d79bc8967af"}, + {file = "yarl-1.18.0-cp313-cp313-win32.whl", hash = "sha256:14408cc4d34e202caba7b5ac9cc84700e3421a9e2d1b157d744d101b061a4a88"}, + {file = "yarl-1.18.0-cp313-cp313-win_amd64.whl", hash = "sha256:1db1537e9cb846eb0ff206eac667f627794be8b71368c1ab3207ec7b6f8c5afc"}, + {file = "yarl-1.18.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fa2c9cb607e0f660d48c54a63de7a9b36fef62f6b8bd50ff592ce1137e73ac7d"}, + {file = "yarl-1.18.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c0f4808644baf0a434a3442df5e0bedf8d05208f0719cedcd499e168b23bfdc4"}, + {file = "yarl-1.18.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7db9584235895a1dffca17e1c634b13870852094f6389b68dcc6338086aa7b08"}, + {file = "yarl-1.18.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:309f8d27d6f93ceeeb80aa6980e883aa57895270f7f41842b92247e65d7aeddf"}, + {file = "yarl-1.18.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:609ffd44fed2ed88d9b4ef62ee860cf86446cf066333ad4ce4123505b819e581"}, + {file = "yarl-1.18.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f172b8b2c72a13a06ea49225a9c47079549036ad1b34afa12d5491b881f5b993"}, + {file = "yarl-1.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d89ae7de94631b60d468412c18290d358a9d805182373d804ec839978b120422"}, + {file = "yarl-1.18.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:466d31fd043ef9af822ee3f1df8fdff4e8c199a7f4012c2642006af240eade17"}, + {file = "yarl-1.18.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:7609b8462351c4836b3edce4201acb6dd46187b207c589b30a87ffd1813b48dc"}, + {file = "yarl-1.18.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:d9d4f5e471e8dc49b593a80766c2328257e405f943c56a3dc985c125732bc4cf"}, + {file = "yarl-1.18.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:67b336c15e564d76869c9a21316f90edf546809a5796a083b8f57c845056bc01"}, + {file = "yarl-1.18.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b212452b80cae26cb767aa045b051740e464c5129b7bd739c58fbb7deb339e7b"}, + {file = "yarl-1.18.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:38b39b7b3e692b6c92b986b00137a3891eddb66311b229d1940dcbd4f025083c"}, + {file = "yarl-1.18.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a7ee6884a8848792d58b854946b685521f41d8871afa65e0d4a774954e9c9e89"}, + {file = "yarl-1.18.0-cp39-cp39-win32.whl", hash = "sha256:b4095c5019bb889aa866bf12ed4c85c0daea5aafcb7c20d1519f02a1e738f07f"}, + {file = "yarl-1.18.0-cp39-cp39-win_amd64.whl", hash = "sha256:2d90f2e4d16a5b0915ee065218b435d2ef619dd228973b1b47d262a6f7cd8fa5"}, + {file = "yarl-1.18.0-py3-none-any.whl", hash = "sha256:dbf53db46f7cf176ee01d8d98c39381440776fcda13779d269a8ba664f69bec0"}, + {file = "yarl-1.18.0.tar.gz", hash = "sha256:20d95535e7d833889982bfe7cc321b7f63bf8879788fee982c76ae2b24cfb715"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" +propcache = ">=0.2.0" + +[[package]] +name = "zipp" +version = "3.21.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, + {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +type = ["pytest-mypy"] + [metadata] lock-version = "2.0" -python-versions = ">=3.8,<3.12" -content-hash = "3d09100d93d16e1eeac2b858cc446a7410e1699fe6c1cda549a04e37655db4e0" +python-versions = ">=3.10,<3.12" +content-hash = "dc205fd0f31f54b35ed660d4ed566ea09a46000b7da11da526160196e1b9e686" diff --git a/python/aibrix/pyproject.toml b/python/aibrix/pyproject.toml index eea2c1bc..50ba8563 100644 --- a/python/aibrix/pyproject.toml +++ b/python/aibrix/pyproject.toml @@ -35,7 +35,7 @@ aibrix_runtime = 'aibrix.app:main' aibrix_download = 'aibrix.downloader.__main__:main' [tool.poetry.dependencies] -python = ">=3.8,<3.12" +python = ">=3.10,<3.12" huggingface-hub = "^0.24.6" tos = "2.8.0" boto3 = "^1.35.5" @@ -46,6 +46,16 @@ prometheus-client = "^0.20.0" types-requests = "^2.31.0" httpx = "^0.27.2" hf-transfer = "^0.1.8" +types-redis = "^4.6.0.20241004" +redis = "^5.2.0" +kubernetes = "^31.0.0" +numpy = "1.26.4" +pandas = "^2.2.3" +pulp = "2.8.0" +incdbscan = "^0.1.0" +aiohttp = "^3.11.7" +dash = "^2.18.2" +matplotlib = "^3.9.2" [tool.poetry.group.dev.dependencies]