Commit

Merge branch 'main' into main
snova-jorgep authored Oct 7, 2024
2 parents 05276fa + 3d933b9 commit b29a78e
Showing 9 changed files with 156 additions and 111 deletions.
32 changes: 20 additions & 12 deletions benchmarking/notebooks/analyze-results.ipynb
@@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 1,
"id": "dacfe98a-e81b-4089-9506-97a652993b5b",
"metadata": {
"tags": []
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 2,
"id": "17f7abe9-ed9e-466c-b034-577489aaf98b",
"metadata": {
"tags": []
@@ -48,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 3,
"id": "bdb61de7",
"metadata": {},
"outputs": [],
@@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 4,
"id": "2707495e",
"metadata": {},
"outputs": [],
@@ -263,7 +263,7 @@
" \"server_output_token_per_s_per_request\": \"Avg. server output tokens per sec per request\",\n",
" \"server_number_output_tokens\": \"Total output tokens\",\n",
" \"server_ttft_s\": \"Avg. server TTFT (s)\",\n",
" \"Counts\": \"Total number of requests\",\n",
" \"Counts\": \"Total number of completed requests\",\n",
" \"server_total_output_tokens_per_s\": \"Avg. server total output tokens per second\"\n",
"}, \n",
"inplace=True)\n",
@@ -295,11 +295,11 @@
"import numpy as np\n",
"\n",
"# calculate number of batches executed in each batch size\n",
"df_summary['num_batches_executed'] = np.ceil(df_summary['Total number of requests'] / df_summary.index.get_level_values('Batch size'))\n",
"df_summary['num_batches_executed'] = np.ceil(df_summary['Total number of completed requests'] / df_summary.index.get_level_values('Batch size'))\n",
"\n",
"# calculate average time taken per request in each batch size\n",
"df_summary['output_tokens_per_request'] = df_summary['Total output tokens']/df_summary['Total number of requests']\n",
"df_summary['time_taken_per_request'] = df_summary['output_tokens_per_request']/df_summary['Avg. server tokens per sec per request']\n",
"df_summary['output_tokens_per_request'] = df_summary['Total output tokens']/df_summary['Total number of completed requests']\n",
"df_summary['time_taken_per_request'] = df_summary['output_tokens_per_request']/df_summary['Avg. server output tokens per sec per request']\n",
"\n",
"# calculate total ttft and generation times across all batch sizes\n",
"total_wait_time_ttft = (df_summary['num_batches_executed']*df_summary['Avg. server TTFT (s)']).sum()\n",
@@ -322,7 +322,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 13,
"id": "79a2adde",
"metadata": {},
"outputs": [],
@@ -385,13 +385,21 @@
"source": [
"plot_requests_gantt_chart(df_user).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d4176e3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "benchmark_venv_py312",
"language": "python",
"name": "python3"
"name": "benchmark_venv_py312"
},
"language_info": {
"codemirror_mode": {
@@ -403,7 +411,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.11.9"
}
},
"nbformat": 4,
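The analyze-results notebook changes above rename the "Counts" column to "Total number of completed requests" and derive per-batch timing from the renamed columns. A minimal sketch of that calculation, using a made-up summary frame (the column names follow the diff; the numbers are only for illustration):

import numpy as np
import pandas as pd

# Hypothetical summary frame indexed by batch size; column names mirror the renamed notebook columns.
df_summary = pd.DataFrame(
    {
        'Total number of completed requests': [32, 64],
        'Total output tokens': [6400, 12800],
        'Avg. server output tokens per sec per request': [50.0, 40.0],
        'Avg. server TTFT (s)': [0.4, 0.6],
    },
    index=pd.Index([8, 16], name='Batch size'),
)

# Number of batches executed for each batch size.
df_summary['num_batches_executed'] = np.ceil(
    df_summary['Total number of completed requests'] / df_summary.index.get_level_values('Batch size')
)

# Average output length and time taken per request, based on per-request output throughput.
df_summary['output_tokens_per_request'] = (
    df_summary['Total output tokens'] / df_summary['Total number of completed requests']
)
df_summary['time_taken_per_request'] = (
    df_summary['output_tokens_per_request'] / df_summary['Avg. server output tokens per sec per request']
)

# Total TTFT wait time accumulated across all batch sizes.
total_wait_time_ttft = (df_summary['num_batches_executed'] * df_summary['Avg. server TTFT (s)']).sum()
print(df_summary[['num_batches_executed', 'output_tokens_per_request', 'time_taken_per_request']])
print(f'Total TTFT wait time: {total_wait_time_ttft:.2f} s')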
28 changes: 14 additions & 14 deletions benchmarking/notebooks/multiple-models-benchmark.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -39,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -145,20 +145,20 @@
"\n",
" # Extract relevant fields from each JSON object and append to the data list\n",
" for item in json_data:\n",
" data.append(\n",
" {\n",
" 'start_time': item['start_time'],\n",
" 'end_time': item['end_time'],\n",
" 'server_ttft_s': item['server_ttft_s'],\n",
" 'model_name': model_name,\n",
" }\n",
" )\n",
" if pd.isnull(item['error_code']):\n",
" data.append(\n",
" {\n",
" 'start_time': item['start_time'],\n",
" 'end_time': item['end_time'],\n",
" 'server_ttft_s': item['server_ttft_s'],\n",
" 'model_name': model_name,\n",
" }\n",
" )\n",
"\n",
" # Create a DataFrame from the data list\n",
" df = pd.DataFrame(data)\n",
" return df\n",
"\n",
"\n",
"# Get the DataFrame\n",
"df = read_json_files_to_df(results_dir)\n",
"\n",
@@ -257,9 +257,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "benchmark_venv",
"display_name": "benchmark_venv_py312",
"language": "python",
"name": "benchmark_venv"
"name": "benchmark_venv_py312"
},
"language_info": {
"codemirror_mode": {
@@ -271,7 +271,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.11.9"
}
},
"nbformat": 4,
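The multiple-models notebook now skips responses that carry an error code before building the timing DataFrame. A self-contained sketch of that filtering step (field names match the per-request records referenced in the diff; the surrounding file reading and plotting are omitted):

import pandas as pd

def build_response_df(json_data, model_name):
    """Collect timing rows for a model, skipping records that carry an error code."""
    data = []
    for item in json_data:
        # Only keep responses that completed without an error.
        if pd.isnull(item['error_code']):
            data.append(
                {
                    'start_time': item['start_time'],
                    'end_time': item['end_time'],
                    'server_ttft_s': item['server_ttft_s'],
                    'model_name': model_name,
                }
            )
    return pd.DataFrame(data)

# Example: the failed request (error_code 429) is dropped, the successful one is kept.
sample = [
    {'start_time': '2024-10-07 10:00:00', 'end_time': '2024-10-07 10:00:05',
     'server_ttft_s': 0.4, 'error_code': None},
    {'start_time': '2024-10-07 10:00:01', 'end_time': '2024-10-07 10:00:01',
     'server_ttft_s': float('nan'), 'error_code': 429},
]
print(build_response_df(sample, 'model-a'))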
3 changes: 2 additions & 1 deletion benchmarking/src/llmperf/sambanova_client.py
@@ -539,7 +539,8 @@ def llm_request(request_config: RequestConfig, tokenizer: AutoTokenizer) -> Tupl
error_code = getattr(
e,
'code',
'Error while running LLM API calls. Check your model name, LLM API type, env variables and endpoint status',
"""Error while running LLM API requests.
Check your model name, LLM API type, env variables and endpoint status.""",
)
error_message = str(e)
metrics[common_metrics.ERROR_MSG] = error_message
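The sambanova_client change only rewrites the fallback string handed to getattr when an exception exposes no code attribute. A small illustration of that pattern; the exception class below is invented for the example:

class FakeAPIError(Exception):
    """Stand-in exception; real API errors may or may not expose a .code attribute."""
    def __init__(self, message, code=None):
        super().__init__(message)
        if code is not None:
            self.code = code

def extract_error_code(e: Exception) -> str:
    # Fall back to a generic troubleshooting hint when the exception has no .code.
    return getattr(
        e,
        'code',
        """Error while running LLM API requests.
        Check your model name, LLM API type, env variables and endpoint status.""",
    )

print(extract_error_code(FakeAPIError('rate limited', code='429')))  # -> '429'
print(extract_error_code(FakeAPIError('connection reset')))          # -> fallback hint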
69 changes: 49 additions & 20 deletions benchmarking/src/performance_evaluation.py
@@ -729,38 +729,45 @@ def calculate_switching_time(self, llm_responses: list[LLMResponse]) -> list[LLM
responses_ttfts = []

for llm_response in llm_responses:
request_idx = llm_response.request_config.request_idx
start_time = llm_response.metrics['start_time']
server_ttft_s = llm_response.metrics['server_ttft_s']
responses_ttfts.append(
{'request_idx': request_idx, 'start_time': start_time, 'server_ttft_s': server_ttft_s}
)
if pd.isnull(llm_response.metrics['error_code']):
request_idx = llm_response.request_config.request_idx
start_time = llm_response.metrics['start_time']
server_ttft_s = llm_response.metrics['server_ttft_s']
responses_ttfts.append(
{'request_idx': request_idx, 'start_time': start_time, 'server_ttft_s': server_ttft_s}
)

df_responses = pd.DataFrame(responses_ttfts)
df_valid_responses = pd.DataFrame(responses_ttfts)

# transforming str to date time for sorting
df_responses['start_time'] = pd.to_datetime(df_responses['start_time'])
df_responses = df_responses.sort_values(by=['start_time'])
df_valid_responses['start_time'] = pd.to_datetime(df_valid_responses['start_time'])
df_valid_responses = df_valid_responses.sort_values(by=['start_time'])

# initialize a column for the switching time
df_responses['server_switching_time'] = None
df_valid_responses['server_switching_time'] = None

# calculate switching time
first_ttft = df_responses['server_ttft_s'].iloc[0]
mean_ttft = df_responses['server_ttft_s'].iloc[1:].mean()
std_ttft = df_responses['server_ttft_s'].iloc[1:].std()
first_ttft = df_valid_responses['server_ttft_s'].iloc[0]
mean_ttft = df_valid_responses['server_ttft_s'].iloc[1:].mean()
std_ttft = df_valid_responses['server_ttft_s'].iloc[1:].std()
std_ttft = 1e-16 if np.isnan(std_ttft) else std_ttft

switching_time = first_ttft - mean_ttft
outlier_switching_time = None

if switching_time > (mean_ttft + 3 * std_ttft):
df_responses['server_switching_time'].iloc[0] = switching_time
outlier_switching_time = switching_time
df_valid_responses['server_switching_time'].iloc[0] = outlier_switching_time

# assign switching time back to request object
for llm_response in llm_responses:
metrics = llm_response.metrics
server_switching_time = df_responses[
df_responses['request_idx'] == llm_response.request_config.request_idx
].server_switching_time.values[0]

if llm_response.request_config.request_idx == df_valid_responses.head(1)['request_idx'].values[0]:
server_switching_time = df_valid_responses.head(1)['server_switching_time'].values[0]
else:
server_switching_time = None

llm_response.metrics = self.add_metric_after_key(
metrics,
new_key='server_switching_time',
Expand Down Expand Up @@ -850,17 +857,39 @@ def get_token_throughput_latencies(
thread.join()

# Error handling
if llm_responses[0].metrics['error_code']:
error_codes = [llm_response.metrics['error_code'] for llm_response in llm_responses]

if not any([pd.isnull(error_code) for error_code in error_codes]):
unique_error_codes = list(
set(
[
llm_response.metrics['error_code']
for llm_response in llm_responses
if not pd.isnull(llm_response.metrics['error_code'])
]
)
)
unique_error_msgs = list(
set(
[
llm_response.metrics['error_msg']
for llm_response in llm_responses
if not pd.isnull(llm_response.metrics['error_code'])
]
)
)
nl = '\n'
raise Exception(
f"""Unexpected error happened when executing requests: {llm_responses[0].metrics['error_code']}.
Additional message: {llm_responses[0].metrics['error_msg']}"""
f"""Unexpected error happened when executing requests: {f'{nl}-'.join(unique_error_codes)}{nl}"""
+ f"""Additional messages: {f'{nl}-'.join(unique_error_msgs)}"""
)

# Capture end time and notify user
end_time = time.monotonic()
logger.info('Tasks Executed!')
logger.info(f'Results for token benchmark for {self.model_name} queried with the {self.llm_api} api.')

# Calculate switching time
llm_responses = self.calculate_switching_time(llm_responses)

# Build a metrics summary for the results of the benchmarking run
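The calculate_switching_time rework shown above first discards errored responses and then flags only the earliest request when its TTFT is a 3-sigma outlier relative to the remaining requests. A self-contained sketch of that outlier test, assuming each response's metrics include 'start_time', 'server_ttft_s' and 'error_code' as in the diff:

import numpy as np
import pandas as pd

def detect_switching_time(response_metrics: list) -> float | None:
    """Return the model-switching overhead if the first request's TTFT is an outlier, else None."""
    # Keep only responses that completed without an error code.
    rows = [m for m in response_metrics if pd.isnull(m['error_code'])]
    df_valid = pd.DataFrame(rows)

    # Sort by start time so the first row is the first request actually issued.
    df_valid['start_time'] = pd.to_datetime(df_valid['start_time'])
    df_valid = df_valid.sort_values(by=['start_time'])

    first_ttft = df_valid['server_ttft_s'].iloc[0]
    mean_ttft = df_valid['server_ttft_s'].iloc[1:].mean()
    std_ttft = df_valid['server_ttft_s'].iloc[1:].std()
    std_ttft = 1e-16 if np.isnan(std_ttft) else std_ttft  # guard when only one valid sample remains

    switching_time = first_ttft - mean_ttft
    # Flag the first request only when its extra latency is far outside the steady-state TTFT spread.
    if switching_time > (mean_ttft + 3 * std_ttft):
        return switching_time
    return None

# Example: the first request pays a noticeable warm-up/switching penalty.
metrics = [
    {'start_time': '2024-10-07 10:00:00', 'server_ttft_s': 4.0, 'error_code': None},
    {'start_time': '2024-10-07 10:00:01', 'server_ttft_s': 0.5, 'error_code': None},
    {'start_time': '2024-10-07 10:00:02', 'server_ttft_s': 0.6, 'error_code': None},
]
print(detect_switching_time(metrics))  # -> 3.45 (first TTFT minus the mean of the remaining TTFTs)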
37 changes: 4 additions & 33 deletions enterprise_knowledge_retriever/src/document_retrieval.py
@@ -1,5 +1,4 @@
import os
import shutil
import sys
from typing import Any, Dict, List, Optional, Tuple

@@ -17,7 +16,6 @@
from langchain_core.output_parsers import StrOutputParser
from langchain_core.retrievers import BaseRetriever
from langchain_core.vectorstores.base import VectorStoreRetriever
from streamlit.runtime.uploaded_file_manager import UploadedFile
from transformers import AutoModelForSequenceClassification, AutoTokenizer

current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -184,14 +182,12 @@ def set_llm(self) -> LLM:
)
return llm

def parse_doc(
self, docs: List[UploadedFile], additional_metadata: Optional[Dict[str, Any]] = None
) -> List[Document]:
def parse_doc(self, doc_folder: str, additional_metadata: Optional[Dict[str, Any]] = None) -> List[Document]:
"""
Parse the uploaded documents and return a list of LangChain documents.
Parse specified documents and return a list of LangChain documents.
Args:
docs (List[UploadFile]): A list of uploaded files.
doc_folder (str): Path to the documents.
additional_metadata (Optional[Dict], optional): Additional metadata to include in the processed documents.
Defaults to an empty dictionary.
@@ -201,33 +197,8 @@ def parse_doc(
if additional_metadata is None:
additional_metadata = {}

# Create the data/tmp folder if it doesn't exist
temp_folder = os.path.join(kit_dir, 'data/tmp')
if not os.path.exists(temp_folder):
os.makedirs(temp_folder)
else:
# If there are already files there, delete them
for filename in os.listdir(temp_folder):
file_path = os.path.join(temp_folder, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print(f'Failed to delete {file_path}. Reason: {e}')

# Save all selected files to the tmp dir with their file names
for doc in docs:
assert hasattr(doc, 'name'), 'doc has no attribute name.'
assert callable(doc.getvalue), 'doc has no method getvalue.'
temp_file = os.path.join(temp_folder, doc.name)
with open(temp_file, 'wb') as f:
f.write(doc.getvalue())

# Pass in the temp folder for processing into the parse_doc_universal function
_, _, langchain_docs = parse_doc_universal(
doc=temp_folder, additional_metadata=additional_metadata, lite_mode=self.pdf_only_mode
doc=doc_folder, additional_metadata=additional_metadata, lite_mode=self.pdf_only_mode
)

return langchain_docs
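With the document_retrieval change, parse_doc no longer stages Streamlit uploads into data/tmp itself; callers now pass a folder that already holds the documents. A sketch of what a caller might do before invoking the new signature (the staging helper below is illustrative and not part of the kit; only parse_doc(doc_folder, additional_metadata=...) comes from the diff):

import os
import shutil

def stage_uploaded_files(uploaded_files, doc_folder: str) -> str:
    """Write uploaded file objects into doc_folder so it can be passed to parse_doc."""
    # Start from a clean folder, mirroring the cleanup the old parse_doc performed internally.
    if os.path.exists(doc_folder):
        shutil.rmtree(doc_folder)
    os.makedirs(doc_folder)

    for uploaded in uploaded_files:
        # Each uploaded object is expected to expose .name and .getvalue(),
        # like Streamlit's UploadedFile.
        with open(os.path.join(doc_folder, uploaded.name), 'wb') as f:
            f.write(uploaded.getvalue())
    return doc_folder

# The staged folder is then passed straight to the new parse_doc signature:
#   langchain_docs = document_retrieval.parse_doc(doc_folder, additional_metadata={...})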