Skip to content

Commit

Permalink
Convert CUDA env TGI variables to LMI
Browse files Browse the repository at this point in the history
  • Loading branch information
sindhuvahinis committed Jun 3, 2024
1 parent 99fd2fe commit 8f99f3b
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions serving/docker/dockerd-entrypoint-with-cuda-compat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,19 @@ translateTGIToLMI "SM_NUM_GPUS" "TENSOR_PARALLEL_DEGREE"
# Map remaining TGI environment variables to their LMI equivalents.
# Each entry is "TGI_NAME:LMI_NAME"; translateTGIToLMI (defined above)
# performs the actual translation.
for tgi_to_lmi_pair in \
  "MAX_CONCURRENT_REQUESTS:SERVING_JOB_QUEUE_SIZE" \
  "MAX_BATCH_PREFILL_TOKENS:OPTION_MAX_ROLLING_BATCH_PREFILL_TOKENS" \
  "MAX_BATCH_SIZE:OPTION_MAX_ROLLING_BATCH_SIZE"; do
  translateTGIToLMI "${tgi_to_lmi_pair%%:*}" "${tgi_to_lmi_pair#*:}"
done
# TGI's ENABLE_CUDA_GRAPHS and LMI's OPTION_ENFORCE_EAGER have INVERTED
# semantics: CUDA graphs enabled means eager execution must be disabled.
# A plain translateTGIToLMI call would copy the boolean un-inverted (and,
# by setting the target variable, would make the -z guard below dead code),
# so this mapping is handled explicitly instead.
# A user-provided OPTION_ENFORCE_EAGER always takes precedence.
if [[ -n "$ENABLE_CUDA_GRAPHS" && -z "$OPTION_ENFORCE_EAGER" ]]; then
  if [[ "$ENABLE_CUDA_GRAPHS" == "true" ]]; then
    export OPTION_ENFORCE_EAGER=false
  else
    # Any value other than "true" is treated as CUDA graphs disabled.
    export OPTION_ENFORCE_EAGER=true
  fi
fi
# CUDA_MEMORY_FRACTION maps to a backend-specific LMI option: the TensorRT-LLM
# backend uses the KV-cache free-memory fraction, all other backends use the
# generic GPU memory utilization setting.
case "$SERVING_FEATURES" in
  trtllm)
    translateTGIToLMI "CUDA_MEMORY_FRACTION" "OPTION_KV_CACHE_FREE_GPU_MEM_FRACTION"
    ;;
  *)
    translateTGIToLMI "CUDA_MEMORY_FRACTION" "OPTION_GPU_MEMORY_UTILIZATION"
    ;;
esac

if [[ "$1" = "serve" ]]; then
shift 1
Expand Down

0 comments on commit 8f99f3b

Please sign in to comment.