
Commit

[ci] Updating lmi-dist ci tests for rubikon-engine
Aaqib Ansari committed Mar 22, 2024
1 parent cfae4c0 commit fba9e9d
Showing 10 changed files with 267 additions and 591 deletions.
48 changes: 24 additions & 24 deletions .github/workflows/llm_integration_p4d.yml
@@ -64,12 +64,12 @@ jobs:
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve
python3 llm/client.py lmi_dist_aiccl mixtral-8x7b-aiccl
if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
echo "aiccl backend not used"
return 1
else
echo "Using aiccl backend"
fi
# if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
# echo "aiccl backend not used"
# return 1
# else
# echo "Using aiccl backend"
# fi
docker rm -f $(docker ps -aq)
- name: Test Llama-2-70B with aiccl backend
working-directory: tests/integration
@@ -79,12 +79,12 @@ jobs:
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve
python3 llm/client.py lmi_dist_aiccl llama-2-70b-aiccl
if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
echo "aiccl backend not used"
return 1
else
echo "Using aiccl backend"
fi
# if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
# echo "aiccl backend not used"
# return 1
# else
# echo "Using aiccl backend"
# fi
docker rm -f $(docker ps -aq)
- name: Test codellama/CodeLlama-34b-hf with aiccl backend
working-directory: tests/integration
@@ -94,12 +94,12 @@ jobs:
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve
python3 llm/client.py lmi_dist_aiccl codellama-34b-aiccl
if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
echo "aiccl backend not used"
return 1
else
echo "Using aiccl backend"
fi
# if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
# echo "aiccl backend not used"
# return 1
# else
# echo "Using aiccl backend"
# fi
docker rm -f $(docker ps -aq)
- name: Test tiiuae/falcon-40b with aiccl backend
working-directory: tests/integration
@@ -109,12 +109,12 @@ jobs:
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve
python3 llm/client.py lmi_dist_aiccl falcon-40b-aiccl
if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
echo "aiccl backend not used"
return 1
else
echo "Using aiccl backend"
fi
# if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
# echo "aiccl backend not used"
# return 1
# else
# echo "Using aiccl backend"
# fi
docker rm -f $(docker ps -aq)
- name: Remove models dir
working-directory: tests/integration
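A note on the check that this commit comments out in the four steps above: "return 1" is only valid inside a shell function or a sourced script, so if the aiccl-backend verification is re-enabled later, the idiomatic way to fail the step is "exit 1". A minimal sketch of the same check, assuming the step runs under the default bash shell, a single running container, and the original threshold of 8 matching log lines (one per GPU on a p4d instance):

# Hypothetical re-enabled check, not part of this commit; assumes one running
# container and an expected count of 8 aiccl start-up log lines.
aiccl_count=$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')
if [ "$aiccl_count" -lt 8 ]; then
  echo "aiccl backend not used (found $aiccl_count matching log lines)"
  exit 1  # exit, not return: this code is not inside a function
else
  echo "Using aiccl backend"
fi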
33 changes: 21 additions & 12 deletions .github/workflows/rolling_batch_integration.yml
@@ -299,14 +299,32 @@ jobs:
serve -m test=file:/opt/ml/model/test/
python3 llm/client.py lmi_dist octocoder
docker rm -f $(docker ps -aq)
- name: Test gpt-neox-20b-bitsandbytes
- name: Test speculative-llama-13b
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py lmi_dist gpt-neox-20b-bitsandbytes
python3 llm/prepare.py lmi_dist speculative-llama-13b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve -m test=file:/opt/ml/model/test/
python3 llm/client.py lmi_dist gpt-neox-20b-bitsandbytes
python3 llm/client.py lmi_dist speculative-llama-13b
docker rm -f $(docker ps -aq)
- name: Test starcoder2-7b
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py lmi_dist starcoder2-7b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve -m test=file:/opt/ml/model/test/
python3 llm/client.py lmi_dist starcoder2-7b
docker rm -f $(docker ps -aq)
- name: Test gemma-7b
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py lmi_dist gemma-7b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve -m test=file:/opt/ml/model/test/
python3 llm/client.py lmi_dist gemma-7b
docker rm -f $(docker ps -aq)
- name: Test llama2-13b-gptq
working-directory: tests/integration
@@ -426,15 +444,6 @@ jobs:
serve -m test=file:/opt/ml/model/test/
python3 llm/client.py vllm phi-2
docker rm -f $(docker ps -aq)
- name: Test Speculative Decoding with LLAMA 13B model
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py vllm speculative-llama-13b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve -m test=file:/opt/ml/model/test/
python3 llm/client.py vllm speculative-llama-13b
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
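The lmi_dist steps in this workflow all repeat the same prepare/launch/client/cleanup sequence, with only the model handle changing. As a purely illustrative refactor (not part of this commit), the pattern could be wrapped in a helper, assuming it runs from tests/integration with DJLSERVING_DOCKER_TAG exported:

# Hypothetical helper; the model handles (speculative-llama-13b, starcoder2-7b,
# gemma-7b) are the ones added to the workflow by this commit.
run_lmi_dist_test() {
  local model="$1"
  rm -rf models
  python3 llm/prepare.py lmi_dist "$model"
  ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
    serve -m test=file:/opt/ml/model/test/
  python3 llm/client.py lmi_dist "$model"
  docker rm -f $(docker ps -aq)
}

run_lmi_dist_test speculative-llama-13b
run_lmi_dist_test starcoder2-7b
run_lmi_dist_test gemma-7b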
2 changes: 1 addition & 1 deletion engines/python/setup/djl_python/huggingface.py
@@ -94,7 +94,7 @@ def get_rolling_batch_class_from_str(rolling_batch_type: str, is_mpi: bool,
from djl_python.rolling_batch.scheduler_rolling_batch import SchedulerRollingBatch
return SchedulerRollingBatch
elif rolling_batch_type == "lmi-dist":
from djl_python.rolling_batch.lmi_dist_v2_rolling_batch import LmiDistRollingBatch
from djl_python.rolling_batch.lmi_dist_rolling_batch import LmiDistRollingBatch
return LmiDistRollingBatch
elif rolling_batch_type == "vllm":
from djl_python.rolling_batch.vllm_rolling_batch import VLLMRollingBatch
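The practical effect of this one-line change is that the "lmi-dist" branch now resolves to LmiDistRollingBatch from lmi_dist_rolling_batch rather than from lmi_dist_v2_rolling_batch. A simplified sketch of the selection logic, for illustration only (the real djl_python function takes additional arguments beyond rolling_batch_type):

# Simplified sketch of the dispatch touched by this commit; not the actual
# djl_python signature, which accepts more parameters.
def resolve_rolling_batch(rolling_batch_type: str):
    if rolling_batch_type == "lmi-dist":
        # previously imported from lmi_dist_v2_rolling_batch
        from djl_python.rolling_batch.lmi_dist_rolling_batch import LmiDistRollingBatch
        return LmiDistRollingBatch
    if rolling_batch_type == "vllm":
        from djl_python.rolling_batch.vllm_rolling_batch import VLLMRollingBatch
        return VLLMRollingBatch
    raise ValueError(f"Unsupported rolling batch type: {rolling_batch_type}")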
@@ -10,75 +10,44 @@
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
# the specific language governing permissions and limitations under the License.
import logging
import os
from enum import Enum
from typing import Optional

import torch
from pydantic.v1.class_validators import root_validator
from pydantic.v1.class_validators import validator, root_validator

from djl_python.properties_manager.hf_properties import get_torch_dtype_from_str
from djl_python.properties_manager.properties import Properties


class LmiDistQuantizeMethods(str, Enum):
# added for backward compatibility lmi-dist
bitsandbytes = 'bitsandbytes'
bitsandbytes8 = 'bitsandbytes8'
gptq = 'gptq'
awq = 'awq'
gptq = 'gptq'
squeezellm = 'squeezellm'


class LmiDistRbProperties(Properties):
engine: Optional[str] = None
dtype: Optional[str] = "auto"
load_format: Optional[str] = "auto"
quantize: Optional[LmiDistQuantizeMethods] = None
tensor_parallel_degree: Optional[int] = 1
max_rolling_batch_prefill_tokens: Optional[int] = 4096
device: Optional[int] = None
dtype: Optional[str] = None
torch_dtype: Optional[torch.dtype] = None

@root_validator(skip_on_failure=True)
def validate_mpi_mode(cls, properties):
if not properties.get("is_mpi") and int(
properties.get("tensor_parallel_degree", "1")) != 1:
raise ValueError(f"Need mpi_mode to start lmi-dist RollingBatcher."
f"Try with engine=MPI in your serving.properties")
return properties

@root_validator(skip_on_failure=True)
def validate_quantize(cls, properties):
if properties.get('quantize') is None:
if properties.get('dtype') == "int8":
properties['quantize'] = LmiDistQuantizeMethods.bitsandbytes
else:
# parsing bitsandbytes8, so it can be directly passed to lmi dist model loader.
if properties.get(
'quantize') == LmiDistQuantizeMethods.bitsandbytes8:
properties['quantize'] = LmiDistQuantizeMethods.bitsandbytes
if properties.get('dtype') is not None:
raise ValueError(
f"Can't set both dtype: {properties['dtype']} and quantize: {properties['quantize']}"
)
return properties

@root_validator(skip_on_failure=True)
def set_device(cls, properties):
if properties.get('is_mpi'):
properties['device'] = int(os.getenv("LOCAL_RANK", 0))
return properties

@root_validator(skip_on_failure=True)
def construct_dtype(cls, properties):
if properties.get('dtype'):
properties["torch_dtype"] = get_torch_dtype_from_str(
properties['dtype'].lower())
elif properties.get('data_type'):
logging.warning('option.data_type is deprecated.'
'Please use option.dtype')
properties["torch_dtype"] = get_torch_dtype_from_str(
properties['data_type'].lower())
else:
properties['torch_dtype'] = torch.float16
return properties
tensor_parallel_degree: Optional[int] = None
max_rolling_batch_prefill_tokens: Optional[int] = None
# Adjustable prefix model length for certain 32k or longer model
max_model_len: Optional[int] = None
# TODO: change Enforce eager to False once SageMaker driver issue resolved
enforce_eager: Optional[bool] = False
# TODO: this default may change with different vLLM versions
# TODO: try to get good default from vLLM to prevent revisiting
# TODO: last time check: vllm 0.3.1
gpu_memory_utilization: Optional[float] = 0.9
# TODO: speculative decoding changes
speculative_draft_model: Optional[str] = None
speculative_length: int = 5
draft_model_tp_size: int = 1
record_acceptance_rate: Optional[bool] = False

@validator('engine')
def validate_engine(cls, engine):
if engine != "MPI":
raise AssertionError(
f"Need MPI engine to start lmidist_v2 RollingBatcher")
return engine
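For context, LmiDistRbProperties is populated from serving.properties, so the new speculative-decoding fields above would be driven by configuration. A hypothetical sketch follows; the option.* key names are assumed from the field names and the usual field-to-option mapping, the model ids are placeholders, and none of this is shown by the commit itself:

# Hypothetical serving.properties sketch; keys assumed from the
# LmiDistRbProperties fields above, values are placeholders.
engine=MPI
option.rolling_batch=lmi-dist
option.tensor_parallel_degree=4
option.model_id=<target model, e.g. a Llama-2 13B checkpoint>
option.speculative_draft_model=<smaller draft model checkpoint>
option.speculative_length=5
option.draft_model_tp_size=1
option.record_acceptance_rate=true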

This file was deleted.

