Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python] remove deepspeed related AOT code #2692

Merged
merged 1 commit into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 0 additions & 33 deletions serving/docker/partition/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from pathlib import Path

import utils
from properties_manager import PropertiesManager
from huggingface_hub import snapshot_download
from datasets import load_dataset
Expand Down Expand Up @@ -196,12 +195,10 @@ def run_partition(self) -> str:
logging.info(proc)
if proc.returncode == 0:
logging.info("Partitioning done.")
self.properties_manager.validate_and_correct_checkpoints_json()
self.properties_manager.generate_properties_file()
if not self.properties_manager.skip_copy:
logging.info("Copying config files...")
self.copy_config_files()
self.load_the_generated_checkpoints()
self.upload_checkpoints_to_s3()
self.cleanup()
return partition_stdout
Expand All @@ -212,36 +209,6 @@ def run_partition(self) -> str:
f"Partitioning exited with return code: {proc.returncode}. Details: {partition_stderr}"
)

# Runs the command built by get_partition_cmd against the checkpoints that
# were just saved, and raises if that subprocess exits with a non-zero code.
# Only acts when the configured engine is exactly 'DeepSpeed'.
# NOTE(review): indentation is not visible in this view, so the nesting
# described in the comments below is an assumption -- confirm against the file.
def load_the_generated_checkpoints(self):
if self.properties['engine'] == 'DeepSpeed':
saved_checkpoints_dir = self.properties[
"option.save_mp_checkpoint_path"]
# Start from the properties loaded from the saved-checkpoint directory.
properties = utils.load_properties(saved_checkpoints_dir)
# When config files were copied, point model_dir at the saved checkpoints.
if not self.properties_manager.skip_copy:
properties['model_dir'] = saved_checkpoints_dir
# Reuse the entry point from this run's properties.
# NOTE(review): presumably these two assignments are outside the skip_copy
# branch, since 'option.entryPoint' is read unconditionally below -- confirm.
properties['option.entryPoint'] = self.properties[
'option.entryPoint']
properties['partition_handler'] = 'handle'

# A 'model.py' entry point is copied next to the checkpoints for the run;
# entry_point_file stays None otherwise so the removal below is skipped.
entry_point_file = None
if properties['option.entryPoint'] == 'model.py':
entry_point_file = os.path.join(
self.properties_manager.properties_dir, 'model.py')
shutil.copy(entry_point_file, saved_checkpoints_dir)

# First argument True is get_partition_cmd's is_mpi_mode parameter.
commands = get_partition_cmd(True, properties)
self.set_environmental_vars()
result = subprocess.run(commands)
logging.info(result)
if result.returncode == 0:
logging.info(
"Successfully loaded the partitioned checkpoints.")
else:
raise Exception("DeepSpeed does not support partitioning. "
"Please use a different engine")
# Remove the temporary copy of model.py made above.
# NOTE(review): this line is not reached when the exception above is raised,
# so the copied model.py appears to be left behind on failure -- confirm.
if entry_point_file:
os.remove(os.path.join(saved_checkpoints_dir, 'model.py'))

def run_quantization(self):
quant_method = self.properties['option.quantize']
if quant_method == 'awq':
Expand Down
35 changes: 2 additions & 33 deletions serving/docker/partition/properties_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@
import logging
import os
import glob
import json
import torch
import requests

# Properties to exclude while generating serving.properties
from utils import (is_engine_mpi_mode, get_engine_configs, get_download_dir,
load_properties, update_kwargs_with_env_vars)
from utils import (is_engine_mpi_mode, get_download_dir, load_properties,
update_kwargs_with_env_vars)

EXCLUDE_PROPERTIES = [
'option.model_id', 'option.save_mp_checkpoint_path', 'model_dir',
Expand Down Expand Up @@ -88,34 +87,6 @@ def set_and_validate_model_dir(self):
f'No .bin or .safetensors files found in the dir: {self.properties_dir}'
'\nPlease specify the model_dir or model_id')

def validate_and_correct_checkpoints_json(self):
    """
    Remove ``base_dir`` from the generated ``ds_inference_config.json`` file.

    DeepSpeed writes base_dir, the path the checkpoints were saved to, into
    the file. It is removed because the user's deployment environment could
    be different from the partition environment. The user can pass the
    base_dir argument to deepspeed.init_inference when using this file.

    Does nothing unless the configured engine is exactly 'DeepSpeed'. The
    file is rewritten only when it actually contains a ``base_dir`` key.

    :return: None
    :raises ValueError: if ds_inference_config.json does not exist under
        ``option.save_mp_checkpoint_path``.
    """
    if self.properties.get('engine') != 'DeepSpeed':
        return

    config_file = os.path.join(
        self.properties['option.save_mp_checkpoint_path'],
        'ds_inference_config.json')
    if not os.path.exists(config_file):
        # Trailing space keeps the two sentences apart once the adjacent
        # literals are concatenated.
        raise ValueError("Checkpoints json file was not generated. "
                         "Partition was not successful.")

    with open(config_file) as f:
        configs = json.load(f)

    # Test for the key itself so an empty base_dir value is removed too.
    if 'base_dir' not in configs:
        return

    del configs['base_dir']
    with open(config_file, "w") as f:
        json.dump(configs, f)

def generate_properties_file(self):
checkpoint_path = self.properties.get('option.save_mp_checkpoint_path')
configs = get_engine_configs(self.properties)
Expand Down Expand Up @@ -172,8 +143,6 @@ def set_and_validate_entry_point(self):
pass
elif engine is None:
raise ValueError("Please specify engine")
elif engine.lower() == "deepspeed":
entry_point = "djl_python.deepspeed"
elif engine.lower() == "python":
entry_point = "djl_python.transformers_neuronx"
else:
Expand Down
12 changes: 1 addition & 11 deletions serving/docker/partition/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,6 @@ def get_partition_cmd(is_mpi_mode, properties):
]


def get_engine_configs(properties):
    """Return the extra serving options implied by the configured engine.

    :param properties: mapping that may contain an 'engine' entry.
    :return: a new dict holding the DeepSpeed checkpoint options when the
        engine is exactly 'DeepSpeed', otherwise a new empty dict.
    """
    if properties.get('engine') != 'DeepSpeed':
        return {}

    return {
        'option.checkpoint': 'ds_inference_config.json',
        'option.parallel_loading': True,
    }


def extract_python_jar(target_dir):
os.makedirs(target_dir, exist_ok=True)
jar_files = glob.glob('/usr/local/djl-serving-*/lib/python-*.jar')
Expand All @@ -72,7 +62,7 @@ def get_djl_version_from_lib():


def is_engine_mpi_mode(engine):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we use this function anywhere currently?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, not right now. But if we unify TRTLLM, which currently lives in a separate script, into this partition script, then we will need this.

if engine == 'DeepSpeed':
if engine == 'MPI':
return True
else:
return False
Expand Down
Loading