Skip to content

Commit

Permalink
[python] remove deepspeed related AOT code
Browse files Browse the repository at this point in the history
  • Loading branch information
sindhuvahinis committed Jan 28, 2025
1 parent a1d2ea3 commit 1d194a9
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 76 deletions.
33 changes: 0 additions & 33 deletions serving/docker/partition/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from pathlib import Path

import utils
from properties_manager import PropertiesManager
from huggingface_hub import snapshot_download
from datasets import load_dataset
Expand Down Expand Up @@ -196,12 +195,10 @@ def run_partition(self) -> str:
logging.info(proc)
if proc.returncode == 0:
logging.info("Partitioning done.")
self.properties_manager.validate_and_correct_checkpoints_json()
self.properties_manager.generate_properties_file()
if not self.properties_manager.skip_copy:
logging.info("Copying config files...")
self.copy_config_files()
self.load_the_generated_checkpoints()
self.upload_checkpoints_to_s3()
self.cleanup()
return partition_stdout
Expand All @@ -212,36 +209,6 @@ def run_partition(self) -> str:
f"Partitioning exited with return code: {proc.returncode}. Details: {partition_stderr}"
)

def load_the_generated_checkpoints(self):
# Purpose: when the engine is 'DeepSpeed', re-run the partition command
# against the saved checkpoint directory in a subprocess and check that it
# exits with return code 0.
# NOTE(review): indentation was lost in this rendering, so the exact nesting
# of the statements below cannot be confirmed from this view.
if self.properties['engine'] == 'DeepSpeed':
# Directory the partitioned checkpoints were saved to.
saved_checkpoints_dir = self.properties[
"option.save_mp_checkpoint_path"]
# Presumably reads the properties file generated in that directory;
# verify against utils.load_properties.
properties = utils.load_properties(saved_checkpoints_dir)
# NOTE(review): unclear how many of the following assignments are guarded
# by skip_copy -- confirm against the indented source.
if not self.properties_manager.skip_copy:
properties['model_dir'] = saved_checkpoints_dir
properties['option.entryPoint'] = self.properties[
'option.entryPoint']
properties['partition_handler'] = 'handle'

# A custom 'model.py' entry point is copied next to the checkpoints for the
# run; entry_point_file stays None when no copy was made.
entry_point_file = None
if properties['option.entryPoint'] == 'model.py':
entry_point_file = os.path.join(
self.properties_manager.properties_dir, 'model.py')
shutil.copy(entry_point_file, saved_checkpoints_dir)

# First argument is is_mpi_mode (see get_partition_cmd's signature).
commands = get_partition_cmd(True, properties)
self.set_environmental_vars()
result = subprocess.run(commands)
logging.info(result)
if result.returncode == 0:
logging.info(
"Successfully loaded the partitioned checkpoints.")
else:
# NOTE(review): this branch is reached on a non-zero return code from the
# load run, yet the message talks about partitioning support -- confirm the
# wording is intended.
raise Exception("DeepSpeed does not support partitioning. "
"Please use a different engine")
# Remove the temporary copy of model.py made above.
# NOTE(review): if this sits after the raise at the same level, the copy is
# left behind on failure -- confirm.
if entry_point_file:
os.remove(os.path.join(saved_checkpoints_dir, 'model.py'))

def run_quantization(self):
quant_method = self.properties['option.quantize']
if quant_method == 'awq':
Expand Down
33 changes: 1 addition & 32 deletions serving/docker/partition/properties_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@
import logging
import os
import glob
import json
import torch
import requests

# Properties to exclude while generating serving.properties
from utils import (is_engine_mpi_mode, get_engine_configs, get_download_dir,
from utils import (is_engine_mpi_mode, get_download_dir,
load_properties, update_kwargs_with_env_vars)

EXCLUDE_PROPERTIES = [
Expand Down Expand Up @@ -88,34 +87,6 @@ def set_and_validate_model_dir(self):
f'No .bin or .safetensors files found in the dir: {self.properties_dir}'
'\nPlease specify the model_dir or model_id')

def validate_and_correct_checkpoints_json(self):
    """
    Remove ``base_dir`` from the ``ds_inference_config.json`` file.

    DeepSpeed writes ``base_dir``, the path the checkpoints were saved to,
    into that file. It is removed because the user's deployment environment
    could differ from the partition environment; the user can pass the
    ``base_dir`` argument to ``deepspeed.init_inference`` instead.

    Does nothing unless the configured engine is ``'DeepSpeed'``. The file is
    left untouched when ``base_dir`` is absent or empty.

    :raises ValueError: if the config file does not exist in
        ``option.save_mp_checkpoint_path``.
    :return: None
    """
    if self.properties.get('engine') != 'DeepSpeed':
        return

    config_file = os.path.join(
        self.properties['option.save_mp_checkpoint_path'],
        'ds_inference_config.json')
    if not os.path.exists(config_file):
        # The two literals are concatenated, so the first needs a trailing
        # space to keep the sentences apart.
        raise ValueError("Checkpoints json file was not generated. "
                         "Partition was not successful.")

    with open(config_file) as f:
        configs = json.load(f)

    # Nothing to correct: avoid rewriting the file needlessly.
    if not configs.get('base_dir'):
        return

    configs.pop('base_dir')
    with open(config_file, "w") as f:
        json.dump(configs, f)

def generate_properties_file(self):
checkpoint_path = self.properties.get('option.save_mp_checkpoint_path')
configs = get_engine_configs(self.properties)
Expand Down Expand Up @@ -172,8 +143,6 @@ def set_and_validate_entry_point(self):
pass
elif engine is None:
raise ValueError("Please specify engine")
elif engine.lower() == "deepspeed":
entry_point = "djl_python.deepspeed"
elif engine.lower() == "python":
entry_point = "djl_python.transformers_neuronx"
else:
Expand Down
12 changes: 1 addition & 11 deletions serving/docker/partition/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,6 @@ def get_partition_cmd(is_mpi_mode, properties):
]


def get_engine_configs(properties):
    """Return the extra options implied by the ``engine`` property.

    :param properties: mapping that may contain an ``engine`` key.
    :return: the checkpoint-file and parallel-loading options when the engine
        is ``'DeepSpeed'``; an empty dict for any other (or missing) engine.
    """
    if properties.get('engine') != 'DeepSpeed':
        return {}

    return {
        'option.checkpoint': 'ds_inference_config.json',
        'option.parallel_loading': True,
    }


def extract_python_jar(target_dir):
os.makedirs(target_dir, exist_ok=True)
jar_files = glob.glob('/usr/local/djl-serving-*/lib/python-*.jar')
Expand All @@ -72,7 +62,7 @@ def get_djl_version_from_lib():


def is_engine_mpi_mode(engine):
# Returns True when `engine` equals the engine name compared below, otherwise
# False (exact, case-sensitive string comparison).
# NOTE: the two `if` lines that follow are the removed and added sides of the
# single changed line in this diff hunk, not two nested conditions.
# Presumably 'DeepSpeed' is the old value and 'MPI' the new one, given the
# commit removes DeepSpeed-related code -- confirm against the repository.
if engine == 'DeepSpeed':
if engine == 'MPI':
return True
else:
return False
Expand Down

0 comments on commit 1d194a9

Please sign in to comment.