Skip to content

Commit c209230

Browse files
[python] remove deepspeed related AOT code (deepjavalibrary#2692)
1 parent 7fb993b commit c209230

File tree

3 files changed

+3
-77
lines changed

3 files changed

+3
-77
lines changed

serving/docker/partition/partition.py

-33
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
from pathlib import Path
2222

23-
import utils
2423
from properties_manager import PropertiesManager
2524
from huggingface_hub import snapshot_download
2625
from datasets import load_dataset
@@ -196,12 +195,10 @@ def run_partition(self) -> str:
196195
logging.info(proc)
197196
if proc.returncode == 0:
198197
logging.info("Partitioning done.")
199-
self.properties_manager.validate_and_correct_checkpoints_json()
200198
self.properties_manager.generate_properties_file()
201199
if not self.properties_manager.skip_copy:
202200
logging.info("Copying config files...")
203201
self.copy_config_files()
204-
self.load_the_generated_checkpoints()
205202
self.upload_checkpoints_to_s3()
206203
self.cleanup()
207204
return partition_stdout
@@ -212,36 +209,6 @@ def run_partition(self) -> str:
212209
f"Partitioning exited with return code: {proc.returncode}. Details: {partition_stderr}"
213210
)
214211

215-
def load_the_generated_checkpoints(self):
216-
if self.properties['engine'] == 'DeepSpeed':
217-
saved_checkpoints_dir = self.properties[
218-
"option.save_mp_checkpoint_path"]
219-
properties = utils.load_properties(saved_checkpoints_dir)
220-
if not self.properties_manager.skip_copy:
221-
properties['model_dir'] = saved_checkpoints_dir
222-
properties['option.entryPoint'] = self.properties[
223-
'option.entryPoint']
224-
properties['partition_handler'] = 'handle'
225-
226-
entry_point_file = None
227-
if properties['option.entryPoint'] == 'model.py':
228-
entry_point_file = os.path.join(
229-
self.properties_manager.properties_dir, 'model.py')
230-
shutil.copy(entry_point_file, saved_checkpoints_dir)
231-
232-
commands = get_partition_cmd(True, properties)
233-
self.set_environmental_vars()
234-
result = subprocess.run(commands)
235-
logging.info(result)
236-
if result.returncode == 0:
237-
logging.info(
238-
"Successfully loaded the partitioned checkpoints.")
239-
else:
240-
raise Exception("DeepSpeed does not support partitioning. "
241-
"Please use a different engine")
242-
if entry_point_file:
243-
os.remove(os.path.join(saved_checkpoints_dir, 'model.py'))
244-
245212
def run_quantization(self):
246213
quant_method = self.properties['option.quantize']
247214
if quant_method == 'awq':

serving/docker/partition/properties_manager.py

+2-33
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@
1212
import logging
1313
import os
1414
import glob
15-
import json
1615
import torch
1716
import requests
1817

1918
# Properties to exclude while generating serving.properties
20-
from utils import (is_engine_mpi_mode, get_engine_configs, get_download_dir,
21-
load_properties, update_kwargs_with_env_vars)
19+
from utils import (is_engine_mpi_mode, get_download_dir, load_properties,
20+
update_kwargs_with_env_vars)
2221

2322
EXCLUDE_PROPERTIES = [
2423
'option.model_id', 'option.save_mp_checkpoint_path', 'model_dir',
@@ -88,34 +87,6 @@ def set_and_validate_model_dir(self):
8887
f'No .bin or .safetensors files found in the dir: {self.properties_dir}'
8988
'\nPlease specify the model_dir or model_id')
9089

91-
def validate_and_correct_checkpoints_json(self):
92-
"""
93-
Removes base_dir from ds_inference_checkpoints.json file.
94-
95-
DeepSpeed writes base_dir directory, which is the path of checkpoints saved to the file.
96-
Removing the base_dir since the user's deployment environment could be different from partition environment.
97-
User can specify base_dir argument in deepspeed.init_inference while using this file.
98-
99-
:return:
100-
"""
101-
if self.properties.get('engine') == 'DeepSpeed':
102-
config_file = os.path.join(
103-
self.properties['option.save_mp_checkpoint_path'],
104-
'ds_inference_config.json')
105-
if not os.path.exists(config_file):
106-
raise ValueError("Checkpoints json file was not generated."
107-
"Partition was not successful.")
108-
109-
with open(config_file) as f:
110-
configs = json.load(f)
111-
112-
if not configs.get('base_dir'):
113-
return
114-
115-
configs.pop('base_dir')
116-
with open(config_file, "w") as f:
117-
json.dump(configs, f)
118-
11990
def generate_properties_file(self):
12091
checkpoint_path = self.properties.get('option.save_mp_checkpoint_path')
12192
configs = get_engine_configs(self.properties)
@@ -172,8 +143,6 @@ def set_and_validate_entry_point(self):
172143
pass
173144
elif engine is None:
174145
raise ValueError("Please specify engine")
175-
elif engine.lower() == "deepspeed":
176-
entry_point = "djl_python.deepspeed"
177146
elif engine.lower() == "python":
178147
entry_point = "djl_python.transformers_neuronx"
179148
else:

serving/docker/partition/utils.py

+1-11
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,6 @@ def get_partition_cmd(is_mpi_mode, properties):
4242
]
4343

4444

45-
def get_engine_configs(properties):
46-
engine = properties.get('engine')
47-
configs = {}
48-
if engine == 'DeepSpeed':
49-
configs['option.checkpoint'] = 'ds_inference_config.json'
50-
configs['option.parallel_loading'] = True
51-
52-
return configs
53-
54-
5545
def extract_python_jar(target_dir):
5646
os.makedirs(target_dir, exist_ok=True)
5747
jar_files = glob.glob('/usr/local/djl-serving-*/lib/python-*.jar')
@@ -72,7 +62,7 @@ def get_djl_version_from_lib():
7262

7363

7464
def is_engine_mpi_mode(engine):
75-
if engine == 'DeepSpeed':
65+
if engine == 'MPI':
7666
return True
7767
else:
7868
return False

0 commit comments

Comments (0)