Skip to content

Commit

Permalink
[Neo] [Neuron] Various CX improvements for Neo Neuron entrypoint (#2296)
Browse files: browse the repository at this point in the history
  • Loading branch information
a-ys authored Aug 15, 2024
1 parent db7bf29 commit 0a43b4d
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 266 deletions.
9 changes: 7 additions & 2 deletions engines/python/setup/djl_python/neuron_utils/model_loader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -139,8 +139,13 @@ def __init__(self, *args, **kwargs) -> None:
self._neuronx_class = self.set_neuronx_class()

def set_neuronx_class(self):
module_name, class_name = self.MODEL_TYPE_TO_CLS_LOADER[
self.model_config.model_type].rsplit(".", maxsplit=1)
try:
module_name, class_name = self.MODEL_TYPE_TO_CLS_LOADER[
self.model_config.model_type].rsplit(".", maxsplit=1)
except KeyError as exc:
raise KeyError(
f"Unsupported model: {str(exc)}. Supported architectures: {list(self.MODEL_TYPE_TO_CLS_LOADER.keys())}"
)
module = importlib.import_module(f"transformers_neuronx.{module_name}")
neuronx_class = getattr(module, class_name, None)
if neuronx_class is None:
Expand Down
17 changes: 13 additions & 4 deletions serving/docker/partition/partition.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,8 +27,8 @@
from datasets import load_dataset

from utils import (get_partition_cmd, extract_python_jar,
get_python_executable, get_download_dir, init_hf_tokenizer,
remove_option_from_properties, load_hf_config_and_tokenizer)
get_python_executable, get_download_dir,
load_hf_config_and_tokenizer)

PYTHON_CACHE_DIR = '/tmp/djlserving/cache'

Expand Down Expand Up @@ -181,14 +181,19 @@ def run_partition(self) -> str:
logging.info(f"cmd: {commands}")
self.set_environmental_vars()
partition_stdout = ""
partition_stderr = ""
# Use Popen to capture stdout without delaying terminal output
with subprocess.Popen(commands,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
bufsize=1,
universal_newlines=True) as proc:
text=True) as proc:
for line in proc.stdout:
partition_stdout += line
print(line, end='')
# Exception details are in the last line of stderr
for line in proc.stderr:
partition_stderr = line
logging.info(proc)
if proc.returncode == 0:
logging.info("Partitioning done.")
Expand All @@ -202,7 +207,11 @@ def run_partition(self) -> str:
self.cleanup()
return partition_stdout
else:
raise Exception("Partitioning was not successful.")
logging.error(
f"Partitioning was not successful: {partition_stderr}")
raise Exception(
f"Partitioning exited with return code: {proc.returncode}. Details: {partition_stderr}"
)

def load_the_generated_checkpoints(self):
if self.properties['engine'] == 'DeepSpeed':
Expand Down
4 changes: 2 additions & 2 deletions serving/docker/partition/run_partition.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,8 +37,8 @@ def invoke_partition(properties):
properties['entryPoint'], None)
model_service.invoke_handler(handler, inputs)
except Exception as e:
logging.exception(f"Partitioning failed {str(e)}")
raise Exception("Partitioning failed.")
logging.exception(f"Partitioning failed: {str(e)}")
raise e


def main():
Expand Down
Loading

0 comments on commit 0a43b4d

Please sign in to comment.