[Neo] [Neuron] Various CX improvements for Neo Neuron entrypoint (#2296)

deepjavalibrary · Aug 15, 2024 · 0a43b4d · 0a43b4d
1 parent db7bf29
commit 0a43b4d
Show file tree

Hide file tree

Showing 8 changed files with 132 additions and 266 deletions.
diff --git a/engines/python/setup/djl_python/neuron_utils/model_loader.py b/engines/python/setup/djl_python/neuron_utils/model_loader.py
@@ -139,8 +139,13 @@ def __init__(self, *args, **kwargs) -> None:
         self._neuronx_class = self.set_neuronx_class()
 
     def set_neuronx_class(self):
-        module_name, class_name = self.MODEL_TYPE_TO_CLS_LOADER[
-            self.model_config.model_type].rsplit(".", maxsplit=1)
+        try:
+            module_name, class_name = self.MODEL_TYPE_TO_CLS_LOADER[
+                self.model_config.model_type].rsplit(".", maxsplit=1)
+        except KeyError as exc:
+            raise KeyError(
+                f"Unsupported model: {str(exc)}. Supported architectures: {list(self.MODEL_TYPE_TO_CLS_LOADER.keys())}"
+            )
         module = importlib.import_module(f"transformers_neuronx.{module_name}")
         neuronx_class = getattr(module, class_name, None)
         if neuronx_class is None:

diff --git a/serving/docker/partition/partition.py b/serving/docker/partition/partition.py
@@ -27,8 +27,8 @@
 from datasets import load_dataset
 
 from utils import (get_partition_cmd, extract_python_jar,
-                   get_python_executable, get_download_dir, init_hf_tokenizer,
-                   remove_option_from_properties, load_hf_config_and_tokenizer)
+                   get_python_executable, get_download_dir,
+                   load_hf_config_and_tokenizer)
 
 PYTHON_CACHE_DIR = '/tmp/djlserving/cache'
 
@@ -181,14 +181,19 @@ def run_partition(self) -> str:
         logging.info(f"cmd: {commands}")
         self.set_environmental_vars()
         partition_stdout = ""
+        partition_stderr = ""
         # Use Popen to capture stdout without delaying terminal output
         with subprocess.Popen(commands,
                               stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE,
                               bufsize=1,
-                              universal_newlines=True) as proc:
+                              text=True) as proc:
             for line in proc.stdout:
                 partition_stdout += line
                 print(line, end='')
+            # Keep only the final stderr line; the exception details are expected to be there
+            for line in proc.stderr:
+                partition_stderr = line
         logging.info(proc)
         if proc.returncode == 0:
             logging.info("Partitioning done.")
@@ -202,7 +207,11 @@ def run_partition(self) -> str:
             self.cleanup()
             return partition_stdout
         else:
-            raise Exception("Partitioning was not successful.")
+            logging.error(
+                f"Partitioning was not successful: {partition_stderr}")
+            raise Exception(
+                f"Partitioning exited with return code: {proc.returncode}. Details: {partition_stderr}"
+            )
 
     def load_the_generated_checkpoints(self):
         if self.properties['engine'] == 'DeepSpeed':

diff --git a/serving/docker/partition/run_partition.py b/serving/docker/partition/run_partition.py
@@ -37,8 +37,8 @@ def invoke_partition(properties):
                                            properties['entryPoint'], None)
         model_service.invoke_handler(handler, inputs)
     except Exception as e:
-        logging.exception(f"Partitioning failed {str(e)}")
-        raise Exception("Partitioning failed.")
+        logging.exception(f"Partitioning failed: {str(e)}")
+        raise e
 
 
 def main():