Skip to content

Commit

Permalink
[python] fix last token fetch logic (#2423)
Browse files Browse the repository at this point in the history
  • Loading branch information
sindhuvahinis authored Oct 9, 2024
1 parent bd0db03 commit 79d0c3e
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 4 deletions.
4 changes: 1 addition & 3 deletions engines/python/setup/djl_python/output_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
from djl_python.request_io import TextGenerationOutput
from djl_python.utils import wait_till_generation_finished

ERR_MSG = "Inference error occurred. Check CloudWatch metrics or model server logs for more details."


def output_formatter(function):
"""
Expand Down Expand Up @@ -122,7 +120,7 @@ def _json_output_formatter(request_output: TextGenerationOutput):
# partial generation response that may exist
result = {
"generated_text": None,
"error": final_token.error_msg if final_token else ERR_MSG,
"error": final_token.error_msg,
"code": 400,
"details": details,
}
Expand Down
2 changes: 1 addition & 1 deletion engines/python/setup/djl_python/request_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def get_next_token(self) -> (Token, bool, bool):
return None, False, False

def get_last_token(self) -> Optional[Token]:
    """Return the most recently generated token, or None if none exists.

    Returns:
        The token at ``self._last_token_index`` in ``self.tokens``, or
        ``None`` when no last-token index has been recorded.

    Note: the check must be ``is not None`` rather than plain truthiness —
    index 0 (the very first token) is falsy, and a truthiness test would
    incorrectly report that no token exists when the last token is the
    first one generated.
    """
    if self._last_token_index is not None:
        return self.tokens[self._last_token_index]
    return None

Expand Down

0 comments on commit 79d0c3e

Please sign in to comment.