diff --git a/unstructured/partition/utils/ocr_models/tesseract_ocr.py b/unstructured/partition/utils/ocr_models/tesseract_ocr.py
index 64ba58e073..17589df06d 100644
--- a/unstructured/partition/utils/ocr_models/tesseract_ocr.py
+++ b/unstructured/partition/utils/ocr_models/tesseract_ocr.py
@@ -136,14 +136,12 @@ def hocr_to_dataframe(
         return ocr_df
 
     @staticmethod
-    def extract_word_from_hocr(
-        word: Tag, character_confidence_threshold: float = 0.0
-    ) -> str | None:
+    def extract_word_from_hocr(word: Tag, character_confidence_threshold: float = 0.0) -> str:
         """Extracts a word from an hOCR word tag, filtering out characters with low confidence."""
 
         character_spans = word.find_all("span", class_="ocrx_cinfo")
         if len(character_spans) == 0:
-            return None
+            return ""
 
         word_text = ""
         for character_span in character_spans: