diff --git a/unstructured/partition/utils/ocr_models/tesseract_ocr.py b/unstructured/partition/utils/ocr_models/tesseract_ocr.py
index 6825c2867d..36e38787fa 100644
--- a/unstructured/partition/utils/ocr_models/tesseract_ocr.py
+++ b/unstructured/partition/utils/ocr_models/tesseract_ocr.py
@@ -49,7 +49,7 @@ def get_layout_from_image(self, image: PILImage.Image) -> List[TextRegion]:
         trace_logger.detail("Processing entire page OCR with tesseract...")
         zoom = 1
         ocr_df: pd.DataFrame = self.image_to_data_with_character_confidence_filter(
-            np.array(image, zoom),
+            np.array(image),
             lang=self.language,
             character_confidence_threshold=env_config.TESSERACT_CHARACTER_CONFIDENCE_THRESHOLD,
         )