diff --git a/models/image_understanding_model.py b/models/image_understanding_model.py index be668498..a387a73d 100644 --- a/models/image_understanding_model.py +++ b/models/image_understanding_model.py @@ -50,15 +50,15 @@ async def do_image_ocr(self, filepath): # Read the image file and encode it in base64 format if not self.google_cloud_api_key: return "None" - with open(filepath, 'rb') as image_file: - encoded_image = base64.b64encode(image_file.read()).decode('utf-8') + with open(filepath, "rb") as image_file: + encoded_image = base64.b64encode(image_file.read()).decode("utf-8") # Prepare the JSON payload payload = { "requests": [ { "image": {"content": encoded_image}, - "features": [{"type": "TEXT_DETECTION"}] + "features": [{"type": "TEXT_DETECTION"}], } ] } @@ -71,15 +71,19 @@ async def do_image_ocr(self, filepath): # Send the async request async with aiohttp.ClientSession() as session: - async with session.post(url, headers=header, data=json.dumps(payload)) as response: + async with session.post( + url, headers=header, data=json.dumps(payload) + ) as response: result = await response.json() if response.status == 200: # Get fullTextAnnotation - full_text_annotation = result.get('responses', [])[0].get('fullTextAnnotation') + full_text_annotation = result.get("responses", [])[0].get( + "fullTextAnnotation" + ) if full_text_annotation: - extracted_text = full_text_annotation.get('text') + extracted_text = full_text_annotation.get("text") # Return the extracted text return extracted_text @@ -87,4 +91,5 @@ async def do_image_ocr(self, filepath): return "" else: raise Exception( - f"Google Cloud Vision API returned an error. Status code: {response.status}, Error: {result}") + f"Google Cloud Vision API returned an error. Status code: {response.status}, Error: {result}" + )
diff --git a/services/text_service.py b/services/text_service.py index 1712c27e..84e4731b 100644 --- a/services/text_service.py +++ b/services/text_service.py @@ -766,7 +766,7 @@ async def process_conversation_message( prompt, temp_file.name, ), - image_understanding_model.do_image_ocr(temp_file.name) + image_understanding_model.do_image_ocr(temp_file.name), ) prompt = ( f"Image Info-Caption: {image_caption}\nImage Info-QA: {image_qa}\nImage Info-OCR: {image_ocr}\n"