diff --git a/docker-compose-test.yml b/docker-compose-test.yml index d7bef59..68c4dab 100755 --- a/docker-compose-test.yml +++ b/docker-compose-test.yml @@ -23,7 +23,7 @@ services: queue-processor-pdf-layout: container_name: "queue-processor-pdf-layout" - entrypoint: [ "python", "-m", "src.QueueProcessor" ] + entrypoint: [ "python", "-m", "src.start_queue_processor" ] init: true restart: unless-stopped build: @@ -43,7 +43,7 @@ services: worker-pdf-layout: container_name: "worker-pdf-layout" entrypoint: [ "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5060", "--timeout", "10000"] - image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.11 + image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.15 init: true restart: unless-stopped ports: diff --git a/docker-compose.yml b/docker-compose.yml index 3e59d58..8f87762 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ services: api-pdf-layout: container_name: "api-pdf-layout" - entrypoint: [ "gunicorn", "-w", "2", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5051", "--timeout", "300" ] + entrypoint: [ "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5051", "--timeout", "300" ] init: true restart: unless-stopped build: @@ -20,7 +20,7 @@ services: queue-processor-pdf-layout-gpu: container_name: "queue-processor-pdf-layout-gpu" - entrypoint: [ "python", "-m", "src.QueueProcessor" ] + entrypoint: [ "python", "-m", "src.start_queue_processor" ] init: true restart: unless-stopped build: @@ -38,19 +38,12 @@ services: - mongo-pdf-layout worker-pdf-layout-gpu: - container_name: "worker-pdf-layout-gpu" + container_name: "worker-pdf-layout-no-gpu" entrypoint: [ "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5060", "--timeout", "10000"] - image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.11 + image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.15 init: true restart: unless-stopped network_mode: host - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [ gpu ] volumes: - data:/app/xmls diff --git a/requirements.txt b/requirements.txt index 173bccc..4d81f2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -git+https://github.com/huridocs/pdf-document-layout-analysis@d6cbcc4891391fd9f2fc577c9cef6f9c8f7d9e6f -git+https://github.com/huridocs/queue-processor@26c9413ac4fd950ace4ee542d6734e6959e10ea4 +git+https://github.com/huridocs/pdf-document-layout-analysis@7cde0c113a5a312decd8a95fd759b48399ef4fb5 +git+https://github.com/huridocs/queue-processor@d30bf31e614694cf65f7117cd28cabf4afedfe55 graypy==2.1.0 PyYAML==6.0.1 pymongo==4.8.0 diff --git a/src/configuration.py b/src/configuration.py index 5b6e708..ce2d070 100644 --- a/src/configuration.py +++ b/src/configuration.py @@ -4,7 +4,7 @@ from pathlib import Path import graypy -QUEUES_NAMES = os.environ.get("QUEUES_NAMES", "segmentation") +QUEUES_NAMES = os.environ.get("QUEUES_NAMES", "segmentation development_segmentation") SERVICE_HOST = os.environ.get("SERVICE_HOST", "http://127.0.0.1") SERVICE_PORT = os.environ.get("SERVICE_PORT", "5051") diff --git a/src/delete_queues.py b/src/delete_queues.py index 653b7d0..9cf2c8c 100644 --- a/src/delete_queues.py +++ b/src/delete_queues.py @@ -1,7 +1,7 @@ from redis import exceptions from rsmq import RedisSMQ -from configuration import TASK_QUEUE_NAME, RESULTS_QUEUE_NAME +from configuration import QUEUES_NAMES REDIS_HOST = "127.0.0.1" REDIS_PORT = "6379" @@ -9,25 +9,26 @@ def delete_queues(): try: - queue = RedisSMQ( - host=REDIS_HOST, - port=REDIS_PORT, - qname=TASK_QUEUE_NAME, - quiet=False, - ) - - queue.deleteQueue().exceptions(False).execute() - queue.createQueue().exceptions(False).execute() - - queue = RedisSMQ( - host=REDIS_HOST, - port=REDIS_PORT, - qname=RESULTS_QUEUE_NAME, - quiet=False, - ) - - queue.deleteQueue().exceptions(False).execute() - queue.createQueue().exceptions(False).execute() + for queue_name in QUEUES_NAMES.split(): + queue = RedisSMQ( + host=REDIS_HOST, + port=REDIS_PORT, + qname=queue_name + "_tasks", + quiet=False, + ) + + queue.deleteQueue().exceptions(False).execute() + queue.createQueue().exceptions(False).execute() + + queue = RedisSMQ( + host=REDIS_HOST, + port=REDIS_PORT, + qname=queue_name + "_results", + quiet=False, + ) + + queue.deleteQueue().exceptions(False).execute() + queue.createQueue().exceptions(False).execute() print("Queues properly deleted") diff --git a/src/extract_segments.py b/src/extract_segments.py index 68cd58a..b9348a2 100644 --- a/src/extract_segments.py +++ b/src/extract_segments.py @@ -9,7 +9,9 @@ def get_xml_name(task: Task) -> str: - return f"{task.tenant}__{task.params.filename.lower().replace('.pdf', '.xml')}" + xml_file_name = f"{task.tenant}__{task.params.filename.lower().replace('.pdf', '.xml')}" + xml_file_name = xml_file_name if xml_file_name.endswith(".xml") else f"{xml_file_name}.xml" + return xml_file_name def extract_segments(task: Task, xml_file_name: str = "") -> ExtractionData: diff --git a/src/get_xml.py b/src/get_xml.py index f0ca880..ba91b03 100644 --- a/src/get_xml.py +++ b/src/get_xml.py @@ -1,12 +1,11 @@ import os -from os.path import join from pathlib import Path from configuration import DATA_PATH def get_xml(xml_file_name: str) -> str: - xml_file_path = Path(join(DATA_PATH, xml_file_name)) + xml_file_path = Path(DATA_PATH, xml_file_name) with open(xml_file_path, mode="r") as file: content = file.read() diff --git a/src/QueueProcessor.py b/src/start_queue_processor.py similarity index 99% rename from src/QueueProcessor.py rename to src/start_queue_processor.py index 3b5eb6f..998708d 100644 --- a/src/QueueProcessor.py +++ b/src/start_queue_processor.py @@ -88,5 +88,5 @@ def process_task(task): pass queues_names = QUEUES_NAMES.split(" ") - queue_processor = QueueProcessor(REDIS_HOST, REDIS_PORT, queues_names, service_logger) + queue_processor = QueueProcessor(REDIS_HOST, REDIS_PORT, queues_names, service_logger, 7) queue_processor.start(process)