From a2be023dc3aae1306460ff73fe1fe0fe7c553be1 Mon Sep 17 00:00:00 2001 From: raxtemur Date: Mon, 25 Dec 2023 10:19:28 +0000 Subject: [PATCH] fix bugs --- dedoc/config.py | 2 +- docker-compose.yml | 5 +-- docker_gpu/Dockerfile | 3 +- docker_gpu/config_on_gpu.py | 85 +++++++++++++++++++++++++++++++++++ docker_gpu/docker-compose.yml | 8 ++-- 5 files changed, 93 insertions(+), 10 deletions(-) create mode 100644 docker_gpu/config_on_gpu.py diff --git a/dedoc/config.py b/dedoc/config.py index 0b1cd097..10711e25 100644 --- a/dedoc/config.py +++ b/dedoc/config.py @@ -24,7 +24,7 @@ # --------------------------------------------GPU SETTINGS---------------------------------------------------------- # set gpu in XGBoost and torch models - on_gpu=True, + on_gpu=False, # ---------------------------------------------API SETTINGS--------------------------------------------------------- # max file size in bytes diff --git a/docker-compose.yml b/docker-compose.yml index d18aa1c0..3cfe4b62 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,8 +12,7 @@ services: - 1231:1231 environment: DOCREADER_PORT: 1231 - NVIDIA_VISIBLE_DEVICES: 0 - runtime: nvidia + test: depends_on: @@ -27,8 +26,6 @@ services: DOCREADER_PORT: 1231 is_test: $test PYTHONPATH: $PYTHONPATH:/dedoc_root/tests:/dedoc_root - NVIDIA_VISIBLE_DEVICES: 0 - runtime: nvidia command: bash dedoc_root/tests/run_tests_in_docker.sh diff --git a/docker_gpu/Dockerfile b/docker_gpu/Dockerfile index 8b4d7f2b..0cc37650 100644 --- a/docker_gpu/Dockerfile +++ b/docker_gpu/Dockerfile @@ -4,6 +4,7 @@ FROM dedocproject/dedoc_p3.9_base:version_2023_08_28 ENV PYTHONPATH "${PYTHONPATH}:/dedoc_root" ENV RESOURCES_PATH "/dedoc_root/resources" + ADD requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt RUN pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html @@ -11,7 +12,7 @@ RUN pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://downloa RUN mkdir /dedoc_root RUN mkdir /dedoc_root/dedoc -ADD dedoc/config.py /dedoc_root/dedoc/config.py +ADD docker_gpu/config_on_gpu.py /dedoc_root/dedoc/config.py ADD dedoc/download_models.py /dedoc_root/dedoc/download_models.py RUN python3 /dedoc_root/dedoc/download_models.py diff --git a/docker_gpu/config_on_gpu.py b/docker_gpu/config_on_gpu.py new file mode 100644 index 00000000..0b1cd097 --- /dev/null +++ b/docker_gpu/config_on_gpu.py @@ -0,0 +1,85 @@ +import importlib.util +import logging +import os +import sys +from typing import Any, Optional + +logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s - %(pathname)s - %(levelname)s - %(message)s") + +DEBUG_MODE = False +RESOURCES_PATH = os.environ.get("RESOURCES_PATH", os.path.join(os.path.expanduser("~"), ".cache", "dedoc", "resources")) + +_config = dict( + # -----------------------------------------RESOURCES PATH SETTINGS---------------------------------------------------- + resources_path=RESOURCES_PATH, + intermediate_data_path=os.path.join(RESOURCES_PATH, "datasets"), + + # -----------------------------------------COMMON DEBUG SETTINGS---------------------------------------------------- + debug_mode=DEBUG_MODE, + path_debug=os.path.join(os.path.abspath(os.sep), "tmp", "dedoc"), + + # --------------------------------------------JOBLIB SETTINGS------------------------------------------------------- + # number of parallel jobs in some tasks as OCR + n_jobs=1, + + # --------------------------------------------GPU SETTINGS---------------------------------------------------------- + # set gpu in XGBoost and torch models + on_gpu=True, + + # ---------------------------------------------API SETTINGS--------------------------------------------------------- + # max file size in bytes + max_content_length=512 * 1024 * 1024, + # application port + api_port=int(os.environ.get("DOCREADER_PORT", "1231")), + static_files_dirs={}, + # log settings + logger=logging.getLogger(), + import_path_init_api_args="dedoc.api.api_args", + + # ----------------------------------------TABLE RECOGNIZER DEBUG SETTINGS------------------------------------------- + # path to save debug images for tables recognizer + path_detect=os.path.join(os.path.abspath(os.sep), "tmp", "dedoc", "debug_tables", "imgs", "detect_lines"), + + # -------------------------------------------RECOGNIZE SETTINGS----------------------------------------------------- + # TESSERACT OCR confidence threshold ( values: [-1 - undefined; 0.0 : 100.0 % - confidence value) + ocr_conf_threshold=40.0, + # max depth of document structure tree + recursion_deep_subparagraphs=30 +) + + +class Configuration(object): + """ + Pattern Singleton for configuration service + INFO: Configuration class and config are created once at the first call + """ + __instance = None + __config = None + + @classmethod + def get_instance(cls: "Configuration") -> "Configuration": + """ + Actual object creation will happen when we use Configuration.getInstance() + """ + if not cls.__instance: + cls.__instance = Configuration() + + return cls.__instance + + def __init_config(self, args: Optional[Any] = None) -> None: + if args is not None and args.config_path is not None: + spec = importlib.util.spec_from_file_location("config_module", args.config_path) + config_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(config_module) + self.__config = config_module._config + else: + self.__config = _config + + def get_config(self, args: Optional[Any] = None) -> dict: + if self.__config is None or args is not None: + self.__init_config(args) + return self.__config + + +def get_config(args: Optional[Any] = None) -> dict: + return Configuration.get_instance().get_config(args) diff --git a/docker_gpu/docker-compose.yml b/docker_gpu/docker-compose.yml index d18aa1c0..ce715ee3 100644 --- a/docker_gpu/docker-compose.yml +++ b/docker_gpu/docker-compose.yml @@ -4,8 +4,8 @@ services: dedoc: mem_limit: 16G build: - context: . - dockerfile: Dockerfile + context: .. + dockerfile: docker_gpu/Dockerfile restart: always tty: true ports: @@ -19,8 +19,8 @@ services: depends_on: - dedoc build: - context: . - dockerfile: Dockerfile + context: .. + dockerfile: docker_gpu/Dockerfile tty: true environment: DOC_READER_HOST: "dedoc"