Skip to content

Commit

Permalink
fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
raxtemur committed Dec 25, 2023
1 parent bc09a03 commit a2be023
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 10 deletions.
2 changes: 1 addition & 1 deletion dedoc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

# --------------------------------------------GPU SETTINGS----------------------------------------------------------
# set gpu in XGBoost and torch models
on_gpu=True,
on_gpu=False,

# ---------------------------------------------API SETTINGS---------------------------------------------------------
# max file size in bytes
Expand Down
5 changes: 1 addition & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ services:
- 1231:1231
environment:
DOCREADER_PORT: 1231
NVIDIA_VISIBLE_DEVICES: 0
runtime: nvidia


test:
depends_on:
Expand All @@ -27,8 +26,6 @@ services:
DOCREADER_PORT: 1231
is_test: $test
PYTHONPATH: $PYTHONPATH:/dedoc_root/tests:/dedoc_root
NVIDIA_VISIBLE_DEVICES: 0
runtime: nvidia
command:
bash dedoc_root/tests/run_tests_in_docker.sh

Expand Down
3 changes: 2 additions & 1 deletion docker_gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ FROM dedocproject/dedoc_p3.9_base:version_2023_08_28
ENV PYTHONPATH "${PYTHONPATH}:/dedoc_root"
ENV RESOURCES_PATH "/dedoc_root/resources"


ADD requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt
RUN pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html


RUN mkdir /dedoc_root
RUN mkdir /dedoc_root/dedoc
ADD dedoc/config.py /dedoc_root/dedoc/config.py
ADD docker_gpu/config_on_gpu.py /dedoc_root/dedoc/config.py
ADD dedoc/download_models.py /dedoc_root/dedoc/download_models.py
RUN python3 /dedoc_root/dedoc/download_models.py

Expand Down
85 changes: 85 additions & 0 deletions docker_gpu/config_on_gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import importlib.util
import logging
import os
import sys
from typing import Any, Optional

# Root logger writes INFO-and-above records to stdout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s - %(pathname)s - %(levelname)s - %(message)s",
)

DEBUG_MODE = False
# The RESOURCES_PATH environment variable takes precedence; otherwise resources live under ~/.cache/dedoc/resources.
RESOURCES_PATH = os.getenv(
    "RESOURCES_PATH",
    os.path.join(os.path.expanduser("~"), ".cache", "dedoc", "resources"),
)

# Default configuration dictionary of the GPU build.
_config = {
    # ---------------------------------------- resources path settings ----------------------------------------
    "resources_path": RESOURCES_PATH,
    "intermediate_data_path": os.path.join(RESOURCES_PATH, "datasets"),

    # ----------------------------------------- common debug settings -----------------------------------------
    "debug_mode": DEBUG_MODE,
    "path_debug": os.path.join(os.path.abspath(os.sep), "tmp", "dedoc"),

    # -------------------------------------------- joblib settings --------------------------------------------
    # number of parallel jobs in some tasks, such as OCR
    "n_jobs": 1,

    # ---------------------------------------------- GPU settings ---------------------------------------------
    # use the GPU in XGBoost and torch models
    "on_gpu": True,

    # ---------------------------------------------- API settings ---------------------------------------------
    # maximum file size in bytes (512 MiB)
    "max_content_length": 512 * 1024 * 1024,
    # application port, taken from the DOCREADER_PORT environment variable when it is set
    "api_port": int(os.getenv("DOCREADER_PORT", "1231")),
    "static_files_dirs": {},
    # log settings: the root logger configured above
    "logger": logging.getLogger(),
    "import_path_init_api_args": "dedoc.api.api_args",

    # ------------------------------------ table recognizer debug settings ------------------------------------
    # where debug images of the table recognizer are saved
    "path_detect": os.path.join(os.path.abspath(os.sep), "tmp", "dedoc", "debug_tables", "imgs", "detect_lines"),

    # ------------------------------------------- recognize settings ------------------------------------------
    # Tesseract OCR confidence threshold (values: -1 - undefined; 0.0 to 100.0 % - confidence value)
    "ocr_conf_threshold": 40.0,
    # maximum depth of the document structure tree
    "recursion_deep_subparagraphs": 30,
}


class Configuration(object):
    """
    Singleton holder of the configuration dictionary.

    The single instance is created lazily by the first call of ``Configuration.get_instance()``.
    The configuration itself is built by the first call of ``get_config`` and is rebuilt
    every time ``get_config`` receives ``args`` that is not None.
    """
    __instance = None
    __config = None

    @classmethod
    def get_instance(cls: "Configuration") -> "Configuration":
        """
        Return the shared Configuration object, creating it on the first call.
        """
        if cls.__instance is None:
            cls.__instance = Configuration()

        return cls.__instance

    def __init_config(self, args: Optional[Any] = None) -> None:
        """
        (Re)build the stored configuration.

        :param args: optional object that may carry a ``config_path`` attribute with the path to a python file
            defining a module-level ``_config`` dictionary; when ``args`` is None, has no ``config_path``
            attribute, or the attribute is None, the module-level default ``_config`` is used
        """
        # getattr instead of plain attribute access: args without a config_path attribute
        # must fall back to the default configuration instead of raising AttributeError
        config_path = getattr(args, "config_path", None)
        if config_path is not None:
            # execute the user-supplied file as a module and take its _config dictionary
            spec = importlib.util.spec_from_file_location("config_module", config_path)
            config_module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(config_module)
            self.__config = config_module._config
        else:
            self.__config = _config

    def get_config(self, args: Optional[Any] = None) -> dict:
        """
        Return the configuration dictionary.

        :param args: optional object forwarded to the configuration initializer; any value other than None
            forces the configuration to be rebuilt, None returns the stored configuration
            (building it first if it does not exist yet)
        :return: the current configuration dictionary
        """
        if self.__config is None or args is not None:
            self.__init_config(args)
        return self.__config


def get_config(args: Optional[Any] = None) -> dict:
    """
    Module-level shortcut to the configuration held by the Configuration singleton.

    :param args: optional object passed unchanged to ``Configuration.get_config``
    :return: the dictionary returned by ``Configuration.get_config``
    """
    configuration = Configuration.get_instance()
    return configuration.get_config(args)
8 changes: 4 additions & 4 deletions docker_gpu/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ services:
dedoc:
mem_limit: 16G
build:
context: .
dockerfile: Dockerfile
context: ..
dockerfile: docker_gpu/Dockerfile
restart: always
tty: true
ports:
Expand All @@ -19,8 +19,8 @@ services:
depends_on:
- dedoc
build:
context: .
dockerfile: Dockerfile
context: ..
dockerfile: docker_gpu/Dockerfile
tty: true
environment:
DOC_READER_HOST: "dedoc"
Expand Down

0 comments on commit a2be023

Please sign in to comment.