Skip to content

Commit

Permalink
TLDR-531 -- fix
Browse files Browse the repository at this point in the history
* path_detect forword style changed
* img_processing.pt:146 - get_config()["debug_mode"] fixed
* ocr_cell_extractor.py - changed if False to if NoneType is None
  • Loading branch information
raxtemur committed Dec 1, 2023
1 parent 0dc4f11 commit d6700a5
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def __concat_images(self, src_image: np.ndarray, tree_table_nodes: List["TableTr
for tree_node in tree_table_nodes:
x_coord = space
cell_image = BBox.crop_image_by_box(src_image, tree_node.crop_text_box)
if self.config.get("debug_mode", False) and self.config.get("path_debug", False) and os.path.exists(self.config.get("path_debug")):
if self.config.get("debug_mode", False) and self.config.get("path_debug") is not None and os.path.exists(self.config.get("path_debug")):
image_path = os.path.join(self.config.get("path_debug"), "debug_tables", "batches", "cell_croped.png")
cv2.imwrite(image_path, cell_image)
cell_height, cell_width = cell_image.shape[0], cell_image.shape[1]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import logging
import math
import os
Expand Down Expand Up @@ -64,7 +65,7 @@ def apply_houph_line(img: np.ndarray, threshold_gap: int = 10, *, config: dict)
return cdst_p, angle


def get_contours_cells(img: np.ndarray, table_type: str, *, config: dict, path_detect: str) -> [Any, Any, np.ndarray, float]:
def get_contours_cells(img: np.ndarray, table_type: str, *, config: dict) -> [Any, Any, np.ndarray, float]:
"""
function's steps:
1) detects Houph lines for detecting rotate angle. Then input image has rotated on the rotate angle.
Expand All @@ -81,47 +82,43 @@ def get_contours_cells(img: np.ndarray, table_type: str, *, config: dict, path_d
img_bin = 255 - img_bin

if config.get("debug_mode", False):
os.makedirs(path_detect, exist_ok=True)

if config.get("debug_mode", False):
cv2.imwrite(os.path.join(path_detect, "image_bin.jpg"), img_bin)
cv2.imwrite(os.path.join(config.get("path_detect"), "image_bin.jpg"), img_bin)
# step 2
img_final_bin = __detect_horizontal_and_vertical_lines(img_bin, config, "tables", path_detect)
img_final_bin = __detect_horizontal_and_vertical_lines(img_bin, config, "tables", config.get("path_detect"))
# step 3
img_final_bin_houph, angle_alignment = __apply_houph_lines_and_detect_angle(img_final_bin, config)

(thresh, img_final_bin_houph) = cv2.threshold(img_final_bin_houph, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
if config.get("debug_mode", False):
cv2.imwrite(os.path.join(path_detect, "img_final_bin.jpg"), img_final_bin)
cv2.imwrite(os.path.join(config.get("path_detect"), "img_final_bin.jpg"), img_final_bin)
if config.get("debug_mode", False):
cv2.imwrite(os.path.join(path_detect, "img_final_bin_houph.jpg"), img_final_bin_houph)
cv2.imwrite(os.path.join(config.get("path_detect"), "img_final_bin_houph.jpg"), img_final_bin_houph)

# step 4 - rotating
img_final_bin_houph = rotate_with_threshold(img_final_bin_houph, angle_alignment, config=config)
img = rotate_with_threshold(img, angle_alignment, config=config)
if config.get("debug_mode", False):
# TODO: paths should be configurable but now could not exist
cv2.imwrite(os.path.join(path_detect, "aligned_img.jpg"), img)
cv2.imwrite(os.path.join(config.get("path_detect"), "aligned_img.jpg"), img)
img_final_bin_houph = __paint_bounds(img_final_bin_houph)

# step 5 - detect contours
contours, hierarchy = cv2.findContours(img_final_bin_houph, cv2.RETR_TREE, cv2.CHAIN_APPROX_TC89_KCOS)

if config.get("debug_mode", False):
# TODO: paths should be configurable but now could not exist
cv2.imwrite(os.path.join(path_detect, "img_houph_and_morph_wo_bound.jpg"), img_final_bin_houph)
cv2.imwrite(os.path.join(config.get("path_detect"), "img_houph_and_morph_wo_bound.jpg"), img_final_bin_houph)
img_w_contour = img.copy()
cv2.drawContours(img_w_contour, contours, contourIdx=-1, color=(0, 0, 0), thickness=10, hierarchy=hierarchy, maxLevel=8)
cv2.imwrite(os.path.join(path_detect, "img_with_contours.jpg"), img_w_contour)
cv2.imwrite(os.path.join(config.get("path_detect"), "img_with_contours.jpg"), img_w_contour)

# Draw external contours for tables without external contours. It is a rare case, but important for invoices
if table_options.table_wo_external_bounds in table_type:
contours, hierarchy = __get_contours_for_table_wo_external_bounds(img, img_final_bin_houph.copy(), contours, hierarchy, path_detect)
contours, hierarchy = __get_contours_for_table_wo_external_bounds(img, img_final_bin_houph.copy(), contours, hierarchy, config)
return contours, hierarchy, img, angle_alignment


def __get_contours_for_table_wo_external_bounds(img: np.ndarray, img_with_contours: np.ndarray, contours: List, hierarchy: List,
path_detect: str) -> [Any, Any]:
def __get_contours_for_table_wo_external_bounds(img: np.ndarray, img_with_contours: np.ndarray, contours: List, hierarchy: List, config: dict) -> [Any, Any]:
# get children (get table counters)
contours = np.array(contours)
list_contours, table_contours = __get_table_contours(contours, hierarchy)
Expand All @@ -143,9 +140,9 @@ def __get_contours_for_table_wo_external_bounds(img: np.ndarray, img_with_contou
x, y, w, h = cv2.boundingRect(c)
cv2.rectangle(img_with_contours, (x, y), (x + w, y + h), color=(0, 0, 0), thickness=5)

if get_config().get("debug_mode", False):
if config.get("debug_mode", False):
# TODO: paths should be configurable but now could not exist
cv2.imwrite(os.path.join(path_detect, "img_with_external_bounds.jpg"), img_with_contours)
cv2.imwrite(os.path.join(config.get("path_detect"), "img_with_external_bounds.jpg"), img_with_contours)
contours, hierarchy = cv2.findContours(img_with_contours, cv2.RETR_TREE, cv2.CHAIN_APPROX_TC89_KCOS)

return contours, hierarchy
Expand Down Expand Up @@ -189,7 +186,7 @@ def __apply_houph_lines_and_detect_angle(image: np.ndarray, config: dict) -> [np
return img_final_bin_houph, angle_alignment


def __detect_horizontal_and_vertical_lines(img_bin: np.ndarray, config: dict, task: str, path_detect: str) -> np.ndarray:
def __detect_horizontal_and_vertical_lines(img_bin: np.ndarray, config: dict, task: str) -> np.ndarray:
# Defining a kernel length

if task == "orientation":
Expand Down Expand Up @@ -220,8 +217,8 @@ def __detect_horizontal_and_vertical_lines(img_bin: np.ndarray, config: dict, ta

if config.get("debug_mode", False):
# TODO: paths should be configurable but now could not exist
cv2.imwrite(os.path.join(path_detect, "verticle_lines.jpg"), verticle_lines_img)
cv2.imwrite(os.path.join(path_detect, "horizontal_lines.jpg"), horizontal_lines_img)
cv2.imwrite(os.path.join(config.get("path_detect"), "verticle_lines.jpg"), verticle_lines_img)
cv2.imwrite(os.path.join(config.get("path_detect"), "horizontal_lines.jpg"), horizontal_lines_img)

"""Now we will add these two images.
This will have only boxes and the information written in the box will be erased.
Expand All @@ -238,7 +235,7 @@ def __detect_horizontal_and_vertical_lines(img_bin: np.ndarray, config: dict, ta
(thresh, img_bin_with_lines) = cv2.threshold(img_bin_with_lines, 200, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
if config.get("debug_mode", False):
# TODO: paths should be configurable but now could not exist
cv2.imwrite(os.path.join(path_detect, "img_bin_with_lines.jpg"), img_bin_with_lines)
cv2.imwrite(os.path.join(config.get("path_detect"), "img_bin_with_lines.jpg"), img_bin_with_lines)

return img_bin_with_lines

Expand Down Expand Up @@ -269,18 +266,22 @@ def detect_tables_by_contours(img: np.ndarray,
:param config: dict from config.py
:return: TreeTable, contour, rotate angle
"""
path_detect = config.get("path_detect", None)
path_detect = config.get("path_detect")
if config.get("debug_mode", False) and path_detect is None:
path_detect = os.path.join(os.path.abspath(os.sep), "tmp", "dedoc", "debug_tables", "imgs", "detect_lines")
path_detect = get_config()["path_detect"]
os.makedirs(path_detect, exist_ok=True)

config = copy.deepcopy(config)
config["path_detect"] = path_detect

contours, hierarchy, image, angle_rotate = get_contours_cells(img, table_type, config=config, path_detect=path_detect)
contours, hierarchy, image, angle_rotate = get_contours_cells(img, table_type, config=config)
tree_table = TableTree.parse_contours_to_tree(contours=contours, hierarchy=hierarchy, config=config)

if config.get("debug_mode", False):
config.get("logger", logging.getLogger()).debug(f"Hierarchy [Next, Previous, First_Child, Parent]:\n {hierarchy}")
tree_table.print_tree(depth=0)

cv2.imwrite(os.path.join(path_detect, "img_draw_counters.jpg"), img)
cv2.imwrite(os.path.join(config.get("path_detect"), "img_draw_counters.jpg"), img)

tree_table.set_text_into_tree(tree=tree_table, src_image=image, language=language, config=config)

Expand Down

0 comments on commit d6700a5

Please sign in to comment.