Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

code optimizations #51

Merged
merged 1 commit into from
Mar 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,18 @@
<img width="500" alt="teaser" src="https://github.com/gnana70/tamil_ocr/raw/develop/test_images/tamil_handwritten.jpg">


MODEL OUTPUT: *நிமிர்ந்த நன்னடை மேற்கொண்ட பார்வையும் நிலத்தில் யார்க் கும் அஞ்சாத நெறிகளும் திமிர்ந்த ஞானச் செருக்கும் இருப்பதால் செம்மை மாதர் திறம்புவ தில்லையாம் அமிழ்ந்து பேரிரு ளாமறி யாமையில் அவல மெய்திக் கலையின் பி வாழ்வதை உமிழ்ந்து தள்ளுதல் பெண்ணற மாகுமாம் உதய கன்ன உரைப்பது கேட்டிரோ பாரதியார் ஹேமந்த் ள்*
```
MODEL OUTPUT: நிமிர்ந்த நன்னடை மேற்கொண்ட பார்வையும்
நிலத்தில் யார்க் கும் அஞ்சாத நெறிகளும்
திமிர்ந்த ஞானச் செருக்கும் இருப்பதால்
செம்மை மாதர் திறம்புவ தில்லையாம்
அமிழ்ந்து பேரிரு ளாமறி யாமையில்
அவல மெய்திக் கலையின் வாழ்வதை
உமிழ்ந்து தள்ளுதல் பெண்ணற மாகுமாம்
உதய கன்ன உரைப்பது கேட்டிரோ
பாரதியார்
ஹேமந்த் ம
```


## How to Install and Use OCR Tamil 👨🏼‍💻
Expand Down Expand Up @@ -172,7 +183,7 @@ OCR module can be initialized by setting following parameters as per your requir

2. Currently supports only the English and Tamil languages

3. Document Text reading capability is limited. Auto identification of Paragraph, line are not supported along with Text detection model inability to detect and crop the Tamil text leads to accuracy decrease (**WORKAROUND** Can use your own text detection model along with OCR tamil text recognition model)
3. Document text reading capability is limited. Automatic identification of paragraphs and reading order is not supported; in addition, the text detection model's inability to detect and crop the Tamil text leads to decreased accuracy (**WORKAROUND**: you can use your own text detection model along with the OCR Tamil text recognition model)
<p align="center">
<img width="200" alt="teaser" src="https://github.com/gnana70/tamil_ocr/raw/main/test_images/tamil_sentence.jpg">
</p>
Expand Down
10 changes: 5 additions & 5 deletions ocr_tamil/craft_text_detector/craft_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,12 +402,12 @@ def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly
textmap, linkmap, text_threshold, link_threshold, low_text
)

if poly:
polys = getPoly_core(boxes, labels, mapper, linkmap)
else:
polys = [None] * len(boxes)
# if poly:
# polys = getPoly_core(boxes, labels, mapper, linkmap)
# else:
# polys = [None] * len(boxes)

return boxes, polys
return boxes


def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
Expand Down
20 changes: 9 additions & 11 deletions ocr_tamil/craft_text_detector/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def crop_poly(image, poly):
return cropped


def export_detected_region(image, poly, file_path, rectify=True):
def export_detected_region(image, poly, rectify=True):
"""
Arguments:
image: full image
Expand All @@ -156,16 +156,16 @@ def export_detected_region(image, poly, file_path, rectify=True):
result_rgb = crop_poly(image, poly)

    # export cropped region
result_bgr = cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR)
# result_bgr = cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR)
# cv2.imwrite(file_path, result_bgr)
return result_bgr
return result_rgb


def export_detected_regions(
image,
regions,
file_name: str = "image",
output_dir: str = "output/",
# file_name: str = "image",
# output_dir: str = "output/",
rectify: bool = False,
):
"""
Expand All @@ -178,24 +178,22 @@ def export_detected_regions(
"""

# read/convert image
image = read_image(image)
# image = read_image(image)

# deepcopy image so that original is not altered
image = copy.deepcopy(image)

# create crops dir
crops_dir = os.path.join(output_dir, file_name + "_crops")
# crops_dir = os.path.join(output_dir, file_name + "_crops")
# create_dir(crops_dir)

# init exported file paths
exported_file_paths = []

# export regions
for ind, region in enumerate(regions):
# get export path
file_path = os.path.join(crops_dir, "crop_" + str(ind) + ".png")
for region in regions:
# export region
exported_file_paths.append(export_detected_region(image, poly=region, file_path=file_path, rectify=rectify))
exported_file_paths.append(export_detected_region(image, poly=region, rectify=rectify))
# note exported file path
# exported_file_paths.append(file_path)

Expand Down
68 changes: 3 additions & 65 deletions ocr_tamil/craft_text_detector/predict.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import time
import torch

import cv2
import numpy as np
Expand Down Expand Up @@ -45,97 +46,34 @@ def get_prediction(
"heatmaps": visualizations of the detected characters/links,
"times": elapsed times of the sub modules, in seconds}
"""
t0 = time.time()

# read/convert image
image = image_utils.read_image(image)

# resize
img_resized, target_ratio, size_heatmap = image_utils.resize_aspect_ratio(
image, long_size, interpolation=cv2.INTER_LINEAR
)
ratio_h = ratio_w = 1 / target_ratio
resize_time = time.time() - t0
t0 = time.time()

# preprocessing
x = image_utils.normalizeMeanVariance(img_resized)
x = torch_utils.from_numpy(x).permute(2, 0, 1) # [h, w, c] to [c, h, w]
x = torch_utils.Variable(x.unsqueeze(0)) # [c, h, w] to [b, c, h, w]
if cuda:
x = x.cuda()
preprocessing_time = time.time() - t0
t0 = time.time()

# forward pass
with torch_utils.no_grad():
with torch.inference_mode():
y, feature = craft_net(x)
craftnet_time = time.time() - t0
t0 = time.time()

# make score and link map
score_text = y[0, :, :, 0].cpu().data.numpy()
score_link = y[0, :, :, 1].cpu().data.numpy()

# refine link
# if refine_net is not None:
# with torch_utils.no_grad():
# y_refiner = refine_net(y, feature)
# score_link = y_refiner[0, :, :, 0].cpu().data.numpy()
# refinenet_time = time.time() - t0
# t0 = time.time()

# Post-processing
boxes, polys = craft_utils.getDetBoxes(
boxes = craft_utils.getDetBoxes(
score_text, score_link, text_threshold, link_threshold, low_text, poly
)

# coordinate adjustment
boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
# polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
# for k in range(len(polys)):
# if polys[k] is None:
# polys[k] = boxes[k]

# # get image size
# img_height = image.shape[0]
# img_width = image.shape[1]

# # calculate box coords as ratios to image size
# boxes_as_ratio = []
# for box in boxes:
# boxes_as_ratio.append(box / [img_width, img_height])
# boxes_as_ratio = np.array(boxes_as_ratio)

# # calculate poly coords as ratios to image size
# polys_as_ratio = []
# for poly in polys:
# polys_as_ratio.append(poly / [img_width, img_height])
# polys_as_ratio = np.array(polys_as_ratio)

# text_score_heatmap = image_utils.cvt2HeatmapImg(score_text)
# link_score_heatmap = image_utils.cvt2HeatmapImg(score_link)

# postprocess_time = time.time() - t0

# times = {
# "resize_time": resize_time,
# "preprocessing_time": preprocessing_time,
# "craftnet_time": craftnet_time,
# "refinenet_time": refinenet_time,
# "postprocess_time": postprocess_time,
# }

# return {
# "boxes": boxes,
# "boxes_as_ratios": boxes_as_ratio,
# "polys": polys,
# "polys_as_ratios": polys_as_ratio,
# "heatmaps": {
# "text_score_heatmap": text_score_heatmap,
# "link_score_heatmap": link_score_heatmap,
# },
# "times": times,
# }

return boxes
Loading