Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

code optimizations #51

Merged
merged 1 commit into from
Mar 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,18 @@
<img width="500" alt="teaser" src="https://github.com/gnana70/tamil_ocr/raw/develop/test_images/tamil_handwritten.jpg">


MODEL OUTPUT: *நிமிர்ந்த நன்னடை மேற்கொண்ட பார்வையும் நிலத்தில் யார்க் கும் அஞ்சாத நெறிகளும் திமிர்ந்த ஞானச் செருக்கும் இருப்பதால் செம்மை மாதர் திறம்புவ தில்லையாம் அமிழ்ந்து பேரிரு ளாமறி யாமையில் அவல மெய்திக் கலையின் பி வாழ்வதை உமிழ்ந்து தள்ளுதல் பெண்ணற மாகுமாம் உதய கன்ன உரைப்பது கேட்டிரோ பாரதியார் ஹேமந்த் ள்*
```
MODEL OUTPUT: நிமிர்ந்த நன்னடை மேற்கொண்ட பார்வையும்
நிலத்தில் யார்க் கும் அஞ்சாத நெறிகளும்
திமிர்ந்த ஞானச் செருக்கும் இருப்பதால்
செம்மை மாதர் திறம்புவ தில்லையாம்
அமிழ்ந்து பேரிரு ளாமறி யாமையில்
அவல மெய்திக் கலையின் வாழ்வதை
உமிழ்ந்து தள்ளுதல் பெண்ணற மாகுமாம்
உதய கன்ன உரைப்பது கேட்டிரோ
பாரதியார்
ஹேமந்த் ம
```


## How to Install and Use OCR Tamil 👨🏼‍💻
Expand Down Expand Up @@ -172,7 +183,7 @@ OCR module can be initialized by setting following parameters as per your requir

2. Currently supports only the English and Tamil languages

3. Document Text reading capability is limited. Auto identification of Paragraph, line are not supported along with Text detection model inability to detect and crop the Tamil text leads to accuracy decrease (**WORKAROUND** Can use your own text detection model along with OCR tamil text recognition model)
3. Document text reading capability is limited. Automatic identification of paragraphs and reading order is not supported; in addition, the text detection model's inability to detect and crop the Tamil text leads to decreased accuracy (**WORKAROUND**: you can use your own text detection model along with the OCR Tamil text recognition model)
<p align="center">
<img width="200" alt="teaser" src="https://github.com/gnana70/tamil_ocr/raw/main/test_images/tamil_sentence.jpg">
</p>
Expand Down
10 changes: 5 additions & 5 deletions ocr_tamil/craft_text_detector/craft_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,12 +402,12 @@ def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly
textmap, linkmap, text_threshold, link_threshold, low_text
)

if poly:
polys = getPoly_core(boxes, labels, mapper, linkmap)
else:
polys = [None] * len(boxes)
# if poly:
# polys = getPoly_core(boxes, labels, mapper, linkmap)
# else:
# polys = [None] * len(boxes)

return boxes, polys
return boxes


def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
Expand Down
20 changes: 9 additions & 11 deletions ocr_tamil/craft_text_detector/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def crop_poly(image, poly):
return cropped


def export_detected_region(image, poly, file_path, rectify=True):
def export_detected_region(image, poly, rectify=True):
"""
Arguments:
image: full image
Expand All @@ -156,16 +156,16 @@ def export_detected_region(image, poly, file_path, rectify=True):
result_rgb = crop_poly(image, poly)

    # export cropped region
result_bgr = cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR)
# result_bgr = cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR)
# cv2.imwrite(file_path, result_bgr)
return result_bgr
return result_rgb


def export_detected_regions(
image,
regions,
file_name: str = "image",
output_dir: str = "output/",
# file_name: str = "image",
# output_dir: str = "output/",
rectify: bool = False,
):
"""
Expand All @@ -178,24 +178,22 @@ def export_detected_regions(
"""

# read/convert image
image = read_image(image)
# image = read_image(image)

# deepcopy image so that original is not altered
image = copy.deepcopy(image)

# create crops dir
crops_dir = os.path.join(output_dir, file_name + "_crops")
# crops_dir = os.path.join(output_dir, file_name + "_crops")
# create_dir(crops_dir)

# init exported file paths
exported_file_paths = []

# export regions
for ind, region in enumerate(regions):
# get export path
file_path = os.path.join(crops_dir, "crop_" + str(ind) + ".png")
for region in regions:
# export region
exported_file_paths.append(export_detected_region(image, poly=region, file_path=file_path, rectify=rectify))
exported_file_paths.append(export_detected_region(image, poly=region, rectify=rectify))
# note exported file path
# exported_file_paths.append(file_path)

Expand Down
68 changes: 3 additions & 65 deletions ocr_tamil/craft_text_detector/predict.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import time
import torch

import cv2
import numpy as np
Expand Down Expand Up @@ -45,97 +46,34 @@ def get_prediction(
"heatmaps": visualizations of the detected characters/links,
"times": elapsed times of the sub modules, in seconds}
"""
t0 = time.time()

# read/convert image
image = image_utils.read_image(image)

# resize
img_resized, target_ratio, size_heatmap = image_utils.resize_aspect_ratio(
image, long_size, interpolation=cv2.INTER_LINEAR
)
ratio_h = ratio_w = 1 / target_ratio
resize_time = time.time() - t0
t0 = time.time()

# preprocessing
x = image_utils.normalizeMeanVariance(img_resized)
x = torch_utils.from_numpy(x).permute(2, 0, 1) # [h, w, c] to [c, h, w]
x = torch_utils.Variable(x.unsqueeze(0)) # [c, h, w] to [b, c, h, w]
if cuda:
x = x.cuda()
preprocessing_time = time.time() - t0
t0 = time.time()

# forward pass
with torch_utils.no_grad():
with torch.inference_mode():
y, feature = craft_net(x)
craftnet_time = time.time() - t0
t0 = time.time()

# make score and link map
score_text = y[0, :, :, 0].cpu().data.numpy()
score_link = y[0, :, :, 1].cpu().data.numpy()

# refine link
# if refine_net is not None:
# with torch_utils.no_grad():
# y_refiner = refine_net(y, feature)
# score_link = y_refiner[0, :, :, 0].cpu().data.numpy()
# refinenet_time = time.time() - t0
# t0 = time.time()

# Post-processing
boxes, polys = craft_utils.getDetBoxes(
boxes = craft_utils.getDetBoxes(
score_text, score_link, text_threshold, link_threshold, low_text, poly
)

# coordinate adjustment
boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
# polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
# for k in range(len(polys)):
# if polys[k] is None:
# polys[k] = boxes[k]

# # get image size
# img_height = image.shape[0]
# img_width = image.shape[1]

# # calculate box coords as ratios to image size
# boxes_as_ratio = []
# for box in boxes:
# boxes_as_ratio.append(box / [img_width, img_height])
# boxes_as_ratio = np.array(boxes_as_ratio)

# # calculate poly coords as ratios to image size
# polys_as_ratio = []
# for poly in polys:
# polys_as_ratio.append(poly / [img_width, img_height])
# polys_as_ratio = np.array(polys_as_ratio)

# text_score_heatmap = image_utils.cvt2HeatmapImg(score_text)
# link_score_heatmap = image_utils.cvt2HeatmapImg(score_link)

# postprocess_time = time.time() - t0

# times = {
# "resize_time": resize_time,
# "preprocessing_time": preprocessing_time,
# "craftnet_time": craftnet_time,
# "refinenet_time": refinenet_time,
# "postprocess_time": postprocess_time,
# }

# return {
# "boxes": boxes,
# "boxes_as_ratios": boxes_as_ratio,
# "polys": polys,
# "polys_as_ratios": polys_as_ratio,
# "heatmaps": {
# "text_score_heatmap": text_score_heatmap,
# "link_score_heatmap": link_score_heatmap,
# },
# "times": times,
# }

return boxes
Loading