9
9
#include "ccx_encoders_helpers.h"
10
10
#include "ccx_encoders_spupng.h"
11
11
#include "ocr.h"
12
- #undef OCR_DEBUG
13
12
14
13
struct ocrCtx
15
14
{
@@ -686,7 +685,6 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
686
685
TessResultIteratorDelete (ri );
687
686
}
688
687
// End Color Detection
689
- freep (& text_out );
690
688
boxDestroy (& crop_points );
691
689
692
690
pixDestroy (& pix );
@@ -698,47 +696,31 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
698
696
return text_out ;
699
697
}
700
698
701
- void erode (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color )
699
+ void erode (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color , int background_index )
702
700
{
703
- int background_index ;
704
- for (background_index = 0 ; background_index < nb_color ; background_index ++ )
705
- {
706
- if (alpha [background_index ])
707
- {
708
- break ;
709
- }
710
- }
711
701
// we will use a 2*2 kernel for the erosion
712
702
for (int row = 0 ; row < h - 1 ; row ++ )
713
703
{
714
704
for (int col = 0 ; col < w - 1 ; col ++ )
715
705
{
716
- if (alpha [ bitmap [row * w + col ]] || alpha [ bitmap [(row + 1 ) * w + col ]] ||
717
- alpha [ bitmap [row * w + (col + 1 )]] || alpha [ bitmap [(row + 1 ) * w + (col + 1 )]] )
706
+ if (bitmap [row * w + col ] == background_index || bitmap [(row + 1 ) * w + col ] == background_index ||
707
+ bitmap [row * w + (col + 1 )] == background_index || bitmap [(row + 1 ) * w + (col + 1 )] == background_index )
718
708
{
719
709
bitmap [row * w + col ] = background_index ;
720
710
}
721
711
}
722
712
}
723
713
}
724
714
725
- void dilate (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color )
715
+ void dilate (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color , int foreground_index )
726
716
{
727
- int foreground_index ;
728
- for (foreground_index = 0 ; foreground_index < nb_color ; foreground_index ++ )
729
- {
730
- if (!alpha [foreground_index ])
731
- {
732
- break ;
733
- }
734
- }
735
717
// we will use a 2*2 kernel for the dilation
736
718
for (int row = 0 ; row < h - 1 ; row ++ )
737
719
{
738
720
for (int col = 0 ; col < w - 1 ; col ++ )
739
721
{
740
- if (!( alpha [ bitmap [row * w + col ]] && alpha [ bitmap [(row + 1 ) * w + col ]] &&
741
- alpha [ bitmap [row * w + (col + 1 )]] && alpha [ bitmap [(row + 1 ) * w + (col + 1 )]] ))
722
+ if (( bitmap [row * w + col ] == foreground_index && bitmap [(row + 1 ) * w + col ] == foreground_index &&
723
+ bitmap [row * w + (col + 1 )] == foreground_index && bitmap [(row + 1 ) * w + (col + 1 )] == foreground_index ))
742
724
{
743
725
bitmap [row * w + col ] = foreground_index ;
744
726
}
@@ -769,6 +751,7 @@ static int quantize_map(png_byte *alpha, png_color *palette,
769
751
*/
770
752
uint32_t * mcit = NULL ;
771
753
struct transIntensity ti = {alpha , palette };
754
+ int text_color , text_bg_color ;
772
755
773
756
int ret = 0 ;
774
757
@@ -835,6 +818,14 @@ static int quantize_map(png_byte *alpha, png_color *palette,
835
818
max_ind = j ;
836
819
}
837
820
}
821
+
822
+ // Assume second most frequent color to be text background (first is alpha channel)
823
+ if (i == 1 )
824
+ text_bg_color = iot [max_ind ];
825
+ // Assume third most frequent color to be text color
826
+ if (i == 2 )
827
+ text_color = iot [max_ind ];
828
+
838
829
for (j = i ; j > 0 && max_ind < mcit [j - 1 ]; j -- )
839
830
{
840
831
mcit [j ] = mcit [j - 1 ];
@@ -878,8 +869,8 @@ static int quantize_map(png_byte *alpha, png_color *palette,
878
869
palette [iot [i ]].green = palette [index ].green ;
879
870
}
880
871
}
881
- erode (palette , alpha , bitmap , w , h , nb_color );
882
- dilate (palette , alpha , bitmap , w , h , nb_color );
872
+ erode (palette , alpha , bitmap , w , h , nb_color , text_bg_color );
873
+ dilate (palette , alpha , bitmap , w , h , nb_color , text_color );
883
874
#ifdef OCR_DEBUG
884
875
ccx_common_logging .log_ftn ("Colors present in quantized Image\n" );
885
876
for (int i = 0 ; i < nb_color ; i ++ )
@@ -1062,7 +1053,13 @@ char *paraof_ocrtext(struct cc_subtitle *sub, struct encoder_ctx *context)
1062
1053
len += strlen (rect -> ocr_text );
1063
1054
}
1064
1055
if (len <= 0 )
1056
+ {
1057
+ for (i = 0 , rect = sub -> data ; i < sub -> nb_data ; i ++ , rect ++ )
1058
+ {
1059
+ freep (& rect -> ocr_text );
1060
+ }
1065
1061
return NULL ;
1062
+ }
1066
1063
else
1067
1064
{
1068
1065
str = malloc (len + 1 + 10 ); // Extra space for possible trailing '\n's at the end of tesseract UTF8 text
@@ -1076,7 +1073,7 @@ char *paraof_ocrtext(struct cc_subtitle *sub, struct encoder_ctx *context)
1076
1073
if (!rect -> ocr_text )
1077
1074
continue ;
1078
1075
add_ocrtext2str (str , rect -> ocr_text , context -> encoded_crlf , context -> encoded_crlf_length );
1079
- free ( rect -> ocr_text );
1076
+ freep ( & rect -> ocr_text );
1080
1077
}
1081
1078
return str ;
1082
1079
}
0 commit comments