forked from tleyden/open-ocr
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtesseract_engine_test.go
90 lines (76 loc) · 2.95 KB
/
tesseract_engine_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package ocrworker
import (
"encoding/json"
"os"
"testing"
"github.com/rs/zerolog/log"
"github.com/couchbaselabs/go.assert"
)
// TestTesseractEngineWithRequest builds an OcrRequest from docs/testimage.png
// with a "tessedit_char_whitelist" engine argument and checks that
// TesseractEngine.ProcessRequest returns no error. The result is only logged,
// not compared against an expected value. Skipped in short mode.
func TestTesseractEngineWithRequest(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	engine := TesseractEngine{}

	bytes, err := os.ReadFile("docs/testimage.png")
	assert.True(t, err == nil)

	cFlags := make(map[string]interface{})
	cFlags["tessedit_char_whitelist"] = "0123456789"

	ocrRequest := OcrRequest{
		ImgBytes:   bytes,
		EngineType: EngineTesseract,
		EngineArgs: cFlags,
	}

	workerConfig := workerConfigForTests()

	result, err := engine.ProcessRequest(&ocrRequest, &workerConfig)
	assert.True(t, err == nil)

	// A zerolog event is only written once it is finished with Msg/Send;
	// without the terminator the result would never reach the log.
	log.Info().Str("component", "TEST").Interface("result", result).Msg("OCR result")
}
// TestTesseractEngineWithJson unmarshals several JSON request bodies into an
// OcrRequest, attaches the bytes of docs/testimage.png, and checks that the
// engine returned by NewOcrEngine processes each request without error.
// Skipped in short mode.
func TestTesseractEngineWithJson(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	// Request bodies covering absent, empty, null and populated engine_args.
	testJsons := []string{
		`{"engine":"tesseract"}`,
		`{"engine":"tesseract", "engine_args":{}}`,
		`{"engine":"tesseract", "engine_args":null}`,
		`{"engine":"tesseract", "engine_args":{"config_vars":{"tessedit_char_whitelist":"0123456789"}, "-psm":"1"}}`,
		`{"engine":"tesseract", "engine_args":{"config_vars":{"tessedit_create_hocr":"1", "tessedit_pageseg_mode":"1"}, "-psm":"3"}}`,
	}

	for _, testJson := range testJsons {
		// zerolog events must be finished with Msg/Send to be written.
		log.Info().Str("component", "TEST").Interface("testJson", testJson).Msg("processing request")

		ocrRequest := OcrRequest{}
		err := json.Unmarshal([]byte(testJson), &ocrRequest)
		assert.True(t, err == nil)

		bytes, err := os.ReadFile("docs/testimage.png")
		assert.True(t, err == nil)
		ocrRequest.ImgBytes = bytes

		workerConfig := workerConfigForTests()
		engine := NewOcrEngine(ocrRequest.EngineType)

		result, err := engine.ProcessRequest(&ocrRequest, &workerConfig)
		if err != nil {
			// Log the failure before the assertion below fails the test.
			log.Error().Err(err).Str("component", "TEST").Msg("ProcessRequest failed")
		}
		assert.True(t, err == nil)

		log.Info().Str("component", "TEST").Interface("result", result).Msg("OCR result")
	}
}
// TestNewTesseractEngineArgs decodes a JSON request carrying config_vars, psm
// and lang, then verifies that NewTesseractEngineArgs copies each of them into
// the resulting engine args.
func TestNewTesseractEngineArgs(t *testing.T) {
	const payload = `{"engine":"tesseract", "engine_args":{"config_vars":{"tessedit_char_whitelist":"0123456789"}, "psm":"0", "lang":"jpn"}}`

	var req OcrRequest
	decodeErr := json.Unmarshal([]byte(payload), &req)
	assert.True(t, decodeErr == nil)

	args, argsErr := NewTesseractEngineArgs(&req)
	assert.True(t, argsErr == nil)

	// Exactly one config var is expected, holding the whitelist value.
	vars := args.configVars
	assert.Equals(t, len(vars), 1)
	assert.Equals(t, vars["tessedit_char_whitelist"], "0123456789")

	// Page segmentation mode and language come from "psm" and "lang".
	assert.Equals(t, args.pageSegMode, "0")
	assert.Equals(t, args.lang, "jpn")
}
// TestTesseractEngineWithFile calls TesseractEngine.processImageFile directly
// on docs/testimage.png with zero-value TesseractEngineArgs and checks that no
// error is returned. The result is only logged. Skipped in short mode.
func TestTesseractEngineWithFile(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	engine := TesseractEngine{}
	engineArgs := TesseractEngineArgs{}

	result, err := engine.processImageFile("docs/testimage.png", engineArgs)
	assert.True(t, err == nil)

	// A zerolog event is only written once it is finished with Msg/Send.
	log.Info().Str("component", "TEST").Interface("result", result).Msg("OCR result")
}