forked from xiaofengShi/CHINESE-OCR
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ae57e96
Showing
125 changed files
with
4,613 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
modelAngle.h5 | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
env/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
.hypothesis/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# dotenv | ||
.env | ||
|
||
# virtualenv | ||
.venv | ||
venv/ | ||
ENV/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
.DS_Store | ||
.idea/ | ||
*.h5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
{ | ||
// 使用 IntelliSense 了解相关属性。 | ||
// 悬停以查看现有属性的描述。 | ||
// 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 | ||
"version": "0.2.0", | ||
"configurations": [ | ||
{ | ||
"name": "Python: Current File", | ||
"type": "python", | ||
"request": "launch", | ||
"program": "${file}" | ||
}, | ||
{ | ||
"name": "Python: Attach", | ||
"type": "python", | ||
"request": "attach", | ||
"localRoot": "${workspaceFolder}", | ||
"remoteRoot": "${workspaceFolder}", | ||
"port": 3000, | ||
"secret": "my_secret", | ||
"host": "localhost" | ||
}, | ||
{ | ||
"name": "Python: Terminal (integrated)", | ||
"type": "python", | ||
"request": "launch", | ||
"program": "${file}", | ||
"console": "integratedTerminal" | ||
}, | ||
{ | ||
"name": "Python: Terminal (external)", | ||
"type": "python", | ||
"request": "launch", | ||
"program": "${file}", | ||
"console": "externalTerminal" | ||
}, | ||
{ | ||
"name": "Python: Django", | ||
"type": "python", | ||
"request": "launch", | ||
"program": "${workspaceFolder}/manage.py", | ||
"args": [ | ||
"runserver", | ||
"--noreload", | ||
"--nothreading" | ||
], | ||
"debugOptions": [ | ||
"RedirectOutput", | ||
"Django" | ||
] | ||
}, | ||
{ | ||
"name": "Python: Flask (0.11.x or later)", | ||
"type": "python", | ||
"request": "launch", | ||
"module": "flask", | ||
"env": { | ||
"FLASK_APP": "${workspaceFolder}/app.py" | ||
}, | ||
"args": [ | ||
"run", | ||
"--no-debugger", | ||
"--no-reload" | ||
] | ||
}, | ||
{ | ||
"name": "Python: Module", | ||
"type": "python", | ||
"request": "launch", | ||
"module": "module.name" | ||
}, | ||
{ | ||
"name": "Python: Pyramid", | ||
"type": "python", | ||
"request": "launch", | ||
"args": [ | ||
"${workspaceFolder}/development.ini" | ||
], | ||
"debugOptions": [ | ||
"RedirectOutput", | ||
"Pyramid" | ||
] | ||
}, | ||
{ | ||
"name": "Python: Watson", | ||
"type": "python", | ||
"request": "launch", | ||
"program": "${workspaceFolder}/console.py", | ||
"args": [ | ||
"dev", | ||
"runserver", | ||
"--noreload=True" | ||
] | ||
}, | ||
{ | ||
"name": "Python: All debug Options", | ||
"type": "python", | ||
"request": "launch", | ||
"pythonPath": "${config:python.pythonPath}", | ||
"program": "${file}", | ||
"module": "module.name", | ||
"env": { | ||
"VAR1": "1", | ||
"VAR2": "2" | ||
}, | ||
"envFile": "${workspaceFolder}/.env", | ||
"args": [ | ||
"arg1", | ||
"arg2" | ||
], | ||
"debugOptions": [ | ||
"RedirectOutput" | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"python.pythonPath": "/anaconda3/bin/python", | ||
"git.ignoreLimitWarning": true | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# 本文基于tensorflow、keras/pytorch实现对自然场景的文字检测及端到端的OCR中文文字识别 | ||
|
||
# 实现功能 | ||
|
||
- [x] 文字方向检测 0、90、180、270度检测 | ||
- [x] 文字检测 后期将切换到keras版本文本检测 实现keras端到端的文本检测及识别 | ||
- [x] 不定长OCR识别 | ||
- [x] 增加python3.6 支持 | ||
|
||
|
||
## 环境部署 | ||
``` Bash | ||
##GPU环境 | ||
sh setup.sh | ||
##CPU环境 | ||
sh setup-cpu.sh | ||
##CPU python3环境 | ||
sh setup-python3.sh | ||
``` | ||
|
||
# 模型训练 | ||
|
||
## 训练keras版本的crnn | ||
|
||
``` Bash | ||
cd train && sh train-keras.sh | ||
``` | ||
|
||
## 训练pytorch版本的crnn | ||
|
||
``` Bash | ||
cd train && sh train-pytorch.sh | ||
``` | ||
# 文字方向检测 | ||
基于图像分类,在VGG16模型的基础上,训练0、90、180、270度检测的分类模型,详细代码参考angle/predict.py文件,训练图片8000张,准确率88.23%。 | ||
模型地址[百度云](https://pan.baidu.com/s/1pM2ha5P)下载 | ||
|
||
# 文字检测 | ||
支持CPU、GPU环境,一键部署, | ||
[文本检测训练参考](https://github.com/eragonruan/text-detection-ctpn) | ||
|
||
|
||
# OCR 端到端识别:GRU+CTC | ||
## ocr识别采用GRU+CTC端到端识别技术,实现不分隔识别不定长文字 | ||
提供keras 与pytorch版本的训练代码,在理解keras的基础上,可以切换到pytorch版本,此版本更稳定 | ||
- 此外还添加了tensorflow版本的资源仓库:[TF:LSTM-CTC_loss](https://github.com/ilovin/lstm_ctc_ocr) | ||
|
||
|
||
# 识别结果展示 | ||
## 文字检测及OCR识别结果 | ||
<div> | ||
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp.jpg"/> | ||
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp.png"/> | ||
</div> | ||
|
||
### 倾斜文字 | ||
|
||
<div> | ||
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp1.jpg"/> | ||
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp1.png"/> | ||
</div> | ||
|
||
## 参考 | ||
``` | ||
1.crnn | ||
https://github.com/meijieru/crnn.pytorch.git | ||
2.keras-crnn 版本实现参考 https://www.zhihu.com/question/59645822 | ||
3.tensorflow-crnn | ||
https://github.com/ilovin/lstm_ctc_ocr | ||
4.ctpn | ||
https://github.com/eragonruan/text-detection-ctpn | ||
https://github.com/tianzhi0549/CTPN | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/usr/bin/env python2 | ||
# -*- coding: utf-8 -*- | ||
""" | ||
图像文字方向检测 | ||
@author: lywen | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# -*- coding: utf-8 -*- | ||
# from keras.models import load_model | ||
import numpy as np | ||
from PIL import Image | ||
from keras.applications.vgg16 import preprocess_input, VGG16 | ||
from keras.layers import Dense | ||
from keras.models import Model | ||
##编译模型,以较小的学习参数进行训练 | ||
from keras.optimizers import SGD | ||
|
||
|
||
def load(weights_path=None):
    """Build the VGG16-based text-orientation classifier and load its weights.

    A VGG16 network is created without pretrained weights for 224x224 RGB
    input. The output of its second-to-last layer feeds a new 4-unit softmax
    layer named ``predictions_class``, which becomes the model's only output.
    The model is compiled with SGD and categorical cross-entropy before the
    weights are loaded.

    Args:
        weights_path: Location of the ``modelAngle.h5`` weights file. When
            None, the original hard-coded path is used so existing callers
            keep working.

    Returns:
        The compiled Keras ``Model`` with weights loaded.
    """
    if weights_path is None:
        # Historical default: an absolute path on the original author's
        # machine. Pass ``weights_path`` explicitly on any other machine.
        weights_path = '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/angle/modelAngle.h5'

    vgg = VGG16(weights=None, input_shape=(224, 224, 3))
    # Replace the output layer: branch off the second-to-last layer and
    # attach a 4-class softmax head (the original comment said 3 outputs,
    # but the layer has 4 units).
    features = vgg.layers[-2].output
    predictions_class = Dense(4, activation='softmax', name='predictions_class')(features)
    classifier = Model(inputs=vgg.input, outputs=[predictions_class])

    # Compile with a small learning rate, as the original training setup did.
    sgd = SGD(lr=0.00001, momentum=0.9)
    classifier.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    classifier.load_weights(weights_path)
    return classifier
|
||
|
||
# Shared classifier instance; stays None until predict() first needs it, so
# importing this module does not load the model.
model = None


def predict(path=None, img=None):
    """Predict the rotation angle of the text in an image.

    The image is converted to RGB, a 10% margin is cropped from every side,
    and the result is resized to 224x224 before being passed to the model.

    Args:
        path: Path of an image file to open. Takes precedence over ``img``
            when both are given.
        img: Image data accepted by ``PIL.Image.fromarray`` (presumably a
            numpy array); used only when ``path`` is None.

    Returns:
        One of 0, 90, 180 or 270, selected by the highest-scoring class.

    Raises:
        ValueError: If neither ``path`` nor ``img`` is supplied.
    """
    global model

    # Fail fast on an empty call. Previously this loaded the model and then
    # crashed with UnboundLocalError because no image was ever bound.
    if path is None and img is None:
        raise ValueError('predict() requires either `path` or `img`')

    if model is None:
        model = load()

    ROTATE = [0, 90, 180, 270]
    if path is not None:
        im = Image.open(path).convert('RGB')
    else:
        im = Image.fromarray(img).convert('RGB')

    # Crop 10% off each edge to remove noise near the image border.
    w, h = im.size
    margin_x, margin_y = int(0.1 * w), int(0.1 * h)
    im = im.crop((margin_x, margin_y, w - margin_x, h - margin_y))
    im = im.resize((224, 224))

    pixels = preprocess_input(np.array(im).astype(np.float32))
    pred = model.predict(np.array([pixels]))  # batch containing one image
    index = np.argmax(pred, axis=1)[0]
    return ROTATE[index]
Empty file.
Oops, something went wrong.