Skip to content

Commit

Permalink
push all files
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaofengShi committed Apr 14, 2018
0 parents commit ae57e96
Show file tree
Hide file tree
Showing 125 changed files with 4,613 additions and 0 deletions.
107 changes: 107 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
modelAngle.h5

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

.DS_Store
.idea/
*.h5
116 changes: 116 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
// 使用 IntelliSense 了解相关属性。
// 悬停以查看现有属性的描述。
// 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}"
},
{
"name": "Python: Attach",
"type": "python",
"request": "attach",
"localRoot": "${workspaceFolder}",
"remoteRoot": "${workspaceFolder}",
"port": 3000,
"secret": "my_secret",
"host": "localhost"
},
{
"name": "Python: Terminal (integrated)",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "Python: Terminal (external)",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "externalTerminal"
},
{
"name": "Python: Django",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/manage.py",
"args": [
"runserver",
"--noreload",
"--nothreading"
],
"debugOptions": [
"RedirectOutput",
"Django"
]
},
{
"name": "Python: Flask (0.11.x or later)",
"type": "python",
"request": "launch",
"module": "flask",
"env": {
"FLASK_APP": "${workspaceFolder}/app.py"
},
"args": [
"run",
"--no-debugger",
"--no-reload"
]
},
{
"name": "Python: Module",
"type": "python",
"request": "launch",
"module": "module.name"
},
{
"name": "Python: Pyramid",
"type": "python",
"request": "launch",
"args": [
"${workspaceFolder}/development.ini"
],
"debugOptions": [
"RedirectOutput",
"Pyramid"
]
},
{
"name": "Python: Watson",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/console.py",
"args": [
"dev",
"runserver",
"--noreload=True"
]
},
{
"name": "Python: All debug Options",
"type": "python",
"request": "launch",
"pythonPath": "${config:python.pythonPath}",
"program": "${file}",
"module": "module.name",
"env": {
"VAR1": "1",
"VAR2": "2"
},
"envFile": "${workspaceFolder}/.env",
"args": [
"arg1",
"arg2"
],
"debugOptions": [
"RedirectOutput"
]
}
]
}
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"python.pythonPath": "/anaconda3/bin/python",
"git.ignoreLimitWarning": true
}
76 changes: 76 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# 本文基于tensorflow、keras/pytorch实现对自然场景的文字检测及端到端的OCR中文文字识别

# 实现功能

- [x] 文字方向检测 0、90、180、270度检测
- [x] 文字检测 后期将切换到keras版本文本检测 实现keras端到端的文本检测及识别
- [x] 不定长OCR识别
- [x] 增加python3.6 支持


## 环境部署
``` Bash
##GPU环境
sh setup.sh
##CPU环境
sh setup-cpu.sh
##CPU python3环境
sh setup-python3.sh
```

# 模型训练

## 训练keras版本的crnn

``` Bash
cd train && sh train-keras.sh
```

## 训练pytorch版本的crnn

``` Bash
cd train && sh train-pytorch.sh
```
# 文字方向检测
基于图像分类,在VGG16模型的基础上,训练0、90、180、270度检测的分类模型,详细代码参考angle/predict.py文件,训练图片8000张,准确率88.23%。
模型地址[百度云](https://pan.baidu.com/s/1pM2ha5P)下载

# 文字检测
支持CPU、GPU环境,一键部署,
[文本检测训练参考](https://github.com/eragonruan/text-detection-ctpn)


# OCR 端到端识别:GRU+CTC
## ocr识别采用GRU+CTC端到端识别技术,实现不分隔识别不定长文字
提供keras 与pytorch版本的训练代码,在理解keras的基础上,可以切换到pytorch版本,此版本更稳定
- 此外还添加了tensorflow版本的资源仓库:[TF:LSTM-CTC_loss](https://github.com/ilovin/lstm_ctc_ocr)


# 识别结果展示
## 文字检测及OCR识别结果
<div>
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp.jpg"/>
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp.png"/>
</div>

### 倾斜文字

<div>
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp1.jpg"/>
<img width="300" height="300" src="https://github.com/chineseocr/chinses-ocr/blob/master/img/tmp1.png"/>
</div>

## 参考
```
1.crnn
https://github.com/meijieru/crnn.pytorch.git
2.keras-crnn 版本实现参考 https://www.zhihu.com/question/59645822
3.tensorflow-crnn
https://github.com/ilovin/lstm_ctc_ocr
4.ctpn
https://github.com/eragonruan/text-detection-ctpn
https://github.com/tianzhi0549/CTPN
```

6 changes: 6 additions & 0 deletions angle/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
图像文字方向检测
@author: lywen
"""
49 changes: 49 additions & 0 deletions angle/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# from keras.models import load_model
import numpy as np
from PIL import Image
from keras.applications.vgg16 import preprocess_input, VGG16
from keras.layers import Dense
from keras.models import Model
##编译模型,以较小的学习参数进行训练
from keras.optimizers import SGD


def load(weights_path='/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/angle/modelAngle.h5'):
    """Build the VGG16-based orientation classifier and load its weights.

    A VGG16 network is created without pretrained weights for 224x224 RGB
    input. Its final layer is dropped and replaced by a 4-unit softmax
    layer named ``predictions_class``. The model is compiled with SGD and
    categorical cross-entropy, then the weights are read from
    ``weights_path``.

    Args:
        weights_path: Location of the trained weights file handed to
            ``model.load_weights``. Defaults to the path the original code
            hard-coded, so existing ``load()`` callers behave as before;
            pass another path to run on a different machine.

    Returns:
        The compiled Keras ``Model`` with the weights loaded.
    """
    vgg = VGG16(weights=None, input_shape=(224, 224, 3))
    # Replace the output layer: branch off the second-to-last layer and
    # attach a 4-class softmax head (the old comment said 3, the code uses 4).
    features = vgg.layers[-2].output
    predictions_class = Dense(4, activation='softmax', name='predictions_class')(features)
    model = Model(inputs=vgg.input, outputs=[predictions_class])
    # Compile with a small learning rate, as in the original training setup.
    sgd = SGD(lr=0.00001, momentum=0.9)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    model.load_weights(weights_path)
    return model


# Module-level cache for the loaded model; stays None until predict() first calls load().
model = None


def predict(path=None, img=None):
    """Predict the text orientation of an image.

    The model is loaded on first use and cached in the module-level
    ``model`` variable. The image is converted to RGB, 10% is cropped from
    every edge, and the result is resized to 224x224 and passed through
    ``preprocess_input`` before being classified.

    Args:
        path: Path of an image file to open. Takes precedence over ``img``
            when both are given.
        img: Image array passed to ``Image.fromarray``; used only when
            ``path`` is None.

    Returns:
        The rotation angle (0, 90, 180 or 270) selected by the arg-max of
        the model's prediction.

    Raises:
        ValueError: If both ``path`` and ``img`` are None.
    """
    global model
    # Validate before the expensive model load so that a bad call fails fast
    # with a clear message instead of an UnboundLocalError further down.
    if path is None and img is None:
        raise ValueError('predict() requires either `path` or `img`')
    if model is None:
        model = load()
    ROTATE = [0, 90, 180, 270]
    if path is not None:
        # convert() returns an independent image, so the file can be closed here.
        with Image.open(path) as source:
            im = source.convert('RGB')
    else:
        im = Image.fromarray(img).convert('RGB')
    w, h = im.size
    xmin, ymin, xmax, ymax = int(0.1 * w), int(0.1 * h), w - int(0.1 * w), h - int(0.1 * h)
    # Crop the image borders to remove edge noise.
    im = im.crop((xmin, ymin, xmax, ymax))
    im = im.resize((224, 224))
    img = np.array(im)
    img = preprocess_input(img.astype(np.float32))
    pred = model.predict(np.array([img]))
    index = np.argmax(pred, axis=1)[0]
    return ROTATE[index]
Empty file added crnn/__init__.py
Empty file.
Loading

0 comments on commit ae57e96

Please sign in to comment.