Skip to content

Commit

Permalink
tesseract: Better alternative method for obtaining available languages
Browse files Browse the repository at this point in the history
  • Loading branch information
jbaiter committed Dec 30, 2013
1 parent ab18b13 commit 7bccde7
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions spreadsplug/tesseract.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,22 @@

from spreads.plugin import HookPlugin, PluginOption
from spreads.util import find_in_path, MissingDependencyException
from spreads.vendor.pathlib import Path

# Fail at import time when no `tesseract` executable is reported by
# find_in_path, so the missing dependency surfaces immediately rather
# than when the binary is first invoked.
if not find_in_path('tesseract'):
    raise MissingDependencyException("Could not find executable `tesseract`"
                                     " in $PATH. Please install the"
                                     " appropriate package(s)!")

# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were lost, so lines from the old and the new version appear
# interleaved (two `logger` assignments, two `AVAILABLE_LANGS` assignments).
# Confirm against the real file which of them are actually present.

# Module-level logger named after the plugin module.
logger = logging.getLogger('spreadsplug.tesseract')
# Variant 1: ask the `tesseract` executable itself. stderr is merged into
# the captured output, the output is split on newlines, and the first and
# last elements of the split are dropped.
# NOTE(review): `--list-langfoo` is presumably a deliberately invalid
# argument used to provoke a message listing the languages -- TODO confirm;
# newer tesseract releases document `--list-langs` for this purpose.
try:
AVAILABLE_LANGS = (subprocess.check_output(["tesseract", "--list-langfoo"],
stderr=subprocess.STDOUT)
.split("\n")[1:-1])
except subprocess.CalledProcessError:
# Fallback used when the command exits with a non-zero status.
AVAILABLE_LANGS = ['en']

logger = logging.getLogger('spreadsplug.tesseract')
# Variant 2: derive the language codes from the file stems of every
# `*.traineddata` file found in a fixed tessdata directory. If both variants
# are kept, this assignment overrides the one above.
# NOTE(review): the directory is hard-coded; if it does not exist or holds no
# matching files the resulting list is empty -- verify this is acceptable on
# systems that install tessdata elsewhere.
AVAILABLE_LANGS = [x.stem for x in
Path('/usr/share/tesseract-ocr/tessdata')
.glob('*.traineddata')]


class TesseractPlugin(HookPlugin):
Expand Down

0 comments on commit 7bccde7

Please sign in to comment.