image: eurotext.tif
reubano@tokpro [~]⚡ convert eurotext.tif -rotate 3 +repage eurotext_03.tif
reubano@tokpro [~]⚡ convert eurotext.tif -rotate 5 +repage eurotext_05.tif
bug.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function, division
from os import path as p, environ
from ctypes import (
CDLL, POINTER, Structure, c_char_p, c_bool, c_int, c_float, byref)
from ctypes.util import find_library
# find_library() expects the bare library name (no "lib" prefix and no
# ".dylib"/".so" suffix).  The portable spelling is tried first; the original
# literal spellings are kept as a fallback so any setup that resolved them
# before keeps working.  Either value is None when nothing is found.
LIBTESS = find_library('tesseract') or find_library('libtesseract.dylib')
LIBLEPT = find_library('lept') or find_library('liblept.dylib')
# Default data path for the OCR engine; override with $TESSDATA_PREFIX.
TESSDATA_PREFIX = environ.get('TESSDATA_PREFIX', '/opt/local/share')
class TessBaseAPI(Structure):
    """Opaque handle type for the Tesseract engine.

    No fields are declared: Python only ever holds pointers to this type
    and hands them back to the C library.
    """
class Pix(Structure):
    """Opaque handle type for an image returned by Leptonica's ``pixRead``.

    No fields are declared: Python only ever holds pointers to this type
    and hands them back to the C libraries.
    """
class TessPageIterator(Structure):
    """Opaque handle type for the iterator returned by layout analysis.

    No fields are declared: Python only ever holds pointers to this type
    and hands them back to the C library.
    """
def create_tess_api(prefix=TESSDATA_PREFIX, lang='eng'):
    """Load Tesseract and Leptonica and return an initialised engine.

    Declares the ctypes signatures of every C function this script calls,
    creates a ``TessBaseAPI`` instance and initialises it.

    Args:
        prefix: Data path handed to ``TessBaseAPIInit3``.
        lang: Language code handed to ``TessBaseAPIInit3``.

    Returns:
        A ``(tesseract, leptonica, api)`` tuple: the two loaded libraries
        and the engine handle.  The caller owns ``api`` and should release
        it with ``tesseract.TessBaseAPIDelete(api)``.

    Raises:
        OSError: If either shared library could not be located.
        RuntimeError: If ``TessBaseAPIInit3`` reports failure.
    """
    if not (LIBTESS and LIBLEPT):
        # Without this guard a missing library becomes CDLL(None), which
        # does not fail cleanly at load time.
        raise OSError(
            'could not locate shared libraries (tesseract=%r, leptonica=%r)'
            % (LIBTESS, LIBLEPT))

    def _as_bytes(text):
        # c_char_p parameters only accept byte strings on Python 3.
        return text if isinstance(text, bytes) else text.encode('utf-8')

    tesseract = CDLL(LIBTESS)
    leptonica = CDLL(LIBLEPT)
    base_api = POINTER(TessBaseAPI)
    page_it = POINTER(TessPageIterator)

    # --- Tesseract bindings -------------------------------------------
    tesseract.TessBaseAPICreate.restype = base_api
    tesseract.TessBaseAPIDelete.argtypes = [base_api]
    tesseract.TessBaseAPIDelete.restype = None
    tesseract.TessBaseAPIInit3.argtypes = [base_api, c_char_p, c_char_p]
    # Init3 returns a C int status (0 on success), not a boolean.
    tesseract.TessBaseAPIInit3.restype = c_int
    tesseract.TessBaseAPISetPageSegMode.argtypes = [base_api, c_int]
    tesseract.TessBaseAPISetPageSegMode.restype = None
    tesseract.TessBaseAPISetImage2.argtypes = [base_api, POINTER(Pix)]
    tesseract.TessBaseAPISetImage2.restype = None
    tesseract.TessBaseAPIAnalyseLayout.argtypes = [base_api]
    tesseract.TessBaseAPIAnalyseLayout.restype = page_it
    tesseract.TessPageIteratorOrientation.argtypes = [
        page_it, POINTER(c_int), POINTER(c_int), POINTER(c_int),
        POINTER(c_float)]
    tesseract.TessPageIteratorOrientation.restype = None
    tesseract.TessPageIteratorDelete.argtypes = [page_it]
    tesseract.TessPageIteratorDelete.restype = None

    # --- Leptonica bindings -------------------------------------------
    leptonica.pixRead.argtypes = [c_char_p]
    leptonica.pixRead.restype = POINTER(Pix)
    leptonica.pixDestroy.argtypes = [POINTER(POINTER(Pix))]
    leptonica.pixDestroy.restype = None

    api = tesseract.TessBaseAPICreate()
    status = tesseract.TessBaseAPIInit3(
        api, _as_bytes(prefix), _as_bytes(lang))

    if status != 0:
        # Do not hand back a half-initialised engine.
        tesseract.TessBaseAPIDelete(api)
        raise RuntimeError(
            'TessBaseAPIInit3 failed (prefix=%r, lang=%r, status=%s)'
            % (prefix, lang, status))

    return tesseract, leptonica, api
def get_orientation(tesseract, leptonica, api, path, mode=1):
    """Print and return the page orientation found by layout analysis.

    Args:
        tesseract: Tesseract library object from ``create_tess_api``.
        leptonica: Leptonica library object from ``create_tess_api``.
        api: Engine handle from ``create_tess_api``.
        path: Image file to analyse (text or bytes).
        mode: Page segmentation mode passed to
            ``TessBaseAPISetPageSegMode``.

    Returns:
        The orientation value reported by ``TessPageIteratorOrientation``,
        or ``None`` when layout analysis yields no iterator.

    Raises:
        IOError: If Leptonica cannot read ``path``.
    """
    tesseract.TessBaseAPISetPageSegMode(api, mode)

    # pixRead is declared with a c_char_p argument, which needs bytes on
    # Python 3; the original ``path`` is kept for the printed message.
    raw_path = path if isinstance(path, bytes) else path.encode('utf-8')
    pix = leptonica.pixRead(raw_path)

    if not pix:
        # Passing a NULL image on to Tesseract is never valid.
        raise IOError('cannot read image: %s' % path)

    try:
        tesseract.TessBaseAPISetImage2(api, pix)
        it = tesseract.TessBaseAPIAnalyseLayout(api)

        if not it:
            return None

        try:
            orientation, direction, line_order = c_int(), c_int(), c_int()
            skew = c_float()
            tesseract.TessPageIteratorOrientation(
                it, byref(orientation), byref(direction),
                byref(line_order), byref(skew))
        finally:
            # The iterator is owned by the caller of AnalyseLayout.
            tesseract.TessPageIteratorDelete(it)
    finally:
        # Drop our reference to the image once analysis is finished.
        leptonica.pixDestroy(byref(pix))

    print('%s: %s' % (path, orientation.value))
    return orientation.value
if __name__ == '__main__':
    # One engine serves every image; creating a fresh one per file (as
    # before) leaked an initialised engine on each iteration.
    tesseract, leptonica, api = create_tess_api()

    try:
        for path in ['eurotext.tif', 'eurotext_03.tif', 'eurotext_05.tif']:
            get_orientation(tesseract, leptonica, api, path)
    finally:
        tesseract.TessBaseAPIDelete(api)
reubano@tokpro [~/Documents/Projects/tesseract]⚡ python bug.py
eurotext.tif: 0
eurotext_03.tif: 0
Empty page!!
please follow FAQ#rules-and-advices
It would be helpful to have a CONTRIBUTING file with that information so that github will show it on the issues page.
Thanks to pyocr I figured it out!
# ... /snip
# https://github.com/jflesch/pyocr/blob/master/src/pyocr/libtesseract/tesseract_raw.py
class OSResults(Structure):
    """ctypes layout for the result written by ``TessBaseAPIDetectOS``.

    Field order and types follow the pyocr binding referenced above.
    NOTE(review): ``c_void_p`` is not in the ctypes import list shown at
    the top of bug.py -- confirm it is imported in the elided part.
    """

    _fields_ = [
        # One score per candidate orientation.
        ('orientations', c_float * 4),
        ('scripts_na', c_float * 4 * (116 + 1 + 2 + 1)),
        ('unicharset', c_void_p),
        # Index of the best-scoring orientation.
        ('best_orientation_id', c_int),
        ('best_script_id', c_int),
        ('best_sconfidence', c_float),
        ('best_oconfidence', c_float),
        # Spare room after the known fields -- presumably so a library
        # build with a larger struct cannot write past the buffer.
        ('padding', c_char_p * 512),
    ]
# ... /snip
def create_tess_api(prefix=TESSDATA_PREFIX, lang='eng'):
    # ... /snip
    # Addition to the existing bindings: declare the signature of the
    # orientation/script detection entry point, which fills an OSResults
    # structure supplied by the caller.
    tesseract.TessBaseAPIDetectOS.argtypes = [base_api, POINTER(OSResults)]
    tesseract.TessBaseAPIDetectOS.restype = c_bool
    # ... /snip
def get_orientation(tesseract, leptonica, api, path, mode=0):
    """Print and return the page orientation found by ``DetectOS``.

    Args:
        tesseract: Tesseract library object from ``create_tess_api``.
        leptonica: Leptonica library object from ``create_tess_api``.
        api: Engine handle from ``create_tess_api``.
        path: Image file to analyse (text or bytes).
        mode: Page segmentation mode passed to
            ``TessBaseAPISetPageSegMode``.

    Returns:
        ``osr.best_orientation_id`` when detection succeeds, otherwise
        ``None`` (nothing is printed in that case).

    Raises:
        IOError: If Leptonica cannot read ``path``.
    """
    tesseract.TessBaseAPISetPageSegMode(api, mode)

    # pixRead is declared with a c_char_p argument, which needs bytes on
    # Python 3; the original ``path`` is kept for the printed message.
    raw_path = path if isinstance(path, bytes) else path.encode('utf-8')
    pix = leptonica.pixRead(raw_path)

    if not pix:
        # Passing a NULL image on to Tesseract is never valid.
        raise IOError('cannot read image: %s' % path)

    osr = OSResults()

    try:
        tesseract.TessBaseAPISetImage2(api, pix)
        # DetectOS returns a success flag, not a page iterator, so the
        # earlier TessPageIteratorOrientation call no longer applies:
        # everything needed is written into ``osr``.
        detected = tesseract.TessBaseAPIDetectOS(api, byref(osr))
    finally:
        # Drop our reference to the image once detection is finished.
        leptonica.pixDestroy(byref(pix))

    if not detected:
        return None

    print('%s: %s' % (path, osr.best_orientation_id))
    print('confidence: %s' % osr.best_oconfidence)
    return osr.best_orientation_id
@reubano
I added 'tesserocr' to the AddOns wiki.
https://github.com/tesseract-ocr/tesseract/wiki/AddOns#tesseract-wrappers
I added this example to APIExample
It would be helpful to have a CONTRIBUTING file with that information so that github will show it on the issues page.
@reubano, thanks for the suggestion.
Now we have it! :-)
Most helpful comment
It would be helpful to have a
CONTRIBUTING file with that information so that github will show it on the issues page.