[英]OCR with python and tesseract
from PIL import Image
from pytesser import *
# Path of the image to run OCR on.  Raw string: the backslashes of a Windows
# path must be taken literally ("\D" and "\m" are invalid escape sequences
# in a normal literal and trigger warnings on current Python 3 releases).
image_file = r'E:\Downloads\menu.jpg'

# OCR an already-opened PIL image.
im = Image.open(image_file)
text = image_to_string(im)

# OCR straight from the file path; the second call passes graceful_errors
# explicitly.  Each call overwrites the previous result, so only the last
# one is printed.
text = image_file_to_string(image_file)
text = image_file_to_string(image_file, graceful_errors=True)

print("=====output=======\n")
print(text)
File "C:\Users\XXX\Anaconda3\lib\site-packages\pytesser\__init__.py", line 64
except errors.Tesser_General_Exception, value:
^
SyntaxError: invalid syntax
我正在按照这个教程学习如何使用 Python 和 tesseract 进行 OCR。
我使用的是 Python 3,下载了 tesseract 库并把它添加到了 Anaconda 的库目录中。第一次运行时,我收到了 print 缺少括号的错误消息,于是我做了修改;现在又遇到了上面这个错误,如果有人能帮我解决这个问题就太好了。我还在这里附上了 tesseract 包装器的源代码:
"""OCR in Python using the Tesseract engine from Google
http://code.google.com/p/pytesser/
by Michael J.T. O'Kelly
V 0.0.1, 3/10/07"""
import subprocess

from PIL import Image

import errors
import util
# Name (here: full path) of the executable to be called at the command line.
# Raw string: in a normal literal "\U" (as in "C:\Users") starts a unicode
# escape, which makes the original line a SyntaxError on Python 3.
tesseract_exe_name = r'C:\Users\SACHIN\Anaconda3\Lib\site-packages\pytesser\tesseract'
scratch_image_name = "temp.bmp"  # This file must be .bmp or other Tesseract-compatible format
scratch_text_name_root = "temp"  # Leave out the .txt extension
cleanup_scratch_flag = True  # Temporary files cleaned up after OCR operation
def call_tesseract(input_filename, output_filename):
    """Run the external tesseract executable and wait for it to finish.

    The command line is the module-level ``tesseract_exe_name`` followed by
    ``input_filename`` and ``output_filename``.  Per the original notes the
    input is restricted to certain file types and the text ends up in
    ``output_filename`` plus a ``txt`` extension.

    If the process exits with a non-zero status,
    ``errors.check_for_errors()`` is called.
    """
    command = [tesseract_exe_name, input_filename, output_filename]
    exit_status = subprocess.Popen(command).wait()
    if exit_status:
        errors.check_for_errors()
def image_to_string(im, cleanup = cleanup_scratch_flag):
    """Write ``im`` to the scratch image file, OCR it, and return the text.

    When ``cleanup`` is true the scratch files are removed afterwards,
    whether or not the OCR steps succeeded.
    """
    try:
        util.image_to_scratch(im, scratch_image_name)
        call_tesseract(scratch_image_name, scratch_text_name_root)
        # The finally clause below still runs before this value is returned.
        return util.retrieve_text(scratch_text_name_root)
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)
def image_file_to_string(filename, cleanup = cleanup_scratch_flag, graceful_errors=True):
    """OCR the image stored at ``filename`` and return the recognised text.

    tesseract is first run directly on the file.  If that raises
    ``errors.Tesser_General_Exception`` and ``graceful_errors`` is true, the
    file is opened with PIL and passed through ``image_to_string`` instead;
    with ``graceful_errors`` false the exception is re-raised.

    When ``cleanup`` is true the scratch files are removed afterwards.
    """
    try:
        call_tesseract(filename, scratch_text_name_root)
        text = util.retrieve_text(scratch_text_name_root)
    except errors.Tesser_General_Exception:
        if not graceful_errors:
            raise
        # Fall back to going through the scratch image file.
        text = image_to_string(Image.open(filename), cleanup)
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)
    return text
# Demo / smoke test, executed only when the module is run as a script.
if __name__ == '__main__':
    # OCR an image that has already been opened with PIL.
    im = Image.open('phototest.tif')
    text = image_to_string(im)
    print(text)

    # With graceful_errors=False an incompatible file raises instead of
    # being converted.  "except ... as value" and print() are the Python 3
    # spellings; the old "except X, value:" form is a SyntaxError there.
    try:
        text = image_file_to_string('fnord.tif', graceful_errors=False)
    except errors.Tesser_General_Exception as value:
        print("fnord.tif is incompatible filetype. Try graceful_errors=True")
        print(value)

    # Same file again, this time allowing the fallback conversion.
    text = image_file_to_string('fnord.tif', graceful_errors=True)
    print("fnord.tif contents:", text)

    text = image_file_to_string('fonts_test.png', graceful_errors=True)
    print(text)
不要使用:
except errors.Tesser_General_Exception, value:
替换为
except errors.Tesser_General_Exception as value:
这个方法对我有用。这都是从 Python 2 升级到 Python 3 带来的问题。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.