簡體   English   中英

帶python和tesseract的OCR

[英]OCR with python and tesseract

OCR的示例代碼

from PIL import Image
from pytesser import *

# Demo script: run OCR on one image through each of the pytesser entry points.
# The path is a raw string so the backslashes in the Windows path are never
# interpreted as escape sequences (\D and \m are invalid escapes and trigger a
# warning on recent Python 3 releases).
image_file = r'E:\Downloads\menu.jpg'

# 1) OCR an already-opened PIL image.
im = Image.open(image_file)
text = image_to_string(im)

# 2) OCR straight from the file path (overwrites the previous result).
text = image_file_to_string(image_file)

# 3) Same as 2), explicitly requesting the graceful fallback on errors.
text = image_file_to_string(image_file, graceful_errors=True)

print("=====output=======\n")
print(text)

錯誤

  File "C:\Users\XXX\Anaconda3\lib\site-packages\pytesser\__init__.py", line 64
    except errors.Tesser_General_Exception, value:
                                          ^
SyntaxError: invalid syntax

我正在按照這篇使用tesseract進行python OCR的教程操作

我正在使用python 3，下載了tesseract庫並將其添加到anaconda庫中。但是第一次運行時，我收到了提示print缺少括號的錯誤消息，於是我做了修改，現在又遇到了上面這個錯誤。如果有人能幫我解決這個問題就太好了。我還在此處附上了tesseract包裝器的源代碼：

"""OCR in Python using the Tesseract engine from Google

http://code.google.com/p/pytesser/
by Michael J.T. O'Kelly
V 0.0.1, 3/10/07"""

from PIL import Image
import subprocess

import util
import errors

# Path of the tesseract executable called at the command line.
# Written as a raw string: in Python 3 a plain 'C:\Users\...' literal is a
# SyntaxError because \U starts a unicode escape sequence.
tesseract_exe_name = r'C:\Users\SACHIN\Anaconda3\Lib\site-packages\pytesser\tesseract'
scratch_image_name = "temp.bmp" # This file must be .bmp or other Tesseract-compatible format
scratch_text_name_root = "temp" # Leave out the .txt extension
cleanup_scratch_flag = True  # Temporary files cleaned up after OCR operation

def call_tesseract(input_filename, output_filename):
    """Run the external tesseract executable on input_filename.

    output_filename is passed to tesseract as the output name; tesseract is
    expected to append the '.txt' extension itself.  The call blocks until
    the child process exits.  On a non-zero exit status,
    errors.check_for_errors() is invoked (presumably it raises a
    Tesser_General_Exception describing the failure -- confirm in errors.py).
    """
    command = [tesseract_exe_name, input_filename, output_filename]
    exit_status = subprocess.Popen(command).wait()
    if exit_status == 0:
        return
    errors.check_for_errors()

def image_to_string(im, cleanup = cleanup_scratch_flag):
    """Run OCR on the image object im and return the recognised text.

    The image is handed to util.image_to_scratch to be written under
    scratch_image_name, tesseract is run on that file, and the result is
    read back with util.retrieve_text.  When cleanup is true,
    util.perform_cleanup is called afterwards, whether or not the OCR
    steps succeeded.
    """
    try:
        util.image_to_scratch(im, scratch_image_name)
        call_tesseract(scratch_image_name, scratch_text_name_root)
        # The finally clause below still runs before this value is returned.
        return util.retrieve_text(scratch_text_name_root)
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)

def image_file_to_string(filename, cleanup = cleanup_scratch_flag, graceful_errors=True):
    """Run OCR directly on the image file filename and return the text.

    Tesseract is first run on the file as-is.  If that raises
    errors.Tesser_General_Exception and graceful_errors is true, the file is
    opened with PIL and processed through image_to_string instead; with
    graceful_errors false the exception is re-raised.  When cleanup is true,
    util.perform_cleanup is called on the scratch files before returning or
    propagating an exception.
    """
    try:
        try:
            call_tesseract(filename, scratch_text_name_root)
            return util.retrieve_text(scratch_text_name_root)
        except errors.Tesser_General_Exception:
            if not graceful_errors:
                raise
            # Fall back to the scratch-image route via PIL.
            fallback_image = Image.open(filename)
            return image_to_string(fallback_image, cleanup)
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)


# Self-test when run as a script.  Uses Python 3 syntax throughout:
# "except ... as value" instead of "except ..., value", and print() calls
# instead of print statements (the old forms are a SyntaxError on Python 3).
if __name__ == '__main__':
    # OCR from an in-memory PIL image.
    im = Image.open('phototest.tif')
    text = image_to_string(im)
    print(text)

    # OCR from a file with the graceful fallback disabled; report the failure
    # instead of aborting the run.
    try:
        text = image_file_to_string('fnord.tif', graceful_errors=False)
    except errors.Tesser_General_Exception as value:
        print("fnord.tif is incompatible filetype.  Try graceful_errors=True")
        print(value)

    # Same file again, this time allowing the fallback conversion.
    text = image_file_to_string('fnord.tif', graceful_errors=True)
    print("fnord.tif contents:", text)

    text = image_file_to_string('fonts_test.png', graceful_errors=True)
    print(text)

而不是使用:

except errors.Tesser_General_Exception, value:

替換為

except errors.Tesser_General_Exception as value:

這個對我有用。這完全是從Python 2升級到Python 3所帶來的問題。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM