使用Tesseract OCR和python进行数字识别

Question

I use Tesseract and python to read digits (from a energy meter). 我使用Tesseract和python来读取数字（来自能量计）。 Everything works well except for the number "1". 一切都很好，除了数字“1”。 Tesseract can not read the "1" Digit. Tesseract无法读取“1”数字。

This is the picture I send to tesseract : 这是我发给tesseract的图片：

And tesseract reads "0000027 ". tesseract读取“0000027”。

How can I tell Tesseract that the vertical rod is a "1" ? 我怎么能告诉Tesseract垂直杆是“1”？

This is my tesseract initialisation : 这是我的tesseract初始化：

import tesseract

TESSERACT_LIBRARY_PATH = "C:\\Program Files (x86)\\Tesseract-OCR"
LANGUAGE = "eng"
CHARACTERS = "0123456789"
FALSE = "0"
TRUE = "1"

def init_ocr():
    """ 
    .. py:function:: init_ocr()

        Utilize the Tesseract-OCR library to create an tesseract_ocr that 
        predicts the numbers to be read off of the meter. 

        :return: tesseract_ocr Tesseracts OCR API.
        :rtype: Class
    """
    # Initialize the tesseract_ocr with the english language package.
    tesseract_ocr = tesseract.TessBaseAPI()
    tesseract_ocr.Init(TESSERACT_LIBRARY_PATH, LANGUAGE, 
                       tesseract.OEM_DEFAULT)


    # Limit the characters being seached for to numerics.
    tesseract_ocr.SetVariable("tessedit_char_whitelist", CHARACTERS)

    # Set the tesseract_ocr to predict for only one character.
    tesseract_ocr.SetPageSegMode(tesseract.PSM_AUTO)

    # Tesseract's Directed Acyclic Graph.
    # Not necessary for number recognition.
    tesseract_ocr.SetVariable("load_system_dawg", FALSE)
    tesseract_ocr.SetVariable("load_freq_dawg", FALSE)
    tesseract_ocr.SetVariable("load_number_dawg", TRUE)

    tesseract_ocr.SetVariable("classify_enable_learning", FALSE)
    tesseract_ocr.SetVariable("classify_enable_adaptive_matcher", FALSE)

    return tesseract_ocr

Answer 1

Slightly irrelevant answer, though may serve your original goal. 稍微不相关的答案，虽然可能符合您的原始目标。

I had similar problem with tesseract and I had very strict performance requirements as well. 我和tesseract有类似的问题，我也有非常严格的性能要求。 I found this simple solution on SO and crafted simple recogniser with OpenCV. 我在SO上找到了这个简单的解决方案，并使用OpenCV制作了简单的识别器。

It boils down to finding bounding rectangles (from edges) on the very clear image that you have and then trying to match found objects versus templates. 它归结为在您拥有的非常清晰的图像上找到边界矩形（来自边缘），然后尝试匹配找到的对象与模板。 I believe the solution in your case will be both simple and precise though will require slightly more code than you have now. 我相信您的案例中的解决方案既简单又精确，但需要的代码比现在多一些。

I will follow this question, since it will be nice to have working solution with tesseract. 我会关注这个问题，因为有了tesseract的工作解决方案会很好。

I have a limited time, but it seems to be a working solution: 我的时间有限，但似乎是一个有效的解决方案：

import os
import cv2
import numpy
KNN_SQUARE_SIDE = 50  # Square 50 x 50 px.


def resize(cv_image, factor):
    new_size = tuple(map(lambda x: x * factor, cv_image.shape[::-1]))
    return cv2.resize(cv_image, new_size)


def crop(cv_image, box):
    x0, y0, x1, y1 = box
    return cv_image[y0:y1, x0:x1]


def draw_box(cv_image, box):
    x0, y0, x1, y1 = box
    cv2.rectangle(cv_image, (x0, y0), (x1, y1), (0, 0, 255), 2)


def draw_boxes_and_show(cv_image, boxes, title='N'):
    temp_image = cv2.cvtColor(cv_image, cv2.COLOR_GRAY2RGB)
    for box in boxes:
        draw_box(temp_image, box)
    cv2.imshow(title, temp_image)
    cv2.waitKey(0)


class BaseKnnMatcher(object):
    distance_threshold = 0

    def __init__(self, source_dir):
        self.model, self.label_map = self.get_model_and_label_map(source_dir)

    @staticmethod
    def get_model_and_label_map(source_dir):
        responses = []
        label_map = []
        samples = numpy.empty((0, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE), numpy.float32)
        for label_idx, filename in enumerate(os.listdir(source_dir)):

            label = filename[:filename.index('.png')]
            label_map.append(label)
            responses.append(label_idx)

            image = cv2.imread(os.path.join(source_dir, filename), 0)

            suit_image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
            sample = suit_image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE))
            samples = numpy.append(samples, sample, 0)

        responses = numpy.array(responses, numpy.float32)
        responses = responses.reshape((responses.size, 1))
        model = cv2.KNearest()
        model.train(samples, responses)

        return model, label_map

    def predict(self, image):
        image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
        image_standard_size = numpy.float32(image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE)))
        closest_class, results, neigh_resp, distance = self.model.find_nearest(image_standard_size, k=1)

        if distance[0][0] > self.distance_threshold:
            return None

        return self.label_map[int(closest_class)]


class DigitKnnMatcher(BaseKnnMatcher):
    distance_threshold = 10 ** 10


class MeterValueReader(object):
    def __init__(self):
        self.digit_knn_matcher = DigitKnnMatcher(source_dir='templates')

    @classmethod
    def get_symbol_boxes(cls, cv_image):
        ret, thresh = cv2.threshold(cv_image.copy(), 150, 255, cv2.THRESH_BINARY)
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        symbol_boxes = []
        for contour in contours:
            x, y, width, height = cv2.boundingRect(contour)

            # You can test here for box size, though not required in your example:
            # if cls.is_size_of_digit(width, height):
            #     symbol_boxes.append((x, y, x+width, y+height))

            symbol_boxes.append((x, y, x+width, y+height))
        return symbol_boxes

    def get_value(self, meter_cv2_image):
        symbol_boxes = self.get_symbol_boxes(meter_cv2_image)
        symbol_boxes.sort()  # x is first in tuple
        symbols = []
        for box in symbol_boxes:
            symbol = self.digit_knn_matcher.predict(crop(meter_cv2_image, box))
            symbols.append(symbol)
        return symbols


if __name__ == '__main__':
    # If you want to see how boxes detection works, uncomment these:
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # draw_boxes_and_show(img_bw, boxes)

    # Uncomment to generate templates from image
    # import random
    # TEMPLATE_DIR = 'templates'
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # for box in boxes:
    #     # You need to label templates manually after extraction
    #     cv2.imwrite(os.path.join(TEMPLATE_DIR, '%s.png' % random.randint(0, 1000)), crop(img_bw, box))

    img_bw = cv2.imread(os.path.join('original.png'), 0)
    vr = MeterValueReader()
    print vr.get_value(img_bw)

使用Tesseract OCR和python进行数字识别

问题描述

1 个解决方案

解决方案1
4 已采纳 2015-09-24 08:17:59

使用Tesseract OCR和python进行数字识别

问题描述

1 个解决方案

解决方案1 4 已采纳 2015-09-24 08:17:59

解决方案1
4 已采纳 2015-09-24 08:17:59