难以用 tesseract 检测数字

Question

我在检测以下类型图像上的文本时遇到了一些困难：

tesseract 似乎很难将数字与图表区分开来。 我的目标是找到每个数字及其位置。

从这张图片中，我运行以下代码，它应该给我找到的文本周围的矩形：

import cv2
import pytesseract
from pytesseract import Output
import numpy as np


# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

source = cv2.imread('Temp/VE_cropped.png')

# Pre-processing chain: grayscale -> median blur (aperture 3) -> inverted
# Otsu binarisation -> one dilation pass with a 2x2 kernel of ones.
structuring = np.ones((2, 2), np.uint8)
grey = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
blurred = cv2.medianBlur(grey, 3)
_, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
binary = cv2.dilate(binary, structuring, iterations=1)

# OCR the binary image; Output.DICT returns a dict of parallel lists.
detections = pytesseract.image_to_data(binary, output_type=Output.DICT)

# Expand to three channels so the rectangles can be drawn in colour.
canvas = cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)

# Draw a rectangle, padded by 10 px on every side, around every entry
# Tesseract reported (no filtering is applied to the entries).
boxes = zip(
    detections['text'],
    detections['left'],
    detections['top'],
    detections['width'],
    detections['height'],
)
for _text, left, top, width, height in boxes:
    top_left = (left - 10, top - 10)
    bottom_right = (left + width + 10, top + height + 10)
    canvas = cv2.rectangle(canvas, top_left, bottom_right, (0, 0, 255), 2)
cv2.imshow("processed", canvas)
cv2.waitKey(0)

运行后得到如下结果：

Answer 1

我想我明白你的需求。首先，Tesseract 能很好地解决许多问题，尤其是那些易于 OCR 的图像——也就是没有复杂背景的图像。但在您的情况下，仅靠 Tesseract 或简单的图像阈值处理并不足够，您必须在 OCR 之前对图像做更多的预处理。要解决您的问题，您需要清理图像，尽量只保留数字。这可能是一项艰苦的工作。

最近，我正在寻找一个代码来为具有复杂背景的图像应用 OCR，我找到了一些解决方案，我将向您展示的代码是基于这个解决方案的。

要提取数字（或尝试），您必须执行一些步骤

将图像转换为灰度
使用 Otsu 方法和逆运算应用图像阈值
应用距离变换
应用形态学运算来清理图像中的小点
应用膨胀（dilate）操作来放大数字
找到轮廓并根据每个轮廓的宽度和高度对其进行过滤
为每个轮廓创建凸包（convex hull），得到一个凸包列表
绘制凸包
对掩码（mask）进行膨胀操作
通过按位运算提取分割出的区域
OCR 预处理图像
打印你的结果

我在这里展示的代码并不完美，我认为它可以改进，但我想向您展示解决问题的起点。

import cv2
import pytesseract
from pytesseract import Output
import numpy as np
import imutils

# Load the input image. cv2.imread returns None (it does not raise) when the
# file cannot be read, so fail here with a clear message instead of letting a
# later OpenCV call fail with a cryptic one.
img = cv2.imread('ABV5H.png')
if img is None:
    raise FileNotFoundError("could not read image 'ABV5H.png'")

# Keep OpenCV's native BGR channel order: cv2.imshow and cv2.imwrite treat
# arrays as BGR, so converting to RGB here would swap red and blue in every
# preview and every saved image produced from `img`.
img = imutils.resize(img, width=900)

# Grayscale copy; this is the input of the thresholding step.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Gray", gray)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Inverted binary threshold; the threshold value is picked by Otsu's method.
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
cv2.imshow("Threshold", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Distance transform (L2 metric, mask size 5), min-max normalised to
# [0, 1], rescaled to 8-bit and binarised again with Otsu.
distance = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
distance = cv2.normalize(distance, distance, 0, 1.0, cv2.NORM_MINMAX)
distance_u8 = (distance * 255).astype('uint8')
_, dist = cv2.threshold(distance_u8, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imshow("Distance Transformation", dist)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Morphological opening with a 2x2 cross-shaped structuring element.
opening_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (2, 2))
opening = cv2.morphologyEx(dist, cv2.MORPH_OPEN, opening_kernel)
cv2.imshow("Morphology", opening)
cv2.imwrite("morphology.jpg", opening)
cv2.waitKey(0)
cv2.destroyAllWindows()

# One dilation pass with a 3x3 cross to enlarge the remaining shapes.
# `kernel` stays bound to this 3x3 element; the mask dilation later in the
# script reads it.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
dilation = cv2.dilate(opening, kernel, iterations=1)
cv2.imshow("dilated", dilation)
cv2.imwrite("dilated.jpg", dilation)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Find the external contours of the dilated binary image. A copy is passed
# so that `dilation` itself is never touched by findContours.
cnts = cv2.findContours(dilation.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
# imutils.grab_contours extracts the contour list from the findContours
# return value.
cnts = imutils.grab_contours(cnts)

# Draw every contour on a copy of the input image for visual inspection.
output = img.copy()
for contour in cnts:
    cv2.drawContours(output, [contour], -1, (0, 0, 255), 3)
cv2.imshow("Contours", output)
# Save the annotated image that was just displayed (previously `dilation`
# was written here, so "contours.jpg" contained no contours at all).
cv2.imwrite("contours.jpg", output)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Keep only the contours whose bounding box has a width in [5, 75) and a
# height in (15, 35]; cv2.boundingRect returns (x, y, width, height).
nums = [
    contour
    for contour in cnts
    if 5 <= cv2.boundingRect(contour)[2] < 75
    and 15 < cv2.boundingRect(contour)[3] <= 35
]

# Draw the surviving contours on a fresh copy of the input image.
output2 = img.copy()
for contour in nums:
    cv2.drawContours(output2, [contour], -1, (0, 0, 255), 2)
cv2.imshow("Filter", output2)
cv2.imwrite("filter.jpg", output2)
cv2.waitKey(0)
cv2.destroyAllWindows()

# One convex hull per retained contour (second argument: clockwise=False).
hull = [cv2.convexHull(contour, False) for contour in nums]

# Black mask with the same height and width as the dilated image.
mask = np.zeros(dilation.shape[:2], dtype='uint8')

# Outline each hull on the mask with thickness 1 and line type 8.
# NOTE(review): the mask is a 2-D uint8 array, so presumably only the first
# component (255) of the colour tuple takes effect - confirm.
hull_color = (255, 0, 0)
for hull_idx, _ in enumerate(hull):
    cv2.drawContours(mask, hull, hull_idx, hull_color, 1, 8)

# Thicken the hull outlines with 15 dilation passes so that the mask covers
# the regions to keep. `kernel` is the structuring element bound earlier in
# the script.
mask = cv2.dilate(mask, kernel, iterations=15)
cv2.imshow("Dilated Mask", mask)
cv2.imwrite("dilated-mask.jpg", mask)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Keep only the pixels of the dilated image that lie inside the mask.
final = cv2.bitwise_and(dilation, dilation, mask=mask)
cv2.imshow("Pre-processed Image", final)
cv2.imwrite("pre-processed.jpg", final)
cv2.waitKey(0)
cv2.destroyAllWindows()


# Tesseract options: page segmentation mode 12 and a whitelist that
# restricts recognition to the digits 0-9.
config = '--psm 12 -c tessedit_char_whitelist=0123456789'

# OCR the pre-processed image; Output.DICT returns a dict of parallel lists
# ('text', 'left', 'top', 'width', 'height', ...).
dict_wordsDetected = pytesseract.image_to_data(final, config=config,
                                               output_type=Output.DICT)

# Collect the indices of the detections that still contain text once the
# spaces are removed. Testing the length *before* removing spaces would
# accept whitespace-only entries and draw empty, unlabeled boxes for them.
index = []
for idx, txt in enumerate(dict_wordsDetected['text']):
    cleaned = txt.replace(" ", "")
    dict_wordsDetected['text'][idx] = cleaned
    if cleaned:
        index.append(idx)

# Annotate the input image in place: a box padded by 10 px around each
# detection, with the recognised text written just above it.
for i in index:
    x = dict_wordsDetected['left'][i]
    y = dict_wordsDetected['top'][i]
    w = dict_wordsDetected['width'][i]
    h = dict_wordsDetected['height'][i]
    cv2.rectangle(img, (x - 10, y - 10), (x + w + 10, y + h + 10), (0, 0, 255), 2)
    cv2.putText(img, dict_wordsDetected['text'][i], (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
cv2.imshow("Voilà le résultat", img)
cv2.imwrite('result.jpg', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

可视化一些操作

（我暂时不能上传我的图片。有一些图片的超链接。这些图片对应一些图片预处理步骤）

结果

如您所见，我们可以在输入图像上找到数字。其中有不少检测是准确的，但也有一些不准确的输出，主要原因在于图像预处理：即使我们执行了若干变换，图像中仍然存在噪点。解决您问题的关键就是图像预处理。另外需要记住，Tesseract 并不完美，它需要质量好的图像才能正常工作。除此之外，您还需要了解 --psm 模式（页面分割模式）来改进 OCR，并使用白名单来避免不需要的检测结果。正如我所说，我们已经得到了不错的结果，但如果您的任务只允许使用 OpenCV 和 Tesseract，我认为您还可以继续改进；此外，也存在比这更简单的其他方法。

Si tu as besoin d'aide, tu peux me contacter ; je préfère parler français plutôt qu'anglais.

难以用 tesseract 检测数字

问题描述

1 个解决方案

解决方案1
1 2021-11-18 13:18:05

可视化一些操作

结果

难以用 tesseract 检测数字

问题描述

1 个解决方案

解决方案1 1 2021-11-18 13:18:05

可视化一些操作

结果

解决方案1
1 2021-11-18 13:18:05