简体   繁体   中英

opencv, python, how to read grouped text in boxes

I would like to extract the text from the groups (boxes) that appear in the image below: [image]

I have managed to remove the first contour (as described below), but the issue is that when I try to read the text, some of it is missing. I expect this is because of other contours that remain on the image; but when I try to remove them as well, I lose the grouping or part of the text...

# Iterate the previously-found outer contours (found outside this snippet)
# and try to OCR the text inside each box-sized contour.
for i in range(len(contours)):
   # Keep only contours whose area matches the expected box size.
   if 800 < cv2.contourArea(contours[i]) < 2000:
        # Crop the region of interest (ROI) of this box from the source image.
        x, y, width, height = cv2.boundingRect(contours[i])
        roi = img[y:y + height, x:x + width]
        roi_h = roi.shape[0]
        roi_w = roi.shape[1]
        # Enlarge the ROI 6x so Tesseract has enough resolution.
        resize_roi = cv2.resize(roi,(int(roi_w*6),int(roi_h*6)), interpolation=cv2.INTER_LINEAR)
        afterd = cv2.cvtColor(resize_roi, cv2.COLOR_BGR2GRAY)
        # Binarize. NOTE(review): maxval 225 looks like a typo for 255, and the
        # fixed threshold 210 is ignored anyway because THRESH_OTSU is set.
        retim, threshm = cv2.threshold(afterd, 210, 225, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Find inner contours (box borders) inside the enlarged ROI.
        contoursm, hierarchym = cv2.findContours(threshm, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # White mask; box-border contours are drawn onto it in black below.
        mask = np.ones(resize_roi.shape[:2], dtype="uint8") * 255
        for m in range(len(contoursm)):
             # Area range presumably matching the enlarged box borders — TODO confirm.
             if 10000 < cv2.contourArea(contoursm[m]) < 33000:
                  # Paint the border contour (thickness 7) into the mask,
                  # then erase it from the grayscale image via invert/and/invert.
                  cv2.drawContours(mask, contoursm, m, 0, 7)
                  afterd = cv2.bitwise_not(afterd)
                  afterd = cv2.bitwise_and(afterd, afterd, mask=mask)
                  afterd = cv2.bitwise_not(afterd)
                  # OCR the cleaned image; runs once per matching contour,
                  # so the same (progressively cleaned) image is printed repeatedly.
                  print(pytesseract.image_to_string(afterd, lang='eng', config='--psm 3'))

Instead of dealing with all the boxes, I suggest deleting them by finding connected components, and filling the large clusters with background color.

You may use the following stages:

  • Convert image to Grayscale, apply threshold, and invert polarity.
  • Delete all clusters having more than 100 pixels (assume letters are smaller).
  • Dilate thresh for uniting text areas to single "blocks".
  • Find contours on the dilated thresh image.
  • Find bounding rectangles, and apply OCR to the rectangle.

Here is the complete code sample:

import numpy as np
import cv2
import pytesseract

# Point pytesseract at the Tesseract binary (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # I am using Windows

img = cv2.imread('img.png')  # Read input image

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to Grayscale.

# Otsu threshold with inverted polarity: text and box lines become white on black.
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Label connected components; stats[i, CC_STAT_AREA] is the pixel count of label i.
nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

thresh_size = 100  # Clusters larger than this are assumed to be box lines, not letters.

# Delete all lines by filling large clusters with zeros (label 0 is the background).
for i in range(1, nlabel):
    if stats[i, cv2.CC_STAT_AREA] > thresh_size:
        thresh[labels == i] = 0

# Dilate thresh for uniting text areas into single blocks.
dilated_thresh = cv2.dilate(thresh, np.ones((5, 5)))

# Find outer contours of the dilated text blocks.
contours, hierarchy = cv2.findContours(dilated_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Iterate contours, find bounding rectangles, and OCR each one.
for c in contours:
    x, y, w, h = cv2.boundingRect(c)

    # Draw green rectangle for testing.
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), thickness=1)

    # Slice with a 3-pixel margin, clamped to the image bounds so that a box
    # near the top/left border does not produce negative indices (which would
    # wrap around in NumPy and return an empty/garbage slice).
    y0 = max(y - 3, 0)
    x0 = max(x - 3, 0)
    afterd = thresh[y0:y + h + 3, x0:x + w + 3]

    # Show afterd as image for testing
    # cv2.imshow('afterd', afterd)
    # cv2.waitKey(100)

    # Tesseract works better on enlarged, black-on-white text.
    resized_afterd = cv2.resize(afterd, (afterd.shape[1] * 5, afterd.shape[0] * 5), interpolation=cv2.INTER_LANCZOS4)

    # Invert back to black text on a white background before OCR.
    print(pytesseract.image_to_string(255 - resized_afterd, lang='eng', config='--psm 3'))


cv2.imshow('thresh', thresh)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result strings after OCR:

DF6DF645
RFFTW
2345
2277
AABBA
DF1267
ABCET5456


Input image with green boxes around the text:

在此处输入图片说明


Update:

Grouping contours:

For grouping contours, you may use the hierarchy result of cv2.findContours with cv2.RETR_TREE .
See Contours Hierarchy documentation.

You may use the parent-child relationship for grouping contours.

Here is an incomplete sample code for using the hierarchy:

img = cv2.imread('img.png')  # Read input image

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to Grayscale.

# Otsu threshold with inverted polarity: text and box lines become white on black.
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

thresh_boxes = np.zeros_like(thresh)  # Will hold only the box lines (no text).

thresh_size = 100  # Clusters larger than this are assumed to be box lines.

# Delete all lines from thresh by filling large clusters with zeros,
# and build a second image that contains only the boxes (without text).
for i in range(1, nlabel):
    if stats[i, cv2.CC_STAT_AREA] > thresh_size:
        thresh[labels == i] = 0
        thresh_boxes[labels == i] = 255


# Find contours on thresh_boxes; cv2.RETR_TREE builds the full nesting hierarchy.
contours, hierarchy = cv2.findContours(thresh_boxes, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Iterate contours together with their hierarchy entries.
# (enumerate replaces the zip(contours, range(len(contours))) anti-pattern, and
# the hierarchy row gets its own name so it is no longer shadowed by the
# bounding-rect height 'h'.)
for i, c in enumerate(contours):
    # hierarchy[0, i] = [next, previous, first_child, parent]
    node = hierarchy[0, i, :]
    first_child = node[2]
    # Innermost contours have no child (last level of the tree).
    if first_child == -1:
        parent = node[3]
        x, y, w, h = cv2.boundingRect(c)
        # Write the parent's index at the center of each innermost box:
        # boxes sharing a parent index belong to the same group.
        cv2.putText(img, str(parent), (x + w // 2 - 4, y + h // 2 + 8), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0, 0, 255), thickness=2)

cv2.imshow('thresh', thresh)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result:
在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM