简体   繁体   中英

opencv, python, how to read grouped text in boxes

I would like to extract the text from the groups (boxes) that appear in the image below: [image]

I have managed to remove the first contour (as described below), but the issue is that when I try to read the text, some of it is missing. I expect this is because of other contours that remain on the image; but when I try to remove them as well, I lose the grouping or part of the text...

# Iterate the previously-found outer contours (found outside this snippet)
# and try to OCR the text inside each box-sized contour.
for i in range(len(contours)):
   # Keep only contours whose area matches the expected box size.
   if 800 < cv2.contourArea(contours[i]) < 2000:
        # Crop the region of interest (ROI) of this box from the source image.
        x, y, width, height = cv2.boundingRect(contours[i])
        roi = img[y:y + height, x:x + width]
        roi_h = roi.shape[0]
        roi_w = roi.shape[1]
        # Enlarge the ROI 6x so Tesseract has enough resolution.
        resize_roi = cv2.resize(roi,(int(roi_w*6),int(roi_h*6)), interpolation=cv2.INTER_LINEAR)
        afterd = cv2.cvtColor(resize_roi, cv2.COLOR_BGR2GRAY)
        # Binarize. NOTE(review): maxval 225 looks like a typo for 255, and the
        # fixed threshold 210 is ignored anyway because THRESH_OTSU is set.
        retim, threshm = cv2.threshold(afterd, 210, 225, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Find inner contours (box borders) inside the enlarged ROI.
        contoursm, hierarchym = cv2.findContours(threshm, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # White mask; box-border contours are drawn onto it in black below.
        mask = np.ones(resize_roi.shape[:2], dtype="uint8") * 255
        for m in range(len(contoursm)):
             # Area range presumably matching the enlarged box borders — TODO confirm.
             if 10000 < cv2.contourArea(contoursm[m]) < 33000:
                  # Paint the border contour (thickness 7) into the mask,
                  # then erase it from the grayscale image via invert/and/invert.
                  cv2.drawContours(mask, contoursm, m, 0, 7)
                  afterd = cv2.bitwise_not(afterd)
                  afterd = cv2.bitwise_and(afterd, afterd, mask=mask)
                  afterd = cv2.bitwise_not(afterd)
                  # OCR the cleaned image; runs once per matching contour,
                  # so the same (progressively cleaned) image is printed repeatedly.
                  print(pytesseract.image_to_string(afterd, lang='eng', config='--psm 3'))

Instead of dealing with all the boxes, I suggest deleting them by finding connected components, and filling the large clusters with background color.

You may use the following stages:

  • Convert image to Grayscale, apply threshold, and invert polarity.
  • Delete all clusters having more than 100 pixels (assume letters are smaller).
  • Dilate thresh for uniting text areas to single "blocks".
  • Find contours on the dilated thresh image.
  • Find bounding rectangles, and apply OCR to the rectangle.

Here is the complete code sample:

import numpy as np
import cv2
import pytesseract

# Point pytesseract at the Tesseract binary (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # I am using Windows

img = cv2.imread('img.png')  # Read input image

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to Grayscale.

# Otsu threshold with inverted polarity: text and box lines become white on black.
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Label connected components; stats[i, CC_STAT_AREA] is the pixel count of label i.
nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

thresh_size = 100  # Clusters larger than this are assumed to be box lines, not letters.

# Delete all lines by filling large clusters with zeros (label 0 is the background).
for i in range(1, nlabel):
    if stats[i, cv2.CC_STAT_AREA] > thresh_size:
        thresh[labels == i] = 0

# Dilate thresh for uniting text areas into single blocks.
dilated_thresh = cv2.dilate(thresh, np.ones((5, 5)))

# Find outer contours of the dilated text blocks.
contours, hierarchy = cv2.findContours(dilated_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Iterate contours, find bounding rectangles, and OCR each one.
for c in contours:
    x, y, w, h = cv2.boundingRect(c)

    # Draw green rectangle for testing.
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), thickness=1)

    # Slice with a 3-pixel margin, clamped to the image bounds so that a box
    # near the top/left border does not produce negative indices (which would
    # wrap around in NumPy and return an empty/garbage slice).
    y0 = max(y - 3, 0)
    x0 = max(x - 3, 0)
    afterd = thresh[y0:y + h + 3, x0:x + w + 3]

    # Show afterd as image for testing
    # cv2.imshow('afterd', afterd)
    # cv2.waitKey(100)

    # Tesseract works better on enlarged, black-on-white text.
    resized_afterd = cv2.resize(afterd, (afterd.shape[1] * 5, afterd.shape[0] * 5), interpolation=cv2.INTER_LANCZOS4)

    # Invert back to black text on a white background before OCR.
    print(pytesseract.image_to_string(255 - resized_afterd, lang='eng', config='--psm 3'))


cv2.imshow('thresh', thresh)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result strings after OCR:

DF6DF645
RFFTW
2345
2277
AABBA
DF1267
ABCET5456


Input image with green boxes around the text:

在此处输入图片说明


Update:

Grouping contours:

For grouping contours, you may use the hierarchy result of cv2.findContours with cv2.RETR_TREE .
See Contours Hierarchy documentation.

You may use the parent-child relationship for grouping contours.

Here is an incomplete sample code for using the hierarchy:

img = cv2.imread('img.png')  # Read input image

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to Grayscale.

# Otsu threshold with inverted polarity: text and box lines become white on black.
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

thresh_boxes = np.zeros_like(thresh)  # Will hold only the box lines (no text).

thresh_size = 100  # Clusters larger than this are assumed to be box lines.

# Delete all lines from thresh by filling large clusters with zeros,
# and build a second image that contains only the boxes (without text).
for i in range(1, nlabel):
    if stats[i, cv2.CC_STAT_AREA] > thresh_size:
        thresh[labels == i] = 0
        thresh_boxes[labels == i] = 255


# Find contours on thresh_boxes; cv2.RETR_TREE builds the full nesting hierarchy.
contours, hierarchy = cv2.findContours(thresh_boxes, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Iterate contours together with their hierarchy entries.
# (enumerate replaces the zip(contours, range(len(contours))) anti-pattern, and
# the hierarchy row gets its own name so it is no longer shadowed by the
# bounding-rect height 'h'.)
for i, c in enumerate(contours):
    # hierarchy[0, i] = [next, previous, first_child, parent]
    node = hierarchy[0, i, :]
    first_child = node[2]
    # Innermost contours have no child (last level of the tree).
    if first_child == -1:
        parent = node[3]
        x, y, w, h = cv2.boundingRect(c)
        # Write the parent's index at the center of each innermost box:
        # boxes sharing a parent index belong to the same group.
        cv2.putText(img, str(parent), (x + w // 2 - 4, y + h // 2 + 8), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0, 0, 255), thickness=2)

cv2.imshow('thresh', thresh)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result:
在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM