简体   繁体   中英

OCR for digit recognition in Python using Open CV and Pytesseract

Hello I am trying to identify the odometer reading from the image attached using open CV and EAST model along with Pyteserract.

Following is my code:

import cv2
import numpy as np
import matplotlib.pyplot as plt 

# assuming you have the result image store in median
median = cv2.imread("odo_4.jpg", 0)
image_gray = median

binary = cv2.bitwise_not(image_gray)

blur = cv2.GaussianBlur(image_gray,(5,5),0)

ret2,th2 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)

edged = cv2.Canny(th2, 50, 80, 255)

#threshold = cv2.adaptiveThreshold(edged,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel, iterations=1)



contours = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

contours = contours[0] if len(contours) == 2 else contours[1]



rect_cnts = []
for cnt in contours:
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
    (x, y, w, h) = cv2.boundingRect(cnt)
    ar = w / float(h)
    if (len(approx) == 4) & (ar >= 0.95 and ar <= 1.05) : # shape filtering condition
        pass 
    else :
        rect_cnts.append(cnt)


max_area = 0
football_square = None
for cnt in rect_cnts:
    (x, y, w, h) = cv2.boundingRect(cnt)
    if max_area < w*h:
        max_area = w*h
        football_square = cnt

image = cv2.cvtColor(image_gray, cv2.COLOR_GRAY2RGB)

(x, y, w, h) = cv2.boundingRect(football_square)

new_image = image[y:y+h, x:x+w] 

new = new_image 

import cv2 as cv

orig = new.copy()
(origH, origW) = new.shape[:2]

rW = origW / 320.0
rH = origH / 320.0

# resize the original image to new dimensions
new = cv.resize(new, (320, 320))
(H, W) = new.shape[:2]

# construct a blob from the image to forward pass it to EAST model
blob = cv.dnn.blobFromImage(new, 1.0, (W, H),
    (123.68, 116.78, 103.94), swapRB=True, crop=False)



net = cv.dnn.readNet('frozen_east_text_detection.pb')

layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

net.setInput(blob)
(scores, geometry) = net.forward(layerNames)


def predictions(prob_score, geo):
    (numR, numC) = prob_score.shape[2:4]
    boxes = []
    confidence_val = []

    # loop over rows
    for y in range(0, numR):
        scoresData = prob_score[0, 0, y]
        x0 = geo[0, 0, y]
        x1 = geo[0, 1, y]
        x2 = geo[0, 2, y]
        x3 = geo[0, 3, y]
        anglesData = geo[0, 4, y]

        # loop over the number of columns
        for i in range(0, numC):
            if scoresData[i] < 0.5:
                continue

            (offX, offY) = (i * 4.0, y * 4.0)

            # extracting the rotation angle for the prediction and computing the sine and cosine
            angle = anglesData[i]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # using the geo volume to get the dimensions of the bounding box
            h = x0[i] + x2[i]
            w = x1[i] + x3[i]

            # compute start and end for the text pred bbox
            endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
            endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
            startX = int(endX - w)
            startY = int(endY - h)

            boxes.append((startX, startY, endX, endY))
            confidence_val.append(scoresData[i])

    # return bounding boxes and associated confidence_val
    return (boxes, confidence_val)


(boxes, confidence_val) = predictions(scores, geometry)
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)


# initialize the list of results
results = []

# loop over the bounding boxes to find the coordinate of bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the coordinates based on the respective ratios in order to reflect bounding box on the original image
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    #extract the region of interest
    r = orig[startY:endY, startX:endX]
    plt.imshow(r)

    #configuration setting to convert image to string.  
    configuration = ("-l eng --oem 1 --psm 7")
    ##This will recognize the text from the image of bounding box
    text = pytesseract.image_to_string(r, config=configuration)

    # append bbox coordinate and associated text to the list of results 
    results.append(((startX, startY, endX, endY), text))

The results are bad - but my EAST model is identify the contour ( area) where the digits are present. Can you please help me? I have tried different psm values in config for image_to_string .

在此处输入图像描述

Use InRange() for selection. See example:

import cv2 as cv
low_H = 80
low_S = 160
low_V = 200
high_H = 100
high_S = 255
high_V = 255
frame = cv.imread('OAPgE.jpg')
frame_HSV = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
frame_threshold = cv.inRange(frame_HSV, (low_H, low_S, low_V), (high_H, high_S, high_V))
frame_threshold=cv.bitwise_not(frame_threshold)
cv.imwrite('out_36.png', frame_threshold) 

在此处输入图像描述

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM