[英]OCR for digit recognition in Python using Open CV and Pytesseract
Hello I am trying to identify the odometer reading from the image attached using open CV and EAST model along with Pyteserract.您好,我正在尝试从使用开放式 CV 和 EAST model 以及 Pyteseract 附加的图像中识别里程表读数。
Following is my code:以下是我的代码:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# assuming you have the result image store in median
median = cv2.imread("odo_4.jpg", 0)
image_gray = median
binary = cv2.bitwise_not(image_gray)
blur = cv2.GaussianBlur(image_gray,(5,5),0)
ret2,th2 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
edged = cv2.Canny(th2, 50, 80, 255)
#threshold = cv2.adaptiveThreshold(edged,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel, iterations=1)
contours = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
rect_cnts = []
for cnt in contours:
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
(x, y, w, h) = cv2.boundingRect(cnt)
ar = w / float(h)
if (len(approx) == 4) & (ar >= 0.95 and ar <= 1.05) : # shape filtering condition
pass
else :
rect_cnts.append(cnt)
max_area = 0
football_square = None
for cnt in rect_cnts:
(x, y, w, h) = cv2.boundingRect(cnt)
if max_area < w*h:
max_area = w*h
football_square = cnt
image = cv2.cvtColor(image_gray, cv2.COLOR_GRAY2RGB)
(x, y, w, h) = cv2.boundingRect(football_square)
new_image = image[y:y+h, x:x+w]
new = new_image
import cv2 as cv
orig = new.copy()
(origH, origW) = new.shape[:2]
rW = origW / 320.0
rH = origH / 320.0
# resize the original image to new dimensions
new = cv.resize(new, (320, 320))
(H, W) = new.shape[:2]
# construct a blob from the image to forward pass it to EAST model
blob = cv.dnn.blobFromImage(new, 1.0, (W, H),
(123.68, 116.78, 103.94), swapRB=True, crop=False)
net = cv.dnn.readNet('frozen_east_text_detection.pb')
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"]
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
def predictions(prob_score, geo):
(numR, numC) = prob_score.shape[2:4]
boxes = []
confidence_val = []
# loop over rows
for y in range(0, numR):
scoresData = prob_score[0, 0, y]
x0 = geo[0, 0, y]
x1 = geo[0, 1, y]
x2 = geo[0, 2, y]
x3 = geo[0, 3, y]
anglesData = geo[0, 4, y]
# loop over the number of columns
for i in range(0, numC):
if scoresData[i] < 0.5:
continue
(offX, offY) = (i * 4.0, y * 4.0)
# extracting the rotation angle for the prediction and computing the sine and cosine
angle = anglesData[i]
cos = np.cos(angle)
sin = np.sin(angle)
# using the geo volume to get the dimensions of the bounding box
h = x0[i] + x2[i]
w = x1[i] + x3[i]
# compute start and end for the text pred bbox
endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
startX = int(endX - w)
startY = int(endY - h)
boxes.append((startX, startY, endX, endY))
confidence_val.append(scoresData[i])
# return bounding boxes and associated confidence_val
return (boxes, confidence_val)
(boxes, confidence_val) = predictions(scores, geometry)
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)
# initialize the list of results
results = []
# loop over the bounding boxes to find the coordinate of bounding boxes
for (startX, startY, endX, endY) in boxes:
# scale the coordinates based on the respective ratios in order to reflect bounding box on the original image
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY * rH)
#extract the region of interest
r = orig[startY:endY, startX:endX]
plt.imshow(r)
#configuration setting to convert image to string.
configuration = ("-l eng --oem 1 --psm 7")
##This will recognize the text from the image of bounding box
text = pytesseract.image_to_string(r, config=configuration)
# append bbox coordinate and associated text to the list of results
results.append(((startX, startY, endX, endY), text))
The results are bad - but my EAST model is identify the contour ( area) where the digits are present.结果很糟糕 - 但我的 EAST model 识别出数字所在的轮廓(区域)。 Can you please help me?你能帮我么? I have tried different psm
values in config
for image_to_string
.我在image_to_string
的config
中尝试了不同的psm
值。
Use InRange() for selection.使用 InRange() 进行选择。 See example:参见示例:
import cv2 as cv
low_H = 80
low_S = 160
low_V = 200
high_H = 100
high_S = 255
high_V = 255
frame = cv.imread('OAPgE.jpg')
frame_HSV = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
frame_threshold = cv.inRange(frame_HSV, (low_H, low_S, low_V), (high_H, high_S, high_V))
frame_threshold=cv.bitwise_not(frame_threshold)
cv.imwrite('out_36.png', frame_threshold)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.