
How to extract a line of text from an image using OpenCV

I am sending an image to tesseract for OCR, and before sending it I perform some preprocessing on it: I apply a threshold to the image.

I would like to use OpenCV to either detect the line of text or crop out all the white spots so that only the line of text remains, because when I send an image like that to tesseract, it can read the text perfectly fine.

Question

  • What are some ways of doing this?

Note: I have already tried increasing the threshold from 60% to 90%, but it starts distorting the actual text, which makes it harder for tesseract to read.
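For reference, that thresholding step can be done with cv2.threshold. The snippet below is only a minimal sketch, assuming the "60%" refers to 60% of the 8-bit intensity range (0.6 * 255 ≈ 153); the file name input.jpg is a placeholder, not from the question.

import cv2

img = cv2.imread('input.jpg')       # placeholder path, not from the question
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

thresh_value = int(0.60 * 255)      # "60%" interpreted as a fraction of the 0-255 range
_, binary = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
cv2.imwrite('thresholded.jpg', binary)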

Edit

I have removed the old code, since it was doing unnecessary things and the post was getting long.

Result

Without explanations

import cv2
import numpy as np

img = cv2.imread('c:/data/ocr2.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = gray.astype('float32')
gray /= 255
dct = cv2.dct(gray)
vr = 1.   # vertical ratio
hr = .95  # horizontal ratio
dct[0:int(vr * dct.shape[0]), 0:int(hr * dct.shape[1])] = 0
gray = cv2.idct(dct)
gray = cv2.normalize(gray, None, 0, 1, cv2.NORM_MINMAX)
gray *= 255
gray = gray.astype('uint8')

gray = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT,
    cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15)),
    iterations=1)
gray = cv2.morphologyEx(gray, cv2.MORPH_DILATE,
    cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11)),
    iterations=1)
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]

contours, hierarchy = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
boxmask = np.zeros(gray.shape, gray.dtype)
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(boxmask, (x, y), (x + w, y + h), color=255, thickness=-1)
cv2.imshow('done', img & cv2.cvtColor(boxmask, cv2.COLOR_GRAY2BGR))
cv2.imwrite('done.jpg', img & cv2.cvtColor(boxmask, cv2.COLOR_GRAY2BGR))
cv2.waitKey(0)

With explanations

import cv2
import numpy as np
#import skimage.morphology as smp

'''
prerequisites
* some hands-on practice with manipulating 2D spectra will make things much easier to grasp
** http://www.jcrystal.com/ 'FTL - SE' can be used to easily try stuff out
** try the phase spectrum filtering there
* dct was used here because it is simply easier to manipulate; dft can also be used to get the same effect

outline
* the main 'aha' was to notice that even after a very large portion of the original spectrum is zeroed out,
  the area of interest fails to completely disappear, unlike the rest, so the solution tries to box that part
* also note that the text of interest is always horizontal, so throwing away more of the vertical components brings it out even more
'''

cv2.namedWindow('img', 0)
cv2.namedWindow('dct before', 0)
cv2.namedWindow('dct after', 0)
cv2.namedWindow('low freq suppressed', 0)
cv2.namedWindow('bring out black gaps', 0)
cv2.namedWindow('connect them together', 0)
cv2.namedWindow('auto thresh', 0)
cv2.namedWindow('boxmask', 0)
cv2.namedWindow('done', 0)


img = cv2.imread('c:/data/ocr2.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

orig = gray.copy()
gray = gray.astype('float32')
gray /= 255
dct = cv2.dct(gray)  # note: cv2.dct requires even-sized input arrays

# visualise the magnitude of the spectrum (log scale; abs avoids log of negative coefficients)
dctvis = cv2.normalize(np.log(np.abs(dct) + 1e-3), None, 0, 1, cv2.NORM_MINMAX)
cv2.imshow('dct before', dctvis)

vr = 1.   # vertical ratio: how much of the vertical freq components should be thrown away
hr = .95  # horizontal ratio
dct[0:int(vr * dct.shape[0]), 0:int(hr * dct.shape[1])] = 0

dctvis = cv2.normalize(np.sqrt(np.abs(dct)), None, 0, 1, cv2.NORM_MINMAX)
cv2.imshow('dct after', dctvis)
gray = cv2.idct(dct)
gray = cv2.normalize(gray, None, 0, 1, cv2.NORM_MINMAX)
gray *= 255
gray = gray.astype('uint8')

cv2.imshow('low freq suppressed', gray)
gray = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT,  # smp.disk(7)
    cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15)),
    iterations=1)
cv2.imshow('bring out black gaps', gray)
gray = cv2.morphologyEx(gray, cv2.MORPH_DILATE,  # smp.disk(5)
    cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11)),
    iterations=1)
cv2.imshow('connect them together', gray)
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
cv2.imshow('auto thresh', gray)

contours, hierarchy = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
boxmask = np.zeros(gray.shape, gray.dtype)
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(boxmask, (x, y), (x + w, y + h), color=255, thickness=-1)
cv2.imshow('boxmask', boxmask)
cv2.imshow('done', img & cv2.cvtColor(boxmask, cv2.COLOR_GRAY2BGR))
cv2.waitKey(0)
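To complete the pipeline described in the question, the masked result can then be passed to tesseract. Below is a minimal sketch assuming the pytesseract wrapper is used (the post only says the image is sent to tesseract, not through which binding); it reads the done.jpg written by the script above.

import cv2
import pytesseract  # assumed Python wrapper for tesseract, not mentioned in the post

result = cv2.imread('done.jpg')  # the masked image written above
text = pytesseract.image_to_string(result)
print(text)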
