How to enhance Text detection in image using Python

Question

I tried to detect text in images specially images with quotes using OpenCV Python. For that I first train some text images. I detect each characters of text in the image to train. For images with proper word style the characters are detect properly. But for some images the text(character) area can't be detect properly. I attached the code for this below. How can I modify the code so that the characters can be detected properly

import sys
import numpy as np
import cv2
import os

MIN_CONTOUR_AREA = 100

RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30

def main():
imgTrainingNumbers = cv2.imread("E:\God - Level 4 Research Project\Testings\Tharu\godd/jbpoetry.png") 

if imgTrainingNumbers is None:  
    print ("error: image not read from file \n\n") 
    os.system("pause") 
    return

imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0)

imgThresh = cv2.adaptiveThreshold(imgBlurred,
                                  255,
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY_INV,
                                  11,
                                  2)

cv2.imshow("imgThresh", imgThresh)

imgThreshCopy = imgThresh.copy()

imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy,
                                             cv2.RETR_EXTERNAL, 
                                             cv2.CHAIN_APPROX_SIMPLE)


npaFlattenedImages =  np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))

intClassifications = []

intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),
                 ord('A'), ord('B'), ord('C'), ord('D'), ord('E'), ord('F'), ord('G'), ord('H'), ord('I'), ord('J'),
                 ord('K'), ord('L'), ord('M'), ord('N'), ord('O'), ord('P'), ord('Q'), ord('R'), ord('S'), ord('T'),
                 ord('U'), ord('V'), ord('W'), ord('X'), ord('Y'), ord('Z'),ord('a'),ord('b'),ord('c'),ord('d'),
                 ord('e'),ord('f'),ord('g'),ord('h'),ord('i'),ord('j'),ord('k'),ord('l'),ord('m'),ord('n'),ord('o'),
                 ord('p'),ord('q'),ord('r'),ord('s'),ord('t'),ord('u'),ord('v'),ord('w'),ord('x'),ord('y'),ord('z') ]


for npaContour in npaContours:
    if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA:
        [intX, intY, intW, intH] = cv2.boundingRect(npaContour)


        cv2.rectangle(imgTrainingNumbers,
                      (intX, intY), 
                      (intX+intW,intY+intH),
                      (0, 0, 255),
                      2)

        imgROI = imgThresh[intY:intY+intH, intX:intX+intW] 
        imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

        cv2.imshow("imgROI", imgROI)               
        cv2.imshow("imgROIResized", imgROIResized)
        cv2.imshow("training_numbers.png", imgTrainingNumbers)

        intChar = cv2.waitKey(0) 

        if intChar == 27: 
            sys.exit()
        elif intChar in intValidChars:
            print(intChar)
            intClassifications.append(intChar)    
            print(intChar)
            npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) 
            npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0) 

fltClassifications = np.array(intClassifications, np.float32) 

npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))

print ("\n\ntraining complete !!\n")

np.savetxt("classificationsNEWG.txt", npaClassifications)
np.savetxt("flattened_imagesNEWG.txt", npaFlattenedImages)
cv2.destroyAllWindows()
return
if __name__ == "__main__":
main()

Answer 1

What you are trying to do is a very naive approach, just applying the threshold and detecting contours won't work here. A lot of research papers have been published around this task. You may refer those and try to implement or can use image_to_boxes function of the famous tesseract OCR . You can download it from here and as you are using python you can install pytesseract - python wrapper for tesseract from here and use the following code to achieve what you are expecting.

import pytesseract
import cv2

originalImg = cv2.imread('tp.png')
originalImg = cv2.resize(originalImg, None, fx=2.5, fy=2.5)
img = cv2.cvtColor(originalImg, cv2.COLOR_BGR2GRAY)
_,img = cv2.threshold(img,100,255,cv2.THRESH_BINARY)

h, w = img.shape

letters = pytesseract.image_to_boxes(img)
letters = letters.split('\n')
letters = [letter.split() for letter in letters]

for letter in letters:    
    cv2.rectangle(originalImg, (int(letter[1]), h - int(letter[2])), (int(letter[3]), h - int(letter[4])), (0,0,255), 1)

cv2.imshow('', originalImg)

The resultant image

Note that there are many false detections, you need to ignore them in your training process.

How to enhance Text detection in image using Python

Question

1 answers

solution1
2 2018-04-20 23:24:43

How to enhance Text detection in image using Python

Question

1 answers

solution1 2 2018-04-20 23:24:43

solution1
2 2018-04-20 23:24:43