Removing the lines from captcha image using python

Question

I have a captcha image, which is attached to this question.

I am trying to extract the text from the image. The following code turns every area white except the text and the lines:

import cv2
from PIL import Image
import numpy as np

CAPTCHA_PATH = "E:\\python\\downloaded\\captcha.png"
WHITE_THRESHOLD = 90  # gray levels at or above this are forced to white

# Decode the file once; image2 is the untouched grayscale kept for comparison.
image2 = Image.open(CAPTCHA_PATH).convert('L')
# Whiten the bright pixels through a lookup table instead of a per-pixel
# Python loop; pixels darker than the threshold keep their original value.
image1 = image2.point(lambda level: 255 if level >= WHITE_THRESHOLD else level)
cv2.imshow("1", np.array(image2))
cv2.imshow("2", np.array(image1))
cv2.waitKey(0)

I am now trying to remove the line that crosses the text, but I cannot get it to work. I tried the code below, which was posted in another StackOverflow question, but it does not work either:

def eliminate_zeros(self,vector):
    """Return ``(index, value)`` pairs for the non-zero entries of ``vector``.

    Indices refer to positions in the original ``vector``; order is preserved.
    ``self`` is not used.
    """
    kept = []
    for position, value in enumerate(vector):
        if value != 0:
            kept.append((position, value))
    return kept

def get_line_position(self,img):
    """Pick two reference points from the lightest non-empty columns of ``img``.

    The columns whose sums equal one of the two smallest non-zero column sums
    are collected; the first and last of them are used. In each of those two
    columns the middle non-zero pixel gives the row of a reference point.
    NOTE(review): this presumably relies on those columns containing only
    pixels of the line to be removed -- confirm against the input images.

    Args:
        img: 2-D NumPy array indexed ``img[row, column]``.

    Returns:
        ``(p1, p2, line_length)`` where ``p1`` and ``p2`` are
        ``[row, column]`` lists and ``line_length`` is the number of non-zero
        pixels in the first selected column.
    """
    column_sums = img.sum(axis=0)
    nonzero_columns = self.eliminate_zeros(column_sums)
    lowest, second_lowest = heapq.nsmallest(
        2, nonzero_columns, key=itemgetter(1)
    )
    low_value = lowest[1]
    next_value = second_lowest[1]

    # Every column whose sum ties with one of the two smallest sums.
    matching = []
    for position, total in enumerate(column_sums):
        if total == low_value or total == next_value:
            matching.append(position)
    first_col = matching[0]
    last_col = matching[len(matching) - 1]

    pair = img[:, [first_col, last_col]]
    first_pixels = self.eliminate_zeros(pair[:, 0])
    last_pixels = self.eliminate_zeros(pair[:, 1])
    line_length = len(first_pixels)

    # Row index of the middle non-zero pixel in each selected column.
    first_row = first_pixels[round(len(first_pixels) / 2)][0]
    last_row = last_pixels[round(len(last_pixels) / 2)][0]
    return [first_row, first_col], [last_row, last_col], line_length

def remove_line(self,p1,p2,LL,img):
    """Zero out a thin line running through ``p1`` and ``p2``.

    The line is modelled as the straight segment through the two points. For
    each column, the row where that segment crosses is used as a seed; from
    the seed the pixels are scanned upwards and downwards until two vertically
    adjacent zero pixels are met. The scanned pixels are cleared only when the
    distance between the two stop rows is ``LL - 1``, ``LL`` or ``LL + 1``, so
    taller runs (e.g. character strokes crossing the line) are left untouched.

    Args:
        p1: ``[row, column]`` of one point on the line.
        p2: ``[row, column]`` of another point on the line, in a different
            column.
        LL: Expected vertical extent of the line, compared against the
            distance between the rows where the two scans stop.
        img: 2-D image indexed ``img[row][column]`` in which 0 is background.
            Rows must provide ``copy()`` (lists and NumPy rows both do).

    Returns:
        A list of rows with the line pixels set to 0. ``img`` itself is left
        unmodified.

    Raises:
        ValueError: If ``p1`` and ``p2`` lie in the same column, because a
            vertical line has no single row per column.
    """
    if p2[1] == p1[1]:
        raise ValueError("p1 and p2 must lie in different columns")
    m = (p2[0] - p1[0]) / (p2[1] - p1[1])

    # Copy every row: a plain list(img) would share the rows with the caller
    # and the assignments below would silently modify the input image.
    img_removed_line = [row.copy() for row in img]
    last = len(img_removed_line) - 1

    for j in range(len(img[0])):
        seed = int(np.round(p1[0] + m * (j - p1[1])))
        # The extrapolated line can leave the image (the last row is never
        # used as a seed); there is nothing to trace in this column.
        if not 0 <= seed < last:
            continue

        rlist = set()

        # Scan upwards from the seed.
        i = seed
        while 0 <= i < last:
            f1 = i
            # Above the first row there is only background; do not let the
            # index -1 wrap around to the bottom row.
            above = img_removed_line[i - 1][j] if i > 0 else 0
            if img_removed_line[i][j] == 0 and above == 0:
                break
            rlist.add(i)
            i -= 1

        # Scan downwards from the seed.
        i = seed
        while 0 <= i < last:
            f2 = i
            if img_removed_line[i][j] == 0 and img_removed_line[i + 1][j] == 0:
                break
            rlist.add(i)
            i += 1

        if np.abs(f2 - f1) in (LL + 1, LL, LL - 1):
            for k in rlist:
                img_removed_line[k][j] = 0

    return img_removed_line

I am new to computer vision. Can someone suggest a way to do this? The original and partially processed image files are attached here.

Answer 1

My approach is based on the fact that the line is thinner than the characters. In this example I used blurring, threshold and morphology to get rid of the line between the characters. The result is this:

import cv2
import numpy as np

image = cv2.imread('captcha.png')
if image is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # rather than raising, which would otherwise surface later in cv2.blur.
    raise FileNotFoundError("could not read 'captcha.png'")

# Smooth with a 3x3 box filter, then binarize at gray level 90.
image = cv2.blur(image, (3, 3))
ret, image = cv2.threshold(image, 90, 255, cv2.THRESH_BINARY)

# Dilating with a 3x1 (vertical) kernel lets bright pixels overwrite dark
# features that are thin in the vertical direction; the 2x2 erosion then
# grows the remaining dark strokes back.
image = cv2.dilate(image, np.ones((3, 1), np.uint8))
image = cv2.erode(image, np.ones((2, 2), np.uint8))

cv2.imshow("1", image)
cv2.waitKey(0)

Answer 2

You can use OpenCV (cv2) functions such as threshold, dilate, bitwise_and and bitwise_not to remove unwanted lines from a captcha:

import numpy as np
import cv2

img = cv2.imread('captcha.jpg',0)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # rather than raising.
    raise FileNotFoundError("could not read 'captcha.jpg'")

# Use the inverted image as a mask: pure-white pixels (255) invert to 0 and
# are masked out, every other pixel keeps its value.
horizontal_inv = cv2.bitwise_not(img)
masked_img = cv2.bitwise_and(img, img, mask=horizontal_inv)
masked_img_inv = cv2.bitwise_not(masked_img)

# Three passes of a 5x5 dilation spread the bright values of the inverted image.
kernel = np.ones((5,5),np.uint8)
dilation = cv2.dilate(masked_img_inv,kernel,iterations = 3)

# Keep only the pixels that stayed below full white, then flip back.
ret,thresh2 = cv2.threshold(dilation,254,255,cv2.THRESH_BINARY_INV)
thresh2=cv2.bitwise_not(thresh2)

# Show the result; without a window cv2.waitKey has nothing to wait on.
cv2.imshow('result', thresh2)
cv2.waitKey(0)
cv2.destroyAllWindows()

Removing the lines from captcha image using python

Question

2 answers

solution1
2 ACCEPTED 2019-12-31 14:10:40

solution2
0 2019-12-31 14:09:35

Removing the lines from captcha image using python

Question

2 answers

solution1 2 ACCEPTED 2019-12-31 14:10:40

solution2 0 2019-12-31 14:09:35

solution1
2 ACCEPTED 2019-12-31 14:10:40

solution2
0 2019-12-31 14:09:35