简体   繁体   中英

Removing the lines from captcha image using python

I have a captcha image, which is attached to this question.

在此处输入图片说明

I am trying to extract the text in the image. My code below turns every area white except the text and the lines:

import cv2
from PIL import Image
import numpy as np

# Open the captcha twice as 8-bit grayscale ('L'): image1 is modified below,
# image2 is left untouched so both versions can be shown side by side.
image1 = Image.open("E:\\python\\downloaded\\captcha.png").convert('L')
image2 = Image.open("E:\\python\\downloaded\\captcha.png").convert('L')

# Set every pixel whose gray level is 90 or more to white (255); darker
# pixels keep their value. The pixel accessor is indexed as [x, y].
pixels = image1.load()
for y in range(image1.height):
    for x in range(image1.width):
        if pixels[x, y] < 90:
            continue
        pixels[x, y] = 255

# Window "1" shows the untouched image, window "2" the thresholded one.
cv2.imshow("1", np.array(image2))
cv2.imshow("2", np.array(image1))
cv2.waitKey(0)

However, I also want to remove the line crossing the text, and I have not been able to. I tried the code below, which was posted in another question on StackOverflow, but it does not work:

在此处输入图片说明

def eliminate_zeros(self,vector):
    """Return ``(index, value)`` pairs for every entry of *vector* that is not 0.

    The index in each pair is the entry's position in the original *vector*,
    so positions remain meaningful after the zero entries are dropped.
    """
    kept = []
    for position, value in enumerate(vector):
        if value != 0:
            kept.append((position, value))
    return kept

def get_line_position(self,img):
    sumx=img.sum(axis=0)
    list_without_zeros=self.eliminate_zeros(sumx)
    min1,min2=heapq.nsmallest(2,list_without_zeros,key=itemgetter(1))
    l=[dex for [dex,val] in enumerate(sumx) if val==min1[1] or val==min2[1]]
    mindex=[l[0],l[len(l)-1]]
    cols=img[:,mindex[:]]
    col1=cols[:,0]
    col2=cols[:,1]
    col1_without_0=self.eliminate_zeros(col1)
    col2_without_0=self.eliminate_zeros(col2)
    line_length=len(col1_without_0)
    dex1=col1_without_0[round(len(col1_without_0)/2)][0]
    dex2=col2_without_0[round(len(col2_without_0)/2)][0]
    p1=[dex1,mindex[0]]
    p2=[dex2,mindex[1]]
    return p1,p2,line_length

def remove_line(self,p1,p2,LL,img):
    """Zero out pixels along the straight line through *p1* and *p2*.

    *p1* and *p2* are ``[row, column]`` points (index 0 is used as a row,
    index 1 as a column) and *LL* is the expected line thickness in pixels.
    For each column, the row where the line is predicted to pass is computed;
    from there the code scans up and then down until it meets two consecutive
    zero pixels, and clears the scanned pixels only when the distance between
    the two stopping rows is LL-1, LL or LL+1.

    Returns a list of the rows of *img* after clearing.
    """
    # Slope expressed as row change per column; np.inf marks a vertical line.
    # NOTE(review): with m == np.inf the expression on the `y=` line below
    # yields inf/nan, and int() of that appears to raise - confirm whether
    # vertical lines are expected to reach this function.
    m=(p2[0]-p1[0])/(p2[1]-p1[1]) if p2[1]!=p1[1] else np.inf
    # Despite the names, w is the number of rows and h the number of columns
    # (h drives the column loop below); w is not used afterwards.
    w,h=len(img),len(img[0])
    x=list(range(h))
    # Predicted row of the line for every column index in x.
    y=list(map(lambda z : int(np.round(p1[0]+m*(z-p1[1]))),x))
    # Shallow copy: only the outer container is new.
    # NOTE(review): the row objects are presumably shared with (or views of)
    # img, so the writes below would also modify the caller's image - confirm.
    img_removed_line=list(img)
    for dex in range(h):
        i,j=y[dex],x[dex]
        i=int(i)
        j=int(j)
        rlist=[]
        # Scan upwards (decreasing row) from the predicted row; f1 ends up as
        # the last row visited. At i == 0 the [i-1] lookup reads index -1,
        # i.e. the last row, because of Python's negative indexing.
        while i>=0 and i<len(img_removed_line)-1:
            f1=i
            if img_removed_line[i][j]==0 and img_removed_line[i-1][j]==0:
                break
            rlist.append(i)
            i=i-1
        # Restart from the predicted row and scan downwards; f2 ends up as
        # the last row visited in this direction.
        i,j=y[dex],x[dex]
        i=int(i)
        j=int(j)
        while i>=0 and i<len(img_removed_line)-1:
            f2=i
            if img_removed_line[i][j]==0 and img_removed_line[i+1][j]==0:
                break
            rlist.append(i)
            i=i+1
        # NOTE(review): if the predicted row lies outside 0..len-2, neither
        # loop body runs, so f1/f2 keep the values from an earlier column (or
        # are unassigned on the first column) - verify this is intended.
        # Clear the column segment only when its extent matches the expected
        # thickness within one pixel.
        if np.abs(f2-f1) in [LL+1,LL,LL-1]:
            # The start row can be collected by both scans; de-duplicate.
            rlist=list(set(rlist))
            for k in rlist:
                img_removed_line[k][j]=0

    return img_removed_line

I am new to computer vision. Can someone suggest a way to do this? The original and the partially processed image files are attached here.

My approach is based on the fact that the line is thinner than the characters. In this example I used blurring, threshold and morphology to get rid of the line between the characters. The result is this: 在此处输入图片说明

import cv2
import numpy as np

# Structuring elements for the two morphology steps: a 3-row by 1-column
# kernel for the dilation and a 2x2 kernel for the erosion.
vertical_kernel = np.ones((3, 1), np.uint8)
square_kernel = np.ones((2, 2), np.uint8)

# Read the captcha and smooth it with a 3x3 box blur, then binarize:
# values above 90 become 255, everything else becomes 0.
image = cv2.blur(cv2.imread('captcha.png'), (3, 3))
ret, image = cv2.threshold(image, 90, 255, cv2.THRESH_BINARY)

# Dilate first, then erode; per the accompanying explanation this removes
# the line because it is thinner than the characters.
image = cv2.erode(cv2.dilate(image, vertical_kernel), square_kernel)

cv2.imshow("1", np.array(image))
cv2.waitKey(0)

You can use cv2 functions such as threshold, dilate, bitwise_and and bitwise_not to remove unwanted lines from a captcha:

import numpy as np
import cv2

# Load the captcha as a single-channel grayscale image (flag 0).
img = cv2.imread('captcha.jpg',0)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than deep inside a later cv2 call.
if img is None:
    raise FileNotFoundError("could not read 'captcha.jpg'")

# Invert the image and use the inverse as a mask on the original, then
# invert the masked result back.
horizontal_inv = cv2.bitwise_not(img)
masked_img = cv2.bitwise_and(img, img, mask=horizontal_inv)
masked_img_inv = cv2.bitwise_not(masked_img)

# Three passes of dilation with a 5x5 kernel.
kernel = np.ones((5,5),np.uint8)
dilation = cv2.dilate(masked_img_inv,kernel,iterations = 3)

# Inverse-threshold at 254 and invert again to obtain the final binary image.
ret,thresh2 = cv2.threshold(dilation,254,255,cv2.THRESH_BINARY_INV)
thresh2=cv2.bitwise_not(thresh2)

# Show the result: without a window, waitKey has nothing to wait on and the
# computed image would never be seen.
cv2.imshow('thresh2', thresh2)
cv2.waitKey(0)
cv2.destroyAllWindows()

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM