简体   繁体   English

从图像中提取矩形文本框

[英]Extract rectangular text boxes from an image

I have an image of an application form, and I want to extract only the name, DOB, signature and tick text boxes (the fields enclosed by rectangular boxes), but I am getting the expected results along with other, unexpected ones.我有申请表的图像,我只想提取被文本框包围的姓名、出生日期、签名和勾选文本框,但我得到的结果以及其他意想不到的结果。

Input image:输入图像:

输入图像

Expected result:预期结果:

预期的

My result:我的结果:

结果

I have tried the code below:我试过下面的代码

import numpy as np
from PIL import Image
import tensorflow as tf
import os
import pytesseract 
import sys 
import re

#from pdf2image import convert_from_path #need proppeler windows distrubution 
import cv2
#from pdf2image.exceptions import PDFInfoNotInstalledError,PDFPageCountError,PDFSyntaxError

# Path to Poppler's pdftoppm executable. It is not referenced by the visible
# code; presumably it was meant for the commented-out pdf2image import.
pdftoppm_path = r"C:\Program Files (x86)\Poppler\poppler-0.68.0\bin\pdftoppm.exe"

# Path to the Tesseract installation (the executable pytesseract should invoke).
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sequence of contours accepted by ``cv2.boundingRect``.
        method: Sort order. ``"left-to-right"`` (default) and
            ``"right-to-left"`` sort on the x-coordinate of each bounding
            box; ``"top-to-bottom"`` and ``"bottom-to-top"`` sort on the
            y-coordinate. Any other value behaves like ``"left-to-right"``.

    Returns:
        A ``(contours, bounding_boxes)`` pair of equally long tuples in the
        requested order. Both tuples are empty when ``cnts`` is empty.
    """
    # Bounding boxes are (x, y, w, h): index 0 sorts on x, index 1 on y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0
    reverse = method in ("right-to-left", "bottom-to-top")

    bounding_boxes = [cv2.boundingRect(c) for c in cnts]
    if not bounding_boxes:
        # zip(*[]) yields nothing, so the two-name unpack below would raise
        # ValueError; report "nothing to sort" explicitly instead.
        return ((), ())

    ordered = sorted(zip(cnts, bounding_boxes),
                     key=lambda pair: pair[1][axis], reverse=reverse)
    sorted_cnts, sorted_boxes = zip(*ordered)

    # Return the sorted contours together with their bounding boxes.
    return (sorted_cnts, sorted_boxes)

def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    """Detect box-shaped regions in a form image and save each one as a PNG.

    The image is read as grayscale, resized to 800x800, binarized and
    inverted. Vertical and horizontal strokes are isolated with an
    erode/dilate pass each, blended into one line image, and the contours of
    that image are taken as candidate boxes. Every candidate whose bounding
    box is narrower than 400 px and shorter than 80 px is cropped (with a
    10 px margin to the right and bottom), thresholded at 200 and written to
    disk.

    Args:
        img_for_box_extraction_path: Path of the image to process.
        cropped_dir_path: Prefix prepended verbatim to ``"<idx>.png"``
            (idx starts at 1). No path separator is inserted, so pass a
            trailing separator to write *into* a directory.

    Raises:
        OSError: If the input image cannot be read.
    """
    source = cv2.imread(img_for_box_extraction_path, 0)  # 0 -> grayscale
    if source is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside cv2.resize.
        raise OSError(
            "Could not read image: {!r}".format(img_for_box_extraction_path))
    img = cv2.resize(source, (800, 800))

    # With THRESH_OTSU set the threshold is chosen automatically, so the
    # explicit 120 is only a placeholder.
    _, img_bin = cv2.threshold(img, 120, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin  # Invert the image

    # Kernel length derived from the (resized) image width.
    kernel_length = img.shape[1] // 150

    # Vertical kernel (1 x kernel_length): keeps vertical lines.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    # Horizontal kernel (kernel_length x 1): keeps horizontal lines.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    # Small 2 x 2 rectangular kernel used on the combined line image.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    # Morphological operation to detect vertical lines.
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)

    # Morphological operation to detect horizontal lines.
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)

    # Blend weights: alpha = 0.8 for the vertical lines, beta = 4.2 for the
    # horizontal lines.
    # NOTE(review): the weights sum to 5.0 rather than 1.0, so the blend is
    # expected to saturate - confirm this is intended.
    alpha = 0.8
    beta = 5.0 - alpha
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha,
                                    horizontal_lines_img, beta, 0.0)
    # Invert the blend and erode it before re-binarizing.
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    _, img_final_bin = cv2.threshold(img_final_bin, 128, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; pick the contours either way.
    found = cv2.findContours(img_final_bin, cv2.RETR_TREE,
                             cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    if not contours:
        # Nothing detected, so there is nothing to sort or save.
        return

    # Sort with the default order (left to right); the order determines the
    # index used in each output file name.
    _, bounding_boxes = sort_contours(contours)

    idx = 0
    for x, y, w, h in bounding_boxes:
        # Only boxes narrower than 400 px and shorter than 80 px are saved.
        if w < 400 and h < 80:
            idx += 1
            # Crop with a 10 px margin on the right and bottom edges.
            new_img = img[y:y + h + 10, x:x + w + 10]
            # Binarize the crop before writing it out.
            _, thresh_crop = cv2.threshold(new_img, thresh=200, maxval=255,
                                           type=cv2.THRESH_BINARY)
            # NOTE: plain string concatenation - cropped_dir_path acts as a
            # file-name prefix unless it ends with a path separator.
            cv2.imwrite(cropped_dir_path + str(idx) + '.png', thresh_crop)


# Raw strings keep the Windows backslashes literal; the non-raw form relied on
# invalid escape sequences (\P, \T, \c), which newer Python versions warn about.
box_extraction(r"X:\PDF2IMG\TEST.jpeg", r"X:\PDF2IMG\cropped")

#cv2.waitKey(0)
#cv2.destroyAllWindows()

How can I get only the expected result?我怎样才能得到预期的结果?

I have an image of an application form, and I want to extract only the name, DOB, signature and tick text boxes (the fields enclosed by rectangular boxes), but I am getting the expected results along with other, unexpected ones.我有申请表的图像,我只想提取被文本框包围的姓名、出生日期、签名和勾选文本框,但我得到的结果以及其他意想不到的结果。

To extract the desired regions, we can exploit the fact that rectangular boxes can be isolated using contour approximation and contour area.为了提取所需区域,我们可以利用矩形框的特性:它们可以通过轮廓近似和轮廓面积来分离。 Here's an approach:这是一种方法:

  • Convert image to grayscale, blur, and threshold将图像转换为灰度、模糊和阈值
  • Perform morphological operations to smooth image and remove noise执行形态学操作以平滑图像并去除噪声
  • Find contours寻找轮廓
    • Filter using contour approximation and contour area使用轮廓近似和轮廓区域进行过滤
    • Extract and save ROI using Numpy slicing使用 Numpy 切片提取并保存 ROI

Here are the detected rectangular text boxes highlighted in green这是检测到的以绿色突出显示的矩形文本框

在此处输入图像描述

Since we have the bounding boxes, we simply extract the ROIs由于我们有边界框,我们只需提取 ROI

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

import cv2

# Load the form image; cv2.imread returns None (it does not raise) when the
# file is missing or unreadable, so fail early with a clear message.
image = cv2.imread('1.jpg')
if image is None:
    raise OSError("Could not read image: '1.jpg'")

# Grayscale -> blur -> inverted Otsu threshold.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur,0,255,cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

# Morphological opening to smooth the binary image and remove noise.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# findContours returns 2 values on OpenCV 2.x/4.x and 3 values on 3.x;
# select the contour list in either case.
cnts = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

ROI_number = 0
for c in cnts:
    area = cv2.contourArea(c)
    peri = cv2.arcLength(c, True)
    # Approximate the contour with a polygon (tolerance: 2% of the perimeter).
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # Keep only four-cornered contours whose area is within (1000, 80000).
    if len(approx) == 4 and (area > 1000) and (area < 80000):
        # The bounding rectangle is only needed for contours that are kept.
        x,y,w,h = cv2.boundingRect(approx)
        ROI = image[y:y+h, x:x+w]
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        ROI_number += 1

# Show the intermediate images and wait for a key press.
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.waitKey()

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM