简体   繁体   中英

How to crop multiple ROI in image using Python and OpenCV

I have an image that was converted from PDF to PNG. The converted image contains several keywords that I want to extract using Tesseract OCR.

Right now, I have to determine each ROI manually in order to crop it. Since I have more than 5 ROIs to apply, what would be the most efficient way to define them instead of finding the exact locations by trial and error?

在此处输入图像描述

Below is the code:

    def cropped(self, event):
        """Crop the fixed regions of interest out of ``Output.png``.

        Every region is written to its own PNG file and the cropped arrays
        are kept in ``im`` / ``im1`` so they can be handed to the OCR engine.

        Args:
            event: Not used by this method; presumably supplied by the GUI
                toolkit that triggers the handler -- confirm against caller.

        Raises:
            FileNotFoundError: If ``Output.png`` cannot be read.
        """
        # (output file, top, bottom, left, right) in pixels.  OpenCV images
        # are NumPy arrays indexed [row, column], so the first slice selects
        # rows (vertical extent) and the second selects columns.
        regions = [
            ("Cropped.png", 405, 425, 20, 230),    # 1st ROI
            ("Cropped1.png", 305, 525, 30, 330),   # 2nd ROI
        ]

        # open the converted image; imread returns None instead of raising
        image = cv2.imread("Output.png")
        if image is None:
            raise FileNotFoundError("Output.png")

        # perform image cropping and save each cropped image
        crops = []
        for filename, top, bottom, left, right in regions:
            crop = image[top:bottom, left:right]
            cv2.imwrite(filename, crop)
            crops.append(crop)

        # PNG is lossless, so the in-memory crops equal what re-reading the
        # saved files would give; pass these to the OCR engine directly
        im, im1 = crops

        ## Do the text extraction here

You can use mouse events to select multiple ROIs and crop based on their locations.

#!/usr/bin/env python3
import argparse
import cv2
import numpy as np
from PIL import Image
import os


# State shared between the mouse callback and the main loop.
ROIRegion = []       # one entry per selection started with the left button
refPt = []           # corner points of the most recent selection
ix = iy = -1         # anchor corner recorded when the button goes down
drawing = False      # true if mouse is pressed
# Placeholders; both are replaced once the input image has been loaded.
clone = ""
img = ""

# mouse callback function
def draw_rectangle(event, x, y, flags, param):
    """Mouse callback that records rectangular selections.

    Left button down starts a new selection and registers it in
    ``ROIRegion``; moving with the button held redraws a preview rectangle
    on a fresh copy of ``clone``; left button up stores the second corner
    and draws the final outline.

    Args:
        event: OpenCV mouse event code.
        x: Horizontal mouse position reported with the event.
        y: Vertical mouse position reported with the event.
        flags: Unused.
        param: Unused.
    """
    global ix, iy, drawing, img, refPt

    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
        # The same list object is stored in ROIRegion, so the corner that is
        # appended on button-up becomes visible there as well.
        refPt = [(x, y)]
        ROIRegion.append(refPt)

    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing:
            # Start from the untouched image so earlier previews vanish.
            img = clone.copy()
            cv2.rectangle(img, (ix, iy), (x, y), (0, 255, 0), 3)

    elif event == cv2.EVENT_LBUTTONUP:
        if not drawing:
            # Release without a matching press: nothing to finish.
            return
        drawing = False
        refPt.append((x, y))
        img = clone.copy()
        cv2.rectangle(img, (ix, iy), (x, y), (0, 255, 0), 2)


ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())
# load the image, clone it, and setup the mouse callback function
img = cv2.imread(args["image"])
if img is None:
    # imread signals an unreadable or missing file by returning None;
    # report it through argparse so the user gets the usage text too.
    ap.error("could not read image: {0}".format(args["image"]))
# imread already yields a NumPy array, so it can be copied directly;
# the pristine copy is what previews and crops are taken from
clone = img.copy()

# Show the image and process keys until Esc is pressed:
#   r    discard the most recent selection and clear the drawing
#   Esc  leave the loop and continue with cropping
cv2.namedWindow('image')
cv2.setMouseCallback('image', draw_rectangle)
while True:
    cv2.imshow('image', img)

    # Keep only the low byte so the key code can be compared with ord().
    k = cv2.waitKey(1) & 0xFF
    if k == ord("r"):
        # Guard both deletions: with nothing selected there is nothing to
        # undo, and an unguarded del on an empty list raises IndexError.
        if ROIRegion:
            ROIRegion.pop()
        if refPt:
            del refPt[-1]
        img = clone.copy()

    elif k == 27:
        break

#Do your cropping here
# Each entry of ROIRegion holds the press corner followed by the release
# corner.  The crops are collected in ``rois`` in selection order.
img_height, img_width = clone.shape[:2]
rois = []
for region in ROIRegion:
    if len(region) < 2:
        # Selection was started but never finished; no second corner.
        continue
    (x0, y0), (x1, y1) = region[0], region[1]
    cv2.rectangle(img, (x0, y0), (x1, y1), (0, 255, 0), 2)

    # Order the corners so dragging up or left still gives a valid slice,
    # and clamp to the image so negative coordinates cannot wrap around.
    left, right = sorted((x0, x1))
    top, bottom = sorted((y0, y1))
    left, top = max(left, 0), max(top, 0)
    right, bottom = min(right, img_width), min(bottom, img_height)
    if right <= left or bottom <= top:
        # Zero-area selection (e.g. a plain click); cvtColor rejects it.
        continue

    roi = clone[top:bottom, left:right]
    # OpenCV loads images as BGR; convert to RGB channel order
    # (presumably for the PIL/OCR step that follows -- confirm).
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    rois.append(roi)

Here is one way in Python/OpenCV.

  • Read the input
  • Threshold on box outline color
  • Apply morphology to ensure closed
  • Get the external contours
  • Loop over each contour, get its bounding box, crop the region in the input and write the output

Input:

在此处输入图像描述

import cv2
import numpy as np

# read image
img = cv2.imread('text_boxes.jpg')
if img is None:
    # imread returns None rather than raising when the file is unreadable
    raise FileNotFoundError('text_boxes.jpg')

# threshold on box outline color (per-channel lower and upper bounds)
lowerBound = (80,120,100)
upperBound = (160,200,180)
thresh = cv2.inRange(img, lowerBound, upperBound)

# apply morphological close with a 3x3 kernel so the outlines are closed
kernel = np.ones((3,3), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# get external contours; findContours returns either 2 or 3 values
# depending on the OpenCV version, so pick the contour list accordingly
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# crop each contour's bounding box from the input and write it out,
# numbering the output files from 1
for i, cntr in enumerate(contours, start=1):
    x,y,w,h = cv2.boundingRect(cntr)
    crop = img[y:y+h, x:x+w]
    cv2.imwrite("text_boxes_crop_{0}.png".format(i), crop)

# save threshold
cv2.imwrite("text_boxes_thresh.png",thresh)

# show thresh and wait for a key press before closing the window
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

Threshold image:

在此处输入图像描述

Cropped Images:

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM