How to crop multiple ROI in image using Python and OpenCV

Question

I have an image that was converted from PDF to PNG. The converted image contains several keywords that I want to extract using Tesseract OCR.

Right now, I need to determine each ROI manually in order to crop it. Since I have more than 5 ROIs to apply, what would be the most efficient way to define them instead of finding the exact locations by trial and error?

Below is the code:

    def cropped(self, event):
        """Crop the fixed regions of interest out of "Output.png" for OCR.

        Each region is written to its own PNG file and then read back so the
        text-extraction step can work on the saved crops.

        Args:
            event: The triggering event object; it is not used in this method.

        Raises:
            FileNotFoundError: If "Output.png" is missing or cannot be decoded.
        """
        # One entry per ROI: (row_start, row_end, col_start, col_end, out_file).
        # Images are indexed [row, column], i.e. [vertical, horizontal], so the
        # first pair selects the vertical extent and the second the horizontal.
        # Add further ROIs by appending rows to this table.
        regions = [
            (405, 425, 20, 230, "Cropped.png"),   # 1st ROI
            (305, 525, 30, 330, "Cropped1.png"),  # 2nd ROI
        ]

        # open the converted image
        image = cv2.imread("Output.png")
        if image is None:
            # cv2.imread reports failure by returning None instead of raising,
            # which would otherwise surface as an opaque TypeError on slicing.
            raise FileNotFoundError("Output.png is missing or unreadable")

        crops = []
        for row_start, row_end, col_start, col_end, out_file in regions:
            # perform image cropping and save the cropped image
            cv2.imwrite(out_file, image[row_start:row_end, col_start:col_end])
            # open the cropped image and pass to the OCR engine
            crops.append(cv2.imread(out_file))

        # Keep the original names available for the extraction step below.
        im, im1 = crops

        ## Do the text extraction here

Answer 1

You can use mouse events to select multiple ROIs interactively and then crop the image based on the selected locations.

#!/usr/bin/env python3
import argparse
import cv2
import numpy as np
from PIL import Image
import os


# Shared state used by the mouse callback and the display loop.
drawing = False      # set to True while the left mouse button is held down
ix = iy = -1         # anchor corner of the rectangle being dragged
refPt = []           # corner points of the most recently started rectangle
img, clone = "", ""  # placeholders until the image is loaded and cloned
ROIRegion = []       # one list of corner points per selected region

# mouse callback function
def draw_rectangle(event, x, y, flags, param):
    """Mouse callback that lets the user drag out rectangular regions.

    A left-button press starts a new region and registers it in ``ROIRegion``;
    moving the mouse while the button is held previews the rectangle; releasing
    the button stores the second corner and draws the final outline.

    Args:
        event: The mouse event code passed in by the window (compared against
            the ``cv2.EVENT_*`` constants).
        x: Horizontal position of the mouse for this event.
        y: Vertical position of the mouse for this event.
        flags: Unused.
        param: Unused.
    """
    global ix, iy, drawing, img, clone, refPt, ROIRegion
    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
        # The same list object is stored in ROIRegion, so the second corner
        # appended on button release completes the registered region in place.
        refPt = [(x, y)]
        ROIRegion.append(refPt)

    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing:
            # Start from the untouched copy so earlier previews do not pile up.
            img = clone.copy()
            cv2.rectangle(img, (ix, iy), (x, y), (0, 255, 0), 3)

    elif event == cv2.EVENT_LBUTTONUP:
        if not drawing:
            # Release without a matching press: there is no region to finish.
            return
        drawing = False
        refPt.append((x, y))
        img = clone.copy()
        cv2.rectangle(img, (ix, iy), (x, y), (0, 255, 0), 2)


# Command line: the image to annotate is given with -i/--image.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())

# load the image, clone it, and setup the mouse callback function
img = cv2.imread(args["image"])
if img is None:
    # cv2.imread returns None instead of raising when the file can't be read.
    ap.error("could not read image: {}".format(args["image"]))
# `clone` stays untouched; `img` is the copy that gets drawn on and displayed.
clone = img.copy()

cv2.namedWindow('image')
cv2.setMouseCallback('image', draw_rectangle)
while True:
    cv2.imshow('image', img)

    k = cv2.waitKey(1) & 0xFF
    if k == ord("r"):
        # Undo the most recent selection; ignore the key if nothing is selected.
        if ROIRegion:
            ROIRegion.pop()
            img = clone.copy()

    elif k == 27:  # Esc ends the selection phase
        break

cv2.destroyAllWindows()

# Do your cropping here
img_h, img_w = clone.shape[:2]
for region in ROIRegion:
    if len(region) < 2:
        # Button was pressed but never released: no second corner recorded.
        continue
    (x0, y0), (x1, y1) = region[0], region[1]
    cv2.rectangle(img, (x0, y0), (x1, y1), (0, 255, 0), 2)

    # Clamp to the image and order the corners so that a rectangle dragged
    # up/left (or past the image border) still yields a valid slice.
    left, right = sorted(min(max(v, 0), img_w) for v in (x0, x1))
    top, bottom = sorted(min(max(v, 0), img_h) for v in (y0, y1))
    if left == right or top == bottom:
        # Zero-area selection (e.g. a plain click): nothing to crop.
        continue

    roi = clone[top:bottom, left:right]
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)

Answer 2

Here is one way in Python/OpenCV.

Read the input
Threshold on box outline color
Apply morphology to ensure closed
Get the external contours
Loop over each contour, get its bounding box, crop the region in the input and write the output

Input:

import cv2
import numpy as np

# read image
img = cv2.imread('text_boxes.jpg')
if img is None:
    # cv2.imread returns None (no exception) when the file can't be read;
    # fail here with a clear message instead of inside cv2.inRange.
    raise FileNotFoundError("could not read image: text_boxes.jpg")

# threshold on box outline color
lowerBound = (80,120,100)
upperBound = (160,200,180)
thresh = cv2.inRange(img, lowerBound, upperBound)

# apply morphology to ensure regions are filled and remove extraneous noise
kernel = np.ones((3,3), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns either a 2-tuple or a 3-tuple; pick the contour list
# from whichever form came back.
contours = contours[0] if len(contours) == 2 else contours[1]

# crop the bounding box of every contour; output files are numbered from 1
for i, cntr in enumerate(contours, start=1):
    x,y,w,h = cv2.boundingRect(cntr)
    crop = img[y:y+h, x:x+w]
    cv2.imwrite("text_boxes_crop_{0}.png".format(i), crop)

# save threshold
cv2.imwrite("text_boxes_thresh.png",thresh)

# show thresh and wait for a key press before closing the window
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

Threshold image: