
OpenCV Object detection with Feature Detection and Homography

I am trying to check if this image:

[template image]

is contained inside images like this one:

[source image]

I am using feature detection (SURF) and homography because template matching is not scale invariant. Sadly, all the keypoints except a few are in the wrong positions. Should I instead try template matching, scaling the image multiple times? If so, what would be the best approach to scaling the image?

Code:

import java.util.ArrayList;
import java.util.List;
import org.opencv.calib3d.Calib3d;
import org.opencv.core.Core;
import org.opencv.core.CvType;
import org.opencv.core.DMatch;
import org.opencv.core.KeyPoint;
import org.opencv.core.Mat;
import org.opencv.core.MatOfByte;
import org.opencv.core.MatOfDMatch;
import org.opencv.core.MatOfKeyPoint;
import org.opencv.core.MatOfPoint2f;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.features2d.DescriptorMatcher;
import org.opencv.features2d.Features2d;
import org.opencv.highgui.HighGui;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.opencv.xfeatures2d.SURF;
class SURFFLANNMatchingHomography {
    public void run(String[] args) {
        String filenameObject = args.length > 1 ? args[0] : "../data/box.png";
        String filenameScene = args.length > 1 ? args[1] : "../data/box_in_scene.png";
        Mat imgObject = Imgcodecs.imread(filenameObject, Imgcodecs.IMREAD_GRAYSCALE);
        Mat imgScene = Imgcodecs.imread(filenameScene, Imgcodecs.IMREAD_GRAYSCALE);
        if (imgObject.empty() || imgScene.empty()) {
            System.err.println("Cannot read images!");
            System.exit(0);
        }
        //-- Step 1: Detect the keypoints using SURF Detector, compute the descriptors
        double hessianThreshold = 400;
        int nOctaves = 4, nOctaveLayers = 3;
        boolean extended = false, upright = false;
        SURF detector = SURF.create(hessianThreshold, nOctaves, nOctaveLayers, extended, upright);
        MatOfKeyPoint keypointsObject = new MatOfKeyPoint(), keypointsScene = new MatOfKeyPoint();
        Mat descriptorsObject = new Mat(), descriptorsScene = new Mat();
        detector.detectAndCompute(imgObject, new Mat(), keypointsObject, descriptorsObject);
        detector.detectAndCompute(imgScene, new Mat(), keypointsScene, descriptorsScene);
        //-- Step 2: Matching descriptor vectors with a FLANN based matcher
        // Since SURF is a floating-point descriptor NORM_L2 is used
        DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
        List<MatOfDMatch> knnMatches = new ArrayList<>();
        matcher.knnMatch(descriptorsObject, descriptorsScene, knnMatches, 2);
        //-- Filter matches using the Lowe's ratio test
        float ratioThresh = 0.75f;
        List<DMatch> listOfGoodMatches = new ArrayList<>();
        for (int i = 0; i < knnMatches.size(); i++) {
            if (knnMatches.get(i).rows() > 1) {
                DMatch[] matches = knnMatches.get(i).toArray();
                if (matches[0].distance < ratioThresh * matches[1].distance) {
                    listOfGoodMatches.add(matches[0]);
                }
            }
        }
        MatOfDMatch goodMatches = new MatOfDMatch();
        goodMatches.fromList(listOfGoodMatches);
        //-- Draw matches
        Mat imgMatches = new Mat();
        Features2d.drawMatches(imgObject, keypointsObject, imgScene, keypointsScene, goodMatches, imgMatches, Scalar.all(-1),
                Scalar.all(-1), new MatOfByte(), Features2d.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS);
        //-- Localize the object
        List<Point> obj = new ArrayList<>();
        List<Point> scene = new ArrayList<>();
        List<KeyPoint> listOfKeypointsObject = keypointsObject.toList();
        List<KeyPoint> listOfKeypointsScene = keypointsScene.toList();
        for (int i = 0; i < listOfGoodMatches.size(); i++) {
            //-- Get the keypoints from the good matches
            obj.add(listOfKeypointsObject.get(listOfGoodMatches.get(i).queryIdx).pt);
            scene.add(listOfKeypointsScene.get(listOfGoodMatches.get(i).trainIdx).pt);
        }
        MatOfPoint2f objMat = new MatOfPoint2f(), sceneMat = new MatOfPoint2f();
        objMat.fromList(obj);
        sceneMat.fromList(scene);
        double ransacReprojThreshold = 3.0;
        Mat H = Calib3d.findHomography( objMat, sceneMat, Calib3d.RANSAC, ransacReprojThreshold );
        //-- Get the corners from the image_1 ( the object to be "detected" )
        Mat objCorners = new Mat(4, 1, CvType.CV_32FC2), sceneCorners = new Mat();
        float[] objCornersData = new float[(int) (objCorners.total() * objCorners.channels())];
        objCorners.get(0, 0, objCornersData);
        objCornersData[0] = 0;
        objCornersData[1] = 0;
        objCornersData[2] = imgObject.cols();
        objCornersData[3] = 0;
        objCornersData[4] = imgObject.cols();
        objCornersData[5] = imgObject.rows();
        objCornersData[6] = 0;
        objCornersData[7] = imgObject.rows();
        objCorners.put(0, 0, objCornersData);
        Core.perspectiveTransform(objCorners, sceneCorners, H);
        float[] sceneCornersData = new float[(int) (sceneCorners.total() * sceneCorners.channels())];
        sceneCorners.get(0, 0, sceneCornersData);
        //-- Draw lines between the corners (the mapped object in the scene - image_2 )
        Imgproc.line(imgMatches, new Point(sceneCornersData[0] + imgObject.cols(), sceneCornersData[1]),
                new Point(sceneCornersData[2] + imgObject.cols(), sceneCornersData[3]), new Scalar(0, 255, 0), 4);
        Imgproc.line(imgMatches, new Point(sceneCornersData[2] + imgObject.cols(), sceneCornersData[3]),
                new Point(sceneCornersData[4] + imgObject.cols(), sceneCornersData[5]), new Scalar(0, 255, 0), 4);
        Imgproc.line(imgMatches, new Point(sceneCornersData[4] + imgObject.cols(), sceneCornersData[5]),
                new Point(sceneCornersData[6] + imgObject.cols(), sceneCornersData[7]), new Scalar(0, 255, 0), 4);
        Imgproc.line(imgMatches, new Point(sceneCornersData[6] + imgObject.cols(), sceneCornersData[7]),
                new Point(sceneCornersData[0] + imgObject.cols(), sceneCornersData[1]), new Scalar(0, 255, 0), 4);
        //-- Show detected matches
        HighGui.imshow("Good Matches & Object detection", imgMatches);
        HighGui.waitKey(0);
        System.exit(0);
    }
}
public class SURFFLANNMatchingHomographyDemo {
    public static void main(String[] args) {
        // Load the native OpenCV library
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        new SURFFLANNMatchingHomography().run(args);
    }
}

Resulting image:

Here's a possible solution. The code is in Python, but the operations are very straightforward; hopefully you'll be able to port it to Java. I'm using template matching. The gist, I guess, is that I'm performing template matching on a binary mask obtained from the Cyan (C) component of the input image. The steps are these:

  1. Trim your image to get rid of unwanted noise
  2. Convert the image to the CMYK color space and get the Cyan Channel
  3. Clean the Cyan channel
  4. Read template
  5. Convert template to a binary image
  6. Perform template matching

Let's see. The position of the template in the target image seems constant, so we can crop the image to get rid of the parts where we are sure we won't locate the template. I've cropped the image to eliminate part of the "header" and "footer" by specifying the coordinates (top-left x, top-left y, width, height) of a Region of Interest (ROI), like this:

# imports:
import numpy as np
import cv2

# image path
path = "D://opencvImages//"
fileName = "screen.png"

# Reading an image in default mode:
inputImage = cv2.imread(path + fileName)

# Deep copy for results:
inputImageCopy = inputImage.copy()

# Get image dimensions:
(imageHeight, imageWidth) = inputImage.shape[:2]

# Set the ROI location (top-left x, top-left y, width, height):
roiX = 0
roiY = 225
roiWidth = imageWidth
roiHeight = 1165

# Crop the ROI:
imageROI = inputImage[roiY:roiY + roiHeight, roiX:roiX + roiWidth]

# Store a deep copy of this image for results:
imageROIcopy = imageROI.copy()

You will get the following cropped image:

You could crop even more, but I'm not sure of your requirements. Let's work with this and convert the new image to the CMYK color space. Then, extract the Cyan channel, as the template seems to have most of its content in that particular channel. There's no direct conversion to the CMYK color space in OpenCV, so I'm applying the conversion formula directly: for RGB values normalized to [0, 1], K = 1 - max(R, G, B) and C = (1 - R - K) / (1 - K). We can get every color space component from that formula, but we are only interested in the C channel, which only needs pre-computation of the K (Key) channel. It can be calculated like this:

# Convert the image to float and divide by 255:
floatImage = imageROI.astype(np.float64) / 255.0

# Calculate channel K (Key):
kChannel = 1 - np.max(floatImage, axis=2)

# Calculate channel C (Cyan); index 2 is the Red channel, since OpenCV
# loads images in BGR order. Guard against division by zero where K ~ 1:
cChannel = np.where(kChannel < 0.9, (1 - floatImage[..., 2] - kChannel) / (1 - kChannel), 0)

# Convert the Cyan channel to uint8:
cChannel = (255 * cChannel).astype(np.uint8)

Be careful with your data types. We need to operate on float arrays, so that's the first conversion I perform. After getting the C channel, we convert the image back to an unsigned 8-bit array. This is the image you get for the C channel:

Next, get a binary mask from this via Otsu's thresholding:

# Threshold via Otsu:
_, binaryImage = cv2.threshold(cChannel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

This is the mask:

There are some white zones we could "eliminate" via flood-filling with black. Let's apply four flood-fill operations on the binary image, seeded at the top-left, top-right, bottom-left and bottom-right corners:

# Get the dimensions of the cropped image:
(imageHeight, imageWidth) = binaryImage.shape[:2]

# Apply flood-fill at seed point (0,0) - Top Left:
cv2.floodFill(binaryImage, mask=None, seedPoint=(0, 0), newVal=0)

# Apply flood-fill at seed point (imageWidth - 1, 0) - Top Right:
cv2.floodFill(binaryImage, mask=None, seedPoint=(imageWidth - 1, 0), newVal=0)

# Apply flood-fill at seed point (0, imageHeight - 1) - Bottom Left:
cv2.floodFill(binaryImage, mask=None, seedPoint=(0, imageHeight - 1), newVal=0)

# Apply flood-fill at seed point (imageWidth - 1, imageHeight - 1) - Bottom Right:
cv2.floodFill(binaryImage, mask=None, seedPoint=(imageWidth - 1, imageHeight - 1), newVal=0)

This is the result. Note that the sub-image we are looking for is isolated, and most of the big noise is gone:
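You could probably run an area filter on this to get rid of the smaller (and larger) blobs of noise. Here's a minimal sketch of that idea using connected components; the 100-pixel minimum area is an arbitrary assumption you would tune:

# Optional area filter: keep only blobs above a minimum area.
minArea = 100  # arbitrary threshold; tune it to your images
(nLabels, labels, stats, _) = cv2.connectedComponentsWithStats(binaryImage, connectivity=4)
filteredImage = np.zeros_like(binaryImage)
for i in range(1, nLabels):  # label 0 is the background
    if stats[i, cv2.CC_STAT_AREA] >= minArea:
        filteredImage[labels == i] = 255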

That area filter is optional, though; let's roll with the unfiltered result for now. Alright, the first part is done. Let's read the template and perform template matching. Now, your template has an alpha channel that is of no use here. I opened your image in GIMP and replaced the alpha channel with plain white; this is the template I got:

Let's read that, convert it to grayscale and perform Otsu's thresholding to get a binary image:

# Read template:
template = cv2.imread(path+"colorTemplate.png")

# Convert it to grayscale:
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

# Threshold via Otsu:
_, template = cv2.threshold(template, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

This is the binary template:

Now, you could implement a progressive scaling mechanism here, resizing the template by a scale percent and running template matching on the target image in "steps", then looking for the best matching result across the whole "run" and comparing it against a minimum threshold (there's a sketch of this in the Edit section at the end). But let's test the template as-is:

# Get template dimensions:
(templateHeight, templateWidth) = template.shape[:2]

# Run Template Matching:
result = cv2.matchTemplate(binaryImage, template, cv2.TM_CCOEFF_NORMED)

# Get Template Matching Results:
(minVal, maxVal, minLoc, maxLoc) = cv2.minMaxLoc(result)

# Get Matching Score:
matchScore = maxVal
print("Match Score: "+str(matchScore))

With this template, I'm getting a matchScore of:

Match Score: 0.806335985660553

Looks very acceptable. Let's draw a nice rectangle at the position where the largest matching score was found, just to visualize the result:

# Set the ROI where the largest matching score was found.
# Note: matchWidth and matchHeight are really bottom-right corner coordinates:
matchX = maxLoc[0]
matchY = maxLoc[1]
matchWidth = matchX + templateWidth
matchHeight = matchY + templateHeight

# Draw the ROI on the copy of the cropped BGR image:
cv2.rectangle(imageROIcopy, (matchX, matchY), (matchWidth, matchHeight), (0, 0, 255), 2)
# Show the result:
cv2.imshow("Result (Local)", imageROIcopy)
cv2.waitKey(0)

This is the (Cropped) result:

Looks alright. As we cropped the image to run this operation, let's locate the matching ROI on the actual, uncropped image:

# Show result on original image:
matchX = roiX + matchX
matchY = roiY + matchY
matchWidth = matchX + templateWidth
matchHeight = matchY + templateHeight

# Draw the ROI on the original (uncropped) BGR image:
cv2.rectangle(inputImage, (matchX, matchY), (matchWidth, matchHeight), (0, 0, 255), 2)

Additionally, we can draw a nice label with the matching score inside. This is optional, just to have all the info drawn on the original image:

# Draw label with match result:
# Format the match score to two decimal places:
matchScore = "{:.2f}".format(matchScore)

# Draw a filled rectangle as the label background (the tuple holds the
# bottom-right corner of the label, despite the width/height names):
labelOrigin = (matchX - 1, matchY - 40)
(labelWidth, labelHeight) = (matchWidth + 1, matchY)
cv2.rectangle(inputImage, labelOrigin, (labelWidth, labelHeight), (0, 0, 255), -1)

# Draw the text:
labelOrigin = (matchX-1, matchY - 10)
cv2.putText(inputImage, str(matchScore), labelOrigin, cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)

cv2.imshow("Result (Global)", inputImage)
cv2.waitKey(0)

This is the (Full-sized) result:


Edit: Handling new images

I noticed that your new image is different from the original one. It seems that you are capturing screens from different phones with different resolutions. The problem with this is that the template must be re-scaled if you change the size of the target image; otherwise, the template will be too small (or too large) for the new matching, yielding poor results. You could implement the re-scaling mechanism mentioned above to up-scale the template; eventually you would find a decent result at a certain re-scaled size. That's one option.
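A minimal sketch of that progressive scaling loop, assuming the binaryImage and binary template from the main answer are available; the scale range, step count, and the 0.5 acceptance threshold are arbitrary values you would tune:

# Progressive template scaling: try several template sizes, keep the best score.
bestScore = 0.0
bestLoc = None
bestSize = None

# Dimensions of the target (binary) image:
(targetHeight, targetWidth) = binaryImage.shape[:2]

for scale in np.linspace(0.5, 2.0, 16):
    # Resize the binary template by the current scale:
    scaledWidth = int(templateWidth * scale)
    scaledHeight = int(templateHeight * scale)
    # Skip scales where the template no longer fits inside the target:
    if scaledWidth > targetWidth or scaledHeight > targetHeight:
        continue
    scaledTemplate = cv2.resize(template, (scaledWidth, scaledHeight), interpolation=cv2.INTER_AREA)
    # Match and keep the best score across all scales:
    result = cv2.matchTemplate(binaryImage, scaledTemplate, cv2.TM_CCOEFF_NORMED)
    (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
    if maxVal > bestScore:
        (bestScore, bestLoc, bestSize) = (maxVal, maxLoc, (scaledWidth, scaledHeight))

# Accept the detection only above a minimum score:
if bestScore > 0.5:
    print("Best score: " + str(bestScore) + " at: " + str(bestLoc) + " size: " + str(bestSize))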

The other option is to re-scale the new image to a size similar to the original's. Your original image had a size of 1125 x 2001, while the new one is 1600 x 2560. That's an important difference. Let's resize the new image so it has the same width as the original image. The beginning of the code would be modified to this:

# image path
path = "D://opencvImages//"
fileName = "newScreen.png"

# Reading an image in default mode:
inputImage = cv2.imread(path + fileName)

# Set the reference width:
referenceWidth = 1125

# Get image dimensions:
(imageHeight, imageWidth) = inputImage.shape[:2]

# Check input width vs reference width:
if imageWidth != referenceWidth:

    # Get original aspect ratio:
    aspectRatio = imageWidth / imageHeight
    # Compute new height using the reference width:
    newHeight = referenceWidth / aspectRatio
    # Set the new dimensions as a tuple:
    dim = (int(referenceWidth), int(newHeight))
    # Resize the image:
    inputImage = cv2.resize(inputImage, dim, interpolation=cv2.INTER_AREA)
    # Get new dimensions for further processing:
    (imageHeight, imageWidth) = inputImage.shape[:2]


# Deep copy for results:
inputImageCopy = inputImage.copy()

# Set the ROI location (top-left x, top-left y, width, height):
roiX = 0
roiY = 225
roiWidth = imageWidth
roiHeight = 1165

Here, I set the reference width to 1125 pixels, get the input image dimensions via shape, and check whether the input width differs from the reference. If that's the case, I resize the image according to the reference width and the original aspect ratio. The rest of the code needs no modifications. The result on your new image would be this:

If looking for specific colors is an option, you can rely on color segmentation to find candidates quickly, regardless of their size, but you'll have to add some post-filtering.
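A rough sketch of that idea, assuming the template's dominant color falls in a cyan/blue HSV range; the bounds and the minimum box size are guesses you would tune to your actual colors:

# Color segmentation: threshold a hypothetical cyan/blue HSV range.
hsvImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2HSV)
lowerColor = np.array([85, 80, 80])     # hypothetical lower HSV bound
upperColor = np.array([105, 255, 255])  # hypothetical upper HSV bound
colorMask = cv2.inRange(hsvImage, lowerColor, upperColor)

# Post-filter: keep only candidates with a reasonable bounding box
# (OpenCV 4.x returns two values from findContours):
contours, _ = cv2.findContours(colorMask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for c in contours:
    (x, y, w, h) = cv2.boundingRect(c)
    if w > 50 and h > 50:  # arbitrary minimum size
        cv2.rectangle(inputImage, (x, y), (x + w, y + h), (0, 255, 0), 2)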

