How to detect a given shape on the video and draw a rectangle around it?

I have a certain problem with opencv and python...

I received four images of a chainsaw and one video containing the same chainsaw. My task is to draw a rectangle around that chainsaw in the video using OpenCV. I tried to do it this way:

import numpy as np
import inspect
import cv2

def show_img(img, bw=False):
    """Display ``img`` on the axes of a new 13x13 figure.

    When ``bw`` is truthy the image is drawn with the ``'Greys_r'``
    colormap; otherwise no colormap is passed.
    """
    # NOTE(review): `plt` is not imported in this snippet -- presumably
    # `matplotlib.pyplot`; confirm before running.
    axes = plt.figure(figsize=(13, 13)).gca()
    if bw:
        colormap = 'Greys_r'
    else:
        colormap = None
    axes.imshow(img, cmap=colormap)

# Load the four reference images of the saw.
img1 = cv2.imread("saw1.jpg")
img2 = cv2.imread("saw2.jpg")
img3 = cv2.imread("saw3.jpg")
img4 = cv2.imread("saw4.jpg")

# ORB works on single-channel images.
gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
gray3 = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)
gray4 = cv2.cvtColor(img4, cv2.COLOR_BGR2GRAY)

# Keypoints and binary descriptors for every reference image.
orb = cv2.ORB_create(nfeatures=1000)
kp1, des1 = orb.detectAndCompute(gray1, None)
kp2, des2 = orb.detectAndCompute(gray2, None)
kp3, des3 = orb.detectAndCompute(gray3, None)
kp4, des4 = orb.detectAndCompute(gray4, None)

# Hamming distance is the appropriate norm for ORB's binary descriptors.
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

cap = cv2.VideoCapture('sawmovie.mp4')

thickness = 2
color = (255, 0, 0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        # End of the video (or a read failure): stop instead of looping forever.
        break

    frameGray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    kp5, des5 = orb.detectAndCompute(frameGray, None)

    # detectAndCompute yields no descriptors for a featureless frame;
    # matching against None would raise, so just show such frames as-is.
    if des5 is not None:
        # NOTE: only the first reference image (des1) is matched here;
        # des2..des4 are computed above but not used.
        matches = sorted(matcher.match(des1, des5), key=lambda m: m.distance)
        good_matches = matches[:10]

        # Positions of the matched keypoints in the video frame.
        frame_points = [kp5[mat.trainIdx].pt for mat in good_matches]

        if frame_points:
            x_values = [pt[0] for pt in frame_points]
            y_values = [pt[1] for pt in frame_points]
            x_max = int(max(x_values))
            x_min = int(min(x_values))
            y_max = int(max(y_values))
            y_min = int(min(y_values))

            # Axis-aligned box spanning all matched points (drawn in place).
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, thickness)

    cv2.imshow('Frame', frame)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


I tried it like this, but the rectangle ends up in a random place. I'm stuck and I can't get any further.

Sample images and one frame from video: https://drive.google.com/drive/u/0/folders/1lD97uXttSUUUc2R76nXsbQVxNfONKnp8

What I want to obtain is a rectangle drawn around the saw on every frame of the video, using those sample images.

I'm really sorry. I got called in for work over the weekend and it looks like I won't be leaving work today. I'll post the crappy code I've got now.

I ran it in two files. One to get all of the matched points saved to a text file and another to read all of those points and draw a box around it. The feature point detectors take forever to run which is why I'm saving it to a file and just replaying that file.


import cv2
import numpy as np
import time

# sample images
samples = [cv2.imread("saw" + str(a) + ".jpg") for a in range(1, 5)]

# test video
cap = cv2.VideoCapture("sawmovie.mp4")

# make detector and matcher
sift = cv2.SIFT_create()
bfm = cv2.BFMatcher()

# The sample images never change, so compute their descriptors once up front
# rather than once per video frame (feature extraction is the slow part).
sample_descriptors = []
for index, sample in enumerate(samples, start=1):
    if sample is None:
        # cv2.imread returns None when the file cannot be read
        print("Could not read sample image " + str(index))
        continue
    _, sample_des = sift.detectAndCompute(sample, None)
    if sample_des is not None:
        sample_descriptors.append(sample_des)

# skip empty frames
frame = 0
skip = 0
for _ in range(skip):
    cap.read()
    frame += 1

# text file to save feature points (so we don't have to redo the slow part every time)
# each line is "<frame> <x> <y>"
with open("matched_points.txt", 'w') as log_file:
    # go until video is finished
    while True:
        # get frame
        ret, test = cap.read()
        if not ret:
            break

        # count
        print("Frame: " + str(frame))

        # create a fresh mask (only displayed below; nothing is drawn on it in this script)
        mask = np.zeros(test.shape[:2], np.uint8)

        # features of the current frame, computed once and shared by all samples
        kp2, des2 = sift.detectAndCompute(test, None)

        # knnMatch with k=2 needs at least two candidate descriptors in the frame
        if des2 is not None and len(des2) >= 2:
            # do keypoint matching with each sample
            for des1 in sample_descriptors:
                matches = bfm.knnMatch(des1, des2, k=2)

                # lowe's ratio test: keep a match only when it is clearly
                # better than the second-best candidate
                good = [
                    pair[0]
                    for pair in matches
                    if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
                ]

                # pull position of matches in the frame and add to log
                for m in good:
                    x, y = kp2[m.trainIdx].pt
                    log_file.write(f"{frame} {int(x)} {int(y)}\n")

        # show
        cv2.imshow("Test", test)
        cv2.imshow("Mask", mask)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break

        # increment frame counter
        frame += 1

cap.release()
cv2.destroyAllWindows()


import cv2
import numpy as np
import time

# rescale
def rescale(img, scale):
    """Return ``img`` resized by the factor ``scale`` along both axes.

    The target width and height are the original dimensions multiplied by
    ``scale`` and truncated to integers.
    """
    height, width = img.shape[:2]
    new_size = (int(width * scale), int(height * scale))  # cv2.resize takes (w, h)
    return cv2.resize(img, new_size)

# test video
cap = cv2.VideoCapture("sawmovie.mp4")

# text file with saved feature points (so we don't have to redo the slow part every time)
# each line is "<frame> <x> <y>"; group the points by frame number so that
# frames without any logged match simply map to no points
points_by_frame = {}
with open("matched_points.txt", 'r') as log_file:
    for line in log_file:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines
        frame, x, y = (int(a) for a in fields)
        points_by_frame.setdefault(frame, []).append((x, y))

# make videowriter
# NOTE: res has to match the size of the frames that are written
res = (1920, 1080)
four_cc = cv2.VideoWriter_fourcc(*'DIVX')  # this is windows-specific
writer = cv2.VideoWriter("marked.avi", four_cc, 20, res)

# skip empty frames
frame = 0
skip = 0
for _ in range(skip):
    cap.read()
    frame += 1

# radius used to grow points so that neighbours closer than "dist" link up
dist = 40

# go until video is finished
while True:
    # get frame
    ret, test = cap.read()
    if not ret:
        break

    # count
    print("Frame: " + str(frame))

    # create a fresh mask
    mask = np.zeros(test.shape[:2], np.uint8)

    # grab points logged for this frame (none if nothing matched)
    points = points_by_frame.get(frame, [])

    # grow points into filled discs so that nearby points merge into one blob
    for point in points:
        cv2.circle(mask, point, dist, 255, -1)

    # get contours and find biggest by area
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        biggest = max(contours, key=cv2.contourArea)

        # bounding box around the largest blob
        x, y, w, h = cv2.boundingRect(biggest)
        cv2.rectangle(test, (x, y), (x + w, y + h), (0, 255, 0), 4)

    # save
    writer.write(test)

    # show
    cv2.imshow("Test", test)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

    # increment frame counter
    frame += 1

writer.release()
cap.release()
cv2.destroyAllWindows()

