
Expected Ptr<cv::UMat> for argument 'img' for reading with TF and OpenCV

I took this code from here and made some modifications from here.

from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2


# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()

ap.add_argument("-c", "--confidence", type=float, default=0.8,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())


classes_90 = [ "person", "bicycle", "car", "motorcycle",
            "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
            "unknown", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
            "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "unknown", "backpack",
            "umbrella", "unknown", "unknown", "handbag", "tie", "suitcase", "frisbee", "skis",
            "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
            "surfboard", "tennis racket", "bottle", "unknown", "wine glass", "cup", "fork", "knife",
            "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
            "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "unknown", "dining table",
            "unknown", "unknown", "toilet", "unknown", "tv", "laptop", "mouse", "remote", "keyboard",
            "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "unknown",
            "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] 
# Read the available classes in openImages
CLASSES = classes_90  # New list of classes with 90 classes.
print(CLASSES)

# Assign a box color to each class
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3)) 

# Import the network model
cvNet = cv2.dnn.readNetFromTensorflow('faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb', 'faster_rcnn_inception_v2_coco_2018_01_28/resnet.pbtxt')

# Open the video
img = cv2.VideoCapture('people.mp4')  


while img.isOpened():
    ret, frame = img.read()

    if not ret:
        break

    #img = cv2.imread(args["image"])

    # Get the frame dimensions
    h = frame.shape[0] # Height
    w = frame.shape[1] # Width
    img = np.array(img)
    cvNet.setInput(cv2.dnn.blobFromImage(img, size=(h, w), swapRB=True, crop=False))
    detections = cvNet.forward()

    # loop over the detections
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the `confidence` is
        # greater than the minimum confidence
        if confidence > args["confidence"]:
            # extract the index of the class label from the
            # `detections`, then compute the (x, y)-coordinates of
            # the bounding box for the object
            idx = int(detections[0, 0, i, 1])
            print(idx)
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # draw the prediction on the frame
            label = "{}: {:.2f}%".format(CLASSES[idx],
                confidence * 100)
            cv2.rectangle(img, (startX, startY), (endX, endY),
                COLORS[idx], 2)
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(img, label, (startX, y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

            print(label)

    out_img = cv2.resize(img, (640, 480))
    out.write(out_img)
    cv2.imshow('img', img)
    #cv2.waitKey()
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        cap.release()
        out.release()

and I get this error: Expected Ptr<cv::UMat> for argument 'img'. After going through most of the available solutions to this problem, it seemed that the input was not an array in the first place, so I converted it with np.array, but that did not work. Printing the image shows that a frame from the video exists, so the image is there.
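One way to see what blobFromImage is actually receiving is to inspect the array first (a minimal sketch; 'people.mp4' is the file used in the post):

import cv2
import numpy as np

cap = cv2.VideoCapture('people.mp4')

# np.array() on the capture object does not decode an image; it just wraps
# the Python object in a 0-dimensional object array, which OpenCV rejects.
arr = np.array(cap)
print(arr.shape, arr.dtype)      # -> () object

# The actual image is the frame returned by read().
ret, frame = cap.read()
print(type(frame), frame.shape)  # -> <class 'numpy.ndarray'> (height, width, 3)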

So I cannot figure out what exactly is causing this problem. The code also works fine if only a single image is passed using cv2.imread(), as in the sketch below.
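For comparison, the single-image path that works might look like this (a sketch; 'people.jpg' is a hypothetical file):

# cv2.imread() returns a numpy.ndarray directly, so no conversion is needed.
img = cv2.imread('people.jpg')  # hypothetical image file
h, w = img.shape[:2]
cvNet.setInput(cv2.dnn.blobFromImage(img, size=(h, w), swapRB=True, crop=False))
detections = cvNet.forward()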

It turned out that I was building the array from the wrong variable: np.array(img) wraps the cv2.VideoCapture object itself instead of the frame obtained from its read() method.
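In other words, the whole fix is one line:

# Broken: wraps the cv2.VideoCapture object itself, not an image
img = np.array(img)

# Fixed: uses the frame returned by img.read()
img = np.array(frame)

Since the frame returned by read() is already a numpy.ndarray, the np.array() call is actually redundant; frame could be passed to cv2.dnn.blobFromImage() directly.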

Here is the updated code:

from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2


# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()

ap.add_argument("-c", "--confidence", type=float, default=0.8,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())


classes_90 = [ "person", "bicycle", "car", "motorcycle",
            "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
            "unknown", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
            "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "unknown", "backpack",
            "umbrella", "unknown", "unknown", "handbag", "tie", "suitcase", "frisbee", "skis",
            "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
            "surfboard", "tennis racket", "bottle", "unknown", "wine glass", "cup", "fork", "knife",
            "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
            "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "unknown", "dining table",
            "unknown", "unknown", "toilet", "unknown", "tv", "laptop", "mouse", "remote", "keyboard",
            "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "unknown",
            "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] 
# Read the available classes in openImages
CLASSES = classes_90  # New list of classes with 90 classes.
print(CLASSES)

# Assign a box color to each class
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3)) 

# Import the network model
cvNet = cv2.dnn.readNetFromTensorflow('faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb', 'faster_rcnn_inception_v2_coco_2018_01_28/resnet.pbtxt')

# Open the video
img = cv2.VideoCapture('people.mp4')  


# The original post uses a writer `out` without defining it; this is an
# assumed minimal setup so the script runs end to end (codec/fps are guesses).
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'XVID'), 25.0, (640, 480))

while img.isOpened():
    ret, frame = img.read()

    if not ret:
        break

    #img = cv2.imread(args["image"])

    # Get the frame dimensions
    h = frame.shape[0] # Height
    w = frame.shape[1] # Width
    img = np.array(frame)  # the fix: convert the frame, not the VideoCapture object
    # NB: blobFromImage's size is (width, height); the post passes (h, w) as-is.
    cvNet.setInput(cv2.dnn.blobFromImage(img, size=(h, w), swapRB=True, crop=False))
    detections = cvNet.forward()

    # loop over the detections
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the `confidence` is
        # greater than the minimum confidence
        if confidence > args["confidence"]:
            # extract the index of the class label from the
            # `detections`, then compute the (x, y)-coordinates of
            # the bounding box for the object
            idx = int(detections[0, 0, i, 1])
            print(idx)
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # draw the prediction on the frame
            label = "{}: {:.2f}%".format(CLASSES[idx],
                confidence * 100)
            cv2.rectangle(img, (startX, startY), (endX, endY),
                COLORS[idx], 2)
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(img, label, (startX, y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

            print(label)

    out_img = cv2.resize(img, (640, 480))
    out.write(out_img)
    cv2.imshow('img', img)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

# Release everything once the loop ends (the original called cap.release(),
# but the capture object here is named img).
img.release()
out.release()
cv2.destroyAllWindows()
