PyTorch 和 TorchVision FasterRCNN 解釋 C++ GenericDict 中的 output

Question

我試圖在 C++ 中解析 FasterRCNN 的 output，但在處理 GenericDict 類型時遇到了困難。

我的代碼如下：

#include <opencv4/opencv2/opencv.hpp>
#include <opencv4/opencv2/shape.hpp>
#include <opencv4/opencv2/imgcodecs.hpp>
#include <opencv4/opencv2/highgui.hpp>
#include <opencv4/opencv2/imgproc.hpp>
#include <opencv4/opencv2/core/utility.hpp>
#include <opencv4/opencv2/core/mat.hpp>

#include <c10/cuda/CUDAStream.h>
#include <torch/csrc/autograd/grad_mode.h>

#include <torch/csrc/api/include/torch/torch.h>
#include <torch/script.h>
#include <torchvision/vision.h>
#include <torchvision/nms.h>

#include <cstddef>
#include <cstdio>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

int main(int argc, const char* argv[])
{
    if (argc != 3)
    {
        printf("usage: %s <path-to-exported-script-module> <image_to_test>\n",argv[0]);
        return -1;
    }

    std::string module_filename = argv[1];
    std::string image_file = argv[2];

    try
    {
        cv::Mat input_img = cv::imread(image_file, cv::IMREAD_GRAYSCALE);

        torch::autograd::AutoGradMode guard(false);
        // Deserialize the ScriptModule from a file using torch::jit::load().
        torch::jit::script::Module module = torch::jit::load(module_filename);

        assert(module.buffers().size() > 0);

        module.eval();

        // Assume that the entire model is on the same device.
        // We just put input to this device.
        auto device = (*std::begin(module.buffers())).device();

        const int height = input_img.rows;
        const int width  = input_img.cols;
        const int channels = 1;

        auto input = torch::from_blob(input_img.data, {height, width, channels}, torch::kUInt8);
        // HWC to CHW
        // input = input.to(device, torch::kFloat).permute({2, 0, 1}).contiguous();
        input = input.to(device, torch::kFloat).permute({2, 0, 1}).contiguous();

        // run the network
        std::vector<at::Tensor> inputs;
        inputs.push_back(input);
        auto output = module.forward({inputs});
        if (device.is_cuda())
            c10::cuda::getCurrentCUDAStream().synchronize();

        std::cout << "output: " << output << std::endl;

        auto outputs = output.toTuple()->elements();

        std::cout << "outputs: " << outputs << std::endl;

        for( auto& elem : outputs )
        {
            std::cout << "elem: " << elem << std::endl;
            if( elem.isGenericDict() )
            {
                std::cout << "elem is generic dict: " << elem << std::endl;
                c10::Dict<c10::IValue, c10::IValue> dict = elem.toGenericDict();

                auto elem_vector_0 = dict.at(c10::IValue("scores")).toIntVector();
                auto elem_vector_1 = dict.at(c10::IValue("boxes")).toIntVector();
                auto elem_vector_2 = dict.at(c10::IValue("labels")).toIntVector();

                for( auto& ee0 : elem_vector_0 )
                {
                    std::cout << "elem_vector_0" << ee0 << std::endl;
                }
                for( auto& ee0 : elem_vector_1 )
                {
                    std::cout << "elem_vector_1" << ee0 << std::endl;
                }
                for( auto& ee0 : elem_vector_2 )
                {
                    std::cout << "elem_vector_2" << ee0 << std::endl;
                }
            }
        }

        cv::namedWindow("Display Image", cv::WINDOW_AUTOSIZE );
        cv::imshow("Display Image", input_img);
        cv::waitKey(0);
    }
    catch(const c10::Error& e)
    {
        std::cerr << e.what() << std::endl;
        return -1;
    }
    catch(const cv::Exception& e)
    {
        std::cerr << e.what() << std::endl;
        return -1;
    }
    catch(const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        return -1;
    }
    catch(...)
    {
        std::cerr << "Unknown error" << std::endl;
        return -1;
    }

    std::cout << "ok\n";
    return 0;
}

output 是：

(base) fstrati@fstrati-desktop:~/libtorch_shared_cuda_10.1/load_and_run_model/Release$ ./load_and_run_model ./torch_script_v0.2.pt test_img.png 
[W faster_rcnn.py:95] Warning: RCNN always returns a (Losses, Detections) tuple in scripting (function )
output: ({}, [{boxes: [ CPUFloatType{0,4} ], labels: [ CPULongType{0} ], scores: [ CPUFloatType{0} ]}])
outputs: {} [{boxes: [ CPUFloatType{0,4} ], labels: [ CPULongType{0} ], scores: [ CPUFloatType{0} ]}]
elem: {}
elem is generic dict: {}
Argument passed to at() was not in the map.

我正在努力尋找一種從字典 GenericDict 中提取框、標籤和分數的方法。

這個 map 很奇怪，我無法對其進行迭代，也無法使用 it->first 和 it->second 來訪問鍵和值……

有任何想法嗎？

提前致謝

Answer 1

我認為以下方法可以解決這裡的主要問題：

  // Snippet only: `output` and `inputs` are declared outside it.
  // NOTE(review): `inputs` presumably has to be whatever type module.forward()
  // accepts at this call site - confirm against the surrounding program.
  output = module.forward(inputs);

  // The printed output in the question is a 2-tuple: an (empty) dict followed
  // by a list containing one dict. Take tuple element 1, then the first dict
  // of that list.
  // NOTE(review): the list presumably holds one dict per input image - confirm.
  auto detections = output.toTuple()->elements().at(1).toList().get(0).toGenericDict();
  // Look up each entry of the detection dict by its string key and print it.
  std::cout << ">>> detections labels: " << detections.at("labels") << std::endl;
  std::cout << ">>> detections boxes: " << detections.at("boxes") << std::endl;
  std::cout << ">>> detections scores: " << detections.at("scores") << std::endl;

此外，我添加了一個可執行文件 https://github.com/zhiqwang/yolov5-rt-stack/tree/master/deployment/libtorch ，用來展示 libtorch 的工作原理。

PyTorch 和 TorchVision FasterRCNN 解釋 C++ GenericDict 中的 output

問題描述

1 個解決方案

解決方案1
0 2021-07-14 09:16:16

PyTorch 和 TorchVision FasterRCNN 解釋 C++ GenericDict 中的 output

問題描述

1 個解決方案

解決方案1 0 2021-07-14 09:16:16

解決方案1
0 2021-07-14 09:16:16