簡體   English   中英

使用 boost 反序列化字符串數組並且大於預期(從 cv::cuda::GpuMat 序列化后)

[英]Deserializing array of string with boost and are bigger than expected (after serializing from cv::cuda::GpuMat)

我正在嘗試序列化和反序列化cv::cuda::GpuMat的數組(即帶 pitch 的byte / uchar數組)。 在反序列化部分,我不需要恢復GpuMat ,只需要恢復byte數組。

我使用這兩個類:

#pragma once
#include <fstream>
#include <string>
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <opencv2/core/cuda.hpp>

using namespace std;
using namespace cv;
using namespace cuda;

#ifndef uchar
#define uchar unsigned char
#endif

// Serializable holder for two flow planes, each kept as a std::string of raw bytes.
// Only flowX and flowY are archived; the FlowPlanes pointer is never serialized.
class SerieFlow
{
    // Lets Boost.Serialization reach the private serialize() member below.
    friend class boost::serialization::access;
    
    // Single entry point used for both saving and loading: archives flowX, then flowY.
    // The `version` argument is not used.
    template <class Archive>
    void serialize(Archive& ar, const unsigned int version)
    {
        ar & flowX;
        ar & flowY;
    }

    GpuMat* FlowPlanes{}; // GpuMat[2]; supplied by the caller, never freed here, null when default-constructed
public:
    string flowX{};
    string flowY{};

    // Fills flowX / flowY from FlowPlanes[0] / FlowPlanes[1].
    // FlowPlanes is dereferenced unconditionally, so this must not be called on a
    // default-constructed object (FlowPlanes is null there).
    void PrepData()
    {
        // NOTE(review): presumably Mat(GpuMat) copies the plane into host memory — confirm against the OpenCV docs.
        Mat matX = Mat(FlowPlanes[0]);
        Mat matY = Mat(FlowPlanes[1]);
        // BUG: string(const char*) is given no length, so it keeps reading until it meets a '\0' byte;
        // nothing here guarantees the matrix data is NUL-terminated, so the string can run past the real data.
        // BUG: `*new string(...)` copy-assigns from a heap object that is never deleted (memory leak).
        flowX = *new string((const char*)matX.col(0).data);
        flowY = *new string((const char*)matY.col(0).data);
        matX.release();
        matY.release();
    }
    
    // Leaves FlowPlanes null and both strings empty (used when loading from an archive).
    SerieFlow() = default;
    // Stores the pointer and immediately extracts both planes via PrepData().
    // flowPlanes must point at two GpuMat objects (indices 0 and 1 are read).
    SerieFlow(GpuMat* flowPlanes) : FlowPlanes(flowPlanes)
    {
        PrepData();
    }
};

// Reads and writes a SerieFlow as a Boost text archive stored in a file.
class SerieFlowFile
{
public:
    // Writes `content` to `filename` as a Boost text archive.
    // Both arguments are taken by value, so the SerieFlow and the name are copied on every call.
    void Save(SerieFlow content, string filename)
    {
        ofstream stream(filename);
        {
            // Inner scope: the archive is destroyed before `stream` goes out of scope.
            boost::archive::text_oarchive archive(stream);
            archive << content;
        }
    }
    
    // Reads a SerieFlow back from the text archive stored in `filename`.
    // No check is made here that the file could be opened.
    SerieFlow Open(string filename)
    {
        SerieFlow content;
        {
            ifstream stream(filename);
            boost::archive::text_iarchive archive(stream);
            archive >> content;
        }
        return content;
    }
};

我對結果進行了單元測試:

// Round-trip test: builds two identical GpuMat planes from 8 known bytes, saves them through
// SerieFlow / SerieFlowFile, loads the file again and expects exactly those 8 bytes back in
// both flowX and flowY.
TEST_METHOD(ReadDeserializeTest)
{
    string filename{ "WriteRead.sflow" };
    // Start from a clean slate so a file left by an earlier run cannot influence the result.
    if (filesystem::exists(filename))
        std::remove(filename.c_str());
    // Eight non-zero bytes; the array holds no '\0' terminator.
    unsigned char data[8] = { 7, 4, 2, 6, 7, 18, 29, 111 };
    // The explicit length (8) is what makes this conversion safe without a terminator.
    string dataAsString((const char*)data, 8);
    vector<unsigned char> expectedData{ dataAsString.begin(), dataAsString.end() };
    // 8 rows x 1 column of single-channel bytes built on top of `data`.
    Mat mat{ 8, 1, CV_8UC1, data };
    GpuMat gpuMat1(mat);
    GpuMat gpuMat2(mat);
    GpuMat gpuMatArray[2] = { gpuMat1 , gpuMat2 };
    // The SerieFlow constructor extracts both planes into flowX / flowY right away.
    SerieFlow sflow(gpuMatArray);
    SerieFlowFile sut{};
    sut.Save(sflow, filename);

    // Load the file again through a separate SerieFlowFile instance.
    SerieFlowFile sut2{};
    auto sflow2 = sut2.Open(filename);
    vector<unsigned char> resultFlowX{ sflow2.flowX.begin(),sflow2.flowX.end() };
    vector<unsigned char> resultFlowY{ sflow2.flowY.begin(),sflow2.flowY.end() };

    // Log the sizes that came back.
    stringstream ss{};
    ss << "flowX size: " << resultFlowX.size();
    ss << " flowY size: " << resultFlowY.size() << endl;
    Logger::WriteMessage(ss.str().c_str());

    // Log every byte as a decimal number (ostream_iterator<int> prints values, not characters).
    stringstream resultFlowXStream{};
    stringstream resultFlowYStream{};
    copy(resultFlowX.begin(), resultFlowX.end(), std::ostream_iterator<int>(resultFlowXStream, " "));
    copy(resultFlowY.begin(), resultFlowY.end(), std::ostream_iterator<int>(resultFlowYStream, " "));
    Logger::WriteMessage(resultFlowXStream.str().c_str());
    Logger::WriteMessage(resultFlowYStream.str().c_str());

    // Both planes were built from the same data, so they must match each other and the input.
    Assert::IsTrue(resultFlowX == resultFlowY, L"flowX and flowY are not the same.");
    Assert::IsTrue(expectedData == resultFlowX, L"resultFlowX is not correct.");
    Assert::IsTrue(expectedData == resultFlowY, L"resultFlowY is not correct.");
}

但是我得到:

flowX 尺寸:52 flowY 尺寸:36

7 4 2 6 7 18 29 111 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 253 253 253 253

7 4 2 6 7 18 29 111 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 253 253 253 253

我希望兩者都是 8 號並給出:

7 4 2 6 7 18 29 111

得到這么多填充是否正常?

我使用string作為操作字節數組、比較等的便捷方式。

我懷疑我的string有問題,因為文件中的原始數組不是 null 終止的; 我應該如何序列化和反序列化那些 arrays? 如果我在序列化時已經以 null 終止了字符串( string dataAsString((const char*)data, 8); ),這怎么會是一個問題?

我將 boost 1.72 用作 nuget package 和 opencv 4.5.1,並使用 x64 中的 VS2022 進行測試。

  1. 不要使用 memory 泄漏運算符:

     X a = *new X(); // guaranteed unrecoverable memory leak¹
  2. 您使用它的地方充滿了其他問題:

     flowX = *new string((const char*)matX.col(0).data); flowY = *new string((const char*)matY.col(0).data);
  3. 首先, (const char*)data是重新解釋轉換。 使用起來很少安全。

     flowX = std::string(reinterpret_cast<const char*>(matX.col(0).data), length); flowY = std::string(reinterpret_cast<const char*>(matY.col(0).data), length);
  4. 您將其解釋為原始的 C 樣式字符串。 C 字符串需要以 NUL 結尾。 沒有理由假設它們是。 指定長度。

     flowX = std::string(reinterpret_cast<const char*>(matX.col(0).data), length); flowY = std::string(reinterpret_cast<const char*>(matY.col(0).data), length);

    或者實際上,使用flowX.assign

     flowX.assign(reinterpret_cast<const char*>(matX.col(0).data), length); flowY.assign(reinterpret_cast<const char*>(matY.col(0).data), length);
  5. 這條線

    cv::Mat matX = Mat(FlowPlanes[0]);

    不必要的復制構造

    cv::Mat matX(FlowPlanes[0]); // direct-initialization: no temporary Mat is created and then copied
  6. 無論如何,似乎沒有理由復制矩陣

    void PrepData() { flowX.assign(reinterpret_cast<const char*>(FlowPlanes[0].col(0).data), length); flowY.assign(reinterpret_cast<const char*>(FlowPlanes[1].col(0).data), length); }
  7. 事實上,由於沒有其他任何東西使用flowPlanes ,只需像往常一樣在構造函數中初始化:

     std::string flowX; std::string flowY; SerieFlow() = default; explicit SerieFlow(cv::cuda::GpuMat* flowPlanes): flowX(reinterpret_cast<const char*>(flowPlanes[0].col(0).data), length), flowY(reinterpret_cast<const char*>(flowPlanes[1].col(0).data), length) { }
  8. 許多其他轉換混淆可以簡化:

     unsigned char data[8] = { 7, 4, 2, 6, 7, 18, 29, 111 }; std::string dataAsString(reinterpret_cast<const char*>(data), 8);

    std::string dataAsString { 7, 4, 2, 6, 7, 18, 29, 111 };

    std::string dataAsString { 7, 4, 2, 6, 7, 18, 29, 111 }; std::vector<unsigned char> expectedData{ dataAsString.begin(), dataAsString.end() };

    簡單來說...

     std::vector<unsigned char> data { 7, 4, 2, 6, 7, 18, 29, 111 }; cv::Mat mat(data.size(), 1, CV_8UC1, data.data());
  9. 不需要臨時創建數組:

     cv::cuda::GpuMat gpuMat1(mat); cv::cuda::GpuMat gpuMat2(mat); cv::cuda::GpuMat gpuMatArray[2] = { gpuMat1, gpuMat2 };

    GpuMat gpuMatArray[2] = { GpuMat{mat}, GpuMat{mat} };
  10. 一般來說,整個轉換為 std::string 的做法並無必要,而且正是它造成了 uchar 與 char 的不兼容。 不如直接保留為 vector<uchar>?

     using cv::cuda::GpuMat; using uchar = std::uint8_t; using TwoMat = std::array<GpuMat, 2>; class SerieFlow { friend class boost::serialization::access; template <class Archive> void serialize(Archive& ar, unsigned /*unused*/) { ar& flowX & flowY; } static auto to_vector(GpuMat const& mat) { assert(mat.elemSize() == 1); auto n = mat.size().area() * mat.elemSize(); return std::vector<uchar>(mat.data, mat.data+n); } public: std::vector<uchar> flowX, flowY; SerieFlow() = default; explicit SerieFlow(TwoMat const planes): flowX(to_vector(planes[0])), flowY(to_vector(planes[1])) {} }; struct SerieFlowFile { static void Save(const SerieFlow& content, const std::string& filename) { std::ofstream stream(filename); boost::archive::text_oarchive archive(stream); archive << content; } static SerieFlow Open(const std::string& filename) { std::ifstream stream(filename); boost::archive::text_iarchive archive(stream); SerieFlow content; archive >> content; return content; } }; void ReadDeserializeTest() { std::filesystem::path filename = "WriteRead.sflow"; if (exists(filename)) { remove(filename); } std::vector<unsigned char> data { 7, 4, 2, 6, 7, 18, 29, 111 }; cv::Mat mat(data.size(), 1, CV_8UC1, data.data()); TwoMat gpuMatArray { GpuMat{mat}, GpuMat{mat} }; SerieFlow sflow(gpuMatArray); SerieFlowFile::Save(sflow, filename); auto roundtrip = SerieFlowFile::Open(filename); auto check = [&data](auto& flowX, auto label) { std::cout << label << " size: " << flowX.size(); copy(flowX.begin(), flowX.end(), std::ostream_iterator<int>(std::cout, " ")); std::cout << "\n" << (data == flowX? "CORRECT":"INCORRECT") << std::endl; }; check(roundtrip.flowX, "flowX"); check(roundtrip.flowY, "flowY"); } int main() { ReadDeserializeTest(); }

開箱即用:只需序列化 GpuMat

為什么不直接為 GpuMat 類型添加序列化?

#include <boost/serialization/array.hpp>
#include <boost/serialization/array_wrapper.hpp>

// Free save/load functions that teach Boost.Serialization how to archive a GpuMat:
// the row count, the column count, then rows*cols elements starting at mat.data.
namespace boost::serialization {
    // Writes the shape followed by the element buffer. The version argument is unused.
    template <typename Ar> void save(Ar& ar, GpuMat const& mat, unsigned) {
        int rowCount = mat.rows;
        int colCount = mat.cols;
        ar & rowCount;
        ar & colCount;
        ar & make_array(&mat.data[0], rowCount*colCount);
    }

    // Reads the shape, replaces `mat` with a fresh CV_8UC1 matrix of that shape,
    // then reads rows*cols elements straight into its buffer.
    template <typename Ar> void load(Ar& ar, GpuMat& mat, unsigned) {
        int rowCount, colCount;
        ar & rowCount;
        ar & colCount;
        mat = GpuMat(rowCount, colCount, CV_8UC1);
        ar & make_array(&mat.data[0], rowCount*colCount);
    }
}

BOOST_SERIALIZATION_SPLIT_FREE(GpuMat)

這將刪除所有復制,並應立即解決不處理矩陣的實際運行時形狀的問題(您只是假設它始終是 1 行)。

現在整個事情可以實現為

using Flows = std::array<GpuMat, 2>;

struct SerieFlowFile {
    static void Save(const Flows& content, const std::string& filename) {
        std::ofstream stream(filename);
        boost::archive::text_oarchive archive(stream);
        archive << content;
    }

    static Flows Open(const std::string& filename) {
        std::ifstream stream(filename);
        boost::archive::text_iarchive archive(stream);
        Flows content;
        archive >> content;
        return content;
    }
};

它可以與以下測試主程序一起編譯:

// Round trip: saves two matrices with SerieFlowFile::Save, loads them back with
// SerieFlowFile::Open, and prints for each one its element count, its contents and
// whether the loaded bytes equal the saved ones.
void ReadDeserializeTest() {
    std::vector<uchar> data { 7, 4, 2, 6, 7, 18, 29, 111 };

    // Plane 0 is 8x1 starting at data[0]; plane 1 is 4x1 starting at data[2],
    // so the two planes differ in both shape and content.
    Flows gpuMatArray {
        GpuMat (8, 1, CV_8UC1, data.data()),
        GpuMat (4, 1, CV_8UC1, data.data()+2),
    };

    SerieFlowFile::Save(gpuMatArray, "WriteRead.sflow");
    auto roundtrip = SerieFlowFile::Open("WriteRead.sflow");

    // Copies the rows*cols elements of a matrix into a std::vector.
    static auto as_vec = [](GpuMat const& mat) {
        return std::vector(&mat.data[0], &mat.data[mat.cols * mat.rows]);
    };

    // Compares the loaded matrix at `index` with the one that was saved and prints the outcome.
    auto check = [&] (int index) {
        auto const& v = as_vec(roundtrip[index]);
        auto eq = boost::equal(as_vec(gpuMatArray[index]), v);
        fmt::print("#{} size: {} {} {}\n", index, v.size(), v, (eq? "CORRECT":"INCORRECT"));
    };

    check(0);
    check(1);
}

在編譯器資源管理器上實時查看

//#pragma once
#include <array>
#include <cstdint>

using uchar = std::uint8_t;

#ifndef NO_OPENCV
    #include <opencv2/core/cuda.hpp>
    //using cv::cuda::GpuMat;
    using GpuMat = cv::Mat;
#else
    #include <memory>
    #include <algorithm>
    namespace {
        // Stand-in for OpenCV's element-type constant; FakeMat accepts it but ignores its value.
        enum {CV_8UC1};

        // Minimal owning rows x cols byte matrix used in place of cv::Mat when NO_OPENCV is defined.
        // Elements are std::uint8_t, i.e. the same type as the file-level `uchar` alias.
        // Invariant kept by every constructor and assignment: `data` is either null (empty matrix)
        // or points at exactly rows*cols bytes.
        struct FakeMat {
            // Creates an r x c matrix. Negative dimensions are clamped to 0; if either dimension
            // ends up 0 the matrix is empty and `data` is null. The third argument (element type)
            // is ignored. When `init` is non-null, rows*cols bytes are copied from it; otherwise
            // the buffer is zero-filled.
            FakeMat(int r=1, int c=1, int=CV_8UC1, void* init = nullptr)
                : rows(r > 0 ? r : 0), cols(c > 0 ? c : 0), data(allocate(area(rows, cols)))
            {
                if (init && data) {
                    std::copy_n(static_cast<std::uint8_t const*>(init), area(rows, cols), data.get());
                }
            }

            // Deep copy: the new matrix gets its own buffer.
            FakeMat(FakeMat const& rhs)
                : rows(rhs.rows), cols(rhs.cols), data(rhs.data ? allocate(area(rows, cols)) : nullptr)
            {
                if (data && rhs.data) {
                    std::copy_n(rhs.data.get(), area(rows, cols), data.get());
                }
            }

            // Takes over the buffer and resets the source to an empty 0x0 matrix, so a
            // moved-from object never advertises elements it no longer owns.
            FakeMat(FakeMat&& rhs) noexcept
                : rows(rhs.rows), cols(rhs.cols), data(rhs.data.release())
            {
                rhs.rows = 0;
                rhs.cols = 0;
            }

            // Move assignment with the same "source becomes 0x0" guarantee; self-assignment is a no-op.
            FakeMat& operator=(FakeMat&& rhs) noexcept {
                if (this != &rhs) {
                    rows = rhs.rows;
                    cols = rhs.cols;
                    data.reset(rhs.data.release());
                    rhs.rows = 0;
                    rhs.cols = 0;
                }
                return *this;
            }

            // Copy assignment (it would otherwise be implicitly deleted because move operations
            // are declared): copy first, then move the copy in, so *this is untouched if the
            // allocation throws.
            FakeMat& operator=(FakeMat const& rhs) {
                if (this != &rhs) {
                    *this = FakeMat(rhs);
                }
                return *this;
            }

            int rows, cols;
            std::unique_ptr<std::uint8_t[]> data;

        private:
            // Element count for an r x c matrix, computed in size_t to avoid int overflow;
            // 0 when either dimension is not positive.
            static std::size_t area(int r, int c) noexcept {
                return (r > 0 && c > 0) ? static_cast<std::size_t>(r) * static_cast<std::size_t>(c) : 0;
            }

            // Zero-filled buffer of n bytes, or null when n is 0.
            static std::unique_ptr<std::uint8_t[]> allocate(std::size_t n) {
                return n ? std::make_unique<std::uint8_t[]>(n) : nullptr;
            }
        };
    }

    using GpuMat = FakeMat;
#endif

#include <boost/serialization/array.hpp>
#include <boost/serialization/array_wrapper.hpp>

// Free save/load functions that teach Boost.Serialization how to archive a GpuMat:
// the row count, the column count, then rows*cols elements starting at mat.data.
namespace boost::serialization {
    // Writes the shape followed by the element buffer. The version argument is unused.
    template <typename Ar> void save(Ar& ar, GpuMat const& mat, unsigned) {
        int rowCount = mat.rows;
        int colCount = mat.cols;
        ar & rowCount;
        ar & colCount;
        ar & make_array(&mat.data[0], rowCount*colCount);
    }

    // Reads the shape, replaces `mat` with a fresh CV_8UC1 matrix of that shape,
    // then reads rows*cols elements straight into its buffer.
    template <typename Ar> void load(Ar& ar, GpuMat& mat, unsigned) {
        int rowCount, colCount;
        ar & rowCount;
        ar & colCount;
        mat = GpuMat(rowCount, colCount, CV_8UC1);
        ar & make_array(&mat.data[0], rowCount*colCount);
    }
}

BOOST_SERIALIZATION_SPLIT_FREE(GpuMat)

#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <fstream>

using Flows = std::array<GpuMat, 2>;

struct SerieFlowFile {
    static void Save(const Flows& content, const std::string& filename) {
        std::ofstream stream(filename);
        boost::archive::text_oarchive archive(stream);
        archive << content;
    }

    static Flows Open(const std::string& filename) {
        std::ifstream stream(filename);
        boost::archive::text_iarchive archive(stream);
        Flows content;
        archive >> content;
        return content;
    }
};

#include <boost/range.hpp>
#include <fmt/ranges.h>
void ReadDeserializeTest() {
    std::vector<uchar> data { 7, 4, 2, 6, 7, 18, 29, 111 };

    Flows gpuMatArray {
        GpuMat (8, 1, CV_8UC1, data.data()),
        GpuMat (4, 1, CV_8UC1, data.data()+2),
    };

    SerieFlowFile::Save(gpuMatArray, "WriteRead.sflow");
    auto roundtrip = SerieFlowFile::Open("WriteRead.sflow");

    static auto as_vec = [](GpuMat const& mat) {
        return std::vector(&mat.data[0], &mat.data[mat.cols * mat.rows]);
    };

    auto check = [&] (int index) {
        auto const& v = as_vec(roundtrip[index]);
        auto eq = boost::equal(as_vec(gpuMatArray[index]), v);
        fmt::print("#{} size: {} {} {}\n", index, v.size(), v, (eq? "CORRECT":"INCORRECT"));
    };

    check(0);
    check(1);
}

// Program entry point: runs the round-trip check once and falls off the end (implicit return 0).
int main() {
    ReadDeserializeTest();
}

印刷

#0 size: 8 {7, 4, 2, 6, 7, 18, 29, 111} CORRECT
#1 size: 4 {2, 6, 7, 18} CORRECT

PrepData中,字符串在沒有給定長度的情況下初始化,導致任意長字符串(直到達到 null '\0' )。

正確的代碼是:

// Pass the length explicitly so each string is built from exactly matX.rows / matY.rows bytes
// instead of scanning for a '\0' terminator.
// NOTE(review): assumes each matrix is a single column of 1-byte elements, so that `rows`
// equals the byte count — confirm against the caller.
string strX((const char*)matX.col(0).data, matX.rows);
string strY((const char*)matY.col(0).data, matY.rows);

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM