简体   繁体   English

pHash 返回不同的哈希长度

[英]pHash returning different hash lenght

I have the following code:我有以下代码:

fingerprint.cpp:指纹.cpp:

#include <iostream>
#include <filesystem>

#include "ImageHash.h"

#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include <opencv2/imgcodecs.hpp>

using namespace std;


void usage (char** argv) noexcept  {
    cout << "Usage: " << argv[0] << " <image or dir>" << endl << "Example: " << argv[0] << " template.png" << endl;
}

int main(int argc, char** argv) {
    if (argc != 2) {
        usage(argv);
        return 1;
    }

    auto imgHash = ImageHash();

    if (std::filesystem::is_directory(argv[1])) {
        for (const auto& dirEntry : std::filesystem::recursive_directory_iterator(argv[1])) {
            auto filePath = dirEntry.path();
            std::string fileName = dirEntry.path().filename().string();
        
            cv::Mat img = cv::imread(filePath.string(), cv::IMREAD_GRAYSCALE);
            img.resize(8*8);

            /*cv::imshow("img", img);
            cv::waitKey();*/
            
            cout << fileName << "\t\t" << imgHash.getHashString(img) << endl;
        }

    }
    else if (std::filesystem::is_regular_file(argv[1])){
        cv::Mat img = cv::imread(argv[1], cv::IMREAD_GRAYSCALE);
        cout << std::filesystem::path(argv[1]).filename().string() << "\t\t" << imgHash.getHashString(img) << endl;
    }
    else {
        usage(argv);
    }

    return 0;
}

ImageHash.cpp: ImageHash.cpp:

#include "ImageHash.h"

#include <iomanip> 

using namespace std;

ImageHash::ImageHash()
{
    pHash = cv::img_hash::PHash::create();
}

std::string ImageHash::convertHashToString(vector<bool> hash) {
    std::string ret = "";
    double h = 0;
    for (unsigned int i = 0; i < hash.size(); i++) {
        if (hash[i]) {
            h += pow(2, (i % 8));
        }

        if (i % 8 == 7) {
            std::stringstream buffer;
            buffer << std::hex << std::setfill('0') << std::setw(2) << h;
            ret += buffer.str();
            h = 0;
        }
    }
    return ret;
}

std::vector<bool> ImageHash::hex_str_to_hash(std::string inputString) {
    std::vector<bool> hash;
    size_t size = inputString.size() / 2;
    for (int i = 0; i < size; i++) {
        std::string str2 = inputString.substr(i * 2, 2);
        if (str2.empty()) {
            continue;
        }

        unsigned int value = 0;
        std::stringstream SS(str2);
        SS >> std::hex >> value;
        for (int j = 0; j < 8; j++) {
            bool check = !!((value >> j) & 1);
            hash.push_back(check);
        }
    }
    return hash;
}

std::vector<bool> ImageHash::matHashToBoolArr(cv::Mat const inHash) {
    const unsigned char* data = inHash.data;
    std::vector<bool> v;
    for (int i = 0; i < 8; i++) {
        unsigned char c = data[i];
        for (int j = 0; j < 8; j++) {
            int shift = (8 - j) - 1;
            bool val = ((c >> shift) & 1);
            v.push_back(val);
        }
    }
    return v;
}

std::vector<bool> ImageHash::computeHash(cv::Mat const input) {
    cv::Mat inHash;
    pHash->compute(input, inHash);
    return matHashToBoolArr(inHash);
}

std::string ImageHash::getHashString(cv::Mat const input) {
    return convertHashToString(computeHash(input));
}

//returns hamming distance
int ImageHash::getHashDistance(std::vector<bool>& hash1, std::vector<bool>& hash2) {
    //assert(hash1.size() == hash2.size());
    
    /* ToDo: I don't know why the size is not the same but if if is lets add padding to the smaller one
    This does not seem to make it work*/
    if (hash1.size() != hash2.size()) {
        auto smaller = hash1.size() < hash2.size() ? &hash1 : &hash2;
        auto larger = hash1.size() < hash2.size() ? hash2 : hash1;
        smaller->resize(larger.size());
        std::fill(begin(*smaller) + larger.size(), end(*smaller), 0);
    }


    int dist = 0;
    for (unsigned int i = 0; i < hash1.size(); i++) {
        dist += (hash1[i] != hash2[i]);
    }
    return dist;
}

For some reason the hashes returned have different size depending on the input image.出于某种原因,根据输入图像,返回的散列具有不同的大小。 I tried to add some padding at getHashDistance() but I don't think I should do that since now when I calculate the distance between two similar images I get a large distance as if they were not similar.我试图在 getHashDistance() 中添加一些填充,但我认为我不应该这样做,因为现在当我计算两个相似图像之间的距离时,我得到了很大的距离,就好像它们不相似一样。

Do you know why that is?你知道这是为什么吗? I'm resizing the images to 8*8 img.resize(8*8);我正在将图像大小调整为 8*8 img.resize(8*8); which I though it would make the trick but it does not work.我认为它会成功,但它不起作用。

Thanks Thanks谢谢,谢谢

auto imgHash = ImageHash();

Could it be that the auto keyword here is causing the runtime to draw different inferences of the returned type (and therefore the size of the returned type)?是否是这里的 auto 关键字导致运行时对返回的类型(以及返回类型的大小)进行不同的推断?

Type Inference in C++ (auto and decltype) C++ 中的类型推断(auto 和 decltype)

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM