简体   繁体   中英

pHash returning different hash lenght

I have the following code:

fingerprint.cpp:

#include <iostream>
#include <filesystem>

#include "ImageHash.h"

#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include <opencv2/imgcodecs.hpp>

using namespace std;


void usage (char** argv) noexcept  {
    cout << "Usage: " << argv[0] << " <image or dir>" << endl << "Example: " << argv[0] << " template.png" << endl;
}

int main(int argc, char** argv) {
    if (argc != 2) {
        usage(argv);
        return 1;
    }

    auto imgHash = ImageHash();

    if (std::filesystem::is_directory(argv[1])) {
        for (const auto& dirEntry : std::filesystem::recursive_directory_iterator(argv[1])) {
            auto filePath = dirEntry.path();
            std::string fileName = dirEntry.path().filename().string();
        
            cv::Mat img = cv::imread(filePath.string(), cv::IMREAD_GRAYSCALE);
            img.resize(8*8);

            /*cv::imshow("img", img);
            cv::waitKey();*/
            
            cout << fileName << "\t\t" << imgHash.getHashString(img) << endl;
        }

    }
    else if (std::filesystem::is_regular_file(argv[1])){
        cv::Mat img = cv::imread(argv[1], cv::IMREAD_GRAYSCALE);
        cout << std::filesystem::path(argv[1]).filename().string() << "\t\t" << imgHash.getHashString(img) << endl;
    }
    else {
        usage(argv);
    }

    return 0;
}

ImageHash.cpp:

#include "ImageHash.h"

#include <iomanip> 

using namespace std;

ImageHash::ImageHash()
{
    pHash = cv::img_hash::PHash::create();
}

std::string ImageHash::convertHashToString(vector<bool> hash) {
    std::string ret = "";
    double h = 0;
    for (unsigned int i = 0; i < hash.size(); i++) {
        if (hash[i]) {
            h += pow(2, (i % 8));
        }

        if (i % 8 == 7) {
            std::stringstream buffer;
            buffer << std::hex << std::setfill('0') << std::setw(2) << h;
            ret += buffer.str();
            h = 0;
        }
    }
    return ret;
}

std::vector<bool> ImageHash::hex_str_to_hash(std::string inputString) {
    std::vector<bool> hash;
    size_t size = inputString.size() / 2;
    for (int i = 0; i < size; i++) {
        std::string str2 = inputString.substr(i * 2, 2);
        if (str2.empty()) {
            continue;
        }

        unsigned int value = 0;
        std::stringstream SS(str2);
        SS >> std::hex >> value;
        for (int j = 0; j < 8; j++) {
            bool check = !!((value >> j) & 1);
            hash.push_back(check);
        }
    }
    return hash;
}

std::vector<bool> ImageHash::matHashToBoolArr(cv::Mat const inHash) {
    const unsigned char* data = inHash.data;
    std::vector<bool> v;
    for (int i = 0; i < 8; i++) {
        unsigned char c = data[i];
        for (int j = 0; j < 8; j++) {
            int shift = (8 - j) - 1;
            bool val = ((c >> shift) & 1);
            v.push_back(val);
        }
    }
    return v;
}

std::vector<bool> ImageHash::computeHash(cv::Mat const input) {
    cv::Mat inHash;
    pHash->compute(input, inHash);
    return matHashToBoolArr(inHash);
}

std::string ImageHash::getHashString(cv::Mat const input) {
    return convertHashToString(computeHash(input));
}

//returns hamming distance
int ImageHash::getHashDistance(std::vector<bool>& hash1, std::vector<bool>& hash2) {
    //assert(hash1.size() == hash2.size());
    
    /* ToDo: I don't know why the size is not the same but if if is lets add padding to the smaller one
    This does not seem to make it work*/
    if (hash1.size() != hash2.size()) {
        auto smaller = hash1.size() < hash2.size() ? &hash1 : &hash2;
        auto larger = hash1.size() < hash2.size() ? hash2 : hash1;
        smaller->resize(larger.size());
        std::fill(begin(*smaller) + larger.size(), end(*smaller), 0);
    }


    int dist = 0;
    for (unsigned int i = 0; i < hash1.size(); i++) {
        dist += (hash1[i] != hash2[i]);
    }
    return dist;
}

For some reason the hashes returned have different size depending on the input image. I tried to add some padding at getHashDistance() but I don't think I should do that since now when I calculate the distance between two similar images I get a large distance as if they were not similar.

Do you know why that is? I'm resizing the images to 8*8 img.resize(8*8); which I though it would make the trick but it does not work.

Thanks Thanks

auto imgHash = ImageHash();

Could it be that the auto keyword here is causing the runtime to draw different inferences of the returned type (and therefore the size of the returned type)?

Type Inference in C++ (auto and decltype)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM