[英]Optimizing an algorithm using map::count
我目前有一個對鍵進行哈希的算法,並使用 map::count 檢查哈希值的唯一性。怎樣才能優化它?我還忘了提到,這段代碼是多線程的。
// Brute-force collision scan: for every 3-byte combination (i, j, k) build the
// 6-character key "ijkijk", hash it, and record the hash value in a map.
// A hash value that is already present counts as a collision.
// `hash` is defined elsewhere and is not part of this snippet.
int coll = 0;           // number of hash values seen more than once
map<long, bool> mymap;  // hash values seen so far (the bool payload carries no information)
#pragma omp parallel for
for (int i = 0; i < 256; i++)
    for (int j = 0; j < 256; j++)
        for (int k = 0; k < 256; k++)
        {
            // Key construction and hashing only touch thread-local data, so
            // they stay outside the lock and still run in parallel.
            string temp;
            temp = i;
            temp += j;
            temp += k;
            temp += temp;  // repeat the three bytes to get the 6-character key
            long myhash = hash(temp.c_str());

            // std::map must not be read while another thread inserts, and the
            // lookup followed by the insert has to be one indivisible step:
            // otherwise two threads can both miss the same value and both
            // insert it, losing a collision. The counter update and the
            // diagnostic output are protected by the same critical section.
            #pragma omp critical
            {
                if (mymap.count(myhash))
                {
                    coll++;
                    cout << "Collision at " << i << " " << j << " " << k << endl;
                }
                else
                {
                    mymap[myhash] = true;
                }
            }
        }
cout << "Number of collisions: " << coll << endl;
cout << "Map size: " << mymap.size() << endl;
經過多次試驗和錯誤,這是我可以生成的最佳版本,使用1GB的RAM在82.5秒內生成4294967296個密鑰。
#include <iostream>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/time.h>
#include <iomanip>
#include <omp.h>
#include <vector>
#include <fstream>
#include <ios>
#include <unistd.h>
using namespace std;
// Simple stopwatch based on gettimeofday().
class Timer
{
public:
    // Remembers the current time as the reference point for stop().
    void start()
    {
        gettimeofday(&startTime, NULL);
    }

    // Returns the time elapsed since the last start(), in seconds.
    // The stopwatch keeps its reference point, so stop() may be called repeatedly.
    double stop()
    {
        timeval now;
        gettimeofday(&now, NULL);
        const long wholeSeconds = now.tv_sec - startTime.tv_sec;
        // May be negative; the sum below still yields the correct duration.
        const long microSeconds = now.tv_usec - startTime.tv_usec;
        return wholeSeconds + microSeconds / 1000000.0;
    }

    // Prints a duration with ten fixed decimals followed by " seconds".
    static void printTime(double duration)
    {
        std::cout << std::fixed << std::setprecision(10) << duration << " seconds" << std::endl;
    }

private:
    timeval startTime;  // set by start()
};
// Reinterprets the first sizeof(long) bytes at `str` as a long, in native byte
// order, and returns that value.
//
// The buffer is NOT treated as a NUL-terminated string: exactly sizeof(long)
// bytes are read, so the caller must supply at least that many readable bytes.
//
// The bytes are copied one by one through an unsigned char pointer instead of
// dereferencing a casted long*; that keeps the result identical while avoiding
// a misaligned load and a strict-aliasing violation.
static inline long hash(const char* str)
{
    long value = 0;
    unsigned char* out = reinterpret_cast<unsigned char*>(&value);
    for (unsigned idx = 0; idx < sizeof value; ++idx)
        out[idx] = static_cast<unsigned char>(str[idx]);
    return value;
}
// Number of keys whose flag was already set when visited; reset and updated by main(), printed by main() and by the SIGINT handler.
int coll;
// One flag per possible key value; main() sizes it and sets the flag of every key it visits.
vector<bool> test;
// Reports the memory usage of the current process as read from /proc/self/stat.
//
// vm_usage     - out: the "vsize" field divided by 1024 (KiB, given that proc(5)
//                documents vsize in bytes).
// resident_set - out: the "rss" field (pages per proc(5)) multiplied by the
//                page size in KiB.
//
// Both outputs are set to 0.0 when the file cannot be opened or the fields
// cannot be parsed (for example on systems without /proc).
//
// NOTE(review): fields are split on whitespace, so a process name containing
// spaces would shift the columns — confirm this cannot happen for this binary.
void process_mem_usage(double& vm_usage, double& resident_set)
{
    using std::ios_base;
    using std::ifstream;
    using std::string;

    vm_usage = 0.0;
    resident_set = 0.0;

    // 'file' stat seems to give the most reliable results
    ifstream stat_stream("/proc/self/stat", ios_base::in);
    if (!stat_stream)
        return;

    // Skip the 22 leading entries we don't care about
    // (pid, comm, state, ... , itrealvalue, starttime).
    const int leading_fields = 22;
    string ignored;
    for (int field = 0; field < leading_fields; ++field)
        stat_stream >> ignored;

    // The two fields we want; initialised so a failed read can never leave
    // them indeterminate.
    unsigned long vsize = 0;
    long rss = 0;
    if (!(stat_stream >> vsize >> rss))  // don't care about the rest
        return;
    stat_stream.close();

    // Query the page size at run time, in case x86-64 is configured to use 2MB pages.
    long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024;
    vm_usage = vsize / 1024.0;
    resident_set = rss * page_size_kb;
}
// Global stopwatch: started in main(), read by main() and by the SIGINT handler.
Timer timer;
void signal_handlerkill(int sig)
{
cout << "Number of collisions: " << coll << endl;
//cout << test.size() << endl;
double vm, rss;
process_mem_usage(vm, rss);
vm /= 1024.0;
rss /= 1024.0;
cout << "VM: " << vm << "MB" << endl;
timer.printTime(timer.stop());
exit(1);
}
// Visits every 4-byte key (i, j, k, l), marks it in the global flag vector
// `test`, and counts keys that were already marked. Afterwards prints the
// collision count, the virtual memory size in MB and the elapsed time.
// Ctrl-C prints the same report early through signal_handlerkill().
int main()
{
    typedef std::vector<bool>::size_type KeyIndex;

    signal(SIGINT, signal_handlerkill);
    timer = Timer();
    timer.start();
    coll = 0;

    // Set up the vector: one flag per possible key, allocated in a single step
    // instead of billions of push_back calls (which also over-allocate while
    // the vector keeps doubling its capacity).
    const KeyIndex flagCount = 4294967296ULL + 1;
    test.assign(flagCount, false);

    #pragma omp parallel for
    for (int i = 0; i < 256; i++)
        for (int j = 0; j < 256; j++)
            for (int k = 0; k < 256; k++)
                for (int l = 0; l < 256; l++)
                {
                    // Build the 32-bit key arithmetically. Reading a long
                    // through a pointer to a 4-byte array would pick up
                    // unrelated bytes and could index far outside the vector.
                    //
                    // `i` is deliberately the most significant byte: OpenMP
                    // distributes the outer loop, so each thread then owns
                    // contiguous ranges of 2^24 flags and no two threads
                    // modify neighbouring bits.
                    // NOTE(review): this assumes the usual packed
                    // vector<bool> layout, where storage words start at
                    // index 0 and hold a power-of-two number of bits —
                    // confirm for the target library.
                    const KeyIndex myhash = (static_cast<KeyIndex>(i) << 24)
                                          | (static_cast<KeyIndex>(j) << 16)
                                          | (static_cast<KeyIndex>(k) << 8)
                                          | static_cast<KeyIndex>(l);
                    if (test[myhash])
                    {
                        #pragma omp atomic
                        coll++;
                    }
                    else
                    {
                        test[myhash] = true;
                    }
                }

    cout << "Number of collisions: " << coll << endl;
    double vm, rss;
    process_mem_usage(vm, rss);
    vm /= 1024.0;   // KB -> MB
    rss /= 1024.0;
    cout << "VM: " << vm << "MB" << endl;
    timer.printTime(timer.stop());
    return 0;
}
就空間而言,您可以使用set
而不是map
,因為bool
值是無用的。
此外,如果您使用的是C ++ 11,則unordered_set
可能會提供更好的性能。
也,
temp = i;
temp += j;
temp += k;
temp += temp;
可能比使用stringstream
甚至char數組有更大的開銷。
使用insert
而不是operator[]
。 insert函數返回一對。 第二個值表示,如果實際插入了值,即您可以按如下方式重寫代碼:
// insert() returns a pair whose .second is false when the key was already
// present, so this single call both tests for and records the hash value.
if (!mymap.insert(std::make_pair(myhash, true)).second) {
coll++;
cout << "Collision at " << i << " " << j << " " << k << endl;
}
好吧,我已經在這里回答過: https://stackoverflow.com/a/10606381/389833 ,內容如下:
int coll = 0;
typedef map<long, bool> MY_MAP_TYPE;
MY_MAP_TYPE mymap;
string temp;
long myhash;
for (int i = 0; i < 256; i++)
for (int j = 0; j < 256; j++)
for (int k = 0; k < 256; k++)
{
temp = i;
temp += j;
temp += k;
temp += temp;
myhash = hash(temp.c_str());
if( mymap.insert( MY_MAP_TYPE::value_type( myhash, true ) ).second == false)
{
coll++;
cout << "Collision at " << i << " " << j << " " << k << endl;
}
}
根據哈希的大小,你可以用空間換取CPU時間,只需使用bool向量而不是map來進行常數時間的查找。 如果范圍是 0 到 $256^3$(即這里唯一值的數量),它應該只需要大約2 MB,因為許多實現中的STL向量會在內部將bool向量壓縮為位。 當然,如果你的哈希函數可以返回非常大的值,如 $2^{32}$ 甚至 $2^{64}$,那么這種做法的效率就不高(或者可能根本行不通)。
如果您只關注長度為6個字符的字符串,那么您可以通過以下方式輕松優化生成鍵的循環:
// Builds the 6-character key "ijkijk" in a fixed-size char array instead of a
// std::string and hashes it. `myhash` and `hash` are declared elsewhere.
for (int i = 0; i < 256; i++)
    for (int j = 0; j < 256; j++)
        for (int k = 0; k < 256; k++)
        {
            /*
            string temp;
            temp = i;
            temp += j;
            temp += k;
            temp += temp;
            myhash = hash(temp.c_str());
            */
            // effectively, the same as above
            // The int -> char conversions are spelled out: an implicit
            // narrowing conversion inside a braced initializer is ill-formed
            // since C++11.
            const char temp[7] = {static_cast<char>(i), static_cast<char>(j), static_cast<char>(k),
                                  static_cast<char>(i), static_cast<char>(j), static_cast<char>(k),
                                  '\0'};
            myhash = hash(temp);
        }
以上結合insert
建議也應該提供良好的性能提升。
編輯:
所以,你在下面評論說這個版本“更慢”,這讓我十分困惑:
這些都值得懷疑,因為在我的機器上運行此代碼(暫時忽略3.3GHz幻數,因為這是我的CPU的速度):
#include <iostream>
#include <vector>
#include <boost/functional/hash.hpp>
#include <x86intrin.h>
using namespace std;
// Benchmark variant F: builds each 6-character key by std::string
// concatenation, hashes it with boost::hash<std::string>, and stores the
// result in `values`.
//
// values - receives one hash per (i, j, k) triple, in loop order; it is
//          indexed without bounds checks, so it must hold at least
//          256*256*256 entries.
// Returns the difference between the __rdtsc() readings taken after and
// before the loops.
uint64_t f(std::vector<uint64_t>& values)
{
boost::hash<std::string> hasher;
uint64_t start = __rdtsc();
// Next free slot in `values`.
int z = 0;
for (int i = 0; i < 256; i++)
{
for (int j = 0; j < 256; j++)
{
for (int k = 0; k < 256; k++)
{
// A fresh string per key is part of what this variant measures.
string temp;
temp = i;
temp += j;
temp += k;
// Append the string to itself: the key is the three bytes repeated.
temp += temp;
values[z++] = hasher(temp);
}
}
}
return (__rdtsc()) - start;
}
// Benchmark variant G: builds each 6-character key in a fixed-size char array,
// wraps it in a std::string only for the hasher, and stores the
// boost::hash<std::string> result in `values`.
//
// values - receives one hash per (i, j, k) triple, in loop order; it is
//          indexed without bounds checks, so it must hold at least
//          256*256*256 entries.
// Returns the difference between the __rdtsc() readings taken after and
// before the loops.
uint64_t g(std::vector<uint64_t>& values)
{
    boost::hash<std::string> hasher;
    uint64_t start = __rdtsc();
    int z = 0;  // next free slot in `values`
    for (int i = 0; i < 256; i++)
    {
        for (int j = 0; j < 256; j++)
        {
            for (int k = 0; k < 256; k++)
            {
                // Explicit casts: an implicit int -> char narrowing inside a
                // braced initializer is ill-formed since C++11. The casts are
                // resolved at compile time, so the measured work is unchanged.
                const char temp[7] = {static_cast<char>(i), static_cast<char>(j), static_cast<char>(k),
                                      static_cast<char>(i), static_cast<char>(j), static_cast<char>(k),
                                      '\0'};
                // An explicit length of 6 keeps embedded zero bytes in the key.
                values[z++] = hasher(std::string(temp, 6));
            }
        }
    }
    return (__rdtsc()) - start;
}
// Tick rate (3.3 GHz, the author's CPU speed) that main() uses to convert __rdtsc() tick counts to milliseconds.
static const double freq = 3300000000.0;
// Size of each result vector: 16 Mi entries, one per (i, j, k) key produced by f() and g().
static const int elements = 1024 * 1024 * 16;
// Runs both benchmark variants, prints their run time in milliseconds, and
// reports every position at which the two variants produced different hashes.
int main()
{
    std::vector<uint64_t> values_f(elements);
    std::vector<uint64_t> values_g(elements);

    // Run F first, then G; each call returns elapsed timestamp-counter ticks.
    const uint64_t ticks_f = f(values_f);
    const uint64_t ticks_g = g(values_g);

    const double millis_f = (ticks_f * 1000.0) / freq;
    const double millis_g = (ticks_g * 1000.0) / freq;
    cout << "F: " << millis_f << "ms \n";
    cout << "G: " << millis_g << "ms \n";

    // Both variants must agree on every key.
    for (int idx = 0; idx != elements; ++idx)
    {
        if (values_f[idx] == values_g[idx])
            continue;
        cout << "Error: Expected " << values_f[idx]
             << " received " << values_g[idx] << "!\n";
    }
    return 0;
}
給出這個輸出:
F: 3297.17ms
G: 736.444ms
表明構造std::string
的版本(在技術上甚至不需要)比執行串聯的版本執行得更好。 我的情況的不同之處在於使用boost::hash
(顯然使用std::vector
而不是std::map
或std::set
),但這並不會使測試偏向任何一個結果。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.