[英]Efficient way to find intersection of two vectors with respect to two members of vector objects
我有兩個向量,各自保存一組數據對象。 每個數據對象都保存坐標和其他一些數據。 向量始終是有序的(先按x坐標,再按y坐標)。 我想從兩個向量中刪除所有「坐標沒有同時出現在兩個向量中」的對象。 這是我目前做法的MWE(最小可運行示例):
#include <iostream>
#include <vector>
#include <algorithm>
/// A data object located at integer coordinates (x, y) and carrying a payload.
/// The coordinates are what the surrounding code compares; `data` is the
/// additional payload stored with each point.
struct foo {
    foo() = default;
    foo(int x, int y, double data) : x(x), y(y), data(data) {}

    // Default member initializers: a default-initialized object (`foo f;`)
    // now holds zeros instead of indeterminate values.
    int x = 0;
    int y = 0;
    double data = 0.0;
};
int main()
{
std::vector<foo> vec1=std::vector<foo>(7);
std::vector<foo> vec2=std::vector<foo>(4);
vec1={foo(1,1,0.),foo(1,2,0.),foo(2,1,0.),foo(2,2,0.),foo(2,3,0.),foo(3,1,0.),foo(3,2,0.)};
vec2={foo(1,2,0.),foo(1,3,0.),foo(2,1,0.),foo(3,1,0.)};
for(auto it1=vec1.begin(); it1!=vec1.end();){
auto cur_element=*it1;
auto intersec = std::find_if(vec2.begin(),vec2.end(),[cur_element]
(foo & comp_element)->bool{
return((cur_element.x==comp_element.x) && (cur_element.y==comp_element.y));
});
if(intersec==vec2.end()) it1=vec1.erase(it1);
else ++it1;
}
for(auto it2=vec2.begin(); it2!=vec2.end();){
auto cur_element=*it2;
auto intersec = std::find_if(vec1.begin(),vec1.end(),[cur_element]
(foo & comp_element)->bool{
return((cur_element.x==comp_element.x) && (cur_element.y==comp_element.y));
});
if(intersec==vec1.end()) it2=vec2.erase(it2);
else ++it2;
}
std::cout<<"vec1:\n";
for(auto i: vec1) std::cout<<i.x<<" "<<i.y<<"\n";
std::cout<<"\nvec2:\n";
for(auto i: vec2) std::cout<<i.x<<" "<<i.y<<"\n";
return 0;
}
它有效,並給了我預期的輸出。
無論如何,似乎必須遍歷兩個向量確實很低效。 有沒有更有效的方法來實現相同的輸出?
編輯:僅獲得兩個向量中表示的坐標是不夠的。 我需要的是一種從兩個向量中刪除“錯誤”對象的有效方法。
您的兩個向量已經排序-完美!
首先,假設有一個比較函數(在即將到來的C++20中,可以改用三路比較運算符 <=>,即所謂的「太空船運算符」):
/// Three-way comparison of two foo objects, ordering by x first and by y second.
/// @return a negative value if l orders before r, a positive value if l orders
///         after r, and 0 if both coordinates are equal.
int compare(foo const& l, foo const& r)
{
    // Compare instead of subtracting: `l.x - r.x` is a signed overflow
    // (undefined behaviour) when the operands are far apart, e.g. INT_MAX and -1.
    if (l.x != r.x)
        return l.x < r.x ? -1 : 1;
    if (l.y != r.y)
        return l.y < r.y ? -1 : 1;
    return 0;
}
現在,您可以在算法中使用它:
auto i1 = v1.begin();
auto i2 = v2.begin();
auto end1 = i1;
auto end2 = i2;
while(i1 != v1.end() && i2 != v2.end())
{
int cmp = compare(*i1, *i2);
if(cmp < 0)
{
// skip element
++i1;
}
else if(cmp > 0)
{
++i2;
}
else
{
// matching element found, keep in both vectors...
if(i1 != end1)
*end1 = std::move(*i1);
++i1;
++end1;
if(i2 != end2)
*end2 = std::move(*i2);
++i2;
++end2;
// if you can rely on move (or fallback copy) assignment
// checking for self assignment, the following simpler
// alternative can be used instead:
//*end1++ = std::move(*i1++);
//*end2++ = std::move(*i2++);
}
}
v1.erase(end1, v1.end());
v2.erase(end2, v2.end());
對兩個向量的長度而言,時間複雜度都是線性的...
該算法只是將要保留的元素移到最前面,最后一次性刪除尾部多余的元素——做法與std::remove_if類似...
我認為這種解決方案是線性的,可以滿足您的需求。
可能的進一步增強:
對於存在大片不相交區域的大向量,可能值得先記錄待擦除的區間,再成批擦除。
如果data的移動成本低廉,另一種策略是根據輸入向量有條件地構建輸出向量,然后與原向量交換:
/// Function object ordering foo objects by x first and, for equal x, by y.
struct foo_less
{
    /// @return true if l orders strictly before r.
    bool operator()(foo const& l, foo const& r) const
    {
        if (l.x != r.x)
            return l.x < r.x;
        return l.y < r.y;
    }
};
void remove_non_matching(std::vector<foo>& l, std::vector<foo>& r)
{
constexpr auto less = foo_less();
assert(std::is_sorted(l.begin(), l.end(), less));
assert(std::is_sorted(r.begin(), r.end(), less));
auto lcurrent = l.begin(), rcurrent = r.begin();
while (lcurrent != l.end() && rcurrent != r.end())
{
if (less(*lcurrent, *rcurrent))
lcurrent = l.erase(lcurrent);
else if(less(*rcurrent, *lcurrent))
rcurrent = r.erase(rcurrent);
else
{
++lcurrent;
++rcurrent;
}
}
l.erase(lcurrent, l.end());
r.erase(rcurrent, r.end());
}
替代方法將花費更多的內存,但理論上更有效:
void remove_non_matching_alt(std::vector<foo>& l, std::vector<foo>& r)
{
constexpr auto less = foo_less();
assert(std::is_sorted(l.begin(), l.end(), less));
assert(std::is_sorted(r.begin(), r.end(), less));
auto lresult = std::vector<foo>(), rresult = std::vector<foo>();
auto sz = std::min(l.size(), r.size());
lresult.reserve(sz);
rresult.reserve(sz);
auto lcurrent = l.begin(), rcurrent = r.begin();
while (lcurrent != l.end() && rcurrent != r.end())
{
if (less(*lcurrent, *rcurrent))
++lcurrent;
else if(less(*rcurrent, *lcurrent))
++rcurrent;
else
{
lresult.push_back(std::move(*lcurrent++));
rresult.push_back(std::move(*rcurrent++));
}
}
l.swap(lresult);
r.swap(rresult);
}
與之類似,但使用thread_local持久性緩存來避免不必要的內存分配:
void remove_non_matching_alt_faster(std::vector<foo>& l, std::vector<foo>& r)
{
constexpr auto less = foo_less();
assert(std::is_sorted(l.begin(), l.end(), less));
assert(std::is_sorted(r.begin(), r.end(), less));
// optimisation - minimise memory allocations on subsequent calls while maintaining
// thread-safety
static thread_local auto lresult = std::vector<foo>(), rresult = std::vector<foo>();
auto sz = std::min(l.size(), r.size());
lresult.reserve(sz);
rresult.reserve(sz);
auto lcurrent = l.begin(), rcurrent = r.begin();
while (lcurrent != l.end() && rcurrent != r.end())
{
if (less(*lcurrent, *rcurrent))
++lcurrent;
else if(less(*rcurrent, *lcurrent))
++rcurrent;
else
{
lresult.push_back(std::move(*lcurrent++));
rresult.push_back(std::move(*rcurrent++));
}
}
l.swap(lresult);
r.swap(rresult);
// ensure destructors of discarded 'data' are called and prep for next call
lresult.clear();
rresult.clear();
}
這是我的方法,采用erase-remove慣用法(先把要保留的元素移到前面,再一次性擦除尾部)的風格,只對兩個向量各遍歷一次:
#include <algorithm>
#include <iostream>
#include <iterator>
#include <tuple>
#include <utility>
#include <vector>
/// A data object located at integer coordinates (x, y) and carrying a payload.
/// The coordinates are what the surrounding code compares; `data` is the
/// additional payload stored with each point.
struct foo
{
    foo() = default;
    foo(int x, int y, double data) : x(x), y(y), data(data) {}

    // Default member initializers: a default-initialized object (`foo f;`)
    // now holds zeros instead of indeterminate values.
    int x = 0;
    int y = 0;
    double data = 0.0;
};
// Three-way comparison of two foo objects by x, then by y.
// Returns -1 if foo1 orders before foo2, +1 if it orders after, 0 if the
// coordinates are equal. (Could alternatively be expressed as overloaded
// comparison operators.)
int compare_foo(const foo& foo1, const foo& foo2)
{
    if (foo1.x != foo2.x)
    {
        return foo1.x < foo2.x ? -1 : +1;
    }
    if (foo1.y != foo2.y)
    {
        return foo1.y < foo2.y ? -1 : +1;
    }
    return 0;
}
std::tuple<std::vector<foo>::iterator, std::vector<foo>::iterator>
remove_difference(std::vector<foo>& vec1, std::vector<foo>& vec2)
{
typedef std::vector<foo>::iterator iterator;
iterator it1 = vec1.begin();
size_t shift1 = 0;
iterator it2 = vec2.begin();
size_t shift2 = 0;
while (it1 != vec1.end() && it2 != vec2.end())
{
int cmp = compare_foo(*it1, *it2);
if (cmp < 0)
{
++it1;
shift1++;
}
else if (cmp > 0)
{
++it2;
shift2++;
}
else
{
std::iter_swap(it1, std::prev(it1, shift1));
++it1;
std::iter_swap(it2, std::prev(it2, shift2));
++it2;
}
}
return std::make_tuple(std::prev(it1, shift1), std::prev(it2, shift2));
}
int main()
{
std::vector<foo> vec1=std::vector<foo>(7);
std::vector<foo> vec2=std::vector<foo>(4);
vec1={foo(1,1,0.),foo(1,2,0.),foo(2,1,0.),foo(2,2,0.),foo(2,3,0.),foo(3,1,0.),foo(3,2,0.)};
vec2={foo(1,2,0.),foo(1,3,0.),foo(2,1,0.),foo(3,1,0.)};
auto remove_iters = remove_difference(vec1, vec2);
vec1.erase(std::get<0>(remove_iters), vec1.end());
vec2.erase(std::get<1>(remove_iters), vec2.end());
std::cout<<"vec1:\n";
for(auto i: vec1) std::cout<<i.x<<" "<<i.y<<"\n";
std::cout<<"\nvec2:\n";
for(auto i: vec2) std::cout<<i.x<<" "<<i.y<<"\n";
return 0;
}
輸出:
vec1:
1 2
2 1
3 1
vec2:
1 2
2 1
3 1
需要注意的一點:此實現假設沒有重復的坐標;更確切地說,它假設重復的坐標在兩個向量中出現的次數相同,多出來的“額外”重復項會被刪除(如有需要,您可以修改算法來改變這一行為,不過代碼會因此變得更難看)。
也許是這樣的嗎? 您首先選擇哪個向量更大,然后(主要)對更大的向量進行迭代,然后檢查另一個向量。
int main()
{
std::vector<foo> vec1=std::vector<foo>(7);
std::vector<foo> vec2=std::vector<foo>(4);
vec1={foo(1,1,0.),foo(1,2,0.),foo(2,1,0.),foo(2,2,0.),foo(2,3,0.),foo(3,1,0.),foo(3,2,0.)};
vec2={foo(1,2,0.),foo(1,3,0.),foo(2,1,0.),foo(3,1,0.)};
std::vector<foo>::iterator it_begin;
std::vector<foo>::iterator it_end;
std::vector<foo>* main;
std::vector<foo>* other;
if( vec1.size() > vec2.size() ) {
it_begin = vec1.begin();
it_end = vec1.end();
main = &vec1;
other = &vec2;
}
else {
it_begin = vec2.begin();
it_end = vec2.end();
main = &vec2;
other = &vec1;
}
std::vector<foo> new_vec;
for( it_begin; it_begin != it_end; ++it_begin ) {
auto cur_element = *it_begin;
auto intersec = std::find_if( other->begin(),other->end(),[cur_element]
(foo & comp_element)->bool{
return( (cur_element.x==comp_element.x ) && ( cur_element.y==comp_element.y ) );
});
if( intersec != other->end() )
{
new_vec.push_back( cur_element );
}
}
vec1 = new_vec;
vec2 = new_vec;
std::cout<<"vec1:\n";
for(auto i: vec1) std::cout<<i.x<<" "<<i.y<<"\n";
std::cout<<"\nvec2:\n";
for(auto i: vec2) std::cout<<i.x<<" "<<i.y<<"\n";
return 0;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.