Context
I'm trying to implement Boruvka MST algorithm using CUDA, but it's not necessary to understand the algorithm at all to help me here.
Problem
So, let me describe the problem: I have a graph stored in edge-list format (an array of edges, where each edge is represented by its 2 adjacent vertex IDs and its weight).
But, (it is very important!) to optimize accesses into device memory I store the edges not as a single array of structs, but as three separate arrays:
To access a single edge, one can simply use the same index into all three arrays:
Now, when I have described the data format, here is the problem
I want to remove from these three arrays elements(edges) with the following conditions:
1) if src_id[i] == dst_id[i], then remove the i-th edge
2) if src_id[i] != dst_id[i], but there is another edge j with the same src_id[j] and dst_id[j] but a smaller weight[j], then remove the i-th edge
In other words I want to:
The first is simple: I can use thrust::remove_if or scan as described here parallel removal of elements from an array , to remove the edges with the same ids. (I have already implemented the second variant with the scan).
But I have failed to implement the second part, the removal of duplicate edges. I have one idea, but not sure if this approach is efficient at all. Let me describe it.
First of all, we will reorder (or sort) these three arrays in the following way:
When all the edges are sorted this way, it's relatively easy to remove the duplicate non-minimal edges:
Question*
But the problem is that I don't know how to sort three arrays this way efficiently. (I could probably use thrust::sort on converted data, i.e. a single array of structs, but it seems that would be very slow, in which case it would be better not to remove duplicate edges at all.)
Or perhaps someone can advise a better way to remove duplicate edges without sorting them this way.
Thank you for reading this, any advice appreciated!
You can easily sort multiple vectors in one thrust::sort call by using a thrust::zip_iterator.
The main idea is:
auto z = thrust::make_zip_iterator(thrust::make_tuple(d_src_ids.begin(),d_dst_ids.begin(), d_weights.begin()));
thrust::sort(z,z+N);
This will sort the three vectors first by the first vector, then by the second, then by the third.
The following code shows how to use this in a fully worked-out example. It uses a custom functor (copied from thrust::detail) to do the remove_if step in a single call without having to store intermediate results.
#include <iostream>
#include <iterator>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/remove.h>
#include <thrust/sort.h>
#define PRINTER(name) print(#name, (name))

// Dumps a container to std::cout on a single line.
//
// Output format: the label `name`, a colon and a tab, then every element of
// `v` followed by a tab, then a line break (std::endl, which also flushes).
// `V` is any class template whose first template argument is the element
// type `T`; the elements are copied to the stream with thrust::copy.
template <template <typename...> class V, typename T, typename ...Args>
void print(const char* name, const V<T,Args...> & v)
{
    std::cout << name << ":\t";

    // Each element written through this iterator is followed by a tab.
    std::ostream_iterator<T> tab_separated_out(std::cout, "\t");
    thrust::copy(v.begin(), v.end(), tab_separated_out);

    std::cout << std::endl;
}
// copied from https://github.com/thrust/thrust/blob/master/thrust/detail/range/head_flags.h
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/tuple.h>
#include <thrust/functional.h>
// head_flags: a lazily evaluated range of "head" flags over [first, last).
//
// Element i of the range is produced on demand by head_flag_functor and is
// true when i == 0 or when binary_pred(first[i], first[i - 1]) is false,
// i.e. when element i starts a new run of elements that the predicate does
// not consider equal to their predecessor.
//
// Template parameters:
//   RandomAccessIterator - iterator type of the underlying sequence.
//   BinaryPredicate      - comparison of an element with its predecessor;
//                          defaults to thrust::equal_to on the value type.
//   ValueType            - type of each flag (result_type of the functor);
//                          defaults to bool.
//   IndexType            - type of the running index; defaults to the
//                          iterator's difference type.
//
// No flag storage is allocated by this class: begin()/end() return
// transform iterators that compute each flag when dereferenced.
template<typename RandomAccessIterator,
typename BinaryPredicate = thrust::equal_to<typename thrust::iterator_value<RandomAccessIterator>::type>,
typename ValueType = bool,
typename IndexType = typename thrust::iterator_difference<RandomAccessIterator>::type>
class head_flags
{
// XXX WAR cudafe issue
//private:
public:
// Functor applied to each (index, current element, previous element) tuple
// to produce one flag.
struct head_flag_functor
{
BinaryPredicate binary_pred; // this must be the first member for performance reasons
// Length of the sequence (last - first) as passed by head_flags; stored
// here but not read by operator() in this block.
IndexType n;
typedef ValueType result_type;
// Builds the functor with a default-constructed predicate.
__host__ __device__
head_flag_functor(IndexType n)
: binary_pred(), n(n)
{}
// Builds the functor with a caller-supplied predicate.
__host__ __device__
head_flag_functor(IndexType n, BinaryPredicate binary_pred)
: binary_pred(binary_pred), n(n)
{}
// t is expected to be a 3-tuple: get<0> is the position i, get<1> the
// element at i, get<2> the element at i - 1 (see the zip iterator built in
// the head_flags constructors).
// Returns true for i == 0, otherwise the negation of
// binary_pred(current, previous).
template<typename Tuple>
__host__ __device__ __thrust_forceinline__
result_type operator()(const Tuple &t)
{
const IndexType i = thrust::get<0>(t);
// note that we do not dereference the tuple's 2nd element when i <= 0
// and therefore do not dereference a bad location at the boundary
// The || short-circuits, so binary_pred is never invoked for i == 0; the
// operand that would refer to the out-of-range position `first - 1` is the
// one obtained via get<2>.
// NOTE(review): whether that location is truly never read depends on the
// zip iterator's reference type deferring the load - confirm for the
// iterator types in use.
return (i == 0 || !binary_pred(thrust::get<1>(t), thrust::get<2>(t)));
}
};
// Supplies the running index i = 0, 1, 2, ... to the functor.
typedef thrust::counting_iterator<IndexType> counting_iterator;
public:
// Iterator type of the flag range: head_flag_functor applied to a zip of
// (index, element, preceding element).
typedef thrust::transform_iterator<
head_flag_functor,
thrust::zip_iterator<thrust::tuple<counting_iterator,RandomAccessIterator,RandomAccessIterator> >
> iterator;
// Builds the flag range over [first, last) using a default-constructed
// BinaryPredicate. The third zipped iterator is `first - 1`, so position i
// pairs element i with element i - 1.
__host__ __device__
head_flags(RandomAccessIterator first, RandomAccessIterator last)
: m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<IndexType>(0), first, first - 1)),
head_flag_functor(last - first))),
m_end(m_begin + (last - first))
{}
// Same as above, but compares neighbours with the supplied binary_pred.
__host__ __device__
head_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred)
: m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<IndexType>(0), first, first - 1)),
head_flag_functor(last - first, binary_pred))),
m_end(m_begin + (last - first))
{}
// Iterator to the flag of the first element.
__host__ __device__
iterator begin() const
{
return m_begin;
}
// Iterator one past the flag of the last element.
__host__ __device__
iterator end() const
{
return m_end;
}
// Returns the flag at offset i from begin(); no bounds check is performed.
template<typename OtherIndex>
__host__ __device__
typename iterator::reference operator[](OtherIndex i)
{
return *(begin() + i);
}
private:
// m_begin is declared before m_end, so it is initialized first; the
// constructors rely on this when computing m_end from m_begin.
iterator m_begin, m_end;
};
// Factory helper: deduces RandomAccessIterator from the arguments and returns
// a head_flags object over [first, last) with all other template parameters
// left at their defaults.
template<typename RandomAccessIterator>
__host__ __device__
head_flags<RandomAccessIterator>
make_head_flags(RandomAccessIterator first, RandomAccessIterator last)
{
    typedef head_flags<RandomAccessIterator> flags_type;
    return flags_type(first, last);
}
// Demo: removes duplicate (src, dst) edges from a structure-of-arrays edge
// list, keeping the lightest edge of every duplicate group.
//
// Steps:
//   1. Sort the three arrays together, lexicographically by
//      (src, dst, weight), through a zip iterator.
//   2. Flag the first edge of every run of equal (src, dst) pairs; after the
//      sort that edge is the one with the smallest weight.
//   3. Compact the three arrays in place, dropping every unflagged edge.
//
// Prints the arrays after each stage and returns 0.
int main()
{
    int src_ids[] = {3,1,2,2,3,3};
    int dst_ids[] = {2,2,3,3,1,1};
    float weights[] = {1,2,8,4,5,6};
    // Derive the edge count from the data so the arrays can be edited freely.
    const int N = static_cast<int>(sizeof(src_ids) / sizeof(src_ids[0]));

    thrust::device_vector<int> d_src_ids(src_ids,src_ids+N);
    thrust::device_vector<int> d_dst_ids(dst_ids,dst_ids+N);
    thrust::device_vector<float> d_weights(weights,weights+N);

    std::cout << "--- initial values ---" << std::endl;
    PRINTER(d_src_ids);
    PRINTER(d_dst_ids);
    PRINTER(d_weights);

    // Step 1: one sort call reorders all three arrays consistently.
    auto z = thrust::make_zip_iterator(thrust::make_tuple(d_src_ids.begin(),d_dst_ids.begin(), d_weights.begin()));
    thrust::sort(z,z+N);

    std::cout << "--- after sort ---" << std::endl;
    PRINTER(d_src_ids);
    PRINTER(d_dst_ids);
    PRINTER(d_weights);

    // Step 2: head flags compare only (src, dst), so edges that differ just
    // in weight belong to the same run.
    auto z2 = thrust::make_zip_iterator(thrust::make_tuple(d_src_ids.begin(),d_dst_ids.begin()));
    auto t = make_head_flags(z2,z2+N);

    // The flag range is a lazy view over d_src_ids/d_dst_ids, which the
    // in-place remove_if below overwrites. Thrust documents that the stencil
    // range must not overlap the range being compacted, so the flags are
    // evaluated into their own buffer first.
    thrust::device_vector<bool> keep_flags(t.begin(), t.end());

    // Step 3: remove every edge whose flag is false.
    using namespace thrust::placeholders;
    auto end = thrust::remove_if(z,z+N, keep_flags.begin(), !_1);

    // All three arrays shrink to the same length; read it off the first one.
    int new_size = thrust::get<0>(end.get_iterator_tuple()) - d_src_ids.begin();
    d_src_ids.resize(new_size);
    d_dst_ids.resize(new_size);
    d_weights.resize(new_size);

    std::cout << "--- after remove_if ---" << std::endl;
    PRINTER(d_src_ids);
    PRINTER(d_dst_ids);
    PRINTER(d_weights);
    return 0;
}
Output:
--- initial values ---
d_src_ids: 3 1 2 2 3 3
d_dst_ids: 2 2 3 3 1 1
d_weights: 1 2 8 4 5 6
--- after sort ---
d_src_ids: 1 2 2 3 3 3
d_dst_ids: 2 3 3 1 1 2
d_weights: 2 4 8 5 6 1
--- after remove_if ---
d_src_ids: 1 2 3 3
d_dst_ids: 2 3 1 2
d_weights: 2 4 5 1
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.