[英]Segmentation fault when I increase the size of vector in parallel merge sort
当我尝试使用 2 个线程和超过 450 万的大小运行程序时,它会产生分段错误。 低于该数字的任何内容都可以顺利运行。 简而言之,非常大的数字会产生段错误,我不知道为什么。 我想知道错误是否与线程创建或工作分配给线程有关。 一些帮助将不胜感激。 下面是代码。
#include <iostream>
#include <thread>
#include <vector>
#include <chrono>
#include <ctime>
#include <algorithm>
/* define variables for the problem */
#define UPPER_LIM 10
#define LOWER_LIM 1
using namespace std;
/* function definitions */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit);
void merge_sort(vector<int>& array, int left, int right);
void merge(vector<int>& array, int left, int middle, int right);
void thread_merge_sort(vector<int>& array, int thread_id, int n, int p, int q);
bool isTest_array_is_in_order(vector<int>& array, int LENGTH);
/*
 * Program entry point: fills a vector with random values, starts
 * NUM_THREADS workers that each run thread_merge_sort on their share, and
 * prints the elapsed wall-clock time.
 *
 * argv[1] is read as the thread count, argv[2] as the element count.
 * NOTE(review): argc is never checked, and a thread count of 0 makes the
 * division and modulo below divide by zero.
 */
int main(int argc, const char *argv[]) {
int NUM_THREADS = atoi(argv[1]);
int LENGTH = atoi(argv[2]);
/* every worker gets NUMBERS_PER_THREAD elements; OFFSET is the remainder */
int NUMBERS_PER_THREAD = LENGTH / NUM_THREADS;
int OFFSET = LENGTH % NUM_THREADS;
srand(time(0));
std::vector<int> array;
array.reserve(LENGTH);
/* initialize array with random numbers */
for (int i = 0; i < LENGTH; i++) {
array.push_back(generate_random_number(LOWER_LIM, UPPER_LIM));
}
/* begin timing */
auto start = std::chrono::high_resolution_clock::now();
const size_t nthreads = NUM_THREADS; //std::thread::hardware_concurrency();
{
// Announce the run, then launch one worker per slice.
std::cout << "parallel(" << nthreads << " threads):" << std::endl;
std::vector<std::thread> workers;
for (std::size_t t = 0; t < nthreads; t++) {
workers.push_back(thread(thread_merge_sort, ref(array), t, nthreads, NUMBERS_PER_THREAD, OFFSET));
}
for (thread& t: workers) { // await thread termination
t.join();
}
// NOTE(review): nothing after the join merges the per-thread slices
// with each other.
}
auto elapsed = std::chrono::high_resolution_clock::now() - start;
auto usec = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
// and print time
std::cout << "Spent " << usec << " executing " << nthreads << " in parallel " << " array size " << LENGTH << std::endl;
/* end timing */
/* test to ensure that the array is in sorted order */
if (!isTest_array_is_in_order(array, LENGTH)) {
fprintf(stderr, "Error: array is not sorted!!\n");
// The failure path also returns 0, the same value main yields when it
// runs off the end on success.
return 0;
}
}
/* Scale one rand() sample linearly onto [lower_limit, upper_limit]. */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit) {
    // Position of the sample inside rand()'s output range, in [0, 1].
    const double fraction = static_cast<double>(rand()) / RAND_MAX;
    const unsigned int span = upper_limit - lower_limit;
    // The double result is truncated by the implicit conversion to int.
    return lower_limit + span * fraction;
}
/**
 * Thread entry point: derives this worker's inclusive [left, right] slice
 * from its id and sorts it. The last worker also takes the OFFSET leftover
 * elements.
 * NOTE(review): the second merge_sort call (starting at left + 1) and the
 * trailing merge are reproduced as posted; a single merge_sort over the
 * slice looks sufficient.
 */
void thread_merge_sort(vector<int> &arr, int thread_id, int NUM_THREADS, int NUMBERS_PER_THREAD, int OFFSET) {
    const bool is_last_worker = (thread_id == NUM_THREADS - 1);
    const int left = thread_id * NUMBERS_PER_THREAD;
    const int right = (thread_id + 1) * NUMBERS_PER_THREAD - 1 + (is_last_worker ? OFFSET : 0);
    if (left >= right) {
        return;  // zero or one element: nothing to do
    }
    const int middle = left + (right - left) / 2;
    merge_sort(arr, left, right);
    merge_sort(arr, left + 1, right);
    merge(arr, left, middle, right);
}
/* Order check as posted.
   NOTE(review): the loop body returns on its very first pass, so only
   arr[0] and arr[1] are ever compared; for LENGTH <= 1 control reaches the
   end of the function without a return value. */
bool isTest_array_is_in_order(vector<int>& arr, int LENGTH) {
    for (int idx = 1; idx < LENGTH; ++idx) {
        // Unconditional return: later elements are never inspected.
        return arr[idx] >= arr[idx - 1];
    }
}
/* Recursive top-down merge sort of the inclusive range arr[left..right]. */
void merge_sort(vector<int>& arr, int left, int right) {
    if (left >= right) {
        return;  // ranges of fewer than two elements are already sorted
    }
    const int middle = left + (right - left) / 2;
    merge_sort(arr, left, middle);
    merge_sort(arr, middle + 1, right);
    merge(arr, left, middle, right);
}
/*
 * Merge the adjacent sorted runs arr[left..middle] and arr[middle+1..right]
 * (all bounds inclusive) back into arr[left..right].
 * NOTE(review): both scratch buffers are variable-length arrays with
 * automatic storage, so the space they need grows with the slice being
 * merged.
 */
void merge(vector<int>& arr, int left, int middle, int right) {
    const int left_count = middle - left + 1;
    const int right_count = right - middle;
    int left_run[left_count];
    int right_run[right_count];

    /* snapshot both runs, because arr[left..right] is overwritten below */
    for (int n = 0; n < left_count; ++n) {
        left_run[n] = arr[left + n];
    }
    for (int n = 0; n < right_count; ++n) {
        right_run[n] = arr[middle + 1 + n];
    }

    int li = 0;
    int ri = 0;
    int out = left;
    while (li < left_count && ri < right_count) {
        /* on ties the element from the left run goes first */
        if (right_run[ri] < left_run[li]) {
            arr[out++] = right_run[ri++];
        } else {
            arr[out++] = left_run[li++];
        }
    }
    /* at most one of the two runs still has elements left */
    while (li < left_count) {
        arr[out++] = left_run[li++];
    }
    while (ri < right_count) {
        arr[out++] = right_run[ri++];
    }
}
首先,我把您的 merge_sort 和 merge 换成了空的桩函数(stub),然后用 2 个线程、4500000 个元素运行:没有出现段错误。
要做完整的调试,您最好提供完整的代码……
下面是我能够编译通过的、稍作修改的代码:
#include <iostream>
#include <thread>
#include <vector>
#include <chrono>
#include <ctime>
#include <cstdint>
#include <algorithm>
#include <thread>
/* define variables for the problem */
constexpr size_t const UPPER_LIM = 10;
constexpr size_t const LOWER_LIM = 1;
using namespace std;
/* function definitions */
void merge_sort(vector<int>& array, int left, int right){
    // Stub: deliberately sorts nothing and leaves `array` untouched.
}
void merge(vector<int>& array, int left, int middle, int right){
    // Stub: deliberately merges nothing and leaves `array` untouched.
}
bool isTest_array_is_in_order(vector<int>& array, int LENGTH){
    // Stub: reports "not in order" without looking at the data.
    const bool in_order = false;
    return in_order;
}
/**
 * Draw one rand() sample and scale it linearly onto
 * [lower_limit, upper_limit]; the result is truncated to int.
 */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit){
    const unsigned int span = upper_limit - lower_limit;
    // rand() / RAND_MAX lies in [0, 1]
    const double fraction = static_cast<double>(rand()) / RAND_MAX;
    return lower_limit + span * fraction;
}
/**
 * Thread entry point: works on the inclusive slice [left, right] that
 * belongs to thread_id; the last worker also takes the OFFSET leftovers.
 * NOTE(review): the second merge_sort call (from left + 1) and the final
 * merge are kept exactly as in the question's code.
 */
void thread_merge_sort(vector<int> &arr, int thread_id, int NUM_THREADS, int NUMBERS_PER_THREAD, int OFFSET){
    const int left = thread_id * NUMBERS_PER_THREAD;
    int right = left + NUMBERS_PER_THREAD - 1;
    if (thread_id == NUM_THREADS - 1) {
        right += OFFSET;
    }
    if (!(left < right)) {
        return;  // zero or one element: nothing to do
    }
    const int middle = left + (right - left) / 2;
    merge_sort(arr, left, right);
    merge_sort(arr, left + 1, right);
    merge(arr, left, middle, right);
}
/*
 * Reproduction driver with hard-coded parameters (2 threads, 4500000
 * elements) in place of the command-line arguments: fills the vector,
 * runs one thread_merge_sort worker per slice and prints the elapsed time.
 */
int main(int argc, const char * argv[]){
int const NUM_THREADS = 2; //atoi (argv[1]);
int const LENGTH = 4500000; //atoi(argv[2]);
/* every worker gets NUMBERS_PER_THREAD elements; OFFSET is the remainder */
int const NUMBERS_PER_THREAD = LENGTH / NUM_THREADS;
int const OFFSET = LENGTH % NUM_THREADS;
/* print the size of int in bytes on this platform */
cout << sizeof(int) << "\n";
srand(time(0)) ;
std::vector<int> array;
array.reserve(LENGTH);
/* initialize array with random numbers */
for(int ii=0; ii < LENGTH; ++ii){
array.push_back(generate_random_number(LOWER_LIM, UPPER_LIM));
}
/* begin timing */
auto start = std::chrono::high_resolution_clock::now();
const size_t nthreads = NUM_THREADS; //std::thread::hardware_concurrency();
{
// Announce the run, then launch one worker per slice.
std::cout<<"parallel ("<<nthreads<<" threads):"<<std::endl;
std::vector<std::thread> workers;
for(std::size_t tt=0; tt<nthreads; ++tt){
workers.push_back(thread(thread_merge_sort, ref(array), tt, nthreads, NUMBERS_PER_THREAD, OFFSET));
}
// await thread termination
for(thread& t: workers) {
t.join();
}
}
auto elapsed = std::chrono::high_resolution_clock::now() - start;
auto usec = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
// and print time
std::cout << "Spent " << usec << " executing " << nthreads << " in parallel " << " array size " << array.size() << "\n";
/* end timing */
return 0;
}
[编辑]:
您的 isTest_array_is_in_order 无法工作,因为您在比较前两个元素后立即返回。
/* Quoted as posted, to show the defect: the loop body returns on its first
   pass, so only arr[0] and arr[1] are ever compared. */
bool isTest_array_is_in_order(vector<int>& arr, int LENGTH)
{
    for (int idx = 1; idx < LENGTH; ++idx) {
        // Unconditional return: the loop never reaches a second iteration.
        return arr[idx] >= arr[idx - 1];
    }
}
这是一个应该工作的版本:
/**
 * Check that the first LENGTH elements of arr are in non-decreasing order.
 * Returns true when no element is smaller than its predecessor.
 */
bool isTest_array_is_in_order(vector<int>& arr, int LENGTH){
    for(int pos = 1; pos < LENGTH; ++pos){
        if(arr[pos - 1] > arr[pos]){
            return false;  // first descent found; no need to look further
        }
    }
    return true;
}
[编辑]:
一开始,我用您的代码确实重现了这个段错误。
我修改了代码,现在看起来运行良好:刚刚用 16 个线程对 44999999 个元素做了测试,一切正常。
检查您的代码后发现,它是在这里崩溃的:
/* merge function */
void merge(vector<int>& arr, int left, int middle, int right) {
int i = 0;
int j = 0;
int k = 0;
int left_length = middle - left + 1;
int right_length = right - middle;
int left_array[left_length];
int right_array[right_length];
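在这里,您创建了 2 个局部数组,它们位于栈上,而不是堆上。栈的大小取决于操作系统,通常只有几 MB,例如 10 MB 左右。以 2 个线程、450 万个元素为例:每个线程在最后一次合并时,两个临时数组合计约 2250000 × 4 字节 ≈ 9 MB(假设 int 为 4 字节),已经接近或超过这个限制。
于是我用 std::vector 替换了您的 C 风格数组,并进一步改进了代码:使用了更明确的类型,并修改了 main,使其可以在一次运行中对多组不同的随机数据进行排序。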
所以这是我的代码:
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <cstdint>
#include <thread>
#include <chrono>
#include <ctime>
#include <iterator>
#include <vector>
#include <array>
#include <random>
#include <algorithm>
using namespace std;
using idx_t = size_t;
using data_t = int;
/* define variables for the problem */
constexpr data_t const UPPER_LIM = 2000000;
constexpr data_t const LOWER_LIM = 0;
constexpr idx_t const REPEAT_CNT = 10000;
/* function definitions */
/**
 * Format the leading elements of arr as text, ten per row.
 *
 * @param arr       values to print
 * @param max_elem  maximum number of elements to emit
 * @return          each value right-aligned in a field of width 8; values
 *                  are separated by ", ", and by ",\n" after every tenth
 *                  value. Empty when nothing is emitted.
 *
 * Emits exactly min(max_elem, arr.size()) elements. The earlier version
 * tested the limit only after writing, so it produced one element too many,
 * and it always left a dangling separator after the last value.
 */
std::string to_array_str(vector<data_t>& arr, idx_t max_elem){
    constexpr idx_t per_row = 10;
    idx_t const count = std::min<idx_t>(max_elem, arr.size());
    std::stringstream ss;
    for(idx_t pos = 0; pos < count; ++pos){
        if(pos != 0){
            // separator belongs between elements, never after the last one
            ss << ((pos % per_row == 0) ? ",\n" : ", ");
        }
        ss << setw(8) << arr[pos];
    }
    return ss.str();
}
/**
 * Return a uniformly distributed integer in [lower_limit, upper_limit].
 *
 * The Mersenne Twister engine is created once, seeded from
 * std::random_device, and reused by every call. The distribution is rebuilt
 * on each call so that the limits passed in are always honoured; while it
 * was `static`, the limits of the very first call were frozen for the
 * lifetime of the program.
 *
 * The shared engine is not synchronized, so concurrent calls from several
 * threads are not safe.
 */
data_t generate_random_number(data_t const lower_limit, data_t const upper_limit){
    static std::random_device rd;
    static std::mt19937 gen(rd());
    std::uniform_int_distribution<data_t> dis(lower_limit, upper_limit);
    return dis(gen);
}
/**
 * Check that arr is in non-decreasing order.
 * On the first element that is smaller than its predecessor, prints its
 * index and both values to cout and returns false; returns true otherwise.
 */
bool isTest_array_is_in_order(vector<data_t>& arr){
    for(idx_t pos = 1; pos < arr.size(); ++pos){
        if(arr[pos] >= arr[pos-1]){
            continue;
        }
        cout << "unordered Index: " << pos << "\n";
        cout << "arr[ii] " << arr[pos] << " arr[ii-1] " << arr[pos-1] << "\n";
        return false;
    }
    return true;
}
/**
 * Merge the adjacent sorted runs arr[left..middle] and arr[middle+1..right]
 * (all bounds inclusive) into one sorted run occupying arr[left..right].
 * Both runs are copied into heap-backed vectors first.
 */
void merge(vector<data_t>& arr, idx_t const left, idx_t const middle, idx_t const right) {
    /* snapshot both runs, because arr[left..right] is overwritten below */
    vector<data_t> const lhs(arr.begin() + left, arr.begin() + middle + 1);
    vector<data_t> const rhs(arr.begin() + middle + 1, arr.begin() + right + 1);

    idx_t li = 0;
    idx_t ri = 0;
    idx_t out = left;
    while((li < lhs.size()) && (ri < rhs.size())){
        /* on ties the element from the left run goes first */
        if(rhs[ri] < lhs[li]){
            arr[out++] = rhs[ri++];
        } else {
            arr[out++] = lhs[li++];
        }
    }
    /* at most one of the two runs still has elements left */
    for(; li < lhs.size(); ++li){
        arr[out++] = lhs[li];
    }
    for(; ri < rhs.size(); ++ri){
        arr[out++] = rhs[ri];
    }
}
/**
 * Recursive top-down merge sort of the inclusive range arr[left..right].
 */
void merge_sort(vector<data_t>& arr, idx_t const left, idx_t const right) {
    if(left >= right){
        return;  // fewer than two elements: already sorted
    }
    idx_t const middle = left + (right - left) / 2;
    // sort each half, then combine them
    merge_sort(arr, left, middle);
    merge_sort(arr, middle + 1, right);
    merge(arr, left, middle, right);
}
/**
 * Thread entry point: sorts the slice of arr that belongs to thread_id.
 *
 * @param arr                 vector shared by all workers
 * @param thread_id           zero-based index of this worker
 * @param NUM_THREADS         total number of workers
 * @param NUMBERS_PER_THREAD  elements assigned to every worker
 * @param OFFSET              leftover elements, handled by the last worker
 *
 * The slice length is computed before the inclusive upper bound. With
 * unsigned indices, deriving `right` as `... - 1` wraps around to a huge
 * value when a worker has no elements (more threads than elements), which
 * sent merge_sort far outside the vector.
 *
 * Only this worker's slice is sorted; this function does not merge it with
 * the slices of other workers.
 */
void thread_merge_sort(vector<data_t>& arr, idx_t const thread_id, idx_t const NUM_THREADS, idx_t const NUMBERS_PER_THREAD, idx_t const OFFSET){
    bool const is_last_worker = (thread_id + 1 == NUM_THREADS);
    idx_t const count = NUMBERS_PER_THREAD + (is_last_worker ? OFFSET : 0);
    if(count < 2){
        return;  // empty or single-element slice: nothing to sort
    }
    idx_t const left = thread_id * NUMBERS_PER_THREAD;
    merge_sort(arr, left, left + count - 1);
}
/*
 * Benchmark driver with hard-coded parameters: REPEAT_CNT times it refills
 * the vector with random values, sorts it with nthreads workers, checks the
 * order and folds the elapsed time into a running figure, then prints that
 * figure, the order flag and the first elements of the vector.
 */
int main(int argc, const char * argv[]){
/*
int const NUM_THREADS = 16; //atoi (argv[1]);
int const LENGTH = 1000000000; //atoi(argv[2]);
*/
/*
int const NUM_THREADS = 8; //atoi (argv[1]);
int const LENGTH = 449999; //atoi(argv[2]);
*/
int const NUM_THREADS = 8; //atoi (argv[1]);
int const LENGTH = 100; //atoi(argv[2])
/* every worker gets NUMBERS_PER_THREAD elements; OFFSET is the remainder */
int const NUMBERS_PER_THREAD = LENGTH / NUM_THREADS;
int const OFFSET = LENGTH % NUM_THREADS;
/* print the size of int in bytes on this platform */
cout << sizeof(int) << "\n";
//srand(time(0)) ;
std::vector<int> array(LENGTH);
//array.reserve(LENGTH);
constexpr size_t nthreads = NUM_THREADS; //std::thread::hardware_concurrency();
std::cout<<"parallel ("<<nthreads<<" threads):"<<std::endl;
uint64_t time = 0;
bool ordered = true;
for(idx_t ii=0; ii<REPEAT_CNT; ++ii){
/* initialize array with random numbers */
/* NOTE: this inner `ii` shadows the repeat counter of the outer loop */
for(int ii=0; ii < LENGTH; ++ii){
//array.push_back(generate_random_number(LOWER_LIM, UPPER_LIM));
array[ii]=(generate_random_number(LOWER_LIM, UPPER_LIM));
}
/* begin timing */
auto start = std::chrono::high_resolution_clock::now();
{
// Launch one worker per slice.
std::vector<std::thread> workers;
for(std::size_t tt=0; tt<nthreads; ++tt){
workers.push_back(thread(thread_merge_sort, ref(array), tt, nthreads, NUMBERS_PER_THREAD, OFFSET));
}
// await thread termination
for(thread& t: workers) {
t.join();
}
// NOTE(review): no step in this function merges the per-thread slices
// with each other before the order check.
ordered &= isTest_array_is_in_order(array);
// Leaves the repeat loop at once, skipping the timing update below.
if(!ordered) break;
}
auto elapsed = std::chrono::high_resolution_clock::now() - start;
auto usec = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
// Halve the sum of the new sample and the running value: recent runs
// weigh more than old ones, so this is not an arithmetic mean.
time = (usec + time)>>1;
}
// and print time
//cout << "Spent " << usec << "[µs] executing " << nthreads << " Threads in parallel working on " << array.size() << " elements " << "\n";
cout << "Spent " << time << "[µs] executing " << nthreads << " Threads in parallel working on " << array.size() << " elements " << "\n";
//cout << "Result is ordered: " << isTest_array_is_in_order(array) << "\n";
cout << "Result is ordered: " << ordered << "\n";
cout << to_array_str(array,100) << "\n";
return 0;
}
不过,虽然这段代码现在不再导致段错误,但用不同的参数测试后发现:除了单线程运行之外,它几乎从不产生完全有序的数组。所以 chqrlie 的分析是正确的:各个线程分别排好序的部分,还需要再相互合并。
您的代码中有多个问题:
- 在 merge 中,您以自动存储期(即在栈上)分配子数组,其总大小等于待排序切片的大小,对于大数组会导致栈溢出。
- isTest_array_is_in_order 只比较了数组的前 2 个元素。
- 线程函数 thread_merge_sort 应该只对自己的切片调用 merge_sort():
/** Thread entry point: sort this worker's own slice of arr and nothing else. */
void thread_merge_sort(vector<int> &arr, int thread_id, int NUM_THREADS, int NUMBERS_PER_THREAD, int OFFSET) {
    const int left = thread_id * NUMBERS_PER_THREAD;
    /* the last worker also takes the OFFSET leftover elements */
    const int extra = (thread_id == NUM_THREADS - 1) ? OFFSET : 0;
    /* inclusive upper bound of the slice */
    const int right = left + NUMBERS_PER_THREAD - 1 + extra;
    merge_sort(arr, left, right);
}
用同时包含 left 和 right 的闭区间来表示切片很容易出错:计算长度时必须小心地加减 1;如果改用不包含 right 的半开区间,代码会更规整,因为长度可以直接写成 right - left。
切片边界和索引变量的类型应为 size_t,而不是 int。
这是一个使用向量作为临时副本的修改版本:
#include <iostream>
#include <thread>
#include <vector>
#include <chrono>
#include <ctime>
#include <algorithm>
/* generate random numbers in the range 1..9 */
#define LOWER_LIM 1
#define UPPER_LIM 10
using namespace std;
/* function declarations */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit);
void merge_sort(vector<int>& array, size_t start, size_t end);
void merge(vector<int>& array, size_t start, size_t middle, size_t end);
void thread_merge_sort(vector<int>& array, size_t start, size_t end);
bool is_vector_sorted(vector<int>& array, size_t length);
/*
 * Entry point: sort LENGTH random integers using NUM_THREADS worker threads.
 *
 * Usage: prog [NUM_THREADS [LENGTH]]   (defaults: 1 thread, 1000 elements)
 *
 * Each worker sorts one contiguous half-open slice [start, end) of the
 * vector; the sorted slices are then merged pairwise on the main thread,
 * doubling the run width on every pass. A failed order check is reported
 * on stderr. Always returns 0.
 */
int main(int argc, const char *argv[]) {
    /* Parse as signed values first: a zero or negative argument must not
       wrap around into a huge size_t or end up as a zero divisor. */
    const long requested_threads = argc > 1 ? strtol(argv[1], NULL, 0) : 1;
    const long requested_length = argc > 2 ? strtol(argv[2], NULL, 0) : 1000;
    const size_t NUM_THREADS = requested_threads > 0 ? size_t(requested_threads) : 1;
    const size_t LENGTH = requested_length > 0 ? size_t(requested_length) : 0;

    std::vector<int> array(LENGTH);

    /* initialize array with random numbers */
    srand(time(0));
    for (size_t i = 0; i < LENGTH; i++) {
        array[i] = generate_random_number(LOWER_LIM, UPPER_LIM);
    }

    /* Never run more workers than there are elements: NUMBERS_PER_THREAD
       would become 0, and the merge loop below, which doubles its step
       starting from that value, would never terminate. */
    const size_t nthreads = std::max<size_t>(1, std::min(NUM_THREADS, LENGTH));
    const size_t NUMBERS_PER_THREAD = LENGTH / nthreads;
    const size_t REMAINDER = LENGTH % nthreads;

    /* Output message before the loop */
    std::cout << "Sorting " << LENGTH << " elements in parallel" <<
        " using " << nthreads << " threads:" << std::endl;

    /* begin timing */
    auto start_clock = std::chrono::high_resolution_clock::now();

    std::vector<std::thread> workers;
    workers.reserve(nthreads);
    for (size_t t = 0; t < nthreads; t++) {
        size_t start = t * NUMBERS_PER_THREAD;
        size_t end = start + NUMBERS_PER_THREAD;
        if (t == nthreads - 1)
            end += REMAINDER;   /* the last worker takes the leftovers */
        workers.push_back(thread(thread_merge_sort, ref(array), start, end));
    }

    /* await thread termination */
    for (thread& t: workers) {
        t.join();
    }

    /* Merge thread slices bottom-up. The last slice may be longer than
       `size`; that is fine because any sub-range of a sorted slice is
       itself sorted. */
    for (size_t size = NUMBERS_PER_THREAD; size < LENGTH; size += size) {
        for (size_t middle = size; middle < LENGTH; middle += size + size) {
            size_t end = std::min(middle + size, LENGTH);
            merge(array, middle - size, middle, end);
        }
    }

    /* end timing */
    auto elapsed = std::chrono::high_resolution_clock::now() - start_clock;
    auto usec = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();

    /* print time */
    std::cout << "Sorting time: " << usec << " microseconds." << std::endl;

    /* test to ensure that the array is in sorted order */
    if (!is_vector_sorted(array, LENGTH)) {
        fprintf(stderr, "Error: array is not sorted!!\n");
    }
    return 0;
}
/* Scale one rand() sample linearly onto [lower_limit, upper_limit] and
   truncate the result to int. */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit) {
    const double span = double(upper_limit - lower_limit);
    /* multiply before dividing, in the same order as before */
    const double scaled = span * rand() / RAND_MAX;
    return static_cast<int>(lower_limit + scaled);
}
/** Thread entry point: forwards the half-open slice [start, end) of arr
 *  to merge_sort. */
void thread_merge_sort(vector<int> &arr, size_t start, size_t end) {
    return merge_sort(arr, start, end);
}
/* Return true when the first `length` elements of arr are in
   non-decreasing order. */
bool is_vector_sorted(vector<int>& arr, size_t length) {
    size_t pos = 1;
    /* advance while every element is at least as large as its predecessor */
    while (pos < length && arr[pos - 1] <= arr[pos]) {
        ++pos;
    }
    return pos >= length;
}
/* Recursive top-down merge sort of the half-open range arr[start, end). */
void merge_sort(vector<int>& arr, size_t start, size_t end) {
    const size_t count = end - start;
    if (count < 2) {
        return;  /* zero or one element: already sorted */
    }
    const size_t middle = start + count / 2;
    merge_sort(arr, start, middle);
    merge_sort(arr, middle, end);
    merge(arr, start, middle, end);
}
/*
 * Merge the adjacent sorted runs arr[start, middle) and arr[middle, end)
 * (half-open ranges) into one sorted run occupying arr[start, end).
 *
 * Requires start <= middle <= end <= arr.size().
 * On ties the element from the left run comes first.
 */
void merge(vector<int>& arr, size_t start, size_t middle, size_t end) {
    using diff_t = vector<int>::difference_type;
    /* Iterator arithmetic instead of &arr[end]: indexing the element one
       past the last is out of range when end == arr.size(). */
    const auto first = arr.begin() + static_cast<diff_t>(start);
    const auto mid = arr.begin() + static_cast<diff_t>(middle);
    const auto last = arr.begin() + static_cast<diff_t>(end);

    /* snapshot both runs, because arr[start, end) is overwritten below */
    const vector<int> left_array(first, mid);
    const vector<int> right_array(mid, last);

    std::merge(left_array.begin(), left_array.end(),
               right_array.begin(), right_array.end(), first);
}
在 merge 中只保存切片的左半部分,性能还可以再提高约 40%:右半部分的元素在被复制到目标位置之前不会被覆盖,因此无需另存。这是修改后的版本:
/*
 * Merge the adjacent sorted runs arr[start, middle) and arr[middle, end)
 * (half-open ranges) into one sorted run occupying arr[start, end).
 *
 * Only the left run is copied. The write position k never overtakes the
 * read position j in the right run, so right-run elements are consumed
 * before they can be overwritten.
 *
 * Requires start <= middle <= end <= arr.size().
 */
void merge(vector<int>& arr, size_t start, size_t middle, size_t end) {
    if (middle == start || middle == end) {
        return;  /* one of the runs is empty: nothing to merge */
    }
    using diff_t = vector<int>::difference_type;
    /* Iterator arithmetic instead of &arr[middle]: indexing is out of
       range when the index equals arr.size(). */
    const vector<int> left_array(arr.begin() + static_cast<diff_t>(start),
                                 arr.begin() + static_cast<diff_t>(middle));
    const size_t left_length = left_array.size();
    size_t i = 0;
    size_t j = middle;
    size_t k = start;
    /* choose from the saved left run and the in-place right run */
    while (i < left_length && j < end) {
        if (left_array[i] <= arr[j]) {
            arr[k++] = left_array[i++];
        } else {
            arr[k++] = arr[j++];
        }
    }
    /* Remaining left-run values go to the tail; any remaining right-run
       values are already in their final positions. */
    std::copy(left_array.begin() + static_cast<diff_t>(i), left_array.end(),
              arr.begin() + static_cast<diff_t>(k));
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.