简体   繁体   English

在并行合并排序中增加向量大小时出现分段错误

[英]Segmentation fault when I increase the size of vector in parallel merge sort

When I try to run the program using 2 threads and a size of more than 4.5 million it creates a segmentation fault.当我尝试使用 2 个线程和超过 450 万的大小运行程序时,它会产生分段错误。 Anything below that number runs smoothly.低于该数字的任何内容都可以顺利运行。 In short very large numbers generate a segfault and I don't know why.简而言之,非常大的数字会产生段错误,我不知道为什么。 I would like to know if the error has to do with the thread creation or the distribution of work to the threads.我想知道错误是否与线程创建或工作分配给线程有关。 Some help will be appreciated.一些帮助将不胜感激。 Below is the code.下面是代码。

#include <iostream>
#include <thread>
#include <vector>
#include <chrono>
#include <ctime>
#include <algorithm>

/* define variables for the problem */
#define UPPER_LIM 10
#define LOWER_LIM  1

using namespace std;

/* function definitions */

int generate_random_number(unsigned int lower_limit, unsigned int upper_limit);
void merge_sort(vector<int>& array, int left, int right);
void merge(vector<int>& array, int left, int middle, int right);
void thread_merge_sort(vector<int>& array, int thread_id, int n, int p, int q);
bool isTest_array_is_in_order(vector<int>& array, int LENGTH);

/*
 * Program entry point.
 *
 * argv[1] is parsed as the number of worker threads and argv[2] as the number
 * of elements. The vector is filled with random values, every worker runs
 * thread_merge_sort() on it, the elapsed time is printed, and the result is
 * checked with isTest_array_is_in_order().
 *
 * NOTE(review): argc is never inspected, so argv[1] and argv[2] are read even
 * when they were not supplied; a thread count of 0 divides by zero below.
 * NOTE(review): after the join nothing combines the per-thread slices.
 */
int main(int argc, const char *argv[]) {
    int NUM_THREADS = atoi(argv[1]);
    int LENGTH = atoi(argv[2]);
    /* whole elements per thread, and the remainder left over by the division */
    int NUMBERS_PER_THREAD = LENGTH / NUM_THREADS;
    int OFFSET = LENGTH % NUM_THREADS;

    srand(time(0));

    /* capacity is reserved up front; the loop below appends LENGTH values */
    std::vector<int> array;
    array.reserve(LENGTH);

    /* initialize array with random numbers */
    for (int i = 0; i < LENGTH; i++) {
        array.push_back(generate_random_number(LOWER_LIM, UPPER_LIM));
    }

    /* begin timing */
    auto start = std::chrono::high_resolution_clock::now();

    const size_t nthreads = NUM_THREADS; //std::thread::hardware_concurrency();
    {
        // Pre loop
        std::cout << "parallel(" << nthreads << " threads):" << std::endl;
        std::vector<std::thread> workers;

        /* one worker per thread id; ref(array) hands every worker the same vector */
        for (std::size_t t = 0; t < nthreads; t++) {
            workers.push_back(thread(thread_merge_sort, ref(array), t, nthreads, NUMBERS_PER_THREAD, OFFSET));
        }

        for (thread& t: workers) { // await thread termination
            t.join();
        }
    }

    auto elapsed = std::chrono::high_resolution_clock::now() - start;
    auto usec    = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();

    // and print time
    std::cout << "Spent " << usec << " executing  " << nthreads << " in parallel " << " array size " << LENGTH << std::endl;

    /* end timing */

    /* test to ensure that the array is in sorted order */
    if (!isTest_array_is_in_order(array, LENGTH)) {
        fprintf(stderr, "Error: array is not sorted!!\n");
        /* NOTE(review): the failure path also exits with status 0 */
        return 0;
    }
}

/* scale rand() onto [lower_limit, upper_limit] and truncate the result to int */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit) {
    const unsigned int span = upper_limit - lower_limit;
    /* rand() / RAND_MAX as a fraction in [0, 1] */
    const double fraction = static_cast<double>(rand()) / RAND_MAX;
    return static_cast<int>(lower_limit + span * fraction);
}

/**
 * Worker entry point: sorts the slice of arr that belongs to thread thread_id.
 *
 * The slice is [thread_id * NUMBERS_PER_THREAD,
 * (thread_id + 1) * NUMBERS_PER_THREAD - 1], both bounds inclusive; the last
 * thread (thread_id == NUM_THREADS - 1) additionally takes the OFFSET
 * trailing elements.
 *
 * NOTE(review): the first merge_sort() call already covers [left, right]; the
 * second call on [left + 1, right] and the trailing merge() work on that same
 * range again.
 */
void thread_merge_sort(vector<int> &arr, int thread_id, int NUM_THREADS, int NUMBERS_PER_THREAD, int OFFSET) {
    int left = thread_id * (NUMBERS_PER_THREAD);
    int right = (thread_id + 1) * (NUMBERS_PER_THREAD) - 1;
    if (thread_id == NUM_THREADS - 1) {
        right += OFFSET;
    }
    int middle = left + (right - left) / 2;
    /* slices of fewer than two elements are skipped */
    if (left < right) {
        merge_sort(arr, left, right);
        merge_sort(arr, left + 1, right);
        merge(arr, left, middle, right);
    }
}

/*
 * Intended to report whether the first LENGTH elements of arr are in
 * non-decreasing order.
 *
 * NOTE(review): both branches of the if return during the first iteration, so
 * only arr[0] and arr[1] are ever compared. When LENGTH < 2 the loop body never
 * runs and control reaches the end of this non-void function without a return.
 */
bool isTest_array_is_in_order(vector<int>& arr, int LENGTH) {
    for (int i = 1; i < LENGTH; i++) {
        if (arr[i] >= arr[i - 1]) {
            return true;
        } else {
            return false;
        }
    }
}

/* recursively sort arr[left..right]; both bounds are inclusive */
void merge_sort(vector<int>& arr, int left, int right) {
    /* fewer than two elements: nothing to do */
    if (left >= right) {
        return;
    }
    const int half = (right - left) / 2;
    const int split = left + half;
    merge_sort(arr, left, split);
    merge_sort(arr, split + 1, right);
    /* combine the two sorted halves */
    merge(arr, left, split, right);
}

/*
 * Merge the two adjacent sorted runs arr[left..middle] and
 * arr[middle + 1..right] (all bounds inclusive) back into arr[left..right].
 * On ties the element from the left run is written first.
 *
 * NOTE(review): left_array and right_array are variable-length arrays with
 * automatic storage (a compiler extension in C++); together they hold
 * right - left + 1 ints.
 */
void merge(vector<int>& arr, int left, int middle, int right) {
    int i = 0;
    int j = 0;
    int k = 0;
    int left_length = middle - left + 1;
    int right_length = right - middle;
    int left_array[left_length];
    int right_array[right_length];

    /* copy values to left array (this loop's i shadows the outer i) */
    for (int i = 0; i < left_length; i++) {
        left_array[i] = arr[left + i];
    }

    /* copy values to right array (this loop's j shadows the outer j) */
    for (int j = 0; j < right_length; j++) {
        right_array[j] = arr[middle + 1 + j];
    }

    i = 0;
    j = 0;
    /** chose from right and left arrays and copy; k counts elements written from arr[left] on */
    while (i < left_length && j < right_length) {
        if (left_array[i] <= right_array[j]) {
            arr[left + k] = left_array[i];
            i++;
        } else {
            arr[left + k] = right_array[j];
            j++;
        }
        k++;
    }
    /* copy the remaining values to the array; at most one of the two loops below runs */
    while (i < left_length) {
        arr[left + k] = left_array[i];
        k++;
        i++;
    }
    while (j < right_length) {
        arr[left + k] = right_array[j];
        k++;
        j++;
    }
}

So first, I stubbed your merge_sort and merge, run with 2 Tasks and 4500000 Elements: no segfault.所以首先,我存根了您的 merge_sort 和合并,运行 2 个任务和 4500000 个元素:没有段错误。

To allow complete debugging, please consider providing the full code...对于完整的调试,您可能会考虑提供完整的代码...

here my compiling, slightly modified code:这是我的编译,稍加修改的代码:

    #include <iostream>
    #include <thread>
    #include <vector>
    #include <chrono>
    #include <ctime>
    #include <cstdint>
    #include <algorithm>

    #include <thread>

    /* define variables for the problem */
    constexpr size_t const UPPER_LIM = 10;
    constexpr size_t const LOWER_LIM =  1;

    using namespace std;

    /* function definitions */

    /* Stub: returns immediately without reading or modifying its arguments. */
    void merge_sort(vector<int>& array, int left, int right){
      return;
    }

    /* Stub: returns immediately without reading or modifying its arguments. */
    void merge(vector<int>& array, int left, int middle, int right){
      return;
    }

    /* Stub: always returns false, whatever the vector contains. */
    bool isTest_array_is_in_order(vector<int>& array, int LENGTH){
      return false;
    }

    /**
     * Scale rand() onto [lower_limit, upper_limit] and truncate the result to int.
     */
    int generate_random_number(unsigned int lower_limit, unsigned int upper_limit){
      unsigned int const span = upper_limit - lower_limit;
      double const fraction = static_cast<double>(rand()) / RAND_MAX;
      return static_cast<int>(lower_limit + span * fraction);
    }

    /**
     * assigns work to each thread to perform merge sort
     *
     * Thread thread_id sorts the inclusive index range
     * [thread_id * NUMBERS_PER_THREAD, (thread_id + 1) * NUMBERS_PER_THREAD - 1];
     * the last thread (thread_id == NUM_THREADS - 1) additionally takes the
     * OFFSET leftover elements at the end of the vector.
     */
    void thread_merge_sort(vector<int> &arr, int thread_id, int NUM_THREADS, int NUMBERS_PER_THREAD, int OFFSET){
      int const left = thread_id * NUMBERS_PER_THREAD;
      int right = left + NUMBERS_PER_THREAD - 1;
      if (thread_id == NUM_THREADS - 1) {
        right += OFFSET;
      }
      // A single merge_sort() call sorts the whole slice; sorting
      // [left + 1, right] again and merging afterwards would only repeat work.
      if (left < right) {
        merge_sort(arr, left, right);
      }
    }

    /**
     * Test driver with a fixed problem size: fills a vector with random
     * numbers, runs thread_merge_sort() on worker threads and prints the
     * elapsed time in microseconds.
     */
    int main(int argc, const char * argv[]){

      // Problem size is hard-wired here; command-line parsing is disabled.
      int const thread_total  = 2;        //atoi (argv[1]);
      int const element_total = 4500000;  //atoi(argv[2]);
      int const slice_size    = element_total / thread_total;
      int const leftover      = element_total % thread_total;

      cout << sizeof(int) << "\n";

      srand(time(0));

      /* fill the vector with random numbers */
      std::vector<int> values;
      values.reserve(element_total);
      while(values.size() < static_cast<std::size_t>(element_total)){
        values.push_back(generate_random_number(LOWER_LIM, UPPER_LIM));
      }

      /* timing starts here */
      auto const t_begin = std::chrono::high_resolution_clock::now();

      const size_t nthreads = thread_total; //std::thread::hardware_concurrency();
      std::cout<<"parallel ("<<nthreads<<" threads):"<<std::endl;

      {
        std::vector<std::thread> pool;
        for(std::size_t id=0; id!=nthreads; ++id){
          pool.emplace_back(thread_merge_sort, ref(values), id, nthreads, slice_size, leftover);
        }

        // wait for every worker to finish
        for(auto& worker: pool) {
          worker.join();
        }
      }

      /* timing ends here */
      auto const usec = std::chrono::duration_cast<std::chrono::microseconds>(
          std::chrono::high_resolution_clock::now() - t_begin).count();

      std::cout << "Spent " << usec << " executing  " << nthreads << " in parallel " << " array size " << values.size() << "\n";

      return 0;
    }

[Edit]: [编辑]:

Your isTest_array_is_in_order can not work, as you return just after the first two elements are compared.您的 isTest_array_is_in_order 无法工作,因为您在比较前两个元素后立即返回。

    /*
     * Quoted as posted. Both branches of the if return during the first loop
     * iteration, so only arr[0] and arr[1] are ever compared; when LENGTH < 2
     * the function ends without returning a value.
     */
    bool isTest_array_is_in_order(vector<int>& arr, int LENGTH)
    {
        for(int i=1;i<LENGTH;i++){
            if(arr[i]>=arr[i-1]){
                return true;
    }   else{

        return false;
        }

        }
    }

Here is a version that should work:这是一个应该工作的版本:

    /**
     * Check that the first LENGTH elements of arr are in non-decreasing order.
     * Returns true when no element is smaller than its predecessor.
     */
    bool isTest_array_is_in_order(vector<int>& arr, int LENGTH){
      int pos = 1;
      while(pos < LENGTH){
        if(arr[pos-1] > arr[pos]){
          return false;  // descending pair found
        }
        ++pos;
      }
      return true;
    }

[Edit]: [编辑]:

So at first with your code I was able to confirm your segfaults所以一开始我可以用你的代码确认你的段错误

I changed the code, and now it seems to run pretty fine just tested 16 threads for 44999999 Elements, works nicely我更改了代码,现在它似乎运行得很好,只是为 44999999 个元素测试了 16 个线程,效果很好

After looking at your code, it crashed right here:查看您的代码后,它就在这里崩溃了:

    /* merge function */
    void merge(vector<int>& arr, int left, int middle, int right) {
        int i = 0;
        int j = 0;
        int k = 0;
        int left_length = middle - left + 1;
        int right_length = right - middle;
        int left_array[left_length];
        int right_array[right_length];

Here you create 2 local arrays, but on Stack, not on the Heap.在这里,您创建了 2 个本地数组,但是在堆栈上,而不是在堆上。 Stack is normally limited depending on your OS to some low MB like 10 or so.根据您的操作系统,堆栈通常被限制为一些低 MB,例如 10 左右。

So I replaced your C-Arrays with vectors and optimized the Code a bit further: more elaborate types, changed the main, so I can now sort different randomized variations with one run.所以我用向量替换了你的 C-Arrays 并进一步优化了代码:更复杂的类型,改变了主要的,所以我现在可以一次运行对不同的随机变化进行排序。

so here is my code now:所以这是我的代码:

#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <cstdint>
#include <thread>
#include <chrono>
#include <ctime>
#include <iterator>
#include <vector>
#include <array>
#include <random>
#include <algorithm>

using namespace std;

using idx_t  = size_t;
using data_t = int;

/* define variables for the problem */
constexpr data_t const UPPER_LIM =  2000000;
constexpr data_t const LOWER_LIM = 0;

constexpr idx_t const REPEAT_CNT = 10000;

/* function definitions */
std::string to_array_str(vector<data_t>& arr, idx_t max_elem){
  std::stringstream ss;

  idx_t ii=1;
  idx_t cnt = 0;

  for(auto _d:arr){
    ss << setw(8) << _d;
    if(0==(ii%10)){
      ss << ",\n";
      ii=0;
    }else{
      ss << ", ";
    }
    if(cnt>=max_elem) break;
    ++ii;
    ++cnt;
  }
  return ss.str();
}

/**
 * generate random numbers within the specified limit
 *
 * Returns a uniformly distributed value in [lower_limit, upper_limit], both
 * bounds inclusive; requires lower_limit <= upper_limit. The engine is seeded
 * once from std::random_device on the first call and reused afterwards, so
 * concurrent calls from several threads are not synchronized.
 */
data_t generate_random_number(data_t const lower_limit, data_t const upper_limit){
  static std::mt19937 gen{std::random_device{}()};
  // Built per call: a function-local static distribution would keep the limits
  // of the very first call and ignore the arguments of every later one.
  std::uniform_int_distribution<data_t> dis(lower_limit, upper_limit);
  return dis(gen);
}

/**
 * test to ensure that the array is in sorted order
 */
bool isTest_array_is_in_order(vector<data_t>& arr){
  bool unorderd = false;
  for(idx_t ii=1;ii<arr.size();++ii){
    if(arr[ii]<arr[ii-1]){
      unorderd = true;
      cout << "unordered Index: " << ii << "\n";
      cout << "arr[ii] " << arr[ii] << " arr[ii-1] " << arr[ii-1] << "\n";

      break;
    }
  }
  return !unorderd;
}

/**
 * merge function
 */
void merge(vector<data_t>& arr, idx_t const left, idx_t const middle, idx_t const right) {
  idx_t const  left_length = middle - left + 1;
  idx_t const right_length = right - middle;
  vector<data_t>  left_array( left_length);
  vector<data_t> right_array(right_length);

  constexpr bool const use_loopcopy = true;
  if(use_loopcopy){
  /* copy values to left array */
  for(idx_t ii=0; ii < left_length; ++ii){
      left_array[ii] = arr[left + ii];
  }
  /* copy values to right array */
  for(idx_t ii=0; ii < right_length; ++ii){
    right_array[ii] = arr[middle + 1 + ii];
  }
  }

  constexpr bool const use_libcode = false;
  if(use_libcode){
  {
    auto from =  arr.begin();
    auto to   = from;
    std::advance(from,left);
    std::advance(to,middle+1);
    std::copy(from,to,left_array.begin());
  }
  {
    auto from = arr.begin();
    auto to   = from;
    std::advance(from,middle+1);
    std::advance(to,right+1);
    std::copy(from,to,right_array.begin());
  }
  }

  idx_t ii = 0;
  idx_t jj = 0;
  idx_t kk = 0;
  /** chose from right and left arrays and copy */
  while((ii < left_length) && (jj < right_length)){
    if(left_array[ii] <= right_array[jj]){
      arr[left + kk] = left_array[ii];
      ++ii;
    } else {
      arr[left + kk] = right_array[jj];
      ++jj;
    }
    ++kk;
  }

  /* copy the remaining values to the array */
  while(ii < left_length){
    arr[left + kk] = left_array[ii];
    ++kk;
    ++ii;
  }

  while(jj < right_length){
    arr[left + kk] = right_array[jj];
    ++kk;
    ++jj;
  }
  return;
}

/**
 * perform merge sort
 */
void merge_sort(vector<data_t>& arr, idx_t const left, idx_t const right) {
  if(left < right){
    idx_t middle = left + ((right - left)>>1);
    //Divide
    merge_sort(arr, left, middle);
    merge_sort(arr, middle + 1, right);
    //Conquer
    merge(arr, left, middle, right);
  }
  return;
}

/**
 * assigns work to each thread to perform merge sort
 */
void thread_merge_sort(vector<data_t>& arr, idx_t const thread_id, idx_t const NUM_THREADS, idx_t const NUMBERS_PER_THREAD, idx_t const OFFSET){
  idx_t left  =  thread_id * (NUMBERS_PER_THREAD);
  idx_t right = (thread_id + 1) * (NUMBERS_PER_THREAD) - 1;
  if(thread_id == (NUM_THREADS - 1)) {
    right += OFFSET;
  }
  merge_sort(arr,left,right);
  return;
}

/**
 * Benchmark driver: up to REPEAT_CNT times, refill the vector with random
 * numbers, let nthreads workers run thread_merge_sort() on it, and check the
 * whole vector with isTest_array_is_in_order(). The loop stops at the first
 * run whose result is not ordered.
 *
 * NOTE(review): each worker is given its own slice and nothing after the join
 * combines the slices before the whole-vector check.
 */
int main(int argc, const char * argv[]){

  /*
  int const NUM_THREADS        = 16; //atoi (argv[1]);
  int const LENGTH             = 1000000000; //atoi(argv[2]);
  */
  /*
  int const NUM_THREADS        = 8; //atoi (argv[1]);
  int const LENGTH             = 449999; //atoi(argv[2]);
  */
  int const NUM_THREADS        = 8; //atoi (argv[1]);
  int const LENGTH             = 100; //atoi(argv[2])

  int const NUMBERS_PER_THREAD = LENGTH / NUM_THREADS;
  int const OFFSET             = LENGTH % NUM_THREADS;

  cout << sizeof(int) << "\n";

  //srand(time(0)) ;

  std::vector<int> array(LENGTH);
  //array.reserve(LENGTH);

  constexpr size_t  nthreads = NUM_THREADS; //std::thread::hardware_concurrency();
  std::cout<<"parallel ("<<nthreads<<" threads):"<<std::endl;


  // Running timing value in microseconds; updated once per completed repetition.
  uint64_t time = 0;
  bool ordered = true;
  for(idx_t ii=0; ii<REPEAT_CNT; ++ii){

    /* initialize array with random numbers */
    // NOTE(review): this int ii shadows the outer idx_t ii for the fill loop.
    for(int ii=0; ii < LENGTH; ++ii){
      //array.push_back(generate_random_number(LOWER_LIM, UPPER_LIM));
      array[ii]=(generate_random_number(LOWER_LIM, UPPER_LIM));
    }

    /* begin timing */
    auto start = std::chrono::high_resolution_clock::now();

    {
      // Pre loop
      std::vector<std::thread> workers;

      for(std::size_t tt=0; tt<nthreads; ++tt){
        workers.push_back(thread(thread_merge_sort, ref(array), tt, nthreads, NUMBERS_PER_THREAD, OFFSET));
      }

      // await thread termination
      for(thread& t: workers) {
        t.join();
      }

      // NOTE(review): the order check runs before the end timestamp is taken,
      // and the break skips the timing update for the failing repetition.
      ordered &= isTest_array_is_in_order(array);
      if(!ordered) break;
    }

      auto elapsed = std::chrono::high_resolution_clock::now() - start;
      auto usec    = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
      // NOTE(review): halves the sum of the new sample and the running value,
      // so recent repetitions weigh more than earlier ones.
      time = (usec + time)>>1;
  }

  // and print time
  //cout << "Spent " << usec << "[µs] executing " << nthreads << " Threads in parallel working on " << array.size() << " elements " << "\n";
  cout << "Spent " << time << "[µs] executing " << nthreads << " Threads in parallel working on " << array.size() << " elements " << "\n";

  //cout << "Result is ordered: " << isTest_array_is_in_order(array) << "\n";
  cout << "Result is ordered: " << ordered << "\n";

  cout << to_array_str(array,100) << "\n";

  return 0;
}

But even though that code no longer causes segfaults, playing around with different parameters shows that it hardly ever produces a sorted array, except for single-thread runs.但是,即使该代码现在不再导致段错误,使用不同的参数,事实证明,除了单线程运行之外,它几乎不会产生排序数组。 So the analysis of chqrlie is correct: the slices sorted by the individual threads still need to be merged into one sorted array.所以chqrlie的分析是正确的,因为单线程的排序部分也需要排序。

There are multiple problems in your code:您的代码中有多个问题:

  • in merge , you allocate the subarrays with automatic storage with a combined size equal to that of the slice to sort, causing a stack overflow for large arrays.merge ,您分配具有自动存储的子数组,其组合大小等于要排序的切片的大小,从而导致大数组的堆栈溢出
  • the function isTest_array_is_in_order only compares the first 2 elements of the array.函数isTest_array_is_in_order只比较数组的前 2 个元素。
  • splitting the array into N slices to sort individually will only do that: sort portions of the array.将数组分成 N 个切片进行单独排序只会这样做:对数组的部分进行排序。 You need a final phase where you merge the slices into a totally sorted array.您需要最后一个阶段,将切片合并为一个完全排序的数组。
  • the thread function thread_merge_sort should just call merge_sort() on its slice:线程函数thread_merge_sort应该只在其切片上调用merge_sort()

     /** assigns work to each thread to perform merge sort */
     void thread_merge_sort(vector<int> &arr, int thread_id, int NUM_THREADS, int NUMBERS_PER_THREAD, int OFFSET) {
         const int left = thread_id * NUMBERS_PER_THREAD;
         const bool is_last = (thread_id == NUM_THREADS - 1);
         /* inclusive upper bound; the last thread also takes the OFFSET leftovers */
         const int right = left + NUMBERS_PER_THREAD - 1 + (is_last ? OFFSET : 0);
         merge_sort(arr, left, right);
     }
  • specifying the slices with left and right included is error prone: you must adjust the lengths cautiously, whereas if right was excluded, the code is much more regular as the length can be computed directly as right - left .指定包含leftright的切片容易出错:您必须谨慎地调整长度,而如果排除right ,则代码更加规则,因为长度可以直接计算为right - left

  • the slice boundaries and the index variables should be typed as size_t rather than int .切片边界和索引变量的类型应为size_t而不是int

Here is a modified version that uses vectors for the temporary copies:这是一个使用向量作为临时副本的修改版本:

#include <iostream>
#include <thread>
#include <vector>
#include <chrono>
#include <ctime>
#include <algorithm>

/* generate random numbers in the range 1..9 */
#define LOWER_LIM  1
#define UPPER_LIM 10

using namespace std;

/* function declarations */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit);
void merge_sort(vector<int>& array, size_t start, size_t end);
void merge(vector<int>& array, size_t start, size_t middle, size_t end);
void thread_merge_sort(vector<int>& array, size_t start, size_t end);
bool is_vector_sorted(vector<int>& array, size_t length);

/**
 * Sort LENGTH random integers with NUM_THREADS worker threads and report the
 * elapsed time.
 *
 * Usage: prog [threads [length]] -- threads defaults to 1, length to 1000.
 * Each worker sorts one contiguous slice of the vector; the sorted slices are
 * then merged pairwise on the main thread.
 *
 * @return 0 on success, 1 on invalid arguments or if the result is not sorted
 */
int main(int argc, const char *argv[]) {
    const long threads_arg = argc > 1 ? strtol(argv[1], nullptr, 0) : 1;
    const long length_arg = argc > 2 ? strtol(argv[2], nullptr, 0) : 1000;
    /* a thread count of 0 would divide by zero below, and negative values
       would wrap around to huge size_t quantities */
    if (threads_arg < 1 || length_arg < 0) {
        fprintf(stderr, "Error: thread count must be >= 1 and length must be >= 0\n");
        return 1;
    }
    const size_t NUM_THREADS = static_cast<size_t>(threads_arg);
    const size_t LENGTH = static_cast<size_t>(length_arg);

    std::vector<int> array(LENGTH);

    /* initialize array with random numbers */
    srand(time(0));
    for (size_t i = 0; i < LENGTH; i++) {
        array[i] = generate_random_number(LOWER_LIM, UPPER_LIM);
    }

    const size_t nthreads = NUM_THREADS; //std::thread::hardware_concurrency();
    const size_t NUMBERS_PER_THREAD = LENGTH / nthreads;
    const size_t REMAINDER = LENGTH % nthreads;

    /* Output message before the loop */
    std::cout << "Sorting " << LENGTH << " elements in parallel" <<
        " using " << nthreads << " threads:" << std::endl;

    /* begin timing */
    auto start_clock = std::chrono::high_resolution_clock::now();

    std::vector<std::thread> workers;
    workers.reserve(nthreads);

    /* slice t is [t * NUMBERS_PER_THREAD, (t + 1) * NUMBERS_PER_THREAD);
       the last slice also takes the REMAINDER trailing elements */
    for (size_t t = 0; t < nthreads; t++) {
        size_t start = t * NUMBERS_PER_THREAD;
        size_t end = start + NUMBERS_PER_THREAD;
        if (t == nthreads - 1)
            end += REMAINDER;
        workers.push_back(thread(thread_merge_sort, ref(array), start, end));
    }
    /* await thread termination */
    for (thread& t: workers) {
        t.join();
    }
    /* merge thread slices bottom-up: the run width doubles on every pass.
       With fewer elements than threads NUMBERS_PER_THREAD is 0; the last
       thread has then sorted the whole array already, and a width of 0 would
       never advance the loops below. */
    if (NUMBERS_PER_THREAD > 0) {
        for (size_t size = NUMBERS_PER_THREAD; size < LENGTH; size += size) {
            for (size_t middle = size; middle < LENGTH; middle += size + size) {
                size_t end = min(middle + size, LENGTH);
                merge(array, middle - size, middle, end);
            }
        }
    }

    /* end timing */
    auto elapsed = std::chrono::high_resolution_clock::now() - start_clock;
    auto usec    = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();

    /* print time */
    std::cout << "Sorting time: " << usec << " microseconds." << std::endl;

    /* test to ensure that the array is in sorted order */
    if (!is_vector_sorted(array, LENGTH)) {
        fprintf(stderr, "Error: array is not sorted!!\n");
        return 1;
    }
    return 0;
}

/* scale rand() onto [lower_limit, upper_limit] and truncate the result to int */
int generate_random_number(unsigned int lower_limit, unsigned int upper_limit) {
    const double span = double(upper_limit - lower_limit);
    /* multiply first, then divide, so the intermediate stays a double */
    const double offset = span * rand() / RAND_MAX;
    return int(lower_limit + offset);
}

/**
 * Thread entry point: sorts the slice of arr delimited by start and end by
 * forwarding both bounds unchanged to merge_sort().
 */
void thread_merge_sort(vector<int> &arr, size_t start, size_t end) {
    merge_sort(arr, start, end);
}

/* report whether the first `length` elements of arr are in non-decreasing order */
bool is_vector_sorted(vector<int>& arr, size_t length) {
    size_t next = 1;
    /* advance while each element is at least as large as its predecessor */
    while (next < length && arr[next - 1] <= arr[next]) {
        ++next;
    }
    /* the scan reaches the end only when no descending pair was found */
    return next >= length;
}

/* recursively sort the half-open range arr[start, end) */
void merge_sort(vector<int>& arr, size_t start, size_t end) {
    const size_t count = end - start;
    if (count < 2) {
        return;  /* zero or one element */
    }
    const size_t middle = start + count / 2;
    merge_sort(arr, start, middle);
    merge_sort(arr, middle, end);
    /* combine the two sorted halves */
    merge(arr, start, middle, end);
}

/*
 * merge function
 *
 * Merges the adjacent sorted runs arr[start, middle) and arr[middle, end)
 * into one sorted run occupying arr[start, end). On ties the element from the
 * left run is written first.
 *
 * Requires start <= middle <= end <= arr.size().
 */
void merge(std::vector<int>& arr, size_t start, size_t middle, size_t end) {
    using diff_t = std::vector<int>::difference_type;
    /* Build the copies from iterators: end may equal arr.size(), and
       &arr[arr.size()] would index one past the last element. */
    const auto first = arr.begin();
    const std::vector<int> left_array(first + static_cast<diff_t>(start),
                                      first + static_cast<diff_t>(middle));
    const std::vector<int> right_array(first + static_cast<diff_t>(middle),
                                       first + static_cast<diff_t>(end));
    const size_t left_length = left_array.size();
    const size_t right_length = right_array.size();

    size_t i = 0;
    size_t j = 0;
    size_t k = start;
    /** chose from right and left arrays and copy */
    while (i < left_length && j < right_length) {
        if (left_array[i] <= right_array[j]) {
            arr[k++] = left_array[i++];
        } else {
            arr[k++] = right_array[j++];
        }
    }
    /* copy the remaining values to the array; at most one loop below runs */
    while (i < left_length) {
        arr[k++] = left_array[i++];
    }
    while (j < right_length) {
        arr[k++] = right_array[j++];
    }
}

The performance can be further improved by about 40% by saving only the left portion of the slice in merge : the elements of the right part of the slice are not overwritten unless they have already been copied.通过在merge仅保存切片的左侧部分,性能可以进一步提高约 40%:切片右侧部分的元素不会被覆盖,除非它们已经被复制。 Here is a modified version:这是一个修改后的版本:

/*
 * merge function
 *
 * Merges the adjacent sorted runs arr[start, middle) and arr[middle, end)
 * into one sorted run occupying arr[start, end). Only the left run is copied:
 * the write position never passes the read position in the right run, so
 * right-run elements are read before they can be overwritten.
 *
 * Requires start <= middle <= end <= arr.size().
 */
void merge(std::vector<int>& arr, size_t start, size_t middle, size_t end) {
    using diff_t = std::vector<int>::difference_type;
    /* save the left part of the slice; iterators are used because middle may
       equal arr.size(), where &arr[middle] would index past the last element */
    const auto first = arr.begin();
    const std::vector<int> left_array(first + static_cast<diff_t>(start),
                                      first + static_cast<diff_t>(middle));
    const size_t left_length = left_array.size();

    size_t i = 0;
    size_t j = middle;
    size_t k = start;
    /** chose from right and left arrays and copy */
    while (i < left_length && j < end) {
        if (left_array[i] <= arr[j]) {
            arr[k++] = left_array[i++];
        } else {
            arr[k++] = arr[j++];
        }
    }
    /* copy the remaining left values; leftover right values are already in place */
    while (i < left_length) {
        arr[k++] = left_array[i++];
    }
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM