I am trying to split a simple, long-running loop across threads to improve its performance.
Here is the threaded version:
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
using namespace std::chrono;
// Adds every integer in the inclusive range [start, end] to *sum.
//
// start, end: bounds of the range; an empty range (start > end) adds nothing.
// sum:        destination accumulator; its previous value is kept and the
//             range total is added on top of it.
//
// The running total is kept in a local variable and written through the
// pointer exactly once. Touching *sum on every iteration forces a memory
// read-modify-write per element and, when several threads work on
// destinations that share a cache line, makes the cores fight over that line.
// Concurrent callers must still pass a distinct destination per thread: the
// final update of *sum is not atomic.
void funcSum(long long int start, long long int end, long long int *sum)
{
    long long int partial = 0;
    for (auto i = start; i <= end; ++i)
    {
        partial += i;
    }
    *sum += partial;
}
int main()
{
long long int start = 10, end = 1900000000;
long long int sum = 0;
auto startTime = high_resolution_clock::now();
thread t1(funcSum, start, end / 2, &sum);
thread t2(funcSum, end / 2 + 1 , end, &sum);
t1.join();
t2.join();
auto stopTime = high_resolution_clock::now();
auto duration = duration_cast<seconds>(stopTime - startTime);
cout << "Sum: " << sum << endl;
cout << duration.count() << " Seconds";
return 0;
}
And here is the normal code (without threads):
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
using namespace std::chrono;
// Walks the inclusive range [start, end] and adds each value to *sum.
// The value already stored in *sum is kept; an empty range (start > end)
// leaves it unchanged.
void funcSum(long long int start, long long int end, long long int *sum)
{
    long long int current = start;
    while (current <= end)
    {
        *sum += current;
        ++current;
    }
}
// Single-threaded baseline: sums the integers in [10, 1900000000] with one
// call to funcSum and prints the total and the elapsed whole seconds.
int main()
{
    const long long int first = 10;
    const long long int last = 1900000000;
    long long int total = 0;

    const auto begunAt = high_resolution_clock::now();
    funcSum(first, last, &total);
    const auto finishedAt = high_resolution_clock::now();

    // duration_cast<seconds> truncates, so sub-second differences are not shown.
    const auto elapsed = duration_cast<seconds>(finishedAt - begunAt);
    cout << "Sum: " << total << endl;
    cout << elapsed.count() << " Seconds";
    return 0;
}
Sum: 1805000000949999955 5 Seconds Process finished with exit code 0
In both cases, the time spent is 5 seconds.
Why does the threaded version not improve performance? How can I use threads to reduce the time needed to sum this range?
Fixed version of threaded code:
// Computes the sum of the integers in the inclusive range start ... end.
//
// Usage: construct with the bounds, call funcSum(), then read `sum`.
class Summer {
public:
    long long int start;    // first value of the range (inclusive)
    long long int end;      // last value of the range (inclusive)
    long long int sum = 0;  // result of the most recent funcSum() call

    Summer(long long int aStart, long long int aEnd)
        : start(aStart),
          end(aEnd)
    {
    }

    // Recomputes `sum` from scratch; an empty range (start > end) yields 0.
    //
    // The total is built in a local variable and stored into the member once.
    // Writing the member on every iteration makes each worker thread hammer
    // its object's memory, and Summer objects allocated next to each other can
    // sit on the same cache line, so the threads slow each other down even
    // though they never touch the same variable.
    void funcSum()
    {
        long long int total = 0;
        for (auto i = start; i <= end; ++i)
        {
            total += i;
        }
        sum = total;
    }
};
class SummerFunctor {
Summer& mSummer;
public:
SummerFunctor(Summer& aSummer)
: mSummer(aSummer)
{
}
void operator()()
{
mSummer.funcSum();
}
};
// Version with n thread objects reports
// 1 threads, sum = 1805000000949999955, 1587 ms
// 2 threads, sum = 1805000000949999955, 2547 ms
// 4 threads, sum = 1805000000949999955, 1251 ms
// 6 threads, sum = 1805000000949999955, 916 ms
int main()
{
long long int start = 10, end = 1900000000;
long long int sum = 0;
auto startTime = high_resolution_clock::now();
const size_t threadCount = 6;
if (threadCount < 2) {
funcSum(start, end, &sum);
} else {
Summer* summers[threadCount];
std::thread* threads[threadCount];
// Start threads
auto val = start;
auto partitionSize = (end-start) / threadCount;
for (size_t i = 0; i < threadCount; ++i) {
auto partitionEnd = std::min(start + partitionSize, end);
summers[i] = new Summer(start, partitionEnd);
start = partitionEnd + 1;
SummerFunctor functor (*summers[i]);
threads[i] = new std::thread(functor);
}
// Join threads
for (size_t i = 0; i < threadCount; ++i) {
threads[i]->join();
sum += summers[i]->sum;
delete threads[i];
delete summers[i];
}
}
auto stopTime = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(stopTime - startTime);
cout << threadCount << " threads, sum = " << sum << ", " << duration.count() << " ms" << std::endl;
return 0;
}
I had to wrap the Summer object in a functor because std::thread copies the callable handed to it, and that copy cannot be accessed afterwards. Execution time improves as more threads are used (see the running times in the comments above). Possible reasons for this:
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.