[英]efficiency of c++ arrays vs std::vector and std::array
我比较了以下几种分配一维数组或二维数组的方式。我发现使用 new 运算符效率更高;也许是因为 std::array 和 std::vector 是对象,它们通用且安全,但会花费更多时间?此外,我不明白为什么在函数外部调用 new 比在函数内部调用更高效?
#include <iostream>
#include <vector>
#include <array>
#include <ctime>
// Benchmark body: heap-allocates 10000 ints with raw new[], writes 3 into every
// element, reads every element back, then releases the array.
void test1 () {
    int *arr = new int[10000];
    for (int i=0; i<10000; ++i) {
        arr[i] = 3;
    }
    for (int i=0; i<10000; ++i) {
        int a = arr[i];  // read-back only; the value is intentionally unused
    }
    // Storage obtained with new[] must be released with delete[]; using plain
    // `delete` on it is undefined behaviour.
    delete [] arr;
}
// Benchmark body: builds a 100x100 jagged array (one new[] per row), writes 3
// into every element, reads every element back, then frees all allocations.
void test11 () {
    int **arr = new int*[100];
    for (int i=0; i<100; ++i) {
        arr[i] = new int[100];
    }
    for (int i=0; i<100; ++i) {
        for (int j=0; j<100; ++j) {
            arr[i][j] = 3;
        }
    }
    for (int i=0; i<100; ++i) {
        for (int j=0; j<100; ++j) {
            int a = arr[i][j];  // read-back only; the value is intentionally unused
        }
    }
    // Every row was allocated separately, so each one needs its own delete[]
    // before the array of row pointers is released; otherwise the rows leak.
    for (int i=0; i<100; ++i) {
        delete [] arr[i];
    }
    delete [] arr;
}
// Benchmark body: same write-then-read pass as the raw-array test, but on a
// std::vector<int> holding 10000 elements.
void test2() {
    std::vector<int> values(10000);
    for (int &slot : values) {
        slot = 3;
    }
    for (const int &slot : values) {
        int a = slot;  // read-back only; the value is intentionally unused
    }
}
// Benchmark body: 100x100 vector-of-vectors, written with 3 and then read back.
void test22() {
    std::vector<std::vector<int> > grid(100, std::vector<int>(100));
    for (std::vector<int> &row : grid) {
        for (int &cell : row) {
            cell = 3;
        }
    }
    for (const std::vector<int> &row : grid) {
        for (const int &cell : row) {
            int a = cell;  // read-back only; the value is intentionally unused
        }
    }
}
// Writes 3 into the first n elements of a caller-owned buffer, then reads each
// of them back. The buffer is neither allocated nor freed here.
void test3(int *arr, int n) {
    int pos = 0;
    while (pos < n) {
        arr[pos] = 3;
        ++pos;
    }
    pos = 0;
    while (pos < n) {
        int a = arr[pos];  // read-back only; the value is intentionally unused
        ++pos;
    }
}
// Writes 3 into the top-left m x n region of a caller-owned jagged array, then
// reads the same region back. No memory is allocated or freed here.
void test33(int **arr, int m, int n) {
    for (int row = 0; row != m && row < m; ++row) {
        int col = 0;
        while (col < n) {
            arr[row][col] = 3;
            ++col;
        }
    }
    for (int row = 0; row != m && row < m; ++row) {
        int col = 0;
        while (col < n) {
            int a = arr[row][col];  // read-back only; the value is intentionally unused
            ++col;
        }
    }
}
// Benchmark body: the write-then-read pass on a local std::array<int, 10000>.
void test4() {
    std::array<int, 10000> values;
    for (int &slot : values) {
        slot = 3;
    }
    for (const int &slot : values) {
        int a = slot;  // read-back only; the value is intentionally unused
    }
}
// Benchmark body: a local 100x100 std::array of std::array, written with 3 and
// then read back.
void test44() {
    std::array<std::array<int, 100>, 100> grid;
    for (std::array<int, 100> &row : grid) {
        for (int &cell : row) {
            cell = 3;
        }
    }
    for (const std::array<int, 100> &row : grid) {
        for (const int &cell : row) {
            int a = cell;  // read-back only; the value is intentionally unused
        }
    }
}
// Runs every benchmark for 1000 rounds and prints one "<elapsed> ms" line per
// benchmark, in the order test1, test11, test2, test22, test3, test33, test4,
// test44. Time is measured with clock().
int main() {
    // Prints the time between two clock() samples, converted to milliseconds.
    const auto report = [](clock_t from, clock_t to) {
        std::cout << static_cast<double>(to - from) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;
    };
    clock_t start, end;

    start = clock();
    for (int i=0; i<1000; ++i) {
        test1();
    }
    end = clock();
    report(start, end);

    start = clock();
    for (int i=0; i<1000; ++i) {
        test11();
    }
    end = clock();
    report(start, end);

    start = clock();
    for (int i=0; i<1000; ++i) {
        test2();
    }
    end = clock();
    report(start, end);

    start = clock();
    for (int i=0; i<1000; ++i) {
        test22();
    }
    end = clock();
    report(start, end);

    // test3: the buffer is allocated and released by the caller on every round.
    start = clock();
    for (int i=0; i<1000; ++i) {
        int *arr = new int[10000];
        test3(arr, 10000);
        delete [] arr;  // new[] must be paired with delete[], not plain delete
    }
    end = clock();
    report(start, end);

    // test33: the 100x100 jagged array is allocated once and reused by all
    // 1000 rounds, so allocation cost is paid a single time.
    start = clock();
    int **arr = new int*[100];
    for (int i=0; i<100; ++i) {
        arr[i] = new int[100];
    }
    for (int i=0; i<1000; ++i) {
        test33(arr, 100, 100);
    }
    // Free each row first; deleting only the pointer array would leak them.
    for (int i=0; i<100; ++i) {
        delete [] arr[i];
    }
    delete [] arr;
    end = clock();
    report(start, end);

    start = clock();
    for (int i=0; i<1000; ++i) {
        test4();
    }
    end = clock();
    report(start, end);

    start = clock();
    for (int i=0; i<1000; ++i) {
        test44();
    }
    end = clock();
    report(start, end);
}
输出为:
90 ms
80 ms
70 ms
120 ms
50 ms
40 ms
100 ms
190 ms
感谢您的帮助。也许我没有把问题描述清楚:我编写了一个会被多次调用的函数,该函数先用 new 分配一个数组,然后再将其删除:
// Allocates a scratch array of 10000 ints, leaves room for the real work, and
// releases the array before returning.
void fun() {
    int *arr = new int[10000]; //maybe very big
    //todo something else
    // Arrays from new[] must be released with delete[]; plain `delete` is
    // undefined behaviour.
    delete [] arr;
}
有人告诉我这不是很有效,因为它每次都需要新建和删除,现在我有两个问题:
1.内存管理的正确方法是什么?
// NOTE(review): an array allocated with new[] has to be released with delete[]; plain `delete` here is undefined behaviour.
int *arr = new int[]; delete arr;
// NOTE(review): delete [] here releases only the array of pointers; any rows allocated separately with new[] (not shown in this snippet) would each need their own delete [].
int **arr = new int*[]; delete [] arr;
错误? 也许像这样:
// NOTE(review): as written, the loop deletes the same pointer `arr` on every
// iteration; freeing row i would be spelled `delete [] arr[i];`.
for (int i=0; i<n; ++i){
delete [] arr;
}
// NOTE(review): if `arr` came from new[], the matching release is `delete [] arr;`.
delete arr;
2.我编写此函数的最佳方法是什么
我认为您的测试不正确。 也许您正在调试模式下运行? 我看不出有什么方法可以使test11()比test1()更快(即使它没有释放所有内存)。
此外,在上述许多情况下,发布模式编译器会优化您的代码,因为它实际上不执行任何操作:
// Read-only pass: every element is copied into 'a', and 'a' is never used
// afterwards, so the loop has no observable effect.
for (int i=0; i<100; ++i) {
for (int j=0; j<100; ++j) {
int a = arr[i][j];
}
}
任何编译器都几乎可以肯定会消除这种情况,因为未使用'a',这意味着不会再使用'i'和'j'。
// Write pass: stores 3 into each of the 100x100 elements.
for (int i=0; i<100; ++i) {
for (int j=0; j<100; ++j) {
arr[i][j] = 3;
}
}
有些编译器甚至可能聪明到把这段代码也消除掉,因为这块内存之后不会再被读取,但多半不会这样做。
我建议不要担心 vector 与 new int[] 之间的性能开销。在调试模式下,您可以免费获得调试辅助功能(边界检查);在发布模式下,只要不调用会在越界时抛出异常的函数(例如 at()),就实际用途而言,这些代码的性能基本相同。另外,使用 vector 时您不必操心内存管理(test1() 和 test11() 都不完全正确)。
让我们做一些工作来改进测试,并通过正确使用它来给标准库一个机会……
#include <iostream>
#include <vector>
#include <array>
#include <ctime>
#include <memory>
#include <algorithm>
#include <iterator>
// Benchmark body: a unique_ptr-owned array of 10000 ints is filled with 3 and
// then read back; the storage is released automatically on return.
void test1 () {
    constexpr int kCount = 10000;
    const auto buffer = std::make_unique<int[]>(kCount);
    std::fill_n(buffer.get(), kCount, 3);
    for (int pos = 0; pos < kCount; ++pos) {
        int a = buffer[pos];  // read-back only; the value is intentionally unused
    }
}
// Benchmark body: a 100x100 jagged array in which both the row table and each
// row are owned by unique_ptr; every element is set to 3 and then read back.
void test11 () {
    constexpr int kDim = 100;
    auto grid = std::make_unique<std::unique_ptr<int[]>[]>(kDim);
    for (int row = 0; row < kDim; ++row) {
        grid[row] = std::make_unique<int[]>(kDim);
    }
    for (int row = 0; row < kDim; ++row) {
        int col = 0;
        while (col < kDim) {
            grid[row][col] = 3;
            ++col;
        }
    }
    for (int row = 0; row < kDim; ++row) {
        int col = 0;
        while (col < kDim) {
            int a = grid[row][col];  // read-back only; the value is intentionally unused
            ++col;
        }
    }
}
// Benchmark body: a 10000-element std::vector<int> is filled with 3 via
// std::fill and then read back element by element.
void test2() {
    std::vector<int> values(10000);
    std::fill(values.begin(), values.end(), 3);
    for (const int &slot : values) {
        int a = slot;  // read-back only; the value is intentionally unused
    }
}
// Benchmark body: a 100x100 vector-of-vectors; each row is filled with 3 via
// std::fill, then every element is read back.
void test22() {
    std::vector<std::vector<int> > grid(100, std::vector<int>(100));
    for (auto &row : grid) {
        std::fill(row.begin(), row.end(), 3);
    }
    for (const auto &row : grid) {
        for (const int &cell : row) {
            int a = cell;  // read-back only; the value is intentionally unused
        }
    }
}
// Fills the first n elements of a caller-owned buffer with 3, then reads each
// of them back. The buffer is neither allocated nor freed here.
void test3(int *arr, int n) {
    std::fill_n(arr, n, 3);
    int pos = 0;
    while (pos < n) {
        int a = arr[pos];  // read-back only; the value is intentionally unused
        ++pos;
    }
}
// Writes 3 into the top-left m x n region of a caller-owned jagged array, then
// reads the same region back. Ownership stays with the caller; nothing is
// allocated or released here.
void test33(const std::unique_ptr<std::unique_ptr<int[]>[]>& arr, int m, int n) {
    for (int row = 0; row < m; ++row) {
        int col = 0;
        while (col < n) {
            arr[row][col] = 3;
            ++col;
        }
    }
    for (int row = 0; row < m; ++row) {
        int col = 0;
        while (col < n) {
            int a = arr[row][col];  // read-back only; the value is intentionally unused
            ++col;
        }
    }
}
// Benchmark body: a local std::array<int, 10000> is filled with 3 and then
// read back element by element.
void test4() {
    std::array<int, 10000> values;
    values.fill(3);
    for (const int &slot : values) {
        int a = slot;  // read-back only; the value is intentionally unused
    }
}
// Benchmark body: a local 100x100 std::array of std::array; each row is filled
// with 3, then every element is read back.
void test44() {
    std::array<std::array<int, 100>, 100> grid;
    for (auto &row : grid) {
        row.fill(3);
    }
    for (const auto &row : grid) {
        for (const int &cell : row) {
            int a = cell;  // read-back only; the value is intentionally unused
        }
    }
}
// Runs every benchmark for 1000 rounds and prints one "test <id> <elapsed> ms"
// line per benchmark. Time is measured with clock().
int main() {
    // Runs `body` once and prints "<label><elapsed> ms" for it.
    const auto timed = [](const char *label, auto &&body) {
        const clock_t started = clock();
        body();
        const clock_t finished = clock();
        std::cout << label << static_cast<double>(finished - started) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;
    };
    // Invokes `step` 1000 times.
    const auto repeat = [](auto &&step) {
        for (int round = 0; round < 1000; ++round) {
            step();
        }
    };

    timed("test 1 ", [&] { repeat(test1); });
    timed("test 11 ", [&] { repeat(test11); });
    timed("test 2 ", [&] { repeat(test2); });
    timed("test 22 ", [&] { repeat(test22); });

    // test3 receives a fresh caller-allocated buffer on every round.
    timed("test 3 ", [&] {
        repeat([] {
            int *buffer = new int[10000];
            test3(buffer, 10000);
            delete [] buffer;
        });
    });

    // test33 reuses one 100x100 jagged array across all rounds; the single
    // allocation and the release are both inside the timed region.
    timed("test 33 ", [&] {
        auto grid = std::make_unique<std::unique_ptr<int[]>[]>(100);
        for (auto row = 0; row < 100; ++row) {
            grid[row] = std::make_unique<int[]>(100);
        }
        repeat([&grid] { test33(grid, 100, 100); });
        grid.reset();
    });

    timed("test 4 ", [&] { repeat(test4); });
    timed("test 44 ", [&] { repeat(test44); });
}
-O2编译时在我的计算机上的结果:
test 1 0.002 ms
test 11 13.506 ms
test 2 2.753 ms
test 22 13.738 ms
test 3 1.42 ms
test 33 1.552 ms
test 4 0 ms
test 44 0 ms
我们还要注意,数组是“小”的,并且正在重复分配和释放。 如果您可以重新使用缓冲区,则时序差异将完全消失。
还要注意:test33速度很快,因为它从不重新分配内存-您正在重新使用缓冲区。
在纯 C 数组上运行时,编译器能够识别出展开循环的机会,从而节省少量的迭代开销。也有可能(尽管不太可能)编译器不会优化对 STL 容器的访问循环,因为它不知道 STL 容器是否会修改某些成员变量。
关于 test11 为何优于 test1,我最好的猜测是:编译器交错执行了外层循环的多次迭代(利用了现代 x86/x64 处理器是超标量处理器这一事实),例如:
// Illustration only: a guess at how the compiler might unroll the write loop,
// handling four consecutive rows (i .. i+3) per inner iteration.
for(int j = 0; j < 100; ++j)
{
    for(int i = 0; i < 100; i+=4)
    {
        arr[i][j] = 3;
        arr[i+1][j] = 3;
        arr[i+2][j] = 3;
        // The fourth store must target i+3: with a step of 4, i+4 would skip
        // every fourth row and index row 100 on the last iteration.
        arr[i+3][j] = 3;
    }
}
或完全其他的东西。 编译器可以执行一些非常复杂的循环转换,以获取每一点性能。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.