如何测量C math.h 库函数的执行时间？

Question

通过使用time.h标头，我测得sqrt()的执行时间为 2 纳秒（在 Linux 终端中使用gcc命令）和 44 纳秒（在 Ubuntu 终端中使用g++命令）。 谁能告诉我其他测量math.h库函数执行时间的方法？

下面是代码：

#include <time.h>
#include <stdio.h>
#include<math.h>

/*
 * Benchmark attempt from the question: calls cbrt() 10^9 times and reports
 * the wall-clock time between two time() readings.
 *
 * difftime() returns the elapsed time in seconds.  Because the loop runs
 * 10^9 times, the number of seconds for the whole loop is numerically the
 * same as the average number of nanoseconds per iteration, which is why the
 * message is labelled "Nanoseconds".
 *
 * NOTE(review): the argument of cbrt() is a constant and the result is
 * discarded inside the loop, so an optimizing compiler may evaluate the call
 * at compile time or drop the loop entirely.
 */
int main()
{

    time_t begin,end; // calendar-time stamps taken before and after the loop
    time (&begin); // note time before execution
    for(int i=0;i<1000000000;i++)  // 10^9 iterations, so total seconds == nanoseconds per call
    {
        cbrt(9999999);  // cube root from the math library; result is not used
    }
    time (&end); // note time after execution
    double difference = difftime (end,begin); // elapsed seconds for the whole loop
    printf ("time taken for function() %.2lf in Nanoseconds.\n", difference );
    printf(" cube root is :%f \t",cbrt(9999999));

    return 0;
}

输出：

by using **gcc**: time taken for function() 2.00 in Nanoseconds.
                  cube root is :215.443462
by using **g++**: time taken for function() 44.00 in Nanoseconds.
                  cube root is:215.443462

Linux终端结果

^{（终端提示符的长度可能略有出入：）}

$ g++ t1.c
$ ./a.out
time taken for function() 44.00 in Nanoseconds.
 cube root is :215.443462
$ gcc t1.c
$ ./a.out
time taken for function() 2.00 in Nanoseconds.
 cube root is :215.443462
$

Answer 1

如何测量c math.h 库函数的执行时间？

C 编译器通常可以分析众所周知的标准库函数，并把cbrt(9999999);这类参数固定的调用直接替换为215.443462...。 此外，由于删除循环中的函数调用不会影响代码的功能，整个循环也可能被优化掉。

使用volatile可以防止大部分情况发生，因为当函数被替换、删除时，编译器不能假设没有影响。

/* Timing loop rewritten so that the cbrt() call is neither folded nor removed. */
for(int i=0;i<1000000000;i++) {
    // Replaces the original statement: cbrt(9999999);
    // volatile input: the value has to be read on every iteration, so the
    // compiler cannot treat the argument as a compile-time constant.
    volatile double x = 9999999.0;
    // volatile output: the store to y has to be performed, so the result of
    // the call cannot simply be discarded.
    volatile double y = cbrt(x);
}

time()的粒度通常只有 1 秒，如果十亿次循环只产生几秒钟，请考虑更多循环。

可以使用下面的代码来扣除循环本身的开销。

/*
 * Measures the cost of cbrt() net of loop overhead: the first loop performs
 * the same volatile load/store without calling cbrt(), the second loop adds
 * the call, and the difference of the two elapsed times is attributed to
 * cbrt() itself.
 */
time_t begin,middle,end;
time (&begin);
// Baseline loop: same volatile traffic, no math call.
for(int i=0;i<1000000000;i++) {
    volatile double x = 9999999.0;
    volatile double y = x;
}
time (&middle);
// Measured loop: identical except for the cbrt() call.
for(int i=0;i<1000000000;i++) {
    volatile double x = 9999999.0;
    volatile double y = cbrt(x);
}
time (&end);
// (time of measured loop) - (time of baseline loop), in seconds.
double difference = difftime(end,middle) - difftime(middle,begin);

Answer 2

给代码计时是一门艺术，其中一部分就是确保编译器不会把您的代码优化掉。 对于标准库函数，编译器很可能知道它是什么、做什么，并且能够在编译时对常量参数求值。 在您的示例中，调用cbrt(9999999); 提供了两个优化机会。 首先，cbrt()的值可以在编译时计算，因为参数是一个常量。 其次，返回值没有被使用，而且标准函数没有副作用，所以编译器可以完全丢弃这个调用。 您可以通过捕获结果来避免这些问题（例如，计算从 0 到 10 亿（减 1）的立方根的总和，并在计时代码之后打印该值）。

`tm97.c`

当我编译你的代码时，删掉了评论，我得到了：

$ cat tm97.c
#include <time.h>
#include <stdio.h>
#include <math.h>

int main(void)
{
    time_t begin, end;
    time(&begin);
    for (int i = 0; i < 1000000000; i++)
    {
        cbrt(9999999);
    }
    time(&end);
    double difference = difftime(end, begin);
    printf("time taken for function() %.2lf in Nanoseconds.\n", difference );
    printf(" cube root is :%f \t", cbrt(9999999));

    return 0;
}
$ make tm97
gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes         tm97.c -o tm97 -L../lib -lsoq 
tm97.c: In function ‘main’:
tm97.c:11:9: error: statement with no effect [-Werror=unused-value]
   11 |         cbrt(9999999);
      |         ^~~~
cc1: all warnings being treated as errors
rmk: error code 1
$

我在一台 2017 款 MacBook Pro 上使用 GCC 9.3.0，系统为 macOS Mojave 10.14.6，并安装了 XCode 11.3.1 (11C504) — XCode 11.4 需要 Catalina 10.15.2，但公司还没来得及安排对它的支持。 有趣的是，当用g++编译相同的代码时，它编译时没有警告（错误）：

$ ln -s tm97.c tm89.cpp
make tm89 SXXFLAGS=-std=c++17 CXX=g++
g++ -O3 -g  -I../inc -std=c++17 -Wall -Wextra -Werror -L../lib tm89.cpp -lsoq -o tm89
$

我经常使用 GitHub 上我的SOQ （堆栈溢出问题）存储库中提供的一些计时代码作为src/libsoq子目录中的文件timer.c和timer.h 。 该代码仅在我的库中编译为 C 代码，所以我创建了一个简单的包装头， timer2.h ，以便下面的程序可以使用#include "timer2.h"并且它可以在 C 和 C++ 编译中正常工作：

/*
 * timer2.h - wrapper around timer.h usable from both C and C++.
 *
 * When compiled as C++ the declarations from timer.h are wrapped in
 * extern "C" so they get C linkage; when compiled as C, timer.h is included
 * as-is.
 */
#ifndef TIMER2_H_INCLUDED
#define TIMER2_H_INCLUDED

#ifdef __cplusplus
extern "C" {
#endif
#include "timer.h"
#ifdef __cplusplus
}
#endif

#endif /* TIMER2_H_INCLUDED */

`tm29.cpp`和`tm31.c`

此代码使用sqrt()函数进行测试。 它累加平方根的总和。 它在您的计时代码外面又包了一层来自timer.h / timer.c的计时代码 — 即类型Clock和函数clk_init() 、 clk_start() 、 clk_stop()和clk_elapsed_us() ，用来计算从时钟启动到最后一次停止之间经过的时间（以微秒为单位）。

源代码可以由 C 编译器或 C++ 编译器编译。

#include <time.h>
#include <stdio.h>
#include <math.h>
#include "timer2.h"

/*
 * Times 10^9 sqrt() calls in two ways: with time()/difftime() as in the
 * question, and with the Clock timer declared via timer2.h wrapped around
 * that.  The results are accumulated in sum and printed after the timed
 * region so that each call's value is actually used.
 */
int main(void)
{
    time_t begin, end;
    double sum = 0.0;
    int i;              /* declared outside the loop: printed after it ends */
    Clock clk;
    clk_init(&clk);
    clk_start(&clk);    /* Clock timer brackets the time() measurement */
    time(&begin);
    for (i = 0; i < 1000000000; i++)
    {
        sum += sqrt(i);
    }
    time(&end);
    clk_stop(&clk);
    double difference = difftime(end, begin);   /* whole seconds for the loop */
    char buffer[32];    /* receives the formatted elapsed time from clk_elapsed_us() */
    /* With 10^9 iterations, total seconds equal nanoseconds per iteration,
     * hence the "nanoseconds"/"ns" labels on both values. */
    printf("Time taken for sqrt() is %.2lf nanoseconds (%s ns).\n",
           difference, clk_elapsed_us(&clk, buffer, sizeof(buffer)));
    printf("Sum of square roots from 0 to %d is: %f\n", i, sum);

    return 0;
}

`tm41.c`和`tm43.cpp`

这段代码与前面的代码几乎相同，但测试的函数是cbrt() （立方根）函数。

#include <time.h>
#include <stdio.h>
#include <math.h>
#include "timer2.h"

/*
 * Times 10^9 cbrt() calls in two ways: with time()/difftime() as in the
 * question, and with the Clock timer declared via timer2.h wrapped around
 * that.  The results are accumulated in sum and printed after the timed
 * region so that each call's value is actually used.
 */
int main(void)
{
    time_t begin, end;
    double sum = 0.0;
    int i;              /* declared outside the loop: printed after it ends */
    Clock clk;
    clk_init(&clk);
    clk_start(&clk);    /* Clock timer brackets the time() measurement */
    time(&begin);
    for (i = 0; i < 1000000000; i++)
    {
        sum += cbrt(i);
    }
    time(&end);
    clk_stop(&clk);
    double difference = difftime(end, begin);   /* whole seconds for the loop */
    char buffer[32];    /* receives the formatted elapsed time from clk_elapsed_us() */
    /* With 10^9 iterations, total seconds equal nanoseconds per iteration,
     * hence the "nanoseconds"/"ns" labels on both values. */
    printf("Time taken for cbrt() is %.2lf nanoseconds (%s ns).\n",
           difference, clk_elapsed_us(&clk, buffer, sizeof(buffer)));
    printf("Sum of cube roots from 0 to %d is: %f\n", i, sum);

    return 0;
}

`tm59.c`和`tm61.cpp`

此代码使用fabs()而不是sqrt()或cbrt() 。 它仍然是一个函数调用，但它可能是内联的。 它显式调用从int到double的转换； 如果没有那个转换，GCC 会抱怨它应该使用整数abs()函数来代替。

#include <time.h>
#include <stdio.h>
#include <math.h>
#include "timer2.h"

/*
 * Times 10^9 fabs() calls in two ways: with time()/difftime() as in the
 * question, and with the Clock timer declared via timer2.h wrapped around
 * that.  The results are accumulated in sum and printed after the timed
 * region so that each call's value is actually used.
 */
int main(void)
{
    time_t begin, end;
    double sum = 0.0;
    int i;              /* declared outside the loop: printed after it ends */
    Clock clk;
    clk_init(&clk);
    clk_start(&clk);    /* Clock timer brackets the time() measurement */
    time(&begin);
    for (i = 0; i < 1000000000; i++)
    {
        /* Explicit int-to-double conversion before taking the absolute value. */
        sum += fabs((double)i);
    }
    time(&end);
    clk_stop(&clk);
    double difference = difftime(end, begin);   /* whole seconds for the loop */
    char buffer[32];    /* receives the formatted elapsed time from clk_elapsed_us() */
    /* With 10^9 iterations, total seconds equal nanoseconds per iteration,
     * hence the "nanoseconds"/"ns" labels on both values. */
    printf("Time taken for fabs() is %.2lf nanoseconds (%s ns).\n",
           difference, clk_elapsed_us(&clk, buffer, sizeof(buffer)));
    printf("Sum of absolute values from 0 to %d is: %f\n", i, sum);

    return 0;
}

`tm73.cpp`

该文件也将原始代码与我的时序包装代码一起使用。 C 版本不能编译——C++ 版本可以：

#include <time.h>
#include <stdio.h>
#include <math.h>
#include "timer2.h"

/*
 * The loop from the question, unchanged, with the Clock timer declared via
 * timer2.h wrapped around the time()/difftime() measurement.
 *
 * NOTE(review): the cbrt() call inside the loop has a constant argument and
 * its result is discarded, so the compiler is free to remove it.
 */
int main(void)
{
    time_t begin, end;
    Clock clk;
    clk_init(&clk);
    clk_start(&clk);    /* Clock timer brackets the time() measurement */
    time(&begin);
    for (int i = 0; i < 1000000000; i++)
    {
        cbrt(9999999);  /* result intentionally unused, as in the question */
    }
    time(&end);
    clk_stop(&clk);
    double difference = difftime(end, begin);   /* whole seconds for the loop */
    char buffer[32];    /* receives the formatted elapsed time from clk_elapsed_us() */
    printf("Time taken for cbrt() is %.2lf nanoseconds (%s ns).\n",
           difference, clk_elapsed_us(&clk, buffer, sizeof(buffer)));
    printf("Cube root is: %f\n", cbrt(9999999));

    return 0;
}

定时

我使用了命令timecmd（它会报告程序的开始和停止时间以及 PID，是time命令的一种变体），再加上各个程序内置的计时代码，得到了以下结果。 （ rmk只是make的另一种实现。）

$ for prog in tm29 tm31 tm41 tm43 tm59 tm61 tm73
> do rmk $prog && timecmd -ur -- $prog
> done
g++ -O3 -g -I../inc -std=c++11 -Wall -Wextra -Werror tm29.cpp -o tm29 -L../lib -lsoq 
2020-03-28 08:47:50.040227 [PID 19076] tm29
Time taken for sqrt() is 1.00 nanoseconds (1.700296 ns).
Sum of square roots from 0 to 1000000000 is: 21081851051977.781250
2020-03-28 08:47:51.747494 [PID 19076; status 0x0000]  -  1.707267s  -  tm29
gcc -O3 -g -I../inc -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes tm31.c -o tm31 -L../lib -lsoq 
2020-03-28 08:47:52.056021 [PID 19088] tm31
Time taken for sqrt() is 1.00 nanoseconds (1.679867 ns).
Sum of square roots from 0 to 1000000000 is: 21081851051977.781250
2020-03-28 08:47:53.742383 [PID 19088; status 0x0000]  -  1.686362s  -  tm31
gcc -O3 -g -I../inc -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes tm41.c -o tm41 -L../lib -lsoq 
2020-03-28 08:47:53.908285 [PID 19099] tm41
Time taken for cbrt() is 7.00 nanoseconds (6.697999 ns).
Sum of cube roots from 0 to 1000000000 is: 749999999499.628418
2020-03-28 08:48:00.613357 [PID 19099; status 0x0000]  -  6.705072s  -  tm41
g++ -O3 -g -I../inc  -std=c++11 -Wall -Wextra -Werror tm43.cpp -o tm43 -L../lib -lsoq 
2020-03-28 08:48:00.817975 [PID 19110] tm43
Time taken for cbrt() is 7.00 nanoseconds (6.614539 ns).
Sum of cube roots from 0 to 1000000000 is: 749999999499.628418
2020-03-28 08:48:07.438298 [PID 19110; status 0x0000]  -  6.620323s  -  tm43
gcc -O3 -g -I../inc -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes tm59.c -o tm59 -L../lib -lsoq 
2020-03-28 08:48:07.598344 [PID 19121] tm59
Time taken for fabs() is 1.00 nanoseconds (1.114822 ns).
Sum of absolute values from 0 to 1000000000 is: 499999999067108992.000000
2020-03-28 08:48:08.718672 [PID 19121; status 0x0000]  -  1.120328s  -  tm59
g++ -O3 -g  -I../inc -std=c++11 -Wall -Wextra -Werror tm61.cpp -o tm61 -L../lib -lsoq 
2020-03-28 08:48:08.918745 [PID 19132] tm61
Time taken for fabs() is 2.00 nanoseconds (1.117780 ns).
Sum of absolute values from 0 to 1000000000 is: 499999999067108992.000000
2020-03-28 08:48:10.042134 [PID 19132; status 0x0000]  -  1.123389s  -  tm61
g++ -O3 -g  -I../inc -std=c++11 -Wall -Wextra -Werror tm73.cpp -o tm73 -L../lib -lsoq 
2020-03-28 08:48:10.236899 [PID 19143] tm73
Time taken for cbrt() is 0.00 nanoseconds (0.000004 ns).
Cube root is: 215.443462
2020-03-28 08:48:10.242322 [PID 19143; status 0x0000]  -  0.005423s  -  tm73
$

我已经多次运行这些程序； 上面的时间代表了我每次得到的东西。 可以得出以下几个结论：

sqrt() (1.7 ns) 比cbrt() (6.7 ns) 快。
fabs() (1.1 ns) 比sqrt() (1.7 ns) 快。
但是， fabs()对循环开销和从int转换为double所花费的时间给出了适度的近似值。
当不使用cbrt()的结果时，编译器会消除循环。
当使用 C++ 编译器编译时，问题中的代码完全删除了循环，只留下对time()的调用进行测量。 clk_elapsed_us()打印的结果是执行clk_start()和clk_stop()之间的代码所花费的时间，以秒为单位，分辨率为微秒——0.000004即 4 微秒的经过时间。 该值以ns为单位标记，因为当循环执行 10 亿次时，以秒为单位的经过时间也代表一次循环的时间（以纳秒为单位）——一秒有 10 亿纳秒。
timecmd报告的时间与程序自身报告的时间一致。 timecmd报告的时间还包含启动进程（ fork()和exec() ）的开销以及进程中 I/O 的开销。
尽管没有显示，使用clang和clang++ （而不是 GCC 9.3.0）得到的时间非常相似，只是cbrt()代码每次迭代需要大约 7.5 ns 而不是 6.7 ns。 其他程序的时间差异基本上是噪音。

^{数字后缀都是2位数的素数。} ^{除了将不同的程序分开之外，它们没有其他意义。}

Answer 3

正如@Jonathan Leffler 评论的那样，编译器可以优化您的 C/C++ 代码。 如果 C 代码只是从 0 循环到 1000，而不对计数器i做任何事情（我的意思是，不打印它，也不在任何其他操作、索引等中使用中间值），编译器甚至可能不会生成与该循环对应的汇编代码。 可能的算术运算甚至会被预先计算。 对于下面的代码：

/* Returns five times the argument. */
int foo(int x) {
    const int scaled = 5 * x;
    return scaled;
}

/* Illustrative caller: the argument passed to foo() is a known constant. */
int main() {
    int x = 3;
    int y = foo(x);   /* 3 * 5: a value the compiler can work out at compile time */
    ...
    ...
}

编译器为函数foo只生成两行汇编代码并不奇怪（编译器甚至可以绕过对函数foo的调用，直接生成内联指令）：

mov $15, %eax
; compiler will not bother multiplying 5 by 3
; but just move the pre-computed '15' to register
ret
; and then return

如何测量C math.h 库函数的执行时间？

问题描述

3 个解决方案

解决方案1
1 2020-03-28 08:22:39

解决方案2
1 2020-03-28 15:10:27

`tm97.c`

`tm29.cpp`和`tm31.c`

`tm41.c`和`tm43.cpp`

`tm59.c`和`tm61.c`

`tm73.cpp`

定时

解决方案3
0 2020-03-28 08:00:57

如何测量C math.h 库函数的执行时间？

问题描述

3 个解决方案

解决方案1 1 2020-03-28 08:22:39

解决方案2 1 2020-03-28 15:10:27

tm97.c

tm29.cpp和tm31.c

tm41.c和tm43.cpp

tm59.c和tm61.c

tm73.cpp

定时

解决方案3 0 2020-03-28 08:00:57

解决方案1
1 2020-03-28 08:22:39

解决方案2
1 2020-03-28 15:10:27

`tm97.c`

`tm29.cpp`和`tm31.c`

`tm41.c`和`tm43.cpp`

`tm59.c`和`tm61.c`

`tm73.cpp`

解决方案3
0 2020-03-28 08:00:57