I am writing a code to measure the time consumption of a sequence of codes in kernel by loading the codes as module into the kernel. I uses common rdtsc routine to calculate the time. Interesting thing is similar routine running in user mode results in normal values, whereas the results is always 0 when running in kernel mode, no matter how many lines of codes I have added into the time_count function. The calculation I use here is a common matrix product function, and the running cycles should increase rapidly through the increasing of matrix dimension. Can anyone point out the mistakes in my code why I could not measure the cycle number in kernel?
#include <linux/init.h>
#include <linux/module.h>
int matrix_product(){
int array1[500][500], array2[500][500], array3[500][500];
int i, j, k, sum;
for(i = 0; i < 50000; i++){
for(j = 0; j < 50000; j++){
array1[i][j] = 5*i + j;
array2[i][j] = 5*i + j;
}
}
for(i = 0; i < 50000; i++){
for(j = 0; j < 50000; j++){
for(k = 0; k < 50000; k++)
sum += array1[i][k]*array2[k][j];
array3[i][j] = sum;
sum = 0;
}
}
return 0;
}
static __inline__ unsigned long long rdtsc(void)
{
unsigned long hi, lo;
__asm__ __volatile__ ("xorl %%eax,%%eax\ncpuid" ::: "%rax", "%rbx", "%rcx", "%rdx");
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((unsigned long long)lo) | (((unsigned long long)hi)<<32) ;
}
static int my_init(void)
{
unsigned long str, end, curr, best, tsc, best_curr;
long i, t;
#define time_count(codes) for(i=0; i<120000; i++){str=rdtsc(); codes; end=rdtsc(); curr=end-str; if(curr<best)best=curr;}
best = ~0;
time_count();
tsc = best;
best = ~0;
time_count(matrix_product());
best_curr = best;
printk("<0>matrix product: %lu ticks\n", best_curr-tsc);
return 0;
}
static void my_exit(void){
return;
}
module_init(my_init);
module_exit(my_exit);`
Any help is appreciated! Thanks.
rdtsc
is not guaranteed to be available on every CPU, or to run at a constant rate, or be consistent between different cores.
You should use a reliable and portable function like getrawmonotonic
unless you have special requirements for the timestamps.
If you really want to use cycles directly, the kernel already defines get_cycles and cpuid functions for this.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.