[英]perf_event_open - how to monitoring multiple events
有誰知道如何設置可以觸發 PMU 以通過perf_event_open()
監視多個(類型)事件的perf_event_attr
結構?
與perf record -e cycles,faults ls
一樣,它有兩種不同的事件類型(PERF_TYPE_HARDWARE 和 PERF_TYPE_SOFTWARE),但在perf_event_open 的聯機幫助頁示例中, perf_event_attr.type
只能分配單個值。
任何建議將不勝感激,謝謝!
20170208更新感謝@gudok給我指路,結果好像有些不正常。 演示程序如下(用於測量整個系統的CPU周期和緩存未命中):
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/unistd.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>
#include <time.h>
struct read_format {
uint64_t nr;
struct {
uint64_t value;
uint64_t id;
} values[];
};
int main(int argc, char* argv[]) {
struct perf_event_attr pea;
int fd1, fd2;
uint64_t id1, id2;
uint64_t val1, val2;
char buf[4096];
struct read_format* rf = (struct read_format*) buf;
int i,j;
struct timespec time, time2;
time.tv_sec = 1;
time.tv_nsec = 0;
memset(&pea, 0, sizeof(struct perf_event_attr));
pea.type = PERF_TYPE_HARDWARE;
pea.size = sizeof(struct perf_event_attr);
pea.config = PERF_COUNT_HW_CPU_CYCLES;
pea.disabled = 1;
pea.exclude_kernel = 1;
pea.exclude_hv = 1;
pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
fd1 = syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
ioctl(fd1, PERF_EVENT_IOC_ID, &id1);
memset(&pea, 0, sizeof(struct perf_event_attr));
pea.type = PERF_TYPE_HARDWARE;
pea.size = sizeof(struct perf_event_attr);
pea.config = PERF_COUNT_HW_CACHE_MISSES;
pea.disabled = 1;
pea.exclude_kernel = 1;
pea.exclude_hv = 1;
pea.precise_ip = 2; // want to using PEBS
pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
fd2 = syscall(__NR_perf_event_open, &pea, 0, -1, fd1 /*!!!*/, 0);
ioctl(fd2, PERF_EVENT_IOC_ID, &id2);
ioctl(fd1, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
ioctl(fd1, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
while (1) {
nanosleep(&time, &time2);
//ioctl(fd1, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
read(fd1, buf, sizeof(buf));
for (i = 0; i < rf->nr; i++) {
if (rf->values[i].id == id1) {
val1 = rf->values[i].value;
} else if (rf->values[i].id == id2) {
val2 = rf->values[i].value;
}
}
printf("cpu cycles: %"PRIu64"\n", val1);
printf("cache misses: %"PRIu64"\n", val2);
}
return 0;
}
輸出是:
cpu cycles: 120 // Just have about 120 CPU cycles in a second
cache misses: 0 // and doesn't have any cache miss?
cpu cycles: 233
cache misses: 0
cpu cycles: 352
cache misses: 0
cpu cycles: 455
cache misses: 0
cpu cycles: 562
cache misses: 0
cpu cycles: 673
cache misses: 0
cpu cycles: 794
cache misses: 0
cpu cycles: 907
cache misses: 0
cpu cycles: 1011
cache misses: 0
cpu cycles: 1129
cache misses: 3
cpu cycles: 1269
cache misses: 4
cpu cycles: 1423
這有點棘手。
我們像往常一樣創建第一個計數器。 此外,我們傳遞PERF_FORMAT_GROUP
和PERF_FORMAT_ID
以便能夠同時使用多個計數器。 這個櫃台將是我們的組長。
struct perf_event_attr pea;
int fd1, fd2;
uint64_t id1, id2;
memset(&pea, 0, sizeof(struct perf_event_attr));
pea.type = PERF_TYPE_HARDWARE;
pea.size = sizeof(struct perf_event_attr);
pea.config = PERF_COUNT_HW_CPU_CYCLES;
pea.disabled = 1;
pea.exclude_kernel = 1;
pea.exclude_hv = 1;
pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
fd1 = syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
接下來,我們檢索第一個計數器的標識符:
ioctl(fd1, PERF_EVENT_IOC_ID, &id1);
第二個(以及所有其他計數器)以相同的方式創建,只有一個例外:我們將fd1
值作為組長參數傳遞:
memset(&pea, 0, sizeof(struct perf_event_attr));
pea.type = PERF_TYPE_SOFTWARE;
pea.size = sizeof(struct perf_event_attr);
pea.config = PERF_COUNT_SW_PAGE_FAULTS;
pea.disabled = 1;
pea.exclude_kernel = 1;
pea.exclude_hv = 1;
pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
fd2 = syscall(__NR_perf_event_open, &pea, 0, -1, fd1, 0); // <-- here
ioctl(fd2, PERF_EVENT_IOC_ID, &id2);
接下來我們需要聲明一個數據結構來一次讀取多個計數器。 您必須根據傳遞給perf_event_open
的標志聲明不同的字段集。 手冊頁提到了所有可能的字段。 在我們的例子中,我們傳遞了添加id
字段的PERF_FORMAT_ID
標志。 這將使我們能夠區分不同的計數器。
struct read_format {
uint64_t nr;
struct {
uint64_t value;
uint64_t id;
} values[/*2*/];
};
現在我們調用標准分析 ioctls:
ioctl(fd1, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
ioctl(fd1, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
do_something();
ioctl(fd1, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
最后,我們從組長文件描述符中讀取計數器。 兩個計數器都以我們聲明的單個read_format
結構返回:
char buf[4096];
struct read_format* rf = (struct read_format*) buf;
uint64_t val1, val2;
read(fd1, buf, sizeof(buf));
for (i = 0; i < rf->nr; i++) {
if (rf->values[i].id == id1) {
val1 = rf->values[i].value;
} else if (rf->values[i].id == id2) {
val2 = rf->values[i].value;
}
}
printf("cpu cycles: %"PRIu64"\n", val1);
printf("page faults: %"PRIu64"\n", val2);
以下是完整的程序清單:
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/unistd.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>
struct read_format {
uint64_t nr;
struct {
uint64_t value;
uint64_t id;
} values[];
};
void do_something() {
int i;
char* ptr;
ptr = malloc(100*1024*1024);
for (i = 0; i < 100*1024*1024; i++) {
ptr[i] = (char) (i & 0xff); // pagefault
}
free(ptr);
}
int main(int argc, char* argv[]) {
struct perf_event_attr pea;
int fd1, fd2;
uint64_t id1, id2;
uint64_t val1, val2;
char buf[4096];
struct read_format* rf = (struct read_format*) buf;
int i;
memset(&pea, 0, sizeof(struct perf_event_attr));
pea.type = PERF_TYPE_HARDWARE;
pea.size = sizeof(struct perf_event_attr);
pea.config = PERF_COUNT_HW_CPU_CYCLES;
pea.disabled = 1;
pea.exclude_kernel = 1;
pea.exclude_hv = 1;
pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
fd1 = syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
ioctl(fd1, PERF_EVENT_IOC_ID, &id1);
memset(&pea, 0, sizeof(struct perf_event_attr));
pea.type = PERF_TYPE_SOFTWARE;
pea.size = sizeof(struct perf_event_attr);
pea.config = PERF_COUNT_SW_PAGE_FAULTS;
pea.disabled = 1;
pea.exclude_kernel = 1;
pea.exclude_hv = 1;
pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
fd2 = syscall(__NR_perf_event_open, &pea, 0, -1, fd1 /*!!!*/, 0);
ioctl(fd2, PERF_EVENT_IOC_ID, &id2);
ioctl(fd1, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
ioctl(fd1, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
do_something();
ioctl(fd1, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
read(fd1, buf, sizeof(buf));
for (i = 0; i < rf->nr; i++) {
if (rf->values[i].id == id1) {
val1 = rf->values[i].value;
} else if (rf->values[i].id == id2) {
val2 = rf->values[i].value;
}
}
printf("cpu cycles: %"PRIu64"\n", val1);
printf("page faults: %"PRIu64"\n", val2);
return 0;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.