簡體   English   中英

Linux posix C regexec()不返回所有匹配項

[英]Linux posix C regexec() not returning all matches

我有以下腳本來解析要查找字符串匹配的進程內存,一切正常,但轉儲具有1193可能匹配項的編輯器進程(在本例中為“ nano”)(如果我轉儲內存然后對其執行egrep,則可以正常工作)但是我的代碼只輸出3匹配項。 任何想法?

#ifdef TARGET_64
// for 64bit target (see /proc/cpuinfo addr size virtual)
 #define MEM_MAX (1ULL << 48)
#else
 #define MEM_MAX (1ULL << 32)
#endif

#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <regex.h>

int main(int argc, char **argv)
{
        if (argc < 2) {
                printf("Usage: %s <pid>\n", argv[0]);
                exit(1);
        }

        char buf[128];
        int pid = atoi(argv[1]);
        snprintf(buf, sizeof(buf), "/proc/%d/mem", pid);
        int fd = open(buf, O_RDONLY);
        if (fd == -1) {
                fprintf(stderr, "Error opening mem file: %m\n");
                exit(1);
        }

        int status ,i;
        int cflags = REG_EXTENDED;
        regmatch_t pmatch[1];
        const size_t nmatch=1;
        regex_t reg;
        const char *pattern="([a-zA-Z]{18,20})";
        regcomp(&reg, pattern, cflags);

        long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
        if (ptret == -1) {
                fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
                close(fd);
                exit(1);
        }

        unsigned char page[4096];
        unsigned long long offset = 0;

        while (offset < MEM_MAX) {
                lseek64(fd, offset, SEEK_SET);

                ssize_t ret;
                ret = read(fd, page, sizeof(page));

                if (ret > 0) {
                        status = regexec(&reg, page, nmatch, pmatch, 0);
                        if(status == 0){
                                for (i=pmatch[0].rm_so; i<pmatch[0].rm_eo; ++i) {
                                        putchar(page[i]);
                                }
                                printf("\n");
                        }
                }

                offset += sizeof(page);
        }

        ptrace(PTRACE_DETACH, pid, 0, 0);
        close(fd);
        regfree(&reg);
        return 0;
}

pid為2208 nano,其中18-20字符之間的[ Read 1193 lines ]alpha

root ~/coding/proc/regex # ./memregmatch 22008
ABCABCABCABCABCABC
ABCABCABCABCABCABCAC
ABCCBAABCCBAABCCBABA
root ~/coding/proc/regex #

好的,用libpcre

#include <pcre.h>
#include <locale.h>

....

        const char *error;
        int   erroffset;
        pcre *re;
        int   rc;
        int   i;
        int   ovector[100];
        char *regex = "([a-zA-Z]{18,20})";
        re = pcre_compile (regex,          /* the pattern */
                        PCRE_MULTILINE|PCRE_DOTALL|PCRE_NEWLINE_ANYCRLF,
                        &error,         /* for error message */
                        &erroffset,     /* for error offset */
                        0);             /* use default character tables */
        if (!re)
        {
                printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
        return -1;
        }

....

                if (ret > 0) {
                        //
                        unsigned int offset = 0;
                        while (offset < sizeof(page) && (rc = pcre_exec(re, 0, page, sizeof(page), offset, 0, ovector, sizeof(ovector))) >= 0)
                        {
                                for(i = 0; i < rc; ++i)
                                {
                                        printf("%.*s\n", ovector[2*i+1] - ovector[2*i], page + ovector[2*i]);
                                }
                                offset = ovector[1];
                        }
                        //
                }

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM