優化解析器功能的C程序

Question

gcc (GCC) 4.6.3 20120306 (Red Hat 4.6.3-2)
c89

你好，

我想知道此代碼是否還能進一步優化。由於它運行在高速事務服務器中，每秒會有大量調用，因此解析器必須非常快速且經過優化。

我想知道我是否可以做出任何改進。

以下是包含測試用例的完整代碼。我要優化的函數是 g_get_dnis_user_part。

我希望這是適合發布此問題的論壇。

#include <stdio.h>
#include <ctype.h>
#include <string.h>

/* Boolean status values returned by the parser. Each one is only defined
   here when nothing earlier has already defined a macro of that name. */
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

static int g_get_dnis_user_part(const char *dnis, char *user_part, size_t size);

/*
 * Test driver: feeds a fixed set of sample dnis strings to
 * g_get_dnis_user_part(), prints one PASSED/FAILED line per sample and a
 * final summary. A sample is reported as PASSED when the parser returns TRUE.
 * Always returns 0.
 */
int main(void)
{
    /* Test cases */
    const char *dnis_tests[] = {
        "0846372573@10.1.8.34",
        "084637257310.1.8.34",
        "084e672573@10.1.8.34",
        "",
        "084637257310.1.8.34@"
    };
    const size_t test_count = sizeof dnis_tests / sizeof dnis_tests[0];
    size_t passes = 0;
    size_t failures = 0;
    size_t t;

#define MAX_ADDRESS_LEN 32

    char user_part[MAX_ADDRESS_LEN];

    for(t = 0; t < test_count; t++) {
        /* Start every case from an all-zero buffer so that output left over
           from the previous case can never leak into this one */
        memset(user_part, 0, sizeof user_part);

        if(g_get_dnis_user_part(dnis_tests[t], user_part, MAX_ADDRESS_LEN) == TRUE) {
            printf("TEST %lu PASSED [ %s ] [ %s ]\n",
                   (unsigned long)(t + 1), dnis_tests[t], user_part);
            passes++;
        }
        else {
            printf("TEST %lu FAILED [ %s ] [ %s ]\n",
                   (unsigned long)(t + 1), dnis_tests[t], user_part);
            failures++;
        }
    }

    /* size_t has no C89 conversion specifier; convert explicitly to
       unsigned long so the arguments match %lu on every platform */
    printf("ALL TEST COMPLETED PASSES [ %lu ] FAILURES [ %lu ]\n",
           (unsigned long)passes, (unsigned long)failures);

    return 0;
}

/* Get the user part from the complete dnis number
   0846372573@10.1.8.34 -> 0846372573 nul terminated

   dnis      - nul terminated input string
   user_part - output buffer that receives the leading digits
   size      - capacity of user_part in bytes, terminator included

   Returns TRUE when the leading run of digits fits in user_part and is
   immediately followed by '@'. Returns FALSE when size is 0, when a
   character other than a digit or '@' ends the run (including the end of
   the string), or when the digits do not fit in the buffer.

   Whenever size is at least 1, user_part is nul terminated on return and
   holds the digits that were copied before parsing stopped. */
static int g_get_dnis_user_part(const char *dnis, char *user_part, size_t size)
{
    size_t i;
    size_t limit;

    /* Without room for the nul terminator nothing can be written at all */
    if(size == 0) {
        return FALSE;
    }

    /* Make room for the nul terminator */
    limit = size - 1;

    for(i = 0; i < limit; i++) {
        /* The cast keeps isdigit() well defined when plain char is signed
           and the input contains bytes with the high bit set */
        if(isdigit((unsigned char)*dnis) == 0) {
            break;
        }
        user_part[i] = *dnis;

        /* Next character */
        dnis++;
    }

    /* nul terminate the string */
    user_part[i] = '\0';

    /* dnis now points at the first character that was not copied. Testing it
       here, after the loop, also accepts a user part that fills the buffer
       exactly; anything other than '@' means the separator is missing, the
       string is corrupted or the digits were truncated. */
    return (*dnis == '@') ? TRUE : FALSE;
}

非常感謝您的任何建議，

Answer 1

不要過度優化。這是一個非常簡單的函數，處理的數據集小到足以放入高速緩存。它很可能已經以盡可能快的速度運行（假設已經設置了合適的編譯器優化選項等）。但更重要的是，這只是整個程序的一小部分。不要把全部精力花在用匯編重寫這段代碼、或翻閱x86體系結構手冊以確保CPU流水線始終保持滿載之類的事情上，因為幾乎可以肯定，在其他地方優化會獲得更大的收益。先進行性能剖析，然後優化剖析器指出的過慢之處。

Answer 2

更換

if(isdigit(*dnis) != 0)

與

if ( *dnis>='0' && *dnis<='9' )

如果您只關心十進制數字，而不必擔心語言環境

雖然影響不大，但這樣可以節省函數調用的開銷。（您需要自行測量這是否會產生顯著影響。）除此之外，我看不出還需要做任何重大改動。

Answer 3

我想我應該更像這樣編寫解析器：

/* Compact variant of the dnis parser: copies the leading run of digits from
   dnis into user_part (at most size - 1 characters) and nul terminates it.

   Returns TRUE when the character right after the copied digits is '@',
   FALSE otherwise. Returns FALSE without touching user_part when size is 0. */
static int g_get_dnis_user_part2(const char *dnis, char *user_part, size_t size)
{
    /* Declared before the first statement so the function is valid C89 */
    size_t i;

    if (size == 0)
        return FALSE;

    /* The cast keeps isdigit() well defined when plain char is signed and
       the input contains bytes with the high bit set */
    for (i=0; i<size-1 && isdigit((unsigned char)dnis[i]); i++)
        user_part[i] = dnis[i];
    user_part[i] = '\0';
    return (dnis[i] == '@') ? TRUE : FALSE;
}

如果確實需要，還可以將對isdigit的調用更改為類似my_isdigit的函數，您可以按以下形式實現：

/* Table-driven replacement for isdigit().

   input - a character value; values outside 0..UCHAR_MAX (for example EOF)
           are reported as "not a digit".

   Returns 1 for the characters '0' through '9' and 0 for everything else.

   NOTE(review): the lookup table is filled lazily on the first call and that
   step is not synchronized; if this is used from several threads, make one
   call before the threads are started. */
int my_isdigit(int input) {
    static char table[UCHAR_MAX+1];
    /* Must be static: it has to survive between calls and start out as 0 */
    static int inited = 0;

    if (!inited) {
        int i;

        /* <= so that '9' itself is marked as a digit */
        for (i='0'; i<='9'; i++)
            table[i] = 1;
        inited = 1;
    }

    /* Never index outside the table */
    if (input < 0 || input > UCHAR_MAX)
        return 0;

    return table[input];
}

我盡量讓這段代碼保持簡潔；但若想再快一點，可以顯式地（單獨）完成初始化，這樣就不必在處理每個字符時都檢查表是否已初始化。（不過在分支預測良好的情況下，這樣做的收益不大。）

除此之外，正如其他人已經提到的那樣，我將更改TRUE和FALSE的定義-您正在使用的定義使我感到震驚。 按照慣例，FALSE = 0和TRUE = 1，並且看不到在哪里更改對您有任何幫助。

Answer 4

我基本上同意bdonlan的看法：不要過度優化，如果有疑問就先測量。我甚至會更進一步。如果我對您的算法理解正確，您所做的就是檢查字符串的開頭部分是否由十進制數字組成，然後檢查其後的字符是否為@ 。

strspn可以一次檢查一整類字符，直接使用它即可。
然後檢查緊隨其後的字符是否為'@'。

就這麼簡單。

gcc具有內置的strspn，我不認為您能做得比它更好。請記住，運行時的瓶頸將是從內存中讀取所有字符串；一旦它們進入緩存，您就不會感覺到太大的差別。

Answer 5

也許您可以從展開循環中受益。 它不會太漂亮，但是類似這樣的東西應該可以工作（未經測試）：

#define CHECKDIGIT(d, user_part, status) \
do {\
    if(isdigit(*(d)) != 0) {    \
        *(user_part)++ = *(d)++;\
    }                           \
    else {                      \
        if(*(d) == '@') {       \
            (status) = TRUE;    \
            goto finish;        \
        }                       \
        else {                  \
            (status) = FALSE;   \
            goto finish;        \
        }                       \
    }                           \
} while(0)

static int g_get_dnis_user_part(const char *dnis, char *user_part, size_t size)
{
    size_t i = 0;
    int status = FALSE;
    int chunks, rem;

    /* Make room for the nul terminator */
    if(size > 1) {
        size--;
    }
    else {
        return status;
    }

    // Divide size in chunks of 8
    chunks = size >> 3;
    rem = size & 0x7;

    for(i = 0; i < chunks; i++) {
        /* Check for valid digit */
        CHECKDIGIT(dnis, user_part);
        CHECKDIGIT(dnis, user_part);
        CHECKDIGIT(dnis, user_part);
        CHECKDIGIT(dnis, user_part);
        CHECKDIGIT(dnis, user_part);
        CHECKDIGIT(dnis, user_part);
        CHECKDIGIT(dnis, user_part);
        CHECKDIGIT(dnis, user_part);
    }
    for(i = 0; i < rem; i++) {
        CHECKDIGIT(dnis, user_part);
    }

    finish:
    /* nul terminate the string */
    user_part[i++] = '\0';

    /* Status FALSE indicates that the @ was not found or possible corruption with     dnis string */
    return status;
}

優化解析器功能的C程序

問題描述

5 個解決方案

解決方案1
4 2012-03-30 06:26:09

解決方案2
1 2012-03-30 06:26:15

解決方案3
1 已采納 2012-03-30 06:49:00

解決方案4
1 2012-03-30 07:57:42

解決方案5
-1 2012-03-30 07:48:37

優化解析器功能的C程序

問題描述

5 個解決方案

解決方案1 4 2012-03-30 06:26:09

解決方案2 1 2012-03-30 06:26:15

解決方案3 1 已采納 2012-03-30 06:49:00

解決方案4 1 2012-03-30 07:57:42

解決方案5 -1 2012-03-30 07:48:37

解決方案1
4 2012-03-30 06:26:09

解決方案2
1 2012-03-30 06:26:15

解決方案3
1 已采納 2012-03-30 06:49:00

解決方案4
1 2012-03-30 07:57:42

解決方案5
-1 2012-03-30 07:48:37