簡體   English   中英

用C解析iCalendar文件的工具集

[英]Toolset to parse iCalendar file in C

我需要在C中解析ics文件,並逐行進行處理。 每行的具體格式可能差別很大,但大體上都遵循同一個標準結構。

這是我注意到的一些規則:

  • 有一個屬性名稱
  • 可選參數,每個參數以分號開頭
    • 參數值也可以是以逗號分隔的列表
    • 可以用雙引號引起來,在這種情況下,逗號,分號和冒號之類的內容將需要忽略
  • 一個冒號
  • 屬性值

這是一個需要解析的示例ics組件:

UID:uid1@example.com
DTSTAMP:19970714T170000Z
ORGANIZER;CN=John Doe:MAILTO:john.doe@example.com
CATEGORIES:Project Report, XYZ, Weekly Meeting
DTSTART:19970714T170000Z
DTEND:19970715T035959Z
SUMMARY:Bastille Day Party

您會注意到像 MAILTO: 這樣的內容:只應以第一個冒號作為分隔符,該冒號之後的其餘部分全部屬於屬性值。

使用strtok()之類的東西似乎過於簡單,不足以解決此問題。

是否應使用正則表達式之類的方法來解決此問題? 研究它,我看到了一個用C#在這個stackoverflow答案上完成的正則表達式解決方案的示例。

你可以做到這一點

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read "example.ics" line by line and print each line as "key --> value".
 *
 * The key is the text before the first ':' or ';' on the line and the value
 * is everything after that separator, so for a line that carries parameters
 * (e.g. "ORGANIZER;CN=John Doe:MAILTO:...") the parameters end up in the
 * value part. Lines without any separator are skipped.
 *
 * Lines are read into a 100-byte buffer; a longer physical line is delivered
 * by fgets() in several pieces and each piece is handled as its own line.
 *
 * Returns 0 on success, -1 when the file cannot be opened.
 */
int
main(void)
{
    FILE *ics;
    char line[100];

    ics = fopen("example.ics", "r");
    if (ics == NULL)
    {
        perror("example.ics");
        return -1;
    }
    while (fgets(line, sizeof(line), ics) != NULL)
    {
        char *separator;
        char *key;
        char *value;

        /*
         * iCalendar content lines are terminated by CRLF, so cut the line at
         * the first '\r' or '\n'; stripping only '\n' would leave a stray
         * '\r' at the end of every value.
         */
        line[strcspn(line, "\r\n")] = '\0';

        separator = strpbrk(line, ":;");
        if (separator == NULL)
            continue;
        *separator = '\0';

        key = line; // Maybe you want to strip surrounding white spaces
        value = separator + 1; // Maybe you want to strip surrounding white spaces

        fprintf(stdout, "%s --> %s\n", key, value);
    }
    fclose(ics);

    return 0;
}

為此使用正則表達式就像用火箭筒殺死蒼蠅。

// disclaimer: no support
// code provided as an example of the minimal things one can do.

#include <malloc.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>

/* One comma-separated value taken from the text after the ':' of a line. */
struct value {
   struct value *next;   /* next value of the same line, or NULL */
   char *val;            /* heap-allocated, NUL-terminated copy of the value text */
};

/* One ';'-introduced item found between the line's name and its ':'. */
struct property {
   struct property *next;  /* next property of the same line, or NULL */
   char *prop;             /* heap-allocated copy of the raw text, quotes included */
};

/* One parsed input line: its name plus the properties and values found on it. */
struct parameter {
   struct property *props;  /* properties of this line, last one parsed first */
   struct value *vals;      /* values of this line, last one parsed first */
   struct parameter *next;  /* previously parsed line, or NULL */
   char *name;              /* heap-allocated copy of the text before the first ';' or ':' */
};

/* Which part of a line the scanner in parse() is currently inside. */
enum PARSE_STATE { PARAMETER, PROPERTY, VALUE };

//format for lines is...
//   PARAMETER[;PARAM_PROPERTY..]:VALUE[,VALUE2..]\n

/*
 * Copy the len bytes starting at src into a freshly allocated,
 * NUL-terminated string. Returns NULL when the allocation fails.
 */
static char *copy_span( const char *src, size_t len )
{
    char *copy = malloc( len + 1 );
    if( copy == NULL )
        return NULL;
    memcpy( copy, src, len );
    copy[len] = '\0';
    return copy;
}

/*
 * Parse a NUL-terminated buffer of lines of the form shown above.
 *
 * input  - text to scan; it is only read, never modified.
 *
 * Returns a singly linked list with one struct parameter per parsed line.
 * Every list is built by prepending, so lines, properties and values all
 * come out in the reverse of their order in the input. Returns NULL when
 * no line could be parsed.
 *
 * Every node and every string is allocated with malloc() and is owned by
 * the caller.
 *
 * Details:
 *  - Inside the property section, ';' and ':' between double quotes are
 *    treated as ordinary characters.
 *  - In the value section only ',' and '\n' are delimiters; a '\r' directly
 *    before the '\n' is dropped so CRLF-terminated input yields clean values.
 *  - A line that ends before any ';' or ':' was seen (e.g. a blank line) is
 *    skipped instead of being glued onto the name of the following line.
 *  - If the input stops in the middle of a line, a diagnostic is written to
 *    stderr and the unfinished piece is discarded.
 *  - If an allocation fails, a diagnostic is written to stderr and the list
 *    built so far is returned.
 */
struct parameter *parse( char *input )
{
    size_t start = 0;
    size_t end = 0;
    enum PARSE_STATE state = PARAMETER;
    struct parameter *root = NULL;
    int in_quote = 0;

    for( ; input[end]; end++ )
    {
        char c = input[end];

        switch( state )
        {
        case PARAMETER :
            if( c == '\n' ) {
                /* no delimiter on this line: nothing to record, start over */
                start = end + 1;
                break;
            }
            if( c == ';' || c == ':' ) {
                struct parameter *new_parameter = malloc( sizeof *new_parameter );
                if( new_parameter == NULL )
                    goto out_of_memory;
                new_parameter->name = copy_span( input + start, end - start );
                if( new_parameter->name == NULL ) {
                    free( new_parameter );
                    goto out_of_memory;
                }
                /* the two members have different pointer types, so they are
                   cleared separately rather than with a chained assignment */
                new_parameter->props = NULL;
                new_parameter->vals = NULL;
                new_parameter->next = root;
                root = new_parameter;
                start = end + 1;
                state = ( c == ';' ) ? PROPERTY : VALUE;
            }
            break;
        case PROPERTY :
            if( c == '"' ) {
                in_quote = !in_quote;
                break;
            }
            if( in_quote )
                break;
            if( c == ';' || c == ':' ) {
                struct property *new_property = malloc( sizeof *new_property );
                if( new_property == NULL )
                    goto out_of_memory;
                new_property->prop = copy_span( input + start, end - start );
                if( new_property->prop == NULL ) {
                    free( new_property );
                    goto out_of_memory;
                }
                new_property->next = root->props;
                root->props = new_property;
                if( c == ':' )
                    state = VALUE;
                start = end + 1;
            }
            break;
        case VALUE :
            if( c == '\n' || c == ',' ) {
                size_t len = end - start;
                struct value *new_value;

                /* CRLF line ending: keep the '\r' out of the stored value */
                if( c == '\n' && len > 0 && input[end - 1] == '\r' )
                    len--;
                new_value = malloc( sizeof *new_value );
                if( new_value == NULL )
                    goto out_of_memory;
                new_value->val = copy_span( input + start, len );
                if( new_value->val == NULL ) {
                    free( new_value );
                    goto out_of_memory;
                }
                new_value->next = root->vals;
                root->vals = new_value;
                if( c == '\n' )
                    state = PARAMETER;
                start = end + 1;
            }
            break;
        }
    }
    if( end != start )
       fprintf( stderr, "missing newline at end of input\n" );
    return root;

out_of_memory:
    fprintf( stderr, "out of memory while parsing input\n" );
    return root;
}


/*
 * Print every entry of the list starting at root to stdout, one entry per
 * output line: the name, then each property prefixed with "; ", then the
 * values, the first prefixed with " : " and the rest with ", ".
 * The lists are printed in the order in which they are linked.
 */
void DumpResult( struct parameter *root )
{
   struct parameter *entry = root;

   while( entry != NULL ) {
      struct property *p;
      struct value *v;

      printf( "%s ", entry->name );

      p = entry->props;
      while( p != NULL ) {
         printf( "; %s ", p->prop );
         p = p->next;
      }

      /* the head of the value list gets the ':' lead-in, the tail gets commas */
      v = entry->vals;
      if( v != NULL ) {
         printf( " : %s ", v->val );
         for( v = v->next; v != NULL; v = v->next )
            printf( ", %s ", v->val );
      }

      printf( "\n" );
      entry = entry->next;
   }
}

並且……下面是使用上述代碼的示例。 注意:解析出的各項(包括值)確實會以與輸入相反的順序輸出。

/*
 * Demo driver: run parse() over a hard-coded iCalendar fragment and print
 * the resulting list with DumpResult().
 *
 * main is declared as returning int, the signature the C standard defines
 * for a hosted program, and reports success with 0.
 */
int main( void )
{
    char *string = "UID:uid1@example.com\n"
                           "DTSTAMP:19970714T170000Z\n"
                           "ORGANIZER;CN=John Doe;SENT-BY=\"mailto:smith@example.com\":mailto:john.doe@example.com\n"
                           "CATEGORIES:Project Report, XYZ, Weekly Meeting\n"
                           "DTSTART:19970714T170000Z\n"
                           "DTEND:19970715T035959Z\n"
                           "SUMMARY:Bastille Day Party\n";
    struct parameter *thing = parse( string );
    DumpResult( thing );
    return 0;
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM