繁体   English   中英

用C解析iCalendar文件的工具集

[英]Toolset to parse iCalendar file in C

我需要用C解析ics文件,并逐行进行处理。 每一行的格式可能差别很大,但总体上都遵循同一个标准。

这是我注意到的一些规则:

  • 有一个属性名称
  • 可选参数,每个参数以分号开头
    • 参数值也可以是逗号分隔的列表(CSV)
    • 可以用双引号引起来,在这种情况下,逗号,分号和冒号之类的内容将需要忽略
  • 冒号
  • 属性值

这是一个需要解析的示例ics组件:

UID:uid1@example.com
DTSTAMP:19970714T170000Z
ORGANIZER;CN=John Doe:MAILTO:john.doe@example.com
CATEGORIES:Project Report, XYZ, Weekly Meeting
DTSTART:19970714T170000Z
DTEND:19970715T035959Z
SUMMARY:Bastille Day Party

您会注意到像 MAILTO: 这样的内容:只有第一个冒号被当作分隔符解析,该冒号之后的其余部分都属于属性值。

使用strtok()之类的函数似乎过于简单,不足以解决此问题。

是否应使用正则表达式之类的方法来解决此问题? 在查资料时,我在这个 Stack Overflow 回答中看到了一个用 C# 实现的正则表达式解决方案示例。

你可以做到这一点

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Reads "example.ics" line by line and prints each line as "key --> value".
//
// The key is the text before the first ':' or ';' on the line and the value is
// everything after that separator, so any parameters that follow a ';' end up
// in the value part. Lines containing neither separator are skipped.
//
// Returns 0 on success, -1 if the file cannot be opened.
int
main(void)
{
    FILE *ics;
    char line[100]; // fgets() hands back longer lines in several pieces

    ics = fopen("example.ics", "r");
    if (ics == NULL)
        return -1;
    while (fgets(line, sizeof(line), ics) != NULL)
    {
        char *separator;
        char *key;
        char *value;

        // iCalendar lines are terminated by CRLF, so cut the line at the
        // first '\r' or '\n'; stripping only '\n' would leave a stray '\r'
        // at the end of the value.
        line[strcspn(line, "\r\n")] = '\0';
        separator = strpbrk(line, ":;");
        if (separator == NULL)
            continue;
        *separator = '\0'; // split the line in place at the separator

        key = line; // Maybe you want to strip surrounding white spaces
        value = separator + 1; // Maybe you want to strip surrounding white spaces

        fprintf(stdout, "%s --> %s\n", key, value);
    }
    fclose(ics);

    return 0;
}

为此使用正则表达式就像用火箭筒杀死苍蝇。

// Disclaimer: no support.
// Code provided as an example of the minimal things one can do.

#include <malloc.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>

// Data model built by parse(). Naming follows the original author: the
// leading name of a line is stored in a "parameter" and the ';'-separated
// items that follow it are stored as "property" nodes.

// One value of a line; the values of a line form a singly linked list.
struct value {
   struct value *next;      // next value of the same line, or NULL
   char *val;               // heap-allocated, NUL-terminated value text
};

// One ';'-separated item that follows the name of a line.
struct property {
   struct property *next;   // next item of the same line, or NULL
   char *prop;              // heap-allocated item text, quotes kept as written
};

// One parsed line: its name plus the lists of items and values.
struct parameter {
   struct property *props;  // items found between the name and the ':'
   struct value *vals;      // values found after the ':'
   struct parameter *next;  // next parsed line, or NULL
   char *name;              // heap-allocated, NUL-terminated name
};

// Which part of a line the scanner in parse() is currently reading.
enum PARSE_STATE { PARAMETER, PROPERTY, VALUE };

//format for lines is...
//   PARAMETER[;PARAM_PROPERTY..]:VALUE[,VALUE2..]\n

// Returns a heap-allocated, NUL-terminated copy of the len bytes at src,
// or NULL if the allocation fails.
static char *copy_span( const char *src, size_t len )
{
    char *copy = malloc( len + 1 );
    if( copy != NULL ) {
        memcpy( copy, src, len );
        copy[len] = 0;
    }
    return copy;
}

// Parses NUL-terminated text made of lines in the format shown above and
// returns the parsed lines as a linked list (NULL if nothing was parsed).
//
// - Every node is prepended to its list, so lines, items and values all come
//   back in the reverse of the order in which they appear in the input.
// - Inside the item section, ';' and ':' between double quotes are not
//   treated as separators.
// - A line may end in "\n" or "\r\n"; the '\r' is not stored in the value.
// - Lines that contain no ';' or ':' (for example blank lines) are skipped.
// - If the input does not end with a newline, the unterminated tail is not
//   stored and a warning is written to stderr.
// - If an allocation fails, an error is written to stderr and the list built
//   so far is returned.
//
// The input is only read. All nodes and strings are allocated with malloc()
// and belong to the caller.
struct parameter *parse( char *input )
{
    size_t start = 0;        // index of the first byte of the current token
    size_t end;              // index of the byte being examined
    enum PARSE_STATE state = PARAMETER;
    struct parameter *root = NULL;
    int in_quote = 0;        // non-zero while inside a double-quoted item

    for( end = 0; input[end]; end++ )
    {
        const char c = input[end];

        switch( state )
        {
        case PARAMETER :
            if( c == '\n' ) {
                // No separator on this line: drop it instead of gluing it
                // onto the name of the next line.
                start = end + 1;
            } else if( c == ';' || c == ':' ) {
                struct parameter *new_parameter = malloc( sizeof *new_parameter );
                if( new_parameter == NULL )
                    goto out_of_memory;
                new_parameter->name = copy_span( input + start, end - start );
                if( new_parameter->name == NULL ) {
                    free( new_parameter );
                    goto out_of_memory;
                }
                new_parameter->props = NULL;
                new_parameter->vals = NULL;
                new_parameter->next = root;
                root = new_parameter;
                start = end + 1;
                state = ( c == ';' ) ? PROPERTY : VALUE;
            }
            break;
        case PROPERTY :
            if( c == '"' ) {
                in_quote = !in_quote;
            } else if( !in_quote && ( c == ';' || c == ':' ) ) {
                struct property *new_property = malloc( sizeof *new_property );
                if( new_property == NULL )
                    goto out_of_memory;
                new_property->prop = copy_span( input + start, end - start );
                if( new_property->prop == NULL ) {
                    free( new_property );
                    goto out_of_memory;
                }
                new_property->next = root->props;
                root->props = new_property;
                if( c == ':' )
                    state = VALUE;
                start = end + 1;
            }
            break;
        case VALUE :
            if( c == '\n' || c == ',' ) {
                size_t len = end - start;
                struct value *new_value;

                // Keep the '\r' of a CRLF line ending out of the value.
                if( c == '\n' && len > 0 && input[end - 1] == '\r' )
                    len--;
                new_value = malloc( sizeof *new_value );
                if( new_value == NULL )
                    goto out_of_memory;
                new_value->val = copy_span( input + start, len );
                if( new_value->val == NULL ) {
                    free( new_value );
                    goto out_of_memory;
                }
                new_value->next = root->vals;
                root->vals = new_value;
                if( c == '\n' )
                    state = PARAMETER;
                start = end + 1;
            }
            break;
        }
    }
    if( end != start )
        fprintf( stderr, "missing newline at end of input\n" );
    return root;

out_of_memory:
    fprintf( stderr, "out of memory while parsing input\n" );
    return root;
}


// Prints every node of the list starting at root to stdout, one node per
// output line: the name, then each property prefixed with "; ", then the
// values, the first prefixed with " : " and each later one with ", ".
void DumpResult( struct parameter *root )
{
   while( root != NULL ) {
      struct property *item = root->props;
      struct value *entry = root->vals;
      int first_value = 1;   // selects the " : " prefix for the first value only

      printf( "%s ", root->name );

      while( item != NULL ) {
         printf( "; %s ", item->prop );
         item = item->next;
      }

      while( entry != NULL ) {
         if( first_value ) {
            printf( " : %s ", entry->val );
            first_value = 0;
         } else {
            printf( ", %s ", entry->val );
         }
         entry = entry->next;
      }

      printf( "\n" );
      root = root->next;
   }
}

下面是使用上述代码的示例。 注意:解析出的值的顺序确实会被颠倒。

// Demo driver: parses a small in-memory iCalendar fragment with parse() and
// prints the result with DumpResult().
//
// Declared as int main(void) because void main(void) is not one of the
// standard signatures for a hosted program. Returns 0.
int main( void )
{
    char *string = "UID:uid1@example.com\n"
                           "DTSTAMP:19970714T170000Z\n"
                           "ORGANIZER;CN=John Doe;SENT-BY=\"mailto:smith@example.com\":mailto:john.doe@example.com\n"
                           "CATEGORIES:Project Report, XYZ, Weekly Meeting\n"
                           "DTSTART:19970714T170000Z\n"
                           "DTEND:19970715T035959Z\n"
                           "SUMMARY:Bastille Day Party\n";
    struct parameter *thing = parse( string );
    DumpResult( thing );
    // The list returned by parse() is not freed here; the program ends
    // immediately after printing it.
    return 0;
}

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM