简体   繁体   English

工具集以C解析iCalendar文件

[英]Toolset to parse iCalendar file in C

I need to parse an ics file in C and will be doing it line by line. 我需要在C中解析ics文件,并将逐行进行处理。 Each line can vary a lot in its format but generally follows a standard. 每行的格式可能会有很大不同,但通常都遵循一定的标准。

Here are some rules I have noticed: 这是我注意到的一些规则:

  • There is a property name 有一个属性名称
  • Optional parameters which each start with a semicolon 可选参数,每个参数以分号开头
    • Can have CSV too 也可以有CSV
    • Can be double quoted values, in which case things like commas, semi colons, and colons would need to be ignored within this 可以用双引号引起来,在这种情况下,逗号,分号和冒号之类的内容将需要忽略
  • Colon 冒号
  • Property value 属性值

Here is an example ics component that would need to be parsed out: 这是一个需要解析的示例ics组件:

UID:uid1@example.com
DTSTAMP:19970714T170000Z
ORGANIZER;CN=John Doe:MAILTO:john.doe@example.com
CATEGORIES:Project Report, XYZ, Weekly Meeting
DTSTART:19970714T170000Z
DTEND:19970715T035959Z
SUMMARY:Bastille Day Party

You'll notice that in things like the MAILTO value there is a further colon after it. 您会注意到,在MAILTO之类的值后面还有一个冒号。 Only the first colon acts as the separator; everything after that colon is the property value. 只有第一个冒号作为分隔符,该冒号之后的其余部分都是属性值。

Using something like strtok() seems too basic to be adequate for this problem. 使用strtok()之类的东西似乎过于简单,不足以解决此问题。

Should something like regular expression be used to solve this problem? 是否应使用正则表达式之类的方法来解决此问题? Looking into it, I see an example of a regex solution being done in C# on this stackoverflow answer . 研究它,我看到了一个用C#在这个stackoverflow答案上完成的正则表达式解决方案的示例。

You can do it with this 你可以做到这一点

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Reads "example.ics" line by line and prints each line that contains a
 * ':' or ';' as "<key> --> <rest>", split at the first such character.
 * Lines without either character are skipped.
 *
 * Returns 0 after the whole file has been read, -1 if it cannot be opened.
 */
int
main(void)
{
    FILE *ics;
    char line[100];

    ics = fopen("example.ics", "r");
    if (ics == NULL)
        return -1;
    while (fgets(line, sizeof(line), ics) != NULL)
    {
        char *separator;
        char *key;
        char *value;

        // iCalendar content lines are CRLF-terminated; cut at the first CR or
        // LF so that neither a '\n' nor a stray '\r' ends up inside value.
        line[strcspn(line, "\r\n")] = '\0';

        separator = strpbrk(line, ":;");
        if (separator == NULL)
            continue;
        *separator = '\0'; // Split the buffer in place: key | rest

        key = line; // Maybe you want to strip surrounding white spaces
        value = separator + 1; // Maybe you want to strip surrounding white spaces

        fprintf(stdout, "%s --> %s\n", key, value);
    }
    fclose(ics);

    return 0;
}

Using a regular expression for this is like killing a fly with a bazooka. 为此使用正则表达式就像用火箭筒杀死苍蝇。

// disclaimer : no support 
// code provided as an example of the minimal things one can do.

#include <malloc.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>

// One node of a singly linked list of value strings.
struct value {
    struct value *next;   // following node, or NULL for the last one
    char *val;            // NUL-terminated text of this value
};
// One node of a singly linked list of ';'-separated entries
// (the text found between a line's name and its ':').
struct property {
    struct property *next;   // following node, or NULL for the last one
    char *prop;              // NUL-terminated text of this entry
};
// One parsed input line: its leading name plus the two lists hanging off it.
struct parameter {
    struct property *props;   // ';'-separated entries that followed the name
    struct value *vals;       // ','-separated values that followed the ':'
    struct parameter *next;   // next parsed line in the list, or NULL
    char *name;               // NUL-terminated text before the first ';' or ':'
};
// Scanner states used while walking the input one character at a time.
enum PARSE_STATE {
    PARAMETER,   // inside the leading name, up to the first ';' or ':'
    PROPERTY,    // inside the ';'-separated entries, up to the ':'
    VALUE        // inside the ','-separated values, up to '\n'
};

//format for lines is...
//   PARAMETER[;PARAM_PROPERTY..]:VALUE[,VALUE2..]\n

struct parameter *parse( char *input )
{
    size_t start, end;
    char *buf;
    enum PARSE_STATE state;
    struct parameter *root = NULL;
    struct parameter *new_parameter;
    struct property *new_property;
    struct value *new_value;
    char in_quote = 0;
    start = end = 0;
    state = PARAMETER;
    while( input[end] )
    {
        switch( state ) 
        { 
        case PARAMETER : 
            if( input[end] == ';' || input[end] == ':' ) {
               new_parameter = malloc( sizeof( struct parameter ) );
               new_parameter->next = root;
               new_parameter->name = malloc( end - start + 1 );
               strncpy( new_parameter->name, input + start, end - start );
               new_parameter->name[end-start] = 0;
               new_parameter->props = new_parameter->vals = NULL;
               root = new_parameter;
               start = end + 1;
               if( input[end] == ';' )
                  state = PROPERTY;
               else 
                  state = VALUE;
             }
             break;
        case PROPERTY :
             if( input[end] == '"' ) {
                if( !in_quote ) 
                   in_quote = input[start];
                else if( input[start] == in_quote ) 
                   in_quote = 0;
                break;
             }
             if( in_quote ) break;
             if( input[end] == ';' || input[end] == ':' ) {
                new_property = malloc( sizeof( struct property ) );
                new_property->prop = malloc( end - start + 1 );
                strncpy( new_property->prop, input + start, end - start );
                new_property->prop[end-start] = 0;
                new_property->next = root->props;
                root->props = new_property;
                if( input[end] == ':' ) 
                   state = VALUE;
                start = end + 1;
                break;
             }
             break;   
        case VALUE : 
             if( input[end] == '\n' || input[end] == ',' ) {
                new_value = malloc( sizeof( struct value ) );
                new_value->val = malloc( end - start + 1 );
                strncpy( new_value->val, input + start, end - start );
                new_value->val[end-start] = 0;
                new_value->next = root->vals;
                root->vals = new_value;
                if( input[end] == '\n' ) 
                    state = PARAMETER;
                start = end + 1;
             }
             break;
        }
        end++;
    }
    if( end != start )
       fprintf( stderr, "missing newline at end of input\n" );
    return root;
}


void DumpResult( struct parameter *root )
{
   struct property *prop;
   struct value *val;
   for( ; root; root = root->next ) {
         printf( "%s ", root->name );
         for( prop = root->props; prop; prop = prop->next ) 
              printf( "; %s ", prop->prop );
         for( val = root->vals; val; val = val->next ) {
            if( val == root->vals ) 
               printf( " : %s ", val->val );
            else 
               printf( ", %s ", val->val );
         }
         printf( "\n" );
   }
}

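And... using the above code. 并且...使用上面的代码。 Note that the entries all come out in reverse order, because parse() pushes each new node onto the front of its list. 请注意,所有条目都会以相反的顺序输出,因为parse()把每个新节点都插入到链表的头部。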

// Demo driver: runs parse() over a fixed block of sample lines and prints the
// result with DumpResult(). Returns 0.
int main( void )
{
    char *string = "UID:uid1@example.com\n"
                           "DTSTAMP:19970714T170000Z\n"
                           "ORGANIZER;CN=John Doe;SENT-BY=\"mailto:smith@example.com\":mailto:john.doe@example.com\n"
                           "CATEGORIES:Project Report, XYZ, Weekly Meeting\n"
                           "DTSTART:19970714T170000Z\n"
                           "DTEND:19970715T035959Z\n"
                           "SUMMARY:Bastille Day Party\n";
    struct parameter *thing = parse( string );
    DumpResult( thing );
    return 0;
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM