简体   繁体   English

C 中的结构和 arrays 挑战

[英]Structs and arrays challenge in C

I am trying to solve this challenge: https://www.hackerrank.com/challenges/structuring-the-document/problem 我正在尝试解决这个挑战: https://www.hackerrank.com/challenges/structuring-the-document/problem

Basically I have been given a locked stub of code with structs in it and I am supposed to parse a given text.基本上,我得到了一个带有结构的锁定代码存根,我应该解析给定的文本。 This is an abridged version of my code:这是我的代码的精简版:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_CHARACTERS 1005
#define MAX_PARAGRAPHS 5

#include <ctype.h>

/* A single word: pointer to the word's characters. */
struct word {
    char *data;
};

/* A sentence: an array of words plus its length. */
struct sentence {
    struct word *data;
    int word_count;          /* number of words in the sentence */
};

/* A paragraph: an array of sentences plus its length. */
struct paragraph {
    struct sentence *data;
    int sentence_count;      /* number of sentences in the paragraph */
};

/* A document: an array of paragraphs plus its length. */
struct document {
    struct paragraph *data;
    int paragraph_count;     /* number of paragraphs in the document */
};

/*
 * Builds a struct document from text.
 *
 * Approach: count every ' ', '.' and '\n' in the text, pre-allocate the
 * paragraph/sentence/word arrays from those totals, then walk the text
 * again and cut a word at every ' ' or '.'.
 *
 * The sentence_count / word_count fields are set once from the global
 * totals (periods + 1, spaces + 1) and are never updated afterwards, so
 * they are upper bounds rather than the real number of filled entries.
 *
 * Returns doc, whose data[] entries are copies of the local para[] structs.
 */
struct document get_document(char* text) {
    // Count the separators over the whole text.
    int spaces = 0, periods = 0, newlines = 0;
    for(int i = 0; i < strlen(text); i++) 
        if(text[i] == ' ')
            spaces++;
        else if(text[i] == '.')
            periods++;
        else if(text[i] == '\n')
            newlines++;


    // One paragraph slot per newline, plus one.
    struct document doc;
    doc.paragraph_count = newlines + 1;
    doc.data = malloc((newlines + 1) * sizeof(struct paragraph));

    // Scratch paragraphs; each gets room for (periods + 1) sentences,
    // where periods is the total for the whole text.
    struct paragraph para[doc.paragraph_count];
    for(int i = 0; i < doc.paragraph_count; i++) {
        para[i].sentence_count = periods + 1;
        para[i].data = malloc((periods + 1) * sizeof(struct sentence));
    }

    // Scratch sentences; each gets room for (spaces + 1) words.
    // The malloc'd word arrays are not initialised.
    struct sentence sen[para[0].sentence_count];
    for(int i = 0; i < para[0].sentence_count; i++) {
        sen[i].word_count = spaces + 1;
        sen[i].data = malloc((spaces + 1) * sizeof(struct word));
    }

    // Scratch words.
    struct word word[spaces + periods + 1];

    // start: index where the current word begins; k: write index into the word buffer.
    // wordsub/sensub/parasub/docsub: next index into word[], sen[], para[], doc.data[].
    // wordno: next word index inside the current sentence.
    // parano: next sentence index inside the current paragraph.
    int start = 0, k = 0, wordsub = 0, sensub = 0, parasub = 0, docsub = 0, wordno = 0, parano = 0;
    for(int i = 0; i < strlen(text); i++) {
        if(text[i] == ' ' || text[i] == '.') {
            // Copy text[start..i) into a fresh buffer of (i - start + 1) bytes.
            word[wordsub].data = malloc((i - start) * sizeof(char) + 1);
            for(int j = start; j < i; j++)
                word[wordsub].data[k++] = text[j];
            word[wordsub].data[k++] = '\0';

            k = 0;

            // If a newline follows this separator, the next word starts after it.
            if(i < strlen(text) - 1 && text[i + 1] == '\n')
                start = i + 2;
            else 
                start = i + 1;

            // Space: the word goes into the current sentence.
            if(text[i] == ' ') {
                sen[sensub].data[wordno++] = word[wordsub++]; //wordno can be 0 or 1
            }
            // Period followed by a letter: finish the sentence and store it in
            // the current paragraph. (i != strlen(text) is always true inside
            // this loop, since the loop condition is i < strlen(text).)
            if(i != strlen(text) && isalpha(text[i + 1]) && text[i] == '.') {
                sen[sensub].data[wordno++] = word[wordsub++];
                wordno = 0;
                para[parasub].data[parano++] = sen[sensub++];


            }
            // Separator followed by a newline, or last character of the text:
            // finish the paragraph.
            if((i != strlen(text) && text[i + 1] == '\n') || i + 1 == strlen(text)) {
                // NOTE(review): wordsub is not advanced here, unlike the two branches above.
                sen[sensub++].data[wordno++] = word[wordsub];
                wordno = 0;

                // NOTE(review): parano is reset before the store below, so the
                // sentence is written to index 0 of this paragraph, overwriting
                // anything stored there by the branch above.
                parano = 0;
                // NOTE(review): sensub was already post-incremented two statements
                // up, so sen[sensub] is the slot after the sentence that just
                // received the word, and its word array is still uninitialised.
                // This looks unintended and would explain why "hello" is not printed.
                para[parasub].data[parano++] = sen[sensub];

                doc.data[docsub++] = para[parasub++];


            }

        }
    }
    printf("%s\n", para[0].data[0].data[0].data);// should print "hello"
    return doc;
}

int main() {
    struct document doc;
    char * text = "hello world.\nhi.bye.\nwow.";
    doc = get_document(text);
    printf("%s\n", doc.data[0].data[0].data[0].data);//should also print "hello"
}

The problem is the print statements are not printing "hello".问题是打印语句没有打印“hello”。 Also if I change the indices in the print statements I get a segmentation error.此外,如果我更改打印语句中的索引,则会出现分段错误。

Here:这里:

word[wordsub].data[k++] = text[j];

you are accessing the data member outside of its allocated memory. 您正在访问超出已分配内存范围的 data 成员。

The problem statement specifies that there are never two terminators after a word.问题陈述指定在一个单词之后永远不会有两个终止符。 There should also be one word at least.至少也应该有一个词。

So, the test phrase所以,测试短语

"hello world.\nhi.bye.\nwow."

does not fit, but不适合,但是

"hello world\nhi.bye\nwow"

fits and you will have "hello" printed.适合,您将打印“你好”。

Besides, your algorithm is very complex while the code could be simpler.此外,您的算法非常复杂,而代码可能更简单。 It was fun to try and I did it.尝试很有趣,我做到了。

First, let's use some typedef to write less text!首先,让我们使用一些typedef来编写更少的文本!

/* A single word: pointer to the word's characters. */
struct word {
    char *data;
};
typedef struct word W;

/* A sentence: an array of words plus its length. */
struct sentence {
    W *data;
    int word_count;          /* number of words in the sentence */
};
typedef struct sentence S;

/* A paragraph: an array of sentences plus its length. */
struct paragraph {
    S *data;
    int sentence_count;      /* number of sentences in the paragraph */
};
typedef struct paragraph P;

/* A document: an array of paragraphs plus its length. */
struct document {
    P *data;
    int paragraph_count;     /* number of paragraphs in the document */
};
typedef struct document DOC;

Then the function itself.然后是 function 本身。 The logic is simple, do all of the following for each char of text in sequence逻辑很简单,对text的每个char依次执行以下所有操作

  • in case we have any separator ( ' ' , '.' or '\n' ) record the word 如果我们有任何分隔符( ' ' 、 '.' 或 '\n' )记录单词
  • in case we have a separator ( '.' or '\n' ) record the sentence 如果我们有分隔符( '.' 或 '\n' )记录句子
  • in case we have a separator ( '\n' ) record a paragraph 如果我们有一个分隔符( '\n' )记录一个段落

The end of the string counts as the end of a paragraph.字符串的结尾算作段落的结尾。

Code代码

/*
 * Resize (or, when ptr is NULL, create) an array so it can hold `count`
 * elements of `elem_size` bytes. Aborts the program on overflow or when
 * memory is exhausted, so callers never see a NULL result and the old
 * pointer is never lost.
 */
static void *grow_array(void *ptr, size_t count, size_t elem_size) {
    if (elem_size != 0 && count > (size_t)-1 / elem_size) {
        fprintf(stderr, "get_document: allocation size overflow\n");
        abort();
    }
    void *grown = realloc(ptr, count * elem_size);
    if (grown == NULL) {
        fprintf(stderr, "get_document: out of memory\n");
        abort();
    }
    return grown;
}

/*
 * Split text into paragraphs, sentences and words.
 *
 *   ' '   ends a word
 *   '.'   ends a word and a sentence
 *   '\n'  ends a word, a sentence and a paragraph
 *
 * The end of the string is treated like a final '\n'.
 *
 * Empty pieces are skipped: consecutive separators (for example ".\n",
 * a trailing '.' or '\n', or doubled spaces) do not create empty words,
 * sentences or paragraphs. An empty or NULL text yields a document with
 * paragraph_count == 0 and data == NULL.
 *
 * Every array and every word string in the result is heap-allocated and
 * owned by the caller. Aborts if memory cannot be allocated.
 */
struct document get_document(char* text) {
    DOC doc  = { NULL, 0 };
    P   parr = { NULL, 0 };   // paragraph currently being built
    S   sarr = { NULL, 0 };   // sentence currently being built

    if (text == NULL) {
        return doc;
    }

    size_t len  = strlen(text);
    size_t wpos = 0;          // index where the current word starts

    for (size_t i = 0; i <= len; i++) {   // <= so the terminating '\0' is visited
        char c = text[i];
        if (c == '\0') {
            c = '\n';         // end of string acts as end of paragraph
        }
        if (c != '\n' && c != '.' && c != ' ') {
            continue;         // ordinary character, still inside a word
        }

        // End of word: store it unless it is empty (two separators in a row).
        if (i > wpos) {
            size_t wlen = i - wpos;
            W word;
            word.data = grow_array(NULL, wlen + 1, 1);   // +1 for '\0'
            memcpy(word.data, text + wpos, wlen);
            word.data[wlen] = '\0';

            sarr.data = grow_array(sarr.data, (size_t)sarr.word_count + 1,
                                   sizeof *sarr.data);
            sarr.data[sarr.word_count++] = word;
        }
        wpos = i + 1;

        if (c == ' ') {
            continue;         // a space ends only the word
        }

        // End of sentence: hand the words over to the paragraph.
        if (sarr.word_count > 0) {
            parr.data = grow_array(parr.data, (size_t)parr.sentence_count + 1,
                                   sizeof *parr.data);
            parr.data[parr.sentence_count++] = sarr;
            sarr.data = NULL; // ownership moved into parr
            sarr.word_count = 0;
        }

        // End of paragraph: hand the sentences over to the document.
        if (c == '\n' && parr.sentence_count > 0) {
            doc.data = grow_array(doc.data, (size_t)doc.paragraph_count + 1,
                                  sizeof *doc.data);
            doc.data[doc.paragraph_count++] = parr;
            parr.data = NULL; // ownership moved into doc
            parr.sentence_count = 0;
        }
    }

    return doc;
}

Finally, to see if that's working, print all members (using a compliant text!)最后,要查看这是否有效,请打印所有成员(使用兼容的文本!)

/* Driver: parses a fixed sample text and prints every paragraph, sentence
 * and word together with its index and the last valid index at that level. */
int main(int argc, char **argv) {
    char *text = "hello world\nhi.bye\nwow";
    DOC doc = get_document(text);

    for (int p = 0; p < doc.paragraph_count; p++) {
        const P *para = &doc.data[p];
        printf("Para %d / %d\n", p, doc.paragraph_count - 1);

        for (int s = 0; s < para->sentence_count; s++) {
            const S *sent = &para->data[s];
            printf("Sent %d / %d\n", s, para->sentence_count - 1);

            for (int w = 0; w < sent->word_count; w++) {
                printf("Word %d / %d: %s\n", w, sent->word_count - 1, sent->data[w].data);
            }
        }
    }

    return 0;
}

We could add a bit of code to avoid processing two consecutive separators (like a trailing '\n' or '.' ). 我们可以添加一些代码来避免处理两个连续的分隔符(如尾随的 '\n' 或 '.' )。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM