C 语言中的结构体与数组挑战

Question

我正在尝试解决这个挑战： https://www.hackerrank.com/challenges/structuring-the-document/problem

基本上，题目给了我一个包含若干结构体定义的锁定代码存根，要求我解析给定的文本。这是我的代码的精简版：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_CHARACTERS 1005
#define MAX_PARAGRAPHS 5

#include <ctype.h>

/* One word of the document. */
struct word {
    char *data; /* the word's characters */
};

/* One sentence: an array of words together with its length. */
struct sentence {
    struct word *data; /* the words that make up this sentence */
    int word_count;    /* number of words in the sentence */
};

/* One paragraph: an array of sentences together with its length. */
struct paragraph {
    struct sentence *data; /* the sentences that make up this paragraph */
    int sentence_count;    /* number of sentences in the paragraph */
};

/* The whole document: an array of paragraphs together with its length. */
struct document {
    struct paragraph *data; /* the paragraphs that make up this document */
    int paragraph_count;    /* number of paragraphs in the document */
};

// Parses `text` into a document: first counts the delimiters to size the
// arrays, then walks the text and cuts out a word at every ' ' or '.'.
// Returns the document by value; the arrays it points to are malloc'ed here
// and are not freed by this function.
// NOTE(review): several statements below look wrong; they are flagged inline.
struct document get_document(char* text) {
    // Count every space, period and newline in the whole text.
    int spaces = 0, periods = 0, newlines = 0;
    for(int i = 0; i < strlen(text); i++) 
        if(text[i] == ' ')
            spaces++;
        else if(text[i] == '.')
            periods++;
        else if(text[i] == '\n')
            newlines++;


    // One paragraph per newline, plus the final one.
    struct document doc;
    doc.paragraph_count = newlines + 1;
    doc.data = malloc((newlines + 1) * sizeof(struct paragraph));

    // Temporary paragraph headers (a variable-length array). Each one gets
    // room for periods + 1 sentences, where `periods` is the count for the
    // whole text, and sentence_count is set to that capacity rather than to
    // the number of sentences actually stored later.
    struct paragraph para[doc.paragraph_count];
    for(int i = 0; i < doc.paragraph_count; i++) {
        para[i].sentence_count = periods + 1;
        para[i].data = malloc((periods + 1) * sizeof(struct sentence));
    }

    // Temporary sentence headers, sized from para[0]. Each one gets room for
    // spaces + 1 words and word_count is likewise set to that capacity.
    struct sentence sen[para[0].sentence_count];
    for(int i = 0; i < para[0].sentence_count; i++) {
        sen[i].word_count = spaces + 1;
        sen[i].data = malloc((spaces + 1) * sizeof(struct word));
    }

    // Scratch array that receives the words as they are cut out of the text.
    struct word word[spaces + periods + 1];

    // start: index where the current word begins; k: write index into the
    // word being copied; wordsub/sensub/parasub/docsub: indices into the
    // arrays above; wordno/parano: next slot inside the current
    // sentence/paragraph.
    int start = 0, k = 0, wordsub = 0, sensub = 0, parasub = 0, docsub = 0, wordno = 0, parano = 0;
    for(int i = 0; i < strlen(text); i++) {
        // A word ends at a space or a period ('\n' alone never enters here).
        if(text[i] == ' ' || text[i] == '.') {
            // Room for the i - start characters of the word plus the terminator.
            word[wordsub].data = malloc((i - start) * sizeof(char) + 1);
            for(int j = start; j < i; j++)
                word[wordsub].data[k++] = text[j];
            word[wordsub].data[k++] = '\0';

            k = 0;

            // If a newline directly follows this delimiter, skip over it so
            // the next word starts after the newline.
            if(i < strlen(text) - 1 && text[i + 1] == '\n')
                start = i + 2;
            else 
                start = i + 1;

            // Space: the word joins the current sentence, which continues.
            if(text[i] == ' ') {
                sen[sensub].data[wordno++] = word[wordsub++]; // wordno is the next free word slot of this sentence
            }
            // Period followed by a letter: the sentence ends but the
            // paragraph continues. (i != strlen(text) is always true inside
            // this loop.)
            if(i != strlen(text) && isalpha(text[i + 1]) && text[i] == '.') {
                sen[sensub].data[wordno++] = word[wordsub++];
                wordno = 0;
                para[parasub].data[parano++] = sen[sensub++];


            }
            // Delimiter followed by a newline, or the last character of the
            // text: both the sentence and the paragraph end here.
            if((i != strlen(text) && text[i + 1] == '\n') || i + 1 == strlen(text)) {
                // NOTE(review): sensub is post-incremented on this line, so the
                // sen[sensub] stored into the paragraph below is the NEXT
                // sentence, whose word array has not been filled in yet.
                // wordsub is also not advanced, so the next word reuses this
                // scratch slot.
                sen[sensub++].data[wordno++] = word[wordsub];
                wordno = 0;

                // NOTE(review): parano is reset before the store, so the store
                // below always lands in slot 0 of the paragraph and replaces
                // whatever sentence was stored there earlier.
                parano = 0;
                para[parasub].data[parano++] = sen[sensub];

                doc.data[docsub++] = para[parasub++];


            }

        }
    }
    printf("%s\n", para[0].data[0].data[0].data);// intended to print "hello"
    return doc;
}

int main() {
    struct document doc;
    char * text = "hello world.\nhi.bye.\nwow.";
    doc = get_document(text);
    printf("%s\n", doc.data[0].data[0].data[0].data);//should also print "hello"
}

问题是这些打印语句并没有打印出 "hello"。此外，如果我更改打印语句中的索引，就会出现段错误。

Answer 1

这里：

word[wordsub].data[k++] = text[j];

您访问的 data 成员超出了已分配内存的范围。

Answer 2

题目描述规定，一个单词之后不会连续出现两个终止符，而且至少应该有一个单词。

所以，测试短语

"hello world.\nhi.bye.\nwow."

不适合，但是

"hello world\nhi.bye\nwow"

适合，并且您会看到程序打印出 "hello"。

此外，您的算法相当复杂，而代码其实可以更简单。我觉得动手试一试会很有趣，于是就写了一个版本。

首先，让我们使用一些typedef来编写更少的文本！

/* One word of the document; W is a short alias for `struct word`. */
typedef struct word {
    char *data; /* the word's characters */
} W;

/* One sentence: an array of words plus its length; S aliases `struct sentence`. */
typedef struct sentence {
    W *data;        /* the words that make up this sentence */
    int word_count; /* number of words in the sentence */
} S;

/* One paragraph: an array of sentences plus its length; P aliases `struct paragraph`. */
typedef struct paragraph {
    S *data;            /* the sentences that make up this paragraph */
    int sentence_count; /* number of sentences in the paragraph */
} P;

/* The whole document: an array of paragraphs plus its length; DOC aliases `struct document`. */
typedef struct document {
    P *data;             /* the paragraphs that make up this document */
    int paragraph_count; /* number of paragraphs in the document */
} DOC;

然后是函数本身。逻辑很简单：对 text 中的每个字符依次执行以下所有检查：

如果遇到任意分隔符（' '、'.' 或 '\n'），就记录一个单词；
如果遇到的分隔符是 '.' 或 '\n'，就记录一个句子；
如果遇到的分隔符是 '\n'，就记录一个段落。

字符串的结尾算作段落的结尾。

代码

/*
 * Grow a heap array by one element and return the (possibly moved) array.
 * `count` is the current number of elements and `elem_size` the size of one.
 * The result of realloc is checked before it replaces the old pointer, and
 * the program aborts on allocation failure, so callers never receive NULL.
 */
static void *append_slot(void *arr, int count, size_t elem_size) {
    void *grown = realloc(arr, ((size_t)count + 1) * elem_size);
    if (grown == NULL) {
        abort();
    }
    return grown;
}

/*
 * Split `text` into paragraphs, sentences and words.
 *
 * Delimiters: ' ' ends a word; '.' ends a word and a sentence; '\n' ends a
 * word, a sentence and a paragraph. The end of the string is treated like a
 * final '\n'.
 *
 * Empty pieces are skipped: a delimiter that directly follows another one
 * (".\n", a trailing '.', two spaces, ...) does not create an empty word,
 * sentence or paragraph. So "hello world.\nhi.bye.\nwow." and
 * "hello world\nhi.bye\nwow" both yield the same three paragraphs, and an
 * empty text yields a document with paragraph_count == 0 and data == NULL.
 *
 * Every word is copied into its own NUL-terminated buffer; `text` itself is
 * not modified. All arrays are heap-allocated and owned by the returned
 * document; this function never frees them. Aborts if memory runs out.
 */
struct document get_document(char* text) {
    DOC doc  = { NULL, 0 };
    P   para = { NULL, 0 };   /* paragraph currently being filled */
    S   sent = { NULL, 0 };   /* sentence currently being filled  */

    size_t len = strlen(text);
    size_t word_start = 0;    /* index of the first char of the current word */

    /* i runs up to and including len so the terminator closes the last paragraph. */
    for (size_t i = 0; i <= len; i++) {
        char c = (i == len) ? '\n' : text[i];

        if (c != '\n' && c != '.' && c != ' ') {
            continue;
        }

        /* End of word: store it unless it is empty (two delimiters in a row). */
        if (i > word_start) {
            size_t n = i - word_start;
            W word;

            word.data = malloc(n + 1);              /* +1 for the terminator */
            if (word.data == NULL) {
                abort();
            }
            memcpy(word.data, text + word_start, n);
            word.data[n] = '\0';

            sent.data = append_slot(sent.data, sent.word_count, sizeof *sent.data);
            sent.data[sent.word_count++] = word;
        }
        word_start = i + 1;

        /* End of sentence: hand the collected words over to the paragraph. */
        if ((c == '\n' || c == '.') && sent.word_count > 0) {
            para.data = append_slot(para.data, para.sentence_count, sizeof *para.data);
            para.data[para.sentence_count++] = sent;
            sent.data = NULL;                       /* start a fresh sentence */
            sent.word_count = 0;
        }

        /* End of paragraph: hand the collected sentences over to the document. */
        if (c == '\n' && para.sentence_count > 0) {
            doc.data = append_slot(doc.data, doc.paragraph_count, sizeof *doc.data);
            doc.data[doc.paragraph_count++] = para;
            para.data = NULL;                       /* start a fresh paragraph */
            para.sentence_count = 0;
        }
    }

    return doc;
}

最后，要查看这是否有效，请打印所有成员（使用兼容的文本！）

/* Demo driver: parse a sample text and dump every paragraph, sentence and word. */
int main(int argc, char **argv) {
    char *text = "hello world\nhi.bye\nwow";
    DOC doc = get_document(text);

    for (int pi = 0; pi < doc.paragraph_count; ++pi) {
        const P *para = &doc.data[pi];
        printf("Para %d / %d\n", pi, doc.paragraph_count - 1);

        for (int si = 0; si < para->sentence_count; ++si) {
            const S *sent = &para->data[si];
            printf("Sent %d / %d\n", si, para->sentence_count - 1);

            for (int wi = 0; wi < sent->word_count; ++wi) {
                printf("Word %d / %d: %s\n", wi, sent->word_count - 1, sent->data[wi].data);
            }
        }
    }

    return 0;
}

我们还可以添加一些代码，避免处理连续出现的两个分隔符（例如末尾的 '\n' 或 '.'）。

C 中的结构和 arrays 挑战

问题描述

2 个解决方案

解决方案1
1 已采纳 2020-05-15 10:24:48

解决方案2
1 2020-05-15 10:32:48

C 中的结构和 arrays 挑战

问题描述

2 个解决方案

解决方案1 1 已采纳 2020-05-15 10:24:48

解决方案2 1 2020-05-15 10:32:48

解决方案1
1 已采纳 2020-05-15 10:24:48

解决方案2
1 2020-05-15 10:32:48