[英]Structs and arrays challenge in C
我正在尝试解决这个挑战: https://www.hackerrank.com/challenges/structuring-the-document/problem
基本上,我得到了一个带有结构的锁定代码存根,我应该解析给定的文本。 这是我的代码的精简版:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_CHARACTERS 1005
#define MAX_PARAGRAPHS 5
#include <ctype.h>
/* One word of the document, referenced through a C string pointer. */
struct word {
    char *data; /* characters of the word */
};
/* A sentence: an array of words together with its length. */
struct sentence {
    struct word *data; /* array of words */
    int word_count;    /* number of words in the sentence */
};
/* A paragraph: an array of sentences together with its length. */
struct paragraph {
    struct sentence *data; /* array of sentences */
    int sentence_count;    /* number of sentences in the paragraph */
};
/* A document: an array of paragraphs together with its length. */
struct document {
    struct paragraph *data; /* array of paragraphs */
    int paragraph_count;    /* number of paragraphs in the document */
};
struct document get_document(char* text) {
int spaces = 0, periods = 0, newlines = 0;
for(int i = 0; i < strlen(text); i++)
if(text[i] == ' ')
spaces++;
else if(text[i] == '.')
periods++;
else if(text[i] == '\n')
newlines++;
struct document doc;
doc.paragraph_count = newlines + 1;
doc.data = malloc((newlines + 1) * sizeof(struct paragraph));
struct paragraph para[doc.paragraph_count];
for(int i = 0; i < doc.paragraph_count; i++) {
para[i].sentence_count = periods + 1;
para[i].data = malloc((periods + 1) * sizeof(struct sentence));
}
struct sentence sen[para[0].sentence_count];
for(int i = 0; i < para[0].sentence_count; i++) {
sen[i].word_count = spaces + 1;
sen[i].data = malloc((spaces + 1) * sizeof(struct word));
}
struct word word[spaces + periods + 1];
int start = 0, k = 0, wordsub = 0, sensub = 0, parasub = 0, docsub = 0, wordno = 0, parano = 0;
for(int i = 0; i < strlen(text); i++) {
if(text[i] == ' ' || text[i] == '.') {
word[wordsub].data = malloc((i - start) * sizeof(char) + 1);
for(int j = start; j < i; j++)
word[wordsub].data[k++] = text[j];
word[wordsub].data[k++] = '\0';
k = 0;
if(i < strlen(text) - 1 && text[i + 1] == '\n')
start = i + 2;
else
start = i + 1;
if(text[i] == ' ') {
sen[sensub].data[wordno++] = word[wordsub++]; //wordno can be 0 or 1
}
if(i != strlen(text) && isalpha(text[i + 1]) && text[i] == '.') {
sen[sensub].data[wordno++] = word[wordsub++];
wordno = 0;
para[parasub].data[parano++] = sen[sensub++];
}
if((i != strlen(text) && text[i + 1] == '\n') || i + 1 == strlen(text)) {
sen[sensub++].data[wordno++] = word[wordsub];
wordno = 0;
parano = 0;
para[parasub].data[parano++] = sen[sensub];
doc.data[docsub++] = para[parasub++];
}
}
}
printf("%s\n", para[0].data[0].data[0].data);// should print "hello"
return doc;
}
int main() {
struct document doc;
char * text = "hello world.\nhi.bye.\nwow.";
doc = get_document(text);
printf("%s\n", doc.data[0].data[0].data[0].data);//should also print "hello"
}
问题是打印语句没有打印“hello”。 此外,如果我更改打印语句中的索引,则会出现分段错误。
这里:
word[wordsub].data[k++] = text[j];
您正在从分配的 memory 中访问数据成员。
问题陈述指定在一个单词之后永远不会有两个终止符。 至少也应该有一个词。
所以,测试短语
"hello world.\nhi.bye.\nwow."
不适合,但是
"hello world\nhi.bye\nwow"
适合,这样您就会打印出“hello”。
此外,您的算法非常复杂,而代码可能更简单。 尝试很有趣,我做到了。
首先,让我们使用一些 typedef,这样可以少写一些代码!
/* One word of the document, referenced through a C string pointer. */
struct word {
    char *data; /* characters of the word */
};
typedef struct word W; /* short alias */
/* A sentence: an array of words together with its length. */
struct sentence {
    W *data;        /* array of words */
    int word_count; /* number of words in the sentence */
};
typedef struct sentence S; /* short alias */
/* A paragraph: an array of sentences together with its length. */
struct paragraph {
    S *data;            /* array of sentences */
    int sentence_count; /* number of sentences in the paragraph */
};
typedef struct paragraph P; /* short alias */
/* A document: an array of paragraphs together with its length. */
struct document {
    P *data;             /* array of paragraphs */
    int paragraph_count; /* number of paragraphs in the document */
};
typedef struct document DOC; /* short alias */
然后是函数本身。 逻辑很简单,对 text 的每个 char 依次执行以下所有操作:
- 如果该字符是分隔符(' '、'.' 或 '\n'),记录一个单词;
- 如果该字符是句子分隔符('.' 或 '\n'),记录一个句子;
- 如果该字符是段落分隔符('\n'),记录一个段落。
字符串的结尾算作段落的结尾。
代码
/*
 * Returns `items` enlarged from `count` to `count + 1` elements of
 * `item_size` bytes each. Aborts on allocation failure instead of
 * returning NULL, so the original array is never lost silently.
 */
static void *grow_by_one(void *items, int count, size_t item_size) {
    void *bigger = realloc(items, ((size_t)count + 1) * item_size);
    if (bigger == NULL) {
        abort();
    }
    return bigger;
}

/*
 * Splits `text` into paragraphs, sentences and words.
 *
 * Each character is examined once:
 *   ' ', '.' or '\n'  -> the pending word (if any) is added to the sentence
 *   '.' or '\n'       -> the pending sentence (if any) is added to the paragraph
 *   '\n'              -> the pending paragraph (if any) is added to the document
 * The terminating '\0' is handled as if it were '\n'.
 *
 * Two separators in a row (such as ".\n" or a trailing '.') add nothing,
 * so the result never contains empty words, sentences or paragraphs.
 *
 * text:    NUL-terminated input, only read. NULL or "" gives an empty
 *          document (data == NULL, paragraph_count == 0).
 * returns: the document; all arrays and word strings are heap-allocated
 *          and must be freed by the caller.
 */
struct document get_document(char* text) {
    DOC doc = { NULL, 0 }; // you're the doc, doc
    P parr = { NULL, 0 };  // paragraph under construction
    S sarr = { NULL, 0 };  // sentence under construction
    size_t len = (text != NULL) ? strlen(text) : 0;
    size_t wpos = 0;       // start index of the pending word

    for (size_t i = 0; i <= len; i++) { // <= length! (to deal with \0)
        // End of string simulates end of paragraph
        char c = (i < len) ? text[i] : '\n';
        int ends_sentence = (c == '\n' || c == '.');

        if (!ends_sentence && c != ' ') {
            continue; // ordinary character, still inside a word
        }

        // End of word, add it to sentence (unless it is empty)
        if (i > wpos) {
            size_t wlen = i - wpos;
            W word;
            word.data = malloc(wlen + 1); // +1 for '\0'
            if (word.data == NULL) {
                abort();
            }
            memcpy(word.data, text + wpos, wlen); // Copy only the word
            word.data[wlen] = '\0';               // 0 terminate it
            sarr.data = grow_by_one(sarr.data, sarr.word_count, sizeof(W));
            sarr.data[sarr.word_count++] = word;
        }
        wpos = i + 1;

        // End of sentence, add it to paragraph (unless it is empty)
        if (ends_sentence && sarr.word_count > 0) {
            parr.data = grow_by_one(parr.data, parr.sentence_count, sizeof(S));
            parr.data[parr.sentence_count++] = sarr;
            sarr.data = NULL; // start a fresh sentence
            sarr.word_count = 0;
        }

        // End of paragraph, add it to doc (unless it is empty)
        if (c == '\n' && parr.sentence_count > 0) {
            doc.data = grow_by_one(doc.data, doc.paragraph_count, sizeof(P));
            doc.data[doc.paragraph_count++] = parr;
            parr.data = NULL; // start a fresh paragraph
            parr.sentence_count = 0;
        }
    }
    return doc;
}
最后,要查看这是否有效,请打印所有成员(使用兼容的文本!)
/*
 * Parses a fixed sample text and prints every paragraph, sentence and word,
 * each with its index and the last valid index at that level.
 */
int main(int argc, char **argv) {
    char *sample = "hello world\nhi.bye\nwow";
    DOC parsed = get_document(sample);
    const int last_para = parsed.paragraph_count - 1;

    for (int p = 0; p <= last_para; p++) {
        printf("Para %d / %d\n", p, last_para);
        const P *para = &parsed.data[p];
        const int last_sent = para->sentence_count - 1;

        for (int s = 0; s <= last_sent; s++) {
            printf("Sent %d / %d\n", s, last_sent);
            const S *sent = &para->data[s];
            const int last_word = sent->word_count - 1;

            for (int w = 0; w <= last_word; w++) {
                printf("Word %d / %d: %s\n", w, last_word, sent->data[w].data);
            }
        }
    }
    return 0;
}
我们可以添加一些代码来避免处理连续两个分隔符的情况(例如尾随的 '\n' 或 '.')。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.