Structs and arrays challenge in C

Question

I am trying to solve this challenge: https://www.hackerrank.com/challenges/structuring-the-document/problem

Basically I have been given a locked stub of code with structs in it and I am supposed to parse a given text. This is an abridged version of my code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_CHARACTERS 1005
#define MAX_PARAGRAPHS 5

#include <ctype.h>

/* One word of the text; `data` points at the word's characters. */
struct word {
    char *data;
};

/* A sentence: an array of words plus the number of words it holds. */
struct sentence {
    struct word *data;
    int word_count; /* number of words in the sentence */
};

/* A paragraph: an array of sentences plus the number of sentences it holds. */
struct paragraph {
    struct sentence *data;
    int sentence_count; /* number of sentences in the paragraph */
};

/* A document: an array of paragraphs plus the number of paragraphs it holds. */
struct document {
    struct paragraph *data;
    int paragraph_count; /* number of paragraphs in the document */
};

/*
 * Attempts to split `text` into a document -> paragraph -> sentence -> word
 * hierarchy and returns the document by value.
 *
 * Only ' ' and '.' trigger word extraction; '\n' is counted up front and
 * stepped over when it directly follows a separator.
 *
 * NOTE(review): this is the non-working version the question asks about.
 * The inline NOTE(review) comments mark the places where the bookkeeping
 * goes wrong. Nothing allocated here is ever freed.
 */
struct document get_document(char* text) {
    // First pass: count the separators. The totals are used below as
    // upper bounds when sizing every array.
    int spaces = 0, periods = 0, newlines = 0;
    for(int i = 0; i < strlen(text); i++) 
        if(text[i] == ' ')
            spaces++;
        else if(text[i] == '.')
            periods++;
        else if(text[i] == '\n')
            newlines++;


    // One paragraph slot per newline, plus one.
    struct document doc;
    doc.paragraph_count = newlines + 1;
    doc.data = malloc((newlines + 1) * sizeof(struct paragraph));

    // Local (stack) paragraph headers; each one gets a heap array with room
    // for periods + 1 sentences.
    // NOTE(review): sentence_count is set to the upper bound here and never
    // updated to the number of sentences actually stored.
    struct paragraph para[doc.paragraph_count];
    for(int i = 0; i < doc.paragraph_count; i++) {
        para[i].sentence_count = periods + 1;
        para[i].data = malloc((periods + 1) * sizeof(struct sentence));
    }

    // Local sentence headers; each one gets a heap array with room for
    // spaces + 1 words. malloc leaves those word slots uninitialized.
    // NOTE(review): word_count is likewise an upper bound that is never updated.
    struct sentence sen[para[0].sentence_count];
    for(int i = 0; i < para[0].sentence_count; i++) {
        sen[i].word_count = spaces + 1;
        sen[i].data = malloc((spaces + 1) * sizeof(struct word));
    }

    // Scratch array of word headers; each `data` pointer is set only when the
    // corresponding word is extracted in the loop below.
    struct word word[spaces + periods + 1];

    // start   : index in text where the current word begins
    // k       : write index into the word buffer being filled
    // wordsub / sensub / parasub / docsub : next free slot in word[] / sen[] / para[] / doc.data
    // wordno  : next word slot inside the current sentence
    // parano  : next sentence slot inside the current paragraph
    int start = 0, k = 0, wordsub = 0, sensub = 0, parasub = 0, docsub = 0, wordno = 0, parano = 0;
    for(int i = 0; i < strlen(text); i++) {
        if(text[i] == ' ' || text[i] == '.') {
            // Copy text[start .. i) into a fresh buffer of (i - start) + 1 bytes
            // and NUL-terminate it.
            word[wordsub].data = malloc((i - start) * sizeof(char) + 1);
            for(int j = start; j < i; j++)
                word[wordsub].data[k++] = text[j];
            word[wordsub].data[k++] = '\0';

            k = 0;

            // The next word starts right after the separator, or one character
            // later when the separator is immediately followed by '\n'.
            if(i < strlen(text) - 1 && text[i + 1] == '\n')
                start = i + 2;
            else 
                start = i + 1;

            // A space ends a word inside the current sentence.
            if(text[i] == ' ') {
                sen[sensub].data[wordno++] = word[wordsub++]; //wordno can be 0 or 1
            }
            // A '.' directly followed by a letter ends a sentence inside the
            // current paragraph.
            // NOTE(review): `i != strlen(text)` is always true inside this loop;
            // at the last index text[i + 1] reads the terminating '\0'.
            if(i != strlen(text) && isalpha(text[i + 1]) && text[i] == '.') {
                sen[sensub].data[wordno++] = word[wordsub++];
                wordno = 0;
                para[parasub].data[parano++] = sen[sensub++];


            }
            // A separator followed by '\n', or the last character of the text,
            // ends the paragraph.
            if((i != strlen(text) && text[i + 1] == '\n') || i + 1 == strlen(text)) {
                // NOTE(review): sensub is incremented here, so the `sen[sensub]`
                // stored two statements below is the NEXT sentence, whose word
                // slots were never written, not the one just filled.
                // NOTE(review): wordsub is not advanced, so the next extracted
                // word reuses this word[] slot.
                sen[sensub++].data[wordno++] = word[wordsub];
                wordno = 0;

                // NOTE(review): parano is reset BEFORE the store, so slot 0 of the
                // paragraph is overwritten even if earlier sentences were
                // already stored for this paragraph.
                parano = 0;
                para[parasub].data[parano++] = sen[sensub];

                // The paragraph header is copied by value into the document; the
                // sentence array it points to stays shared with para[].
                doc.data[docsub++] = para[parasub++];


            }

        }
    }
    // NOTE(review): when the first paragraph is a single sentence, the sentence
    // stored in it is the unfilled one (see above), so this reads an
    // indeterminate pointer; presumably that is why "hello" is not printed.
    printf("%s\n", para[0].data[0].data[0].data);// should print "hello"
    return doc;
}

int main() {
    struct document doc;
    char * text = "hello world.\nhi.bye.\nwow.";
    doc = get_document(text);
    printf("%s\n", doc.data[0].data[0].data[0].data);//should also print "hello"
}

The problem is that the print statements are not printing "hello". Also, if I change the indices in the print statements, I get a segmentation fault.

Answer 1

Here:

word[wordsub].data[k++] = text[j];

you are accessing the data member outside of its allocated memory.

Answer 2

The problem statement specifies that a word is never followed by two terminators. There should also be at least one word.

So, the test phrase

"hello world.\nhi.bye.\nwow."

does not fit, but

"hello world\nhi.bye\nwow"

fits and you will have "hello" printed.

Besides, your algorithm is very complex while the code could be simpler. It was fun to try and I did it.

First, let's use some typedef to write less text!

/* A single word; `data` points at the word's characters. */
typedef struct word {
    char *data;
} W;

/* A sentence: array of words and how many it holds. */
typedef struct sentence {
    W *data;
    int word_count; /* number of words in the sentence */
} S;

/* A paragraph: array of sentences and how many it holds. */
typedef struct paragraph {
    S *data;
    int sentence_count; /* number of sentences in the paragraph */
} P;

/* A document: array of paragraphs and how many it holds. */
typedef struct document {
    P *data;
    int paragraph_count; /* number of paragraphs in the document */
} DOC;

Then the function itself. The logic is simple: for each character of the text, in sequence, do all of the following:

- if it is any separator (' ', '.' or '\n'), record the word;
- if it is a '.' or '\n' separator, record the sentence;
- if it is a '\n' separator, record the paragraph.

The end of the string counts as the end of a paragraph.

Code

/*
 * Grow a heap block to `size` bytes, or allocate a new one when `ptr` is NULL.
 * Terminates the program on allocation failure, so the caller never has to
 * deal with a NULL result and never loses the original pointer.
 */
static void *grow_or_die(void *ptr, size_t size) {
    void *grown = realloc(ptr, size);
    if (grown == NULL) {
        fputs("get_document: out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }
    return grown;
}

/*
 * Split `text` into paragraphs, sentences and words.
 *
 * Every character is examined once, in order:
 *   - ' ', '.' or '\n' ends the current word, which is appended to the
 *     sentence being built;
 *   - '.' or '\n' additionally ends the sentence, which is appended to the
 *     paragraph being built;
 *   - '\n' additionally ends the paragraph, which is appended to the document.
 * The end of the string is treated as a final '\n'.
 *
 * Two adjacent separators are NOT collapsed: they produce an empty word
 * (and, where applicable, a one-word sentence or paragraph holding it).
 *
 * `text` is only read. All returned arrays and word strings are heap
 * allocated and owned by the caller. The program exits if memory runs out.
 */
struct document get_document(char* text) {
    DOC doc  = { NULL, 0 };
    P   para = { NULL, 0 };   /* paragraph currently being built */
    S   sent = { NULL, 0 };   /* sentence currently being built  */

    const size_t len = strlen(text);
    size_t word_start = 0;    /* index of the first char of the current word */

    /* i runs up to and including len so the terminating '\0' is visited too. */
    for (size_t i = 0; i <= len; i++) {
        const char c = (i == len) ? '\n' : text[i];

        if (c != '\n' && c != '.' && c != ' ')
            continue;

        /* End of word: copy text[word_start .. i) and NUL-terminate it. */
        const size_t word_len = i - word_start;
        W word;
        word.data = grow_or_die(NULL, word_len + 1);
        memcpy(word.data, text + word_start, word_len);
        word.data[word_len] = '\0';

        sent.data = grow_or_die(sent.data,
                                sizeof *sent.data * ((size_t)sent.word_count + 1));
        sent.data[sent.word_count++] = word;
        word_start = i + 1;

        if (c == '\n' || c == '.') {
            /* End of sentence: hand the word array over to the paragraph. */
            para.data = grow_or_die(para.data,
                                    sizeof *para.data * ((size_t)para.sentence_count + 1));
            para.data[para.sentence_count++] = sent;
            sent.data = NULL;          /* start a fresh sentence */
            sent.word_count = 0;
        }

        if (c == '\n') {
            /* End of paragraph: hand the sentence array over to the document. */
            doc.data = grow_or_die(doc.data,
                                   sizeof *doc.data * ((size_t)doc.paragraph_count + 1));
            doc.data[doc.paragraph_count++] = para;
            para.data = NULL;          /* start a fresh paragraph */
            para.sentence_count = 0;
        }
    }

    return doc;
}

Finally, to see if that's working, print all members (using a compliant text!)

/*
 * Driver: parse a fixed sample text and dump every paragraph, sentence and
 * word, each labelled with its index and the highest valid index.
 */
int main(int argc, char **argv) {
    char *sample = "hello world\nhi.bye\nwow";
    DOC parsed = get_document(sample);

    for (int p = 0; p < parsed.paragraph_count; p++) {
        const P *para = &parsed.data[p];
        printf("Para %d / %d\n", p, parsed.paragraph_count-1);

        for (int s = 0; s < para->sentence_count; s++) {
            const S *sent = &para->data[s];
            printf("Sent %d / %d\n", s, para->sentence_count-1);

            for (int w = 0; w < sent->word_count; w++) {
                printf("Word %d / %d: %s\n", w, sent->word_count-1, sent->data[w].data);
            }
        }
    }

    return 0;
}

We could add a bit of code to avoid processing two consecutive separators (like a trailing '\n' or '.').

Structs and arrays challenge in C

Question

2 answers

solution1
1 ACCEPTED 2020-05-15 10:24:48

solution2
1 2020-05-15 10:32:48

Structs and arrays challenge in C

Question

2 answers

solution1 1 ACCEPTED 2020-05-15 10:24:48

solution2 1 2020-05-15 10:32:48

solution1
1 ACCEPTED 2020-05-15 10:24:48

solution2
1 2020-05-15 10:32:48