I'm trying to implement a trie for storing words in C, but I'm getting a segmentation fault when trying to access a struct member.
The code is below:
#include <stdbool.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define ALPHABET_SIZE 27
#define SIZE 45
//Trie data structure declaration
// Each node links to its children through children[]; insert() walks and
// extends these links one character at a time.
typedef struct _dictionary {
// Flag that insert() sets to true on a node to mark it as a word.
bool is_word;
// Character that insert() records in the node while walking through it.
char letter;
// Child links, indexed by the value toIndex() returns for a character.
struct _dictionary *children[ALPHABET_SIZE];
} dicto;
// Root pointer that insert() starts from. Nothing in this listing allocates
// it, so as a file-scope pointer it starts out NULL.
dicto *DICT;
//Function prototypes
// insert: adds one NUL-terminated string to the trie rooted at DICT.
void insert(char *string);
// toIndex: converts a character to an index into a node's children[] array.
int toIndex(char s);
// Reads the file "small" line by line and passes each line to insert().
// Returns 1 if the file cannot be opened, 0 once end-of-file is flagged
// after an insert, and 2 if fgets() ends the loop first.
int main() {
FILE *fp = fopen("small", "r");
if (fp == NULL) {
printf("Could not open file\n");
return 1;
}
// Room for 45 characters plus the terminating NUL.
char word[46];
// fgets() keeps the trailing '\n' of each line, so the newline is part of
// the string handed to insert().
while (fgets(word, sizeof(word), fp)) {
insert(word);
// NOTE(review): fgets() already returns NULL at end of file, so this check
// is not needed to end the loop.
if (feof(fp)) {
return 0;
}
}
// NOTE(review): fp is never closed on any return path.
return 2;
}
//Inserts word into trie
// Starts at the root DICT and, for each character processed, follows the
// child slot chosen by toIndex(), allocating the child when the slot is NULL.
void insert(char *string) {
dicto *trav; //Pointer to walk through the trie
// NOTE(review): DICT is not allocated anywhere in this listing; if it is
// still NULL here, the first trav->children access dereferences NULL.
trav = DICT;
// NOTE(review): the condition `n = strlen(string)` is an assignment, not a
// comparison. n is reset to the string length before every pass, so the loop
// only stops for an empty string, and string[n] is always the terminating
// '\0' -- for which toIndex() yields 0 - 65 = -65, an out-of-bounds index.
for (int n = 0; n = strlen(string); n++) {
if (trav->children[toIndex(string[n])] == NULL) {
// NOTE(review): sizeof(DICT) is the size of a pointer, not of a dicto node,
// and malloc() leaves the new memory uninitialized.
trav->children[toIndex(string[n])] = malloc(sizeof(DICT));
// The character is written into the current node before stepping down.
trav->letter = string[n];
trav = trav->children[toIndex(string[n])];
} else {
trav->letter = string[n];
trav = trav->children[toIndex(string[n])];
}
// Reads the letter field of the node that was just stepped into.
if (trav->letter == '\0') {
trav->is_word = true;
}
}
return;
}
/**
 * Output alphabetic index from given input
 *
 * Upper-cases `s` and subtracts 65 (the ASCII code of 'A'), so the letters
 * map to 0..25. No range check is done: any other character gives a value
 * outside 0..25, e.g. '\n' gives 10 - 65 = -55 and '\0' gives -65.
 */
int toIndex(char s) {
s = toupper(s);
int index = s - 65;
return index;
}
I've tried debugging it with Valgrind and GDB. The output from Valgrind is:
==1979== Memcheck, a memory error detector
==1979== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==1979== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==1979== Command: ./test_function1
==1979==
==1979== Invalid read of size 4
==1979== at 0x8048684: insert (in /home/test_function1)
==1979== by 0x80485F7: main (in /home/test_function1)
==1979== Address 0xffffff00 is not stack'd, malloc'd or (recently) free'd
==1979==
==1979==
==1979== Process terminating with default action of signal 11 (SIGSEGV)
==1979== Access not within mapped region at address 0xFFFFFF00
==1979== at 0x8048684: insert (in /home/test_function1)
==1979== by 0x80485F7: main (in /home/test_function1)
==1979== If you believe this happened as a result of a stack
==1979== overflow in your program's main thread (unlikely but
==1979== possible), you can try to increase the size of the
==1979== main thread stack using the --main-stacksize= flag.
==1979== The main thread stack size used in this run was 8388608.
==1979==
==1979== HEAP SUMMARY:
==1979== in use at exit: 344 bytes in 1 blocks
==1979== total heap usage: 2 allocs, 1 frees, 4,440 bytes allocated
==1979==
==1979== LEAK SUMMARY:
==1979== definitely lost: 0 bytes in 0 blocks
==1979== indirectly lost: 0 bytes in 0 blocks
==1979== possibly lost: 0 bytes in 0 blocks
==1979== still reachable: 344 bytes in 1 blocks
==1979== suppressed: 0 bytes in 0 blocks
==1979== Reachable blocks (those to which a pointer was found) are not shown.
==1979== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==1979==
==1979== For counts of detected and suppressed errors, rerun with: -v
==1979== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
Segmentation fault (core dumped)
Running it under GDB, it looks like the error comes from line 54:
if (trav->children[toIndex(string[n])] == NULL)
No idea on what might be happening.
This is just a quick answer regarding one of the possible issues with the code in the question. I didn't read through the whole thing.
After the following allocation, the memory is full of junk data:
trav->children[toIndex(string[n])] = malloc(sizeof(dicto));
You would be better off either using calloc (which guarantees the memory to be zeroed out):
trav->children[toIndex(string[n])] = calloc(sizeof(dicto), 1);
Or zero out the data yourself:
trav->children[toIndex(string[n])] = malloc(sizeof(dicto));
memset(trav->children[toIndex(string[n])], 0, sizeof(dicto));
If you leave the junk data in memory, then the following condition might be false even when it should be true:
if(trav->children[toIndex(string[n])] == NULL)
PS
Also, `sizeof(DICT)` is the size of the pointer, NOT of the structure. Consider `sizeof(*DICT)` or `sizeof(dicto)` instead.
There are multiple problems in your code:
Testing `feof(fp)` does not do what you think; it is actually unnecessary, as `fgets()` will return `NULL` at end of file.
The loop `for (int n = 0; n = strlen(string); n++)` never ends for a non-empty string, because `n` is reassigned to the length of the string every time the condition is evaluated. Use this instead:
for (int n = 0, len = strlen(string); n < len; n++) {
When you allocate a new node, you must initialize the structure; otherwise you may have undefined behavior, as the memory block returned by `malloc()` is uninitialized. Use `calloc()` instead.
The `toIndex()` function does not necessarily return a value in the range `0` to `26`. You should not hard-code the value of `'A'`, and you should test whether the character is indeed a letter.
Here is a modified version:
#include <stdbool.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define ALPHABET_SIZE 27
#define SIZE 45
//Trie data structure declaration
// Each node links to its children through children[]; insert() walks and
// extends these links one character at a time.
typedef struct _dictionary {
// Set to true by insert() on the node where an inserted string ends.
bool is_word;
// Character recorded by insert() for this node.
char letter;
// Child links, indexed by the value toIndex() returns for a character.
struct _dictionary *children[ALPHABET_SIZE];
} dicto;
// Root of the trie; insert() starts every walk from here.
dicto *DICT;
//Function prototypes
// insert: adds one NUL-terminated string to the trie rooted at DICT.
void insert(char *string);
// toIndex: converts a character to an index into a node's children[] array.
int toIndex(char s);
// Loads every non-empty line of the file "small" into the trie.
// Returns 0 on success and 1 if the file cannot be opened.
int main(void) {
    char word[SIZE + 1];
    FILE *fp = fopen("small", "r");
    if (fp == NULL) {
        printf("Could not open file\n");
        return 1;
    }
    while (fgets(word, sizeof(word), fp)) {
        // fgets() keeps the line terminator; cut it off so that only the
        // word itself is stored in the trie.
        word[strcspn(word, "\r\n")] = '\0';
        // Skip blank lines. This also covers the lone "\n" that is left
        // over when a line fills the buffer exactly.
        if (word[0] != '\0') {
            insert(word);
        }
    }
    fclose(fp);
    return 0;
}
//Inserts word into trie
void insert(char *string) {
dicto *trav = DICT; //Pointer to walk through the trie
for (int n = 0, len = strlen(string); n < len; n++) {
int index = toIndex(string[n]);
if (trav->children[index] == NULL) {
trav->children[index] = malloc(sizeof(DICT));
}
trav->letter = string[n];
trav = trav->children[index];
}
trav->is_word = true;
}
/**
 * Map a character to its slot in a trie node's child array (assuming ASCII).
 *
 * Letters of either case yield 0..25 ('a'/'A' -> 0, 'z'/'Z' -> 25);
 * every other character yields 26.
 */
int toIndex(char c) {
    int is_lower = ('a' <= c) && (c <= 'z');
    int is_upper = ('A' <= c) && (c <= 'Z');

    if (!is_lower && !is_upper) {
        return 26; /* not a letter */
    }
    return c - (is_lower ? 'a' : 'A');
}
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.