简体   繁体   中英

Insert data into a trie

So I'm trying to insert data into a trie, and my code works fine. But when I change my insert function a little, it no longer works and it also causes a memory leak. To me, both versions of insert do the same thing, but obviously they do not. Can someone please explain why? Thanks in advance.

Here is the code that works

#include <stdio.h>
#include <stdbool.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/* Number of child slots per trie node: one per letter of the alphabet. */
#define SIZE 26

/*
 * Map a character to a child-slot index, ignoring case: 'a'/'A' -> 0 and,
 * on an ASCII-compatible character set, 'z'/'Z' -> 25.
 *
 * The argument is converted to unsigned char before it reaches tolower():
 * passing a negative char value (other than EOF) to a <ctype.h> function
 * is undefined behavior.
 *
 * The result is NOT range-checked here; a non-letter produces a value
 * outside [0, SIZE).
 */
#define hash(c) (tolower((unsigned char)(c)) - (int)'a')

/*
 * One trie node.
 *   endWord  - set to true on the node that terminates an inserted word.
 *   children - one pointer per hash() slot; NULL where no child exists.
 */
typedef struct node node;

struct node {
    bool endWord;
    node *children[SIZE];
};

/*
 * Release an entire trie: every subtree hanging off root is freed first,
 * then root itself. A NULL root is accepted and does nothing.
 */
void freeTrie(node* root){
    if (!root) {
        return;
    }

    /* Walk the child slots in order; empty (NULL) slots return immediately. */
    node **slot = root->children;
    node **const end = slot + SIZE;
    while (slot != end) {
        freeTrie(*slot);
        ++slot;
    }

    free(root);
}

/*
 * Allocate a single trie node with endWord cleared and all child slots
 * set to NULL.
 *
 * Returns the new node, or NULL if malloc() fails.
 */
node* newNode(){
    node *fresh = malloc(sizeof *fresh);
    if (fresh == NULL) {
        return NULL;
    }

    fresh->endWord = false;

    node **const end = fresh->children + SIZE;
    for (node **slot = fresh->children; slot != end; ++slot) {
        *slot = NULL;
    }

    return fresh;
}

/*
 * Insert a word into the trie rooted at root.
 *
 * Each character is mapped to a child slot with hash(), so letters are
 * matched case-insensitively. Nodes missing along the word's path are
 * created, and the last node reached is flagged with endWord.
 *
 *   root - trie root; a NULL root is ignored.
 *   data - NUL-terminated word; a NULL pointer is ignored.
 *
 * A word containing a character whose hash() value falls outside
 * [0, SIZE) is rejected before the trie is modified, so no out-of-bounds
 * child slot is ever touched and no partial path is left behind.
 * If allocating a node fails part-way through, a message is printed and
 * the word is not marked; nodes already created for its prefix remain.
 */
void insert(node* root, const char* data){

    if (root == NULL || data == NULL) {
        return;
    }

    /* Validate first: hash() does no range checking of its own. */
    for (const char *p = data; *p != '\0'; p++) {
        int index = hash(*p);

        if (index < 0 || index >= SIZE) {
            printf("Invalid character in word\n");
            return;
        }
    }

    node* temp = root;

    for (const char *p = data; *p != '\0'; p++) {

        int index = hash(*p);

        /*
         * Store the new node through the parent's slot (not through a
         * local copy of the pointer) so it is actually linked into the trie.
         */
        if (temp->children[index] == NULL) {

            temp->children[index] = newNode();

            if (temp->children[index] == NULL) {
                printf("Something went wrong\n");
                return;
            }
        }

        temp = temp->children[index];
    }

    temp->endWord = true;
}

/*
 * Report whether data is stored in the trie as a complete word.
 *
 * Each character is mapped to a child slot with hash(), so the lookup is
 * case-insensitive for letters.
 *
 *   root - trie root; a NULL root yields false.
 *   data - NUL-terminated word; a NULL pointer yields false.
 *
 * Returns true only if every character has a matching child and the node
 * reached last has endWord set. When the walk stops early, the diagnostic
 * "search end here" is printed before returning false.
 */
bool search(node* root, const char* data){

    if (root == NULL || data == NULL) {
        return false;
    }

    node* temp = root;

    for (const char *p = data; *p != '\0'; p++) {

        int index = hash(*p);

        /*
         * A character whose index falls outside [0, SIZE) has no slot in
         * the trie; treat it as a missing child instead of reading past
         * the children array.
         */
        temp = (index >= 0 && index < SIZE) ? temp->children[index] : NULL;

        if (temp == NULL) {
            printf("search end here\n");
            return false;
        }
    }

    return temp->endWord;
}

/*
 * Demo driver: builds a trie from a small fixed word list, looks up "fox",
 * prints whether it was found, and releases the trie.
 *
 * Returns 0 on success, 1 if the root node cannot be allocated.
 */
int main() {

    node *trie = newNode();
    if (!trie) {
        printf("Something went wrong\n");
        return 1;
    }

    char words[][8] = {"fox", "foo", "dog", "do"};
    const size_t wordCount = sizeof(words) / sizeof(words[0]);

    size_t w = 0;
    while (w < wordCount) {
        insert(trie, words[w]);
        w++;
    }

    printf("Check: \n");

    /* The word to look up is hard-coded rather than read from stdin. */
    bool hit = search(trie, "fox");

    char verdict[][32] = {"not found", "found"};
    printf("%s\n", hit ? verdict[1] : verdict[0]);

    freeTrie(trie);

    printf("Done\n");

    return 0;
}

Here is the version of insert that confuses me

/*
 * Modified insert from the question. It is kept exactly as posted because
 * it is the version that does NOT work.
 *
 * The walk advances temp to the child pointer *before* checking it. When
 * that child is NULL, the freshly allocated node is stored only in the
 * local variable temp; the parent's children[index] slot is never written.
 * As a result the new nodes are not linked into the trie: lookups from
 * root cannot find them, and freeTrie(root) cannot reach them to free them.
 */
void insert(node* root, const char* data){

    node* temp = root;

    for (size_t i = 0, len = strlen(data); i < len; i++) {

        int index = hash(data[i]);

        /* Copies the child pointer's value; the link to the parent slot is lost here. */
        temp = temp->children[index];

        if(temp == NULL){

            /* Assigns to the local copy only; the parent's slot stays NULL. */
            temp = newNode();

            if (temp /*still*/ == NULL){
                printf("Something went wrong\n");
                return;
            }
        }
    }

    /* Marks whichever node temp ended on, which may be one of the unlinked nodes. */
    temp->endWord = true;
}

PS: I am doing this for a problem set of the CS50x course, in which I have to load a dictionary of 143091 words (in alphabetical order) into my trie. My program takes about 0.1s to load and 0.06s to unload, whereas the staff's solution does the same job in just 0.02s and 0.01s. I am not allowed to see the staff's source code, but I guess they used a trie to store the data. How can I improve my code for a faster runtime? Would it run faster if I stored the data in an array and then used binary search instead?

When you write

temp = temp->children[index];

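you copy the value contained in temp->children[index] (I'll call it A) into a completely independent variable named temp. When you later modify temp, you modify only temp, not A. That is, none of the new nodes get inserted into the trie: the parent's children[index] slot stays NULL, so the nodes are unreachable from the root, which is also why they are leaked. To link a new node in, assign it to temp->children[index] itself, as the working version does, and only then advance temp.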

The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM