简体   繁体   中英

This TRIE algorithm code runs on CS50 IDE compiler but goes into an infinite loop in TDM-GCC on Windows

I wrote this code to search for a word in a dictionary using a trie data structure. It runs perfectly in the CS50 IDE, compiled with either make (Clang) or GCC, and always gives the right answer. But when I compile and run the same code with my GCC compiler on Windows (TDM-GCC), it goes into an infinite loop and starts using a lot of RAM (512 MB by the time I forcefully closed it). The code I ran was exactly the same in both cases, and in both cases it compiled without errors.

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

/* Type names used by the trie: `trieblock` is one slot, `node` points at a
 * slot (in practice at the first slot of a 26-slot level). */
typedef struct trieblock123 trieblock;
typedef trieblock *node;

/* One slot of a trie level; main allocates these in arrays of 26, one slot
 * per lowercase letter. */
struct trieblock123 {
    char alphabet;      /* the letter this slot stands for */
    char reply[5];      /* main stores "yes" here when a word ends on this slot */
    trieblock *pnt;     /* next level reached through this letter, or NULL */
};

/* Handle for the whole trie. */
typedef struct {
    node first;         /* root level */
    int count;          /* set by main once the dictionary has been read */
} head;

/* Declared here; no definition is visible in this listing. */
void load(FILE *dict, head *header);
void init(node pointer);

int main(void)
{
    FILE* dict = fopen("large", "r");

    head* header = malloc(sizeof(head));
    (*header).count = 0;
    (*header).first = NULL;


    node curtrie = NULL;
    node temptrie = NULL;
    node temptrie1 = NULL;
    char temp;
    temp = fgetc(dict);
    int counter = 0;
    temptrie = (node)(malloc(26 * sizeof(trieblock)));
    int i;
    for(i = 0; i < 26; i++)
    {
        (temptrie[i]).alphabet = (char)(((int)('a')) + i);
        (temptrie[i]).pnt = NULL;
    }
    if(counter == 0)
    {
        (*header).first = temptrie;
    }
    while(((int)(temp) <= (int)('z') && (int)(temp) >= (int)('a')) || temp == '\n')
    {
        if(((int)(temp) > (int)('z') || (int)(temp) < (int)('a')) && temp != '\n')
            break;
        curtrie = temptrie;

        while(temp != '\n')
        {
            char temp1;
            temp1 = fgetc(dict);
            if((curtrie[(int)(temp) - (int)('a')]).pnt == NULL) 
            {
                if(temp1 != '\n')
                {
                    temptrie1 = (node)(malloc(26 * sizeof(trieblock)));
                    for(i = 0; i < 26; i++)
                    {
                        (temptrie1[i]).alphabet = (char)(((int)('a')) + i);
                        (temptrie1[i]).pnt = NULL;
                    }

                    (curtrie[(int)(temp) - (int)('a')]).pnt = temptrie1;
                    curtrie = temptrie1;
                }
                else
                {
                    strcpy((curtrie[(int)(temp) - (int)('a')]).reply, "yes");
                }
            }
            else if((curtrie[(int)(temp) - (int)('a')]).pnt != NULL)
            {
                curtrie = (curtrie[(int)(temp) - (int)('a')]).pnt;
            }
            fseek(dict, -1 * sizeof(char), SEEK_CUR);   
            temp = fgetc(dict);
        }


        if(temp == '\n')
        {
            temp = fgetc(dict);
        }

        counter++;
    }
    (*header).count = counter;

    char tocheck[100];
    scanf("%s", tocheck);

    i = 0;
    node start = NULL;
    start = temptrie;

    for(i = 0; i < strlen(tocheck); i++)
    {
        char cha = tocheck[i];
        if(i != strlen(tocheck) - 1)
        {
            if((start[(int)(cha) - (int)('a')]).pnt == NULL)
            {
                printf("mis-spelled\n");
                break;
            }
            else
            {
                start = (start[(int)(cha) - (int)('a')]).pnt;
            }
        }
        else
        {
            if(strcmp(((start[(int)(cha) - (int)('a')]).reply), "yes") == 0)
            {
                printf("correctly spelled\n");
                break;
            }
            else
            {
                printf("mis-spelled\n");
                break;
            }
        }
    }
    return 0;
}

This may not be the kind of answer you were expecting, however.

Your code is hard to debug and maintain because of these problems:

  1. repetitions - this is error prone, use functions instead
  2. too many unnecessary casts - check C type system and how types are implicitly converted to each other
  3. weird typedefs - do not redefine pointer types unless you are using some kind of suffix or prefix to indicate that the respective typedef is indeed a pointer
  4. too many parentheses - you do not need most of them (in particular, instead of writing things like (*s).a = something, use s->a = something)

Some other issues are:

  1. Whenever you call malloc, do not expect the memory to be zeroed (in fact, do not expect to get any memory at all); your original code may segfault if reply happens to contain garbage.
  2. Strings in C are zero-terminated, so strlen has to iterate over the entire string; unless you are modifying the string, compute the length once, cache it in a variable, and reuse it.
  3. Keep track of your conditional statements: there is no need to double-check the negated condition.
  4. When working with I/O, try to minimize the number of calls whenever appropriate.
  5. Do not mix responsibilities. For instance, the code that checks whether a string exists in your trie structure should not also be responsible for printing the answer.
  6. Do not use strings as flags; this is just plain confusing.

This one should be an equivalent (unless I messed up):

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

/* One slot of a trie level; levels are arrays of NLETTERS such slots. */
struct node_s {
  char alphabet;        /* the letter this slot stands for */
  char present;         /* non-zero when a word ends on this slot */
  struct node_s *pnt;   /* next level reached through this letter, or NULL */
};
typedef struct node_s node_t;

/* Handle for the whole trie. */
typedef struct {
    node_t *first;      /* root level */
    int count;          /* incremented by main for every word it stores */
} head_t;

/* Number of slots in one trie level. */
#define NLETTERS 26

/* Return 1 when c lies in the range 'a'..'z', 0 otherwise. */
static inline int is_letter(char c) {
  if (c < 'a') {
    return 0;
  }
  return c <= 'z';
}

inline static node_t* new_trieblock() {
  const int size = NLETTERS * sizeof(node_t);
  node_t* block = malloc(size);
  memset(block, 0, size);
  for (int i = 0; i < NLETTERS; i++) {
    block[i].alphabet = 'a' + i;
  }
  return block;
}

inline static int trie_has(node_t *n, char *str) {
  node_t *trie = n;
  int len = strlen(str);
  for (int i = 0; i < len - 1; i++) {
    trie = trie[str[i] - 'a'].pnt;
    if (!trie) return 0;    
  }
  return trie[str[len-1] - 'a'].present;
}

/*
 * Build a trie from the word list in the file "large" (newline-separated
 * lowercase words), read one word from standard input and print whether the
 * word list contains it.
 *
 * Returns 0 after printing a verdict; EXIT_FAILURE if the word list cannot
 * be opened, memory runs out, or no word can be read from standard input.
 */
int main(void) {
    FILE *dict = fopen("large", "r");
    if (!dict) {
        perror("large");
        return EXIT_FAILURE;
    }

    head_t *header = malloc(sizeof *header);
    if (!header) {
        fprintf(stderr, "out of memory\n");
        fclose(dict);
        return EXIT_FAILURE;
    }
    header->count = 0;
    header->first = new_trieblock();
    if (!header->first) {
        fprintf(stderr, "out of memory\n");
        free(header);
        fclose(dict);
        return EXIT_FAILURE;
    }

    node_t *trie = header->first;

    /* int, not char: fgetc() reports end of file as EOF, which a char cannot
     * tell apart from a data byte. */
    int c = fgetc(dict);

    /* Skip leading blank lines so '\n' is never used as a trie index. */
    while (c == '\n') {
        c = fgetc(dict);
    }

    /* Invariant: at the top of the loop c is never '\n'. */
    while (c != EOF && is_letter((char)c)) {
        int nc = fgetc(dict);

        if (nc == '\n' || nc == EOF) {
            /* c is the last letter of a word. */
            trie[c - 'a'].present = 1;
            header->count++;
        } else {
            if (!trie[c - 'a'].pnt) {
                trie[c - 'a'].pnt = new_trieblock();
                if (!trie[c - 'a'].pnt) {
                    fprintf(stderr, "out of memory\n");
                    fclose(dict);
                    return EXIT_FAILURE;
                }
            }
            trie = trie[c - 'a'].pnt;
        }

        c = nc;
        /* End of line: the next word starts again at the root. */
        while (c == '\n') {
            trie = header->first;
            c = fgetc(dict);
        }
    }
    fclose(dict);

    char tocheck[100];
    /* The field width keeps the input inside tocheck[100]. */
    if (scanf("%99s", tocheck) != 1) {
        fprintf(stderr, "no word to check\n");
        return EXIT_FAILURE;
    }

    if (trie_has(header->first, tocheck)) {
        printf("correctly spelled\n");
    } else {
        printf("mis-spelled\n");
    }

    /* The trie is not freed here; the process ends immediately afterwards. */
    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM