简体   繁体   中英

segmentation fault (core dumped) in binary search tree

I am trying to implement a r&b tree, but firstly I want a simple binary tree (which does not save content on its leaves) and then implement r&b properties. Problem is I get segmentation fault I cannot explain.

Program is as follows:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <stdbool.h>

typedef struct node
{
    unsigned long int val;
    bool black;
    struct node* parent;
    struct node* lchild;
    struct node* rchild;
}mynode;

mynode* createNode(unsigned long int ival, mynode* father);
mynode* createLeaf(unsigned long int ival, mynode* father);
mynode* search (unsigned long int ival, mynode *root);
void insert ( unsigned long int ival, mynode *root);


int main()
{
    mynode root;
    mynode *rootptr;
    mynode *leafptr;
    FILE *fp;
    int ch;
    unsigned long long lines=0, i=0;
    unsigned long *myArr;
    unsigned long int ival;

   fp = fopen("integers.txt","r");
   if(fp == NULL)
   {
      printf("Error in opening file.");
      return(-1);
   }

    while(!feof(fp))
    {
    ch = fgetc(fp);
    if(ch == '\n')
    {
        lines++;
    }
    }
    lines++;
    printf("lines = %lu", lines);
    myArr = (unsigned long*)calloc(lines, sizeof(unsigned long));

    fseek(fp, 0, SEEK_SET);
    while(!feof(fp))
    {
          fscanf(fp, "%lu,", &myArr[i] ); // des ta pos k giati tou input.
          i++;
    }
    fclose(fp);

    root.val = myArr[0];
    root.parent = NULL;
    root.lchild = NULL;
    root.rchild = NULL;
    root.black = true;
    rootptr = &root;
    leafptr = createLeaf(rootptr->val, rootptr);
    rootptr->lchild = leafptr;
    leafptr = createLeaf(rootptr->val, rootptr);
    rootptr->rchild = leafptr;
    for(i=1; i<lines; i++)
    {
        ival = myArr[i];
        insert(ival, rootptr);
    }

    return 0;
}

mynode* createNode(unsigned long int ival, mynode* father)
{
  mynode* nodeptr;
  mynode node;
  nodeptr = &node;
  nodeptr->val = ival;
  nodeptr->lchild = NULL;
  nodeptr->rchild = NULL;
  nodeptr->parent = father;
  nodeptr->black = true;
  return nodeptr;
}

mynode* createLeaf(unsigned long int ival, mynode* father)
{
  mynode* nodeptr;
  mynode leaf;
  nodeptr = &leaf;
  nodeptr->val = ival;
  nodeptr->lchild = NULL;
  nodeptr->rchild = NULL;
  nodeptr->parent = father;
  nodeptr->black = true;
  return nodeptr;
}

mynode* search (unsigned long int ival, mynode *rootptr)
{
    mynode* myptr;

    myptr = rootptr;

    while ( ( (myptr->lchild) != NULL) && ( (myptr->rchild) != NULL))
    {
        if ( ival < myptr->val)
        {
            myptr = myptr->lchild;
        }
        else
        {
            myptr = myptr->rchild;
        }
    }
    return myptr;
    }

void insert (unsigned long int ival, mynode *root)
{
    mynode * current;
    mynode * leafptr;
    mynode * father;
    unsigned long int max, min;
    unsigned long int num;

    current = search(ival, root);
    num = current->val;
    if((current->val) == ival)
    {
        return ;
    }
    else
    {
        if(ival>(current->val))
        {
            max = ival;
            min = current->val;
        }
        else
        {
            max = current->val;
            min = ival;
        }
        father = current->parent;
        current = createNode(min, father);
        if(num == (father->lchild)->val)
        {
            father->lchild = current;
        }
        else
        {
            father->rchild = current;
        }
        leafptr = createLeaf(min, current);
        current->lchild = leafptr;
        leafptr = createLeaf(max, current);
        current->rchild = leafptr;
       return ;
    }
}

I open a file. Count the number of lines, because I know each line has one number. Create an array using the above information. I then create the root and its 2 leaves. And then I insert (there I get segmentantion fault) rest of the array to my data structure. I think problem lies in the functions.

Here is the text file with numbers.

mynode* nodeptr;
mynode node;
nodeptr = &node;

Here node lives on the stack in memory that will be reclaimed at function exit. You're returning a pointer to memory that you do not own! You'll want to use the function malloc() , like so:

mynode* nodeptr = malloc(sizeof(mynode));

This will allocate memory on the heap that nodeptr will point to. This memory will not be reclaimed on function exit. To reclaim this memory, you will need to call free() .

Just too many bugs and issues to go thorough.

Let us instead look at a proper example, with error checking, that outputs the generated binary search tree in Graphviz DOT format.

First, you'll want to keep the pointers at the beginning of the structure, because they are most used, and require proper alignment. (If you do not put the biggest members first, the compiler may insert padding into your structures, wasting memory. No, the compiler is not allowed to reorder the structure members in C, so it cannot do that for you.)

#include <stdlib.h>
#include <stdio.h>

struct node {
    struct node *parent;
    struct node *left;
    struct node *right;
    double       value;
};

Next, we need a function for creating a new node. It is a good idea to check for malloc() returning NULL , and if so, abort the program:

struct node *new_node(const double value)
{
    struct node *n;

    n = malloc(sizeof (struct node));
    if (!n) {
        fprintf(stderr, "Out of memory.\n");
        exit(EXIT_FAILURE);
    }

    n->parent = NULL;
    n->left = NULL;
    n->right = NULL;
    n->value = value;

    return n;
}

Next, you need a function that will insert a node into a tree. The tree itself is just the handle to its root member, and since the new node can be the new root, we need to pass a pointer to the pointer to the root element:

void insert_node(struct node **root, struct node *leaf)
{
    struct node *parent;

    /* Make sure we have a pointer to the root pointer, and a leaf node. */
    if (!root) {
        fprintf(stderr, "insert_node(): root == NULL!\n");
        exit(EXIT_FAILURE);
    }
    if (!leaf) {
        fprintf(stderr, "insert_node(): leaf == NULL!\n");
        exit(EXIT_FAILURE);
    }

    /* Make sure leaf pointers are all NULL. */
    leaf->parent = NULL;
    leaf->left = NULL;
    leaf->right = NULL;

The above code is just sanity checks, but I wanted to include them for completeness. Anyway, if the tree is empty, the root pointer points to a NULL pointer, ie *root == NULL . In this case, the leaf is the new (sole) node in the tree:

    /* Is this a new root node? */
    if (!*root) {
        /* Yes. */
        *root = leaf;
        return;
    }

Otherwise, we need to descend into the tree. I've decided that left means "less than or equal to" , because it is easy to remember. If we are going left, and parent's left node is empty, that is where we'll put the new leaf node. Similarly, if we are going right, and parent's right node is empty, we put the leaf there. Otherwise we descend.

    /* Find the parent node where leaf belongs. */
    parent = *root;
    while (1)
        if (parent->value >= leaf->value) {
            if (parent->left) {
                parent = parent->left;
                continue;
            }

            /* This belongs at parent->left. */
            parent->left = leaf;
            leaf->parent = parent;
            return;

        } else {
            if (parent->right) {
                parent = parent->right;
                continue;
            }

            /* This belongs at parent->right. */
            parent->right = leaf;
            leaf->parent = parent;
            return;
        }
}

To traverse the tree, and print its structure in DOT language, one needs just two functions: a recursive function to print the node, and a main function to print the boilerplate, and to call the recursive function. I use %p or node pointer value as the node identifiers, because it is simple and reliable:

static void dot_recurse(FILE *out, struct node *one)
{
    fprintf(out, "    \"%p\" [ label=\"%.3f\" ];\n", (void *)one, one->value);

    if (one->parent)
        fprintf(out, "    \"%p\" -> \"%p\";\n", (void *)one, (void *)(one->parent));

    if (one->left) {
        dot_recurse(out, one->left);
        fprintf(out, "    \"%p\" -> \"%p\" [ label=\"≤\" ];\n", (void *)one, (void *)(one->left));
    }

    if (one->right) {
        dot_recurse(out, one->right);
        fprintf(out, "    \"%p\" -> \"%p\" [ label=\">\" ];\n", (void *)one, (void *)(one->right));
    }
}

void dot(FILE *out, struct node *tree)
{
    if (out && tree) {
        fprintf(out, "digraph {\n");
        dot_recurse(out, tree);
        fprintf(out, "}\n");
    }
}

Above, arrows to parent will be unlabeled, arrows to left child will be labeled , and arrows to right child will be labeled > near the middle of the arrow.

Note that for dot() , the first parameter is the stream the file will be emitted to, and the second parameter is a pointer to the root node. Because we do not modify the tree, the pointer to the root node suffices; we do not need a pointer to a pointer to the root node here.

Finally, we need to read values from a stream (here, standard input), and construct a tree node from each parsed value, and insert them to the tree. There is absolutely no reason to read the file twice, as the number of values is irrelevant: we can simply read values until we cannot!

int main(void)
{
    struct node *tree = NULL;
    double       value;

    while (scanf(" %lf", &value) == 1)
        insert_node(&tree, new_node(value));

    /* Dump tree in DOT format. Use Graphviz to visualize the output. */
    dot(stdout, tree);

    return EXIT_SUCCESS;
}

The latter part of main() just dumps the tree in DOT format to standard output, and exits (successfully). It is not necessary to free dynamically allocated memory prior to exiting, as the operating system will do that automatically.

Let's say we have an input file in.txt containing

4.695 5.108 3.518 4.698 8.496
7.956 9.435 5.341 0.583 7.074
7.661 5.966 0.557 4.332 1.436
6.170 7.936 4.630 7.694 0.220

and we executed our program, piping that file to its standard input, and piping the output to say out.dot . (In Linux, Mac OS, and BSDs, this would be just ./binary < in.txt > out.dot , after compiling the above C source to an executable named binary in the current directory.)

The out.dot will then contain

digraph {
    "0x13dd020" [ label="4.695" ];
    "0x13dd080" [ label="3.518" ];
    "0x13dd080" -> "0x13dd020";
    "0x13dd1a0" [ label="0.583" ];
    "0x13dd1a0" -> "0x13dd080";
    "0x13dd260" [ label="0.557" ];
    "0x13dd260" -> "0x13dd1a0";
    "0x13dd3b0" [ label="0.220" ];
    "0x13dd3b0" -> "0x13dd260";
    "0x13dd260" -> "0x13dd3b0" [ label="≤" ];
    "0x13dd1a0" -> "0x13dd260" [ label="≤" ];
    "0x13dd2c0" [ label="1.436" ];
    "0x13dd2c0" -> "0x13dd1a0";
    "0x13dd1a0" -> "0x13dd2c0" [ label=">" ];
    "0x13dd080" -> "0x13dd1a0" [ label="≤" ];
    "0x13dd290" [ label="4.332" ];
    "0x13dd290" -> "0x13dd080";
    "0x13dd350" [ label="4.630" ];
    "0x13dd350" -> "0x13dd290";
    "0x13dd290" -> "0x13dd350" [ label=">" ];
    "0x13dd080" -> "0x13dd290" [ label=">" ];
    "0x13dd020" -> "0x13dd080" [ label="≤" ];
    "0x13dd050" [ label="5.108" ];
    "0x13dd050" -> "0x13dd020";
    "0x13dd0b0" [ label="4.698" ];
    "0x13dd0b0" -> "0x13dd050";
    "0x13dd050" -> "0x13dd0b0" [ label="≤" ];
    "0x13dd0e0" [ label="8.496" ];
    "0x13dd0e0" -> "0x13dd050";
    "0x13dd110" [ label="7.956" ];
    "0x13dd110" -> "0x13dd0e0";
    "0x13dd170" [ label="5.341" ];
    "0x13dd170" -> "0x13dd110";
    "0x13dd1d0" [ label="7.074" ];
    "0x13dd1d0" -> "0x13dd170";
    "0x13dd230" [ label="5.966" ];
    "0x13dd230" -> "0x13dd1d0";
    "0x13dd2f0" [ label="6.170" ];
    "0x13dd2f0" -> "0x13dd230";
    "0x13dd230" -> "0x13dd2f0" [ label=">" ];
    "0x13dd1d0" -> "0x13dd230" [ label="≤" ];
    "0x13dd200" [ label="7.661" ];
    "0x13dd200" -> "0x13dd1d0";
    "0x13dd320" [ label="7.936" ];
    "0x13dd320" -> "0x13dd200";
    "0x13dd380" [ label="7.694" ];
    "0x13dd380" -> "0x13dd320";
    "0x13dd320" -> "0x13dd380" [ label="≤" ];
    "0x13dd200" -> "0x13dd320" [ label=">" ];
    "0x13dd1d0" -> "0x13dd200" [ label=">" ];
    "0x13dd170" -> "0x13dd1d0" [ label=">" ];
    "0x13dd110" -> "0x13dd170" [ label="≤" ];
    "0x13dd0e0" -> "0x13dd110" [ label="≤" ];
    "0x13dd140" [ label="9.435" ];
    "0x13dd140" -> "0x13dd0e0";
    "0x13dd0e0" -> "0x13dd140" [ label=">" ];
    "0x13dd050" -> "0x13dd0e0" [ label=">" ];
    "0x13dd020" -> "0x13dd050" [ label=">" ];
}

and if visualized (using eg dot -Tsvg out.dot > out.svg ), will look like this:

点生成的SVG图像

As you can see, each left child is equal to or less than its parent, and each right child is greater than its parent. Using DOT language output like this is an excellent method of debugging tree functions, too; you can even use node shape (oval or rectangle, for example) to indicate red/black -- or use actual colors. The DOT attribute list is here .

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM