简体   繁体   中英

Inserting node in binary tree in C

I am learning C and found a basic tree implementation in my C book:

#include <stdio.h>
#include <stdlib.h>

struct tree_node {
  int data;
  struct tree_node *left_p, *right_p;
};

struct tree_node *t_search(struct tree_node *root, int v) {

  while (root) {
    printf("Looking for %d, looking at %d\n", v, root->data);
    if (root->data == v)
      return root;
    if (root->data > v)
      root = root->left_p;
    else
      root = root->right_p;
  }

  return 0;
}

int t_insert(struct tree_node **root, int v) {

  while (*root) {
    if ((*root)->data == v)
      return 1;
    if ((*root)->data > v)
      root = &((*root)->left_p);
    else
      root = &((*root)->right_p);
  }

  if ((*root = (struct tree_node *) malloc(sizeof(struct tree_node))) == 0)
    return 2;

  (*root)->data = v;
  (*root)->left_p = 0;
  (*root)->right_p = 0;

  return 0;
}

int main(void) {

  struct tree_node *tp, *root_p = 0;
  int i;

  t_insert(&root_p, 4);
  t_insert(&root_p, 2);
  t_insert(&root_p, 6);
  t_insert(&root_p, 1);
  t_insert(&root_p, 3);
  t_insert(&root_p, 4);
  t_insert(&root_p, 7);

  for (i = 0; i < 9; i++) {
    tp = t_search(root_p, i);
    if (tp)
      printf("%d found\n", i);
    else
      printf("%d not found\n", i);
  }

  return 0;
}

While the code seems to be straight forward, I am having a hard time to understand the t_insert function. Why does it take in a struct tree_node **root instead of struct tree_node *root ? t_serach is virtually the same code but uses only a pointer and not a pointer pointer. Maybe another example would explain the issue in a better way.

For what it's worth: I am from a Java background.

Note: Yes, I know the tree is not rebalanced upon insertion.

Some stuff we need:

#include <stdio.h>
struct thing {
   struct thing *next;
   int value;
   };

struct thing otherthing ={ NULL, 42};
struct thing something ={ &otherthing, 13};
int forty_two = 42;

    void do_the_int(int i);
    void do_the_intp(int *ip);
    void do_the_intpp(int **ipp);
    void do_the_structp(struct thing *p);
    void do_the_structpp(struct thing **pp);

A function can change its arguments, but the results will not be seen by the caller, so:

void do_the_int(int i) {
  i = 42;
}

is effectively a no-op. To actually change something, the function needs a pointer to it, like in:

void do_the_intp(int *ip) {
  *ip = 42;
}

now, the value where ip points to is actually changed by the function. Next, if you want to change a pointer the function will need a pointer to it: a pointer to pointer :

int forty_two = 42; // global (or static)
void do_the_intpp(int **ipp) {
  *ipp = &forty_two;
}

For other datatypes (than int), things are not different: if you want to change a struct, the function will need a pointer to struct , and if the function would need to change a pointer to struct it would need a pointer to pointer to struct . So

void do_the_structp(struct thing *p) {
   p->value = 42;
}

will actually change something within the struct *p , and

void do_the_structpp(struct thing **pp) {
   *pp = (*pp)->next;
}

Will actually change the pointer located at *pp .

Now, let's call them:

int main(void) {
int zero=0
  , one=1
  , two=2;
int *the_p;
struct thing *tp, *cur;

  the_p = &two;
  tp = &something;

  printf("Before: zero=%d, one=%d, two=%d the_p=%p *the_p=%d\n"
        , zero, one, two, (void*) the_p,*the_p);
  for(cur=tp; cur; cur = cur->next) {
        printf("p=%p Next=%p Val=%d\n"
              , (void*) cur, (void*) cur->next, cur->value );
        }

  do_the_int(zero);
  do_the_intp(&one);
  do_the_intpp(&the_p);
  do_the_structp(tp);
  do_the_structpp( &tp);

  printf("After: zero=%d, one=%d, two=%d the_p=%p *the_p=%d\n"
        , zero, one, two, (void*) the_p,*the_p);
  for(cur=tp; cur; cur = cur->next) {
        printf("p=%p Next=%p Val=%d\n"
              , (void*) cur, (void*) cur->next, cur->value );
        }


  return 0;
}

Output:

Before: zero=0, one=1, two=2 the_p=0x7fff97a7db28 *the_p=2
p=0x601030 Next=0x601020 Val=13
p=0x601020 Next=(nil) Val=42
After: zero=0, one=42, two=2 the_p=0x601040 *the_p=42
p=0x601020 Next=(nil) Val=42

It passes in a double pointer because it needs to change pointers in the process. All elements of a tree can be thought of as pointers which need to be changed when insert is called. If you don't pass in a double pointer, what you're essentially doing is copying pointer as a local variable to the function and changes have no effect when the insert function exits. Ignore me if I'm wrong.

If you want to only read what is pointed by a pointer, your argument is of type foo* . If you want to change what is pointed by a pointer, your argument is of type foo * . But there, you want to be able to change what you point to. So, you need a pointer to your pointer, to be able to change its value.

In java, any method having as an argument a TreeNode is equivalent to a C function having as an argument a tree_node* ; hence, if you have a method with that signature :

// Java code
static void treeInsert (TreeNode t);

Within that method, you can access methods from t that can read its current status, you can even modify the state of t , and that state will be changed even once you left the method.

But, if you're doing something like that

t = null;

that won't have an effect once you leave the method. That means

// Java code
static void treeNull (TreeNode t) {
    t = null;  // Warning, local change only !
}

static void treeChange (TreeNode t) {
    t.changeState();
}

static void foo () {
    t = new TreeNode();
    t.treeChange();   // Ok, t has now an update state
    t.treeNull();     // Warning, t is not null here
}

You can't have the equivalent of a double pointer in java, so you can't make that work.

In C, you can do this :

// C code
void updateState(tree_node* t) {
    data = data + 1;  // This is nonsense, just to make the point
}

void localChangePointer(tree_node* t) {
    t = NULL;
}

void changePointer(tree_node** t) {
    free_and_clean(*t);
    *t = NULL;
}

void sillyStuff() {
    tree_node* t;
    // Initialize t
    ...

    updateState(t); // (*t).data changed
    localChangePointer(t);  // Warning : t is not null yet !
    changePointer(&t);      // This time, we gave the address of t, not t itself

    // Now t == NULL;
}

Maybe you will like such variant:

#include <stdio.h>
#include <stdlib.h>
#include <err.h>

typedef struct btree_node{
    int data;
    struct btree_node *left, *right;
} BTree;

// get tree leaf nearest to v
BTree *bt_get(BTree *root, int v){
    if(!root) return NULL;
    int D = root->data;
    if(D == v) return root;
    if(root->data > v) return root->left;
    else return root->right;
}

// find leaf with v or nearest to it (prev)
BTree *bt_search(BTree *root, int v, BTree **prev){
    if(!root){
        if(prev) *prev = NULL;
        return NULL;
    }
    BTree *p;
    do{
        p = root;
        root = bt_get(root, v);
    }while(root && root->data != v);
    if(prev) *prev = p;
    return root;
}

BTree *bt_create_leaf(int v){
    BTree *node = calloc(1, sizeof(BTree));
    if(!node) return NULL;
    node->data = v;
    return node;
}

// insert new leaf (or leave it as is, if exists)
// return root node
BTree *bt_insert(BTree *root, int v){
    BTree *closest, *node;
    node = bt_search(root, v, &closest);
    if(node) return node; // value exists
    if(!closest) // there's no values at all!
        return bt_create_leaf(v); // create root node
    // we have a closest node to our data, so create node and insert it:
    node = bt_create_leaf(v);
    if(!node) return NULL;
    int D = closest->data;
    if(D > v) // there's no left leaf of closest node, add our node there
        closest->left = node;
    else
        closest->right = node;
    return root;
}

int main(void){
    BTree *tp, *root_p = NULL;
    int i, ins[] = {4,2,6,1,3,4,7,14,0,12};
    for(i = 0; i < 10; i++)
        if(!(root_p = bt_insert(root_p, ins[i]))) err(1, "Malloc error"); // can't insert
    for(i = 0; i < 15; i++){
        tp = bt_search(root_p, i, NULL);
        printf("%d ", i);
        if(!tp) printf("not ");
        printf("found\n");
    }
    return 0;
}

// I was wrong in previous answer, correct

Here would be doing it without double pointers. I don't see any difference.

int t_insert(struct tree_node *root, int v) {
  while (root) {
    if (root->data == v)
      return 1;
    if (root->data > v)
      root = &(root->left_p);
    else
      root = &(root->right_p);
  }

  if ((root = (struct tree_node *) malloc(sizeof(struct tree_node))) == 0)
    return 2;

  root->data = v;
  root->left_p = 0;
  root->right_p = 0;

  return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM