简体   繁体   中英

BST and Linked List C program stuck at large values

I'm doing a college work where the professor asked us to implement BST and linked list and count how many comparisons it makes to insert and search a large amount of randomly-generated values. We're supposed to start at 10 values, then 100, then 1000, up until 10^12. The thing is, it always gets stuck at 100000 (10^5). The RAM usage is low, but CPU is at max. I'm freeing both the tree and lists after each step. The code is found here (offsite) and below.

Just to sum up some important points: each value (the key to the node) is an unsigned it (max 65535), but up to 10^12 values should be inserted and another 10^12 searched.

Is it supposed to take this long? My processor is an i5-7200u

Is there a chance it's a memory problem and GCC is blocking it somehow?

Thanks a lot

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef long long ll;
typedef unsigned long long ull;

// BST

typedef struct no_A_struct {
  unsigned int chave;
  struct no_A_struct *esquerda;
  struct no_A_struct *direita;
} no_A;

// new node
no_A *novo_no_A(unsigned int chave) {
  no_A *no = (no_A *) malloc(sizeof(no_A));
  no->chave = chave;
  no->esquerda = no->direita = NULL;
  return no;
}

// insert note
no_A *insere_A(no_A *raiz, unsigned int chave, ull *cont) {
  (*cont)++; if (raiz == NULL) return novo_no_A(chave);
  (*cont)++; if (chave < raiz->chave) raiz->esquerda = insere_A(raiz->esquerda, chave, cont);
  else {
    (*cont)++; if (chave > raiz->chave) raiz->direita = insere_A(raiz->direita, chave, cont);
  }
  return raiz; 
}

// search node
no_A *busca_A(no_A *raiz, unsigned int chave, ull *cont) {
  (*cont)++; if (raiz == NULL) return NULL;
  (*cont)++; if (chave == raiz->chave) return raiz;
  (*cont)++; if (chave > raiz->chave) return busca_A(raiz->direita, chave, cont);
  return busca_A(raiz->esquerda, chave, cont);
}

// free tree
void desaloca_A(no_A *raiz) { // TODO iterativa?
  if (raiz == NULL) return;
  desaloca_A(raiz->esquerda);
  desaloca_A(raiz->direita);
  free(raiz);
}

// LINKED LIST WITH IN ORDER INSERTION

typedef struct no_L_struct {
  unsigned int chave;
  struct no_L_struct *prox;
} no_L;

// new node
no_L *novo_no_L(unsigned int chave) {
  no_L *no = (no_L *) malloc(sizeof(no_L));
  no->chave = chave;
  no->prox = NULL;
  return no;
}

// insert node
void insere_L(no_L **inicio, unsigned int chave, ull *cont) {
  no_L *novo_no = novo_no_L(chave);
  (*cont)++; if (*inicio == NULL) { *inicio = novo_no; return; }
  (*cont)++; if (novo_no->chave <= (*inicio)->chave) {
    novo_no->prox = *inicio;
    *inicio = novo_no;
  } else {
    no_L *atual = *inicio;
    for (;;) {
      (*cont)++; if (atual->prox == NULL) break;
      (*cont)++; if (novo_no->chave <= atual->prox->chave) break;
      atual = atual->prox;
    }
    novo_no->prox = atual->prox;
    atual->prox = novo_no;
  }
}

// search node
no_L *busca_L(no_L *atual, unsigned int chave, ull *cont) {
  for (;;) {
    (*cont)++; if (atual == NULL) break;
    (*cont)++; if (atual->chave == chave) break;
    atual = atual->prox;
  }
  return atual;
}

// void printa_L(no_L *atual) {
//   if (atual == NULL) return;
//   printf("%u", atual->chave);
//   printa_L(atual->prox);
// }

// free list
void desaloca_L(no_L *atual) {
  no_L *no_apagar;
  while (atual != NULL) {
    no_apagar = atual;
    atual = atual->prox;
    free(no_apagar);
  }
}

int main() {
  ll QTD_VALORES[] = {10, 100, 1000, // 10^: 1, 2, 3
              10000, 100000, 1000000, // 4, 5, 6
              1000000000, 10000000000, // 9, 10
              100000000000, 1000000000000}; // 11, 12
  int ITERACOES = 1; // TODO voltar pra 100
  unsigned int VALOR_MAX = 65535;

  int tamanho_qtd_valores = sizeof(QTD_VALORES)/sizeof(QTD_VALORES[0]);
  srand(time(0));

  for (int qtd_i=0; qtd_i<tamanho_qtd_valores; qtd_i++) {
    ll qtd = QTD_VALORES[qtd_i];
    printf("== QTD DE VALORES %lli ==\n", qtd);

    for (int i=0; i<ITERACOES; i++) {

      ull comp_A_insercao = 0, comp_A_busca = 0,
          comp_L_insercao = 0, comp_L_busca = 0;
      no_A *arvore = NULL;
      no_L *lista = NULL;

      // generates and insert values
      unsigned int valores_busca[qtd];
      for (ll v=0; v<qtd; v++) {
        // // insert values
        unsigned int valor_insercao = rand() % VALOR_MAX + 1;
        arvore = insere_A(arvore, valor_insercao, &comp_A_insercao);
        insere_L(&lista, valor_insercao, &comp_L_insercao);

        valores_busca[v] = rand() % VALOR_MAX + 1;
      }

      // search values
      for (ll v=0; v<qtd; v++) {
        busca_A(arvore, valores_busca[v], &comp_A_busca);
        busca_L(lista, valores_busca[v], &comp_L_busca);
      }

      // desaloca_A(arvore);
      // desaloca_L(lista);

      // TODO divisões retornar numero real?
      printf("INTERACTION %d: \n", i+1);
      printf("Tree insertion, total=%llu, avg=%llu\n", comp_A_insercao,
            comp_A_insercao / qtd);
      printf("Tree search, total=%llu, avg=%llu\n", comp_A_busca,
            comp_A_busca / qtd);
      printf("List insertion, total=%llu, avg=%llu\n", comp_L_insercao,
            comp_L_insercao / qtd);
      printf("List search, total=%llu, avg=%llu\n", comp_L_busca,
            comp_L_busca / qtd);    
    }
    printf("\n");
  }
}

Are you sure you are supposed to insert items already in the list? I think you're suppose to avoid adding duplicates keys.


Inserting the first node in the linked list will requires 0 comparisons. The second, 1/2 (on avg). The third, 2/2, The fourth 3/2, etc. So inserting N times will have a average time proportional to (0+1+2+...+(N-2)+(N-1))/2 = N*(N-1)/4.

$ perl -e'printf "%d: %d\n", $_, (10**$_)*(10**$_-1)/4 for 1..5;'
1: 22
2: 2475
3: 249750
4: 24997500
5: 2499975000

That's an average time of 2.5 billion time units to insert 10 5 nodes. For example, if it takes 100 ns per comparison, it will take over 4 minutes to insert 10 5 nodes on average.

At that same rate, inserting 10 12 nodes would take an average of 800 million years.


If, on the other hand, you avoid adding a key that's already in the list, the average time will be no more than 65,536*N/2

$ perl -e'printf "%d: %d\n", $_, 65536*(10**$_-1)/4 for 1..12;'
1: 147456
2: 1622016
3: 16367616
4: 163823616
5: 1638383616
6: 16383983616
7: 163839983616
8: 1638399983616
9: 16383999983616
10: 163839999983616
11: 1638399999983616
12: 16383999999983616

So instead of taking 800 million years, inserting 10 12 nodes will take an average of "only" 19 days using the rate assumed above. Even if I'm off by an order of magnitude, we're still talking about 2 days.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM