BST 和鏈表 C 程序卡在大值上

Question

我正在做一項大學作業，教授要求我們實現 BST 和鏈表，並統計在插入和搜索大量隨機生成的值時一共進行了多少次比較。我們應該從 10 個值開始，然後是 100、1000，一直到 10^12。問題是，程序總是卡在 100000（10^5）處。RAM 使用率很低，但 CPU 佔用率達到最大。我在每一步之後都釋放了樹和鏈表。代碼可在此處（外部鏈接）和下方找到。

總結一些要點：每個值（節點的鍵）都是一個 unsigned int（最大 65535），但最多應該插入 10^12 個值，並另外搜索 10^12 個值。

應該需要這么長時間嗎？ 我的處理器是 i5-7200u

是否有可能是內存問題而 GCC 以某種方式阻止了它？

非常感謝

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef long long ll;
typedef unsigned long long ull;

// BST

// Node of the binary search tree.
typedef struct no_A_struct {
  unsigned int chave;            // key stored in this node
  struct no_A_struct *esquerda;  // left child
  struct no_A_struct *direita;   // right child
} no_A;

// new node
// Allocates a childless tree node holding `chave`.
// Never returns NULL: if the allocation fails, an error is printed and the
// program exits, so callers may use the result without checking it.
// The caller owns the returned node and must eventually free() it.
no_A *novo_no_A(unsigned int chave) {
  no_A *no = malloc(sizeof *no);
  if (no == NULL) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }
  no->chave = chave;
  no->esquerda = NULL;
  no->direita = NULL;
  return no;
}

// insert node
// Inserts `chave` into the tree rooted at `raiz` and returns the root of the
// resulting tree (a new node when `raiz` is NULL, otherwise `raiz` itself).
// A key that is already present is not inserted again.
// `*cont` is incremented once for every test performed on the way down:
// the NULL test, the '<' test and, when reached, the '>' test.
no_A *insere_A(no_A *raiz, unsigned int chave, ull *cont) {
  no_A **elo = &raiz; // link that will receive the new node

  for (;;) {
    ++*cont; // NULL test
    if (*elo == NULL) {
      *elo = novo_no_A(chave);
      break;
    }

    no_A *atual = *elo;

    ++*cont; // '<' test
    if (chave < atual->chave) {
      elo = &atual->esquerda;
      continue;
    }

    ++*cont; // '>' test
    if (chave > atual->chave) {
      elo = &atual->direita;
      continue;
    }

    break; // equal key: the tree is left unchanged
  }

  return raiz;
}

// search node
// Looks for `chave` in the tree rooted at `raiz`.
// Returns the node holding the key, or NULL when the key is not in the tree.
// `*cont` is incremented once per test performed: the NULL test, the '=='
// test and the '>' test that picks the subtree to descend into.
no_A *busca_A(no_A *raiz, unsigned int chave, ull *cont) {
  no_A *atual = raiz;

  for (;;) {
    ++*cont; // NULL test
    if (atual == NULL) {
      return NULL;
    }

    ++*cont; // '==' test
    if (atual->chave == chave) {
      return atual;
    }

    ++*cont; // '>' test
    atual = (chave > atual->chave) ? atual->direita : atual->esquerda;
  }
}

// free tree
// Releases every node of the tree rooted at `raiz`; passing NULL is a no-op.
// Nodes are freed in post-order: left subtree, right subtree, then the node.
// TODO iterative version?
void desaloca_A(no_A *raiz) {
  if (raiz != NULL) {
    no_A *esq = raiz->esquerda;
    no_A *dir = raiz->direita;

    desaloca_A(esq);
    desaloca_A(dir);
    free(raiz);
  }
}

// LINKED LIST WITH IN ORDER INSERTION

// Node of the singly linked list.
typedef struct no_L_struct {
  unsigned int chave;        // key stored in this node
  struct no_L_struct *prox;  // next node, or NULL at the end of the list
} no_L;

// new node
// Allocates an unlinked list node holding `chave`.
// Never returns NULL: if the allocation fails, an error is printed and the
// program exits, so callers may use the result without checking it.
// The caller owns the returned node and must eventually free() it.
no_L *novo_no_L(unsigned int chave) {
  no_L *no = malloc(sizeof *no);
  if (no == NULL) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }
  no->chave = chave;
  no->prox = NULL;
  return no;
}

// insert node
// Inserts a new node holding `chave` into the list `*inicio`, keeping the
// list in non-decreasing key order. Duplicate keys are inserted too; the new
// node goes in front of the first node whose key is >= `chave`.
// `*inicio` is updated when the new node becomes the head.
// `*cont` is incremented once per test performed: each end-of-list (NULL)
// test and each '<=' key test.
void insere_L(no_L **inicio, unsigned int chave, ull *cont) {
  no_L *novo_no = novo_no_L(chave);
  const unsigned int alvo = novo_no->chave;
  no_L **elo = inicio; // link the new node will be attached to

  // Walk the links until reaching the end of the list or the first node
  // whose key is not smaller than the new one.
  for (;;) {
    ++*cont; // NULL test
    if (*elo == NULL) {
      break;
    }

    ++*cont; // '<=' test
    if (alvo <= (*elo)->chave) {
      break;
    }

    elo = &(*elo)->prox;
  }

  novo_no->prox = *elo;
  *elo = novo_no;
}

// search node
// Scans the list starting at `atual` for a node whose key equals `chave`.
// Returns the first such node, or NULL when the end of the list is reached.
// `*cont` is incremented once per test performed: each NULL test and each
// '==' test.
no_L *busca_L(no_L *atual, unsigned int chave, ull *cont) {
  for (no_L *no = atual; ; no = no->prox) {
    ++*cont; // NULL test
    if (no == NULL) {
      return NULL;
    }

    ++*cont; // '==' test
    if (no->chave == chave) {
      return no;
    }
  }
}

// void printa_L(no_L *atual) {
//   if (atual == NULL) return;
//   printf("%u", atual->chave);
//   printa_L(atual->prox);
// }

// free list
// Releases every node of the list starting at `atual`; NULL is a no-op.
void desaloca_L(no_L *atual) {
  for (no_L *seguinte; atual != NULL; atual = seguinte) {
    seguinte = atual->prox; // read the successor before the node is freed
    free(atual);
  }
}

int main() {
  ll QTD_VALORES[] = {10, 100, 1000, // 10^: 1, 2, 3
              10000, 100000, 1000000, // 4, 5, 6
              1000000000, 10000000000, // 9, 10
              100000000000, 1000000000000}; // 11, 12
  int ITERACOES = 1; // TODO voltar pra 100
  unsigned int VALOR_MAX = 65535;

  int tamanho_qtd_valores = sizeof(QTD_VALORES)/sizeof(QTD_VALORES[0]);
  srand(time(0));

  for (int qtd_i=0; qtd_i<tamanho_qtd_valores; qtd_i++) {
    ll qtd = QTD_VALORES[qtd_i];
    printf("== QTD DE VALORES %lli ==\n", qtd);

    for (int i=0; i<ITERACOES; i++) {

      ull comp_A_insercao = 0, comp_A_busca = 0,
          comp_L_insercao = 0, comp_L_busca = 0;
      no_A *arvore = NULL;
      no_L *lista = NULL;

      // generates and insert values
      unsigned int valores_busca[qtd];
      for (ll v=0; v<qtd; v++) {
        // // insert values
        unsigned int valor_insercao = rand() % VALOR_MAX + 1;
        arvore = insere_A(arvore, valor_insercao, &comp_A_insercao);
        insere_L(&lista, valor_insercao, &comp_L_insercao);

        valores_busca[v] = rand() % VALOR_MAX + 1;
      }

      // search values
      for (ll v=0; v<qtd; v++) {
        busca_A(arvore, valores_busca[v], &comp_A_busca);
        busca_L(lista, valores_busca[v], &comp_L_busca);
      }

      // desaloca_A(arvore);
      // desaloca_L(lista);

      // TODO divisões retornar numero real?
      printf("INTERACTION %d: \n", i+1);
      printf("Tree insertion, total=%llu, avg=%llu\n", comp_A_insercao,
            comp_A_insercao / qtd);
      printf("Tree search, total=%llu, avg=%llu\n", comp_A_busca,
            comp_A_busca / qtd);
      printf("List insertion, total=%llu, avg=%llu\n", comp_L_insercao,
            comp_L_insercao / qtd);
      printf("List search, total=%llu, avg=%llu\n", comp_L_busca,
            comp_L_busca / qtd);    
    }
    printf("\n");
  }
}

Answer 1

您確定要插入列表中已有的項目嗎？ 我認為您應該避免添加重復的鍵。

插入鏈表中的第一個節點需要 0 次比較。第二個平均需要 1/2 次，第三個 2/2 次，第四個 3/2 次，依此類推。所以插入 N 個節點的平均時間將與 (0+1+2+...+(N-2)+(N-1))/2 = N*(N-1)/4 成正比。

$ perl -e'printf "%d: %d\n", $_, (10**$_)*(10**$_-1)/4 for 1..5;'
1: 22
2: 2475
3: 249750
4: 24997500
5: 2499975000

也就是說，插入 10^5 個節點平均需要約 25 億個時間單位。例如，如果每次比較需要 100 ns，那麼插入 10^5 個節點平均需要 4 分鐘以上。

以同樣的速度，插入 10^12 個節點平均需要約 8 億年。

另一方面，如果您避免添加已在列表中的鍵，則平均時間不會超過 65,536*N/2

$ perl -e'printf "%d: %d\n", $_, 65536*(10**$_-1)/4 for 1..12;'
1: 147456
2: 1622016
3: 16367616
4: 163823616
5: 1638383616
6: 16383983616
7: 163839983616
8: 1638399983616
9: 16383999983616
10: 163839999983616
11: 1638399999983616
12: 16383999999983616

因此，使用上面假設的速率（每次比較 100 ns），插入 10^12 個節點平均需要約 1.6×10^9 秒，也就是約 19,000 天（約 52 年），而不是 8 億年。即使我的估計偏離了一個數量級，也仍然需要大約 5 年。

BST 和鏈表 C 程序卡在大值上

問題描述

1 個解決方案

解決方案1
0 已采納 2019-12-01 20:16:24

BST 和鏈表 C 程序卡在大值上

問題描述

1 個解決方案

解決方案1 0 已采納 2019-12-01 20:16:24

解決方案1
0 已采納 2019-12-01 20:16:24