C 语言 - 对使用分离链接法（separate chaining）的 Hash 表进行重新散列

Question

我到处寻找参考资料，但没有找到任何关于对使用分离链接法的 hash 表进行重新散列的内容。所以我自己尝试实现了一下；我想我大概知道哪里做错了，但不知道该如何正确实现，请帮忙。

除了新添加的函数 rehash() 之外，一切正常。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

/*
 * One link in a bucket's singly linked chain.
 * key and value are C strings; next is the following link, or NULL at the
 * end of the chain.
 */
struct list_node
{
    struct list_node *next;
    char *key;
    char *value;
};

/*
 * Separate-chaining hash table.
 * list_arr holds table_size bucket pointers, one chain per bucket.
 */
struct hash_table
{
    int table_size;               /* number of buckets in list_arr */
    struct list_node **list_arr;  /* array of per-bucket chain heads */
};




/*
 * Forward declarations for the hash table functions.
 * NOTE(review): find() is declared here but no definition is visible in
 * this listing.
 */
unsigned int hash(const char *key, unsigned int table_size);
struct hash_table *initialize(unsigned int table_size);
struct list_node *find(struct hash_table *H, const char *key);
void insert(struct hash_table *H, const char *key, const char *value);
void dump(struct hash_table *H);
/* Declared under the name its definition uses (delete, not del). */
void delete(struct hash_table *H, const char *key);
struct hash_table *rehash(struct hash_table *H);

/*
 * Map a NUL-terminated key to a bucket index in [0, table_size).
 *
 * Each byte is folded in as hashx = (hashx << 5) + byte. The byte is read
 * as unsigned char so that keys containing bytes >= 0x80 produce the same
 * index whether plain char is signed or unsigned on the target platform.
 *
 * table_size is used as the modulus and must therefore be non-zero.
 */
unsigned int
hash(const char *key, unsigned int table_size)
{
    unsigned long int hashx = 0;

    for (const unsigned char *p = (const unsigned char *)key; *p != '\0'; p++)
    {
        hashx = (hashx << 5) + *p;
    }

    return (unsigned int)(hashx % table_size);
}


struct hash_table
*initialize(unsigned int table_size)
{

    struct hash_table *H = malloc(sizeof(*H));
    H->list_arr = malloc(sizeof(*H->list_arr)*table_size);

    H->table_size = table_size;

    for(unsigned int i = 0; i<table_size; i++)
    {
        H->list_arr[i] = malloc(sizeof(*H->list_arr[i]));
        H->list_arr[i]->next = NULL;
    }

    return H;

}


void
insert(struct hash_table *H, const char *key, const char *value)
{   
    unsigned int index = hash(key, H->table_size);
    struct list_node *head = H->list_arr[index];
    struct list_node *current = head->next;
    
    
    while(current!=NULL)
    {
        if(strcmp(current->key,key)==0)
        {
            free(current->value);
            current->value = malloc(strlen(value)+1);
            strcpy(current->value,value);
            return;
        }

        current=current->next;

    }

    struct list_node *newNode = malloc(sizeof(*H->list_arr[index]));
    newNode->next = head->next;
    head->next = newNode;
    newNode->key = malloc(strlen(key)+1);
    newNode->value = malloc(strlen(value)+1);
    strcpy(newNode->key,key);
    strcpy(newNode->value,value);


}

/*
 * Print every non-empty bucket to stdout, one line per bucket:
 * "Index[i]: " followed by each key|value pair and a trailing NULL marker.
 * Buckets with no entries produce no output.
 */
void
dump(struct hash_table *H)
{
    int bucket = 0;

    while (bucket < H->table_size)
    {
        /* The first real entry hangs off the bucket head's next pointer. */
        struct list_node *node = H->list_arr[bucket]->next;

        if (node != NULL)
        {
            printf("Index[%d]: ", bucket);
            for (; node != NULL; node = node->next)
            {
                printf("\t%s|%s\t--> ", node->key, node->value);
            }
            printf("\tNULL");
            printf("\n");
        }

        bucket++;
    }
}

/*
 * Remove the entry stored under key, releasing the node together with its
 * key and value strings.
 *
 * Prints "Key not found!" and leaves the table untouched when key is absent
 * (including when the bucket is empty).
 */
void delete(struct hash_table *H, const char *key)
{
    unsigned int index = hash(key,H->table_size);
    struct list_node *prev = H->list_arr[index];

    /* Check for the end of the chain before reading the next node's key. */
    while (prev->next != NULL && strcmp(prev->next->key, key) != 0)
    {
        prev = prev->next;
    }

    if (prev->next == NULL)
    {
        printf("Key not found!");
        return;
    }

    struct list_node *victim = prev->next;
    prev->next = victim->next;

    /* insert() allocated the strings separately, so they are freed separately. */
    free(victim->key);
    free(victim->value);
    free(victim);
}


/*
 * First attempt at rehash(): create a table with twice as many buckets and
 * re-insert the entries of the old one, then return the new table.
 *
 * NOTE(review): this is the version the question asks about and is kept as
 * posted. old_entries is declared as struct list_node *, whereas
 * H->list_arr is struct list_node **; the loop below then treats
 * old_entries[i] both as a struct (.key, .value) and as a pointer
 * (!= NULL, ->next), which cannot both match the declared type.
 */
struct hash_table *rehash(struct hash_table *H)
{
    unsigned int old_size = H->table_size;
    /* NOTE(review): list_arr is an array of pointers, so this loses one level of indirection. */
    struct list_node *old_entries = H->list_arr;

    /* H now refers to the new, larger table; the old struct is no longer referenced here. */
    H = initialize(2*old_size);

    for(unsigned int i = 0; i<old_size; i++)
    {
        while(old_entries[i]!=NULL)
        {
            insert(H,old_entries[i].key,old_entries[i].value);
            old_entries[i] = old_entries[i]->next;
        }
    }

    /* Only the bucket array is freed here. */
    free(old_entries);

    return H;
}



/*
 * Demo driver: build a 20-bucket table, insert two entries, print the table,
 * rehash it into a larger one and print it again.
 *
 * Returns EXIT_SUCCESS when the demo completes, or EXIT_FAILURE if the
 * initial table could not be created.
 */
int main(void)
{
    struct hash_table *H = initialize(20);
    if (H == NULL)
    {
        return EXIT_FAILURE;
    }

    insert(H,"name1","David");
    insert(H,"name2","Radka");
    dump(H);

    /* rehash() hands back the table to use from now on. */
    H = rehash(H);
    dump(H);

    return EXIT_SUCCESS;
}

我认为 old_entries[i] 这种写法是错误的，但想不出别的办法，请帮我解决这个问题。

Answer 1

OK，想了一会儿，我意识到我创建了一个 struct list_node 指针变量，并让它指向 H->list_arr，而后者是一个指针数组。这就是我的错误。我应该把它声明为二级指针（double pointer，即 struct list_node **）。

这是修改后的 rehash() function：

/*
 * Grow the table to twice its bucket count and move every entry into it.
 *
 * Existing nodes are relinked into the new buckets instead of being copied
 * through insert(), so no key or value string is duplicated and nothing from
 * the old table is leaked: the old dummy heads, the old bucket array and the
 * old table struct are all released. Entries end up in the same per-bucket
 * order that prepending them one by one produces.
 *
 * Returns the new table; the caller must use it in place of H, i.e.
 * H = rehash(H). If initialize() returns NULL, H is returned untouched.
 */
struct hash_table *rehash(struct hash_table *H)
{
    unsigned int old_size = H->table_size;
    struct list_node **old_entries = H->list_arr;

    struct hash_table *grown = initialize(2*old_size);
    if (grown == NULL)
    {
        return H;
    }

    for (unsigned int i = 0; i < old_size; i++)
    {
        struct list_node *node = old_entries[i]->next;

        while (node != NULL)
        {
            /* Remember the successor: relinking overwrites node->next. */
            struct list_node *following = node->next;
            struct list_node *head = grown->list_arr[hash(node->key, grown->table_size)];

            node->next = head->next;
            head->next = node;
            node = following;
        }

        free(old_entries[i]); /* dummy head of the old bucket */
    }

    free(old_entries);
    free(H);

    return grown;
}

使用这段代码时，必须把返回的新 hash_table 地址重新赋给原先指向旧 hash_table 的指针 --> [ H = rehash(H) ]，因为把指针 H 作为参数传递时，函数内对它的修改只在局部生效。因此，我又写了第二个版本（因为我比较懒 ;) 又不够专心，很可能会忘记重新赋值）：它不需要返回任何东西，只要调用该函数，我的指针就会自动指向新的 hash_table -> [ rehash(&H) ]。下面就是这个“懒人”版本：

/*
 * In-place variant of rehash(): on return *H points at a table with twice
 * as many buckets that holds every entry of the old one.
 *
 * Existing nodes are relinked into the new buckets instead of being copied
 * through insert(), so no string is duplicated and nothing from the old
 * table is leaked: the old dummy heads, the old bucket array and the old
 * table struct are all released.
 *
 * If initialize() returns NULL, *H is left untouched.
 */
void
rehash(struct hash_table **H)
{
    struct hash_table *old_table = *H;
    unsigned int old_size = old_table->table_size;
    struct list_node **old_entries = old_table->list_arr;

    struct hash_table *grown = initialize(2*old_size);
    if (grown == NULL)
    {
        return;
    }

    for (unsigned int i = 0; i < old_size; i++)
    {
        struct list_node *node = old_entries[i]->next;

        while (node != NULL)
        {
            /* Remember the successor: relinking overwrites node->next. */
            struct list_node *following = node->next;
            struct list_node *head = grown->list_arr[hash(node->key, grown->table_size)];

            node->next = head->next;
            head->next = node;
            node = following;
        }

        free(old_entries[i]); /* dummy head of the old bucket */
    }

    free(old_entries);
    free(old_table);

    *H = grown;
}

如果我的做法在空间或时间上效率低下，请告诉我；我才读到计算机科学本科的第三学期，这学期才开始学习数据结构与算法（DSA）。

Answer 2

在每个 bin（桶）的开头放置一个虚拟（dummy）头节点是个好主意，但您不需要用 malloc() 单独分配这些虚拟节点。您可以让 bin 数组直接成为节点数组，而不是指向节点的指针数组。这样，在分配数组的同时就分配好了所有虚拟节点。因此，您可以将 hash 表定义为

/*
 * Hash table whose bins are stored inline: list_arr is an array of
 * table_size nodes, each serving as the dummy head of its own chain.
 */
struct hash_table
{
    int table_size;              /* number of bins in list_arr */
    struct list_node *list_arr;  /* array of dummy head nodes, one per bin */
};

（而不是使用struct list_node **list_arr ）。

在初始化过程中循环遍历 bin 时，必须将 bin 的next指针设置为 NULL，但不要分配它们。

struct hash_table
*initialize(unsigned int table_size)
{
    struct hash_table *H = malloc(sizeof(*H));
    H->list_arr = malloc(sizeof(*H->list_arr)*table_size);
    H->table_size = table_size;

    for(unsigned int i = 0; i<table_size; i++)
    {
        // no malloc here!
        H->list_arr[i].next = NULL;
    }

    return H;
}

无论如何，这与重新散列无关，只是一个建议。不过，正因为每个 bin 都有虚拟头节点，您可以重构代码（这也是我认为虚拟节点是个好主意的原因）。您可以先从表中取出 bin，然后只针对这个 bin 进行操作，之后就不必再关心表本身。可以通过以下方式获取某个键对应的 bin：

/*
 * Return the bin that key hashes to. The result is the address of an
 * element of H->list_arr, i.e. the dummy head of that bin's chain.
 */
struct list_node *get_bin(struct hash_table *H, const char *key)
{
    struct list_node *bins = H->list_arr;

    return bins + hash(key, H->table_size);
}

你可以在 bin 中找到节点

/*
 * Search the chain that follows bin for a node whose key equals key.
 * Returns that node, or a null pointer when the chain has no such key.
 * bin itself is never compared; the search starts at bin->next.
 */
struct list_node *find_node(struct list_node *bin, const char *key)
{
    struct list_node *candidate = bin->next;

    while (candidate != NULL && strcmp(candidate->key, key) != 0) {
        candidate = candidate->next;
    }

    return candidate;
}

并且，例如，可以把插入简化为：

/*
 * Link node in directly after bin, ahead of whatever bin->next pointed to
 * before the call.
 */
void prepend_node(struct list_node *node, struct list_node *bin)
{
  struct list_node *former_first = bin->next;

  node->next = former_first;
  bin->next = node;
}

/*
 * Store a copy of value under key: replace the value of an existing node,
 * or prepend a freshly created node to the key's bin.
 *
 * Aborts the program if the replacement value cannot be allocated.
 */
void insert(struct hash_table *H, const char *key, const char *value)
{
  struct list_node *bin = get_bin(H, key);
  struct list_node *node = find_node(bin, key);
  if (node) {
    // update node: build the replacement first, so the old string is
    // released only once the copy exists (this also keeps the call safe
    // when value points at the string currently stored in the node)
    char *replacement = malloc(strlen(value)+1);
    if (!replacement) abort(); // add some error handling here
    strcpy(replacement, value);
    free(node->value);
    node->value = replacement;
  } else {
    // prepend new node
    prepend_node(new_node(key, value), bin);
  }
}

其中 new_node() 函数如下：

/*
 * Allocate a node holding private copies of key and value.
 *
 * The node is returned unlinked, with next set to NULL so it is a valid
 * one-element chain until it is linked in. Aborts the program if any
 * allocation fails.
 */
struct list_node *new_node(const char *key, const char *value)
{
  struct list_node *node = malloc(sizeof *node);
  if (!node) abort(); // add some error handling here
  node->next = NULL;
  node->key = malloc(strlen(key)+1);
  if (!node->key) abort(); // add some error handling here
  strcpy(node->key,key);
  node->value = malloc(strlen(value)+1);
  if (!node->value) abort(); // add some error handling here
  strcpy(node->value,value);
  return node;
}

因为 bin 嵌入在数组中，您可以放心地在所有函数中假设它们不是NULL ，这样可以避免测试一些特殊情况。

代码并没有变短，因为我把它拆成了几个函数；但在我看来，当每个函数只做一件简单的事情时，代码更具可读性：这里分别是获取 bin、在 bin 中查找键、创建节点、把节点前插到 bin 等。如果“原始”的 malloc() 和 strcpy() 等调用散落在代码各处，就很难确认一切都工作正常。代码总行数增加了，但每个函数都更短、更简单。之所以能这样做，是因为您可以把 bin 当作链表来处理，而无需访问 hash 表的数组——这正是因为所有 bin 都有一个虚拟头节点。

现在您可以重写 rehash()，让它只需把节点添加到 bin 中。您知道旧 bin 中的所有键都是唯一的，因此无需做任何检查，只需将每个节点放到其新 bin 的最前面：

/*
 * Grow the table to twice its bin count and move every node into the bin
 * its key hashes to in the new table.
 *
 * Nodes are relinked, not copied. Keys in the old table are unique, so each
 * node is simply prepended to its new bin without a lookup.
 *
 * Returns the new table; the old table struct and its bin array are freed.
 * If initialize() returns NULL, H is returned untouched.
 */
struct hash_table *rehash(struct hash_table *H)
{
    unsigned int old_size = H->table_size;
    struct list_node *old_entries = H->list_arr;

    struct hash_table *grown = initialize(2*old_size);
    if (!grown) return H;

    free(H); // the old table struct must be released too; its bin array is still held in old_entries

    for(unsigned int i = 0; i<old_size; i++)
    {
        struct list_node *node = old_entries[i].next;
        while (node) {
          // prepend_node() overwrites node->next, so fetch the successor
          // in the OLD chain before relinking
          struct list_node *following = node->next;
          // just prepend to new bin; the key should be unique
          prepend_node(node, get_bin(grown, node->key));
          node = following;
        }
    }
    free(old_entries);

    return grown;
}

我添加了一个 free(H)，因为您忘记释放 H 所占的内存；不过，不创建新表而是直接更新 H 会更高效。您可以把初始化和分配分开。但这样做收益不大，因为初始化 bin 才是耗时的部分。

不过，既然说到释放：记得写一个释放整个 hash 表的函数（记得释放 bin，包括其中的所有节点）。当然，不要在重新散列时用它来释放更新前的 H——那时您需要保留节点——但您确实需要这样一个函数。

C 语言 - 重新散列单独的链接 Hash 表

问题描述

2 个解决方案

解决方案1
0 2020-11-29 23:10:14

解决方案2
0 2020-11-30 04:09:52

C 语言 - 重新散列单独的链接 Hash 表

问题描述

2 个解决方案

解决方案1 0 2020-11-29 23:10:14

解决方案2 0 2020-11-30 04:09:52

解决方案1
0 2020-11-29 23:10:14

解决方案2
0 2020-11-30 04:09:52