在C中的數組中找到n個重復數字

Question

我已經在C中使用哈希表庫函數實現了以下問題說明。由於我從未在C中使用標准庫哈希表，因此我的問題是：

我是否正確使用哈希表函數（我相信獲得輸出並不意味着正確使用）？
對於給定的問題陳述，有沒有更好的解決方法？

問題陳述：在數組中找到n個最頻繁出現的元素。

1 <N <100000 [數組長度]
-1000000 <n <1000000 [數組整數]

我在SO上瀏覽了一些類似的問題——在其中一個答案中，我確實看到推薦的方法是使用哈希表。

#include <search.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define REPEAT 3
#define BUFFERSIZE 10

void freqElement(int* arr, int len, int times);
int createHT(int* arr, int len);

/*
 * Demo driver: counts how often each value of a sample array occurs using
 * the hcreate()/hsearch()/hdestroy() table, then asks freqElement() to
 * report a value that occurs exactly REPEAT times.
 *
 * Returns 0 on success, EXIT_FAILURE if the table cannot be created or
 * filled.
 */
int main(void)
{
    int arr[] = {2, 3, 5, 6, 10, 10, 2, 5, 2};
    int len = sizeof(arr) / sizeof(arr[0]);

    /* hcreate() returns 0 on failure; hsearch() must not be used then. */
    if (hcreate(len) == 0)
    {
        fprintf(stderr, "failed to create hash table\n");
        return EXIT_FAILURE;
    }

    if (!createHT(arr, len))
    {
        printf(" error in entering data \n");
        hdestroy();
        return EXIT_FAILURE;
    }

    freqElement(arr, len, REPEAT);

    hdestroy();
    return 0;
}

/*
 * Record every element of arr in the global hsearch() table, keyed by the
 * element's decimal text, with the occurrence count stored in ENTRY.data.
 *
 * arr  - values to count
 * len  - number of elements in arr
 *
 * The table must already have been created with hcreate(). Returns 1 on
 * success; on any failure an error is printed and the process exits.
 */
int createHT(int* arr, int len)
{
    /* Three decimal digits per byte is an upper bound; +2 for sign and NUL. */
    enum { KEY_CAPACITY = 3 * sizeof(int) + 2 };

    for (int i = 0; i < len; i++)
    {
        /*
         * The table keeps the key pointer itself, not a copy of the text,
         * so each key needs storage that outlives this loop iteration.
         */
        char *key = malloc(KEY_CAPACITY);
        if (key == NULL)
        {
            fprintf(stderr, "entry failed\n");
            exit(EXIT_FAILURE);
        }

        int written = snprintf(key, KEY_CAPACITY, "%d", arr[i]);
        if (written < 0 || written >= KEY_CAPACITY)
        {
            fprintf(stderr, "entry failed\n");
            free(key);
            exit(EXIT_FAILURE);
        }

        /*
         * ENTER returns the existing entry when the key is already present
         * and inserts this item (count 0) otherwise, so one lookup suffices.
         */
        ENTRY e;
        e.key = key;
        e.data = (void *)0;
        ENTRY *ep = hsearch(e, ENTER);
        if (ep == NULL)
        {
            fprintf(stderr, "entry failed\n");
            free(key);
            exit(EXIT_FAILURE);
        }

        /* The count lives in the pointer-sized data slot; go via intptr_t. */
        ep->data = (void *)((intptr_t)ep->data + 1);

        /* A different key pointer means the entry existed: drop our copy. */
        if (ep->key != key)
        {
            free(key);
        }
    }
    return 1;
}

/*
 * Print the first element of arr whose count in the global hsearch() table
 * equals `times`, then stop. Prints nothing when no element matches.
 *
 * arr   - the values that were counted
 * len   - number of elements in arr
 * times - occurrence count to look for
 */
void freqElement(int* arr, int len, int times)
{
    /* Three decimal digits per byte is an upper bound; +2 for sign and NUL. */
    enum { KEY_CAPACITY = 3 * sizeof(int) + 2 };

    for (int i = 0; i < len; i++)
    {
        char buffer[KEY_CAPACITY];
        int written = snprintf(buffer, sizeof buffer, "%d", arr[i]);
        if (written < 0 || written >= KEY_CAPACITY)
        {
            continue;   /* cannot form a complete key for this element */
        }

        /* hsearch() takes the ENTRY by value, so initialise both members. */
        ENTRY e = { .key = buffer, .data = NULL };
        ENTRY *ep = hsearch(e, FIND);

        /* Counts are stored in the pointer-sized data slot; read via intptr_t. */
        if (ep != NULL && (intptr_t)ep->data == times)
        {
            /* buffer holds the same text as the matched key. */
            printf(" value %s is repeated %d times \n", buffer, times);
            break;
        }
    }
}

Answer 1

我不確定自己是否會選用 hcreate()、hsearch()、hdestroy() 這三個函數，但它們確實可以使用。POSIX 規范在某些問題上並不明確，例如 hdestroy() 是否會釋放鍵，但 Mac OS X 手冊說：

hdestroy() 函數銷毀搜索表，之后可以再次調用 hcreate()。調用 hdestroy() 之后，表中的數據不再被視為可訪問。hdestroy() 函數會對搜索表中的每個比較鍵調用 free(3)，但不會對與該鍵關聯的數據項調用。

（POSIX並未提及hdestroy()在比較鍵上調用free() 。）

這是您的代碼的相對簡單的改編，至少在Mac OS X 10.11.4上的GCC 6.1.0和Valgrind 3.12.0-SVN下，可以在valgrind下正常運行。

$ gcc -O3 -g -std=c11 -Wall -Wextra -Wmissing-prototypes \
>     -Wstrict-prototypes -Wold-style-definition -Werror hs17.c -o hs17
$

代碼

#include <search.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUFFERSIZE 10

void freqElement(int *arr, int len, int times);
int createHT(int *arr, int len);

// Demo driver: fills the hsearch() table from a sample array, then reports
// the values found for each possible occurrence count.
// Returns EXIT_SUCCESS, or EXIT_FAILURE if the table cannot be created or
// filled.
int main(void)
{
    int arr[] = { 2, 3, 5, 6, 10, 10, 2, 5, 2, 8, 8, 7, 8, 7, 8, 7, };
    int len = sizeof(arr) / sizeof(arr[0]);
    int status = EXIT_SUCCESS;

    if (hcreate(len) == 0)
    {
        fprintf(stderr, "Failed to create hash table of size %d\n", len);
        return EXIT_FAILURE;
    }

    if (!createHT(arr, len))
    {
        fprintf(stderr, "error in entering data\n");
        status = EXIT_FAILURE;
    }
    else
    {
        // A value can occur up to len times (all elements equal), so the
        // upper bound is inclusive.
        for (int i = 1; i <= len; i++)
            freqElement(arr, len, i);
    }

    hdestroy();
    return status;
}

// Record every element of arr in the global hsearch() table, keyed by the
// element's decimal text, with the occurrence count kept in ENTRY.data.
// Prints a "Processing [...]" progress line for each element.
//
// arr - values to count
// len - number of elements in arr
//
// The table must already exist (hcreate()). Returns 1 on success; on any
// failure a message is written to stderr and the process exits.
int createHT(int *arr, int len)
{
    for (int i = 0; i < len; i++)
    {
        // Three decimal digits per byte is an upper bound; +2 for sign and
        // NUL. A short buffer would truncate and make distinct values
        // share one key.
        char buffer[3 * sizeof(int) + 2];
        int written = snprintf(buffer, sizeof(buffer), "%d", arr[i]);
        if (written < 0 || (size_t)written >= sizeof(buffer))
        {
            fprintf(stderr, "cannot format key for element %d\n", i);
            exit(EXIT_FAILURE);
        }

        // The table stores the key pointer itself, so hand it a heap copy
        // that outlives this iteration, and check the allocation before
        // the copy is printed or stored.
        size_t key_size = (size_t)written + 1;
        char *key = malloc(key_size);
        if (key == NULL)
        {
            fprintf(stderr, "out of memory for key [%s]\n", buffer);
            exit(EXIT_FAILURE);
        }
        memcpy(key, buffer, key_size);
        printf("Processing [%s]\n", key);

        // ENTER returns the existing entry if the key is present and
        // inserts this item (count 0) otherwise.
        ENTRY e = { .key = key, .data = NULL };
        ENTRY *ep = hsearch(e, ENTER);
        if (ep == NULL)
        {
            fprintf(stderr, "entry failed for [%s]\n", key);
            free(key);    // Not dreadfully important
            exit(EXIT_FAILURE);
        }

        ep->data = (void *)((intptr_t)ep->data + 1);

        // A different key pointer means the entry already existed, so our
        // fresh copy was not adopted by the table.
        if (ep->key != key)
            free(key);
    }
    return 1;
}

// Report whether `value` occurs among the first `len` elements of `arr`.
// Callers pass the current index as `len` to learn whether the element at
// that index has already been seen earlier in the array.
static bool processed_before(int *arr, int len, int value)
{
    bool seen = false;
    int idx = 0;

    // Stop at the first match or when the prefix is exhausted.
    while (!seen && idx < len)
    {
        seen = (arr[idx] == value);
        idx++;
    }
    return seen;
}

// Print every distinct value of arr whose count in the global hsearch()
// table equals `times`, in order of first appearance in arr.
//
// arr   - the values that were counted
// len   - number of elements in arr
// times - occurrence count to report
void freqElement(int *arr, int len, int times)
{
    for (int i = 0; i < len; i++)
    {
        // Three decimal digits per byte is an upper bound; +2 for sign and
        // NUL, so no int is truncated into a different key.
        char buffer[3 * sizeof(int) + 2];
        int written = snprintf(buffer, sizeof(buffer), "%d", arr[i]);
        if (written < 0 || (size_t)written >= sizeof(buffer))
            continue;

        // hsearch() takes the ENTRY by value, so initialise both members.
        ENTRY e = { .key = buffer, .data = NULL };
        ENTRY *ep = hsearch(e, FIND);
        if (ep == NULL || (intptr_t)ep->data != times)
            continue;

        // Report each value once, at its first position in arr.
        if (!processed_before(arr, i, arr[i]))
            printf(" value %s is repeated %d times\n", ep->key, times);
    }
}

processed_before() 函數可防止重復打印在數組中出現多次的值——之所以需要它，是因為 freqElement() 函數已改為報告具有給定出現次數的所有值，而不僅僅是第一個這樣的值。這並非完全理想，但該代碼包含一些打印語句，以便監視進度，這有助於確認代碼正確運行。

輸出示例

Processing [2]
Processing [3]
Processing [5]
Processing [6]
Processing [10]
Processing [10]
Processing [2]
Processing [5]
Processing [2]
Processing [8]
Processing [8]
Processing [7]
Processing [8]
Processing [7]
Processing [8]
Processing [7]
 value 3 is repeated 1 times 
 value 6 is repeated 1 times 
 value 5 is repeated 2 times 
 value 10 is repeated 2 times 
 value 2 is repeated 3 times 
 value 7 is repeated 3 times 
 value 8 is repeated 4 times

Answer 2

讓我們從問題陳述開始，因為正如我在評論中提到的那樣，我認為您當前的代碼無法解決問題：

在數組中找到n個最頻繁的元素。

1 <N <100000 [數組長度]

-1000000 <n <1000000 [數組整數]

我對這段陳述的理解是，我們想要一個這樣的函數：

size_t n_most_popular(int input[], size_t input_size, int output[], size_t output_size);

此函數采用整數（在-1000000和1000000之間）的輸入數組（大小最大為100000），並使用輸入中的N個最常見的元素填充輸出數組，其中N為output_size 。 為了方便起見，我們可以規定將最常見的元素放置在輸出的前面，而將不常見的元素放置在輸出的后面。

一種簡單的方法是首先對輸入數組進行排序（可能在適當的位置，可能使用標准qsort() ）。 然后，您將得到一個像這樣的數組：

[1,1,1,1,2,2,3,3,3,3,3,4,7,...]

然后，構造一個結構數組，其中每個結構都包含來自輸入的唯一值及其出現的次數。 它的最大長度是input_size ，從排序后的輸入一次通過構建它很簡單。

最后，使用標准qsort()按count字段降序對該結構數組進行排序。 將第一個output_size元素復制到輸出數組，然后返回輸出數組的實際填充大小（如果輸入數組中的唯一值不足，則該大小可能小於output_size ）。

這是一個可以正常工作的C程序：

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

size_t most_popular(int input[], size_t input_size, int output[], size_t output_size);

/* Demo driver: prints the three most frequent values of a sample array. */
int main(void)
{
    int arr[] = {2, 3, 5, 6, 10, 10, 2, 5, 2};
    size_t len = sizeof(arr) / sizeof(arr[0]);

    int out[3];
    size_t outlen = sizeof(out) / sizeof(out[0]);

    size_t count = most_popular(arr, len, out, outlen);

    for (size_t ii = 0; ii < count; ii++) {
        /* %zu is the conversion for size_t; %lu only matches where size_t
           happens to be unsigned long. */
        printf("most popular rank %zu: %d\n", ii + 1, out[ii]);
    }

    return 0;
}

/* One distinct input value together with the number of times it occurs. */
typedef struct
{
    int value;
    int count;
} value_count;

/*
 * qsort() comparator for value_count: higher count first. Equal counts are
 * ordered by ascending value so the result does not depend on how a given
 * qsort() implementation arranges ties.
 */
int value_count_greater(const void* lhs, const void* rhs)
{
    const value_count *vcl = lhs, *vcr = rhs;
    if (vcl->count != vcr->count) {
        return (vcr->count > vcl->count) - (vcr->count < vcl->count);
    }
    return (vcl->value > vcr->value) - (vcl->value < vcr->value);
}

/*
 * qsort() comparator for int, ascending. Uses comparisons instead of
 * subtraction because the difference of two ints can overflow.
 */
int int_less(const void *lhs, const void *rhs)
{
    const int *il = lhs, *ir = rhs;
    return (*il > *ir) - (*il < *ir);
}

/*
 * Fill output with the most frequent values of input, most frequent first
 * (ties in ascending value order).
 *
 * input       - values to examine; NOTE: sorted in place as a side effect
 * input_size  - number of elements in input
 * output      - receives up to output_size values
 * output_size - capacity of output
 *
 * Returns the number of values written to output, which is smaller than
 * output_size when input has fewer distinct values. Returns 0 if
 * input_size is 0 or memory cannot be allocated.
 */
size_t most_popular(int input[], size_t input_size, int output[], size_t output_size)
{
    if (input_size == 0) {
        return 0;
    }

    /* After sorting, equal values are adjacent and can be counted in one pass. */
    qsort(input, input_size, sizeof(input[0]), int_less);

    /* At most input_size distinct values; calloc rejects a size overflow. */
    value_count *value_counts = calloc(input_size, sizeof *value_counts);
    if (value_counts == NULL) {
        return 0;
    }

    /* count how many times each value occurs in input */
    size_t unique_count = 0;
    for (size_t ii = 0; ii < input_size; ii++) {
        if (unique_count > 0 && value_counts[unique_count - 1].value == input[ii]) {
            value_counts[unique_count - 1].count++;
        } else {
            value_counts[unique_count].value = input[ii];
            value_counts[unique_count].count = 1;
            unique_count++;
        }
    }

    /* sort unique values by how often they occur, most popular first */
    qsort(value_counts, unique_count, sizeof(value_counts[0]), value_count_greater);

    size_t result_size = unique_count < output_size ? unique_count : output_size;
    for (size_t ii = 0; ii < result_size; ii++) {
        output[ii] = value_counts[ii].value;
    }

    free(value_counts);
    return result_size;
}

在C中的數組中找到n個重復數字

問題描述

2 個解決方案

解決方案1
1 已采納 2016-05-01 17:18:52

碼

輸出示例

解決方案2
0 2016-05-01 02:49:01

在C中的數組中找到n個重復數字

問題描述

2 個解決方案

解決方案1 1 已采納 2016-05-01 17:18:52

碼

輸出示例

解決方案2 0 2016-05-01 02:49:01

解決方案1
1 已采納 2016-05-01 17:18:52

解決方案2
0 2016-05-01 02:49:01