在C中的数组中找到n个重复数字

Question

我已经在C中使用哈希表库函数实现了以下问题说明。由于我从未在C中使用标准库哈希表，因此我的问题是：

我是否正确使用哈希表函数（我相信获得输出并不意味着正确使用）？
有没有更好的方法来解决给定问题陈述的解决方案？

问题陈述：在数组中找到n个最频繁出现的元素。

1 <N <100000 [数组长度]
-1000000 <n <1000000 [数组整数]

我在SO上浏览了一些类似的问题——在其中一个答案中，我确实看到推荐的方法是使用哈希表。

#include <stdio.h>
#include <stdlib.h>
#include <search.h>
#include <stdbool.h>
#include <stdint.h>

/* Occurrence count that main() asks freqElement() to look for. */
#define REPEAT 3
/* Size of the char buffer each int is formatted into to form a hash key. */
#define BUFFERSIZE 10

/* Prints the first value of arr whose stored count equals times. */
void freqElement(int* arr, int len, int times);
/* Enters every value of arr into the hash table, counting repeats; returns 1. */
int createHT(int* arr, int len);

/*
 * Demo driver: counts the values of a small sample array in the global
 * hsearch(3) table, then reports the first value that occurs exactly
 * REPEAT times.  Returns EXIT_FAILURE if the table cannot be created or
 * filled.
 */
int main(void)
{
    int arr[] = {2, 3, 5, 6, 10, 10, 2, 5, 2};
    int len = sizeof(arr) / sizeof(arr[0]);

    /* hcreate() returns 0 on failure; nothing below is valid without a table. */
    if (hcreate(len) == 0)
    {
        fprintf(stderr, "failed to create hash table of size %d\n", len);
        return EXIT_FAILURE;
    }

    if (!createHT(arr, len))
    {
        printf(" error in entering data \n");
        hdestroy();
        return EXIT_FAILURE;
    }

    freqElement(arr, len, REPEAT);

    hdestroy();
    return 0;
}

/*
 * Count every value of arr[0..len-1] in the global hsearch(3) table.
 *
 * Each value is formatted as a decimal string key; the entry's data pointer
 * is used as an integer occurrence counter.  The table must already have
 * been created with hcreate().
 *
 * Returns 1 on success.  If a key cannot be allocated or entered, an error
 * is printed and the process exits.
 */
int createHT(int* arr, int len)
{
    for (int i = 0; i < len; i++)
    {
        /*
         * hsearch() stores the key pointer it is given rather than a copy,
         * so the key must outlive this loop iteration: allocate it on the
         * heap instead of pointing the entry at a local buffer.
         */
        char *key = malloc(BUFFERSIZE);
        if (key == NULL)
        {
            fprintf(stderr, "entry failed\n");
            exit(EXIT_FAILURE);
        }
        snprintf(key, BUFFERSIZE, "%d", arr[i]);

        ENTRY e;
        e.key = key;
        e.data = (void *)0;     /* a newly inserted entry starts at count 0 */

        /* ENTER returns the existing entry when the key is already present. */
        ENTRY *ep = hsearch(e, ENTER);
        if (ep == NULL)
        {
            fprintf(stderr, "entry failed\n");
            free(key);
            exit(EXIT_FAILURE);
        }

        /* intptr_t round-trips through the pointer without truncation. */
        ep->data = (void *)((intptr_t)ep->data + 1);

        /* Key was already in the table: it kept its own copy, release ours. */
        if (ep->key != key)
        {
            free(key);
        }
    }
    return 1;
}

/*
 * Print the first value of arr[0..len-1] whose occurrence count in the
 * global hsearch(3) table equals `times`, then stop.  Prints nothing when
 * no value has that count.
 */
void freqElement(int* arr, int len, int times)
{
   ENTRY *ep, e;

   /* FIND only needs the key, but do not pass an indeterminate member. */
   e.data = NULL;

   for (int i = 0; i < len; i++)
   {
       char buffer[BUFFERSIZE];
       snprintf(buffer, BUFFERSIZE, "%d", arr[i]);
       e.key = buffer;
       ep = hsearch(e, FIND);
       /*
        * The count is stored in the data pointer; convert it back through
        * intptr_t, since casting a pointer straight to int can truncate.
        */
       if (ep && (intptr_t)ep->data == times)
       {
           printf(" value %s is repeated %d times \n", ep->key, times);
           break;
       }
   }
}

Answer 1

我不确定自己是否会选用hcreate()、hsearch()、hdestroy()这三个函数，但它们确实可以用。POSIX规范在某些问题上并不明确，例如hdestroy()是否会释放键，但Mac OS X手册说：

hdestroy()函数销毁搜索表，之后可以再次调用hcreate()。调用hdestroy()之后，表中的数据不应再被视为可访问。hdestroy()函数会为搜索表中的每个比较键调用free(3)，但不会为与该键关联的数据项调用。

（POSIX并未提及hdestroy()在比较键上调用free() 。）

这是对您的代码相对简单的改编，至少在Mac OS X 10.11.4上用GCC 6.1.0编译后，可以在Valgrind 3.12.0-SVN下干净地运行。

$ gcc -O3 -g -std=c11 -Wall -Wextra -Wmissing-prototypes \
>     -Wstrict-prototypes -Wold-style-definition -Werror hs17.c -o hs17
$

代码

#include <search.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Size of the char buffer each int is formatted into to form a hash key.
#define BUFFERSIZE 10

// Prints every distinct value of arr whose stored count equals times.
void freqElement(int *arr, int len, int times);
// Enters every value of arr into the hash table, counting repeats; returns 1.
int createHT(int *arr, int len);

// Demo driver: counts the sample values in the global hsearch(3) table and
// then reports, for every possible occurrence count, the values that occur
// exactly that many times.  Returns EXIT_FAILURE if the table could not be
// created or filled.
int main(void)
{
    int arr[] = { 2, 3, 5, 6, 10, 10, 2, 5, 2, 8, 8, 7, 8, 7, 8, 7, };
    int len = sizeof(arr) / sizeof(arr[0]);
    int status = EXIT_FAILURE;

    if (hcreate(len) == 0)
        fprintf(stderr, "Failed to create hash table of size %d\n", len);
    else
    {
        if (!createHT(arr, len))
            fprintf(stderr, "error in entering data\n");
        else
        {
            // A value can occur up to len times (array of identical values),
            // so the upper bound must be inclusive.
            for (int i = 1; i <= len; i++)
                freqElement(arr, len, i);
            status = EXIT_SUCCESS;
        }

        hdestroy();
    }
    return status;
}

// Count every value of arr[0..len-1] in the global hsearch(3) table.
//
// Each value is formatted as a decimal string key; the entry's data pointer
// is used as an integer occurrence counter.  A progress line is printed for
// every element.  Returns 1 on success; if a key cannot be duplicated or
// entered, an error is printed and the process exits.
int createHT(int *arr, int len)
{
    ENTRY e, *ep;

    for (int i = 0; i < len; i++)
    {
        char buffer[BUFFERSIZE];
        snprintf(buffer, sizeof(buffer), "%d", arr[i]);
        // The entry is given its own heap copy of the key.
        e.key = strdup(buffer);
        if (e.key == NULL)
        {
            // Never hand a NULL key to printf("%s") or hsearch().
            fprintf(stderr, "out of memory for key [%s]\n", buffer);
            exit(EXIT_FAILURE);
        }
        e.data = (void *)0;
        printf("Processing [%s]\n", e.key);

        ep = hsearch(e, ENTER);
        if (ep)
        {
            ep->data = (void *)((intptr_t)ep->data + 1);
            // Key already present: the table kept its earlier copy, drop ours.
            if (ep->key != e.key)
                free(e.key);
        }
        else
        {
            fprintf(stderr, "entry failed for [%s]\n", e.key);
            free(e.key);    // Not dreadfully important
            exit(EXIT_FAILURE);
        }
    }
    return 1;
}

// Tell whether `value` appears among the first `len` elements of `arr`,
// i.e. whether the caller has already handled this number earlier on.
static bool processed_before(int *arr, int len, int value)
{
    bool seen = false;
    int pos = 0;

    // Stop at the first match; a non-positive len scans nothing.
    while (!seen && pos < len)
    {
        seen = (arr[pos] == value);
        pos++;
    }
    return seen;
}

// Print each distinct value of arr[0..len-1] whose occurrence count in the
// global hsearch(3) table equals `times`.  A value is reported only at its
// first position in the array, so repeated values are printed once.
void freqElement(int *arr, int len, int times)
{
    ENTRY query;

    for (int idx = 0; idx < len; idx++)
    {
        char key[BUFFERSIZE];
        ENTRY *hit;

        snprintf(key, BUFFERSIZE, "%d", arr[idx]);
        query.key = key;
        hit = hsearch(query, FIND);

        if (hit == NULL)
            continue;
        // The count is stored in the entry's data pointer.
        if ((intptr_t)hit->data != times)
            continue;
        // Skip later duplicates of a value that was already reported.
        if (processed_before(arr, idx, arr[idx]))
            continue;

        printf(" value %s is repeated %d times\n", hit->key, times);
    }
}

processed_before()函数可防止对出现多次的值重复打印——之所以需要它，是因为freqElement()函数被改成报告具有给定出现次数的所有条目，而不仅仅是第一个此类条目。这种做法并不十分理想，但代码中包含了一些打印语句，便于监视进度，这有助于确认代码运行正确。

输出示例

Processing [2]
Processing [3]
Processing [5]
Processing [6]
Processing [10]
Processing [10]
Processing [2]
Processing [5]
Processing [2]
Processing [8]
Processing [8]
Processing [7]
Processing [8]
Processing [7]
Processing [8]
Processing [7]
 value 3 is repeated 1 times 
 value 6 is repeated 1 times 
 value 5 is repeated 2 times 
 value 10 is repeated 2 times 
 value 2 is repeated 3 times 
 value 7 is repeated 3 times 
 value 8 is repeated 4 times

Answer 2

让我们从问题陈述开始，因为正如我在评论中提到的那样，我认为您当前的代码无法解决问题：

在数组中找到n个最频繁的元素。

1 <N <100000 [数组长度]

-1000000 <n <1000000 [数组整数]

我的意思是说我们想要一个这样的函数：

size_t n_most_popular(int input[], size_t input_size, int output[], size_t output_size);

此函数采用整数（在-1000000和1000000之间）的输入数组（大小最大为100000），并使用输入中的N个最常见的元素填充输出数组，其中N为output_size 。 为了方便起见，我们可以规定将最常见的元素放置在输出的前面，而将不常见的元素放置在输出的后面。

一种简单的方法是首先对输入数组进行排序（可能在适当的位置，可能使用标准qsort() ）。 然后，您将得到一个像这样的数组：

[1,1,1,1,2,2,3,3,3,3,3,4,7,...]

然后，构造一个结构数组，其中每个结构都包含来自输入的唯一值及其出现的次数。 它的最大长度是input_size ，从排序后的输入一次通过构建它很简单。

最后，使用标准qsort()按count字段降序对该结构数组进行排序。 将第一个output_size元素复制到输出数组，然后返回输出数组的实际填充大小（如果输入数组中的唯一值不足，则该大小可能小于output_size ）。

这是一个可以正常工作的C程序：

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

// Writes up to output_size of the most frequent values of input into output
// (most frequent first) and returns how many were written; sorts input in place.
size_t most_popular(int input[], size_t input_size, int output[], size_t output_size);

// Demo driver: prints the three most frequent values of a sample array,
// most frequent first.
int main(void)
{
    int arr[] = {2, 3, 5, 6, 10, 10, 2, 5, 2};
    size_t len = sizeof(arr)/sizeof(arr[0]);

    int out[3];
    size_t outlen = sizeof(out)/sizeof(out[0]);

    size_t count = most_popular(arr, len, out, outlen);

    for (size_t ii = 0; ii < count; ii++) {
        // %zu is the conversion for size_t; %lu is wrong wherever size_t
        // is not unsigned long.
        printf("most popular rank %zu: %d\n", ii+1, out[ii]);
    }

    return 0;
}

// One distinct input value together with the number of times it occurs.
typedef struct
{
    int value;
    int count;
} value_count;

// qsort() comparator for value_count records: a higher count sorts first.
// Records with equal counts are ordered by ascending value so the result
// does not depend on the qsort() implementation.  Uses comparisons instead
// of subtraction, so no operand pair can overflow.
int value_count_greater(const void* lhs, const void* rhs)
{
    const value_count *vcl = lhs, *vcr = rhs;

    if (vcl->count != vcr->count) {
        return (vcl->count < vcr->count) - (vcl->count > vcr->count);
    }
    return (vcl->value > vcr->value) - (vcl->value < vcr->value);
}

// qsort() comparator that orders ints ascending.  Compares instead of
// subtracting: `*il - *ir` overflows (undefined behaviour) when the two
// values are far apart, e.g. INT_MIN and 1.
int int_less(const void *lhs, const void *rhs)
{
    const int *il = lhs, *ir = rhs;
    return (*il > *ir) - (*il < *ir);
}

// returns 0 if out of memory or input_size is 0, else returns valid portion of output                                                                                    
size_t most_popular(int input[], size_t input_size, int output[], size_t output_size)
{
    qsort(input, input_size, sizeof(input[0]), int_less);

    value_count* value_counts = malloc(input_size * sizeof(value_count));
    if (value_counts == NULL) {
        return 0;
    }

    // count how many times each value occurs in input                                                                                                                    
    size_t unique_count = 0;
    for (size_t ii = 0; ii < input_size; ii++) {
        if (ii == 0 || input[ii] != value_counts[unique_count-1].value) {
            value_counts[unique_count].value = input[ii];
            value_counts[unique_count].count = 1;
            unique_count++;
        } else {
            value_counts[unique_count-1].count++;
        }
    }

    // sort unique values by how often they occur, most popular first                                                                                                     
    qsort(value_counts, unique_count, sizeof(value_counts[0]), value_count_greater);

    size_t result_size = unique_count < output_size ? unique_count : output_size;
    for (size_t ii = 0; ii < result_size; ii++) {
        output[ii] = value_counts[ii].value;
    }

    free(value_counts);
    return result_size;
}

在C中的数组中找到n个重复数字

问题描述

2 个解决方案

解决方案1
1 已采纳 2016-05-01 17:18:52

码

输出示例

解决方案2
0 2016-05-01 02:49:01

在C中的数组中找到n个重复数字

问题描述

2 个解决方案

解决方案1 1 已采纳 2016-05-01 17:18:52

码

输出示例

解决方案2 0 2016-05-01 02:49:01

解决方案1
1 已采纳 2016-05-01 17:18:52

解决方案2
0 2016-05-01 02:49:01