如何在C中使用fgetc()统计每个不同单词的出现次数，然后打印这些计数

Question

我问了一个与此程序有关的问题，但经过大量研究和反复讨论，我再次陷入困境。

我正在尝试编写一个程序，该程序将接受用户输入并存储它，然后打印出所有唯一单词以及它们各自出现的次数

例如

Please enter something: Hello#@Hello# hs,,,he,,whywhyto[then the user hits enter] 

hello 2 
hs 1 
he 1 
whywhyto 1

上面应该是输出。当然，whywhyto 并不是一个真正的单词，但在这种情况下并不重要，因为我假设任何由非字母字符（空格、0-9、#、$、(、@ 等）分隔的连续字母序列都被视为一个单词。我需要使用二维数组，因为我不能使用链表，也不理解链表。

这是我到目前为止所拥有的

#include <stdio.h> 
#include <ctype.h> 

/*
 * Reads one line from stdin with fgetc(), keeps only the alphabetic
 * characters, and echoes them back on a single line.
 *
 * Reading stops at the first newline or at end-of-file. At most
 * sizeof array letters are kept; any further letters on the line are
 * read and discarded so the buffer can never be overrun.
 */
int main(void)
{
char array[64];

int i=0, j, input;

printf("Please enter an input:");

/* Test for EOF as well as '\n': fgetc() returns EOF forever once the
   input is exhausted, which would otherwise loop without end. */
while((input=fgetc(stdin)) != '\n' && input != EOF)
{
/* input is an int holding an unsigned char value here, so it can be
   passed to isalpha() directly. */
if(isalpha(input) && i < (int)sizeof array)
{
array[i]=(char)input;
i++;
}
}

/* array is not NUL-terminated, so print exactly the i stored letters. */
for(j=0;j<i;j++)
{
printf("%c",array[j]);
}
printf("\n");

return 0;
}

我使用 isalpha 只保留字母，但这段代码所做的只是丢弃所有非字母字符，把剩下的字母存储起来再打印出来。我不知道如何让它在某个单词第一次出现时把它存储一次，之后只增加该单词的计数。我只能使用 fgetc()，这对我来说相当困难；我只有大约 3-4 个月的 C 经验。我知道必须使用二维数组，也已经在阅读相关资料，但仍然不明白该如何实现，请给我一些帮助。

Answer 1

不知道这是否是家庭作业，所以我没有替您把所有事情都做完，不过我稍微整理了一下代码。但是，如果您事先不知道用户会输入多少个单词，就几乎必须使用动态数据结构，例如链表。

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One node of a singly linked list of words; main() walks the list
   through `next` until it reaches NULL. */
typedef struct linkedlist linkedlist;
struct linkedlist{
    char *word;       /* the word text, printed with %s by main() */
    int count;        /* occurrence count, printed with %d by main() */
    linkedlist *next; /* following node, or NULL at the end of the list */
};

/* Defined further down in this file; declared here so main() can call
   them without relying on implicit declarations. */
void add_word(char *word, linkedlist **ll);
void destroy_list(linkedlist **ll);

/*
 * Reads one line from stdin with fgetc(), splits it into words (maximal
 * runs of alphabetic characters), hands a heap-allocated copy of every
 * word to add_word(), and finally prints each list entry with its count.
 *
 * A word ends at any non-letter, at the newline, or at end-of-file, so
 * the last word on the line is counted too. Runs of delimiters do not
 * produce empty words. A run of more than 63 letters is split into
 * 63-letter pieces rather than overrunning the buffer.
 *
 * Returns 0 on success, 1 if memory for a word could not be allocated.
 */
int main(void)
{
    char array[64];
    int i=0, input;
    linkedlist *head = NULL;

    printf("Please enter an input:");

    do
    {
        input = fgetc(stdin);
        /* isalpha() accepts EOF and reports it as "not a letter". */
        int is_letter = isalpha(input) != 0;

        if(is_letter && i < (int)sizeof array - 1)
        {
            array[i++] = (char)input;
            continue;
        }

        /* A delimiter, the end of the input, or a full buffer: store the
           word collected so far, if there is one. */
        if(i > 0)
        {
            array[i] = '\0';
            char *word = malloc((size_t)i + 1);
            if(word == NULL)
            {
                fprintf(stderr, "Out of memory\n");
                destroy_list(&head);
                return 1;
            }
            strcpy(word, array);
            add_word(word, &head); /* the word is handed over to the list */
            i = 0;                 /* start collecting the next word */
        }

        /* The buffer was full: this letter starts the next piece. */
        if(is_letter)
        {
            array[i++] = (char)input;
        }
    } while(input != '\n' && input != EOF);

    //print out final results
    for(linkedlist *temp = head; temp != NULL; temp = temp->next){
        printf("%s %d ", temp->word, temp->count);
    }

    destroy_list(&head);
    return 0;
}

/*
 * Records one occurrence of `word` in the list whose head pointer is *ll.
 *
 * If a node with the same text already exists, its count is incremented
 * and `word` is freed. Otherwise a new node with count 1 is appended to
 * the end of the list and stores the `word` pointer itself.
 *
 * In both cases the caller gives up `word`, which must have come from
 * malloc(). If the new node cannot be allocated, a message is written to
 * stderr, `word` is freed and the list is left unchanged.
 */
void add_word(char *word, linkedlist **ll){
    if (word == NULL || ll == NULL) {
        free(word);
        return;
    }

    /* `link` always points at the pointer that leads to the current node,
       so when the loop ends it is the NULL link where a new node belongs.
       This covers the empty list and the append case with one code path. */
    linkedlist **link = ll;
    while (*link != NULL) {
        if (strcmp((*link)->word, word) == 0) {
            (*link)->count++;
            free(word);
            return;
        }
        link = &(*link)->next;
    }

    linkedlist *node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(word);
        return;
    }
    node->word = word;
    node->count = 1;
    node->next = NULL;
    *link = node;
}

/*
 * Frees every node of the list whose head pointer is *ll, together with
 * the word string each node holds, then sets *ll to NULL so the caller
 * is not left with a dangling head pointer.
 *
 * Calling it with ll == NULL or on an empty list does nothing.
 */
void destroy_list(linkedlist **ll){
    if (ll == NULL) {
        return;
    }

    linkedlist *node = *ll;
    while (node != NULL) {
        /* Save the successor first: node must not be read after free(). */
        linkedlist *next = node->next;
        free(node->word);
        free(node);
        node = next;
    }
    *ll = NULL;
}

对于add_word，您将需要一些类似于（PSEUDO-CODE）的内容：

list = *ll
if(list == NULL): //new list
    *ll = malloc(sizeof(linkedlist))
    (*ll)->word = word
    (*ll)->count = 1
    (*ll)->next = NULL
    return

while list->next != null:
    if strcmp(word, list->word) == 0: //compare the text, not the pointers
        free(word)
        list->count++
        return
    list = list->next

if strcmp(list->word, word) == 0: //last word in list
    free(word)
    list->count++
else: //word did not exist, add new word to end of list
    temp = malloc(sizeof(linkedlist))
    temp->word = word
    temp->count = 1
    temp->next = NULL //terminate the list
    list->next = temp

也许不是最有效的方法，但是您可以改进它。希望我不会进一步混淆您，祝您好运

Answer 2

这是似乎有效的代码：

#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Capacity limits: at most MAX_WORDS distinct words, each at most
   MAX_WORD_LEN - 1 letters plus the terminating NUL. */
enum { MAX_WORDS = 64, MAX_WORD_LEN = 20 };

/*
 * Reads stdin until end-of-file, treating every maximal run of letters
 * as a word. Each word is reported on stdout as it is found, and the
 * distinct words are printed with their counts at the end, in order of
 * first appearance.
 *
 * Reading stops early, with a message on stderr, when a word has more
 * than MAX_WORD_LEN - 1 letters or when more than MAX_WORDS distinct
 * words are seen. The words counted up to that point are still printed.
 */
int main(void)
{
    char words[MAX_WORDS][MAX_WORD_LEN];
    int  count[MAX_WORDS] = { 0 };
    int w = 0;                  /* number of distinct words stored */
    char word[MAX_WORD_LEN];    /* word currently being collected */
    int c;
    int l = 0;                  /* letters collected in word so far */

    /* A do-while lets EOF pass through the body once, so a final word
       that is followed directly by EOF is still counted. */
    do
    {
        c = getchar();
        if (c != EOF && isalpha(c))
        {
            if (l < MAX_WORD_LEN - 1)
               word[l++] = (char)c;
            else
            {
                /* word is not NUL-terminated here, so the precision
                   (%.*s) must limit the output to the l letters stored. */
                fprintf(stderr, "Word too long: %.*s%c...\n", l, word, c);
                break;
            }
        }
        else if (l > 0)
        {
            word[l] = '\0';
            printf("Found word <<%s>>\n", word);
            assert(strlen(word) < MAX_WORD_LEN);
            int found = 0;
            for (int i = 0; i < w; i++)
            {
                if (strcmp(word, words[i]) == 0)
                {
                    count[i]++;
                    found = 1;
                    break;
                }
            }
            if (!found)
            {
                if (w >= MAX_WORDS)
                {
                    fprintf(stderr, "Too many distinct words (%s)\n", word);
                    break;
                }
                strcpy(words[w], word);
                count[w++] = 1;
            }
            l = 0;
        }
    } while (c != EOF);

    for (int i = 0; i < w; i++)
        printf("%3d: %s\n", count[i], words[i]);

    return 0;
}

样本输出：

$ ./wordfreq <<< "I think, therefore I am, I think, or maybe I do not think after all, and therefore I am not."
Found word <<I>>
Found word <<think>>
Found word <<therefore>>
Found word <<I>>
Found word <<am>>
Found word <<I>>
Found word <<think>>
Found word <<or>>
Found word <<maybe>>
Found word <<I>>
Found word <<do>>
Found word <<not>>
Found word <<think>>
Found word <<after>>
Found word <<all>>
Found word <<and>>
Found word <<therefore>>
Found word <<I>>
Found word <<am>>
Found word <<not>>
  5: I
  3: think
  2: therefore
  2: am
  1: or
  1: maybe
  1: do
  2: not
  1: after
  1: all
  1: and
$ ./wordfreq <<< "I think thereforeIamIthinkormaybeI do not think after all, and therefore I am not."
Found word <<I>>
Found word <<think>>
Word too long: thereforeIamIthinkor...
  1: I
  1: think
$ ./wordfreq <<< "a b c d e f g h i j k l m n o p q r s t u v w x y z
>                 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
>                 aa ab ac ad ae af ag ah ai aj ak al am
>                 an ao ap aq ar as at au av aw ax ay az
>                "
Found word <<a>>
Found word <<b>>
Found word <<c>>
Found word <<d>>
Found word <<e>>
Found word <<f>>
Found word <<g>>
Found word <<h>>
Found word <<i>>
Found word <<j>>
Found word <<k>>
Found word <<l>>
Found word <<m>>
Found word <<n>>
Found word <<o>>
Found word <<p>>
Found word <<q>>
Found word <<r>>
Found word <<s>>
Found word <<t>>
Found word <<u>>
Found word <<v>>
Found word <<w>>
Found word <<x>>
Found word <<y>>
Found word <<z>>
Found word <<A>>
Found word <<B>>
Found word <<C>>
Found word <<D>>
Found word <<E>>
Found word <<F>>
Found word <<G>>
Found word <<H>>
Found word <<I>>
Found word <<J>>
Found word <<K>>
Found word <<L>>
Found word <<M>>
Found word <<N>>
Found word <<O>>
Found word <<P>>
Found word <<Q>>
Found word <<R>>
Found word <<S>>
Found word <<T>>
Found word <<U>>
Found word <<V>>
Found word <<W>>
Found word <<X>>
Found word <<Y>>
Found word <<Z>>
Found word <<aa>>
Found word <<ab>>
Found word <<ac>>
Found word <<ad>>
Found word <<ae>>
Found word <<af>>
Found word <<ag>>
Found word <<ah>>
Found word <<ai>>
Found word <<aj>>
Found word <<ak>>
Found word <<al>>
Found word <<am>>
Too many distinct words (am)
  1: a
  1: b
  1: c
  1: d
  1: e
  1: f
  1: g
  1: h
  1: i
  1: j
  1: k
  1: l
  1: m
  1: n
  1: o
  1: p
  1: q
  1: r
  1: s
  1: t
  1: u
  1: v
  1: w
  1: x
  1: y
  1: z
  1: A
  1: B
  1: C
  1: D
  1: E
  1: F
  1: G
  1: H
  1: I
  1: J
  1: K
  1: L
  1: M
  1: N
  1: O
  1: P
  1: Q
  1: R
  1: S
  1: T
  1: U
  1: V
  1: W
  1: X
  1: Y
  1: Z
  1: aa
  1: ab
  1: ac
  1: ad
  1: ae
  1: af
  1: ag
  1: ah
  1: ai
  1: aj
  1: ak
  1: al
$

对“单词太长”和“单词太多”的测试有助于使我确信代码是正确的。 设计这样的测试是一种好习惯。

Answer 3

OP仍有大量工作要做。

诀窍是：1）读取输入；2）识别定界符；3）将每个单词与整个缓冲区进行比较；4）每个单词只打印一次。

这种方法很节省内存，因为它只使用了OP建议的64个字符的缓冲区。搜索复杂度为O(n*n)。

#include <ctype.h>
#include <stdio.h>
#include <string.h>

// Helper function to find word occurrences.
//
// `array` holds `i` characters in which every non-letter has already been
// replaced by '\0', so each word is a NUL-terminated string inside the
// buffer. array[i] must also be a non-letter (main() stores '\0' there),
// because the skip loop below may read that position.
//
// Counts the tokens in `array` that are equal to `word` and prints
// "<word> <count>" only when `word` points at the last matching token.
// Calling this once for every token therefore prints each distinct word
// exactly once. Nothing is printed if no token matches.
void Print_count(const char *word, const char *array, int i) {
  int count = 0;
  // NULL until a match is seen; the original left this uninitialised and
  // compared it against `word` even when nothing had matched.
  const char *found = NULL;
  for (int j = 0; j < i; j++) {
    if (isalpha((unsigned char ) array[j])) {
      if (strcmp(&array[j], word) == 0) {
        found = &array[j];
        count++;
      }
      // skip rest of word
      do {
        j++;
      } while (isalpha((unsigned char ) array[j]));
    }
  }
  if (found == word) {
    printf("%s %d\n", word, count);
  }
}

// Reads one line from stdin into a 64-byte buffer, turns every non-letter
// into '\0' so the buffer becomes a sequence of NUL-terminated words, and
// calls Print_count() once per word.
int main(void) {
  char array[64];
  int i = 0;
  int input;

  printf("Please enter an input:");

  // Collect characters up to the newline, EOF, or a full buffer. The last
  // slot is kept for the terminator written after the loop.
  for (;;) {
    input = fgetc(stdin);
    if (input == '\n' || input == EOF)
      break;
    array[i] = input;
    if (i + 1 >= sizeof array)
      break;
    i++;
  }
  array[i] = '\0';

  // Every delimiter becomes a string terminator.
  for (char *p = array; p < array + i; p++) {
    if (!isalpha((unsigned char ) *p))
      *p = '\0';
  }

  // Visit the first letter of each word.
  int j = 0;
  while (j < i) {
    if (!isalpha((unsigned char ) array[j])) {
      j++;
      continue;
    }
    Print_count(&array[j], array, i);
    // Move to the terminator that ends this word ...
    do {
      j++;
    } while (isalpha((unsigned char ) array[j]));
    // ... and step over it.
    j++;
  }
  return 0;
}

输入Hello#@Hello# hs,,,he,,whywhyto

输出：

Hello 2
hs 1
he 1
whywhyto 1

如何使用fgetc（）计算唯一字数，然后在C中打印计数

问题描述

3 个解决方案

解决方案1
1 2014-11-23 05:52:07

解决方案2
1 已采纳 2014-11-23 07:34:47

解决方案3
0 2014-11-23 06:18:27

如何使用fgetc（）计算唯一字数，然后在C中打印计数

问题描述

3 个解决方案

解决方案1 1 2014-11-23 05:52:07

解决方案2 1 已采纳 2014-11-23 07:34:47

解决方案3 0 2014-11-23 06:18:27

解决方案1
1 2014-11-23 05:52:07

解决方案2
1 已采纳 2014-11-23 07:34:47

解决方案3
0 2014-11-23 06:18:27