简体   繁体   中英

Find most frequent letter in C

Here is my code to find the most frequent letter in a string. I need to store the most frequent letters in the most_freq string, and they should be in alphabetical order too.

I don't know how to handle the situation where several letters share the same frequency and are all the most frequent.

/*
 * Question snippet: reads one line from stdin, counts the lowercase letters
 * 'a'..'z', prints every letter's count, then prints the highest count and a
 * single letter that has it.
 */
char text[200];
char most_freq[27];   /* 26 letters plus a terminating '\0' */
int freq[26];         /* freq[k] is the number of occurrences of 'a' + k */
int i;
int counter;          /* index into freq[] for the current character */
int temp = 0;         /* running highest count seen so far */
int max_freq;         /* NOTE(review): only assigned when some count exceeds 0 */

fgets(text, sizeof(text), stdin);
size_t len = strlen(text);
/* fgets keeps the trailing newline; drop it if present. */
/* NOTE(review): when len is 0 this reads text[len - 1] outside the string. */
if (text[len - 1] == '\n') {
    text[len - 1] = '\0';
    len--;
}

/* Clear all counters before counting. */
for (i = 0; i < 26; i++) {
    freq[i] = 0;
}

/* Count lowercase letters only; every other character is ignored. */
for (i = 0; i < len; i++) {
    if (text[i] >= 'a' && text[i] <= 'z') {
        counter = text[i] - 'a';
        freq[counter]++;
    }
}

int max = 0;          /* index of the letter with the highest count so far */

for (i = 0; i < 26; i++) {
    /* Track the highest count (temp / max_freq). */
    if (freq[i] > temp) {
        temp = freq[i];
        max_freq = temp;
    }
    printf("%c occurs %d times.\n", i + 'a', freq[i]);

    /* Strict '>' keeps the earliest letter when several share the top count. */
    if (freq[i] > freq[max]) {
        max = i;
    }
}

printf("Highest frequency: %d \n", max_freq);
//printf("%c \n",max+'a');
/* Stores exactly one letter, so ties for the highest count are not recorded. */
sprintf(most_freq, "%c", max + 'a');
puts(most_freq);
#include <stdio.h>
#include <ctype.h>

#define SIZE 99

/*
 * Reads one line from stdin, counts letters case-insensitively and prints,
 * in alphabetical order and as lowercase, every letter whose count equals
 * the highest count found. Returns 0.
 */
int main() {

    char s[SIZE];
    int freq[26] = {0}, i, max = 0;

    /* On EOF or a read error fgets leaves s unusable; treat that as an empty
       line so the scan below never reads uninitialized memory. */
    if( fgets(s, SIZE, stdin) == NULL ) s[0] = '\0';

    /* Count each letter; upper and lower case share the same slot. */
    for(i = 0; s[i]; i++) {

        /* <ctype.h> functions require a value representable as unsigned char. */
        if( isalpha((unsigned char)s[i]) ) ++freq[s[i] - ( islower((unsigned char)s[i]) ? 'a' : 'A')];
    }

    /* Find the highest count. */
    for(i = 0; i < 26; i++) {

        if( max < freq[i] ) max = freq[i];
    }

    /* Print every letter that reaches the highest count. */
    for(i = 0; i < 26; i++) {

        if( freq[i] == max ) putchar('a'+i);
    }

    return 0;
}

I did it by first calculating all of the letter frequencies, then finding the maximum letter frequency, and finally printing all the characters that have that maximum frequency.

Here is my solution. I commented it to make it clearer:

#include <stdio.h>
#include <ctype.h>

/*
 * Reads one line from stdin, counts the lowercase letters 'a'..'z', prints
 * each letter's count, then lists every letter whose count equals the
 * highest count. Returns 0.
 */
int main() {
  char   text[200] = { 0 };
  size_t freq[26]  = { 0 };
  // number of ELEMENTS in freq; sizeof(freq) alone is a byte count
  const size_t letters = sizeof(freq) / sizeof(freq[0]);

  // get the user's text; fall back to an empty string on EOF or read error
  if (!fgets(text, sizeof(text), stdin)) {
    text[0] = '\0';
  }

  // calculate frequencies, walking up to the terminator so that no
  // string-length call (and no extra header) is needed
  for (size_t i = 0; text[i] != '\0'; ++i) {
    if (text[i] >= 'a' && text[i] <= 'z') {
      ++freq[text[i] - 'a'];
    }
  }

  size_t maxCount = 0;

  // find the maximum frequency and print each letter's frequency
  for (size_t i = 0; i < letters; ++i) {
    if (freq[i] > maxCount) {
      maxCount = freq[i];
    }

    // %c expects an int and size_t needs %zu; mismatched types are undefined
    printf("%c occurs %zu times.\n", (int)('a' + i), freq[i]);
  }

  printf("\n\n"); // padding

  // print all characters with the maximum frequency
  for (size_t i = 0; i < letters; ++i) {
    if (freq[i] == maxCount) {
      printf("%c occurs with maximum frequency.\n", (int)('a' + i));
    }
  }

  return 0;
}

EDIT: You could also extend your program by making it work with letters of any case by using the tolower() function from libc's ctype.h header file to make all characters lowercase when calculating their frequencies.

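The line ++freq[text[i]-'a']; would become ++freq[tolower((unsigned char)text[i])-'a']; . Note that the range check guarding that line must then test the lowercased character as well (or be replaced by isalpha()), otherwise uppercase letters are still skipped.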

Here is a simpler version:

#include <stdio.h>

/*
 * Reads one line from stdin (an empty string if nothing can be read), counts
 * the lowercase letters 'a'..'z' and prints, in alphabetical order, every
 * letter whose count equals the highest count, followed by a newline.
 */
int main() {
    char line[200];
    int counts[26] = { 0 };
    int best = 0;

    /* no input available: behave as if an empty line had been typed */
    if (fgets(line, sizeof line, stdin) == NULL) {
        line[0] = '\0';
    }

    /* walk the string once, tallying letters and tracking the top count */
    for (const char *p = line; *p != '\0'; ++p) {
        if (*p < 'a' || *p > 'z') {
            continue;                   /* not a lowercase letter */
        }

        int slot = *p - 'a';            /* assuming ASCII */
        counts[slot] += 1;
        if (counts[slot] > best) {
            best = counts[slot];
        }
    }

    /* ascending slot order gives alphabetical output */
    for (int slot = 0; slot < 26; ++slot) {
        if (counts[slot] == best) {
            putchar('a' + slot);
        }
    }

    putchar('\n');
    return 0;
}

Generally, when you want the frequency of any object within a specified range, you want your frequency array to cover the entire range of possible values for that object. For a full explanation of what a frequency array is and how to use it, see the answer to How to remove duplicate char in string in C.

In this case, for ASCII characters, you have a range of 128 possible values (or 256 if you include the extended ASCII set); see ASCII Table & Description. By creating your frequency array with 128 (or 256) elements, you eliminate having to check for special cases such as lower/uppercase characters.

For example, you could cover all ASCII characters and find the most frequent a user may enter with the following:

#include <stdio.h>
#include <ctype.h>

#define SIZE 256        /* size works for both frequency array and text as a multiple */

/*
 * Prompts for a line, counts every character from '!' upward (whitespace and
 * control characters are ignored) and prints how often the most frequent
 * ones appear, followed by those characters in ascending code order.
 */
int main (void) {

    char text[SIZE * 4];                                /* 1024 byte buffer */
    int freq[SIZE] = {0}, max = 0;                      /* frequency array & max */

    fputs ("enter string: ", stdout);                   /* prompt */
    if (!fgets (text, sizeof text, stdin)) {            /* use the whole buffer, not just SIZE bytes */
        puts ("(user canceled input)");
        return 0;
    }

    for (int i = 0; text[i]; i++) {                     /* loop over each char */
        unsigned char c = (unsigned char)text[i];

        if (c < '!')                                    /* skip what the output loop skips, so */
            continue;                                   /* max only reflects printable chars   */
        if (++freq[c] > max)                            /* increment/check against max */
            max = freq[c];
    }

    printf ("\nmost frequent appear %d times: ", max);
    if (max > 0)                                        /* nothing counted: print no chars */
        for (int i = '!'; i < SIZE; i++)                /* loop over freq array */
            if (freq[i] == max)                         /* if count == max */
                putchar (i);                            /* output char */

    putchar ('\n');                                     /* tidy up with newline */
    return 0;
}

( note: since a user may validly cancel input generating a manual EOF by pressing Ctrl + d , or Ctrl + z on windows, it is good to validate every input and handle the EOF case as well as any other possible failure modes)

Since ASCII characters at or below ' ' (space, 32, 0x20) are non-printable or whitespace, such as '\t', '\n', '\r', etc., you can begin your output loop with the '!' character to ignore whitespace. If you need the frequency of every character, you can create a short lookup table with the string representations of each non-printable or whitespace character. That way, if the ' ' character were the most frequent character, or one of them, you could output, e.g., "(sp)" or something similar, "(tab)" for tab, etc.

Example Use/Output

$ ./bin/badfreq
enter string: 123+HH~helloo+345

most frequent appear 2 times: +3Hlo

Saving Most Frequent Characters To Array

There are very few changes needed to buffer the most frequently occurring characters in an array instead of directly outputting them with putchar(). All you need to do is declare an array of sufficient size to hold the maximum number of possible characters (+1 to allow space for a nul-terminating character if you wish to treat the array as a C-string).

Below we add the buffer (character array) most_freq[] to hold the most frequently used characters, and then fill the most_freq[] array where we were simply outputting the characters in the first example, e.g.

#include <stdio.h>
#include <ctype.h>

#define SIZE 256        /* size works for both frequency array and text as a multiple */

/*
 * Prompts for a line, counts every character from '!' upward (whitespace and
 * control characters are ignored), stores the most frequent characters in
 * most_freq[] as a nul-terminated string in ascending code order, and prints
 * how many there are, how often they appear, and the string itself.
 */
int main (void) {

    char text[SIZE * 4],                                /* 1024 byte read buffer */
         most_freq[SIZE + 1] = "";                      /* 257 byte buffer for storage */
    int freq[SIZE] = {0},                               /* frequency array */
        max = 0,                                        /* times most frequent occurs */
        mf_ndx = 0;                                     /* most_frequent index */

    fputs ("enter string: ", stdout);                   /* prompt */
    if (!fgets (text, sizeof text, stdin)) {            /* use the whole buffer, not just SIZE bytes */
        puts ("(user canceled input)");
        return 0;
    }

    for (int i = 0; text[i]; i++) {                     /* loop over each char */
        unsigned char c = (unsigned char)text[i];

        if (c < '!')                                    /* skip what the fill loop skips, so */
            continue;                                   /* max only reflects stored chars    */
        if (++freq[c] > max)                            /* increment/check against max */
            max = freq[c];
    }

    if (max > 0)                                        /* nothing counted: store no chars */
        for (int i = '!'; i < SIZE; i++)                /* loop over freq array */
            if (freq[i] == max)                         /* if count == max */
                most_freq[mf_ndx++] = (char)i;          /* store in most_freq array */
    most_freq[mf_ndx] = 0;                              /* nul-terminate as string */

    printf ("\n%d most frequent chars appear %d times: %s\n",   /* output results */
            mf_ndx, max, most_freq);
    return 0;
}

After the loop that fills the array exits, we add the nul-terminating character '\0' (same as plain old ASCII 0) after the last character added, so we can then treat the most_freq[] array as a string.

Example Use/Output

This does allow a simple way to provide a bit more information in the output, eg

$ ./bin/most_freq_array
enter string: 123+HH~helloo+345

5 most frequent chars appear 2 times: +3Hlo

Or in your specific example case of "helloo" :

$ ./bin/most_freq_array
enter string: helloo

2 most frequent chars appear 2 times: lo

Look things over and let me know if you have further questions.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM