簡體   English   中英

C++ 單詞計數程序

[英]Word Counting Program C++

我目前正在嘗試用 C++ 編寫一個單詞計數程序,但遇到了一個難題:程序無法正確解析字符串並把單詞彼此分開。 除此之外,我也無法正確統計每個唯一單詞的出現次數(每次單詞重復出現時,計數都應該加一)。 我認為問題很可能出在我的 findWord() 和 DistinctWords() 函數上。 也許其他函數中還有我沒有注意到的問題,但就上述兩個函數而言,我不知道它們到底錯在哪里。 以下是老師提供的作業說明:


創建一個程序,該程序將統計並報告文本文件中不區分大小寫的不同單詞的出現次數。

該程序應具有以下循環:

1.提示用戶輸入文件名。 如果用戶僅按Enter鍵,則終止循環和程序。

2.驗證是否存在具有輸入名稱的文件。 如果該文件不存在,則顯示一條適當的消息並返回到步驟1。

3.讀取並顯示文件內容。

4.顯示文件中不同單詞的計數。

5.顯示文件中每個不同單詞的排序列表以及每個單詞的出現次數。 按出現次數從大到小的順序對列表進行排序。


我現在完全卡住了,而我的作業要在午夜前提交。 如能提供幫助,將不勝感激。 感謝您的時間。 這是我的代碼,我還會在代碼之后貼上示例測試文本文件的內容:


#include <algorithm>    // Needed for sort from standard libraries
#include <cctype>       // Needed for tolower / ispunct
#include <cstdlib>      // Needed for exit
#include <fstream>      // Needed to use files
#include <iomanip>
#include <iostream>
#include <sstream>      // Needed for istringstream
#include <string>
#include <vector>
using namespace std;

// One distinct word together with the number of times it has been seen.
struct WordCount{
    std::string word;   // The word itself
    int count;          // Occurrence count

    // Starts a new entry for s; at this point the word has been seen once.
    WordCount(std::string s) : word(s), count(1) {}

    // Records one more occurrence of the word.
    void iCount(){ ++count; }
};

// Forward declarations for the functions defined after main()
string InputText();                                 // Ask for a file name and return the text of that file
string Normalize(string s);                         // Lowercase the text and strip punctuation
vector<WordCount> DistinctWords(string s);          // Build the vector of distinct words and their counts
bool findWord(string s, vector<WordCount> words);   // Linear search for a word in the vector of structures
void DisplayResults(vector<WordCount> words);       // Print the word/count table

// Main
int main(int argc, char** argv) {
    // Program Title
    cout << "Lab 9 - Text File Word Counter\n";
    cout << "-------------------------------\n\n";

    // Input text from file
    string buffer = InputText();

    while (buffer != ""){
        // Title for text file reading
        cout << "\nThis is the text string read from the file\n";
        cout << "-------------------------------------------\n";
        cout << buffer << endl << endl;

        // Build vector of words and counts
        vector<WordCount> words = DistinctWords(buffer);

        // Display results
        cout << "There are " << words.size() << " unique words in the above text." << endl;
        cout << "--------------------------------------------" << endl << endl;
        DisplayResults(words);
        buffer = InputText();
    }
    return 0;
}

/***********************************************
InputText() -
Prompts for a file name until a file can be opened, then returns the
text of that file with every line terminated by '\n'.
Returns an empty string when the user just presses Enter or when
standard input is exhausted; main() stops its loop on an empty string.
An empty file also yields an empty string.
************************************************/
std::string InputText(){
    std::string fileName;
    std::string text;    // Text file string

    while (true){
        std::cout << "File name? ";

        // An empty line (or the end of standard input) means "quit"
        if (!std::getline(std::cin, fileName) || fileName.empty()){
            return text;
        }

        std::ifstream inputFile(fileName);
        if (!inputFile){
            // Report the problem and go back to the prompt instead of giving up
            std::cout << "Error opening data file\n";
            continue;
        }

        // Test the read itself rather than eof(): a loop on eof() performs one
        // extra, failed read after the last line has been consumed
        std::string line;
        while (std::getline(inputFile, line)){
            text += line;
            text += '\n';   // keep the line break so words on adjacent lines stay separate
        }
        return text;     // inputFile is closed by its destructor
    }
}

/****************************************************
Normalize(string) -
Returns a copy of s in which every character has been lowercased and
every punctuation character has been removed. All other characters
(letters, digits, whitespace) are kept in their original order.
*****************************************************/
std::string Normalize(std::string s){
    std::string nString;
    nString.reserve(s.size());

    for (char raw : s){
        // The <cctype> functions require a value representable as unsigned char
        const unsigned char c = static_cast<unsigned char>(raw);

        // Punctuation is skipped while copying; erasing inside an index loop
        // would step over the second of two adjacent punctuation marks
        if (std::ispunct(c))
            continue;

        nString += static_cast<char>(std::tolower(c));
    }

    // Return converted string
    return nString;
}

/******************************************
vector<WordCount> DistinctWords(string) - 
Sorts vector of word count structures.
*******************************************/
vector<WordCount> DistinctWords(string s){
    vector<WordCount> words; // Initialize vector for words
    string nString = Normalize(s); // Convert passed string to lowercase and remove punctuation

    // Parse string
    istringstream iss(nString);

    while(iss >> nString){
        string n;   // Intialize temporary string

        iss >> n;   // Put word in n
        if (findWord(n, words) == true){ continue; }        // Check to verify that there is no preexisting occurence of the word passed
        else{
            WordCount tempO(n);     // Make structure object with n
            words.push_back(tempO);     // Push structure object into words vector
        }
    }
    return words;
}

/*********************************************
bool findWord(string, vector<WordCount>) -
Linear search for word in vector of structures
**********************************************/
bool findWord(string s, vector<WordCount> words){
    // Search through vector 
    for (auto r : words){
        if (r.word == s){   // Increment count of object if found again
            r.iCount(); return true;
        }
        else        // Go back to main function if not found
            return false;       
    }
}

/***********************************************
void DisplayResults(vector<WordCount>) -
Displays results.
************************************************/
void DisplayResults(vector<WordCount> words){
    // TROUBLESHOOT FIRST ERASE THIS AFTER!!!!!
    cout << "Word"  << setw(20) << "Count\n";
    cout << "-----------------------\n";
    for (auto &r : words){
        cout << setw(6) << left << r.word;
        cout << setw(15) << right << r.count << endl;
    }
}

那是最美好的時代,那是最糟糕的時代,那是智慧的年代,那是愚昧的年代,那是信仰的時期,那是懷疑的時期,那是光明的季節,那是黑暗的季節,那是希望的春天,那是絕望的冬天,我們面前擁有一切,我們面前一無所有,我們都直奔天堂,我們都直奔相反的方向——簡而言之,那個時期與現在的時期如此相似,以至於當時一些最喧鬧的權威堅持認為,無論是好是壞,都只能用最高級的比較程度來評價它。

這是他為此特定測試文件提供的示例顯示

可以考慮使用 map(std::map)來完成單詞計數任務

int main()
{
  // Sample input; the real program would read these words from a file instead
  const std::vector<std::string> sample = {"some", "test", "stuff", "test", "stuff"};

  // operator[] inserts a missing key with a count of 0, so a single
  // increment handles both new words and repeated ones
  std::map<std::string, int> tally;
  for (const std::string& w : sample)
      ++tally[w];

  // Print every word with its associated count
  for (const auto& kv : tally)
    std::cout << kv.first << " " << kv.second << std::endl;

  // The number of keys is the number of distinct words
  std::cout << tally.size() << std::endl;
}

你差一點就做對了! 您只是忘了通過引用(而不是通過副本)傳遞“words”向量。 最后,我還為排序提供了一個自定義比較器。

#include <iostream>
#include <sstream>
#include <iomanip>
#include <string>
#include <fstream>      // Needed to use files
#include <vector>
#include <algorithm>    // Needed for sort from standard libraries
using namespace std;

// Pairs a word with the number of times it has occurred so far.
struct WordCount{
    std::string word;   // Word
    int count;          // Number of occurrences

    // Creates the entry for the first sighting of s, so count starts at 1.
    WordCount(std::string s) : word(s), count(1) {}

    // Adds one occurrence.
    void iCount(){ count += 1; }
};

// Ordering used when sorting the results: entries with a larger count come
// first; entries with the same count are ordered by ascending word.
struct {
    bool operator()(const WordCount& a, const WordCount& b)
    {
        if (a.count != b.count)
            return a.count > b.count;
        return a.word < b.word;
    }
} CompareWordCount;

// Forward declarations for the functions defined after main()
string InputText();                                 // Ask for a file name and return the text of that file
string Normalize(string s);                         // Lowercase the text and keep only letters and blanks
vector<WordCount> DistinctWords(string s);          // Build the vector of distinct words and their counts
bool findWord(string s, vector<WordCount>& words);  // Linear search; bumps the count of a matching entry
void DisplayResults(vector<WordCount> words);       // Sort and print the word/count table

// Main
// Repeatedly obtains the text of a file through InputText(), echoes it, and
// reports the distinct words with their counts. The loop runs once per file
// and stops when InputText() returns an empty string.
int main(int argc, char** argv) {
    // Program Title
    std::cout << "Lab 9 - Text File Word Counter\n";
    std::cout << "-------------------------------\n\n";

    // Input text from file
    std::string buffer = InputText();

    while (!buffer.empty()){
        // Title for text file reading
        std::cout << "\nThis is the text string read from the file\n";
        std::cout << "-------------------------------------------\n";
        std::cout << buffer << std::endl << std::endl;

        // Build vector of words and counts
        std::vector<WordCount> words = DistinctWords(buffer);

        // Display results
        std::cout << "There are " << words.size() << " unique words in the above text." << std::endl;
        std::cout << "--------------------------------------------" << std::endl << std::endl;
        DisplayResults(words);

        // Fetch the next file. The returned text has to be kept as-is:
        // clearing it here would discard the file the user just named and
        // end the loop after a single pass.
        buffer = InputText();
    }
    return 0;
}

/***********************************************
InputText() -
Prompts for a file name until a file can be opened, then returns the
text of that file with every line terminated by '\n'.
Returns an empty string when the user just presses Enter or when
standard input is exhausted; main() stops its loop on an empty string.
An empty file also yields an empty string.
************************************************/
std::string InputText(){
    std::string fileName;
    std::string text;    // Text file string

    while (true){
        std::cout << "File name? ";

        // An empty line (or the end of standard input) means "quit"
        if (!std::getline(std::cin, fileName) || fileName.empty()){
            return text;
        }

        std::ifstream inputFile(fileName);
        if (!inputFile){
            // Report the problem and go back to the prompt instead of giving up
            std::cout << "Error opening data file\n";
            continue;
        }

        // Test the read itself rather than eof(): a loop on eof() performs one
        // extra, failed read after the last line has been consumed
        std::string line;
        while (std::getline(inputFile, line)){
            text += line;
            text += '\n';   // mark the line boundary instead of gluing lines together
        }
        return text;     // inputFile is closed by its destructor
    }
}

/****************************************************
Normalize(string) -
Returns a copy of s that keeps only letters, converted to lowercase,
and whitespace characters, copied unchanged. Punctuation, digits and
every other character are dropped.
*****************************************************/
std::string Normalize(std::string s){
    std::string nString;
    nString.reserve(s.size());

    for (char raw : s){
        // The <cctype> functions require a value representable as unsigned char
        const unsigned char c = static_cast<unsigned char>(raw);

        if (std::isalpha(c)){
            nString += static_cast<char>(std::tolower(c));
        }
        else if (std::isspace(c)){
            // Every kind of whitespace is kept (not only space and tab) so that
            // words separated by a line break are not fused into one word
            nString += raw;
        }
    }

    // Return converted string
    return nString;
}

/******************************************
vector<WordCount> DistinctWords(string) -
Normalizes s, splits it on whitespace and returns the vector of
WordCount entries built from the words, in order of first appearance.
*******************************************/
std::vector<WordCount> DistinctWords(std::string s){
    // Tokenize the normalized text
    std::istringstream tokens(Normalize(s));
    std::vector<WordCount> words;

    for (std::string token; tokens >> token; ){
        // findWord() bumps the count itself when the word is already stored,
        // so only words it does not find need a new entry
        if (!findWord(token, words))
            words.push_back(WordCount(token));
    }
    return words;
}

/*********************************************
bool findWord(string, vector<WordCount>) -
Linear search for word in vector of structures
**********************************************/
bool findWord(string s, vector<WordCount>& words){
    // Search through vector 
    for (auto& r : words){
        if (r.word.compare(s) == 0){   // Increment count of object if found again
            r.iCount(); return true;
        }
    }
}

/***********************************************
void DisplayResults(vector<WordCount>) -
Displays results.
************************************************/
void DisplayResults(vector<WordCount> words){
    // TROUBLESHOOT FIRST ERASE THIS AFTER!!!!!
    cout << "Word"  << setw(20) << "Count\n";
    cout << "-----------------------\n";

    sort(words.begin(), words.end(),CompareWordCount);
    for (auto &r : words){
        cout << setw(6) << left << r.word;
        cout << setw(15) << right << r.count << endl;
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM