簡體   English   中英

使用結構體數組計算文本文件中單詞出現的次數(C++)

[英]Using an array of struct counting the number of occurrence of a word in a text file C++

大家好,這是我第一次在Stack Overflow上提問。我有一個關於使用C++統計文本文件中單詞出現次數的問題。我需要創建一個結構體數組,每個結構體保存一個單詞的索引(位置)和該單詞的計數器,然后將它們全部存儲在AVL樹中。打開文件並讀取一個單詞后,我在AVL樹或trie中查找它。如果存在,就用節點中的索引來增加該單詞的Cnt。如果不存在,則將其添加到單詞數組中,把它的位置放入下一個結構體,並把該結構體的位置放入AVL樹,同時將結構體的Cnt設置為1。我現在遇到的問題是,我的程序似乎無法正確處理計數,總是只輸出0。請給我一些關於如何修復該錯誤的建議。以下是我目前的代碼:

#include <iostream>
#include <fstream>
#include <string>
#include <cstdlib>
#include <cstring>
#include <ctype.h>
#include <stdio.h>
#include <string>
#include <cctype>
#include <stdlib.h>
#include <stdbool.h>
using namespace std;

// Forward declarations for functions that are defined after main().
// Inserts key into the AVL tree rooted at node; returns the subtree's new root.
struct Node* insert(struct Node* node, int key) ;
// Prints the keys of the tree in pre-order (node, left subtree, right subtree).
void preOrder(struct Node *root) ;
// Removes punctuation and whitespace from str in place and lower-cases it.
void removePunct(char str[]);
// Returns 0 when the two strings are equal and -1 otherwise.
int compareWord(char word1[], char word2[] );

// One record per distinct word.
struct Stats {
    // wordPos: offset at which the word's text starts inside the wordArray
    // buffer owned by main(); wordCnt: occurrences counted for that word.
    int wordPos, wordCnt;
};
// Table of per-word records; main() fills the slots below indexRec.
Stats record[50000];
// Index of the next unused slot in record.
int indexRec = 0;
// Scratch buffer holding the token currently being processed; removePunct()
// cleans it in place.
char word[50000*10] ;
// Offset inside main()'s wordArray at which the next new word is stored.
int indexWord = 0;

int main() {
    ifstream fin;
    string fname;
    char line[200], wordArray[500000];

    cout << "Enter the text file name:" << endl;
    cin >> fname;
    fin.open(fname.c_str());
    if (!fin) {
        cerr << "Unable to open file" << endl;
        exit(1);
    }
    struct Node *root = NULL;
    while (!fin.eof() && fin >> line) { //use getline
        for(int n=0,m=0; m!=strlen(line); m+=n) {
            sscanf(&line[m],"%s%n",word,&n);
            removePunct(word);
            //strcpy(&wordArray[indexWord],word);
            int flag = compareWord(wordArray, word);
            if(flag==-1) {
                strcpy(&wordArray[indexWord],word);
                record[indexRec].wordPos = indexWord;
                record[indexRec].wordCnt = 1;
                root = insert(root, record[indexRec].wordPos);
                indexWord+=strlen(word)+1;
                // indexes of the word array
                indexRec++;
                cout << wordArray[indexWord] << " ";
            } else
                record[flag].wordCnt++;

            cout << record[indexRec].wordCnt;
            cout << endl;

        }
        /*for(int x = 0; x <= i; x++)
        {
            cout << record[x].wordPos << record[x].wordCnt << endl;
        }*/

    }

    fin.close();
    return 0;

}

/*
 * Normalises a word in place: drops every punctuation and whitespace
 * character and converts the remaining characters to lower case.
 *
 * str must be a writable, NUL-terminated string; the result is never longer
 * than the input and may be empty.
 */
void removePunct(char str[]) {
    size_t out = 0;
    for (size_t in = 0; str[in] != '\0'; ++in) {
        // <ctype.h> functions require a value representable as unsigned
        // char; a plain (possibly negative) char is undefined behaviour.
        unsigned char ch = (unsigned char)str[in];
        if (ispunct(ch) || isspace(ch))
            continue;
        str[out++] = (char)tolower(ch);
    }
    str[out] = '\0';
}
/*
 * Tests two NUL-terminated strings for equality.
 *
 * Returns 0 when word1 and word2 are identical and -1 otherwise. The result
 * is an equal/not-equal flag only; it says nothing about where a word is
 * stored.
 */
int compareWord(char word1[], char word2[] ) {
    return strcmp(word1, word2) == 0 ? 0 : -1;
}

/* A node of the AVL tree. */
struct Node {
    int key;                    /* value the tree is ordered by */
    struct Node *left, *right;  /* child subtrees; NULL when absent */
    int height;                 /* 1 for a freshly created leaf */
};

// Forward declaration: returns the larger of two integers (defined below).
int max(int a, int b);

// Height stored in node N, or 0 when N is NULL (an empty subtree).
int height(struct Node *N) {
    return N ? N->height : 0;
}

// Returns the larger of a and b (b when they are equal).
int max(int a, int b) {
    if (a > b)
        return a;
    return b;
}

/* Allocates a tree node holding the given key, with NULL left and right
   pointers and height 1 (a new node is always added as a leaf).
   If the allocation fails, an error is written to stderr and the program
   exits, so the returned pointer is never NULL. */
struct Node* newNode(int key) {
    /* The cast is kept because this file is compiled as C++. */
    struct Node* node = (struct Node*) malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "Out of memory while allocating tree node\n");
        exit(EXIT_FAILURE);
    }
    node->key    = key;
    node->left   = NULL;
    node->right  = NULL;
    node->height = 1;  // new node is initially added at leaf
    return node;
}

// Rotates the subtree rooted at y to the right and returns its new root.
//
//         y                 x
//        / \               / \
//       x   C     ==>     A   y
//      / \                   / \
//     A   B                 B   C
//
// y must have a left child.
struct Node *rightRotate(struct Node *y) {
    struct Node *pivot = y->left;        // x in the diagram
    struct Node *orphan = pivot->right;  // B in the diagram

    // Re-link: y becomes the right child of the pivot and adopts B.
    pivot->right = y;
    y->left = orphan;

    // y now sits below the pivot, so its height is refreshed first.
    y->height = 1 + max(height(y->left), height(y->right));
    pivot->height = 1 + max(height(pivot->left), height(pivot->right));

    return pivot;
}

// Rotates the subtree rooted at x to the left and returns its new root.
//
//       x                     y
//      / \                   / \
//     A   y       ==>       x   C
//        / \               / \
//       B   C             A   B
//
// x must have a right child.
struct Node *leftRotate(struct Node *x) {
    struct Node *pivot = x->right;      // y in the diagram
    struct Node *orphan = pivot->left;  // B in the diagram

    // Re-link: x becomes the left child of the pivot and adopts B.
    pivot->left = x;
    x->right = orphan;

    // x now sits below the pivot, so its height is refreshed first.
    x->height = 1 + max(height(x->left), height(x->right));
    pivot->height = 1 + max(height(pivot->left), height(pivot->right));

    return pivot;
}

// Balance factor of N: height of its left subtree minus height of its right
// subtree. A NULL node has balance 0.
int getBalance(struct Node *N) {
    return N ? height(N->left) - height(N->right) : 0;
}

// Inserts key into the AVL subtree rooted at node and returns the root of
// the (possibly rotated) subtree. A key that is already present leaves the
// tree untouched.
struct Node* insert(struct Node* node, int key) {
    // Empty spot reached: the new leaf goes here.
    if (node == NULL)
        return newNode(key);

    // Duplicates are not stored.
    if (key == node->key)
        return node;

    // Ordinary binary-search-tree descent.
    if (key < node->key)
        node->left = insert(node->left, key);
    else
        node->right = insert(node->right, key);

    // Refresh this ancestor's height and see whether it went out of balance.
    node->height = 1 + max(height(node->left), height(node->right));
    int balance = getBalance(node);

    if (balance > 1) {
        // Left-heavy.
        if (key < node->left->key)          // Left Left case
            return rightRotate(node);
        if (key > node->left->key) {        // Left Right case
            node->left = leftRotate(node->left);
            return rightRotate(node);
        }
    } else if (balance < -1) {
        // Right-heavy.
        if (key > node->right->key)         // Right Right case
            return leftRotate(node);
        if (key < node->right->key) {       // Right Left case
            node->right = rightRotate(node->right);
            return leftRotate(node);
        }
    }

    // Still balanced: the subtree root is unchanged.
    return node;
}
// Prints every key of the tree in pre-order (node first, then its left
// subtree, then its right subtree), each followed by a single space.
void preOrder(struct Node *root) {
    if (root == NULL)
        return;

    printf("%d ", root->key);
    preOrder(root->left);
    preOrder(root->right);
}

一個問題(我不確定這是否是唯一的問題)是:略去所有中間行之后,您的代碼實際上是這樣的:

record[indexRec].wordCnt = 1;
if find word fails
    indexRec++;
cout << record[indexRec].wordCnt;

因此,當遇到一個新單詞時(如果我對代碼的理解正確的話!),您打印的是下一條尚未使用的記錄,而不是剛剛寫入的那一條。一種解決方法是:

// A new word was stored in the slot just before indexRec (which has already
// been incremented); an existing word's record is the one at index flag.
if (flag==-1)
    cout << record[indexRec-1].wordCnt;
else
    cout << record[flag].wordCnt;

還有很多其他問題,例如:compareWord()的實現非常錯誤;您應該確定自己究竟是要寫C++,還是只是在寫帶有std::cout的C;文件讀取代碼很奇怪;您同時包含了標准頭文件的C版本和C++版本,等等——但這些是另一個問題了!

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM