简体   繁体   中英

Trying to remove punctuation and count words

I am trying to remove all punctuation from a word. The program reads a file then using a hash counts all occurrences of each word. It mostly works. I am running into trouble when I come across this line

, , ,  , , , , , ,  . ./  . /   !@#$%^&*()_(&*^%&^%$%$%##%$%$# %%%$  ^%%^ % ^ %^&^ &^ &^ &^&^ &^ &^ &^ &^ %^% ^ % %$ %$ %$

My program prints out " , 32 "

If there were words it would print out

"word, number"

But for this case it prints what Im assuming is an empty string and ive tried for that

Here is my file that does the main stuff.

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<ctype.h>
#include"hash.h"

#define SIZE 5000

void fileRead(char * filename);
void fileWrite();
void removep(char * p);
struct listnode * hashTable[9000];

int main(int argc, char ** argv){
    int i;
    if(argc<2)
        fprintf(stderr,"Enter filename \n");

    hashCreate(hashTable, SIZE);

    for(i=1; i<argc; i++){
        fileRead(argv[i]);
    }

    fileWrite();
    hashDelete(hashTable, SIZE);
    return 0;
}

void fileWrite(){
    FILE * file=fopen("wordfrequency.txt","w");
    int i;
    struct listnode * temp;
    for(i=0;i<SIZE;i++){
        temp=hashTable[i];
        if(hashTable[i]->count!=0){
            for(temp=hashTable[i]; temp!=NULL; temp=temp->next){
                fprintf(file,"%s, %d\n",temp->word, temp->count);
            }
        }
    }
    fclose(file);
}

void fileRead(char * filename){
    FILE * file = fopen(filename,"r");
    char word[500];
    if(!file){
        fprintf(stderr,"Error opening file \n");
        return;
    }
    while(fscanf(file, "%s", word)==1){
        removep(word);
        if(word!=NULL || word[0]!='\0')
            hashAdd(word,hashTable,SIZE);
    }
    fclose(file);
}

void removep(char *p)
{
    char *src = p, *dst = p;

    while (*src)
    {
       if (ispunct((unsigned char)*src))
       {
          src++;
       }
       else if (isupper((unsigned char)*src))
       {
          *dst++ = tolower((unsigned char)*src);
          src++;
       }
       else if (src == dst)
       {
          src++;
          dst++;
       }
       else
       {
          *dst++ = *src++;
       }
    }

    *dst = 0;
}

I figured it out. I changed

if(word!=NULL || word[0]!='\\0')

to

if(word[0]!=' ' && word[0]!='\\0')

and now it works perfectly.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM