简体   繁体   中英

Unable to read words from txt file and count the number of words

I have a small project to code a Twitter crawler, and I have encountered some issues when analyzing the collected tweets.

The collected tweets are placed into a txt file. What I want to achieve is to count how many words there are in the txt file, the number of words that contain the word 'engineering', and the number of hashtags. Below is what I have tried so far:

import java.io.*;
import java.util.StringTokenizer;

/**
 * Reads a text file of collected tweets and reports three totals: the number
 * of tokens (words), the number of tokens containing the keyword (ignoring
 * case), and the number of tokens containing a '#'.
 */
public class TwitterAnalyzer {

    /** Characters that separate tokens: whitespace plus common punctuation. */
    private static final String DELIMITERS = " \t\n\r\f.,;:!?\"";

    /**
     * Entry point. Echoes every line of the input file, then prints the word
     * count, keyword count and hashtag count, one per line.
     *
     * @param args optional; if present, {@code args[0]} is used as the path of
     *             the file to analyze instead of the built-in default path
     */
    public static void main(String args[]){
        String keyword = "Engineering";
        String path = (args != null && args.length > 0)
                ? args[0]
                : "C:\\Users\\Alan\\Documents\\NetBeansProjects\\TwitterCrawler\\" + keyword + "-data.txt";

        int numberOfKeywords = 0;
        int numberOfWords = 0;
        int numberOfHashtags = 0;

        // try-with-resources closes the reader even when reading fails part-way.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            String strLine;
            // readLine() must be called exactly once per iteration; calling it
            // again inside the loop would skip every other line and eventually
            // hand a null line to the tokenizer.
            while ((strLine = br.readLine()) != null) {
                System.out.println(strLine);
                int[] counts = countTokens(strLine, keyword);
                numberOfWords += counts[0];
                numberOfKeywords += counts[1];
                numberOfHashtags += counts[2];
            }

            System.out.println(numberOfWords);
            System.out.println(numberOfKeywords);
            System.out.println(numberOfHashtags);
        } catch (FileNotFoundException fe) {
            fe.printStackTrace();
            System.out.println("Unable to locate file");
            System.exit(-1);
        } catch (IOException ie) {
            ie.printStackTrace();
            System.out.println("Unable to read file");
            System.exit(-1);
        }
    }

    /**
     * Tokenizes a single line and counts its tokens.
     *
     * @param line    the line to analyze; must not be null
     * @param keyword the keyword to look for inside each token; matched
     *                case-insensitively
     * @return a three-element array: {@code [0]} total tokens, {@code [1]}
     *         tokens containing the keyword, {@code [2]} tokens containing '#'
     */
    static int[] countTokens(String line, String keyword) {
        // Locale.ROOT keeps the case folding independent of the default locale.
        String needle = keyword.toLowerCase(java.util.Locale.ROOT);
        int words = 0;
        int keywordHits = 0;
        int hashtags = 0;

        StringTokenizer tokenizer = new StringTokenizer(line, DELIMITERS);
        while (tokenizer.hasMoreTokens()) {
            String word = tokenizer.nextToken();
            words++;
            if (word.toLowerCase(java.util.Locale.ROOT).contains(needle)) {
                keywordHits++;
            }
            if (word.contains("#")) {
                hashtags++;
            }
        }
        return new int[] {words, keywordHits, hashtags};
    }
}

Here is the link to the txt file.

Any help here is greatly appreciated!

Try it this way; it should help:

import java.io.BufferedReader;
import java.io.FileReader;

/**
 * Counts the whitespace-separated words in a text file and prints the total.
 */
public class CountWords {

    /**
     * Entry point. Prints the literal heading "Engineering", reads
     * {@code c:\Customer1.txt} line by line and prints the total word count.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main (String args[]) throws Exception {

       System.out.println ("Engineering");
       int count = 0;
       // try-with-resources guarantees the file is closed, even on failure.
       try (BufferedReader br = new BufferedReader (new FileReader ("c:\\Customer1.txt"))) {
          String line = br.readLine();
          while (line != null) {
             count += countWords(line);
             line = br.readLine();
          }
       }
       System.out.println(count);
    }

    /**
     * Returns the number of words in a line, where a word is a maximal run of
     * non-whitespace characters.
     *
     * @param line the line to examine; must not be null
     * @return the word count; 0 for an empty or whitespace-only line
     */
    static int countWords(String line) {
       String trimmed = line.trim();
       // "".split(...) yields one empty element, so blank lines need a guard.
       if (trimmed.isEmpty()) {
          return 0;
       }
       // "\\s+" collapses runs of whitespace so no empty tokens are counted.
       return trimmed.split("\\s+").length;
    }
}

The following code returns 202, 14, 22 (word count, keyword occurrences, and hashtag count, in that order):

/**
 * Reads {@code E:\t.txt} line by line and prints three totals, one per line:
 * the word count, the number of occurrences of the keyword "engineering"
 * (case-insensitive), and the number of hashtags.
 *
 * On a missing or unreadable file it prints the stack trace and a short
 * message, then exits with status -1.
 *
 * @param args unused
 */
public static void main(String args[]){
    String keyword = "engineering";
    Pattern keywordPattern = Pattern.compile(keyword);

    // A hashtag is recognised by '#' followed by at least one word character.
    Pattern hashTagPattern = Pattern.compile("#[a-zA-Z0-9_]");

    int numberOfKeywords = 0;
    int numberOfWords = 0;
    int numberOfHashtags = 0;

    // try-with-resources closes the reader even if readLine() throws.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("E:\\t.txt")))) {
        String strLine;
        while ((strLine = br.readLine()) != null) {
            // Locale.ROOT keeps lower-casing independent of the default locale
            // (e.g. a Turkish locale maps 'I' to a dotless 'i', which would
            // stop "ENGINEERING" from matching).
            Matcher matcher = keywordPattern.matcher(strLine.toLowerCase(java.util.Locale.ROOT));
            while (matcher.find()) {
                numberOfKeywords++;
            }

            // NOTE(review): split("\\s") yields one empty element for a blank
            // line and empty elements between consecutive whitespace, so this
            // can over-count; left unchanged to preserve the existing output.
            numberOfWords += strLine.split("\\s").length;

            matcher = hashTagPattern.matcher(strLine);
            while (matcher.find()) {
                numberOfHashtags++;
            }
        }

        System.out.println(numberOfWords);
        System.out.println(numberOfKeywords);
        System.out.println(numberOfHashtags);
    } catch (FileNotFoundException fe) {
        fe.printStackTrace();
        System.out.println("Unable to locate file");
        System.exit(-1);
    } catch (IOException ie) {
        ie.printStackTrace();
        System.out.println("Unable to read file");
        System.exit(-1);
    }
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM