我正在嘗試使用Hadoop MapReduce計算文本文件中“單詞對”的出現次數

[英]I am trying to count the number of occurrences of “pairs of words” in a text file using Hadoop MapReduce



我該如何解決這個錯誤? 我的Mapper輸出類的鍵設置為Text,值設置為LongWritable。另外,我只想計算一個單詞在文本文檔中出現的次數並將其寫入輸出文件。

public class WordCountV2 extends Configured implements Tool {

        /** Entry-point for our program. Constructs a Job object representing a single
         * Map-Reduce job and asks Hadoop to run it.

        public static void main(String[] args) throws Exception {
            int exitCode = ToolRunner.run(new WordCountV2(), args);

         * Run method which schedules the Hadoop Job
         * @param args Arguments passed in main function
        public int run(String[] args) throws Exception {

            if (args.length != 2) {
                System.err.printf("Usage: %s needs two arguments <input> <output> files\n", getClass().getSimpleName());
                return -1;

        /*Initialize the Hadoop job and set the jar as well as the name of the Job
        * Tell Hadoop where to locate the code that must be shipped if this
        * job is to be run across a cluster.
            Job job = new Job();

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

        /* Set the datatypes of the keys and values outputted by the maps and reduces.
         * These must agree with the types used by the Mapper and Reducer. Mismatches
         * will not be caught until runtime.
         * job.setOutputKeyClass( Text.class ); will set the types expected as output from both the map and reduce phases.
         * If your Mapper emits different types than the Reducer,
         * you can set the types emitted by the mapper with the JobConf's setMapOutputKeyClass() and setMapOutputValueClass() methods.
         * These implicitly set the input types expected by the Reducer.

            //by default the output of mapper is KEY(Text) VALUE(Long)

            //Output file format is TextOutputFormat

            //Set the MapClass and ReduceClass in the job

            //Wait for the job to complete and print if the job was successful or not
            int returnValue = job.waitForCompletion(true) ? 0:1;

            if(job.isSuccessful()) {
                System.out.println("Job was successful");
            } else if(!job.isSuccessful()) {
                System.out.println("Job was not successful");

            return returnValue;

        /** Mapper for word count. */

    public static class Map extends Mapper<Object, Text, Text, LongWritable> {

        /** Regex pattern to find pairs of words (alphanumeric + _). */

        final static Pattern WORD_PATTERN = Pattern.compile("(\\w+)(?=(\\s\\w+))");

        /** Constant 1 as a LongWritable value. */
        private final static LongWritable ONE = new LongWritable(1L);

        /** Text object to store a word to write to output. */
        private Text word = new Text();

        /** Actual map function. Takes one document's text and emits key-value
         * pairs for each word found in the document.
         * @param key Document identifier (ignored).
         * @param value Text of the current document.
         * @param context MapperContext object for accessing output,
         *                configuration information, etc.
        public void map(Text key, Text value, Context context) throws IOException, InterruptedException {

            /* Matching the pattern with the input Text value*/

            Matcher matcher = WORD_PATTERN.matcher(value.toString());

            while (matcher.find()) {
                // group(1)--->Checks for words
                //group(2) ----> checks for spaces and words after
                context.write(word, ONE);

    /** Reducer for word count.
     * Like the Mapper base class, the base class Reducer is parameterized by
     * <in key type, in value type, out key type, out value type>.
     * For each Text key, which represents a pair of word, this reducer gets a list of
     * LongWritable values, computes the sum of those values, and the key-value
     * pair (word, sum).
    public static class Reduce extends Reducer<Text, LongWritable, Text, LongWritable> {
        /** Actual reduce function.
         * @param key Word.
         * @param values Iterator over the values for this key.
         * @param context ReducerContext object for accessing output,
         *                configuration information, etc.
        public void reduce(Text key, Iterator<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0L;
            while (values.hasNext()) {
                sum += values.next().get();
            context.write(key, new LongWritable(sum));




public void map(LongWritable key, Text value, Context context)


