简体   繁体   English

无法获得简单的Hadoop mapreduce程序所需的输出

[英]Unable to get required output for simple Hadoop mapreduce program

I am trying to write this mapreduce program which has to take input from two files, one has the details of occupations and states , and the other has details of occupation and job growth percentage. 我正在尝试编写这个mapreduce程序,该程序必须从两个文件中获取输入,一个文件包含职业和州的详细信息,另一个文件包含职业和工作增长率的详细信息。 I use two mappers and combine them and in my reducer try to see which jobs have growth percent more than 30. My output should ideally be the occupation followed by the list of states. 我使用两个映射器并将它们组合在一起,然后在我的简化器中尝试查看哪些工作的增长率超过30%。理想情况下,我的输出应该是职业,然后是州列表。 I am however, only getting the occupation names and not the states. 但是,我只得到职业名称,而不是州。 I have posted the code and the sample input files below. 我已经在下面发布了代码和示例输入文件。 PLease point out what i am doing wrong. 请指出我在做什么错。 Thanks. 谢谢。 (Please note that the samples of the input files i have provided are just small portions of the actual files). (请注意,我提供的输入文件样本仅是实际文件的一小部分)。

package com;

import java.io.IOException;

//import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class GrowthState extends Configured implements Tool {

    //Parser for Mapper1
    public static class StateParser{

        private String State,Occupation;

        public void parse(String record){

            String str[] = record.split("\t");
            if(str[4].length() != 0)
                setOccupation(str[4]);
            else
                setOccupation("Default Occupation");

            if(str[2].length() != 0)
                setState(str[2]);
            else
                setState("Default State");

        }

        public void parse(Text record){
            parse(record.toString());
        }

        public String getState() {
            return State;
        }

        public void setState(String state) {
            State = state;
        }

        public String getOccupation() {
            return Occupation;
        }

        public void setOccupation(String occupation) {
            Occupation = occupation;
        }
    }

    //Mapper1  - Processing state.txt
    public static class GrowthMap1 extends Mapper<LongWritable,Text,Text,Text>{
        StateParser sp = new StateParser();
        Text outkey = new Text();
        Text outvalue = new Text();
        public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{
            sp.parse(value);
            outkey.set(sp.getOccupation());
            outvalue.set("m1\t"+sp.getState());
            context.write(outkey,outvalue);
            //String str[] = value.toString().split("\t");
            //context.write(new Text(str[2]), new Text("m1\t"+str[4]));
        }
    }

    public static class ProjParser{
        private String Occupation,percent;

        public void parse(String record){
            String str[] = record.split("\t");
            if(str[0].length() != 0)
                setOccupation(str[0]);
            else
                setOccupation("Default Occupation");

            if(str[5].length() != 0)
                setPercent(str[5]);
            else
                setPercent("0");
        }

        public void parse(Text record){
            parse(record.toString());
        }

        public String getOccupation() {
            return Occupation;
        }

        public void setOccupation(String occupation) {
            Occupation = occupation;
        }

        public String getPercent() {
            return percent;
        }

        public void setPercent(String percent) {
            this.percent = percent;
        }
    }

    //Mapper2 - processing projection.txt
    public static class GrowthMap2 extends Mapper<LongWritable,Text,Text,Text> {
        ProjParser pp = new ProjParser();
        Text outkey = new Text();
        Text outvalue = new Text();
        public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{
            pp.parse(value);
            outkey.set(pp.getOccupation());
            outvalue.set("m2\t"+pp.getPercent());
            context.write(outkey, outvalue);
            //String str[] = value.toString().split("\t");
            //context.write(new Text(str[0]), new Text("m2\t"+str[5]));
        }
    }

    //Reducer
    public static class GrowthReduce extends Reducer<Text,Text,Text,Text>{
        Text outvalue = new Text();
        public void reduce(Text key,Iterable<Text> value,Context context)throws IOException, InterruptedException{
            float cent = 0;
            String state = "";
            for(Text values : value){
                String[] str = values.toString().split("\t");
                if(str[0].equals("m1")){
                        state = state + " " + str[1];
                }else if(str[0].equals("m2")){
                    try{
                        cent = Float.parseFloat(str[1]);
                    }catch(Exception nf){
                        cent = 0;
                    }
                }
            }
            if(cent>=30){
                outvalue.set(state);
                context.write(key,outvalue );
            }
        }
    }

    //Driver

    @Override
    public int run(String[] args) throws Exception {

        Job job = new Job(getConf(), "States of Growth");

        job.setJarByClass(GrowthState.class);
        job.setReducerClass(GrowthReduce.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, GrowthMap1.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, GrowthMap2.class);

        FileOutputFormat.setOutputPath(job,new Path(args[2]));

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        return job.waitForCompletion(true)?0:1;
    }

    public static void main(String args[]) throws Exception{

        int exitcode = ToolRunner.run(new GrowthState(), args);
        System.exit(exitcode);
    }

}

Sample input file1: 样本输入文件1:

01  AL  Alabama 00-0000 All Occupations total   "1,857,530" 0.4 1000.000    1.00    19.66   "40,890"    0.5 8.30    9.72    14.83   23.95   36.04   "17,260"    "20,220"    "30,850"    "49,810"    "74,950"        
01  AL  Alabama 11-0000 Management Occupations  major   "67,500"    1.1 36.338  0.73    51.48   "107,080"   0.6 24.54   33.09   44.98   62.09   88.43   "51,050"    "68,830"    "93,550"    "129,150"   "183,940"       
01  AL  Alabama 11-1011 Chief Executives    detailed    "1,080" 4.8 0.580   0.32    97.67   "203,150"   2.5 52.05   67.58   #   #   #   "108,270"   "140,570"   #   #   #       
01  AL  Alabama 11-1021 General and Operations Managers detailed    "26,480"    1.5 14.258  0.94    58.00   "120,640"   0.9 27.65   35.76   49.00   71.44   #   "57,510"    "74,390"    "101,930"   "148,590"   #       
01  AL  Alabama 11-1031 Legislators detailed    "1,470" 8.7 0.790   1.94    *   "21,920"    3.5 *   *   *   *   *   "16,120"    "17,000"    "18,450"    "20,670"    "32,820"    TRUE    
01  AL  Alabama 11-2011 Advertising and Promotions Managers detailed    80  16.3    0.042   0.19    44.88   "93,350"    9.5 21.59   30.28   38.92   52.22   74.07   "44,900"    "62,980"    "80,960"    "108,620"   "154,060"       
01  AL  Alabama 11-2021 Marketing Managers  detailed    610 11.5    0.329   0.24    61.28   "127,460"   7.4 31.96   37.63   53.39   73.17   #   "66,480"    "78,280"    "111,040"   "152,200"   #       
01  AL  Alabama 11-2022 Sales Managers  detailed    "2,330" 5.4 1.253   0.47    54.63   "113,620"   2.2 27.28   35.42   48.92   67.62   89.42   "56,740"    "73,660"    "101,750"   "140,640"   "186,000"       
05  AR  Arkansas    43-4161 "Human Resources Assistants, Except Payroll and Timekeeping"    detailed    "1,470" 6.6 1.265   1.26    17.25   "35,870"    1.5 11.09   13.54   17.11   20.74   23.30   "23,060"    "28,170"    "35,590"    "43,150"    "48,450"        
05  AR  Arkansas    43-4171 Receptionists and Information Clerks    detailed    "7,080" 3.3 6.109   0.84    11.26   "23,420"    0.8 8.14    9.19    10.87   13.09   14.94   "16,940"    "19,110"    "22,600"    "27,230"    "31,070"        
05  AR  Arkansas    43-4181 Reservation and Transportation Ticket Agents and Travel Clerks  detailed    590 23.6    0.510   0.50    12.61   "26,220"    6.1 8.99    9.81    10.88   14.82   20.59   "18,710"    "20,400"    "22,630"    "30,830"    "42,830"        
05  AR  Arkansas    43-4199 "Information and Record Clerks, All Other"  detailed    920 4.7 0.795   0.61    18.45   "38,370"    1.8 13.59   15.33   18.49   21.35   23.86   "28,270"    "31,880"    "38,470"    "44,410"    "49,630"        
05  AR  Arkansas    43-5011 Cargo and Freight Agents    detailed    480 16.5    0.418   0.73    *   *   *   *   *   *   *   *   *   *   *   *   *       
05  AR  Arkansas    43-5021 Couriers and Messengers detailed    510 12.4    0.444   0.84    11.92   "24,790"    2.1 8.73    9.91    11.26   13.49   16.03   "18,160"    "20,620"    "23,420"    "28,060"    "33,350"    

sample input file 2: 样本输入文件2:

Management occupations  11-0000 "8,861.5"   "9,498.0"   636.6   7.2 22.2    "2,586.7"   "$93,910"   —   —   — 
Top executives  11-1000 "2,361.5"   "2,626.8"   265.2   11.2    3.3 717.4   "$99,550"   —   —   — 
Chief executives    11-1011 330.5   347.9   17.4    5.3 17.7    87.8    "$168,140"  Bachelor's degree   5 years or more None
General and operations managers 11-1021 "1,972.7"   "2,216.8"   244.1   12.4    1.0 613.1   "$95,440"   Bachelor's degree   Less than 5 years   None
Legislators 11-1031 58.4    62.1    3.7 6.4 —   16.5    "$19,780"   Bachelor's degree   Less than 5 years   None
"Advertising, marketing, promotions, public relations, and sales managers"  11-2000 637.4   700.5   63.1    9.9 3.4 203.3   "$107,950"  —   —   — 
Advertising and promotions managers 11-2011 35.5    38.0    2.4 6.9 17.8    13.4    "$88,590"   Bachelor's degree   Less than 5 years   None
Marketing and sales managers    11-2020 539.8   592.5   52.7    9.8 2.6 168.6   "$110,340"  —   —   — 
Marketing managers  11-2021 180.5   203.4   22.9    12.7    2.6 61.7    "$119,480"  Bachelor's degree   5 years or more None
Sales managers  11-2022 359.3   389.0   29.8    8.3 2.7 106.9   "$105,260"  Bachelor's degree   Less than 5 years   None
Public relations and fundraising managers   11-2031 62.1    70.1    8.0 12.9    1.6 21.3    "$95,450"   Bachelor's degree   5 years or more None
Operations specialties managers 11-3000 "1,647.5"   "1,799.7"   152.1   9.2 3.3 459.1   "$100,720"  —   —   — 
Administrative services managers    11-3011 280.8   315.0   34.2    12.2    0.1 79.9    "$81,080"   Bachelor's degree   Less than 5 years   None
Computer and information systems managers   11-3021 332.7   383.6   50.9    15.3    3.1 97.1    "$120,950"  Bachelor's degree   5 years or more None
Financial managers  11-3031 532.1   579.2   47.1    8.9 5.1 146.9   "$109,740"  Bachelor's degree   5 years or more None
Industrial production managers  11-3051 172.7   168.6   -4.1    -2.4    6.1 31.4    "$89,190"   Bachelor's degree   5 years or more None
Purchasing managers 11-3061 71.9    73.4    1.5 2.1 0.3 17.3    "$100,170"  Bachelor's degree   5 years or more None
"Transportation, storage, and distribution managers"    11-3071 105.2   110.3   5.1 4.9 4.8 29.1    "$81,830"   High school diploma or equivalent   5 years or more None
Compensation and benefits managers  11-3111 20.7    21.4    0.6 3.1 —   6.1 "$95,250"   Bachelor's degree   5 years or more None
Human resources managers    11-3121 102.7   116.3   13.6    13.2    1.0 40.6    "$99,720"   Bachelor's degree   5 years or more None
Training and development managers   11-3131 28.6    31.8    3.2 11.2    —   10.7    "$95,400"   Bachelor's degree   5 years or more None
Other management occupations    11-9000 "4,215.0"   "4,371.0"   156.1   3.7 43.1    "1,207.0"   "$81,940"   —   —   — 

There is a problem with your reducer. 减速器有问题。

The faulty code is shown below. 错误代码如下所示。 The loop below gets called for all the values of a particular key (for eg for "Advertising and promotions managers", it gets called twice. Once with value "Alabama" and again with value "6.9"). 下面的循环针对某个特定键的所有值进行调用(例如,对于“广告和促销经理”而言,它被调用两次。一次使用值“阿拉巴马”,一次使用值“ 6.9”)。 Problem is, you have put the if(cent >= 30) statement, outside the for loop. 问题是,您已将if(cent >= 30)语句放在for循环之外。 It should be inside, for matching the key. 它应该在里面,用于匹配密钥。

  for(Text values : value){
        String[] str = values.toString().split("\t");
        if(str[0].equals("m1")){
                state = state + " " + str[1];
        }else if(str[0].equals("m2")){
            try{
                cent = Float.parseFloat(str[1]);
            }catch(Exception nf){
                cent = 0;
            }
        }
    }
    if(cent>=30){
        outvalue.set(state);
        context.write(key,outvalue );
    }

Following piece of code works fine. 以下代码段工作正常。

//Reducer
public static class GrowthReduce extends Reducer<Text,Text,Text,Text>{
    Text outvalue = new Text();
    HashMap<String, String> stateMap = new HashMap<String, String>();


public void reduce(Text key,Iterable<Text> value,Context context)throws IOException, InterruptedException{
    float cent = 0;

    for(Text values : value){
        String[] str = values.toString().split("\t");

        if(str[0].equals("m1")){
            stateMap.put(key.toString().toLowerCase(), str[1]);
        }
        else if(str[0].equals("m2")){
            try{
                cent = Float.parseFloat(str[1]);
                if(stateMap.containsKey(key.toString().toLowerCase()))
                {
                    if(cent>30) {
                        outvalue.set(stateMap.get(key.toString().toLowerCase()));
                        context.write(key, outvalue);
                    }
                    stateMap.remove(key.toString());
                }
            }catch(Exception nf){
                cent = 0;
            }
        }
    }
}
}

The logic is: 逻辑是:

  1. As and when you encounter a state (value "m1"), you put it in state map. 当遇到状态(值“ m1”)时,会将其放在状态图中。
  2. Next time, when you encounter percent with same key (value "m2"), you check if the state is already in the map. 下次,当您遇到具有相同键(值“ m2”)的百分比时,请检查状态是否已在地图中。 If yes, then you output the key/value. 如果是,则输出键/值。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM