
How to store processed data from HDFS in MongoDB using MapReduce

I have a MapReduce application that processes data from HDFS and stores the output data back in HDFS.

But now I need to store the output data in MongoDB instead of writing it to HDFS.

Could anyone let me know how to do that?

Thanks

Mapper class

package com.mapReduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        // Each input line is CSV: date,part,<forecast columns...>,actual
        String[] data = ivalue.toString().split(",");
        String part = data[1];
        try {
            String fdate = convertyymmdd(data[0]);
            // If "actual" were the third column instead of the last one: actual = data[2];
            String actual = data[data.length - 1];
            // Key the record by part; the value carries date, actual and the remaining columns
            context.write(new Text(part), new Text(fdate + "," + actual + "," + dynamicVariables(data)));
        } catch (ArrayIndexOutOfBoundsException ae) {
            System.err.println(ae.getMessage());
        }
    }

    /** Converts MM/DD/YYYY to YYYY/MM/DD so dates sort correctly as plain strings. */
    public static String convertyymmdd(String date) {
        String[] data = date.split("/");
        String month = data[0];
        String day = data[1];
        String year = data[2];
        return year + "/" + month + "/" + day;
    }

    /** Re-joins the variable-length middle columns (index 2 .. length-2) into a CSV string. */
    public static String dynamicVariables(String[] data) {
        StringBuilder str = new StringBuilder();
        // If "actual" were the third column instead of the last one: loop from i = 3 to data.length
        for (int i = 2; i < data.length - 1; i++) {
            if (i > 2) {
                str.append(',');
            }
            str.append(data[i]);
        }
        return str.toString();
    }
}

Reducer class

package com.mapReduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import com.ihub.bo.ForcastBO;

public class FReducer extends Reducer<Text, Text, Text, Text> {
    private String part;
    private List<ForcastBO> list = null;
    private List<List<String>> listOfList = null;
    private List<String> vals = null;
    private static List<ForcastBO> forcastBos = new ArrayList<ForcastBO>();

    @Override
    public void reduce(Text _key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        part = _key.toString();
        // Each value has the layout emitted by FMapper: date,actual,w0..w7
        for (Text val : values) {
            String[] data = val.toString().split(",");
            ForcastBO fb = new ForcastBO();
            fb.setPart(part);
            fb.setDate(data[0]);
            fb.setActual(data[1]);
            fb.setW0(data[2]);
            fb.setW1(data[3]);
            fb.setW2(data[4]);
            fb.setW3(data[5]);
            fb.setW4(data[6]);
            fb.setW5(data[7]);
            fb.setW6(data[8]);
            fb.setW7(data[9]);
            list.add(fb);
        }
    }

    @Override
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            while (context.nextKey()) {
                // Fresh per-key buffers, filled by reduce() and post-processed below
                listOfList = new ArrayList<List<String>>();
                list = new ArrayList<ForcastBO>();
                reduce(context.getCurrentKey(), context.getValues(), context);
                files_WE(listOfList, list, context);
            }
        } finally {
            cleanup(context);
        }
    }

    public void files_WE(List<List<String>> listOfList, List<ForcastBO> list, Context context) {
        Collections.sort(list);

        try {
            setData(listOfList, list);

            // Within one key the part (index 0) is constant, so this stable sort
            // keeps the date order established in setData()
            Collections.sort(listOfList, new Comparator<List<String>>() {
                @Override
                public int compare(List<String> o1, List<String> o2) {
                    return o1.get(0).compareTo(o2.get(0));
                }
            });

            // Shift the forecast columns: w-column n of row i is taken from the
            // row n+1 positions earlier, so each successive column looks one row
            // further back. The backwards pass only reads lower-indexed rows that
            // have not been overwritten yet; rows without enough history get null.
            for (int i = listOfList.size() - 1; i > -1; i--) {
                List<String> list1 = listOfList.get(i);
                int k = 1;
                for (int j = 3; j < list1.size(); j++) {
                    try {
                        list1.set(j, listOfList.get(i - k).get(j));
                    } catch (Exception ex) {
                        list1.set(j, null);
                    }
                    k++;
                }
            }
        } catch (Exception e) {
            // e.getLocalizedMessage();
        }

        // Collect the shifted rows back into ForcastBO beans
        for (List<String> ls : listOfList) {
            System.out.println(ls.get(0));
            ForcastBO forcastBO = new ForcastBO();
            try {
                forcastBO.setPart(ls.get(0));
                forcastBO.setDate(ls.get(1));
                forcastBO.setActual(ls.get(2));
                forcastBO.setW0(ls.get(3));
                forcastBO.setW1(ls.get(4));
                forcastBO.setW2(ls.get(5));
                forcastBO.setW3(ls.get(6));
                forcastBO.setW4(ls.get(7));
                forcastBO.setW5(ls.get(8));
                forcastBO.setW6(ls.get(9));
                forcastBO.setW7(ls.get(10));
                forcastBos.add(forcastBO);
            } catch (Exception e) {
                // Keep partially filled rows as well
                forcastBos.add(forcastBO);
            }
            try {
                System.out.println(forcastBO);
                // service.setForcastBOs(forcastBos);
            } catch (Exception e) {
                System.out.println("FB::::" + e.getStackTrace());
            }
        }
    }

    public void setData(List<List<String>> listOfList, List<ForcastBO> list) {
        List<List<String>> temListOfList = new ArrayList<List<String>>();
        for (ForcastBO str : list) {
            vals = new ArrayList<String>();
            vals.add(str.getPart());
            vals.add(str.getDate());
            vals.add(str.getActual());
            vals.add(str.getW0());
            vals.add(str.getW1());
            vals.add(str.getW2());
            vals.add(str.getW3());
            vals.add(str.getW4());
            vals.add(str.getW5());
            vals.add(str.getW6());
            vals.add(str.getW7());
            temListOfList.add(vals);
        }

        // Sort by date (index 1), which is YYYY/MM/DD and therefore sorts correctly as text
        Collections.sort(temListOfList, new Comparator<List<String>>() {
            @Override
            public int compare(List<String> o1, List<String> o2) {
                return o1.get(1).compareTo(o2.get(1));
            }
        });

        for (List<String> ls : temListOfList) {
            System.out.println(ls);
            listOfList.add(ls);
        }
    }

    public static List<ForcastBO> getForcastBos() {
        return forcastBos;
    }
}
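Note: the ForcastBO bean from com.ihub.bo is not included in the question. For the reducer above to compile, and for Collections.sort(list) to work, it must be a plain Comparable bean along these lines (the fields and accessors are inferred from the calls above; comparing by date is an assumption):

package com.ihub.bo;

// Minimal sketch of the bean referenced above (its source is not in the question)
public class ForcastBO implements Comparable<ForcastBO> {
    private String part, date, actual, w0, w1, w2, w3, w4, w5, w6, w7;

    public String getPart()   { return part; }
    public String getDate()   { return date; }
    public String getActual() { return actual; }
    public String getW0() { return w0; }
    public String getW1() { return w1; }
    public String getW2() { return w2; }
    public String getW3() { return w3; }
    public String getW4() { return w4; }
    public String getW5() { return w5; }
    public String getW6() { return w6; }
    public String getW7() { return w7; }

    public void setPart(String part)     { this.part = part; }
    public void setDate(String date)     { this.date = date; }
    public void setActual(String actual) { this.actual = actual; }
    public void setW0(String w0) { this.w0 = w0; }
    public void setW1(String w1) { this.w1 = w1; }
    public void setW2(String w2) { this.w2 = w2; }
    public void setW3(String w3) { this.w3 = w3; }
    public void setW4(String w4) { this.w4 = w4; }
    public void setW5(String w5) { this.w5 = w5; }
    public void setW6(String w6) { this.w6 = w6; }
    public void setW7(String w7) { this.w7 = w7; }

    @Override
    public int compareTo(ForcastBO other) {
        return this.date.compareTo(other.date); // assumed sort key
    }

    @Override
    public String toString() {
        return part + "," + date + "," + actual;
    }
}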

Driver class

package com.mapReduce;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class MRDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(MRDriver.class);
        job.setMapperClass(FMapper.class);
        job.setReducerClass(FReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Delete the output directory if it already exists, so the job can be rerun
        FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
        Path workingDir = hdfs.getWorkingDirectory();

        Path newFolderPath = new Path("/sd1");
        newFolderPath = Path.mergePaths(workingDir, newFolderPath);
        if (hdfs.exists(newFolderPath)) {
            hdfs.delete(newFolderPath, true); // recursive delete of the existing directory
        }

        // Input and output paths are DIRECTORIES, not files
        FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:9000/Forcast/SampleData"));
        FileOutputFormat.setOutputPath(job, newFolderPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Basically, you need to change the output format class, and there are several ways to do that:

  1. Use the MongoDB Connector for Hadoop: http://docs.mongodb.org/ecosystem/tools/hadoop/?_ga=1.111209414.370990604.1441913822 (a driver sketch follows the commentary below).
  2. Implement your own OutputFormat: https://hadoop.apache.org/docs/r2.7.0/api/org/apache/hadoop/mapred/OutputFormat.html (instead of using FileOutputFormat).
  3. Perform the MongoDB queries inside the reducer instead of writing to the MapReduce context (bad practice: depending on the OutputFormat specified in the driver, you may end up with empty output files in HDFS); see the sketch right after this list.
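As an illustration of option 3, here is a minimal, untested sketch that inserts documents straight from the reducer using the plain MongoDB Java driver (the class, host, database and collection names are placeholders):

package com.mapReduce;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.bson.Document;

import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;

// Option 3 (sketch): insert directly from the reducer with the MongoDB Java driver
public class MongoWritingReducer extends Reducer<Text, Text, Text, Text> {
    private MongoClient client;
    private MongoCollection<Document> collection;

    @Override
    protected void setup(Context context) {
        // One client per reduce task, not per key, to limit open connections
        client = new MongoClient("localhost", 27017);
        collection = client.getDatabase("forcastdb").getCollection("forecasts");
    }

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text val : values) {
            // Value layout as emitted by FMapper: date,actual,...
            String[] data = val.toString().split(",");
            collection.insertOne(new Document("part", key.toString())
                    .append("date", data[0])
                    .append("actual", data[1]));
            // Nothing is written to the MapReduce context, so the HDFS
            // output directory will contain only empty part files.
        }
    }

    @Override
    protected void cleanup(Context context) {
        client.close();
    }
}

Because Hadoop may retry failed tasks, inserts done this way can also end up duplicated, which is part of why this option is discouraged.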

In my opinion option 1 is the best choice, but I haven't used the MongoDB connector, so I can't say whether it is stable and featureful enough. Option 2 requires you to really understand how Hadoop works under the hood, to avoid ending up with lots of open connections and problems with transactions and Hadoop task retries.
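For option 1, assuming the mongo-hadoop connector is on the classpath, the change is mostly confined to the driver: swap the output format for the connector's MongoOutputFormat and point it at a collection URI. This is an untested sketch; the class names come from the connector and may vary between versions, and the database/collection names are placeholders:

package com.mapReduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import com.mongodb.hadoop.MongoOutputFormat;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.util.MongoConfigUtil;

public class MongoMRDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder database.collection; the connector writes each reduce
        // output record as one document into this collection
        MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:27017/forcastdb.forecasts");

        Job job = Job.getInstance(conf, "ForcastToMongo");
        job.setJarByClass(MongoMRDriver.class);
        job.setMapperClass(FMapper.class);
        job.setReducerClass(FReducer.class);

        job.setOutputFormatClass(MongoOutputFormat.class); // replaces FileOutputFormat
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BSONWritable.class);

        // Input still comes from HDFS; no FileOutputFormat.setOutputPath is
        // needed, since the output goes to MongoDB rather than an HDFS directory
        FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:9000/Forcast/SampleData"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that FReducer as posted never writes to the context at all (it collects results in a static list), so to use the connector it would also have to emit BSONWritable documents, e.g. context.write(_key, new BSONWritable(new BasicBSONObject("part", part))).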
