
How to initialize instance variables in setup()?

This should be a simple problem, but I'm struggling with it. Everything in my code works correctly except for initializing the parameter values "train_rows" and "cols", which are read from the configuration.

I set up logging to display the values of "train_rows" and "cols" in the setup() method, and the values are correct there. However, when I try the same thing in the map() method, both values show as 0. What am I doing wrong?
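For context, here is a minimal driver sketch, assuming the "rows" and "columns" values are passed in through the job Configuration (the question does not show the driver, so the class name and values below are hypothetical). The key point is that conf.setInt() must be called before the Job is created, because the Job takes a copy of the Configuration at creation time:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class KNNDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Set job-wide parameters BEFORE Job.getInstance(conf):
        // the Job clones the Configuration, so values set afterwards
        // on this conf object never reach the mapper tasks.
        conf.setInt("rows", 1000);    // hypothetical value
        conf.setInt("columns", 785);  // hypothetical value

        Job job = Job.getInstance(conf, "knn classify");
        job.setJarByClass(KNNDriver.class);
        job.setMapperClass(KNNMapper.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}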

import java.io.File;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.util.Scanner;
import org.apache.log4j.Logger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class KNNMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
  private static final Logger sLogger = Logger.getLogger(KNNMapper.class);
  private int[][] train_vals;
  private int[] train_label_vals;
  private int train_rows;
  private int test_rows;
  private int cols;

  @Override
  public void setup(Context context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();

      train_rows = conf.getInt("rows", -1);
      cols = conf.getInt("columns", -1);

      //just changed this
      //int[][] train_vals = new int[train_rows][cols];
      //int[] train_label_vals = new int[train_rows];

      train_vals = new int[train_rows][cols];
      train_label_vals = new int[train_rows];

      // read train csv, parse, and store into 2d int array
      Scanner myScan;
        try {
            File trainfile = new File("train_sample.csv");
            if (!trainfile.exists()) {
                throw new IllegalArgumentException("train file didn't load");
            }
            myScan = new Scanner(trainfile);

            //Set the delimiter used in file
            myScan.useDelimiter("[,\r\n]+");

            //Read each token and store it in the 2D train_vals array

            for(int row = 0; row < train_rows; row++) {
                for(int col = 0; col < cols; col++) {
                    train_vals[row][col] = Integer.parseInt(myScan.next());
                }
            }

            myScan.close();

        } catch (FileNotFoundException e) {
            System.out.print("Error: Train file execution did not work.");
        }

    // read train_labels csv, parse, and store into 2d int array
        try {
            File trainlabels = new File("train_labels.csv");
            if (!trainlabels.exists()) {
                throw new IllegalArgumentException("train labels didn't load");
            }

            myScan = new Scanner(trainlabels);

            //Set the delimiter used in file
            myScan.useDelimiter("[,\r\n]+");

            //Read each label into train_label_vals; print the first 10 for debugging

            for(int row = 0; row < train_rows; row++) {
                    train_label_vals[row] = Integer.parseInt(myScan.next());
                    if(row < 10) {
                        System.out.println(train_label_vals[row]);
                    }
            }

            myScan.close();

        } catch (FileNotFoundException e) {
            System.out.print("Error: Train Labels file not found.");
        }
  }

  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {

        // setup() gave us train_vals & train_label_vals.
        // Each line in map() represents a test observation.  We iterate
        // through every train_val row to find the nearest L2 match, then
        // emit a key/value pair of <observation #, predicted digit>.

        // convert from Text to String

        System.out.println("I'm in the map!");
        String line = value.toString();
        double distance;
        double best_distance = Double.POSITIVE_INFINITY;
        int col_num;

        int best_digit = -1;
        IntWritable rowId = null;
        int i;
        IntWritable rowNum;
        String[] pixels;

        System.out.println("Number of train rows:" + train_rows);
        System.out.println("Number of columns:" + cols);
        // comma delimited files, split on commas
        // first we find the # of rows

        pixels = line.split(",");
        rowId = new IntWritable(Integer.parseInt(pixels[0]));
        System.out.println("working on row " + rowId);
        best_distance = Double.POSITIVE_INFINITY;

        for (i = 0; i < train_rows; i++) {
            distance = 0.0;

            col_num = 0;

            for (int j = 1; j < cols; j++) {
                // Note: '^' is bitwise XOR in Java, not exponentiation,
                // so square the difference with Math.pow (or multiply it by itself).
                distance += Math.pow(Integer.parseInt(pixels[j]) - train_vals[i][j-1], 2);
            }

            if (distance < best_distance) {
                best_distance = distance;
                best_digit = train_label_vals[i];
            }
        }
        System.out.println("And we're out of the loop baby yeah!");
        context.write(rowId, new IntWritable(best_digit));
        System.out.println("Mapper done!");
  }
}

I have a doubt with this, assuming you want to scan a file that is in HDFS.

You used:

import java.io.File;
File trainfile = new File("train_sample.csv");

In Hadoop, this is how we check for a file in HDFS:

try {
    FileSystem fs = FileSystem.get(context.getConfiguration());

    if (fs.exists(new Path("/user/username/path/of/file/inhdfs"))) {
        System.out.println("File exists");
    }
} catch (IOException e) {
    e.printStackTrace();
}
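Building on that, here is a minimal sketch of how setup() could read the training CSV directly from HDFS instead of the local filesystem, assuming one training row per CSV line (the HDFS path below is a placeholder; adapt it to your cluster):

// Requires: java.io.BufferedReader, java.io.InputStreamReader,
// org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path
@Override
public void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    train_rows = conf.getInt("rows", -1);
    cols = conf.getInt("columns", -1);
    train_vals = new int[train_rows][cols];
    train_label_vals = new int[train_rows];

    FileSystem fs = FileSystem.get(conf);
    Path trainPath = new Path("/user/username/train_sample.csv"); // placeholder path
    if (!fs.exists(trainPath)) {
        throw new IOException("train file didn't load");
    }
    // fs.open() returns an FSDataInputStream, which we wrap in a
    // BufferedReader for line-by-line CSV parsing.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(trainPath)))) {
        for (int row = 0; row < train_rows; row++) {
            String[] tokens = reader.readLine().split(",");
            for (int col = 0; col < cols; col++) {
                train_vals[row][col] = Integer.parseInt(tokens[col]);
            }
        }
    }
}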
