java.lang.IllegalArgumentException: Wrong FS: , expected: hdfs://localhost:9000

I am trying to implement a reduce-side join and use a MapFile reader to look up the distributed cache, but when I check stderr the values are not being looked up and the error below is shown. The lookup file already exists in HDFS and, judging from stdout, it appears to have been loaded into the cache correctly.

java.lang.IllegalArgumentException: Wrong FS: ..., expected: hdfs://localhost:9000
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:390)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:140)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:554)
    at org.apache.hadoop.fs.FileSystem.getLength(FileSystem.java:816)
    at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1479)
    at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1474)
    at org.apache.hadoop.io.MapFile$Reader.createDataFileReader(MapFile.java:302)
    at org.apache.hadoop.io.MapFile$Reader.open(MapFile.java:284)
    at org.apache.hadoop.io.MapFile$Reader.<init>(MapFile.java:273)
    at org.apache.hadoop.io.MapFile$Reader.<init>(MapFile.java:260)
    at org.apache.hadoop.io.MapFile$Reader.<init>(MapFile.java:253)
    at mr_poc.reducerrsj.initializeDepartmentsMap(reducerrsj.java:59)
    at mr_poc.reducerrsj.setup(reducerrsj.java:42)
    at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:174)
    at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:649)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
java.lang.NullPointerException
    at mr_poc.reducerrsj.buildOutputValue(reducerrsj.java:83)
    at mr_poc.reducerrsj.reduce(reducerrsj.java:127)
    at mr_poc.reducerrsj.reduce(reducerrsj.java:1)
    at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:177)
    at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:649)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    ...

Here is my driver code:

package mr_poc;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class driverrsj extends Configured implements Tool{

    @Override
    public int run(String[] arg) throws Exception {
        if (arg.length != 3) {
            System.out.printf("3 parameters are required for DriverRSJ- <Input Dir1> <Input Dir2> <Output Dir> \n");
            return -1;
        }
        Job job = new Job(getConf());
        Configuration conf = job.getConfiguration();
        DistributedCache.addCacheFile(new URI("/input/delivery_status"), conf);
        System.out.println("Cache : " + job.getConfiguration().get("mapred.cache.files"));
        job.setJarByClass(driverrsj.class);
        conf.setInt("cust_info", 1);
        conf.setInt("status", 2);
        StringBuilder inputPaths = new StringBuilder();
        inputPaths.append(arg[0].toString()).append(",").append(arg[1].toString());
        FileInputFormat.setInputPaths(job, inputPaths.toString());
        FileOutputFormat.setOutputPath(job, new Path(arg[2]));
        job.setJarByClass(driverrsj.class);
        job.setMapperClass(mappperRSJ.class);
        job.setReducerClass(reducerrsj.class);
        job.setMapOutputKeyClass(CompositeKeyWritableRSJ.class);
        job.setMapOutputValueClass(Text.class);
        //job.setPartitionerClass(partinonrsj.class);
        job.setSortComparatorClass(secondarysortcomp.class);
        job.setGroupingComparatorClass(GroupingComparatorRSJ.class);
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;

    }
    
    public static void main(String[] args) throws Exception{
        int exitCode = ToolRunner.run(new Configuration(), new driverrsj(),args);
        System.exit(exitCode);
        
    }
    

}

Here is my reducer code:

package mr_poc;

import java.io.File;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class reducerrsj extends Reducer<CompositeKeyWritableRSJ, Text, NullWritable, Text>{
    StringBuilder reduceValueBuilder = new StringBuilder("");
    NullWritable nullWritableKey = NullWritable.get();
    Text reduceOutputValue = new Text("");
    String strSeparator = ",";
    private MapFile.Reader deptMapReader = null;
    Text txtMapFileLookupKey = new Text();
    Text txtMapFileLookupValue = new Text();
    //Path[] cacheFilesLocal;
    //Path[] eachPath;
    
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());

        for (Path eachPath : cacheFiles) {
            System.out.println(eachPath.toString());
            System.out.println(eachPath.getName());
            if (eachPath.getName().toString().contains("delivery_status")) {
                URI uriUncompressedFile = new File(eachPath.toString() + "/DeliveryStatusCodes").toURI();
                initializeDepartmentsMap(uriUncompressedFile, context);
            }
        }
    }
    
    //@SuppressWarnings("deprecation")
    private void initializeDepartmentsMap(URI uriUncompressedFile, Context context)
            throws IOException {
        // Initialize the reader of the map file (side data)
        Configuration conf = context.getConfiguration();
        conf.addResource(new Path("/usr/local/hadoop-1.2.1/conf/core-site.xml"));
        FileSystem dfs = FileSystem.get(conf);
        try {
            deptMapReader = new MapFile.Reader(dfs, uriUncompressedFile.toString(), context.getConfiguration());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    private StringBuilder buildOutputValue(CompositeKeyWritableRSJ key,
            StringBuilder reduceValueBuilder, Text value) {

        if (key.getsourceindex() == 2) {

            String arrSalAttributes[] = value.toString().split(",");
            txtMapFileLookupKey.set(arrSalAttributes[0].toString());
            System.out.println("key=" + txtMapFileLookupKey);

            try {
                deptMapReader.get(txtMapFileLookupKey, txtMapFileLookupValue);
            } catch (Exception e) {
                txtMapFileLookupValue.set("");
                e.printStackTrace();
            } finally {
                txtMapFileLookupValue.set((txtMapFileLookupValue.equals(null) || txtMapFileLookupValue.equals(""))
                        ? "NOT-FOUND"
                        : txtMapFileLookupValue.toString());
            }

            reduceValueBuilder.append(txtMapFileLookupValue.toString());

        } else if (key.getsourceindex() == 1) {

            String arrEmpAttributes[] = value.toString().split(",");
            reduceValueBuilder.append(arrEmpAttributes[0].toString()).append(strSeparator);
        }

        txtMapFileLookupKey.set("");
        txtMapFileLookupValue.set("");

        return reduceValueBuilder;
    }
     
    @Override
    public void reduce(CompositeKeyWritableRSJ key, Iterable<Text> values,
            Context context) throws IOException, InterruptedException {

        for (Text value : values) {
            buildOutputValue(key, reduceValueBuilder, value);
        }

        // Drop last comma, set value, and emit output
        if (reduceValueBuilder.length() > 1) {
            //reduceValueBuilder.setLength(reduceValueBuilder.length() - 1);
            // Emit output
            reduceOutputValue.set(reduceValueBuilder.toString());
            context.write(nullWritableKey, reduceOutputValue);
        } else {
            System.out.println("Key=" + key.getjoinkey() + "src=" + key.getsourceindex());
        }
        // Reset variables
        reduceValueBuilder.setLength(0);
        reduceOutputValue.set("");
    }
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        if (deptMapReader != null) {
            deptMapReader.close();
        }
    }
}

Here is my core-site.xml:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/app/hadoop/tmp</value>
  <description>A base for other temporary directories.</description>
</property>
<property>
  <name>fs.default.name</name>
  <value>hdfs://localhost:9000</value>
  <description>The name of the default file system.  A URI whose
  scheme and authority determine the FileSystem implementation.  The
  uri's scheme determines the config property (fs.SCHEME.impl) naming
  the FileSystem implementation class.  The uri's authority is used to
  determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>

Any help would be greatly appreciated. Thanks in advance!

I had the same problem, and I solved it by adding

FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);

in the driver class.

You have to import URI from java.net.URI.
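
For reference, here is a minimal sketch (not the original poster's exact driver) of where that line could sit inside run(), reusing the /input/delivery_status cache path from the driver shown above and assuming java.net.URI and org.apache.hadoop.fs.FileSystem are imported:

Job job = new Job(getConf());
Configuration conf = job.getConfiguration();
// Resolve the FileSystem explicitly against the HDFS NameNode so that
// paths are interpreted relative to hdfs://localhost:9000.
FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
DistributedCache.addCacheFile(new URI("/input/delivery_status"), conf);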

I think I ran into a similar problem. The key point is that you are about to read a SequenceFile from the DistributedCache, and that file lives on your local file system. In your log there is this line:

"org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:140)" 

If you look at the source code of SequenceFile.Reader, you will find that this log line is caused by this code:

fs.getFileStatus(filename).getLen()    

The "fs" here should be a LocalFileSystem rather than a DistributedFileSystem.

My solution was to change this line:

deptMapReader = new MapFile.Reader(dfs, uriUncompressedFile.toString(), context.getConfiguration());

to:

Configuration conf = context.getConfiguration();
String originalFS = conf.get("fs.default.name");   //backup original configuration
conf.set("fs.default.name", "file:///");           //change configuration to local file system
deptMapReader = new MapFile.Reader(dfs,uriUncompressedFile.toString(), conf);
conf.set("fs.default.name", originalFS);           //restore original configuration

After doing this, the SequenceFile.Reader object can access the cached file on the local file system.
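
As a variant of the same idea (a sketch, not part of the original answer), you can avoid mutating fs.default.name by handing the reader a LocalFileSystem directly, since the cached copy of the MapFile lives on the task node's local disk:

Configuration conf = context.getConfiguration();
// FileSystem.getLocal() returns the LocalFileSystem implementation, so the
// path check inside SequenceFile.Reader is done against file:/// rather than hdfs://.
FileSystem localFs = FileSystem.getLocal(conf);
deptMapReader = new MapFile.Reader(localFs, uriUncompressedFile.toString(), conf);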

I think this problem occurs because the SequenceFile API has changed, and some of its constructors, such as MapFile.Reader(fs, path, conf), are deprecated in this case.
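
On newer Hadoop releases (2.x and later), the non-deprecated MapFile.Reader constructor takes a Path and a Configuration instead of a FileSystem, so, assuming you are on that API, the reader could be opened roughly like this:

// Sketch for the Hadoop 2.x API: the file system is derived from the Path and
// Configuration, so no explicit FileSystem argument is needed.
deptMapReader = new MapFile.Reader(new Path(uriUncompressedFile.toString()), conf);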

This solution works for me.

You should set the properties of conf according to your core-site.xml file, like this:

conf.set("fs.defaultFS", "hdfs://host:port");
conf.set("mapreduce.jobtracker.address", "host:port");

Include the following line in the job runner: DistributedCache.addCacheFile(new URI(""), conf);
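
Putting those pieces together, a sketch of the relevant part of the job runner might look like the following (host:port and the cache URI are placeholders, exactly as in the answer above):

Configuration conf = new Configuration();
// Match the NameNode and JobTracker addresses configured in core-site.xml / mapred-site.xml.
conf.set("fs.defaultFS", "hdfs://host:port");
conf.set("mapreduce.jobtracker.address", "host:port");
Job job = new Job(conf);
// Register the side-data file with the distributed cache; the URI is a placeholder.
DistributedCache.addCacheFile(new URI("<hdfs path to lookup file>"), job.getConfiguration());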

Below is the code in the mapper's setup method:

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration configuration = new Configuration();
    FileSystem fileSystem = null;
    try {
         fileSystem = FileSystem.get(new URI("<File location>"), configuration);
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }

    String location = <S3 file location>;
    FSDataInputStream fsDataInputStream = fileSystem.open(new Path(location));
    Scanner scanner = new Scanner(fsDataInputStream);
    int i = 1;
    while(scanner.hasNextLine()) {
        String str[] = scanner.nextLine().split(",");
        LOG.info("keys are \t" + str[0] + str[1]);
        stickerMap.put(str[0] + str[1], i);
        ++i;
    }
}
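
Note that this snippet assumes stickerMap and LOG are fields already declared on the mapper class, and that <File location> and <S3 file location> are placeholders for real paths; the Scanner (and with it the underlying FSDataInputStream) should also be closed once the lookup map has been built.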
