簡體   English   中英

Hive Generic UDF: Hive does not cast as expected, Caused by: java.lang.ClassCastException: java.util.ArrayList cannot be cast to java.util.Map

[英]Hive Generic UDF : Hive does not cast as expected, Caused by: java.lang.ClassCastException: java.util.ArrayList cannot be cast to java.util.Map

我正在嘗試為我的 hive 查詢創建一個簡單的通用 udf。

這是我的 hive 表

-- Sample table: a single column holding a map from string keys to arrays of strings.
CREATE TABLE `dum`(`val` map<string,array<string>>);
-- Insert one row with two keys, 'A' and 'B', each mapped to an array of three strings.
insert into dum select map('A',array('1','2','3'),'B',array('4','5','6'));

這就是它的外觀

-- Show the raw content of the table (the map column is printed in a JSON-like form).
select * from dum;
{"A":["1","2","3"],"B":["4","5","6"]}

我正在嘗試創建一個簡單的 UDF,它可以將上述 map 的值中的所有項目組合成一個列表。 這是我想看到的

-- Desired usage: flatten every array stored in the map column into one list.
select modudf(val) from dum;
["1","2","3","4","5","6"]

所以我創造了

package some.package;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import java.util.ArrayList;

import java.util.List;
import java.util.Map;

/**
 * Simple Hive UDF that flattens the values of a {@code map<string, array<string>>}
 * column into a single list of strings.
 *
 * <p>For the map {@code {"A":["1","2","3"],"B":["4","5","6"]}} the result is
 * {@code ["1","2","3","4","5","6"]}. Elements appear in the iteration order of
 * the incoming map.
 */
@UDFType(deterministic = true)
public class CustomUDF extends UDF {

    /**
     * Concatenates all list values of the given map into one list.
     *
     * @param inMap the map whose values are to be flattened; may be {@code null}
     *              when the column value is SQL NULL
     * @return a new list containing every element of every non-null value list,
     *         or {@code null} when {@code inMap} is {@code null} (NULL in, NULL out)
     */
    public List<String> evaluate(Map<String, List<String>> inMap) {
        // A NULL column value reaches the UDF as a null reference; propagate it
        // as NULL instead of failing the whole query with a NullPointerException.
        if (inMap == null) {
            return null;
        }

        List<String> res = new ArrayList<String>();
        for (List<String> values : inMap.values()) {
            // A key may be mapped to a NULL array; there is nothing to add for it.
            if (values != null) {
                res.addAll(values);
            }
        }
        return res;
    }
}

當我嘗試調用它時,這非常有效

-- Make the jar that contains the UDF class available to the session.
add jar /path/to/my/jar;
-- Register the simple UDF class under the function name modudf for this session.
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF';
-- Apply the function to the map column.
select modudf(val) from dum;

我得到了

["1","2","3","4","5","6"]

但是,我想創建一個通用的 udf,所以我嘗試了

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Generic Hive UDF that takes a single map argument whose values are lists
 * and collects every list element, converted to a Java {@code String}, into
 * one flat list.
 *
 * <p>NOTE(review): {@link #initialize} declares the return type as a map
 * inspector while {@link #evaluate} returns a {@code List}. This mismatch is
 * what produces the {@code ClassCastException: java.util.ArrayList cannot be
 * cast to java.util.Map} raised from {@code StandardMapObjectInspector.getMap}
 * when the query result is serialized.
 */
public class CustomUDF2 extends GenericUDF {
    // Inspector for the map argument; assigned in initialize().
    private MapObjectInspector  inputMapOI                        = null;

    // Converts a map key to a Java String; assigned in initialize().
    private Converter           inputMapKeyConverter              = null;

    // Inspector for the map's value lists; assigned in initialize().
    private ListObjectInspector inputMapValueListOI               = null;
    // Converts one list element to a Java String; assigned in initialize().
    private Converter           inputMapValueListElementConverter = null;

    /**
     * Returns the fixed text used to display this function.
     *
     * @param arguments display strings of the call arguments (not used)
     * @return the constant string {@code "Some message"}
     */
    @Override
    public String getDisplayString(String[] arguments) {
        return "Some message";
    }

    /**
     * Validates the argument inspectors, stores the inspectors and converters
     * needed by {@link #evaluate}, and declares the UDF's return type.
     *
     * @param arguments inspectors of the call arguments; exactly one map
     *                  inspector whose value inspector is a list inspector
     * @return a standard map inspector (string key, list-of-string value);
     *         see the NOTE(review) at the return statement
     * @throws UDFArgumentLengthException if {@code arguments} is null or does
     *         not contain exactly one element
     * @throws UDFArgumentException if the argument is not a map, or the map's
     *         value type is not a list
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 argument is expected.");
        }

        if (!(arguments[0] instanceof MapObjectInspector)) {
            throw new UDFArgumentException("The first parameter should be a map object.");
        }

        this.inputMapOI = (MapObjectInspector) arguments[0];

        // Keys are converted from whatever the input inspector describes to Java Strings.
        ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        this.inputMapKeyConverter = ObjectInspectorConverters.getConverter(this.inputMapOI.getMapKeyObjectInspector(), mapKeyOI);

        if (!(this.inputMapOI.getMapValueObjectInspector() instanceof ListObjectInspector)) {
            throw new UDFArgumentException("The map value type must be a list (aka array)");
        }
        this.inputMapValueListOI = (ListObjectInspector) this.inputMapOI.getMapValueObjectInspector();

        // List elements are likewise converted to Java Strings.
        ObjectInspector inputListElementOI = this.inputMapValueListOI.getListElementObjectInspector();
        ObjectInspector outputListElementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        this.inputMapValueListElementConverter = ObjectInspectorConverters.getConverter(inputListElementOI, outputListElementOI);

        ObjectInspector outputMapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        ObjectInspector outputMapValueListElementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        ObjectInspector outputMapValueListOI = ObjectInspectorFactory.getStandardListObjectInspector(outputMapValueListElementOI);
        // NOTE(review): this declares the result as map<string, array<string>>,
        // but evaluate() returns a List<String>. The declared inspector and the
        // actual returned object disagree, which is the source of the
        // ClassCastException seen at query time.
        return ObjectInspectorFactory.getStandardMapObjectInspector(outputMapKeyOI, outputMapValueListOI);
    }

    /**
     * Flattens the value lists of the map argument into a single list of strings.
     *
     * @param arguments the deferred call arguments; exactly one is expected
     * @return a {@code List<String>} holding every element of every non-empty
     *         value list, each converted to a String
     * @throws UDFArgumentLengthException if {@code arguments} is null or does
     *         not contain exactly one element
     * @throws HiveException declared by the overridden method; propagated from
     *         {@code arguments[0].get()}
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 argument is expected.");
        }

        // NOTE(review): map is dereferenced below without a null check —
        // confirm whether getMap() can return null for a NULL column value.
        Map<?, ?> map = inputMapOI.getMap(arguments[0].get());
        List<String> dataList = new ArrayList<String>();
        for (Object key : map.keySet()) {
            List<?> valueList = this.inputMapValueListOI.getList(map.get(key));
            // Skip keys whose list is missing or empty.
            if ((valueList == null) || (valueList.size() == 0)) {
                continue;
            }

            // The converted key is computed but not used afterwards.
            String strKey = (String) this.inputMapKeyConverter.convert(key);

            for (Object value : valueList) {
                String strValue = (String) this.inputMapValueListElementConverter.convert(value);
                dataList.add(strValue);
            }
        }

        return dataList;
    }
}

但是這次當我調用它時,我得到了一個錯誤

    -- Make the jar that contains the UDF class available to the session.
    add jar /path/to/my/jar;
    -- Register the GenericUDF-based class under the function name modudf for this session.
    CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF2';
    -- Apply the function to the map column; this is the query that fails with the error below.
    select modudf(val) from dum;

Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"val":{"A":["1","2","3"],"B":["4","5","6"]}}
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:562)
    at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:148)
    ... 8 more
Caused by: java.lang.ClassCastException: java.util.ArrayList cannot be cast to java.util.Map
    at org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector.getMap(StandardMapObjectInspector.java:85)
    at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:321)
    at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:247)
    at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231)
    at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55)
    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:725)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
    at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
    at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
    at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
    ... 9 more

據我所知,我從未嘗試將 ArrayList 轉換為 Map。

我究竟做錯了什麼?

您需要在initialize中返回的是與 UDF 的返回類型相對應的ObjectInspector (在您的情況下為ListObjectInspector )。

當你寫

public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    ...
    return ObjectInspectorFactory.getStandardMapObjectInspector(outputMapKeyOI, outputMapValueListOI);
}

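Hive 會依照 initialize 返回的 ObjectInspector 來解讀 evaluate 的結果,因此它期望得到一個 Map 作為輸出。當 evaluate 之後執行的程式碼(即堆疊追蹤中的 LazySimpleSerDe.serialize)實際拿到的是 List 時,就會拋出 ClassCastException。把 initialize 的返回值改為 ObjectInspectorFactory.getStandardListObjectInspector(outputMapValueListElementOI),即可與 evaluate 返回的 List 保持一致。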

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM