[英]Error retrieving data from hive using python
我使用 Python 連接 Hive 並將數據讀取到 pandas 中,但是它報了一個錯誤:
pyhive.exc.OperationalError: TExecuteStatementResp
我的代碼:
# -*- coding: utf-8 -*-
from pyhive import hive
from impala.util import as_pandas
from string import Template
# Connection settings; get_conn() unpacks these as keyword arguments
# for hive.connect().
config = dict(
    host='127.0.0.1',
    database='default',
)
def get_conn(conf):
    """Open and return a Hive connection.

    The items of ``conf`` are passed to ``hive.connect`` as keyword
    arguments.
    """
    return hive.connect(**conf)
def execute_hql(hql, params=None):
    """Run a templated HiveQL statement and return the result via as_pandas.

    ``hql`` is treated as a ``string.Template``: ``$name`` placeholders are
    filled in from ``params`` before the statement is sent to Hive.

    Args:
        hql: Statement text, optionally containing ``$name`` placeholders.
        params: Mapping of placeholder names to values. ``None`` (the
            default) is treated as an empty mapping.

    Returns:
        Whatever ``as_pandas`` builds from the executed cursor.

    Raises:
        KeyError: A placeholder in ``hql`` has no entry in ``params``.
        ValueError: ``hql`` contains an invalid ``$`` placeholder.
    """
    # NOTE: values are spliced into the statement as plain text, so they
    # must come from trusted code, never from unvalidated user input.
    values = {} if params is None else params
    # Substitute first so a bad template fails before a connection is opened.
    statement = Template(hql).substitute(values)

    conn = get_conn(config)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(statement)
            # The result is built while the cursor is still open; the
            # finally clauses then release the cursor and the connection
            # on both the success and the error path.
            return as_pandas(cursor)
        finally:
            cursor.close()
    finally:
        conn.close()
test.py
# -*- coding: utf-8 -*-
from pyhive import hive
from impala.util import as_pandas
import DB.hive_engines
# Keyword counts for one event name within an inclusive cdate range.
# $start_date and $end_date are placeholders that are filled in from the
# params mapping passed to DB.hive_engines.execute_hql below.
hql = """
SELECT
keywords,
count(keywords)
FROM
table
WHERE
eventname = 'xxx' AND
cdate >= '$start_date' AND
cdate <= '$end_date'
GROUP BY
keywords
"""

if __name__ == '__main__':
    params = {'start_date': '2016-04-01', 'end_date': '2016-04-03'}
    df = DB.hive_engines.execute_hql(hql, params)
    # Parenthesised so the line is valid under both Python 2 and Python 3.
    print(df)
異常消息:
pyhive.exc.OperationalError: TExecuteStatementResp(status=TStatus(errorCode=1, errorMessage='Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask', sqlState='08S01', infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:326', 'org.apache.hive.service.cli.operation.SQLOperation:runQuery:SQLOperation.java:146', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:173', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:268', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:410', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:391', 'sun.reflect.GeneratedMethodAccessor31:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:606', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:415', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1671', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy27:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:245', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:509', 'org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1313', 'org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1298', 'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:285', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1145', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:615', 'java.lang.Thread:run:Thread.java:745'], statusCode=3), operationHandle=None)
謝謝!
根據這個討論,我在創建連接時使用了一個有效的用戶名,問題就解決了。
為了讓這個答案完整,我把上面提到的論壇中建議的代碼復制到這裡。請注意其中的有效用戶名。
from pyhive import hive
# Connect with an explicit user name. Angle-bracket values are placeholders
# to be replaced with real settings.
conn = hive.Connection(host='<myhost>',
                       port='<myport>',
                       database='spin1',
                       username='<a valid user>')  # IMPORTANT: must be a valid user
cursor = conn.cursor()
# A statement has to be executed before any rows can be fetched; calling
# fetchall() on a cursor that has not executed anything is an error.
cursor.execute('<my query>')
# Parenthesised so the line is valid under both Python 2 and Python 3.
print(cursor.fetchall())
在沒有有效用戶名的情況下,我遇到了問題中提到的相同異常。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.