[英]Python Error; UnicodeEncodeError: 'ascii' codec can't encode character u'\u2026'
[英]Python: 'ascii' codec can't encode character u'\\u2026'
我正在嘗試通過以下代碼在python中使用Bing api:
#!/usr/bin/python
from bingapi import bingapi
import re
import json
import urllib
import cgi
import cgitb
from HTMLParser import HTMLParser
class MLStripper(HTMLParser):
def __init__(self):
self.reset()
self.fed = []
def handle_data(self, d):
self.fed.append(d)
def get_data(self):
return ''.join(self.fed)
def strip_tags(html):
s = MLStripper()
s.feed(html)
return s.get_data()
def strip_tags2(data):
p = re.compile(r'<[^<]*?>')
q = re.compile(r'[&;!@#$%^*()]*')
data = p.sub('', data)
return q.sub('', data)
def getUrl(item):
return item['Url']
def getContent(item):
return item['Description']
def getTitle(item):
return item['Title']
def getInfo(qry, siteStr):
qryStr = qry + "+" + siteStr
#qryStr = u"%s" % qryStr.encode('UTF-8')
query = urllib.urlencode({'q' : qryStr})
url = 'http://api.bing.net/json.aspx?Appid=<myappid>&Version=2.2&Market=en-US&Query=%s&Sources=web&Web.Count=10&JsonType=raw' % (query)
search_results = urllib.urlopen(url)
j = json.loads(search_results.read())
results = j['SearchResponse']['Web']['Results']
return results
def updateRecent(qry):
f = open("recent.txt", "r")
lines = f.readlines()
f.close()
lines = lines[1:]
if len(qry) > 50: #truncate if string too long
qry = (qry[:50] + '...')
qry = strip_tags2(qry) #strip out the html if injection try
lines.append("\n%s" % qry)
f = open("recent.txt", "w")
f.writelines(lines)
f.close()
if __name__ == '__main__':
form = cgi.FieldStorage()
qry = form["qry"].value
qry = r'%s' % qry
updateRecent(qry)
siteStr = "(site:answers.yahoo.com OR site:chacha.com OR site:blurtit.com OR site:answers.com OR site:question.com OR site:answerbag.com OR site:stackexchange.com)"
print "Content-type: text/html"
print
header = open("header.html", "r")
contents = header.readlines()
header.close()
for item in contents:
print item
print """
<div id="results">
<center><h1>Results:</h1></center>
"""
for item in getInfo(siteStr, qry):
print "<h3>%s</h3>" % getTitle(item)
print "<br />"
print "%s" % getUrl(item)
print "<br />"
print "<p style=\"color:gray\">%s</p>" % getContent(item)
print "<br />"
print "</div>"
footer = open("footer.html", "r")
contents = footer.readlines()
footer.close()
for thing in contents:
print thing
我打印了一些結果,然后給我以下錯誤:
UnicodeEncodeError: 'ascii' codec can't encode character u'\\u2026' in position 72: ordinal not in range(128)
有人可以解釋為什么會這樣嗎? 顯然,這與url的編碼方式有關,但是到底是什么錯誤呢? 提前致謝!
該特定的Unicode字符是“水平省略號”。 您的getXXXXX()函數中的一個或多個正在返回Unicode字符串,其中一個包含非ASCII字符。 我建議聲明輸出的編碼,例如:
Content-Type: text/html; charset=utf-8
並以該編碼方式顯式編碼您的輸出。
我們需要知道引發異常的行號,它將在回溯中。 無論如何,問題在於您正在從文件/ URL中讀取unicode,然后隱式地將它們轉換為US-ASCII,這可能是通過串聯操作之一進行的。 您應在所有常量字符串前加上u前綴,以表示它們是unicode字符串,例如
u"\n%s" % qry
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.