Python：'ascii' 編解碼器無法編碼字符 u'\u2026'

Question

我正在嘗試通過以下代碼在python中使用Bing api：

#!/usr/bin/python
from bingapi import bingapi  
import re
import json
import urllib
import cgi
import cgitb
from HTMLParser import HTMLParser

class MLStripper(HTMLParser):
    """HTML parser that keeps only the text found between tags.

    Feed markup with ``feed()`` and read the accumulated text back with
    ``get_data()``.
    """

    def __init__(self):
        # Run the base initializer instead of calling reset() directly, so
        # any parser state the base class sets up is present before feed().
        # An explicit base call is used (rather than super()) so it works
        # whether or not the base is a new-style class.
        HTMLParser.__init__(self)
        # Text chunks in the order the parser reports them.
        self.fed = []

    def handle_data(self, d):
        """Parser callback: record one chunk of text content."""
        self.fed.append(d)

    def get_data(self):
        """Return all recorded text chunks joined into a single string."""
        return ''.join(self.fed)

def strip_tags(html):
    """Return the text content of *html*, as collected by an MLStripper."""
    stripper = MLStripper()
    stripper.feed(html)
    text = stripper.get_data()
    return text

def strip_tags2(data):
    """Remove tag-like spans and a fixed set of punctuation from *data*.

    First every ``<...>`` span is deleted, then every occurrence of the
    characters ``& ; ! @ # $ % ^ * ( )`` is deleted from what remains.
    """
    without_tags = re.sub(r'<[^<]*?>', '', data)
    cleaned = re.sub(r'[&;!@#$%^*()]*', '', without_tags)
    return cleaned

def getUrl(item):
    """Return the value stored under the 'Url' key of *item*."""
    url = item['Url']
    return url

def getContent(item):
    """Return the value stored under the 'Description' key of *item*."""
    description = item['Description']
    return description

def getTitle(item):
    """Return the value stored under the 'Title' key of *item*."""
    title = item['Title']
    return title

def getInfo(qry, siteStr):
    """Query the Bing JSON API and return the list of web results.

    The two arguments are joined with "+" to form the search text, which is
    URL-quoted and placed in the ``Query`` parameter of the request URL.

    Returns the object found at ['SearchResponse']['Web']['Results'] in the
    decoded JSON response; a KeyError propagates if that path is missing.
    """
    qryStr = qry + "+" + siteStr
    # Quote only the value. urlencode({'q': ...}) yields "q=<value>", which
    # turned the request into "...&Query=q=<value>..." when spliced in below.
    query = urllib.quote_plus(qryStr)
    url = 'http://api.bing.net/json.aspx?Appid=<myappid>&Version=2.2&Market=en-US&Query=%s&Sources=web&Web.Count=10&JsonType=raw' % (query)
    search_results = urllib.urlopen(url)
    try:
        j = json.loads(search_results.read())
    finally:
        # Release the connection even when reading or decoding fails.
        search_results.close()
    return j['SearchResponse']['Web']['Results']

def updateRecent(qry):
    """Record *qry* in recent.txt, discarding the file's first line.

    The query is cut to 50 characters plus '...' when longer, passed through
    strip_tags2, and appended (preceded by a newline) to the remaining
    lines, which are then written back to recent.txt.
    """
    # 'with' closes the file even if reading raises.
    with open("recent.txt", "r") as f:
        lines = f.readlines()
    lines = lines[1:]  # drop the first line to make room for the new entry

    if len(qry) > 50:  # truncate if string too long
        qry = qry[:50] + '...'
    qry = strip_tags2(qry)  # strip out the html if injection try

    lines.append("\n%s" % qry)
    with open("recent.txt", "w") as f:
        f.writelines(lines)

if __name__ == '__main__':
    # CGI entry point: read the 'qry' form field, record it with
    # updateRecent, then print an HTML page made of header.html, the search
    # results and footer.html.
    form = cgi.FieldStorage()
    qry = form["qry"].value
    # The r prefix has no effect on '%s' (there are no backslashes); this
    # simply formats qry through '%s'.
    qry = r'%s' % qry

    updateRecent(qry)

    siteStr = "(site:answers.yahoo.com OR site:chacha.com OR site:blurtit.com OR site:answers.com OR site:question.com OR site:answerbag.com OR site:stackexchange.com)"

    # Response header, then a bare print for the blank line that precedes
    # the body.
    print "Content-type: text/html"
    print

    header = open("header.html", "r")
    contents = header.readlines()
    header.close()
    # Lines from readlines() keep their trailing newline and print adds
    # another one.
    for item in contents:
            print item

    print """
    <div id="results">
    <center><h1>Results:</h1></center>
    """
    # NOTE: the arguments are passed as (siteStr, qry) although getInfo
    # declares (qry, siteStr); getInfo only joins them with "+", so the
    # site filter ends up before the user's query.
    # NOTE(review): the fields printed below come from json.loads and are
    # presumably unicode objects; printing one that contains a non-ASCII
    # character (e.g. u'\u2026') is the likely source of the reported
    # UnicodeEncodeError -- confirm against the traceback.
    for item in getInfo(siteStr, qry):
            print "<h3>%s</h3>" % getTitle(item)
            print "<br />"
            print "%s" % getUrl(item)
            print "<br />"
            print "<p style=\"color:gray\">%s</p>" % getContent(item)
            print "<br />"
    print "</div>"

    footer = open("footer.html", "r")
    contents = footer.readlines()
    footer.close()
    for thing in contents:
            print thing

我打印了一些結果，然后給我以下錯誤：

UnicodeEncodeError: 'ascii' codec can't encode character u'\u2026' in position 72: ordinal not in range(128)

有人可以解釋為什么會這樣嗎？ 顯然，這與url的編碼方式有關，但是到底是什么錯誤呢？ 提前致謝！

Answer 1

該特定的Unicode字符是“水平省略號”。 您的getXXXXX（）函數中的一個或多個正在返回Unicode字符串，其中一個包含非ASCII字符。 我建議聲明輸出的編碼，例如：

Content-Type: text/html; charset=utf-8

並以該編碼方式顯式編碼您的輸出。

Answer 2

我們需要知道引發異常的行號，它將在回溯中。 無論如何，問題在於您正在從文件/ URL中讀取unicode，然后隱式地將它們轉換為US-ASCII，這可能是通過串聯操作之一進行的。 您應在所有常量字符串前加上u前綴，以表示它們是unicode字符串，例如

u"\n%s" % qry

Python：'ascii' 編解碼器無法編碼字符 u'\u2026'

問題描述

2 個解決方案

解決方案1
3 已采納 2011-02-06 18:48:30

解決方案2
2 2011-02-06 18:36:01

Python：'ascii' 編解碼器無法編碼字符 u'\u2026'

問題描述

2 個解決方案

解決方案1 3 已采納 2011-02-06 18:48:30

解決方案2 2 2011-02-06 18:36:01

Python：'ascii' 編解碼器無法編碼字符 u'\u2026'

解決方案1
3 已采納 2011-02-06 18:48:30

解決方案2
2 2011-02-06 18:36:01