简体   繁体   中英

python csv unicode 'ascii' codec can't encode character u'\xf6' in position 1: ordinal not in range(128)

I have copied this script from [python web site][1] This is another question but now problem with encoding:

import sqlite3
import csv
import codecs
import cStringIO
import sys

class UTF8Recoder:
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8
    """
    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        return self.reader.next().encode("utf-8")

class UnicodeReader:
    """
    A CSV reader which will iterate over lines in the CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        f = UTF8Recoder(f, encoding)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def next(self):
        row = self.reader.next()
        return [unicode(s, "utf-8") for s in row]

    def __iter__(self):
        return self

class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)

This time problem with encoding, when I ran this it gave me this error:

Traceback (most recent call last):
  File "makeCSV.py", line 87, in <module>
    uW.writerow(d)
  File "makeCSV.py", line 54, in writerow
    self.writer.writerow([s.encode("utf-8") for s in row])
AttributeError: 'int' object has no attribute 'encode'

Then I converted all integers to string, but this time I got this error:

Traceback (most recent call last):
  File "makeCSV.py", line 87, in <module>
    uW.writerow(d)
  File "makeCSV.py", line 54, in writerow
    self.writer.writerow([str(s).encode("utf-8") for s in row])
UnicodeEncodeError: 'ascii' codec can't encode character u'\xf6' in position 1: ordinal not in range(128)

I have implemented above to deal with unicode characters, but it gives me such error. What is the problem and how to fix it?

Then I converted all integers to string,

You converted both integers and strings to byte strings . For strings this will use the default character encoding which happens to be ASCII, and this fails when you have non-ASCII characters. You want unicode instead of str .

self.writer.writerow([unicode(s).encode("utf-8") for s in row])

It might be better to convert everything to unicode before calling that method. The class is designed specifically for parsing Unicode strings. It was not designed to support other data types.

From the documentation:

Unlike the StringIO module, this module is not able to accept Unicode strings that cannot be encoded as plain ASCII strings.

Ie only 7-bit clean strings can be stored.

If you are using Python 2:

make encoding as : str(s.encode("utf-8")) ie

def writerow(self, row):
    self.writer.writerow([str(s.encode("utf-8")) for s in row])
    # Fetch UTF-8 output from the queue ...
    data = self.queue.getvalue()
    data = data.decode("utf-8")
    # ... and reencode it into the target encoding
    data = self.encoder.encode(data)
    # write to the target stream
    self.stream.write(data)
    # empty queue
    self.queue.truncate(0)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM