簡體   English   中英

CSV到sqlite3數據庫。將列表從Utf8轉換為unicode

[英]CSV to sqlite3 database.Convert list from Utf8 to unicode

import csv, sqlite3

conn = sqlite3.connect("mycustomers9.sql")
curs =  conn.cursor()
try:
    curs.execute("CREATE TABLE t (unknown1 TEXT, county TEXT, businessName TEXT, address1 TEXT, city1 TEXT, zip1 INTEGER, phone1 INTEGER,Email1 TEXT, approvalstatus TEXT, date1 TEXT, date2 TEXT, typeofConstruct TEXT, typeofBiz TEXT, unknown2 TEXT, unknown3 TEXT, unknown4 TEXT, unknown5 TEXT, unknown6 TEXT,BizName2 TEXT,Address2 TEXT, City2 TEXT,Zip2 TEXT,Country2 TEXT,Phone2 TEXT,Email2 TEXT,Phone3 TEXT);")
except sqlite3.OperationalError:
    print "Table already exist"
with open('HR_plan_review.csv', 'rb') as infile:
    dr = csv.DictReader(infile, delimiter = ',')
    to_db = [(i["unknown1"], i['county'], i['businessName'], i['address1'], i['city1'], i['zip1'], i['phone1'], i['Email1'], i['approvalstatus'], i['date1'],i['date2'], i['typeofConstruct'], i['typeofBiz'], i['unknown2'], i['unknown3'], i['unknown4'], i['unknown5'], i['unknown6'], i['BizName2'], i['Address2'], i['City2'], i['Zip2'], i['Country2'], i['Phone2'], i['Email2'], i['Phone3']) for i in dr]

curs.executemany("INSERT INTO t (unknown1, county, businessName, address1, city1,zip1, phone1, Email1, approvalstatus, date1, date2,typeofConstruct, typeofBiz, unknown2, unknown3, unknown4,unknown5, unknown6,BizName2,Address2, City2,Zip2,Country2,Phone2,Email2,Phone3) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);", to_db)

to_db返回一個以utf-8編碼的列表,而sqllite數據庫似乎要求格式采用unicode。 在運行上述sql語句之前,如何將“ to_db”列表轉換為unicode。 下面是我運行上面的代碼時收到的錯誤消息。

sqlite3.ProgrammingError:不得使用8位字節串,除非您使用可以解釋8位字節串的te xt_factory(例如text_factory = str)。 強烈建議您改為將應用程序切換為Unicode字符串。

根據答案輸入進行編輯

修改后的代碼(下面)現在可以成功執行,但是不會將csv中獲取的值插入數據庫。

import csv, sqlite3

conn = sqlite3.connect("mycustomers12.sql")
curs =  conn.cursor()
try:
    curs.execute(""" CREATE TABLE t (unknown1 TEXT, county TEXT, businessName TEXT, address1 TEXT, city1 TEXT, zip1 INTEGER, \n
    phone1 INTEGER,Email1 TEXT, approvalstatus TEXT, date1 TEXT, date2 TEXT, typeofConstruct TEXT, typeofBiz TEXT, unknown2 TEXT, \n
    unknown3 TEXT, unknown4 TEXT, unknown5 TEXT, unknown6 TEXT,BizName2 TEXT,Address2 TEXT, City2 TEXT,Zip2 TEXT,Country2 TEXT,\n
    Phone2 TEXT,Email2 TEXT,Phone3 TEXT);""")
except sqlite3.OperationalError:
    print "Table already exist"


infile = open('HR_plan_review.csv', 'rb') 
dr = csv.DictReader(infile, delimiter = ',')
keys=("unknown1", 'county', 'businessName', 'address1',
    'city1', 'zip1', 'phone1', 'Email1', 'approvalstatus',
    'date1','date2', 'typeofConstruct', 'typeofBiz', 'unknown2',
    'unknown3', 'unknown4', 'unknown5', 'unknown6', 'BizName2',
    'Address2', 'City2', 'Zip2', 'Country2', 'Phone2',
    'Email2', 'Phone3')
args=[tuple(key.decode('utf-8') for key in keys) for row in dr]
sql='INSERT INTO t ({f}) VALUES ({p})'.format(
    f=','.join(keys),
    p=','.join(['?']*len(keys)))
curs.executemany(sql, args)

可悲的是, csv無法處理unicode(至少在Python 2.7中)。 但是您可以通過將DictReader包裝在生成器中來解決此問題:

with open('HR_plan_review.csv', 'rb') as infile:
    dr = csv.DictReader(infile, delimiter = ',')
    def unicoded_data():
        for row in dr:
            # Assuming infile encoding is utf-8.
            yield dict([(key, unicode(value, encoding='utf-8'))
                       for key, value in row.iteritems()])

    to_db = [(i["unknown1"], i['county'], i['businessName'], i['address1'], i['city1'], i['zip1'], i['phone1'], i['Email1'], i['approvalstatus'], i['date1'],i['date2'], i['typeofConstruct'], i['typeofBiz'], i['unknown2'], i['unknown3'], i['unknown4'], i['unknown5'], i['unknown6'], i['BizName2'], i['Address2'], i['City2'], i['Zip2'], i['Country2'], i['Phone2'], i['Email2'], i['Phone3']) for i in unicoded_data()]

您需要提交執行:

conn.commit()

同樣,您的executemany語句應放在“ with”塊中:

import csv, sqlite3

myfile = 'CSV FILE PATH'
conn = sqlite3.connect("DBNAME.sqlite3")
curs =  conn.cursor()
try:
    curs.execute("CREATE TABLE t (webrank INTEGER, term, TEXT PRIMARY KEY, gloss TEXT);")
except sqlite3.OperationalError:
    print "Table already exist"
with open('{}.csv'.format(myfile), 'rb') as infile:
    dr = csv.DictReader(infile, delimiter = ',')
    def unicoded_data():
        for row in dr:
            # Assuming infile encoding is utf-8.
            yield int(row['WebRank']), unicode(row['term'], encoding='utf-8'), unicode(row['gloss'], encoding='utf-8')

    curs.executemany("INSERT INTO t (webrank, term, gloss) VALUES (?,?,?);", unicoded_data())
    conn.commit()
keys=("unknown1", 'county', 'businessName', 'address1',
    'city1', 'zip1', 'phone1', 'Email1', 'approvalstatus',
    'date1','date2', 'typeofConstruct', 'typeofBiz', 'unknown2',
    'unknown3', 'unknown4', 'unknown5', 'unknown6', 'BizName2',
    'Address2', 'City2', 'Zip2', 'Country2', 'Phone2',
    'Email2', 'Phone3')
args=[tuple(i[key].decode('utf-8') for key in keys) for row in dr]
sql='INSERT INTO t ({f}) VALUES ({p})'.format(
    f=','.join(keys),
    p=','.join(['?']*len(keys)))
curs.executemany(sql, args)

或者,對於更健壯的解決方案,您可以使用UnicodeDictReader(略微修改的UnicodeReader版本) (來自csv文檔)以Unicode值的形式返回行:

class UTF8Recoder:
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8
    """
    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        return self.reader.next().encode("utf-8")

class UnicodeDictReader:
    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        f = UTF8Recoder(f, encoding)
        self.reader = csv.DictReader(f, dialect=dialect, **kwds)

    def next(self):
        row = self.reader.next()
        return dict((key,unicode(val, "utf-8")) for key,val in row.iteritems())

    def __iter__(self):
        return self

with open('HR_plan_review.csv', 'rb') as infile:
    dr = UnicodeDictReader(infile, delimiter = ',')

我上面發布的代碼仍然可以使用,只需更改

args=[tuple(i[key].decode('utf-8') for key in keys) for row in dr]

args=[tuple(i[key] for key in keys) for row in dr]

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM