簡體   English   中英

pyExcelerator和utf8?

[英]pyExcelerator and utf8?

我已經發現被盜計算器上下面的代碼(但忘了在哪里,不好意思):

#!/usr/local/bin/python
import string
import sys
import getopt
import re
import os
import os.path
import csv
from pyExcelerator import *

def usage():
  """ Display the usage """
  print "Usage: " + sys.argv[0] + " [OPTIONS] csvfile"
  print "OPTIONS:"
  print "--title|-t: If set, the first line is the title line"
  print "--lines|-l n: Split output into files of n lines or less each"
  print "--sep|-s c [def:,] : The character to use for field delimiter"
  print "--output|o : output file name/pattern"
  print "--help|h : print this information"
  sys.exit(2)

def openExcelSheet(outputFileName):
  """ Opens a reference to an Excel WorkBook and Worksheet objects """
  workbook = Workbook()
  worksheet = workbook.add_sheet("Sheet 1")
  return workbook, worksheet

def writeExcelHeader(worksheet, titleCols):
  """ Write the header line into the worksheet """
  cno = 0
  for titleCol in titleCols:
    worksheet.write(0, cno, titleCol)
    cno = cno + 1

def writeExcelRow(worksheet, lno, columns):
  """ Write a non-header row into the worksheet """
  cno = 0
  for column in columns:
    worksheet.write(lno, cno, column)
    cno = cno + 1

def closeExcelSheet(workbook, outputFileName):
  """ Saves the in-memory WorkBook object into the specified file """
  workbook.save(outputFileName)

def getDefaultOutputFileName(inputFileName):
  """ Returns the name of the default output file based on the value
      of the input file. The default output file is always created in
      the current working directory. This can be overriden using the
      -o or --output option to explicitly specify an output file """
  baseName = os.path.basename(inputFileName)
  rootName = os.path.splitext(baseName)[0]
  return string.join([rootName, "xls"], '.')

def renameOutputFile(outputFileName, fno):
  """ Renames the output file name by appending the current file number
      to it """
  dirName, baseName = os.path.split(outputFileName)
  rootName, extName = os.path.splitext(baseName)
  backupFileBaseName = string.join([string.join([rootName, str(fno)], '-'), extName], '')
  backupFileName = os.path.join(dirName, backupFileBaseName)
  try:
    os.rename(outputFileName, backupFileName)
  except OSError:
    print "Error renaming output file:", outputFileName, "to", backupFileName, "...aborting"
    sys.exit(-1)

def validateOpts(opts):
  """ Returns option values specified, or the default if none """
  titlePresent = False
  linesPerFile = -1
  outputFileName = ""
  sepChar = ","
  for option, argval in opts:
    if (option in ("-t", "--title")):
      titlePresent = True
    if (option in ("-l", "--lines")):
      linesPerFile = int(argval)
    if (option in ("-s", "--sep")):
      sepChar = argval
    if (option in ("-o", "--output")):
      outputFileName = argval
    if (option in ("-h", "--help")):
      usage()
  return titlePresent, linesPerFile, sepChar, outputFileName

def main():
  """ This is how we are called """
  try:
    opts,args = getopt.getopt(sys.argv[1:], "tl:s:o:h", ["title", "lines=", "sep=", "output=", "help"])
  except getopt.GetoptError:
    usage()
  if (len(args) != 1):
    usage()
  inputFileName = args[0]
  try:
    inputFile = open(inputFileName, 'r')
  except IOError:
    print "File not found:", inputFileName, "...aborting"
    sys.exit(-1)
  titlePresent, linesPerFile, sepChar, outputFileName = validateOpts(opts)
  if (outputFileName == ""):
    outputFileName = getDefaultOutputFileName(inputFileName)
  workbook, worksheet = openExcelSheet(outputFileName)
  fno = 0
  lno = 0
  titleCols = []
  reader = csv.reader(inputFile, delimiter=sepChar)
  for line in reader:
    if (lno == 0 and titlePresent):
      if (len(titleCols) == 0):
        titleCols = line
      writeExcelHeader(worksheet, titleCols)
    else:
      writeExcelRow(worksheet, lno, line)
    lno = lno + 1
    if (linesPerFile != -1 and lno >= linesPerFile):
      closeExcelSheet(workbook, outputFileName)
      renameOutputFile(outputFileName, fno)
      fno = fno + 1
      lno = 0
      workbook, worksheet = openExcelSheet(outputFileName)
  inputFile.close()
  closeExcelSheet(workbook, outputFileName)
  if (fno > 0):
    renameOutputFile(outputFileName, fno)

if __name__ == "__main__":
  main()

我的問題是,當使用它轉換utf8文件(例如包含“帶有小字符的拉丁小寫字母O(U + 00F3)“ó”)時,它會產生字節序列0x43 0x04,由開放辦公室和ms呈現excel為“ y”(0x43,而僅刪除0x04)。

有人知道我或pyExcelerator做錯了嗎?

您應該手動編碼/解碼utf-8文件中的數據:

reader = csv.reader(inputFile, delimiter=sepChar)
for line in reader:
  if (lno == 0 and titlePresent):
    if (len(titleCols) == 0):
      titleCols = line
    writeExcelHeader(worksheet, titleCols)
  else:
    # writeExcelRow(worksheet, lno, line)
    # in unicode function source file encoding should be passed
    writeExcelRow(worksheet, lno, [unicode(cell, 'utf-8') for cell in line])


def writeExcelRow(worksheet, lno, columns):
  """ Write a non-header row into the worksheet """
  cno = 0
  for column in columns:
    #worksheet.write(lno, cno, column)
    worksheet.write(lno, cno, column.encode('utf-8'))
    cno = cno + 1

在此處查看示例(unicode_csv_reader,utf_8_encoder): http ://docs.python.org/2/library/csv.html#examples

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM