[英]Reading an excel sheet and inserting it to the database mysql
I am new to java. 我是Java新手。 I am using an apache poi to insert a data from excel sheet to the mysql. 我正在使用一个Apache POI将数据从Excel工作表插入mysql。 At the Backend I have created the table using mysql. 在后端,我使用mysql创建了表。 I have imported all the jar files. 我已经导入了所有的jar文件。 The error which I am Facing is that: 1. The date value cannot be inserted. 我正面临的错误是:1.无法插入日期值。 2.When I remove the date column,it sends only the address of the data and not the value. 2.当我删除日期列时,它只发送数据的地址,而不发送值。 3.Sometimes it tells source not found. 3.有时它告诉未找到源。 Please do help me with this issue. 请帮我解决这个问题。
public static ArrayList readExcelFile(String fileName) throws SQLException
{
/** --Define a ArrayList
--Holds ArrayList Of Cells
*/
ArrayList cellArrayLisstHolder = new ArrayList();
enter code here
try{
/** Creating Input Stream**/
FileInputStream myInput = new FileInputStream(fileName);
/** Create a POIFSFileSystem object**/
POIFSFileSystem myFileSystem = new POIFSFileSystem(myInput);
/** Create a workbook using the File System**/
HSSFWorkbook myWorkBook = new HSSFWorkbook(myFileSystem);
/** Get the first sheet from workbook**/
HSSFSheet mySheet = myWorkBook.getSheetAt(0);
/** We now need something to iterate through the cells.**/
Iterator rowIter = mySheet.rowIterator();
while(rowIter.hasNext()){
HSSFRow myRow = (HSSFRow) rowIter.next();
Iterator cellIter = myRow.cellIterator();
ArrayList cellStoreArrayList=new ArrayList();
while(cellIter.hasNext()){
HSSFCell myCell = (HSSFCell) cellIter.next();
HSSFCell cell = myRow.createCell((short) 0);
cell.setCellType(HSSFCell.CELL_TYPE_NUMERIC);
SimpleDateFormat datetemp = new SimpleDateFormat("mm-dd-yyyy");
Date cellValue = datetemp.parse("1994-01-01");
cell.setCellValue(cellValue);
//binds the style you need to the cell.
HSSFCellStyle dateCellStyle = myWorkBook.createCellStyle();
short df = myWorkBook.createDataFormat().getFormat("dd-mmm");
dateCellStyle.setDataFormat(df);
cell.setCellStyle(dateCellStyle);
cellStoreArrayList.add(myCell);
}
cellArrayLisstHolder.add(cellStoreArrayList);
}
}catch (Exception e){e.printStackTrace(); }
return cellArrayLisstHolder;
}%>
<%
File f = new File("DeptHosp.xls");
System.out.println(f.getAbsolutePath());
File file = new File(".");
for(String fileNames : file.list()) System.out.println(fileNames);
String fileName="D://PROJECT//SOFTWARES//eclipse_Juno//eclipse//DeptHosp.xls";
//Read an Excel File and Store in a ArrayList
System.out.println(" path found");
ArrayList dataHolder=readExcelFile(fileName);
//Print the data read
//printCellDataToConsole(dataHolder);
con=connection.getConn();
System.out.println("Inserting the details");
String query=
"insert into departmentmaster(Dept_id,Dept_Groupid,Dept_Kid,Dept_Groupkid,Dept_Group,Dept_Name,Dept_type ,Dept_HospitalId,Dept_Datecreated,Dept_datelastrefreshed)values(?,?,?,?,?,?,?,?,?,?)";
ps=con.prepareStatement(query);
System.out.println("Database");
int count=0;
ArrayList cellStoreArrayList=null;
Date datevalue=null;
//For inserting into database
for (int i=1;i < dataHolder.size(); i++) {
cellStoreArrayList=(ArrayList)dataHolder.get(i);
ps.setString(1,((HSSFCell)cellStoreArrayList.get(0)).getStringCellValue());
ps.setString(2,((HSSFCell)cellStoreArrayList.get(1)).getStringCellValue());
ps.setString(3,((HSSFCell)cellStoreArrayList.get(2)).getStringCellValue());
ps.setString(4,((HSSFCell)cellStoreArrayList.get(3)).getStringCellValue());
ps.setString(5,((HSSFCell)cellStoreArrayList.get(4)).getStringCellValue());
ps.setString(6,((HSSFCell)cellStoreArrayList.get(5)).getStringCellValue());
ps.setString(7,((HSSFCell)cellStoreArrayList.get(6)).getStringCellValue());
ps.setString(8,((HSSFCell)cellStoreArrayList.get(7)).getStringCellValue());
ps.setString(9,((HSSFCell)cellStoreArrayList.get(8)).getStringCellValue());
ps.setString(10,((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue());
count= ps.executeUpdate();
System.out.println(((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue() + "\t");
}
//For checking data is inserted or not?
if(count>0)
{ %>
<table>
<tr>
<th>Dept_Id</th>
<th>Dept_GroupId</th>
<th>Dept_KId</th>
<th>Dept_GroupKid</th>
<th>Dept_Group</th>
<th>Dept_Name</th>
<th>Dept_Type</th>
<th>Hospital_Id</th>
<th>Dept_datecreated</th>
<th>Dept_datelastrefreshed</th>
</tr>
<% for (int i=1;i < dataHolder.size(); i++) {
cellStoreArrayList=(ArrayList)dataHolder.get(i);%>
<tr>
<td><%=((HSSFCell)cellStoreArrayList.get(0)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(1)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(2)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(3)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(4)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(5)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(6)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(7)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(8)).getStringCellValue() %></td>
<td><%=((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue() %></td>
</tr>
<%}
}
else
{%>
<center> Details have not been inserted!!!!!!!!!</center>
<% }%>
The error which i am getting is: Jan 06, 2014 12:05:00 PM org.apache.catalina.core.StandardWrapperValve invoke SEVERE: Servlet.service() for servlet [jsp] in context with path [/Excel] threw exception [An exception occurred processing JSP page /Excel.jsp at line 139 我遇到的错误是:2014年1月6日,下午org.apache.catalina.core.StandardWrapperValve为路径为[/ Excel]的上下文中的Servlet [jsp]调用SEVERE:Servlet.service() [在第139行处理JSP页面/Excel.jsp时发生了异常。
136: ps.setString(6,((HSSFCell)cellStoreArrayList.get(5)).getStringCellValue());
137: ps.setString(7,((HSSFCell)cellStoreArrayList.get(6)).getStringCellValue());
138: ps.setString(8,((HSSFCell)cellStoreArrayList.get(7)).getStringCellValue());
139: ps.setString(9,((HSSFCell)cellStoreArrayList.get(8)).getStringCellValue());
140: ps.setString(10, ((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue());
141:
142: count= ps.executeUpdate();
Stacktrace:] with root cause
java.lang.IndexOutOfBoundsException: Index: 8, Size: 8
at java.util.ArrayList.rangeCheck(Unknown Source)
at java.util.ArrayList.get(Unknown Source)
at org.apache.jsp.Excel_jsp._jspService(Excel_jsp.java:234)
at org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:70)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:728)
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:432)
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:390)
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:334)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:728)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:305)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:51)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:243)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:222)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:123)
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:502)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:171)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:100)
at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:953)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:408)
at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1041)
at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:603)
at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:310)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
I cant comment to your question, so i will try to answer based on my understanding that you have issues reading/writing the DATE fields in Excel. 我无法评论您的问题,因此我将根据您的理解,即您在Excel中读取/写入DATE字段时遇到的问题,尝试回答。
I think you are missing a check on Date field, excel stores the dates as Numeric Contents internally and while displaying, It displays a formatted date. 我认为您缺少对日期字段的检查,Excel在内部将日期存储为数字内容,并且在显示时会显示格式化的日期。 Also you need to ensure that the column is formatted as DATE column in the sheet you are reading. 另外,您还需要确保将列格式设置为正在读取的工作表中的DATE列。
You need to use below code fragment to read content as DATE. 您需要使用下面的代码片段作为DATE读取内容。
if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) { 如果(cell.getCellType()== HSSFCell.CELL_TYPE_NUMERIC){
// Excel stores the Date as a Numeric Contents. POI provides
// a Date utility to check
// if a particular cell is formatted as a date.
if (DateUtil.isCellDateFormatted(cell)) {
Date date = DateUtil.getJavaDate((double) cell
.getNumericCellValue());
SimpleDateFormat df = new SimpleDateFormat("dd/MM/yyyy HH:mm z");
System.out.println("The cell is a Date : " + df.format(date));
} else {
// treat the cell as 'double' number
System.out.println("The cell is a number : "
+ cell.getNumericCellValue());
}
} }
For complete code snipper you can refer this link . 有关完整的代码窃听器,您可以参考此链接 。 Please note I have authored the link. 请注意,我已经创建了链接。
Can you use jython instead of POI? 您可以使用jython代替POI吗? If so, the following code will spit out a CSV which can then be loaded into MySQL using the [bulk loader](http://dev.mysql.com/doc/refman/5.1/en/mysqlimport.html] by the command that follows the code: 如果是这样,下面的代码将吐出一个CSV,然后可以通过[bulk loader](http://dev.mysql.com/doc/refman/5.1/en/mysqlimport.html]将其加载到MySQL中遵循以下代码:
#!/usr/bin/python
#
# Copyright information
#
# Copyright (C) 2013-2014 Hasan Diwan
# This program is free software; you can redistribute it and/or modify
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
__author__ = "Hasan Diwan <hasan.diwan@gmail.com>"
__license__ = "BSD"
__version__ = "0.1"
import csv, datetime, zipfile, string, sys, os, re
import xml.parsers.expat
from xml.dom import minidom
try:
# python2.4
from cStringIO import StringIO
except:
pass
try:
from argparse import ArgumentParser
except:
# python2.4
from optparse import OptionParser
# see also ruby-roo lib at: http://github.com/hmcgowan/roo
FORMATS = {
'general' : 'float',
'0' : 'float',
'0.00' : 'float',
'#,##0' : 'float',
'#,##0.00' : 'float',
'0%' : 'percentage',
'0.00%' : 'percentage',
'0.00e+00' : 'float',
'mm-dd-yy' : 'date',
'd-mmm-yy' : 'date',
'd-mmm' : 'date',
'mmm-yy' : 'date',
'h:mm am/pm' : 'date',
'h:mm:ss am/pm' : 'date',
'h:mm' : 'time',
'h:mm:ss' : 'time',
'm/d/yy h:mm' : 'date',
'#,##0 ;(#,##0)' : 'float',
'#,##0 ;[red](#,##0)' : 'float',
'#,##0.00;(#,##0.00)' : 'float',
'#,##0.00;[red](#,##0.00)' : 'float',
'mm:ss' : 'time',
'[h]:mm:ss' : 'time',
'mmss.0' : 'time',
'##0.0e+0' : 'float',
'@' : 'float',
'yyyy\\-mm\\-dd' : 'date',
'dd/mm/yy' : 'date',
'hh:mm:ss' : 'time',
"dd/mm/yy\\ hh:mm" : 'date',
'dd/mm/yyyy hh:mm:ss' : 'date',
'yy-mm-dd' : 'date',
'd-mmm-yyyy' : 'date',
'm/d/yy' : 'date',
'm/d/yyyy' : 'date',
'dd-mmm-yyyy' : 'date',
'dd/mm/yyyy' : 'date',
'mm/dd/yy hh:mm am/pm' : 'date',
'mm/dd/yyyy hh:mm:ss' : 'date',
'yyyy-mm-dd hh:mm:ss' : 'date',
}
STANDARD_FORMATS = {
0 : 'general',
1 : '0',
2 : '0.00',
3 : '#,##0',
4 : '#,##0.00',
9 : '0%',
10 : '0.00%',
11 : '0.00e+00',
12 : '# ?/?',
13 : '# ??/??',
14 : 'mm-dd-yy',
15 : 'd-mmm-yy',
16 : 'd-mmm',
17 : 'mmm-yy',
18 : 'h:mm am/pm',
19 : 'h:mm:ss am/pm',
20 : 'h:mm',
21 : 'h:mm:ss',
22 : 'm/d/yy h:mm',
37 : '#,##0 ;(#,##0)',
38 : '#,##0 ;[red](#,##0)',
39 : '#,##0.00;(#,##0.00)',
40 : '#,##0.00;[red](#,##0.00)',
45 : 'mm:ss',
46 : '[h]:mm:ss',
47 : 'mmss.0',
48 : '##0.0e+0',
49 : '@',
}
class XlsxException(Exception):
pass
class InvalidXlsxFileException(XlsxException):
pass
class SheetNotFoundException(XlsxException):
pass
class OutFileAlreadyExistsException(XlsxException):
pass
class Xlsx2csv:
"""
Usage: Xlsx2csv("test.xslx", **params).convert("test.csv", sheetid=1)
parameters:
sheetid - sheet no to convert (0 for all sheets)
dateformat - override date/time format
delimiter - csv columns delimiter symbol
sheet_delimiter - sheets delimiter used when processing all sheets
skip_empty_lines - skip empty lines
"""
def __init__(self, xlsxfile, dateformat=None, delimiter=",", sheetdelimiter="--------", skip_empty_lines=False, escape_strings=False, cmd=False):
try:
self.ziphandle = zipfile.ZipFile(xlsxfile)
except (zipfile.BadZipfile, IOError):
if cmd:
sys.stderr.write("Invalid xlsx file: " + xlsxfile + os.linesep)
sys.exit(1)
raise InvalidXlsxFileException("Invalid xlsx file: " + xlsxfile)
self.dateformat = dateformat
self.delimiter = delimiter
self.sheetdelimiter = sheetdelimiter
self.skip_empty_lines = skip_empty_lines
self.cmd = cmd
self.py3 = sys.version_info[0] == 3
self.shared_strings = self._parse(SharedStrings, "xl/sharedStrings.xml")
self.styles = self._parse(Styles, "xl/styles.xml")
self.workbook = self._parse(Workbook, "xl/workbook.xml")
if escape_strings:
self.shared_strings.escape_strings()
def convert(self, outfile, sheetid=1):
"""outfile - path to file or filehandle"""
if sheetid > 0:
self._convert(sheetid, outfile)
else:
if isinstance(outfile, str):
if not os.path.exists(outfile):
os.makedirs(outfile)
elif os.path.isfile(outfile):
if cmd:
sys.stderr.write("File " + outfile + " already exists!" + os.linesep)
sys.exit(1)
raise OutFileAlreadyExistsException("File " + outfile + " already exists!")
for s in self.workbook.sheets:
sheetname = s['name']
if not self.py3:
sheetname = sheetname.encode('utf-8')
of = outfile
if isinstance(outfile, str):
of = os.path.join(outfile, sheetname + '.csv')
elif self.sheetdelimiter and len(self.sheetdelimiter):
of.write(self.sheetdelimiter + " " + str(s['id']) + " - " + sheetname + os.linesep)
self._convert(s['id'], of)
def _convert(self, sheetid, outfile):
closefile = False
if isinstance(outfile, str):
outfile = open(outfile, 'w+')
closefile = True
try:
writer = csv.writer(outfile, quoting=csv.QUOTE_ALL, delimiter=self.delimiter, lineterminator='\r\n')
sheetfile = self._filehandle("xl/worksheets/sheet%i.xml" % sheetid)
if not sheetfile:
if self.cmd:
sys.stderr.write("Sheet %s not found!%s" %(sheetid, os.linesep))
sys.exit(1)
raise SheetNotFoundException("Sheet %s not found" %sheetid)
try:
sheet = Sheet(self.workbook, self.shared_strings, self.styles, sheetfile)
sheet.set_dateformat(self.dateformat)
sheet.set_skip_empty_lines(self.skip_empty_lines)
sheet.to_csv(writer)
finally:
sheetfile.close()
finally:
if closefile:
outfile.close()
def _filehandle(self, filename):
for name in filter(lambda f: f.lower() == filename.lower(), self.ziphandle.namelist()):
# python2.4 fix
if not hasattr(self.ziphandle, "open"):
return StringIO(self.ziphandle.read(name))
return self.ziphandle.open(name, "r")
return None
def _parse(self, klass, filename):
instance = klass()
filehandle = self._filehandle(filename)
if filehandle:
instance.parse(filehandle)
filehandle.close()
return instance
class Workbook:
def __init__(self):
self.sheets = []
self.date1904 = False
def parse(self, filehandle):
workbookDoc = minidom.parseString(filehandle.read())
if len(workbookDoc.firstChild.getElementsByTagName("fileVersion")) == 0:
self.appName = 'unknown'
else:
self.appName = workbookDoc.firstChild.getElementsByTagName("fileVersion")[0]._attrs['appName'].value
try:
self.date1904 = workbookDoc.firstChild.getElementsByTagName("workbookPr")[0]._attrs['date1904'].value.lower().strip() != "false"
except:
pass
sheets = workbookDoc.firstChild.getElementsByTagName("sheets")[0]
for sheetNode in sheets.getElementsByTagName("sheet"):
attrs = sheetNode._attrs
name = attrs["name"].value
if self.appName == 'xl':
if 'r:id' in attrs: id = int(attrs["r:id"].value[3:])
else: id = int(attrs['sheetId'].value)
else:
if 'sheetId' in attrs: id = int(attrs["sheetId"].value)
else: id = int(attrs['r:id'].value[3:])
self.sheets.append({'name': name, 'id': id})
class Styles:
def __init__(self):
self.numFmts = {}
self.cellXfs = []
def parse(self, filehandle):
styles = minidom.parseString(filehandle.read()).firstChild
# numFmts
numFmtsElement = styles.getElementsByTagName("numFmts")
if len(numFmtsElement) == 1:
for numFmt in numFmtsElement[0].childNodes:
if numFmt.nodeType == minidom.Node.ELEMENT_NODE:
numFmtId = int(numFmt._attrs['numFmtId'].value)
formatCode = numFmt._attrs['formatCode'].value.lower().replace('\\', '')
self.numFmts[numFmtId] = formatCode
# cellXfs
cellXfsElement = styles.getElementsByTagName("cellXfs")
if len(cellXfsElement) == 1:
for cellXfs in cellXfsElement[0].childNodes:
if cellXfs.nodeType != minidom.Node.ELEMENT_NODE or cellXfs.nodeName != "xf":
continue
if 'numFmtId' in cellXfs._attrs:
numFmtId = int(cellXfs._attrs['numFmtId'].value)
self.cellXfs.append(numFmtId)
else:
self.cellXfs.append(None)
class SharedStrings:
def __init__(self):
self.parser = None
self.strings = []
self.si = False
self.t = False
self.rPh = False
self.value = ""
def parse(self, filehandle):
self.parser = xml.parsers.expat.ParserCreate()
self.parser.CharacterDataHandler = self.handleCharData
self.parser.StartElementHandler = self.handleStartElement
self.parser.EndElementHandler = self.handleEndElement
self.parser.ParseFile(filehandle)
def escape_strings(self):
for i in range(0, len(self.strings)):
self.strings[i] = self.strings[i].replace("\r", "\\r").replace("\n", "\\n").replace("\t", "\\t")
def handleCharData(self, data):
if self.t:
self.value+= data
def handleStartElement(self, name, attrs):
if name == 'si':
self.si = True
self.value = ""
elif name == 't' and self.rPh:
self.t = False
elif name == 't' and self.si:
self.t = True
elif name == 'rPh':
self.rPh = True
def handleEndElement(self, name):
if name == 'si':
self.si = False
self.strings.append(self.value)
elif name == 't':
self.t = False
elif name == 'rPh':
self.rPh = False
class Sheet:
def __init__(self, workbook, sharedString, styles, filehandle):
self.py3 = sys.version_info[0] == 3
self.parser = None
self.writer = None
self.sharedString = None
self.styles = None
self.in_sheet = False
self.in_row = False
self.in_cell = False
self.in_cell_value = False
self.in_cell_formula = False
self.columns = {}
self.rowNum = None
self.colType = None
self.s_attr = None
self.data = None
self.dateformat = None
self.skip_empty_lines = False
self.filehandle = filehandle
self.workbook = workbook
self.sharedStrings = sharedString.strings
self.styles = styles
def set_dateformat(self, dateformat):
self.dateformat = dateformat
def set_skip_empty_lines(self, skip):
self.skip_empty_lines = skip
def to_csv(self, writer):
self.writer = writer
self.parser = xml.parsers.expat.ParserCreate()
self.parser.CharacterDataHandler = self.handleCharData
self.parser.StartElementHandler = self.handleStartElement
self.parser.EndElementHandler = self.handleEndElement
self.parser.ParseFile(self.filehandle)
def handleCharData(self, data):
if self.in_cell_value:
self.collected_string+= data
self.data = self.collected_string
if self.colType == "s": # shared string
self.data = self.sharedStrings[int(self.data)]
elif self.colType == "b": # boolean
self.data = (int(data) == 1 and "TRUE") or (int(data) == 0 and "FALSE") or data
elif self.s_attr:
s = int(self.s_attr)
# get cell format
format = None
xfs_numfmt = self.styles.cellXfs[s]
if xfs_numfmt in self.styles.numFmts:
format = self.styles.numFmts[xfs_numfmt]
elif xfs_numfmt in STANDARD_FORMATS:
format = STANDARD_FORMATS[xfs_numfmt]
# get format type
if format and format in FORMATS:
format_type = FORMATS[format]
try:
if format_type == 'date': # date/time
if self.workbook.date1904:
date = datetime.datetime(1904, 1, 1) + datetime.timedelta(float(self.data))
else:
date = datetime.datetime(1899, 12, 30) + datetime.timedelta(float(self.data))
if self.dateformat:
# str(dateformat) - python2.5 bug, see: http://bugs.python.org/issue2782
self.data = date.strftime(str(self.dateformat))
else:
dateformat = format.replace("yyyy", "%Y").replace("yy", "%y"). \
replace("hh:mm", "%H:%M").replace("h", "%H").replace("%H%H", "%H").replace("ss", "%S"). \
replace("d", "%e").replace("%e%e", "%d"). \
replace("mmmm", "%B").replace("mmm", "%b").replace(":mm", ":%M").replace("m", "%m").replace("%m%m", "%m"). \
replace("am/pm", "%p")
self.data = date.strftime(str(dateformat)).strip()
elif format_type == 'time': # time
self.data = str(float(self.data) * 24*60*60)
elif format_type == 'float' and ('E' in self.data or 'e' in self.data):
self.data = ("%f" %(float(self.data))).rstrip('0').rstrip('.')
except (ValueError, OverflowError):
# invalid date format
pass
# does not support it
#elif self.in_cell_formula:
# self.formula = data
def handleStartElement(self, name, attrs):
if self.in_row and name == 'c':
self.colType = attrs.get("t")
self.s_attr = attrs.get("s")
cellId = attrs.get("r")
if cellId:
self.colNum = cellId[:len(cellId)-len(self.rowNum)]
self.colIndex = 0
else:
self.colIndex+= 1
#self.formula = None
self.data = ""
self.in_cell = True
elif self.in_cell and (name == 'v' or name == 'is'):
self.in_cell_value = True
self.collected_string = ""
#elif self.in_cell and name == 'f':
# self.in_cell_formula = True
elif self.in_sheet and name == 'row' and 'r' in attrs:
self.rowNum = attrs['r']
self.in_row = True
self.columns = {}
self.spans = None
if 'spans' in attrs:
self.spans = [int(i) for i in attrs['spans'].split(":")]
elif name == 'sheetData':
self.in_sheet = True
def handleEndElement(self, name):
if self.in_cell and name == 'v':
self.in_cell_value = False
#elif self.in_cell and name == 'f':
# self.in_cell_formula = False
elif self.in_cell and name == 'c':
t = 0
for i in self.colNum: t = t*26 + ord(i) - 64
self.columns[t - 1 + self.colIndex] = self.data
self.in_cell = False
if self.in_row and name == 'row':
if len(self.columns.keys()) > 0:
d = [""] * (max(self.columns.keys()) + 1)
for k in self.columns.keys():
val = self.columns[k]
if not self.py3:
val = val.encode("utf-8")
d[k] = val
if self.spans:
l = self.spans[0] + self.spans[1] - 1
if len(d) < l:
d+= (l - len(d)) * ['']
# write line to csv
if not self.skip_empty_lines or d.count('') != len(d):
self.writer.writerow(d)
self.in_row = False
elif self.in_sheet and name == 'sheetData':
self.in_sheet = False
def convert_recursive(path, sheetid, kwargs):
kwargs['cmd'] = False
for name in os.listdir(path):
fullpath = os.path.join(path, name)
if os.path.isdir(fullpath):
convert_recursive(fullpath, kwargs)
else:
if fullpath.lower().endswith(".xlsx"):
outfilepath = fullpath[:-4] + 'csv'
print("Converting %s to %s" %(fullpath, outfilepath))
try:
Xlsx2csv(fullpath, **kwargs).convert(outfilepath, sheetid)
except zipfile.BadZipfile:
print("File %s is not a zip file" %fullpath)
if __name__ == "__main__":
if "ArgumentParser" in globals():
parser = ArgumentParser(description = "xlsx to csv convertor")
parser.add_argument('infile', metavar='xlsxfile', help="xlsx file path")
parser.add_argument('outfile', metavar='outfile', nargs='?', help="output csv file path")
parser.add_argument('-v', '--version', action='version', version='%(prog)s')
argparser = True
else:
parser = OptionParser(usage = "%prog [options] infile [outfile]", version=__version__)
parser.add_argument = parser.add_option
argparser = False
parser.add_argument("-a", "--all", dest="all", default=False, action="store_true",
help="export all sheets")
parser.add_argument("-d", "--delimiter", dest="delimiter", default=",",
help="delimiter - csv columns delimiter, 'tab' or 'x09' for tab (comma is default)")
parser.add_argument("-f", "--dateformat", dest="dateformat",
help="override date/time format (ex. %%Y/%%m/%%d)")
parser.add_argument("-i", "--ignoreempty", dest="skip_empty_lines", default=False, action="store_true",
help="skip empty lines")
parser.add_argument("-e", "--escape", dest='escape_strings', default=False, action="store_true",
help="Escape \\r\\n\\t characters")
parser.add_argument("-p", "--sheetdelimiter", dest="sheetdelimiter", default="--------",
help="sheets delimiter used to separate sheets, pass '' if you don't want delimiters (default '--------')")
parser.add_argument("-s", "--sheet", dest="sheetid", default=1, type=int,
help="sheet no to convert (0 for all sheets)")
if argparser:
options = parser.parse_args()
else:
(options, args) = parser.parse_args()
if len(args) < 1:
parser.print_usage()
sys.stderr.write("error: too few arguments" + os.linesep)
sys.exit(1)
options.infile = args[0]
options.outfile = len(args) > 1 and args[1] or None
if len(options.delimiter) == 1:
delimiter = options.delimiter
elif options.delimiter == 'tab':
delimiter = '\t'
elif options.delimiter == 'comma':
delimiter = ','
elif options.delimiter[0] == 'x':
delimiter = chr(int(options.delimiter[1:]))
else:
raise XlsxException("Invalid delimiter")
kwargs = {
'delimiter' : delimiter,
'sheetdelimiter' : options.sheetdelimiter,
'dateformat' : options.dateformat,
'skip_empty_lines' : options.skip_empty_lines,
'escape_strings' : options.escape_strings,
'cmd' : True
}
sheetid = options.sheetid
if options.all:
sheetid = 0
if os.path.isdir(options.infile):mysq
convert_recursive(options.infile, sheetid, kwargs)
else:
xlsx2csv = Xlsx2csv(options.infile, **kwargs)
outfile = options.outfile or sys.stdout
xlsx2csv.convert(outfile, sheetid)
To import this to mysql, use java -cp jython.jar org.python.util.jython xlsx2csv [xlsx-file] [csv-output-file]; mysqlimport --local db1 [csv-output-file]
要将其导入到mysql,请使用java -cp jython.jar org.python.util.jython xlsx2csv [xlsx-file] [csv-output-file]; mysqlimport --local db1 [csv-output-file]
java -cp jython.jar org.python.util.jython xlsx2csv [xlsx-file] [csv-output-file]; mysqlimport --local db1 [csv-output-file]
. java -cp jython.jar org.python.util.jython xlsx2csv [xlsx-file] [csv-output-file]; mysqlimport --local db1 [csv-output-file]
I hope this helps... 我希望这有帮助...
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.