[英]How do you convert Excel to CSV using OpenXML SDK?
我需要将Excel(2010)文件转换为csv。 目前我使用Excel Interop打开文件并另存为(SaveAs)csv,这种方式运行良好。 然而,Interop在我们的运行环境中存在一些问题,所以我正在寻找另一种解决方案。
我发现,不借助Interop处理Excel文件的一种方法是使用OpenXML SDK。 我拼凑了一些代码,遍历每个工作表中的所有单元格,然后把它们直接写入另一个CSV文件。
我遇到的一个问题是如何处理空行和空单元格。 使用这段代码时,空白行和空白单元格似乎根本不会出现在遍历结果中,因此我无法得知它们的存在。 有没有办法遍历所有行和单元格(包括空白的)?
// Source workbook; one CSV per sheet is written next to it.
string filename = @"D:\test.xlsx";
string outputDir = Path.GetDirectoryName(filename);

// Open the workbook read-only and export each sheet in turn.
using (SpreadsheetDocument workbook = SpreadsheetDocument.Open(filename, false))
{
    foreach (Sheet currentSheet in workbook.WorkbookPart.Workbook.Descendants<Sheet>())
    {
        // Resolve the sheet entry to its worksheet part via the relationship id.
        var sheetPart = (WorksheetPart)workbook.WorkbookPart.GetPartById(currentSheet.Id);
        Worksheet sheetContent = sheetPart.Worksheet;

        // Shared-string cells store an index into this table instead of their text.
        SharedStringItem[] sharedStrings = workbook.WorkbookPart
            .GetPartsOfType<SharedStringTablePart>()
            .First()
            .SharedStringTable
            .Elements<SharedStringItem>()
            .ToArray();

        // Fall back to the workbook's own directory when no output directory is set.
        if (string.IsNullOrWhiteSpace(outputDir))
        {
            outputDir = Path.GetDirectoryName(filename);
        }

        // Target file: "<workbook>_<sheet>.csv" inside the output directory.
        string csvPath = Path.Combine(
            outputDir,
            string.Format("{0}_{1}.csv", Path.GetFileNameWithoutExtension(filename), currentSheet.Name));

        using (StreamWriter writer = File.CreateText(csvPath))
        {
            // NOTE: only Row elements present in the sheet, and only the child cells
            // present in each row, are visited; nothing is emitted for gaps between them.
            foreach (Row sheetRow in sheetContent.Descendants<Row>())
            {
                var line = new StringBuilder();
                foreach (Cell cell in sheetRow)
                {
                    string text;
                    if (cell.CellValue == null)
                    {
                        text = string.Empty;
                    }
                    else if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
                    {
                        // The cell value is an index into the shared-string table.
                        text = sharedStrings[int.Parse(cell.CellValue.Text)].InnerText;
                    }
                    else
                    {
                        text = cell.CellValue.Text;
                    }

                    // Every field is wrapped in double quotes and followed by a comma.
                    line.Append('"').Append(text.Trim()).Append("\",");
                }

                // Drop the trailing separator(s) before writing the record.
                writer.WriteLine(line.ToString().TrimEnd(','));
            }
        }
    }
}
如果我有以下Excel文件数据:
one,two,three
,,
last,,row
我将得到以下CSV(这是错误的):
one,two,three
last,row
// Usage: export the sheets of D:\test.xlsx as CSV files into the C:\ directory.
ConvertXlsxToCsv(@"D:\test.xlsx", @"C:\");
/// <summary>
/// Converts every worksheet of an .xlsx workbook into its own CSV file named
/// "{workbook name}_{sheet name}.csv".
/// </summary>
/// <remarks>
/// Rows and cells that are blank may be missing from the sheet XML altogether, so each
/// value is positioned by its row number and column letters, and the gaps are written
/// as empty rows / empty fields. Every record is padded to the width of the widest row
/// so the output stays rectangular. Values are trimmed; a field is quoted only when it
/// contains a comma, a double quote or a line break. Sheets that are not worksheets
/// (for example chart sheets) are skipped. Exceptions propagate to the caller with
/// their original stack trace.
/// </remarks>
/// <param name="SourceXlsxName">Path of the .xlsx file to read.</param>
/// <param name="DestinationCsvDirectory">
/// Directory that receives the CSV files; when null or empty, the directory of
/// <paramref name="SourceXlsxName"/> is used.
/// </param>
internal static void ConvertXlsxToCsv(string SourceXlsxName, string DestinationCsvDirectory)
{
    if (string.IsNullOrEmpty(DestinationCsvDirectory))
    {
        DestinationCsvDirectory = Path.GetDirectoryName(SourceXlsxName);
    }

    // Deliberately no try/catch: a "catch (Exception Ex) { throw Ex; }" wrapper adds
    // nothing and resets the stack trace of the original failure.
    using (SpreadsheetDocument document = SpreadsheetDocument.Open(SourceXlsxName, false))
    {
        WorkbookPart workbookPart = document.WorkbookPart;

        // The shared-string table belongs to the workbook, so load it once for all sheets.
        SharedStringItem[] sharedStrings = LoadSharedStrings(workbookPart);

        foreach (Sheet sheet in workbookPart.Workbook.Descendants<Sheet>())
        {
            WorksheetPart worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
            if (worksheetPart == null)
            {
                // Not a worksheet (e.g. a chart sheet): there are no cells to export.
                continue;
            }

            string[][] grid = ReadSheetGrid(worksheetPart.Worksheet, sharedStrings);
            int width = grid.Length == 0 ? 0 : grid.Max(r => r.Length);

            string csvPath = Path.Combine(
                DestinationCsvDirectory,
                string.Format("{0}_{1}.csv", Path.GetFileNameWithoutExtension(SourceXlsxName), sheet.Name));

            using (StreamWriter outputFile = File.CreateText(csvPath))
            {
                foreach (string[] rowValues in grid)
                {
                    StringBuilder line = new StringBuilder();
                    for (int column = 0; column < width; column++)
                    {
                        if (column > 0)
                        {
                            line.Append(',');
                        }

                        // Columns beyond the row's last stored cell stay empty.
                        if (column < rowValues.Length)
                        {
                            line.Append(EscapeCsvField(rowValues[column]));
                        }
                    }

                    outputFile.WriteLine(line.ToString());
                }
            }
        }
    }
}

// Returns the workbook's shared strings, or an empty array when the workbook has no shared-string table.
private static SharedStringItem[] LoadSharedStrings(WorkbookPart workbookPart)
{
    SharedStringTablePart part = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
    if (part == null || part.SharedStringTable == null)
    {
        return new SharedStringItem[0];
    }

    return part.SharedStringTable.Elements<SharedStringItem>().ToArray();
}

// Reads a worksheet into a jagged array indexed [row][column] (both zero-based), filling skipped rows and cells with blanks.
private static string[][] ReadSheetGrid(Worksheet worksheet, SharedStringItem[] sharedStrings)
{
    var grid = new System.Collections.Generic.List<string[]>();
    string[] blankRow = new string[0];

    foreach (Row row in worksheet.Descendants<Row>())
    {
        // RowIndex is 1-based and optional; without it the row follows the previous one.
        int rowIndex = row.RowIndex != null ? (int)row.RowIndex.Value - 1 : grid.Count;
        if (rowIndex < 0)
        {
            rowIndex = grid.Count;
        }

        var values = new System.Collections.Generic.List<string>();
        foreach (Cell cell in row.Elements<Cell>())
        {
            // CellReference (e.g. "C7") is optional; without it the cell follows the previous one.
            string reference = cell.CellReference != null ? cell.CellReference.Value : null;
            int columnIndex = GetColumnIndex(reference, values.Count);

            while (values.Count <= columnIndex)
            {
                values.Add(string.Empty);
            }

            values[columnIndex] = GetCellText(cell, sharedStrings);
        }

        while (grid.Count <= rowIndex)
        {
            grid.Add(blankRow);
        }

        grid[rowIndex] = values.ToArray();
    }

    return grid.ToArray();
}

// Converts the column letters of a cell reference ("A" -> 0, "Z" -> 25, "AA" -> 26); returns fallback when there are none.
private static int GetColumnIndex(string cellReference, int fallback)
{
    if (string.IsNullOrEmpty(cellReference))
    {
        return fallback;
    }

    int column = 0;
    foreach (char ch in cellReference)
    {
        char letter = char.ToUpperInvariant(ch);
        if (letter < 'A' || letter > 'Z')
        {
            break; // reached the row-number part of the reference
        }

        column = (column * 26) + (letter - 'A' + 1);
    }

    return column > 0 ? column - 1 : fallback;
}

// Returns the trimmed text of a cell, resolving shared-string indexes and inline strings.
private static string GetCellText(Cell cell, SharedStringItem[] sharedStrings)
{
    bool hasType = cell.DataType != null;

    // Inline strings keep their text in an <is> child rather than in the cell value.
    if (hasType && cell.DataType.Value == CellValues.InlineString)
    {
        return cell.InlineString != null ? cell.InlineString.InnerText.Trim() : string.Empty;
    }

    if (cell.CellValue == null)
    {
        return string.Empty;
    }

    string raw = cell.CellValue.Text ?? string.Empty;
    if (hasType && cell.DataType.Value == CellValues.SharedString)
    {
        // The stored value is an index into the shared-string table; parse it culture-independently.
        int index = int.Parse(raw, System.Globalization.CultureInfo.InvariantCulture);
        return sharedStrings[index].InnerText.Trim();
    }

    return raw.Trim();
}

// Quotes a CSV field when it contains a comma, a double quote or a line break, doubling embedded quotes.
private static string EscapeCsvField(string field)
{
    if (field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
    {
        return field;
    }

    return "\"" + field.Replace("\"", "\"\"") + "\"";
}
我不认为OpenXml是解决此问题的正确工具。 我建议使用OleDbConnection将数据从工作表中取出,然后使用此方法将数据转换为csv文件。
一旦你将数据存储在内存中的数据表中,你就可以更好地控制这种情况。
您可以使用oledb连接并查询excel文件,将行转换为csv格式并将结果保存到文件中
下面是我为此测试过的一个简单示例:它为excel文件中的每个工作表分别创建一个csv文件,文件采用Unicode编码,并以制表符作为分隔符
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.OleDb;
using System.IO;
using System.Linq;
using System.Text;
namespace XlsTests
{
    /// <summary>
    /// Console sample that reads an Excel workbook through the ACE OLE DB provider and
    /// writes one Unicode-encoded, tab-delimited file per table reported by the provider.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Exports every table of the workbook to "{workbook}_{table}.csv" next to the workbook.
        /// </summary>
        /// <param name="args">
        /// Optional: args[0] is the workbook path; when omitted, C:\test.xlsx is used.
        /// </param>
        static void Main(string[] args)
        {
            string _XlsConnectionStringFormat = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 12.0 Xml;HDR=NO;IMEX=1\"";
            string xlsFilename = args.Length > 0 ? args[0] : @"C:\test.xlsx";

            // The using block closes and disposes the connection on every path, so no
            // explicit Close() in a finally block is needed.
            using (OleDbConnection conn = new OleDbConnection(string.Format(_XlsConnectionStringFormat, xlsFilename)))
            {
                try
                {
                    conn.Open();

                    string outputFilenamePrefix = Path.GetFileNameWithoutExtension(xlsFilename);
                    string dir = Path.GetDirectoryName(xlsFilename);

                    // Each entry of the "Tables" schema collection is queried as one table.
                    string[] sheetNames = conn.GetSchema("Tables")
                        .AsEnumerable()
                        .Select(a => a["TABLE_NAME"].ToString())
                        .ToArray();

                    foreach (string sheetName in sheetNames)
                    {
                        string outputFilename = Path.Combine(dir, string.Format("{0}_{1}.csv", outputFilenamePrefix, sheetName));

                        using (StreamWriter sw = new StreamWriter(File.Create(outputFilename), Encoding.Unicode))
                        using (DataSet ds = new DataSet())
                        using (OleDbDataAdapter adapter = new OleDbDataAdapter(string.Format("SELECT * FROM [{0}]", sheetName), conn))
                        {
                            adapter.Fill(ds);

                            foreach (DataRow dr in ds.Tables[0].Rows)
                            {
                                // Every field is wrapped in double quotes, so embedded quotes
                                // must be doubled to keep the record parseable.
                                string[] cells = dr.ItemArray
                                    .Select(a => a.ToString().Replace("\"", "\"\""))
                                    .ToArray();

                                sw.WriteLine("\"" + string.Join("\"\t\"", cells) + "\"");
                            }
                        }
                    }
                }
                catch (Exception exp)
                {
                    // Report the failure instead of swallowing it silently, and signal it
                    // to the calling process through the exit code.
                    Console.Error.WriteLine(exp);
                    Environment.ExitCode = 1;
                }
            }
        }
    }
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.