[英]Reading a Multiple Header CSV file using CsvHelper
我有一個帶有多個標題行的大 CSV 文件,您可以在下面看到一個示例。 如何在 C# 中使用 CsvHelper 讀取它?
如下所示,標題在 CSV 中定期重復。 還有很多行以“+”開頭。
一個例子如下:
FAUF-Rückmeldungen aus SFC500: 4200 Sätze ausgegeben
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
|TR10| 410959107| 2|0800|26.07.2021 |00:01:24 |155164-B |
|TR10| 411158037| 20|0900|26.07.2021 |00:02:33 |155217-A |
|TR10| 410985740| 25|0900|26.07.2021 |00:02:39 |155196-A |
|TR10| 410279717| 57|0900|26.07.2021 |00:02:40 |155196-A |
|TR10| 410630007| 6|0900|26.07.2021 |00:02:41 |155196-B |
|TR10| 411237292| 25|0900|26.07.2021 |00:02:41 |155196-A |
|TR10| 410276088| 20|0900|26.07.2021 |00:06:56 |155217-A |
|TR10| 410950998| 1|0900|26.07.2021 |00:06:57 |155217-A |
|TR10| 411237292| 26|0900|26.07.2021 |00:06:57 |155196-A |
|TR10| 410556669| 1|0900|26.07.2021 |00:06:58 |155217-A |
|TR10| 411237292| 27|0900|26.07.2021 |00:06:58 |155196-A |
|TR10| 410556669| 2|0900|26.07.2021 |00:06:59 |155217-A |
|TR10| 410630007| 7|0900|26.07.2021 |00:07:00 |155196-B |
|TR10| 411525402| 5|0900|26.07.2021 |00:07:00 |155114-A |
|TR10| 411525402| 6|0900|26.07.2021 |00:07:01 |155114-A |
|TR10| 411528024| 1|0900|26.07.2021 |00:07:02 |155114-A |
|TR10| 411528024| 2|0900|26.07.2021 |00:07:03 |155114-A |
|TR10| 411528929| 30|0900|26.07.2021 |00:07:04 |155114-A |
|TR10| 411544500| 3|0900|26.07.2021 |00:07:05 |155114-A |
|TR10| 411528928| 8|0905|26.07.2021 |00:10:19 |155123-C |
|TR10| 410279717| 58|0900|26.07.2021 |00:11:48 |155196-A |
|TR10| 411237292| 28|0900|26.07.2021 |00:11:49 |155196-A |
|TR10| 410630007| 8|0900|26.07.2021 |00:11:50 |155196-B |
|TR10| 411237293| 2|0990|26.07.2021 |00:14:14 |155164-A |
|TR10| 410633488| 1|0600|26.07.2021 |00:14:52 |155163-0 |
|TR10| 410633212| 1|0600|26.07.2021 |00:14:59 |155163-0 |
|TR10| 411218828| 2|0600|26.07.2021 |00:15:08 |155163-0 |
|TR10| 411438190| 3|0910|26.07.2021 |00:15:14 |155163-E |
|TR10| 411527748| 1|0910|26.07.2021 |00:15:19 |155163-B |
|TR10| 411367433| 2|0910|26.07.2021 |00:16:17 |155163-D |
|TR10| 411032464| 3|0910|26.07.2021 |00:16:26 |155163-D |
|TR10| 411525402| 7|0900|26.07.2021 |00:16:49 |155114-A |
|TR10| 411528024| 3|0900|26.07.2021 |00:16:50 |155114-A |
|TR10| 411544500| 4|0900|26.07.2021 |00:16:51 |155114-A |
|TR10| 410985740| 26|0900|26.07.2021 |00:16:55 |155196-A |
|TR10| 410279717| 59|0900|26.07.2021 |00:16:56 |155196-A |
|TR10| 411237292| 29|0900|26.07.2021 |00:16:57 |155196-A |
|TR10| 410900407| 2|0040|26.07.2021 |00:17:46 |155135-D |
|TR10| 409944144| 1|0910|26.07.2021 |00:18:47 |155163-C |
|TR10| 411544499| 1|0905|26.07.2021 |00:19:42 |155123-C |
|TR10| 411525401| 5|0905|26.07.2021 |00:19:56 |155123-C |
|TR10| 410985740| 27|0900|26.07.2021 |00:21:47 |155196-A |
|TR10| 410630007| 9|0900|26.07.2021 |00:21:48 |155196-B |
|TR10| 411237292| 30|0900|26.07.2021 |00:21:48 |155196-A |
|TR10| 411544437| 4|0900|26.07.2021 |00:22:22 |155114-A |
|TR10| 411544436| 1|0905|26.07.2021 |00:22:41 |155123-C |
|TR10| 411551402| 2|0005|26.07.2021 |00:24:00 |155115-B |
|TR10| 411362459| 1|0005|26.07.2021 |00:24:52 |155115-B |
|TR10| 411369893| 1|0060|26.07.2021 |00:25:25 |155112-G |
|TR10| 411530629| 1|0005|26.07.2021 |00:25:37 |155115-B |
|TR10| 411369897| 1|0063|26.07.2021 |00:25:40 |155112-F |
|TR10| 411369894| 1|0070|26.07.2021 |00:25:54 |155518-0 |
|TR10| 411369897| 2|0063|26.07.2021 |00:26:02 |155112-F |
|TR10| 411369894| 2|0070|26.07.2021 |00:26:10 |155518-0 |
|TR10| 411369897| 3|0063|26.07.2021 |00:26:21 |155112-F |
|TR10| 411369894| 3|0070|26.07.2021 |00:26:28 |155518-0 |
|TR10| 411369897| 4|0063|26.07.2021 |00:26:37 |155112-F |
|TR10| 411369894| 4|0070|26.07.2021 |00:26:43 |155518-0 |
|TR10| 410950998| 2|0900|26.07.2021 |00:26:45 |155217-A |
+----+---------------+---------------+----+--------------+-------------+------------+
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
|TR10| 410279717| 60|0900|26.07.2021 |00:26:46 |155196-A |
|TR10| 410950998| 3|0900|26.07.2021 |00:26:46 |155217-A |
|TR10| 410630007| 10|0900|26.07.2021 |00:26:47 |155196-B |
|TR10| 411369897| 5|0063|26.07.2021 |00:26:54 |155112-F |
|TR10| 411369894| 5|0070|26.07.2021 |00:27:04 |155518-0 |
|TR10| 411369897| 6|0063|26.07.2021 |00:27:15 |155112-F |
|TR10| 411369894| 6|0070|26.07.2021 |00:27:23 |155518-0 |
|TR10| 411086222| 1|0001|26.07.2021 |00:27:50 |155212-A |
|TR10| 411086223| 1|0005|26.07.2021 |00:27:58 |155210-A |
|TR10| 411520617| 7|0905|26.07.2021 |00:30:28 |155123-C |
|TR10| 411872172| 1|0010|26.07.2021 |00:31:27 |155145-A |
|TR10| 411872177| 1|0010|26.07.2021 |00:31:39 |155145-A |
|TR10| 411528024| 4|0900|26.07.2021 |00:31:50 |155114-A |
|TR10| 411872182| 1|0010|26.07.2021 |00:31:50 |155145-A |
|TR10| 410985740| 28|0900|26.07.2021 |00:31:54 |155196-A |
|TR10| 410279717| 61|0900|26.07.2021 |00:31:55 |155196-A |
|TR10| 411872187| 1|0010|26.07.2021 |00:32:02 |155145-A |
|TR10| 410699054| 1|0060|26.07.2021 |00:32:52 |155112-K |
|TR10| 410699055| 1|0063|26.07.2021 |00:33:01 |155112-L |
|TR10| 410699056| 1|0070|26.07.2021 |00:33:11 |155518-0 |
|TR10| 411434349| 2|0080|26.07.2021 |00:33:18 |155213-F |
|TR10| 410850582| 1|0051|26.07.2021 |00:33:54 |155146-E |
|TR10| 410850583| 1|0055|26.07.2021 |00:34:01 |155146-F |
|TR10| 410850580| 1|0080|26.07.2021 |00:34:09 |155518-0 |
|TR10| 410774889| 1|0050|26.07.2021 |00:34:13 |155171-D |
|TR10| 411243279| 2|0005|26.07.2021 |00:34:27 |155531-A |
|TR10| 411243280| 3|0010|26.07.2021 |00:34:37 |155550-B |
|TR10| 411243281| 1|0020|26.07.2021 |00:34:48 |155550-E |
|TR10| 411228376| 1|0001|26.07.2021 |00:36:15 |155112-D |
|TR10| 410985740| 29|0900|26.07.2021 |00:36:46 |155196-A |
|TR10| 411525402| 8|0900|26.07.2021 |00:36:46 |155114-A |
|TR10| 411237292| 31|0900|26.07.2021 |00:36:47 |155196-A |
|TR10| 411533238| 1|0001|26.07.2021 |00:36:55 |155144-A |
|TR10| 410898440| 2|0010|26.07.2021 |00:37:02 |155171-A |
|TR10| 411533239| 1|0005|26.07.2021 |00:37:02 |155104-A |
|TR10| 411874854| 1|0010|26.07.2021 |00:37:37 |FCM-E |
|TR10| 411032291| 1|0060|26.07.2021 |00:40:09 |155112-G |
|TR10| 411874855| 1|0010|26.07.2021 |00:40:21 |FCM-E |
|TR10| 411032293| 1|0063|26.07.2021 |00:40:35 |155112-F |
|TR10| 411032292| 1|0070|26.07.2021 |00:40:42 |155518-0 |
|TR10| 411032293| 2|0063|26.07.2021 |00:40:51 |155112-F |
|TR10| 411032292| 2|0070|26.07.2021 |00:40:59 |155518-0 |
|TR10| 411032293| 3|0063|26.07.2021 |00:41:08 |155112-F |
|TR10| 411032292| 3|0070|26.07.2021 |00:41:15 |155518-0 |
|TR10| 411032293| 4|0063|26.07.2021 |00:41:25 |155112-F |
|TR10| 411032292| 4|0070|26.07.2021 |00:41:32 |155518-0 |
|TR10| 411032293| 5|0063|26.07.2021 |00:41:41 |155112-F |
|TR10| 410556669| 3|0900|26.07.2021 |00:41:46 |155217-A |
|TR10| 410279717| 62|0900|26.07.2021 |00:41:47 |155196-A |
|TR10| 411237292| 32|0900|26.07.2021 |00:41:48 |155196-A |
|TR10| 411032292| 5|0070|26.07.2021 |00:41:49 |155518-0 |
|TR10| 411032293| 6|0063|26.07.2021 |00:41:59 |155112-F |
|TR10| 411032292| 6|0070|26.07.2021 |00:42:07 |155518-0 |
|TR10| 411535704| 1|0010|26.07.2021 |00:43:40 |155144-A |
|TR10| 411875458| 1|0010|26.07.2021 |00:43:54 |155144-A |
|TR10| 411528024| 5|0900|26.07.2021 |00:46:47 |155114-A |
|TR10| 410985740| 30|0900|26.07.2021 |00:46:48 |155196-A |
|TR10| 410279717| 63|0900|26.07.2021 |00:46:50 |155196-A |
|TR10| 411525401| 6|0905|26.07.2021 |00:46:56 |155123-C |
|TR10| 411528023| 1|0905|26.07.2021 |00:47:30 |155123-C |
+----+---------------+---------------+----+--------------+-------------+------------+
我生成了一個類
namespace CsvHelper
{
    /// <summary>
    /// Console program from the question: a first attempt at reading the
    /// pipe-delimited report with CsvHelper.
    /// </summary>
    class Program
    {
        /// <summary>Program entry point; simply runs <see cref="ReadCsv"/>.</summary>
        static void Main(string[] args) => ReadCsv();

        /// <summary>
        /// Opens <c>file.csv</c> with a "|" delimiter and requests its rows as
        /// <see cref="SFC"/> records.
        /// </summary>
        static void ReadCsv()
        {
            var pipeDelimited = new CsvConfiguration(CultureInfo.InvariantCulture) { Delimiter = "|" };

            using var fileReader = new StreamReader("file.csv");
            using var csvReader = new CsvReader(fileReader, pipeDelimited);

            // NOTE(review): the result is never enumerated or stored, so this method
            // does not yet hand any records to a caller.
            var requested = csvReader.GetRecords<SFC>();
        }

        /// <summary>One row of the report; property names mirror the column headers.</summary>
        public class SFC
        {
            public string Werk { get; set; }

            public string Rückmeldenummer { get; set; }

            public int Rückmeldezähler { get; set; }

            public int AVO { get; set; }

            public DateTime Rückmeldedatum { get; set; }

            public TimeSpan Rückmeldezeit { get; set; }

            public string Arbeitsplatz { get; set; }
        }
    }
}
如何使用 CsvHelper 將此文件讀入 List<SFC>?
您的文本文件由以下重復的行模式組成:
1. 一個這樣的初始分隔符行:+----+---------------+
2. 一個這樣的標題行:|Werk|Rückmeldenummer|
3. 一個位于標題與數據之間的分隔符行(格式與初始分隔符行相同)。
4. 若干這樣的數據行:
|TR10| 410959107|
|TR10| 410959107|
5. 一個結束分隔符行(格式與初始分隔符行相同)。
您可以通過跳過初始行,然后檢查第一個字段是否“看起來像”分隔符,來讀取這種格式的 CSV 文件,如下所示:
/// <summary>
/// Position of the most recently read row within the repeating
/// "delimiter / header / delimiter / data rows / delimiter" layout of the file.
/// </summary>
enum ReadState
{
    /// <summary>Outside any table: start of file, or just after a table's closing delimiter.</summary>
    Initial = 0,

    /// <summary>The delimiter row that opens a table was just read.</summary>
    InitialDelimiter = 1,

    /// <summary>The header row was just read.</summary>
    Header = 2,

    /// <summary>The delimiter row between the header and the data rows was just read.</summary>
    HeaderDataDelimiter = 3,

    /// <summary>A data row was just read.</summary>
    Data = 4,
}
/// <summary>
/// Reads a pipe-delimited report in which the header is repeated periodically and
/// every table is framed by delimiter rows starting with "+-", returning the data
/// rows of all tables as one list.
/// </summary>
/// <typeparam name="TRecord">Record type each data row is converted to.</typeparam>
/// <param name="filename">Path of the file to read.</param>
/// <param name="map">Class map registered with the reader before any row is parsed.</param>
/// <returns>The records of every data row, in file order.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="filename"/> or <paramref name="map"/> is null.
/// </exception>
/// <exception cref="ApplicationException">
/// A row arrives in a state that has no transition for it: a delimiter row directly
/// after another table-opening or header/data delimiter, or a non-delimiter row
/// directly after a header row.
/// </exception>
public static List<TRecord> ReadCsv<TRecord>(string filename, ClassMap<TRecord> map)
{
    if (filename is null)
    {
        throw new ArgumentNullException(nameof(filename));
    }
    if (map is null)
    {
        throw new ArgumentNullException(nameof(map));
    }

    List<TRecord> records = new ();
    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = "|",
        // Header cells are padded to the column width, so trim them before matching member names.
        PrepareHeaderForMatch = args => args.Header.Trim(),
        // Data cells are padded as well; remove this line to keep the padding in string fields.
        TrimOptions = TrimOptions.Trim,
    };
    using (var reader = new StreamReader(filename))
    using (var csv = new CsvReader(reader, config))
    {
        csv.Context.RegisterClassMap(map);
        var state = ReadState.Initial;
        while (csv.Read())
        {
            // Ordinal comparison: "+-" is a structural marker, not linguistic text, so the
            // result must not depend on the current culture. A missing field counts as "not a delimiter".
            var isDelimiter = csv.GetField(0)?.StartsWith("+-", StringComparison.Ordinal) == true;
            var newState = (isDelimiter, state) switch
            {
                (true, ReadState.Initial) => ReadState.InitialDelimiter,
                (true, ReadState.Header) => ReadState.HeaderDataDelimiter,
                //(true, ReadState.HeaderDataDelimiter) => ReadState.Initial, // Uncomment if your CSV file might contain empty tables with headers and delimiters but no data.
                (true, ReadState.Data) => ReadState.Initial,
                // Non-delimiter rows outside a table (e.g. the report title line) are skipped.
                (false, ReadState.Initial) => ReadState.Initial,
                (false, ReadState.InitialDelimiter) => ReadState.Header,
                (false, ReadState.HeaderDataDelimiter) => ReadState.Data,
                (false, ReadState.Data) => ReadState.Data,
                // ApplicationException is kept so that existing catch blocks keep working.
                _ => throw new ApplicationException($"Unexpected row on state {state}")
            };
            switch (newState)
            {
                case ReadState.Header:
                    // Every table repeats the header, so it is re-read for each table.
                    csv.ReadHeader();
                    break;
                case ReadState.Data:
                    records.Add(csv.GetRecord<TRecord>());
                    break;
            }
            state = newState;
        }
    }
    return records;
}
然后為 SFC 定義一個 ClassMap,如下:
/// <summary>
/// CsvHelper class map for <see cref="SFC"/>: auto-maps the members and then overrides
/// the parsing options of <c>Rückmeldedatum</c>, whose values look like <c>26.07.2021</c>.
/// </summary>
class SFCMap : ClassMap<SFC>
{
    /// <summary>Creates the map, auto-mapping with an invariant-culture configuration.</summary>
    public SFCMap() : this(new CsvConfiguration(CultureInfo.InvariantCulture)) {}

    /// <summary>Creates the map, auto-mapping with the supplied configuration.</summary>
    /// <param name="config">Configuration passed to <c>AutoMap</c>.</param>
    public SFCMap(CsvConfiguration config)
    {
        AutoMap(config);
        // "MM" is the month specifier. Lowercase "mm" means minutes, which would parse
        // "26.07.2021" as 7 minutes past midnight in the default month instead of July.
        Map(m => m.Rückmeldedatum)
            .TypeConverterOption.Format("dd.MM.yyyy")
            .TypeConverterOption.DateTimeStyles(DateTimeStyles.AllowWhiteSpaces);
    }
}
/// <summary>
/// One data row of the report. The property names are the same as the column headers
/// in the file (Werk, Rückmeldenummer, Rückmeldezähler, AVO, Rückmeldedatum,
/// Rückmeldezeit, Arbeitsplatz).
/// </summary>
public class SFC
{
/// <summary>Value of the "Werk" column, e.g. <c>TR10</c>.</summary>
public string Werk { get; set; }
/// <summary>Value of the "Rückmeldenummer" column, e.g. <c>410959107</c>; kept as text.</summary>
public string Rückmeldenummer { get; set; }
/// <summary>Value of the "Rückmeldezähler" column, parsed as an integer.</summary>
public int Rückmeldezähler { get; set; }
/// <summary>Value of the "AVO" column, e.g. <c>0800</c>.</summary>
public string AVO { get; set; } // string rather than int, so that leading zeros (e.g. 0800) are not lost
/// <summary>Value of the "Rückmeldedatum" column, e.g. <c>26.07.2021</c>.</summary>
public DateTime Rückmeldedatum { get; set; }
/// <summary>Value of the "Rückmeldezeit" column, e.g. <c>00:01:24</c>.</summary>
public TimeSpan Rückmeldezeit { get; set; } // TimeSpan (an earlier version misspelled the type as Timespan)
/// <summary>Value of the "Arbeitsplatz" column, e.g. <c>155164-B</c>.</summary>
public string Arbeitsplatz { get; set; }
}
您將能夠將您的 CSV 文件讀入 List<SFC>,如下所示:
// Read every data row of the file into a list, using the SFC-specific class map.
List<SFC> records = ReadCsv<SFC>(filename, new SFCMap());
筆記:
您將 AVO 定義為 int,但該字段的值帶有前導零,例如 0800。因此,我將其類型更改為 string,以便保留這些前導零。
解析 Rückmeldedatum 時需要指定日期格式。正確的格式字符串是 "dd.MM.yyyy":月份必須用大寫 MM,小寫 mm 在 .NET 中表示分鐘。我添加了 ClassMap<SFC> 以提供此功能。
您的“CSV”實際上是一個固定寬度的文件,而不是真正的 CSV 文件。我假設您希望去掉字符串字段兩側用于對齊的填充空格。如果不需要這樣做,請刪除 TrimOptions = TrimOptions.Trim。
如果您的 CSV 文件可能包含帶有標題和分隔符的空表,但沒有像這樣的數據:
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
+----+---------------+---------------+----+--------------+-------------+------------+
然后取消注釋:
//(true, ReadState.HeaderDataDelimiter) => ReadState.Initial,
另請參閱 CsvHelper 文檔頁面“讀取多個數據集”(Reading Multiple Data Sets),其中討論了類似的解析問題。
在線演示(fiddle)見此處。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.