简体   繁体   中英

C# skip CSV columns while importing to a DataTable

In the code below I am trying to import a large CSV file into a DataTable, using the first row as the header. While importing, I skip certain header columns if their names contain certain text strings. I also want to skip the corresponding value in every data row when its header column has been skipped. Please help. Here is my code.

 /// <summary>
 /// Reads a comma-delimited file into a <see cref="DataTable"/>, using the first
 /// record as column headers and leaving out every column whose header contains
 /// "code", "Q" or "M" (case-sensitive), together with that column's values.
 /// </summary>
 /// <param name="csv_file_path">Path of the CSV file to read.</param>
 /// <returns>
 /// The populated table. If reading fails, the exception message is written to the
 /// console and whatever was loaded up to that point is returned.
 /// </returns>
 private static DataTable GetDataTabletFromCSVFile(string csv_file_path)
    {
        DataTable csvData = new DataTable();

        try
        {
            using (TextFieldParser csvReader = new TextFieldParser(csv_file_path))
            {
                csvReader.SetDelimiters(new string[] { "," });
                csvReader.HasFieldsEnclosedInQuotes = true;

                string[] colFields = csvReader.ReadFields();
                if (colFields == null)
                {
                    // No header record at all (e.g. an empty file): nothing to load.
                    return csvData;
                }

                // keepColumn[i] records whether source field i is part of the table, so
                // the row loop can drop exactly the fields whose header was skipped.
                bool[] keepColumn = new bool[colFields.Length];
                for (int index = 0; index < colFields.Length; index++)
                {
                    string column = colFields[index];
                    bool skip = column.Contains("code")
                        || column.Contains("Q")
                        || column.Contains("M");
                    if (skip)
                    {
                        continue;
                    }

                    keepColumn[index] = true;
                    DataColumn datecolumn = new DataColumn(column);
                    datecolumn.AllowDBNull = true;
                    csvData.Columns.Add(datecolumn);
                }

                while (!csvReader.EndOfData)
                {
                    string[] fieldData = csvReader.ReadFields();
                    if (fieldData == null)
                    {
                        continue;
                    }

                    // One slot per kept column; slots never assigned stay null, which
                    // the DataTable stores as DBNull (covers rows shorter than the header).
                    string[] rowValues = new string[csvData.Columns.Count];
                    int target = 0;

                    // Fields beyond the header width have no column to go to and are ignored.
                    for (int i = 0; i < fieldData.Length && i < keepColumn.Length; i++)
                    {
                        if (!keepColumn[i])
                        {
                            continue;
                        }

                        // Making empty value as null
                        rowValues[target] = fieldData[i] == "" ? null : fieldData[i];
                        target++;
                    }

                    csvData.Rows.Add(rowValues);
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort import: report the problem and return what was read so far.
            Console.WriteLine(ex.Message);
        }
        return csvData;
    }

You need to store which columns you have skipped, for example in this way:

// Zero-based positions of the header fields that were left out of the table.
// A HashSet keeps the per-field membership test in the row loop constant-time,
// which matters when every field of every row of a large file is checked.
HashSet<int> skippedColumnOrdinals = new HashSet<int>();

// A header is skipped when it contains any of these (case-sensitive) fragments.
string[] skipWhenContains = { "code", "Q", "M" };

for (int index = 0; index < colFields.Length; index++)
{
    string column = colFields[index];
    if (skipWhenContains.Any(marker => column.Contains(marker)))
    {
        // Remember the position so the same field is dropped from every data row.
        skippedColumnOrdinals.Add(index);
        continue;
    }

    csvData.Columns.Add(new DataColumn(column) { AllowDBNull = true });
}

while (!csvReader.EndOfData)
{
    // Drop the fields at skipped positions, then turn empty strings into null
    // so they are stored as DBNull.
    string[] fieldData = csvReader.ReadFields()
        .Where((field, index) => !skippedColumnOrdinals.Contains(index))
        .Select(field => field == "" ? null : field)
        .ToArray();
    csvData.Rows.Add(fieldData);
}

A simpler solution would be to import the entire content of the .csv file into the data table, and then simply remove the columns from the data table:

/// <summary>
/// Reads a comma-delimited file into a <see cref="DataTable"/> using the first
/// record as column headers, then removes every column whose name contains
/// "code", "Q" or "M" (case-sensitive).
/// </summary>
/// <param name="csv_file_path">Path of the CSV file to read.</param>
/// <returns>
/// The populated table. If reading fails, the exception message is written to the
/// console and the table is returned in whatever state it had reached.
/// </returns>
private static DataTable GetDataTabletFromCSVFile(string csv_file_path)
{
    DataTable csvData = new DataTable();

    try
    {
        using (TextFieldParser csvReader = new TextFieldParser(csv_file_path))
        {
            csvReader.SetDelimiters(new string[] { "," });
            csvReader.HasFieldsEnclosedInQuotes = true;

            string[] colFields = csvReader.ReadFields();
            if (colFields == null)
            {
                // No header record at all (e.g. an empty file): nothing to load.
                return csvData;
            }

            foreach (string column in colFields)
            {
                DataColumn datecolumn = new DataColumn(column);
                datecolumn.AllowDBNull = true;
                csvData.Columns.Add(datecolumn);
            }

            while (!csvReader.EndOfData)
            {
                string[] fieldData = csvReader.ReadFields();
                //Making empty value as null
                for (int i = 0; i < fieldData.Length; i++)
                {
                    if (fieldData[i] == "")
                    {
                        fieldData[i] = null;
                    }
                }
                csvData.Rows.Add(fieldData);
            }
        }

        // Get the names of the columns to remove. The query is materialized with
        // ToArray() because removing columns while lazily enumerating
        // csvData.Columns would invalidate the enumerator.
        string[] columnNamesToRemove = csvData.Columns
            .OfType<DataColumn>()
            .Select(c => c.ColumnName)
            .Where(name => name.Contains("code")
                || name.Contains("Q")
                || name.Contains("M"))
            .ToArray();

        // Remove the columns (and with them their values) from the data table.
        foreach (string name in columnNamesToRemove)
        {
            csvData.Columns.Remove(name);
        }
    }
    catch (Exception ex)
    {
        // Best-effort import: report the problem and return what was read so far.
        Console.WriteLine(ex.Message);
    }
    return csvData;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM