简体   繁体   中英

EPPlus - Read Excel Table

Using EPPlus, I want to read an excel table, then store all the contents from each column into its corresponding List . I want it to recognize the table's heading and categorize the contents based on that.

For example, if my excel table is as below:

Id    Name     Gender
 1    John     Male
 2    Maria    Female
 3    Daniel   Unknown

I want the data to store in List<ExcelData> where

public class ExcelData
{
    public string Id { get; set; }
    public string Name { get; set; }
    public string Gender { get; set; }
}

So that I can call out the contents using the heading name. For example, when I do this:

foreach (var data in ThatList)
{
     Console.WriteLine(data.Id + data.Name + data.Gender);
}

It will give me this output:

1JohnMale
2MariaFemale
3DanielUnknown

This is really all I got:

var package = new ExcelPackage(new FileInfo(@"C:\ExcelFile.xlsx"));
ExcelWorksheet sheet = package.Workbook.Worksheets[1];

var table = sheet.Tables.First();

table.Columns.Something //I guess I can use this to do what I want

Please help :( I have spent long hours searching for sample code regarding this so that I can learn from it but to no avail. I also understand ExcelToLinQ is managed to do that but it can't recognize table.

Not sure why but none of the above solution work for me. So sharing what worked:

public void readXLS(string FilePath)
{
    FileInfo existingFile = new FileInfo(FilePath);
    using (ExcelPackage package = new ExcelPackage(existingFile))
    {
        //get the first worksheet in the workbook
        ExcelWorksheet worksheet = package.Workbook.Worksheets[1];
        int colCount = worksheet.Dimension.End.Column;  //get Column Count
        int rowCount = worksheet.Dimension.End.Row;     //get row count
        for (int row = 1; row <= rowCount; row++)
        {
            for (int col = 1; col <= colCount; col++)
            {
                Console.WriteLine(" Row:" + row + " column:" + col + " Value:" + worksheet.Cells[row, col].Value?.ToString().Trim());
            }
        }
    }
}

There is no native but what if you use what I put in this post:

How to parse excel rows back to types using EPPlus

If you want to point it at a table only it will need to be modified. Something like this should do it:

public static IEnumerable<T> ConvertTableToObjects<T>(this ExcelTable table) where T : new()
{
    //DateTime Conversion
    var convertDateTime = new Func<double, DateTime>(excelDate =>
    {
        if (excelDate < 1)
            throw new ArgumentException("Excel dates cannot be smaller than 0.");

        var dateOfReference = new DateTime(1900, 1, 1);

        if (excelDate > 60d)
            excelDate = excelDate - 2;
        else
            excelDate = excelDate - 1;
        return dateOfReference.AddDays(excelDate);
    });

    //Get the properties of T
    var tprops = (new T())
        .GetType()
        .GetProperties()
        .ToList();

    //Get the cells based on the table address
    var start = table.Address.Start;
    var end = table.Address.End;
    var cells = new List<ExcelRangeBase>();

    //Have to use for loops insteadof worksheet.Cells to protect against empties
    for (var r = start.Row; r <= end.Row; r++)
        for (var c = start.Column; c <= end.Column; c++)
            cells.Add(table.WorkSheet.Cells[r, c]);

    var groups = cells
        .GroupBy(cell => cell.Start.Row)
        .ToList();

    //Assume the second row represents column data types (big assumption!)
    var types = groups
        .Skip(1)
        .First()
        .Select(rcell => rcell.Value.GetType())
        .ToList();

    //Assume first row has the column names
    var colnames = groups
        .First()
        .Select((hcell, idx) => new { Name = hcell.Value.ToString(), index = idx })
        .Where(o => tprops.Select(p => p.Name).Contains(o.Name))
        .ToList();

    //Everything after the header is data
    var rowvalues = groups
        .Skip(1) //Exclude header
        .Select(cg => cg.Select(c => c.Value).ToList());

    //Create the collection container
    var collection = rowvalues
        .Select(row =>
        {
            var tnew = new T();
            colnames.ForEach(colname =>
            {
                //This is the real wrinkle to using reflection - Excel stores all numbers as double including int
                var val = row[colname.index];
                var type = types[colname.index];
                var prop = tprops.First(p => p.Name == colname.Name);

                //If it is numeric it is a double since that is how excel stores all numbers
                if (type == typeof(double))
                {
                    if (!string.IsNullOrWhiteSpace(val?.ToString()))
                    {
                        //Unbox it
                        var unboxedVal = (double)val;

                        //FAR FROM A COMPLETE LIST!!!
                        if (prop.PropertyType == typeof(Int32))
                            prop.SetValue(tnew, (int)unboxedVal);
                        else if (prop.PropertyType == typeof(double))
                            prop.SetValue(tnew, unboxedVal);
                        else if (prop.PropertyType == typeof(DateTime))
                            prop.SetValue(tnew, convertDateTime(unboxedVal));
                        else
                            throw new NotImplementedException(String.Format("Type '{0}' not implemented yet!", prop.PropertyType.Name));
                    }
                }
                else
                {
                    //Its a string
                    prop.SetValue(tnew, val);
                }
            });

            return tnew;
        });


    //Send it back
    return collection;
}

Here is a test method:

[TestMethod]
public void Table_To_Object_Test()
{
    //Create a test file
    var fi = new FileInfo(@"c:\temp\Table_To_Object.xlsx");

    using (var package = new ExcelPackage(fi))
    {
        var workbook = package.Workbook;
        var worksheet = workbook.Worksheets.First();
        var ThatList = worksheet.Tables.First().ConvertTableToObjects<ExcelData>();
        foreach (var data in ThatList)
        {
            Console.WriteLine(data.Id + data.Name + data.Gender);
        }

        package.Save();
    }
}

Gave this in the console:

1JohnMale
2MariaFemale
3DanielUnknown

Just be careful if you Id field is an number or string in excel since the class is expecting a string.

This is my working version. Note that the resolvers code is not shown but are a spin on my implementation which allows columns to be resolved even though they are named slightly differently in each worksheet.

public static IEnumerable<T> ToArray<T>(this ExcelWorksheet worksheet, List<PropertyNameResolver> resolvers) where T : new()
{

  // List of all the column names
  var header = worksheet.Cells.GroupBy(cell => cell.Start.Row).First();

  // Get the properties from the type your are populating
  var properties = typeof(T).GetProperties().ToList();


  var start = worksheet.Dimension.Start;
  var end = worksheet.Dimension.End;

  // Resulting list
  var list = new List<T>();

  // Iterate the rows starting at row 2 (ie start.Row + 1)
  for (int row = start.Row + 1; row <= end.Row; row++)
  {
    var instance = new T();
    for (int col = start.Column; col <= end.Column; col++)
    {
      object value = worksheet.Cells[row, col].Text;

      // Get the column name zero based (ie col -1)
      var column = (string)header.Skip(col - 1).First().Value;

      // Gets the corresponding property to set
      var property = properties.Property(resolvers, column);

      try
      {
        var propertyName = property.PropertyType.IsGenericType
          ? property.PropertyType.GetGenericArguments().First().FullName
          : property.PropertyType.FullName;


        // Implement setter code as needed. 
        switch (propertyName)
        {
          case "System.String":
            property.SetValue(instance, Convert.ToString(value));
            break;
          case "System.Int32":
            property.SetValue(instance, Convert.ToInt32(value));
            break;
          case "System.DateTime":
            if (DateTime.TryParse((string) value, out var date))
            {
              property.SetValue(instance, date);
            }
            property.SetValue(instance, FromExcelSerialDate(Convert.ToInt32(value)));
            break;
          case "System.Boolean":
            property.SetValue(instance, (int)value == 1);
            break;
        }
      }
      catch (Exception e)
      {
        // instance property is empty because there was a problem.
      }

    } 
    list.Add(instance);
  }
  return list;
}

// Utility function taken from the above post's inline function.
public static DateTime FromExcelSerialDate(int excelDate)
{
  if (excelDate < 1)
    throw new ArgumentException("Excel dates cannot be smaller than 0.");

  var dateOfReference = new DateTime(1900, 1, 1);

  if (excelDate > 60d)
    excelDate = excelDate - 2;
  else
    excelDate = excelDate - 1;
  return dateOfReference.AddDays(excelDate);
}

Below code will read excel data into a datatable, which is converted to list of datarows.

if (FileUpload1.HasFile)
{
    if (Path.GetExtension(FileUpload1.FileName) == ".xlsx")
    {
        Stream fs = FileUpload1.FileContent;
        ExcelPackage package = new ExcelPackage(fs);
        DataTable dt = new DataTable();
        dt= package.ToDataTable();
        List<DataRow> listOfRows = new List<DataRow>();
        listOfRows = dt.AsEnumerable().ToList();

    }
}
using OfficeOpenXml;
using System.Data;
using System.Linq;

 public static class ExcelPackageExtensions
    {
        public static DataTable ToDataTable(this ExcelPackage package)
        {
            ExcelWorksheet workSheet = package.Workbook.Worksheets.First();
            DataTable table = new DataTable();
            foreach (var firstRowCell in workSheet.Cells[1, 1, 1, workSheet.Dimension.End.Column])
            {
                table.Columns.Add(firstRowCell.Text);
            }

            for (var rowNumber = 2; rowNumber <= workSheet.Dimension.End.Row; rowNumber++)
            {
                var row = workSheet.Cells[rowNumber, 1, rowNumber, workSheet.Dimension.End.Column];
                var newRow = table.NewRow();
                foreach (var cell in row)
                {
                    newRow[cell.Start.Column - 1] = cell.Text;
                }
                table.Rows.Add(newRow);
            }
            return table;
        }

    }

I have got an error on the first answer so I have changed some code line.

Please try my new code, it's working for me.

using OfficeOpenXml;
using OfficeOpenXml.Table;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;

public static class ImportExcelReader
{
    public static List<T> ImportExcelToList<T>(this ExcelWorksheet worksheet) where T : new()
    {
        //DateTime Conversion
        Func<double, DateTime> convertDateTime = new Func<double, DateTime>(excelDate =>
        {
            if (excelDate < 1)
            {
                throw new ArgumentException("Excel dates cannot be smaller than 0.");
            }

            DateTime dateOfReference = new DateTime(1900, 1, 1);

            if (excelDate > 60d)
            {
                excelDate = excelDate - 2;
            }
            else
            {
                excelDate = excelDate - 1;
            }

            return dateOfReference.AddDays(excelDate);
        });

        ExcelTable table = null;

        if (worksheet.Tables.Any())
        {
            table = worksheet.Tables.FirstOrDefault();
        }
        else
        {
            table = worksheet.Tables.Add(worksheet.Dimension, "tbl" + ShortGuid.NewGuid().ToString());

            ExcelAddressBase newaddy = new ExcelAddressBase(table.Address.Start.Row, table.Address.Start.Column, table.Address.End.Row + 1, table.Address.End.Column);

            //Edit the raw XML by searching for all references to the old address
            table.TableXml.InnerXml = table.TableXml.InnerXml.Replace(table.Address.ToString(), newaddy.ToString());
        }

        //Get the cells based on the table address
        List<IGrouping<int, ExcelRangeBase>> groups = table.WorkSheet.Cells[table.Address.Start.Row, table.Address.Start.Column, table.Address.End.Row, table.Address.End.Column]
            .GroupBy(cell => cell.Start.Row)
            .ToList();

        //Assume the second row represents column data types (big assumption!)
        List<Type> types = groups.Skip(1).FirstOrDefault().Select(rcell => rcell.Value.GetType()).ToList();

        //Get the properties of T
        List<PropertyInfo> modelProperties = new T().GetType().GetProperties().ToList();

        //Assume first row has the column names
        var colnames = groups.FirstOrDefault()
            .Select((hcell, idx) => new
            {
                Name = hcell.Value.ToString(),
                index = idx
            })
            .Where(o => modelProperties.Select(p => p.Name).Contains(o.Name))
            .ToList();

        //Everything after the header is data
        List<List<object>> rowvalues = groups
            .Skip(1) //Exclude header
            .Select(cg => cg.Select(c => c.Value).ToList()).ToList();

        //Create the collection container
        List<T> collection = new List<T>();
        foreach (List<object> row in rowvalues)
        {
            T tnew = new T();
            foreach (var colname in colnames)
            {
                //This is the real wrinkle to using reflection - Excel stores all numbers as double including int
                object val = row[colname.index];
                Type type = types[colname.index];
                PropertyInfo prop = modelProperties.FirstOrDefault(p => p.Name == colname.Name);

                //If it is numeric it is a double since that is how excel stores all numbers
                if (type == typeof(double))
                {
                    //Unbox it
                    double unboxedVal = (double)val;

                    //FAR FROM A COMPLETE LIST!!!
                    if (prop.PropertyType == typeof(int))
                    {
                        prop.SetValue(tnew, (int)unboxedVal);
                    }
                    else if (prop.PropertyType == typeof(double))
                    {
                        prop.SetValue(tnew, unboxedVal);
                    }
                    else if (prop.PropertyType == typeof(DateTime))
                    {
                        prop.SetValue(tnew, convertDateTime(unboxedVal));
                    }
                    else if (prop.PropertyType == typeof(string))
                    {
                        prop.SetValue(tnew, val.ToString());
                    }
                    else
                    {
                        throw new NotImplementedException(string.Format("Type '{0}' not implemented yet!", prop.PropertyType.Name));
                    }
                }
                else
                {
                    //Its a string
                    prop.SetValue(tnew, val);
                }
            }
            collection.Add(tnew);
        }

        return collection;
    }
}

How to call this function? please view below code;

private List<FundraiserStudentListModel> GetStudentsFromExcel(HttpPostedFileBase file)
    {
        List<FundraiserStudentListModel> list = new List<FundraiserStudentListModel>();
        if (file != null)
        {
            try
            {
                using (ExcelPackage package = new ExcelPackage(file.InputStream))
                {
                    ExcelWorkbook workbook = package.Workbook;
                    if (workbook != null)
                    {
                        ExcelWorksheet worksheet = workbook.Worksheets.FirstOrDefault();
                        if (worksheet != null)
                        {
                            list = worksheet.ImportExcelToList<FundraiserStudentListModel>();
                        }
                    }
                }
            }
            catch (Exception err)
            {
                //save error log
            }
        }
        return list;
    }

FundraiserStudentListModel here:

 public class FundraiserStudentListModel
{
    public string Name { get; set; }
    public string Email { get; set; }
    public string Phone { get; set; }
}

Working solution with validate email,mobile number

 public class ExcelProcessing
        {
            public List<ExcelUserData> ReadExcel()
            {
                string path = Config.folderPath + @"\MemberUploadFormat.xlsx";
    
                using (var excelPack = new ExcelPackage())
                {
                    //Load excel stream
                    using (var stream = File.OpenRead(path))
                    {
                        excelPack.Load(stream);
                    }
    
                    //Lets Deal with first worksheet.(You may iterate here if dealing with multiple sheets)
                    var ws = excelPack.Workbook.Worksheets[0];
    
                    List<ExcelUserData> userList = new List<ExcelUserData>();
    
                    int colCount = ws.Dimension.End.Column;  //get Column Count
                    int rowCount = ws.Dimension.End.Row;
                       
                    for (int row = 2; row <= rowCount; row++) // start from to 2 omit header
                    {
                       
                        bool IsValid = true;
                        ExcelUserData _user = new ExcelUserData();
    
                        for (int col = 1; col <= colCount; col++)
                        {
                            if (col == 1)
                            {
                                _user.FirstName = ws.Cells[row, col].Value?.ToString().Trim();
                                if (string.IsNullOrEmpty(_user.FirstName))
                                {
                                    _user.ErrorMessage += "Enter FirstName <br/>";
                                    IsValid = false;
                                }
                            }
                            else if (col == 2)
                            {
                                _user.Email = ws.Cells[row, col].Value?.ToString().Trim();
    
                                if (string.IsNullOrEmpty(_user.Email))
                                {
                                    _user.ErrorMessage += "Enter Email <br/>";
                                    IsValid = false;
                                }
                                else if (!IsValidEmail(_user.Email))
                                {
                                    _user.ErrorMessage += "Invalid Email Address <br/>";
                                    IsValid = false;
                                }
                            }
                            else if (col ==3)
                            {
                                _user.MobileNo = ws.Cells[row, col].Value?.ToString().Trim();
    
                                if (string.IsNullOrEmpty(_user.MobileNo))
                                {
                                    _user.ErrorMessage += "Enter Mobile No <br/>";
                                    IsValid = false;
                                }
                                else if (_user.MobileNo.Length != 10)
                                {
                                    _user.ErrorMessage += "Invalid Mobile No <br/>";
                                    IsValid = false;
                                }
    
                            }
                            else if (col == 4)
                            {
                                _user.IsAdmin = ws.Cells[row, col].Value?.ToString().Trim();
    
                                if (string.IsNullOrEmpty(_user.IsAdmin))
                                {
                                    _user.IsAdmin = "0";
                                }
                            }
    
                            _user.IsValid = IsValid;
                        }
                        userList.Add(_user);
                    }
                    return userList;
                }
            }
            public static bool IsValidEmail(string email)
            {
                Regex regex = new Regex(@"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$",
                 RegexOptions.CultureInvariant | RegexOptions.Singleline);
    
                return regex.IsMatch(email);
            }
        }

With this code you won't get an error because a cell is null. it will also cast the data type according to the properties in your class!

using System;
using System.Collections.Generic;
using System.Data;
using System.Data.OleDb;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using OfficeOpenXml;

public static class ReadExcel
    {
        public static List<T> ReadExcelToList<T>(this ExcelWorksheet worksheet) where T : new()
        {
            List<T> collection = new List<T>();
            try
            {
                DataTable dt = new DataTable();
                foreach (var firstRowCell in new T().GetType().GetProperties().ToList())
                {
                    //Add table colums with properties of T
                    dt.Columns.Add(firstRowCell.Name);
                }
                for (int rowNum = 2; rowNum <= worksheet.Dimension.End.Row; rowNum++)
                {
                    var wsRow = worksheet.Cells[rowNum, 1, rowNum, worksheet.Dimension.End.Column];
                    DataRow row = dt.Rows.Add();
                    foreach (var cell in wsRow)
                    {
                        row[cell.Start.Column - 1] = cell.Text;
                    }
                }
                
                //Get the colums of table
                var columnNames = dt.Columns.Cast<DataColumn>().Select(c => c.ColumnName).ToList();
                
                //Get the properties of T
                List<PropertyInfo> properties = new T().GetType().GetProperties().ToList();

                collection = dt.AsEnumerable().Select(row =>
                {
                    T item = Activator.CreateInstance<T>();
                    foreach (var pro in properties)
                    {
                        if (columnNames.Contains(pro.Name) || columnNames.Contains(pro.Name.ToUpper()))
                        {
                            PropertyInfo pI = item.GetType().GetProperty(pro.Name);
                            pro.SetValue(item, (row[pro.Name] == DBNull.Value) ? null : Convert.ChangeType(row[pro.Name], (Nullable.GetUnderlyingType(pI.PropertyType) == null) ? pI.PropertyType : Type.GetType(pI.PropertyType.GenericTypeArguments[0].FullName)));
                        }
                    }
                    return item;
                }).ToList();

            }
            catch (Exception ex)
            {
                //Save error log
            }

            return collection;
        }
    }

How to call this function? please view below code;

public List<Users> GetStudentsFromExcel(HttpPostedFileBase file)
{
    List<Users> list = new List<Users>();
    if (file != null)
    {
        try
        {
            using (ExcelPackage package = new ExcelPackage(file.InputStream))
            {
                ExcelWorkbook workbook = package.Workbook;
                if (workbook != null)
                {
                    ExcelWorksheet worksheet = workbook.Worksheets.FirstOrDefault();
                    if (worksheet != null)
                    {
                        list = worksheet.ReadExcelToList<Users>();
                        //Your code
                    }
                }
            }
        }
        catch (Exception ex)
        {
            //Save error log
        }
    }
    return list;
}

public class Users
{
    public string Code { get; set; }
    public string Name { get; set; }
    public string Email { get; set; }
    public string Phone { get; set; }
    public string Address { get; set; }
    public DateTime CreatedAt { get; set; }
}

Hope to help someone!

Yet another way to do it.

I used Ernie S solution but it didn't work if I had empty cells in the first data row (it wasn't able to guess the data type from it).
So instead of getting a data type from Excel table I get it from the T parameter class properties using reflection.

/// <summary>
///     Converts table to list of T objects
/// </summary>
/// <typeparam name="T">The type to return</typeparam>
/// <param name="table">Data source</param>
/// <returns>List of T objects</returns>
public static IEnumerable<T> ConvertToObjects<T>(this ExcelTable table) where T : new()
{
    ExcelCellAddress start = table.Address.Start;
    ExcelCellAddress end = table.Address.End;
    List<ExcelRange> cells = new();

    for (int r = start.Row; r <= end.Row; r++)
        for (int c = start.Column; c <= end.Column; c++)
            cells.Add(table.WorkSheet.Cells[r, c]);

    List<IGrouping<int, ExcelRange>> allRows = cells
        .GroupBy(cell => cell.Start.Row)
        .OrderBy(cell => cell.Key)
        .ToList();

    IEnumerable<PropertyInfo> typeProperties = typeof(T).GetProperties();

    IGrouping<int, ExcelRangeBase> header = allRows.First();

    Dictionary<PropertyInfo, int> columns = new();

    foreach (ExcelRangeBase col in header)
    {
        string propName = col.GetValue<string>();
        PropertyInfo propInfo = typeProperties.FirstOrDefault(x => x.Name.Equals(propName));
        if (propInfo != null)
        {
            columns.Add(propInfo, col.Start.Column);
        }                    
    }

    IEnumerable<IGrouping<int, ExcelRangeBase>> rows = allRows.Skip(1);

    List<T> objects = new();

    foreach (IGrouping<int, ExcelRangeBase> row in rows)
    {
        T obj = new();
        foreach (KeyValuePair<PropertyInfo, int> colInfo in columns)
        {
            ExcelRangeBase col = row.First(x => x.Start.Column == colInfo.Value);

            if (col.Value == null)
                continue;

            object value = Convert.ChangeType(col.Value, Nullable.GetUnderlyingType(colInfo.Key.PropertyType) ?? colInfo.Key.PropertyType);
            colInfo.Key.SetValue(obj, value);
        }
        objects.Add(obj);
    }

    return objects;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM