简体   繁体   中英

Normalize excel spreadsheet data (Columns to rows)

I am trying to normalize the data in an Excel spreadsheet using the Microsoft Excel Interop objects. Basically, I need to convert the columns into rows, starting from a certain column offset.

Original Data:
ColumnA ColumnB ColumnC ColumnD ColumnE ColumnF
   X       Y      10      20      30      40

Normalized Data:
ColumnA ColumnB NewColumn Value
  X        Y     ColumnC   10
  X        Y     ColumnD   20
  X        Y     ColumnE   30
  X        Y     ColumnF   40

My function works as expected. However, it runs very slowly, so I am wondering: if I use another framework such as OpenXML, will I see any increase in efficiency?

Here is my code using Interop objects:

/// <summary>
/// Unpivots a worksheet cell by cell: every column from <paramref name="aColOffSet"/>
/// onwards becomes a (header, value) pair of rows in a sheet named
/// "Normalized {aSheetName}" inside the same workbook.
/// </summary>
/// <param name="aFilePathName">Path of the workbook to open and modify.</param>
/// <param name="aSheetName">Name of the worksheet holding the original data.</param>
/// <param name="aColOffSet">1-based index of the first column to unpivot; columns before it are repeated on every output row.</param>
/// <param name="aPivotColName">Header text for the output column that receives the original column headers.</param>
/// <param name="aValueColName">Header text for the output column that receives the cell values.</param>
/// <remarks>
/// Headers are read from row 1 until the first empty cell; data rows are read from
/// row 2 until column 1 is empty. Output rows are grouped by source column
/// (all rows of the first value column, then all rows of the next, and so on).
/// The workbook is closed with <c>xlSaveChanges</c> even when an error occurs.
/// </remarks>
public static void Normalize(string aFilePathName, string aSheetName, int aColOffSet, string aPivotColName, string aValueColName)
{
    LOG.DebugFormat("Normaling data in file: {0}", aFilePathName);
    LOG.DebugFormat("Sheet Name:{0} ColOffset:{1}", aSheetName, aColOffSet);

    Excel.Application vExcel = new Excel.Application();
    Excel.Workbook vWorkbook = null;
    Excel.Worksheet vWsOriginal = null;
    Excel.Worksheet vWsNormalized = null;
    try
    {
        vExcel.Visible = false;
        vWorkbook = vExcel.Workbooks.Open(aFilePathName, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
        vWsOriginal = vWorkbook.Worksheets[aSheetName];
        string vNormalizedSheetName = string.Format("Normalized {0}", aSheetName);

        // Reuse the output sheet if a previous run already created it.
        bool vNormalizedSheetExists = (vWorkbook.Sheets.Cast<object>()
                                        .Select(sheetValue => sheetValue as Excel.Worksheet))
                                        .Any(wbSheet => wbSheet != null && wbSheet.Name == vNormalizedSheetName);
        if (!vNormalizedSheetExists)
        {
            vWsNormalized = vWorkbook.Worksheets.Add(vWsOriginal, Type.Missing, Type.Missing, Type.Missing);
            vWsNormalized.Name = vNormalizedSheetName;
        }
        else
        {
            vWsNormalized = vWorkbook.Worksheets[vNormalizedSheetName];
        }
        vWsNormalized.UsedRange.ClearContents();

        // Walk row 1 until the first empty cell to collect the header texts.
        // After the loop vTotalColumns is one past the last header column.
        long vTotalColumns = 1;
        long vRowCounter = 1;
        Excel.Range vWsRange = vWsOriginal.Cells[vRowCounter, vTotalColumns];

        List<string> vHeaders = new List<string>();
        while (vWsRange.Value2 != null)
        {
            vHeaders.Add(vWsRange.Value2.ToString());
            vTotalColumns = vTotalColumns + 1;
            vWsRange = vWsOriginal.Cells[vRowCounter, vTotalColumns];
        }

        // Insert the headers: static columns keep their names, followed by the
        // caller-supplied pivot and value column names.
        for (int vHeaderCol = 1; vHeaderCol < aColOffSet; vHeaderCol++)
        {
            vWsNormalized.Cells[1, vHeaderCol].Value = vHeaders[vHeaderCol - 1];
        }
        vWsNormalized.Cells[1, aColOffSet].Value = aPivotColName;
        vWsNormalized.Cells[1, aColOffSet + 1].Value = aValueColName;

        long vNewRow = 2;

        for (int vCol = aColOffSet; vCol < vTotalColumns; vCol++)
        {
            vRowCounter = 2;
            while (((Excel.Range)vWsOriginal.Cells[vRowCounter, 1]).Value2 != null)
            {
                // Repeat the static (non-pivoted) columns on every output row.
                for (int j = 1; j < aColOffSet; j++)
                {
                    vWsNormalized.Cells[vNewRow, j] = vWsOriginal.Cells[vRowCounter, j];
                }

                vWsNormalized.Cells[vNewRow, aColOffSet] = vWsOriginal.Cells[1, vCol];
                vWsNormalized.Cells[vNewRow, aColOffSet + 1] = vWsOriginal.Cells[vRowCounter, vCol];

                vRowCounter = vRowCounter + 1;

                vNewRow = vNewRow + 1;
            }
        }
    }
    finally
    {
        // Any of these references can still be null if opening the workbook or
        // resolving a sheet failed; guard each one so cleanup never throws a
        // secondary exception that hides the original error.
        try
        {
            if (vWorkbook != null)
            {
                vWorkbook.Close(Excel.XlSaveAction.xlSaveChanges, Type.Missing, Type.Missing);
            }
        }
        finally
        {
            if (vWsNormalized != null)
            {
                Marshal.FinalReleaseComObject(vWsNormalized);
            }
            if (vWsOriginal != null)
            {
                Marshal.FinalReleaseComObject(vWsOriginal);
            }
            if (vWorkbook != null)
            {
                Marshal.FinalReleaseComObject(vWorkbook);
            }

            // Always shut the Excel instance down, even if Close failed,
            // so no hidden EXCEL.EXE process is left behind.
            vExcel.Quit();
            Marshal.FinalReleaseComObject(vExcel);
        }
    }
}

I am willing to try any other open source frameworks, if there is a possibility to improve performance.

Thanks

I was able to come up with a better implementation. Instead of looping through every single cell, it leverages Excel's Transpose function to do a bulk copy.

/// <summary>
/// Unpivots a worksheet one source row at a time using bulk range assignments:
/// every column from <paramref name="aColOffSet"/> onwards becomes a
/// (header, value) pair of rows in a sheet named "Normalized {aSheetName}"
/// inside the same workbook.
/// </summary>
/// <param name="aFilePathName">Path of the workbook to open and modify.</param>
/// <param name="aSheetName">Name of the worksheet holding the original data.</param>
/// <param name="aColOffSet">1-based index of the first column to unpivot; columns before it are repeated on every output row.</param>
/// <param name="aPivotColName">Header text for the output column that receives the original column headers.</param>
/// <param name="aValueColName">Header text for the output column that receives the cell values.</param>
/// <remarks>
/// Headers are read from row 1 until the first empty cell; data rows are read from
/// row 2 until column 1 is empty. Output rows are grouped by source row
/// (all value columns of row 2, then all value columns of row 3, and so on).
/// The workbook is closed with <c>xlSaveChanges</c> even when an error occurs.
/// </remarks>
public static void Normalize2(string aFilePathName, string aSheetName, int aColOffSet, string aPivotColName, string aValueColName)
    {
        LOG.DebugFormat("Normaling data in file: {0}", aFilePathName);
        LOG.DebugFormat("Sheet Name:{0} ColOffset:{1}", aSheetName, aColOffSet);

        Excel.Application vExcel = new Excel.Application();
        Excel.Workbook vWorkbook = null;
        Excel.Worksheet vWsOriginal = null;
        Excel.Worksheet vWsNormalized = null;
        try
        {
            vExcel.Visible = false;
            vWorkbook = vExcel.Workbooks.Open(aFilePathName, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
            vWsOriginal = vWorkbook.Worksheets[aSheetName];
            string vNormalizedSheetName = string.Format("Normalized {0}", aSheetName);

            // Reuse the output sheet if a previous run already created it.
            bool vNormalizedSheetExists = (vWorkbook.Sheets.Cast<object>()
                                            .Select(sheetValue => sheetValue as Excel.Worksheet))
                                            .Any(wbSheet => wbSheet != null && wbSheet.Name == vNormalizedSheetName);
            if (!vNormalizedSheetExists)
            {
                vWsNormalized = vWorkbook.Worksheets.Add(vWsOriginal, Type.Missing, Type.Missing, Type.Missing);
                vWsNormalized.Name = vNormalizedSheetName;
            }
            else
            {
                vWsNormalized = vWorkbook.Worksheets[vNormalizedSheetName];
            }
            vWsNormalized.UsedRange.ClearContents();

            // Walk row 1 until the first empty cell to collect the header texts.
            // After the loop vTotalColumns is one past the last header column.
            long vTotalColumns = 1;
            Excel.Range vWsRange = vWsOriginal.Cells[1, vTotalColumns];

            List<string> vHeaders = new List<string>();
            while (vWsRange.Value2 != null)
            {
                vHeaders.Add(vWsRange.Value2.ToString());
                vTotalColumns = vTotalColumns + 1;
                vWsRange = vWsOriginal.Cells[1, vTotalColumns];
            }

            // Insert the headers: static columns keep their names, followed by the
            // caller-supplied pivot and value column names.
            for (int vHeaderCol = 1; vHeaderCol < aColOffSet; vHeaderCol++)
            {
                vWsNormalized.Cells[1, vHeaderCol].Value = vHeaders[vHeaderCol - 1];
            }
            vWsNormalized.Cells[1, aColOffSet].Value = aPivotColName;
            vWsNormalized.Cells[1, aColOffSet + 1].Value = aValueColName;

            // Number of columns being unpivoted; each source row expands into
            // this many destination rows.
            long vValueColumns = vTotalColumns - aColOffSet;
            long vSourceRow = 2;
            long vDestRow = 2;

            // The transposed header column is identical for every source row,
            // so it is computed once, on first use, instead of once per row.
            object vTransposedHeaders = null;

            // With no value columns there is nothing to unpivot; only the
            // header row is written (same outcome as the cell-by-cell variant).
            while (vValueColumns > 0 && ((Excel.Range)vWsOriginal.Cells[vSourceRow, 1]).Value2 != null)
            {
                if (vTransposedHeaders == null)
                {
                    Excel.Range vHeaderData = vWsOriginal.Range[vWsOriginal.Cells[1, aColOffSet],
                                                                vWsOriginal.Cells[1, vTotalColumns - 1]];
                    // NOTE(review): for a single value column Range.Value is a scalar,
                    // not an array - confirm Transpose accepts it.
                    vTransposedHeaders = vExcel.WorksheetFunction.Transpose(vHeaderData.Value);
                }

                long vDestRowStart = vDestRow;
                long vDestRowEnd = (vDestRow + vValueColumns) - 1;

                // Static columns only exist when the offset is past column 1;
                // without this guard Cells[row, 0] would be addressed and throw.
                if (aColOffSet > 1)
                {
                    Excel.Range vStaticRowData = vWsOriginal.Range[vWsOriginal.Cells[vSourceRow, 1],
                                                                   vWsOriginal.Cells[vSourceRow, aColOffSet - 1]];
                    Excel.Range vNormalizedStaticRowData = vWsNormalized.Range[vWsNormalized.Cells[vDestRowStart, 1],
                                                                               vWsNormalized.Cells[vDestRowEnd, aColOffSet - 1]];
                    vNormalizedStaticRowData.Value = vStaticRowData.Value;
                }

                Excel.Range vDynamicRowData = vWsOriginal.Range[vWsOriginal.Cells[vSourceRow, aColOffSet],
                                                                vWsOriginal.Cells[vSourceRow, vTotalColumns - 1]];
                Excel.Range vNormalizedPivotValueRowData = vWsNormalized.Range[vWsNormalized.Cells[vDestRowStart, aColOffSet],
                                                                               vWsNormalized.Cells[vDestRowEnd, aColOffSet]];
                Excel.Range vNormalizedValueRowData = vWsNormalized.Range[vWsNormalized.Cells[vDestRowStart, aColOffSet + 1],
                                                                          vWsNormalized.Cells[vDestRowEnd, aColOffSet + 1]];

                vNormalizedPivotValueRowData.Value = vTransposedHeaders;
                vNormalizedValueRowData.Value = vExcel.WorksheetFunction.Transpose(vDynamicRowData.Value);

                vSourceRow = vSourceRow + 1;
                vDestRow = vDestRow + vValueColumns;
            }
        }
        finally
        {
            // Any of these references can still be null if opening the workbook or
            // resolving a sheet failed; guard each one so cleanup never throws a
            // secondary exception that hides the original error.
            try
            {
                if (vWorkbook != null)
                {
                    vWorkbook.Close(Excel.XlSaveAction.xlSaveChanges, Type.Missing, Type.Missing);
                }
            }
            finally
            {
                if (vWsNormalized != null)
                {
                    Marshal.FinalReleaseComObject(vWsNormalized);
                }
                if (vWsOriginal != null)
                {
                    Marshal.FinalReleaseComObject(vWsOriginal);
                }
                if (vWorkbook != null)
                {
                    Marshal.FinalReleaseComObject(vWorkbook);
                }

                // Always shut the Excel instance down, even if Close failed,
                // so no hidden EXCEL.EXE process is left behind.
                vExcel.Quit();
                Marshal.FinalReleaseComObject(vExcel);
            }
        }
    }

我最近不得不做类似的事情,并使用数据透视表向导找到了这个技巧: http://www.launchexcel.com/pivot-table-flatten-crosstab/

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM