C# OPEN XML: empty cells are getting skipped while getting data from EXCEL to DATATABLE

Had there been data in all the cells of a row then everything works good. The moment you have even a single empty cell in a row then things go haywire.

Why it is happening in first place?

This is because in the below code:

row.Descendants<Cell>().Count()

The Count() is the number of non-empty populated cells (not all columns). So, when you pass row.Descendants<Cell>().ElementAt(i) as an argument to GetCellValue method:

GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));

Then it will find the content of the next non-empty populated cell (not necessarily what is in that column index, i) e.g. if the first column is empty and we call ElementAt(1), it returns the value in the second column instead and the whole logic gets messed up.

Solution - We need to deal with the occurrence of empty cells: Essentially we need to figure out the original column index of the cell in case there were empty cells before it. So, you need to substitute your for loop code as below:

for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
      tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
}

with

for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
    Cell cell = row.Descendants<Cell>().ElementAt(i);
    int actualCellIndex = CellReferenceToIndex(cell);
    tempRow[actualCellIndex] = GetCellValue(spreadSheetDocument, cell);
}

and add below method in your code which is used in the above modified code snippet to obtain the original/correct column index of any cell:

private static int CellReferenceToIndex(Cell cell)
{
    int index = 0;
    string reference = cell.CellReference.ToString().ToUpper();
    foreach (char ch in reference)
    {
        if (Char.IsLetter(ch))
        {
            int value = (int)ch - (int)'A';
            index = (index == 0) ? value : ((index + 1) * 26) + value;
        }
        else
        {
            return index;
        }
    }
    return index;
}

public void Read2007Xlsx()
        {
            try
            {
                DataTable dt = new DataTable();
                using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(@"D:\File.xlsx", false))
                {
                    WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
                    IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
                    string relationshipId = sheets.First().Id.Value;
                    WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
                    Worksheet workSheet = worksheetPart.Worksheet;
                    SheetData sheetData = workSheet.GetFirstChild<SheetData>();
                    IEnumerable<Row> rows = sheetData.Descendants<Row>();
                    foreach (Cell cell in rows.ElementAt(0))
                    {
                        dt.Columns.Add(GetCellValue(spreadSheetDocument, cell));
                    }
                    foreach (Row row in rows) //this will also include your header row...
                    {
                        DataRow tempRow = dt.NewRow();
                        int columnIndex = 0;
                        foreach (Cell cell in row.Descendants<Cell>())
                        {
                            // Gets the column index of the cell with data
                            int cellColumnIndex = (int)GetColumnIndexFromName(GetColumnName(cell.CellReference));
                            cellColumnIndex--; //zero based index
                            if (columnIndex < cellColumnIndex)
                            {
                                do
                                {
                                    tempRow[columnIndex] = ""; //Insert blank data here;
                                    columnIndex++;
                                }
                                while (columnIndex < cellColumnIndex);
                            }
                            tempRow[columnIndex] = GetCellValue(spreadSheetDocument, cell);

                            columnIndex++;
                        }
                        dt.Rows.Add(tempRow);
                    }
                }
                dt.Rows.RemoveAt(0); //...so i'm taking it out here.
            }
            catch (Exception ex)
            {
            }
        }
        /// <summary>
        /// Given a cell name, parses the specified cell to get the column name.
        /// </summary>
        /// <param name="cellReference">Address of the cell (ie. B2)</param>
        /// <returns>Column Name (ie. B)</returns>
        public static string GetColumnName(string cellReference)
        {
            // Create a regular expression to match the column name portion of the cell name.
            Regex regex = new Regex("[A-Za-z]+");
            Match match = regex.Match(cellReference);
            return match.Value;
        }
        /// <summary>
        /// Given just the column name (no row index), it will return the zero based column index.
        /// Note: This method will only handle columns with a length of up to two (ie. A to Z and AA to ZZ). 
        /// A length of three can be implemented when needed.
        /// </summary>
        /// <param name="columnName">Column Name (ie. A or AB)</param>
        /// <returns>Zero based index if the conversion was successful; otherwise null</returns>
        public static int? GetColumnIndexFromName(string columnName)
        {

            //return columnIndex;
            string name = columnName;
            int number = 0;
            int pow = 1;
            for (int i = name.Length - 1; i >= 0; i--)
            {
                number += (name[i] - 'A' + 1) * pow;
                pow *= 26;
            }
            return number;
        }
        public static string GetCellValue(SpreadsheetDocument document, Cell cell)
        {
            SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
            if (cell.CellValue ==null)
            {
            return "";
            }
            string value = cell.CellValue.InnerXml;
            if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
            {
                return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
            }
            else
            {
                return value;
            }
        }