Sheetjs: sheet_to_json skips empty rows

Created on 28 Oct 2014  ·  38Comments  ·  Source: SheetJS/sheetjs

When calling xlsx.utils.sheet_to_json(sheet, {header: 1}) I would expect to receive an array of arrays _including_ empty rows. However the function explicitly skips out empty rows which makes it impossible to select a particular row in the results. It would be good to make this behaviour configurable.

As a workaround, I have used sheet_to_csv, which doesn't skip rows, followed by csv-parse.

Most helpful comment

In my case I needed the header of a xlsx which could have empty rows above it.
I set the following values:
const jsonSheet = XLSX.utils.sheet_to_json(ws, { header: 1, range: 0, defval: ""});

The range: 0 is the essential attribute here.
As I said, this was my case.

All 38 comments

@tamlyn excellent catch! Fortunately this is an easy fix: in the loop, the empty check should also consider the header. https://github.com/SheetJS/js-xlsx/blob/master/bits/90_utils.js#L119 can be rewritten as:

if(isempty === false || header === 1) out[outi++] = row;

This is important enough to merit a new push later today with the fix.

sheet_to_json still not include empty cells

Hi, first of all thx for this awesome tool. Now then, in the version bump 0.7.2 only includes the fix for xls empty cells, but im using an xlsx and still have problems with the empty cells.

Also experiencing the same issue as Kevzz. Any updates?

Can also confirm I'm having this issue. Empty cells in xlsx files don't convert properly. Instead of leaving the value of that column as '', it just doesn't include that key at all.

Same issue here

eagerly awaiting for an update here :)

+1

require support for empty cell.

Hmmm, too bad, thought this would save my day :(
But the empty cells bug is still even in 0.8.0
The only way to convert also empty cells, is to save the xlsx table as xls.
Unfortunatley not an option for me since this destroys my table.

Yeah. For me, I go thru the each row and each header to check for undefined values and put '' for empty field.

 workbook.SheetNames.forEach(function (sheetName) {
                // Get headers.
                var headers = [];
                var sheet = workbook.Sheets[sheetName];
                var range = XLSX.utils.decode_range(sheet['!ref']);
                var C, R = range.s.r;
                /* start in the first row */
                /* walk every column in the range */
                for (C = range.s.c; C <= range.e.c; ++C) {
                    var cell = sheet[XLSX.utils.encode_cell({c: C, r: R})];
                    /* find the cell in the first row */

                    var hdr = "UNKNOWN " + C; // <-- replace with your desired default
                    if (cell && cell.t) {
                        hdr = XLSX.utils.format_cell(cell);
                    }
                    headers.push(hdr);
                }
                // For each sheets, convert to json.
                var roa = X.utils.sheet_to_json(workbook.Sheets[sheetName]);
                if (roa.length > 0) {
                    roa.forEach(function (row) {
                        // Set empty cell to ''.
                        headers.forEach(function (hd) {
                            if (row[hd] == undefined) {
                                row[hd] = '';
                            }
                        });
                    });
                }
            });

+1 the key should be included in the json objects with an empty string or null.

(Awesome tool nontheless)

@SheetJSDev Hi, any update when this will be fixed? I am still seeing this issue.

BTW great library!

@abartolo Go to xlsx.js file, find sheet_to_json function and change it like as this:

function sheet_to_json(sheet, opts){
    var val, row, range, header = 0, offset = 1, r, hdr = [], isempty, R, C, v;
    var o = opts != null ? opts : {};
    var raw = o.raw;
    if(sheet == null || sheet["!ref"] == null) return [];
    range = o.range !== undefined ? o.range : sheet["!ref"];
    if(o.header === 1) header = 1;
    else if(o.header === "A") header = 2;
    else if(Array.isArray(o.header)) header = 3;
    switch(typeof range) {
        case 'string': r = safe_decode_range(range); break;
        case 'number': r = safe_decode_range(sheet["!ref"]); r.s.r = range; break;
        default: r = range;
    }
    if(header > 0) offset = 0;
    var rr = encode_row(r.s.r);
    var cols = new Array(r.e.c-r.s.c+1);
    var out = new Array(r.e.r-r.s.r-offset+1);
    var outi = 0;
    for(C = r.s.c; C <= r.e.c; ++C) {
        cols[C] = encode_col(C);
        val = sheet[cols[C] + rr];
        switch(header) {
            case 1: hdr[C] = C; break;
            case 2: hdr[C] = cols[C]; break;
            case 3: hdr[C] = o.header[C - r.s.c]; break;
            default:
                if(val === undefined) continue;
                hdr[C] = format_cell(val);
        }
    }
    for (R = r.s.r + offset; R <= r.e.r; ++R) {
        rr = encode_row(R);
        isempty = true;
        if(header === 1) row = [];
        else {
            row = {};
            if(Object.defineProperty) Object.defineProperty(row, '__rowNum__', {value:R, enumerable:false});
            else row.__rowNum__ = R;
        }
        for (C = r.s.c; C <= r.e.c; ++C) {
            val = sheet[cols[C] + rr];
            if(val === undefined || val.t === undefined) {val = {t: "s", v: null, w: ""};}
            v = val.v;
            switch(val.t){
                case 'e': continue;
                case 's': break;
                case 'b': case 'n': break;
                default: throw 'unrecognized type ' + val.t;
            }
            if(v !== undefined) {
                row[hdr[C]] = raw ? v : format_cell(val,v);
                isempty = false;
            }
        }
        if(isempty === false || header === 1) out[outi++] = row;

    }
    out.length = outi;
    return out;
}

In summary this part has to be changed:

if(val === undefined || val.t === undefined) {val = {t: "s", v: null, w: ""};}

@mjza that has no change for me. Still the empty fields are inexistant and imposible to detect.

+1

@mjza Thanks , It is work

Any updates on this matter?

I think this was resolved a long time ago, in version 0.8.0 the fix from https://github.com/SheetJS/js-xlsx/issues/139#issuecomment-60823125 was applied. I just tested a file and see blank arrays generated when the option {header:1} is set.

As far as I can tell the crux of the matter (including empty rows) has been resolved. If anyone can share a sample file with expected output I'll reopen the issue.

sheet_to_json() with header option true yields empty column headers perfectly, But there is a bug and it won't echo null for last empty column data. In attached screenshot, there are 11 header items. 2nd column is empty and it's coming null as expected. But it's not echoing null for 11th column which is last one.

image

@vins13pattar do you have a file you can share? It should not be generating null in the first place -- those should be array holes (undefined)

I made a sample file to check what's going on: t139.xlsx

And this is what I see in node:

> XLSX.version
'0.9.5'
> XLSX.utils.sheet_to_json(XLSX.readFile('t139.xlsx').Sheets.Sheet1, {header:1})
[ [ 'id',
    'id-job',
    'type',
    'name',
    'address',
    'latitude',
    'longitude',
    'service-duration',
    'start-time',
    'end-time',
    'quantity',
    'required-skills' ],
  [ 'Stop1', , , , , , , , , , '206' ] ]

@SheetJSDev Thank you for the reply.
In the test file t139 'required-skills' is empty and it is generating data as

[ 'Stop1', , , , , , , , , , '206' ]

Array holes are ok. But how to represent array hole after the last column?
Like this?

[ 'Stop1', , , , , , , , , , '206', ]

I faced this issue because I was parsing content without any header columns.
Anyway, I made it working. Thank you for the awesome package!!

I had a similar issue but was able to solve it like this by removing empty object in from an array , hope this helps someone. Not an ideal solution but gets the job done

```
// code for the new excel reader
$scope.do_file = function(files)
{
$scope.fileContent = [];
var X = XLSX;
var global_wb;
var f = files[0];
var reader = new FileReader();
reader.onload = function(e)
{
var data = e.target.result;console.log(data);
global_wb = X.read(data, {type: 'array'});
var output = "";
var result = {};
global_wb.SheetNames.forEach(function(sheetName) {
var roa = X.utils.sheet_to_json(global_wb.Sheets[sheetName], {header:1});
if(roa.length) result[sheetName] = roa;
});

        $scope.fileContent =  result["Sheet1"];
        if(!result["Sheet1"])
        {
           // If ES6  code 
          //$scope.fileContent =  result["contacts"].filter(value => Object.keys(value).length !== 0);   

          // If  ES5  code 
          $scope.fileContent =  result["contacts"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });
        }


      };
      reader.readAsArrayBuffer(f);
    };

```

Are you by any chance roundtripping through JSON by stringifying in one place and parsing elsewhere? That would plug null into the array holes:

> var arr = [1,,,,,2]
undefined
> arr
[ 1, <4 empty items>, 2 ]
> JSON.stringify(arr)
'[1,null,null,null,null,2]'
> JSON.parse(JSON.stringify(arr))
[ 1, null, null, null, null, 2 ]

When I implement this code in my component.ts the worksheet contains null values ​​plus the sheet_to_json ignores the null values ​​and strips the entire column being able to set null and continue. Here's the snippet of my code, I'm using the most recent version of xlsx 0.12.13, it also follows a sample xlsx file I'm importing.

I'm using angular 5

var data = evt.target.files[0];
const target: DataTransfer = <DataTransfer>(evt.target);
if (target.files.length !== 1) throw new Error("Cannot use multiple files");
const reader: FileReader = new FileReader();

reader.onload = (e: any) => {

  /* read workbook */
  const bstr: string = e.target.result;
  const wb: XLSX.WorkBook = XLSX.read(bstr, {type: 'binary'});

  /* grab first sheet */
  const wsname: string = wb.SheetNames[0];
  const ws: XLSX.WorkSheet = wb.Sheets[wsname];

  /* data = meu array de objetos */
  data = <AOA>(XLSX.utils.sheet_to_json(ws, {header:1, raw: true})); // raw: true

}
reader.readAsBinaryString(target.files[0]);

file = https://drive.google.com/file/d/1b9ql8LCHiDwRYNLyt_BjkCh9w908TbUT/view?usp=sharing

@EDUARDOMBRAGA unable to download that file (you have to enable sharing). Can you test with the t139.xlsx file linked in a previous comment

To test in the web browser, go to https://oss.sheetjs.com/js-xlsx/ and drag-drop the Layout.xlsx file into the box. Once you see CSV content in the page, open up your browser console. The loaded workbook object is stored in the global_wb variable which you can access. Running XLSX.utils.sheet_to_json(global_wb.Sheets.Plan1, {header:1, raw:true})[2] in chrome console shows two array holes (not null):

The original worksheet had 13 columns, consistent with the actual data reported in chrome

Correct but I can not return these null values? they are stuck and I lose the data I need to return the column with the values ​​even though they are null? or is it not possible? because I need these null values ​​so I save in the database as nulls or in the future make check if the valroes are null I send as 0 or any other whatever this being done at the moment of reading and writing the object ...

@EDUARDOMBRAGA @SheetJSDev

I am using angular 6 and it will work on angular 5...
package.json "xlsx": "^0.12.13"
in File
import * as XLSX from 'xlsx';

This will work on xlsx, Only one I used it on.
XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1, defval: null});

defval does the magic here, 'null' is what I want in place
if I wanted empty string in place of empty cell
XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1, defval: ''});

defval can be any value you set.

@ryankall

Uall, worked as I expected. Thank you very much for the help. 👍

@ryankall Hello!

When I have null fields plus the same ones are of type string I can write as null and those that are of type binary send another value if it is null?

It was also not working for me until I added the 'defval' param:
var roa = XLS.utils.sheet_to_row_object_array(workbook.Sheets[sheetName], {defval: ''})

Hi there,
We can achieve empty rows by doing in this way.

try this once.

const xlsx = require('xlsx');
const csvjson = require('csvjson');

const workbook = xlsx.read(file.buffer, { type: 'buffer' });
const csvdata = xlsx.utils.sheet_to_csv(workbook.Sheets[workbook.SheetNames[0]]);
const data = csvjson.toObject(csvdata);

In my case I needed the header of a xlsx which could have empty rows above it.
I set the following values:
const jsonSheet = XLSX.utils.sheet_to_json(ws, { header: 1, range: 0, defval: ""});

The range: 0 is the essential attribute here.
As I said, this was my case.

thanks, Nahuelgrc :) 👍

Setting {sheetStubs: true} allowed me to access all the json objects including the ones with empty values, without adding it my array would just end at the first empty element but at least now it just skips the empty cells and continues with the rest of the array. I need to update an 'empty' element now so I need to be able to fetch the empty cell 'values'. Another important part so far was using the !null in my filter function as referenced here github.com/SheetJS/js-xlsx/issues/139#issuecomment-390754486. My array is still returning with no values for the elements after the empty value but I hope that helps someone.

Yeah. For me, I go thru the each row and each header to check for undefined values and put '' for empty field.

 workbook.SheetNames.forEach(function (sheetName) {
                // Get headers.
                var headers = [];
                var sheet = workbook.Sheets[sheetName];
                var range = XLSX.utils.decode_range(sheet['!ref']);
                var C, R = range.s.r;
                /* start in the first row */
                /* walk every column in the range */
                for (C = range.s.c; C <= range.e.c; ++C) {
                    var cell = sheet[XLSX.utils.encode_cell({c: C, r: R})];
                    /* find the cell in the first row */

                    var hdr = "UNKNOWN " + C; // <-- replace with your desired default
                    if (cell && cell.t) {
                        hdr = XLSX.utils.format_cell(cell);
                    }
                    headers.push(hdr);
                }
                // For each sheets, convert to json.
                var roa = X.utils.sheet_to_json(workbook.Sheets[sheetName]);
                if (roa.length > 0) {
                    roa.forEach(function (row) {
                        // Set empty cell to ''.
                        headers.forEach(function (hd) {
                            if (row[hd] == undefined) {
                                row[hd] = '';
                            }
                        });
                    });
                }
            });

Fantastic, it helped a lot.

Yeah. For me, I go thru the each row and each header to check for undefined values and put '' for empty field.

 workbook.SheetNames.forEach(function (sheetName) {
                // Get headers.
                var headers = [];
                var sheet = workbook.Sheets[sheetName];
                var range = XLSX.utils.decode_range(sheet['!ref']);
                var C, R = range.s.r;
                /* start in the first row */
                /* walk every column in the range */
                for (C = range.s.c; C <= range.e.c; ++C) {
                    var cell = sheet[XLSX.utils.encode_cell({c: C, r: R})];
                    /* find the cell in the first row */

                    var hdr = "UNKNOWN " + C; // <-- replace with your desired default
                    if (cell && cell.t) {
                        hdr = XLSX.utils.format_cell(cell);
                    }
                    headers.push(hdr);
                }
                // For each sheets, convert to json.
                var roa = X.utils.sheet_to_json(workbook.Sheets[sheetName]);
                if (roa.length > 0) {
                    roa.forEach(function (row) {
                        // Set empty cell to ''.
                        headers.forEach(function (hd) {
                            if (row[hd] == undefined) {
                                row[hd] = '';
                            }
                        });
                    });
                }
            });

Fantastic, it helped a lot.

its not work

Was this page helpful?
0 / 5 - 0 ratings

Related issues

thomasledoux1 picture thomasledoux1  ·  3Comments

m-ketan picture m-ketan  ·  3Comments

happy0088 picture happy0088  ·  3Comments

jamespan0 picture jamespan0  ·  3Comments

sangpuion picture sangpuion  ·  3Comments