Mongoose: .find() returns documents with empty array field

Created on 30 Oct 2014 · 7 Comments · Source: Automattic/mongoose

mongoose 3.8.12
mongoDB version 2.4.9
node 0.10.34

the document schema is pretty simplistic with one array field

// Shape of each entry stored in the `sequence` array.
var sequenceEntry = {
    sequenceNumber: Number,
    sequenceData: String
};

// Document schema: one numeric reference plus a single array field.
var MySchema = mongoose.Schema({
    externalReference: Number,
    sequence: [sequenceEntry]
});

The issue is that when the sequence field holds a large amount of data (tested with 6MB, around 1,30,000 entries), the following query returns the object with an empty array for the sequence field.

var mongoose = require('mongoose');
var MyModel = require('./myModel.js');
// Connect, then look the document up by its _id.
mongoose.connect('mongodb://localhost:27017/mydb', {}, function(connectErr) {
    if (connectErr) {
        // Surface connection failures instead of querying a dead connection.
        throw connectErr;
    }
    // A bare `ObjectId` is not defined in Node; use the constructor mongoose exposes.
    var id = new mongoose.Types.ObjectId("<object id>");
    MyModel.find({_id: id}, function(err, objects) {
        if (err) {
            throw err;
        }
        //objects has one element with empty array in "sequence" field
    });
});

I'm not sure what is causing the array to be empty, because running the following command in the MongoDB console dumps all the lines to the console (after a few seconds' delay):

> db.MyModel.find({_id:ObjectId("<object id>" })
// all 1,30,000 lines dumped here...

If, on the other hand, the sequence has less data (say, even 1,00,000 entries), mongoose works correctly.
Also, testing this with a different MongoDB driver (pymongo) gave correct results, so I'm assuming there is something going wrong with mongoose.

Really appreciate any help,
thanks

Most helpful comment

The underlying error is a "call stack size exceeded" in NodeJS, and unfortunately this looks like mongoose is running into a really gnarly v8 problem. Lesson learned: use Function.apply() sparingly if you expect to pass it a lot of parameters. This fix will be in 3.8.22. In the meantime, you can use the .lean() query modifier:

// .lean() is the query modifier that bypasses the DocumentArray cast.
db.MyModel.find({_id: ObjectId("<object id>")}).lean().exec(function() {})

to bypass the DocumentArray cast's offending .apply() call.

For posterity's sake, here's the code I used to repro. This commit fixes this particular issue.

var mongoose = require('mongoose');
var async = require('async');
var Schema = mongoose.Schema;

// Repro for gh-2423: grow one document's `sequence` array to
// 256 * 1024 = 262,144 subdocuments, then load it back through mongoose.
mongoose.connect('mongodb://localhost:27017/gh-2423');

var subdoc = { s: 0 };

var MySchema = new Schema({
  externalReference: Number,
  sequence: [{
    s: Number
  }]
});

var M = mongoose.model('Sequence', MySchema);

var x = new M();

// A single update pushes 1024 copies of `subdoc` onto `sequence`.
var update = { $pushAll: { sequence: [] } };
for (var i = 0; i < 1024; ++i) {
  update.$pushAll.sequence.push(subdoc);
}

x.save(function(saveErr, doc) {
  console.log('Error: ' + saveErr);
  if (saveErr) {
    // Stop here rather than dereferencing `doc` after a failed save.
    throw saveErr;
  }
  console.log(doc.sequence.length);

  // Queue 256 identical updates; async.series runs them one after another.
  var fns = [];
  var ctr = 0;
  for (var j = 0; j < 256; ++j) {
    fns.push(function(callback) {
      M.update({ _id: doc._id }, update, function(error) {
        console.log('Exec: ' + (++ctr));
        callback(error);
      });
    });
  }

  async.series(fns, function(seriesErr) {
    if (seriesErr) {
      throw seriesErr;
    }
    M.findOne({ _id: doc._id }, function(findErr, found) {
      if (findErr) {
        // Check the error before touching `found`, so a failed query is
        // reported as itself and not as a TypeError on a missing document.
        console.log('Error: ' + findErr);
        throw findErr;
      }
      console.log(JSON.stringify(found));
      // `saveError` is only present when loading the document hit an error;
      // guard the `.stack` access so a clean load does not crash the script.
      var loadError = found.$__.saveError;
      console.log(loadError);
      console.log(loadError ? loadError.stack : undefined);
      console.log('Error: ' + findErr);
      console.log('## ' + found._id);
      console.log(found.sequence.length);
    });
  });
});

All 7 comments

Hi,

I suspect it may be js-bson giving up because the subdoc is very large. Admittedly we don't do much testing with large documents because you typically don't want to do that with MongoDB, so you may very well have run into such an issue. I'll try to repro this later, thanks for reporting.

+1

+1

The underlying error is a "call stack size exceeded" in NodeJS, and unfortunately this looks like mongoose is running into a really gnarly v8 problem. Lesson learned: use Function.apply() sparingly if you expect to pass it a lot of parameters. This fix will be in 3.8.22. In the meantime, you can use the .lean() query modifier:

// .lean() is the query modifier that bypasses the DocumentArray cast.
db.MyModel.find({_id: ObjectId("<object id>")}).lean().exec(function() {})

to bypass the DocumentArray cast's offending .apply() call.

For posterity's sake, here's the code I used to repro. This commit fixes this particular issue.

var mongoose = require('mongoose');
var async = require('async');
var Schema = mongoose.Schema;

// Repro for gh-2423: grow one document's `sequence` array to
// 256 * 1024 = 262,144 subdocuments, then load it back through mongoose.
mongoose.connect('mongodb://localhost:27017/gh-2423');

var subdoc = { s: 0 };

var MySchema = new Schema({
  externalReference: Number,
  sequence: [{
    s: Number
  }]
});

var M = mongoose.model('Sequence', MySchema);

var x = new M();

// A single update pushes 1024 copies of `subdoc` onto `sequence`.
var update = { $pushAll: { sequence: [] } };
for (var i = 0; i < 1024; ++i) {
  update.$pushAll.sequence.push(subdoc);
}

x.save(function(saveErr, doc) {
  console.log('Error: ' + saveErr);
  if (saveErr) {
    // Stop here rather than dereferencing `doc` after a failed save.
    throw saveErr;
  }
  console.log(doc.sequence.length);

  // Queue 256 identical updates; async.series runs them one after another.
  var fns = [];
  var ctr = 0;
  for (var j = 0; j < 256; ++j) {
    fns.push(function(callback) {
      M.update({ _id: doc._id }, update, function(error) {
        console.log('Exec: ' + (++ctr));
        callback(error);
      });
    });
  }

  async.series(fns, function(seriesErr) {
    if (seriesErr) {
      throw seriesErr;
    }
    M.findOne({ _id: doc._id }, function(findErr, found) {
      if (findErr) {
        // Check the error before touching `found`, so a failed query is
        // reported as itself and not as a TypeError on a missing document.
        console.log('Error: ' + findErr);
        throw findErr;
      }
      console.log(JSON.stringify(found));
      // `saveError` is only present when loading the document hit an error;
      // guard the `.stack` access so a clean load does not crash the script.
      var loadError = found.$__.saveError;
      console.log(loadError);
      console.log(loadError ? loadError.stack : undefined);
      console.log('Error: ' + findErr);
      console.log('## ' + found._id);
      console.log(found.sequence.length);
    });
  });
});

thank you

Hi there,

We are running into the same issue. In our case, we can already see the data in MongoDB Compass, but Mongoose fails to return any results. If we try again, say, 5 minutes later, the data shows up through Mongoose (and sometimes even that does not work).

I think this has to do with the large number of documents. We have two collections, each with around 10 million records, and we query both of them at the same time.

Here is the code that I am using to query just one collection:

const mongoose = require('mongoose');

const Schema = mongoose.Schema;

// Connect to the database, passing the credentials through the options object.
mongoose.connect('mongodb://WHATEVERIP:27017/DBNAME?authSource=DBNAME', {
    user: "admin",
    pass: "passs",
    useNewUrlParser: true,
    useUnifiedTopology: true
});

// The schema declares no fields and sets strict: false; the model is bound
// explicitly to the 'Marketing' collection.
var MarketingSchema = new Schema({}, { strict: false });
var MarketingModel = mongoose.model('Marketing', MarketingSchema, 'Marketing');

// r_time is 64 bit timestamp field
// Fetch at most one record for the given ip, sorted by r_time descending.
var DataRecord = await MarketingModel
    .find({ ip: "PUT IP OF CLIENT HERE" })
    .sort({ r_time: -1 })
    .lean()
    .limit(1)
    .exec();

// An empty result means no record matched the ip filter.
if (!DataRecord.length) {
    console.log("NOT FOUND");
}

Sample data,

{
    "_id": {
        "$oid": "5f14cc1e283ea9705f1b31c2"
    },
    "userID": 1433571522,
    "ip": "135.114.236.220",
    "isNew": false,
    "r_time": {
        "$numberLong": "1595198494512"
    },
    "event": "Click",
    "x": 93.39,
    "y": -492.00
}


{
    "_id": {
        "$oid": "5f14cc22283ea9705f1b31c3"
    },
    "userID": 1433571522,
    "ip": "135.114.236.220",
    "isNew": false,
    "r_time": {
        "$numberLong": "1595198498608"
    },
    "event": "Press",
    "x": 91.39,
    "y": -20.00
}



{
    "_id": {
        "$oid": "5f14cc22283ea9705f1b31c4"
    },
    "userID": 1433571522,
    "ip": "135.114.236.220",
    "isNew": false,
    "r_time": {
        "$numberLong": "1595198498652"
    },
    "event": "Type",
    "x": 11.24,
    "y": -29.00
}

Just to tell you how badly we are affected by this: we've spent $100k on third-party consulting fees and waited almost a year without a solution, until a friend called me and told me to check the mongoose GitHub.

I'm happy to provide anything you may need; if you need money, we can pay.

@gerald-dotcom what output do you get? Do you get the "NOT FOUND" message?

Also, are you absolutely sure that 'DBNAME' is the correct database and 'Marketing' is the correct collection?

Was this page helpful?
0 / 5 - 0 ratings