I am parsing a series of files, and the net data is 2600 objects
passed to a function which calls conn.query.
The console logs all 2600 items, but the mysql inserts stop at 150 records.
function insertData(data) {
const conn = mysql.createConnection(datasource)
const values = [data.one, data.two, `${data.three}`, data.four]
let sql = "INSERT INTO resource (one, two, three, four) VALUES (?, ?, ?, ?)";
console.log(`Processing #[${counter}] ${data['one']}`)
return new Promise(async () => {
await conn.query(sql, values, (error, results, fields) => {
if (error) return error
console.log(JSON.stringify(results))
})
conn.end()
})
}
Is there something in particular that I need to be doing here?
An error may be occurring, but your code will never display it. Can you add a if (error) console.log(error) to the top of your query callback to check if there is some error that is occurring?
Also, unless you are leaving something out, you cannot actually await the return value of conn.query with this module, so your conn.end() call may be occurring prior to the query completing, possibility.
Ok. I'll check these two things first.
You were right. I don't know how I forgot to console.log(error).
Any tips on throttling this a bit so that the connections aren't piling up?
Should I use connection pooling? I'm not sure how to incorporate it.
Error: ER_CON_COUNT_ERROR: Too many connections
at Handshake.Sequence._packetToError (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/sequences/Sequence.js:47:14)
at Handshake.ErrorPacket (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/sequences/Handshake.js:123:18)
at Protocol._parsePacket (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:291:23)
at Parser._parsePacket (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Parser.js:433:10)
at Parser.write (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Parser.js:43:10)
at Protocol.write (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:38:16)
at Socket.<anonymous> (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:88:28)
at Socket.<anonymous> (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:526:10)
at Socket.emit (events.js:210:5)
at addChunk (_stream_readable.js:308:12)
--------------------
at Protocol._enqueue (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:144:48)
at Protocol.handshake (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:51:23)
at Connection.connect (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:116:18)
at Connection._implyConnect (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:454:10)
at Connection.query (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:196:8)
at /home/kingram/PROJECTS/government/process.js:100:10
at new Promise (<anonymous>)
at insertData (/home/kingram/PROJECTS/government/process.js:99:10)
at proccessHTML (/home/kingram/PROJECTS/government/process.js:64:7)
at ReadStream.<anonymous> (/home/kingram/PROJECTS/government/process.js:81:5) {
code: 'ER_CON_COUNT_ERROR',
errno: 1040,
sqlMessage: 'Too many connections',
sqlState: undefined,
fatal: true
}
Ah, ok. So it seems like you are likely calling your insertData in a direct for or forEach loop perhaps, as it is likely trying to create all the connections up front as fast as it can instead of doing it one after the other. A simple "fix" would indeed be to just use a pool -- then you're just scheduling all the queries at once on the pool, which will constrain your parallelism to the max pool size. Another option is to pull in an async scheduling module such as https://www.npmjs.com/package/async
If you use a single insert for each datarow you will run into problems (memory / heap stack) when you run more than 2600 rows, let say 20k rows or more.
i would use bulk insert queries with 500 to 1000 rows like
INSERT INTO resource (one, two, three, four) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), [...];
This way you would need only one connection per 1000 rows and have much less overhead.
If you use a single insert for each datarow you will run into problems (memory / heap stack) when you run more than 2600 rows, let say 20k rows or more.
i would use bulk insert queries with 500 to 1000 rows likeINSERT INTO resource (one, two, three, four) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), [...];This way you would need only one connection per 1000 rows and have much less overhead.
Sounds like a good idea, though I don't see more than 3000 inserts at a time with this project, it may
be true for the next one.
Once I get a handle on connection pooling, since I'm there now, I'll try the batch inserts.
Currently I'm having this error using connection pooling:
Error: Handshake inactivity timeout
at Handshake.<anonymous> (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:160:17)
at Handshake.emit (events.js:210:5)
at Handshake._onTimeout (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/sequences/Sequence.js:124:8)
at Timer._onTimeout (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Timer.js:32:23)
at listOnTimeout (internal/timers.js:531:17)
at processTimers (internal/timers.js:475:7)
--------------------
at Protocol._enqueue (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:144:48)
at Protocol.handshake (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:51:23)
at PoolConnection.connect (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:116:18)
at Pool.getConnection (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Pool.js:48:16)
at insertData (/home/kingram/PROJECTS/government/process.js:89:8)
at proccessHTML (/home/kingram/PROJECTS/government/process.js:66:7)
at ReadStream.<anonymous> (/home/kingram/PROJECTS/government/process.js:83:5)
at ReadStream.emit (events.js:215:7)
at endReadableNT (_stream_readable.js:1183:12)
at processTicksAndRejections (internal/process/task_queues.js:80:21) {
code: 'PROTOCOL_SEQUENCE_TIMEOUT',
fatal: true,
timeout: 50000
}
/home/kingram/PROJECTS/government/process.js:97
connection.query(sql, values, (error, results, fields) => {
^
TypeError: Cannot read property 'query' of undefined
at /home/kingram/PROJECTS/government/process.js:97:16
at Handshake.onConnect (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Pool.js:58:9)
at Handshake.<anonymous> (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:526:10)
at Handshake._callback (/home/kingram/PROJECTS/government/node_modules/mysql/lib/Connection.js:488:16)
at Handshake.Sequence.end (/home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/sequences/Sequence.js:83:24)
at /home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:404:18
at Array.forEach (<anonymous>)
at /home/kingram/PROJECTS/government/node_modules/mysql/lib/protocol/Protocol.js:403:13
at processTicksAndRejections (internal/process/task_queues.js:75:11)
at runNextTicks (internal/process/task_queues.js:62:3)
(END)
My code looks like this:
const dataPool = {
// debug: ['ComQueryPacket', 'RowDataPacket'],
//debug: true,
acquireTimeout: 50000,
host: ***,
user: ***,
password: ***,
database: 'test',
port: '3306',
charset: 'utf8mb4'
}
const pool = mysql.createPool(dataPool)
function insertData(data) {
pool.getConnection((err, connection) => {
if (err) console.log(err)
const values = [one, two, three, four]
const sql = "INSERT INTO resource (one, two, three, four) VALUES (?, ?, ?, ?)";
console.log(`Processing #[${counter}] ${data['one']}`)
connection.query(sql, values, (error, results, fields) => {
connection.release()
if (error) console.log(error)
console.log(JSON.stringify(results))
})
})
}
function processData(input){
// stuff
insertData(input)
}
source.forEach((item) => {
processData(item)
})
I would definitely follow the advice of @kai-koch. This way you would need less connections and that means it's less error-prone.
For proper error handling processData and insertData need a callback parameter. At least you should add a return statement:
if (err) {
console.log(err);
return;
}
I would also look at the MySQL log to see what the server is doing when the script fails.
Most helpful comment
If you use a single insert for each datarow you will run into problems (memory / heap stack) when you run more than 2600 rows, let say 20k rows or more.
i would use bulk insert queries with 500 to 1000 rows like
This way you would need only one connection per 1000 rows and have much less overhead.