It'd be great if readline could emit 'data' events for each line, so that for await could be used:
const readline = require('readline');
const stream = require('stream');
const input = new stream.Readable();
input.push(`{"some": "json","another":"json"}\n`);
input.push(`{"some": "json2","another":"json2"}\n`);
input.push(null);
// What I wish I could do:
(async () => {
  const rl = readline.createInterface({input});
  const rows = [];
  for await (const row of rl) rows.push(row);
  console.log(rows);
})();
// workaround:
const betterReadLine = ({input}) => {
  const output = new stream.PassThrough({objectMode: true});
  const rl = readline.createInterface({input});
  rl.on('line', line => {
    output.write(JSON.parse(line));
  });
  rl.on('close', () => {
    // end() rather than push(null): it flushes any writes still buffered
    // in the PassThrough before signalling end-of-stream.
    output.end();
  });
  return output;
};

(async () => {
  const rl = betterReadLine({input});
  const rows = [];
  for await (const row of rl) rows.push(row);
  console.log(rows);
})();
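For reference, the same workaround can also be written without a PassThrough, by wrapping the 'line' and 'close' events in an async generator. A rough sketch only, with no backpressure (lines buffer in memory if the consumer is slower than the input):

const readline = require('readline');

async function* lines(input) {
  const rl = readline.createInterface({input});
  const buffered = [];
  let closed = false;
  let wake = null;

  rl.on('line', line => {
    buffered.push(line);
    if (wake) { wake(); wake = null; }
  });
  rl.on('close', () => {
    closed = true;
    if (wake) { wake(); wake = null; }
  });

  while (true) {
    if (buffered.length > 0) {
      yield buffered.shift();
    } else if (closed) {
      return;
    } else {
      // Wait for the next 'line' or 'close' event.
      await new Promise(resolve => { wake = resolve; });
    }
  }
}

It is consumed the same way: for await (const line of lines(input)) rows.push(line);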
Duplicate of https://github.com/nodejs/node/issues/18603
@caub I tried a workaround with an intermediate async generator that splits chunks and yields a Promise for each line, but that implementation was very slow, probably because of the many Promises involved. So for now I've switched to an intermediate async generator that splits each chunk and yields a Promise for an Array of the lines in that chunk.
This implementation preserves the line ending characters. If I need any granular line processing or transformation, I can use any Array functions (filter, map etc.) on the consuming end (a small example follows test.js below).
read-lines-module.js:
'use strict';

const { createReadStream } = require('fs');

module.exports = async function* readLines(path, encoding = 'utf8') {
  const readable = createReadStream(path, encoding);
  let remainder = '';
  for await (const chunk of readable) {
    // Split after every line ending (\r\n, \n, or a lone \r), keeping the
    // ending characters attached to their lines.
    const lines = (remainder === '' ? chunk : `${remainder}${chunk}`)
      .split(/(?<=\r?\n|\r(?!\n))/u);
    // The last piece may be an incomplete line, so carry it over to the next chunk.
    remainder = lines[lines.length - 1].endsWith('\n') ? '' : lines.pop();
    yield lines;
  }
  if (remainder !== '') yield [remainder];
};
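For illustration, the lookbehind split keeps each line's ending attached to its line (sample string made up for this example):

'a\r\nb\nc\rd'.split(/(?<=\r?\n|\r(?!\n))/u);
// => [ 'a\r\n', 'b\n', 'c\r', 'd' ]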
test.js:
'use strict';

const { openSync, writeSync } = require('fs');
const readLines = require('./read-lines-module.js');

const output = openSync('big-file-copy.txt', 'w');

(async function main() {
  try {
    for await (const lines of readLines('big-file.txt')) {
      writeSync(output, lines.join(''));
    }
  } catch (err) {
    console.error(err);
  }
})();
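As a small illustration of the consuming-end Array processing mentioned above, the yielded arrays can be mapped and filtered per chunk. A sketch only; the trimming and blank-line filtering here are just placeholders for whatever per-line work is needed:

'use strict';

const readLines = require('./read-lines-module.js');

(async () => {
  for await (const lines of readLines('big-file.txt')) {
    const processed = lines
      .map(line => line.replace(/\r?\n$|\r$/u, '')) // strip the preserved line endings
      .filter(line => line.length > 0);             // drop blank lines
    for (const line of processed) console.log(line);
  }
})();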
Interesting, that makes sense (keeping the line endings too, since a 'data' event should just chunk the content).
Edit: I thought your snippet was what Node.js core would adopt; sorry, I got confused. I'm hoping https://github.com/nodejs/node/pull/18904/files gets merged soon.
@caub A new attempt to implement: https://github.com/nodejs/node/pull/23916