Node: Readline: emit data events along with current line events

Created on 23 Sep 2018 · 5 comments · Source: nodejs/node

It'd be great if readline could emit 'data' events for each line, so that for await could be used:

const readline = require('readline');
const stream = require('stream');

const input = new stream.Readable();
input.push(`{"some": "json","another":"json"}\n`);
input.push(`{"some": "json2","another":"json2"}\n`);
input.push(null);

// What I wish I could do:
(async () => {
  const rl = readline.createInterface({input});
  const rows = [];
  for await (const row of rl) rows.push(row);
  console.log(rows)
})();

// workaround:
const betterReadLine = ({input}) => {
  const output = new stream.PassThrough({objectMode: true});
  const rl = readline.createInterface({input});
  rl.on('line', line => {
    output.write(JSON.parse(line));
  });
  rl.on('close', () => {
    output.push(null);
  });
  return output;
};

(async () => {
  const rl = betterReadLine({input});
  const rows = [];
  for await (const row of rl) rows.push(row);
  console.log(rows)
})();
Labels: experimental, feature request, promises, readline

All 5 comments

@caub I was trying to work around this with an intermediate async generator that splits chunks and yields a Promise for each line, but that implementation was very slow, probably because of the many Promises involved. So, for now, I've switched to an intermediate async generator that splits chunks and yields a Promise for an Array of the lines in each chunk.

This implementation preserves the line ending characters. If I need any granular per-line processing or transformation, I can use plain Array methods (filter, map, etc.) on the consuming end (see the short sketch after test.js).

read-lines-module.js:

'use strict';

const { createReadStream } = require('fs');

module.exports = async function* readLines(path, encoding = 'utf8') {
  const readable = createReadStream(path, encoding);
  let remainder = '';

  for await (const chunk of readable) {
    // Split after every line break; the lookbehind keeps the terminator attached to each line.
    const lines = (remainder === '' ? chunk : `${remainder}${chunk}`)
                  .split(/(?<=\r?\n|\r(?!\n))/u);
    remainder = lines[lines.length - 1].endsWith('\n') ? '' : lines.pop();
    yield lines;
  }

  if (remainder !== '') yield [remainder];
};

test.js:

'use strict';

const { openSync, writeSync } = require('fs');
const readLines = require('./read-lines-module.js');

const output = openSync('big-file-copy.txt', 'w');

(async function main() {
  try {
    for await (const lines of readLines('big-file.txt')) {
      writeSync(output, lines.join(''));
    }
  } catch (err) {
    console.error(err);
  }
})();
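
For example, a rough consumer-side sketch (not part of the module above) that drops blank lines and strips the terminators using plain Array methods:

'use strict';

const readLines = require('./read-lines-module.js');

(async function main() {
  try {
    for await (const lines of readLines('big-file.txt')) {
      // Array methods apply per chunk; the line endings are still attached here.
      const nonBlank = lines.filter((line) => line.trim() !== '');
      console.log(nonBlank.map((line) => line.trimEnd()));
    }
  } catch (err) {
    console.error(err);
  }
})();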

Interesting, that makes sense (keeping the line endings too, since a 'data' event should just chunk the content).

edit: I thought your snippet was what Node.js core would adopt; sorry, I got confused. Hoping for https://github.com/nodejs/node/pull/18904/files to get merged soon.

@caub A new attempt to implement: https://github.com/nodejs/node/pull/23916
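
Assuming that PR lands roughly as proposed (readline.Interface getting a Symbol.asyncIterator), the example from the original post should reduce to something like:

'use strict';

const readline = require('readline');
const stream = require('stream');

const input = new stream.Readable();
input.push(`{"some": "json","another":"json"}\n`);
input.push(`{"some": "json2","another":"json2"}\n`);
input.push(null);

(async () => {
  const rl = readline.createInterface({ input });
  const rows = [];
  // Each iteration yields one line, without the trailing newline.
  for await (const line of rl) rows.push(JSON.parse(line));
  console.log(rows);
})();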
