Read a file one line at a time in node.js?
// Print every line arriving on stdin, using the third-party `lazy` module.
const Lazy = require("lazy");

const stdinLines = new Lazy(process.stdin).lines;
stdinLines.forEach((line) => {
  // Each entry is converted with toString() before it is printed.
  console.log(line.toString());
});

// Resume stdin so that data is delivered to the reader above.
process.stdin.resume();
// Open the CSV file read-only; Process receives the result.
fs.open('./VeryBigFile.csv', 'r', '0666', Process);

/**
 * Callback for fs.open.
 *
 * @param {Error|null} err - error raised while opening; rethrown when set
 * @param {number} fd - descriptor of the opened file
 */
function Process(err, fd) {
  if (err) throw err;
  // DO lazy read
}
自 Node.js v0.12 起(以及 Node.js v4.0.0),已经有了稳定的 readline 核心模块。以下是从文件中逐行读取的最简单方法,无需任何外部模块:
// Read a file line by line with the core readline module (no external modules).
var lineReader = require('readline').createInterface({
  // Path of the file to read; replace with the real file name.
  input: require('fs').createReadStream('file.in'),
  // Treat every "\r\n" as a single line break regardless of timing.
  crlfDelay: Infinity
});

// 'line' fires once per input line, without the line terminator.
lineReader.on('line', function (line) {
  console.log('Line from file:', line);
});
var fs = require('fs'),
    readline = require('readline');

// Line reader over a file stream.
var rd = readline.createInterface({
  input: fs.createReadStream('/path/to/file'),
  output: process.stdout,
  // `terminal` is the readline option that controls TTY treatment of the
  // streams; the previous `console: false` key is not a readline option.
  terminal: false
});

// Print each line as it is read.
rd.on('line', function (line) {
  console.log(line);
});
// Iterate over file.txt with the third-party `line-reader` module.
const lineReader = require('line-reader');

lineReader.eachLine('file.txt', (line, last) => {
  console.log(line);
  // do whatever you want with line...
  if (!last) {
    return;
  }
  // or check if it's the last one
});
// Pull-style reading with `line-reader`: open the file, then request lines.
// NOTE(review): newer line-reader releases may pass (err, reader) to this
// callback - confirm against the installed version.
lineReader.open('file.txt', function (reader) {
  if (reader.hasNextLine()) {
    reader.nextLine(function (line) {
      console.log(line);
    });
  }
});
// Synchronous variant: load the whole file into memory, then split it.
require('fs')
  .readFileSync('file.txt', 'utf-8')
  // Accept both LF and CRLF line endings.
  .split(/\r?\n/)
  .forEach(function (line) {
    console.log(line);
  });
// readline interface over a file stream; `fs` and `readline` must already be
// in scope.
const rl = readline.createInterface({
  input: fs.createReadStream('/path/file.txt'),
  output: process.stdout,
  terminal: false,
});

rl.on('line', (line) => {
  console.log(line); // or parse line
});
// Text received after the most recent newline (an incomplete line).
var last = "";

// Deliver decoded strings so a multi-byte character is never cut in half at
// a chunk boundary.
process.stdin.setEncoding('utf8');

process.stdin.on('data', function (chunk) {
  var lines = (last + chunk).split("\n");
  var i;

  // Every element except the final one is a complete line.
  for (i = 0; i < lines.length - 1; i++) {
    console.log("line:" + lines[i]);
  }

  // The final element is the unfinished tail; keep it for the next chunk.
  last = lines[i];
});

process.stdin.on('end', function () {
  // Flush the tail only when the input did not end with a newline.
  if (last !== "") {
    console.log("line:" + last);
  }
});

process.stdin.resume();
// Line splitting delegated to the third-party `carrier` module.
const carrier = require('carrier');

// Handler invoked with each line that carrier extracts.
const reportLine = (line) => {
  console.log('got one line: ' + line);
};

process.stdin.resume();
carrier.carry(process.stdin, reportLine);
// Event-driven reading with BufferedReader (must already be in scope).
// NOTE(review): the first argument is presumably the file name - confirm.
const reader = new BufferedReader("lorem ipsum", { encoding: "utf8" });

reader
  .on("error", (error) => {
    console.log("error:" + error);
  })
  .on("line", (line) => {
    console.log("line:" + line);
  })
  .on("end", () => {
    console.log("EOF");
  })
  .read();
- 交互式行处理(基于回调,不将整个文件加载到RAM中)
- 或者,返回数组中的所有行(详细或原始模式)
- 以交互方式中断流式处理,或执行类似映射/筛选器的处理
- 检测任何换行约定(PC/Mac/Linux)
- 正确的EOF/最后一行处理
- 多字节UTF-8字符的正确处理
- 按行检索字节偏移量和字节长度信息
- 随机访问,使用基于行或基于字节的偏移量
- 自动映射行偏移信息,加快随机访问速度
- 零依赖项
- 附带测试
// Line splitting delegated to the third-party `split` module; `fs` and
// `file` must already be in scope.
const split = require('split');

const lineStream = fs.createReadStream(file).pipe(split());

lineStream.on('data', (line) => {
  // each chunk is now a separate line
});
const fs = require("fs");

// Read the whole file asynchronously, then print it line by line.
fs.readFile('./file', 'utf-8', (err, file) => {
  // Surface read failures instead of calling split() on undefined.
  if (err) throw err;

  const lines = file.split('\n');

  for (const line of lines) {
    console.log(line);
  }
});
/**
 * Stream a file and emit it line by line.
 *
 * Events on the returned emitter:
 *   - "line"  (Buffer): one line, without its trailing LF (byte 10)
 *   - "end":            emitted after the last line
 *   - "error" (Error):  forwarded from the underlying read stream
 *
 * @param {string} fileName - path of the file to read
 * @returns {EventEmitter} emitter producing the events above
 */
function createLineReader(fileName) {
  var EventEmitter = require("events").EventEmitter;
  var ev = new EventEmitter();
  var stream = require("fs").createReadStream(fileName);
  // Bytes after the last LF of the previous chunk (an unfinished line).
  var remainder = null;

  stream.on("data", function (data) {
    if (remainder !== null) {
      // Prepend the unfinished line to the newly received chunk.
      data = Buffer.concat([remainder, data]);
    }

    var start = 0;
    var lf;
    while ((lf = data.indexOf(10, start)) !== -1) {
      ev.emit("line", data.slice(start, lf));
      start = lf + 1;
    }

    remainder = start < data.length ? data.slice(start) : null;
  });

  stream.on("error", function (err) {
    ev.emit("error", err);
  });

  stream.on("end", function () {
    // A final line without a trailing LF is still a line.
    if (remainder !== null) ev.emit("line", remainder);
    ev.emit("end");
  });

  return ev;
}

//---------main---------------
// Only run when a file name was passed on the command line.
if (process.argv[2]) {
  var fileName = process.argv[2];
  var lineReader = createLineReader(fileName);
  lineReader.on("line", function (line) {
    console.log(line.toString());
    //console.log("++++++++++++++++++++")
  });
}
# Read input one line at a time into $_ and pass each line to process_line.
while (defined($_ = <>)) {
    process_line($_);
}
- 可以在许多项目中重用的最小同步代码。
- 文件大小或行数没有限制。
- 行的长度没有限制。
- 能够处理UTF-8格式的完整Unicode,包括BMP以外的字符。
- 能够处理*nix和windows行尾(我不需要旧式的mac)。
- 返回的每一行中要保留行尾字符。
- 能够处理最后一行,有或没有行尾字符。
- 不要使用node.js发行版中未包含的任何外部库。
var fs = require('fs'),
    StringDecoder = require('string_decoder').StringDecoder,
    util = require('util');

/**
 * Synchronously read an open file descriptor as UTF-8 and deliver it one
 * line at a time.
 *
 * Each delivered line keeps its terminator ("\n" or "\r\n"). A final line
 * without a terminator is delivered as-is. Memory use is bounded by the
 * longest line plus one chunk.
 *
 * @param {number} fd - open file descriptor, read from its current position
 * @param {function(string)} [onLine] - receives each line; when omitted, the
 *   global `processLine` function is called instead
 */
function lineByLine(fd, onLine) {
  var emit = typeof onLine === 'function' ? onLine : processLine;
  var CHUNK_SIZE = 16384;
  var chunk = Buffer.alloc(CHUNK_SIZE);
  // The decoder holds back incomplete multi-byte sequences between chunks.
  var decoder = new StringDecoder('utf8');
  var text = '';    // decoded text that has not been fully emitted yet
  var start = 0;    // index in `text` of the first un-emitted character
  var scanned = 0;  // everything before this index is known to hold no LF
  var eof = false;

  for (;;) {
    var eol = text.indexOf('\n', scanned);
    if (eol !== -1) {
      emit(text.substring(start, eol + 1));
      start = eol + 1;
      scanned = start;
      continue;
    }
    if (eof) break;

    // No complete line is buffered: drop what was emitted, then read more.
    text = text.substring(start);
    start = 0;
    scanned = text.length;

    var bytesRead = fs.readSync(fd, chunk, 0, CHUNK_SIZE, null);
    if (bytesRead === 0) {
      // Only a zero-length read means EOF; short reads are not EOF.
      eof = true;
      text += decoder.end();
    } else {
      text += decoder.write(chunk.slice(0, bytesRead));
    }
  }

  // Last line without a terminator.
  if (start < text.length) {
    emit(text.substring(start));
  }
}
var fs = require('fs');
var readlines = require('gen-readlines');

// Open the file, look up its size, then iterate the lines produced by the
// third-party gen-readlines generator.
fs.open('./file.txt', 'r', function (err, fd) {
  if (err) throw err;

  fs.fstat(fd, function (err, stats) {
    if (err) throw err;

    for (var line of readlines(fd, stats.size)) {
      console.log(line.toString());
    }

    // Release the descriptor once every line has been consumed.
    fs.closeSync(fd);
  });
});
var fs = require('fs');
var readline = require('readline');
var Stream = require('stream');

/**
 * Copy a text file line by line, writing each line followed by "\n".
 *
 * Lines are appended synchronously to `outputFile` as they are read, so the
 * output normalises CRLF input to LF and always ends with a newline.
 *
 * @param {string} inputFile - path of the file to read
 * @param {string} outputFile - path of the file to append to
 * @returns {readline.Interface} the reader; its 'close' event marks the end
 *   of the input
 */
function readFileLineByLine(inputFile, outputFile) {
  // readline needs no output stream when it is only used to split lines.
  var rl = readline.createInterface({
    input: fs.createReadStream(inputFile),
    terminal: false,
    crlfDelay: Infinity
  });

  rl.on('line', function (line) {
    fs.appendFileSync(outputFile, line + '\n');
  });

  return rl;
}
var fs = require('fs');
var StringDecoder = require('string_decoder').StringDecoder;

/**
 * Read a file in 1 KiB steps and report it line by line.
 *
 * Lines are split on "\n"; a "\r" directly before it is stripped as well.
 * Each step is scheduled with process.nextTick, so the callbacks run after
 * readfile() has returned. A trailing newline does not produce an extra
 * empty line.
 *
 * @param {string} name - path of the file to read
 * @param {function(string, number)} online - called with (line, lineNumber);
 *   line numbers start at 1
 * @param {function(number)} [onend] - called with the total line count once
 *   the file is exhausted and closed
 * @param {string} [encoding="utf8"] - text encoding of the file
 */
function readfile(name, online, onend, encoding) {
  var BUF_SIZE = 1024;
  var buffer = Buffer.alloc(BUF_SIZE);
  // The decoder keeps partial multi-byte characters across buffer boundaries.
  var decoder = new StringDecoder(encoding || "utf8");
  var fd = fs.openSync(name, 'r');
  var position = 0;
  var eof = false;
  var data = "";
  var lines = 0;

  // Read the next block and append its decoded text to `data`.
  function readbuf() {
    var bytesRead = fs.readSync(fd, buffer, 0, BUF_SIZE, position);
    position += bytesRead;
    if (bytesRead === 0) {
      eof = true;
      data += decoder.end();
    } else {
      data += decoder.write(buffer.slice(0, bytesRead));
    }
  }

  // Close the file, flush an unterminated last line, and report the total.
  function finish() {
    fs.closeSync(fd);
    if (data.length > 0) online(data, ++lines);
    if (typeof onend === 'function') onend(lines);
  }

  // Emit one line if available; otherwise read more or finish.
  function getLine() {
    var nl = data.indexOf("\n");
    if (nl === -1) {
      if (eof) return finish();
      readbuf();
      return process.nextTick(getLine);
    }

    var end = nl > 0 && data[nl - 1] === "\r" ? nl - 1 : nl;
    var line = data.substring(0, end);
    data = data.substring(nl + 1);
    online(line, ++lines);
    process.nextTick(getLine);
  }

  getLine();
}
另一种解决方案是通过顺序执行器 nsynjs 来运行逻辑。它使用 Node 的 readline 模块逐行读取文件,并且不使用 Promise 或递归,因此在处理大型文件时不会失败。代码如下所示:
var nsynjs = require('nsynjs');
var textFile = require('./wrappers/nodeReadline').textFile; // this file is part of nsynjs

/**
 * Logic executed by nsynjs: open the file, print every line, close the file.
 * The loop ends when readLine yields `undefined` data.
 *
 * NOTE(review): this function name shadows Node's global `process` object
 * within this file - confirm that is acceptable before renaming.
 *
 * @param {Function} textFile - the wrapper constructor passed in by nsynjs.run
 */
function process(textFile) {
  var fh = new textFile();
  fh.open('path/to/file');
  var s;
  while (typeof(s = fh.readLine(nsynjsCtx).data) != 'undefined')
    console.log(s);
  fh.close();
}

// Run `process` through nsynjs; the callback fires when it has finished.
var ctx = nsynjs.run(process, {}, textFile, function () {
  console.log('done');
});
我有一个小模块可以很好地完成这项工作,并且已被许多其他项目使用:npm readline。注意,Node v10 中已有原生的 readline 模块,因此我将我的模块重新发布为 linebyline。
var fs = require('fs'),
    EventEmitter = require('events').EventEmitter,
    util = require('util'),
    newlines = [
      13, // \r
      10  // \n
    ];

/**
 * Line-by-line file reader. Works with or without `new`.
 *
 * Events:
 *   - 'open'  (fd):          the underlying stream opened the file
 *   - 'line'  (text, count): a non-empty line and its 1-based line number;
 *                            empty lines are counted but not emitted
 *   - 'error' (err):         forwarded from the underlying stream
 *   - 'end' / 'close':       forwarded from the underlying stream
 *
 * "\r", "\n" and "\r\n" each count as ONE line break.
 *
 * @param {string} file - path of the file to read
 * @param {Object} [opts] - options object (currently not consulted)
 */
var readLine = module.exports = function (file, opts) {
  // Allow calling without `new`, passing the options through.
  if (!(this instanceof readLine)) return new readLine(file, opts);

  EventEmitter.call(this);
  opts = opts || {};

  var self = this,
      line = [],              // bytes of the line being assembled
      lineCount = 0,
      previousWasCR = false,  // lets the LF of a CRLF pair be skipped
      emit = function (line, count) {
        self.emit('line', Buffer.from(line).toString(), count);
      };

  this.input = fs.createReadStream(file);
  this.input.on('open', function (fd) {
    self.emit('open', fd);
  })
  .on('data', function (data) {
    for (var i = 0; i < data.length; i++) {
      var byte = data[i];

      if (byte === 10 && previousWasCR) {
        // Second half of "\r\n": the break was already counted at "\r".
        previousWasCR = false;
        continue;
      }
      previousWasCR = byte === 13;

      if (0 <= newlines.indexOf(byte)) { // Newline char was found.
        lineCount++;
        if (line.length) emit(line, lineCount);
        line = []; // Empty buffer.
      } else {
        line.push(byte); // Buffer new line data.
      }
    }
  })
  .on('error', function (err) {
    self.emit('error', err);
  })
  .on('end', function () {
    // Emit last line if anything left over since EOF won't trigger it.
    if (line.length) {
      lineCount++;
      emit(line, lineCount);
    }
    self.emit('end');
  })
  .on('close', function () {
    self.emit('close');
  });
};

util.inherits(readLine, EventEmitter);
// example: count the lines of /etc/issue with the third-party `line-kit`.
let count = 0;

const lineKit = require('line-kit');
const input = require('fs').createReadStream('/etc/issue');

// Called once per line.
const onLine = (line) => {
  count += 1;
};

// Called when the input is exhausted.
const onDone = () => {
  console.log(`seen ${count} lines`);
};

lineKit(input, onLine, onDone);
/**
 * Make `stream` emit a 'line' event for every line found in its 'data'.
 *
 * Each emitted line INCLUDES the text matched by the separator. Whatever is
 * left in the buffer when the stream ends is emitted as a final line.
 *
 * @param {EventEmitter} stream - source of 'data' and 'end' events; 'line'
 *   events are emitted on this same object
 * @param {RegExp} [re=/\n/] - line separator pattern
 */
function emitLines(stream, re) {
  // Use the caller's pattern when given, otherwise split on LF.
  var separator = re || /\n/;
  var buffer = '';

  stream.on('data', stream_data);
  stream.on('end', stream_end);

  function stream_data(data) {
    buffer += data;
    flush();
  }//stream_data

  function stream_end() {
    if (buffer) stream.emit('line', buffer);
    buffer = '';
  }//stream_end

  // Emit every complete line currently in the buffer.
  function flush() {
    var match;
    // A /g or /y pattern is stateful: always search from the buffer start.
    separator.lastIndex = 0;
    while ((match = separator.exec(buffer))) {
      var index = match.index + match[0].length;
      // An empty match at position 0 would never advance; stop instead.
      if (index === 0) break;
      stream.emit('line', buffer.substring(0, index));
      buffer = buffer.substring(index);
      separator.lastIndex = 0;
    }
  }//flush
}//emitLines
const EventEmitter = require('events').EventEmitter;

/**
 * Splits the string chunks of a stream into lines and emits one 'line'
 * event per line (without the delimiter).
 *
 * Counters: `totalChars` is the number of characters received;
 * `totalLines` is the number of lines emitted. Both are printed when the
 * stream ends.
 */
class LineReader extends EventEmitter {
  /**
   * @param {EventEmitter} f - stream emitting string 'data' chunks and 'end'
   * @param {string} [delim='\n'] - line delimiter
   */
  constructor(f, delim = '\n') {
    super();
    this.totalChars = 0;
    this.totalLines = 0;
    // Text after the last delimiter seen so far (an unfinished line).
    this.leftover = '';

    f.on('data', (chunk) => {
      this.totalChars += chunk.length;
      const lines = (this.leftover + chunk).split(delim);
      // The last piece is always unfinished (empty if the chunk ended with
      // the delimiter), so it is never emitted here.
      this.leftover = lines.pop();
      this.totalLines += lines.length;
      for (const l of lines) this.onLine(l);
    });

    f.on('end', () => {
      // Flush a final line that had no trailing delimiter.
      if (this.leftover !== '') {
        const last = this.leftover;
        this.leftover = '';
        this.totalLines += 1;
        this.onLine(last);
      }
      console.log('chars', this.totalChars, 'lines', this.totalLines);
    });
  }

  /**
   * Publish one line to listeners.
   * @param {string} l - the line text
   */
  onLine(l) {
    this.emit('line', l);
  }
}

//Command line test
// Runs only when a file name was given: node <script> <file> [delimiter]
if (process.argv[2]) {
  const f = require('fs').createReadStream(process.argv[2], 'utf8');
  const delim = process.argv[3];
  const lineReader = new LineReader(f, delim);
  lineReader.on('line', (line) => console.log(line));
}
/**
 * Minimal line splitter: turns the string 'data' chunks of a stream into
 * 'line' events (delimiter removed).
 */
class LineReader extends require('events').EventEmitter {
  /**
   * @param {EventEmitter} f - stream emitting string 'data' chunks and 'end'
   * @param {string} [delim='\n'] - line delimiter
   */
  constructor(f, delim = '\n') {
    super();
    // Text after the last delimiter seen so far (an unfinished line).
    this.leftover = '';

    f.on('data', (chunk) => {
      const lines = (this.leftover + chunk).split(delim);
      // The last piece is always unfinished (empty if the chunk ended with
      // the delimiter), so keep it back instead of emitting it.
      this.leftover = lines.pop();
      for (const l of lines) this.emit('line', l);
    });

    f.on('end', () => {
      // Flush a final line that had no trailing delimiter.
      if (this.leftover !== '') {
        const last = this.leftover;
        this.leftover = '';
        this.emit('line', last);
      }
    });
  }
}
const fs = require("fs");

// Read the whole file asynchronously, log it, then rebuild it line by line.
fs.readFile('./file', 'utf-8', (err, data) => {
  // Surface read failures instead of dereferencing undefined data.
  if (err) throw err;

  // Start from an empty string; an uninitialised accumulator would prefix
  // the result with the text "undefined".
  let innerContent = '';

  console.log("Asynchronous read:" + data.toString());

  const lines = data.toString().split('\n');

  for (const line of lines) {
    innerContent += line + '';
  }
});
// Scan every file matched by the '**' glob for forbidden words. Each hit is
// printed, and the process exits with status 1 if any was found (0 if not).
(function () {
  var fs = require('fs');
  var glob = require('glob-fs')();
  var path = require('path');

  var result = 0;
  // Files that are never scanned.
  var exclude = ['LICENSE',
    path.join('e2e', 'util', 'db-ca', 'someother-file'),
    path.join('src', 'favicon.ico')];
  var files = glob.readdirSync('**');

  // Words to look for in each line.
  var patternString = [
    'trade',
    'order',
    'market',
    'securities'
  ];

  files.forEach(file => {
    try {
      if (!fs.lstatSync(file).isDirectory() && exclude.indexOf(file) === -1) {
        // Accept both LF and CRLF line endings.
        fs.readFileSync(file).toString().split(/\r?\n/).forEach(function (line) {
          patternString.forEach(pattern => {
            if (line.indexOf(pattern) !== -1) {
              console.log(file + ' contain `' + pattern + '` in in line"' + line +'";');
              result = 1;
            }
          });
        });
      }
    } catch (e) {
      // A failure on one file is reported; the scan continues with the next.
      console.log('Error:', e.stack);
    }
  });

  process.exit(result);
})();
const fs = require('fs');

// Stream the file; autoClose closes the descriptor on end or error.
const stream = fs.createReadStream('<filename>', {
  autoClose: true
});

stream.on('data', (chunk) => {
  // NOTE: a chunk is an arbitrary slice of the file, not a single line.
  const row = chunk.toString('ascii');
});