node.js: read a text file into an array. (Each line an item in the array.)
我想将一个非常非常大的文件读入node.js中的JavaScript数组。
所以,如果文件是这样的:
1 2 3 4 5 | first line two three ... ... |
我希望得到这样的数组:
1 | ['first line','two','three', ... , ... ] |
该函数看起来像这样:
1 | var array = load(filename); |
因此,将它全部作为字符串加载然后拆分它的想法是不可接受的。
同步:
// Synchronous variant: load the whole file into memory, then split it into
// lines. Simple, but the entire file is held as one string first.
var fs = require('fs');

// Split on the newline character so that each line of the file becomes one
// array item (splitting on a space would break lines into words).
var array = fs.readFileSync('file.txt').toString().split('\n');

// for...of walks the values directly and does not leak a global index
// variable the way `for (i in array)` does.
for (const line of array) {
  console.log(line);
}
异步:
// Asynchronous variant: the file is read in the background and the callback
// receives its full contents once reading has finished.
var fs = require('fs');

fs.readFile('file.txt', function (err, data) {
  // Surface read failures (missing file, permissions, ...) instead of
  // continuing with undefined data.
  if (err) throw err;

  // One array item per line: split on the newline character.
  var array = data.toString().split('\n');

  // Iterate over values; avoids for...in on an array and the implicit
  // global loop variable it created.
  for (const line of array) {
    console.log(line);
  }
});
如果您可以将最终数据放入数组中,那么您是否也可以将其放入字符串并将其拆分,如同建议的那样?
在任何情况下,如果您想一次处理一行文件,您也可以尝试这样的事情:
var fs = require('fs');

/**
 * Feed the contents of a readable stream to a callback, one line at a time.
 *
 * @param {object} input - Stream-like object emitting 'data' and 'end' events.
 * @param {function(string): void} func - Called once per line, without the
 *   trailing newline character.
 */
function readLines(input, func) {
  // Text received so far that has not yet been terminated by a newline.
  var remaining = '';

  input.on('data', function (data) {
    remaining += data;
    var index = remaining.indexOf('\n');
    while (index > -1) {
      var line = remaining.substring(0, index);
      // Drop the consumed line and its newline from the buffer.
      remaining = remaining.substring(index + 1);
      func(line);
      index = remaining.indexOf('\n');
    }
  });

  input.on('end', function () {
    // A final line without a trailing newline is still a line.
    if (remaining.length > 0) {
      func(remaining);
    }
  });
}

/**
 * Example line handler: print each line with a prefix.
 *
 * @param {string} data - One line of the file.
 */
function func(data) {
  console.log('Line: ' + data);
}

// Ask the stream to decode UTF-8 itself so that a multi-byte character split
// across two chunks is not corrupted when chunks are appended as strings.
var input = fs.createReadStream('lines.txt', { encoding: 'utf8' });
readLines(input, func);
编辑:(回应phopkins的评论)我认为(至少在较新版本中)substring不会复制数据,而是创建一个特殊的SlicedString对象(这是我快速浏览v8源代码后的印象)。无论如何,下面是一个修改版,它避免了上面提到的substring调用(已在一个数兆字节、内容为"All work and no play makes Jack a dull boy"的文件上测试过):
/**
 * Feed the contents of a readable stream to a callback, one line at a time.
 *
 * Instead of re-slicing the buffer after every line, each chunk is scanned
 * with a moving start offset and the buffer is trimmed once per chunk.
 *
 * @param {object} input - Stream-like object emitting 'data' and 'end' events.
 * @param {function(string): void} func - Called once per line, without the
 *   trailing newline character.
 */
function readLines(input, func) {
  // Text received so far that has not yet been terminated by a newline.
  let remaining = '';

  input.on('data', (data) => {
    remaining += data;

    // `last` is the start of the line currently being scanned.
    let last = 0;
    let index = remaining.indexOf('\n');
    while (index > -1) {
      func(remaining.substring(last, index));
      last = index + 1;
      index = remaining.indexOf('\n', last);
    }

    // Keep only the unterminated tail for the next chunk.
    remaining = remaining.substring(last);
  });

  input.on('end', () => {
    // A final line without a trailing newline is still a line.
    if (remaining.length > 0) {
      func(remaining);
    }
  });
}
使用Node.js readline模块。
// Print every line of the file named on the command line, using the
// built-in readline module to do the line splitting.
var fs = require('fs');
var readline = require('readline');

// First user-supplied command-line argument.
var filename = process.argv[2];

readline
  .createInterface({
    input: fs.createReadStream(filename),
    terminal: false,
  })
  .on('line', (line) => {
    console.log('Line: ' + line);
  });
使用readline(文档)。这是一个读取css文件,解析图标并将它们写入json的示例
// Scan a stylesheet line by line, collect the icon class names it defines,
// and write them to a JSON file once the whole file has been read.

// Bound to a name because it is needed twice: to open the input stream and,
// later, to write the output file.
var fs = require('fs');

var results = [];
var rl = require('readline').createInterface({
  input: fs.createReadStream('./assets/stylesheets/_icons.scss')
});

// For every line that matches the regex, add the cleaned-up match to the
// results array.
rl.on('line', function (line) {
  var re = /\.icon-icon.*:/;
  var match = re.exec(line);
  if (match !== null) {
    // Strip the leading "." and the trailing ":" from the matched selector.
    results.push(match[0].replace(".", '').replace(":", ''));
  }
});

// readline emits a 'close' event when the input has been fully read.
rl.on('close', function () {
  var outputFilename = './icons.json';
  fs.writeFile(outputFilename, JSON.stringify(results, null, 2), function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("JSON saved to " + outputFilename);
    }
  });
});
伪
// Uses the third-party `jfile` package (installed separately via npm).
var JFile = require('jfile');

// Wrap the file; its lines are then read from the `lines` property.
var myF = new JFile("./data.txt");

myF.lines // ["first line","second line"] ....
别忘了:
1 | npm install jfile --save |
使用BufferedReader,但该函数应该是异步的:
/**
 * Read a file into an array of lines and hand the array to a callback.
 *
 * NOTE(review): `BufferedReader` is not defined in this snippet; it is
 * presumably provided by a separately installed package - confirm.
 *
 * @param {string} file - Path of the file to read.
 * @param {function(*, ?Array): void} cb - Called with (error, null) when the
 *   reader reports an error, or (null, lines) when it reports the end.
 */
var load = function (file, cb) {
  // Filled by the "line" handler, delivered by the "end" handler.
  var lines = [];

  new BufferedReader(file, { encoding: "utf8" })
    .on("error", (error) => {
      cb(error, null);
    })
    .on("line", (line) => {
      lines.push(line);
    })
    .on("end", () => {
      cb(null, lines);
    })
    .read();
};

load("file", function (error, lines) {
  if (error) {
    console.log(error);
    return;
  }
  console.log(lines);
});
我只是想对@finbarr的精彩回答做一点补充,在异步示例中有一处小修正:
异步:
// Asynchronous variant that also signals completion to its caller.
var fs = require('fs');

fs.readFile('file.txt', function (err, data) {
  // Surface read failures instead of continuing with undefined data.
  if (err) throw err;

  // One array item per line: split on the newline character.
  var array = data.toString().split('\n');

  // Iterate over values; avoids for...in on an array and the implicit
  // global loop variable it created.
  for (const line of array) {
    console.log(line);
  }

  // NOTE(review): `done` is not defined in this snippet; it is presumably a
  // completion callback supplied by the surrounding context - confirm.
  done();
});
@MadPhysicist,done()的作用是通知这次异步调用已经完成(释放该异步调用)。
这是@mtomis对上述答案的一种变体。
它创建了一个按行输出的流(line stream)。它会发出"data"和"end"事件,让您可以处理流的结束。
var events = require('events');

/**
 * Wrap a readable stream and re-emit its contents line by line.
 *
 * Emits 'data' once per line (without the trailing newline character) and
 * 'end' once after the wrapped stream has ended.
 *
 * @constructor
 * @param {object} input - Stream-like object emitting 'data' and 'end' events.
 */
var LineStream = function (input) {
  // Run the EventEmitter constructor for this instance so that every
  // LineStream gets its own listener table instead of sharing the one that
  // lives on the prototype.
  events.EventEmitter.call(this);

  // Text received so far that has not yet been terminated by a newline.
  var remaining = '';

  input.on('data', (data) => {
    remaining += data;

    // `last` is the start of the line currently being scanned.
    var last = 0;
    var index = remaining.indexOf('\n');
    while (index > -1) {
      this.emit('data', remaining.substring(last, index));
      last = index + 1;
      index = remaining.indexOf('\n', last);
    }

    // Keep only the unterminated tail for the next chunk.
    remaining = remaining.substring(last);
  });

  input.on('end', () => {
    // A final line without a trailing newline is still a line.
    if (remaining.length > 0) {
      this.emit('data', remaining);
    }
    this.emit('end');
  });
};

// Inherit from EventEmitter without instantiating it on the prototype.
LineStream.prototype = Object.create(events.EventEmitter.prototype, {
  constructor: { value: LineStream, writable: true, configurable: true }
});
将它用作包装器:
// Wrap an existing readable stream (`input`, created elsewhere) so that it
// can be consumed line by line.
var lineInput = new LineStream(input);

// Fired once for each line.
lineInput.on('data', (line) => {
  // handle line
});

// Fired once, after the last line has been delivered.
lineInput.on('end', () => {
  // wrap it up
});
我遇到了同样的问题,并用line-by-line模块解决了它:
https://www.npmjs.com/package/line-by-line
至少在我看来,它运行得非常好,在同步和异步模式下都是如此。
此外,行尾是否以\n结束的问题可以通过选项解决:
1 | { encoding: 'utf8', skipEmptyLines: false } |
逐行同步处理:
// Uses the third-party `line-by-line` package.
var LineByLineReader = require('line-by-line');
var lr = new LineByLineReader('big_file.txt');

lr.on('error', (err) => {
  // 'err' contains error object
});

lr.on('line', (line) => {
  // 'line' contains the current line without the trailing newline character.
});

lr.on('end', () => {
  // All lines are read, file is closed now.
});
Node.js v8及更高版本提供了一个新特性,可以把普通的回调式函数转换为返回Promise的异步函数:
util.promisify
这是一个很棒的功能。下面的示例把txt文件中的10000个数字解析为数组,然后对这些数字使用归并排序来统计逆序对的数量。
// read from txt file
const util = require('util');
const fs = require('fs');

// Promise-returning wrapper around the callback-based fs.readFile. It is
// kept on the fs object because that is where this script exposes it.
fs.readFileAsync = util.promisify(fs.readFile);

// Numbers parsed so far; shared with the code that runs after parsing.
let result = [];

/**
 * Read a text file containing one number per line and append every number
 * to the module-level `result` array.
 *
 * @param {string} csvFile - Path of the text file to read.
 * @returns {Promise<number[]>} Resolves with the `result` array.
 */
const parseTxt = async (csvFile) => {
  const data = await fs.readFileAsync(csvFile);
  const str = data.toString();

  // One entry per line: split on the newline character.
  const lines = str.split('\n');
  console.log("lines", lines);

  lines.forEach((line) => {
    // Skip empty lines (for example the one after a trailing newline).
    if (!line) {
      return;
    }
    result.push(Number(line));
  });

  console.log("result", result);
  return result;
};

parseTxt('./count-inversion.txt')
  .then(() => {
    // NOTE(review): `mergeSort` is not defined in this snippet; it is
    // presumably declared elsewhere in the script - confirm.
    console.log(mergeSort({ arr: result, count: 0 }));
  })
  .catch((err) => {
    // Report read/parse failures instead of leaving the rejection unhandled.
    console.error(err);
  });
要将大文件读入数组,您可以逐行读取,也可以逐块(chunk)读取。
逐行参考我的答案
// Read a file line by line with the third-party `event-stream` package and
// collect the lines into an array.
var fs = require('fs'),
    es = require('event-stream');

var lines = [];

// `s` is the value returned by the last pipe() call in the chain.
var s = fs.createReadStream('filepath')
  .pipe(es.split())
  .pipe(es.mapSync(function (line) {
      // Pause while the line is handled, then resume.
      s.pause();
      lines.push(line);
      s.resume();
    })
    .on('error', function (err) {
      console.log('Error:', err);
    })
    .on('end', function () {
      console.log('Finish reading.');
      console.log(lines);
    })
  );
chunk by chunk参考这篇文章
// Read a file synchronously in fixed-size chunks and append its lines to the
// `lines` array. Relies on `fs` and `lines` being declared earlier in the
// surrounding script.
var offset = 0;
var chunkSize = 2048;
// Buffer.alloc replaces the deprecated `new Buffer(size)` constructor and
// returns zero-filled memory.
var chunkBuffer = Buffer.alloc(chunkSize);
var fp = fs.openSync('filepath', 'r');
var bytesRead = 0;
// Raw bytes of a line whose terminating newline has not been seen yet.
var pending = [];

try {
  while ((bytesRead = fs.readSync(fp, chunkBuffer, 0, chunkSize, offset)) > 0) {
    offset += bytesRead;
    var chunk = chunkBuffer.subarray(0, bytesRead);

    // Work on bytes, not decoded text: the newline byte (0x0a) never occurs
    // inside a multi-byte UTF-8 sequence, so cutting there cannot split a
    // character, and no string-length/byte-length mismatch can occur.
    var lastNewline = chunk.lastIndexOf(0x0a);
    if (lastNewline === -1) {
      // No line ends in this chunk (the line is longer than one chunk).
      // Copy the bytes, because chunkBuffer is overwritten by the next read.
      pending.push(Buffer.from(chunk));
      continue;
    }

    // Everything up to the last newline consists of complete lines.
    var complete = Buffer.concat(pending.concat([chunk.subarray(0, lastNewline)]));
    Array.prototype.push.apply(lines, complete.toString().split('\n'));

    // The rest of the chunk is the start of a line that continues later.
    pending = [Buffer.from(chunk.subarray(lastNewline + 1))];
  }

  // A final line without a trailing newline is still a line.
  var tail = Buffer.concat(pending);
  if (tail.length > 0) {
    lines.push(tail.toString());
  }
} finally {
  // Always release the file descriptor, even if a read throws.
  fs.closeSync(fp);
}

console.log(lines);