关于node.js:Library Recommendations:NodeJs读取csv文件

Library Recommendations: NodeJs reading csv file

使用nodejs,我想解析一个包含10000条记录的.csv文件,并对每一行执行一些操作。 我尝试过使用http://www.adaltas.com/projects/node-csv,但无法让它在每一行处暂停,它只会一次性读完全部10000条记录。 我需要做以下事情

  • 逐行读取csv
  • 在每一行上执行耗时的操作
  • 转到下一行
  • 任何人都可以在这里建议任何其他想法?


    看起来你需要一个基于流的解决方案。这样的库已经存在,所以在重新发明轮子之前,先试试这个库,它还包含验证支持: https://www.npmjs.org/package/fast-csv


    我当前的解决方案使用异步模块来串行执行:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    const fs = require('fs');
    const parse = require('csv-parse');
    const async = require('async');

    const inputFile='myfile.csv';

    // The parser hands every record to this callback in one `data` array;
    // async.eachSeries then walks that array one line at a time.
    const parser = parse({delimiter: ','}, function (err, data) {
      if (err) {
        // Parsing failed, so there is nothing to iterate over.
        console.error(err);
        return;
      }
      async.eachSeries(data, function (line, callback) {
        // do something with the line
        doSomething(line).then(function() {
          // when processing finishes invoke the callback to move to the next one
          callback();
        }, callback); // forward a rejection so the series stops instead of hanging
      }, function (seriesErr) {
        // Called once after the last line, or as soon as one line fails.
        if (seriesErr) {
          console.error(seriesErr);
        }
      });
    });

    fs.createReadStream(inputFile)
      .on('error', function (readErr) {
        // e.g. the input file does not exist or cannot be read
        console.error(readErr);
      })
      .pipe(parser);


    我用这种方式: -

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    var fs = require('fs');
    var parse = require('csv-parse');

    // Collects every parsed row so the whole file is available in the 'end' handler.
    var csvData=[];
    fs.createReadStream(req.file.path)
        .on('error', function(err) {
            // read failure; without a listener the 'error' event would be thrown
            console.error(err);
        })
        .pipe(parse({delimiter: ':'}))
        .on('error', function(err) {
            // parse failure; errors are not forwarded across pipe(), so the
            // parser needs its own listener
            console.error(err);
        })
        .on('data', function(csvrow) {
            console.log(csvrow);
            //do something with csvrow
            csvData.push(csvrow);
        })
        .on('end',function() {
          //do something with csvData
          console.log(csvData);
        });


    您正在引用的node-csv项目完全足以完成转换大部分CSV数据的每一行的任务,来自以下文档:http://csv.adaltas.com/transform/:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    // Feed two inline CSV records through the transformer and print each result.
    // The records are separated by an escaped newline; a raw line break inside a
    // quoted string literal is a syntax error.
    csv()
      .from('82,Preisner,Zbigniew\n94,Gainsbourg,Serge')
      .to(console.log)
      .transform(function(row, index, callback){
        // The callback is invoked on a later tick, so the transform is asynchronous.
        process.nextTick(function(){
          callback(null, row.reverse());
        });
    });

    根据我的经验,我可以说它也是一个相当快速的实现,我一直在处理具有接近10k记录的数据集,并且整个集合的处理时间在合理的几十毫秒级别。

    关于jurka的基于流的解决方案建议:node-csv是基于流的,并遵循Node.js的流API。


    要在fast-csv中暂停流式传输,您可以执行以下操作:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    // Open filePath as a row stream; the first line is used as the header row.
    let csvstream = csv.fromPath(filePath, { headers: true });

    // Each row pauses the stream before the work and resumes it afterwards.
    csvstream.on("data", (row) => {
        csvstream.pause();
        // do some heavy work
        // when done resume the stream
        csvstream.resume();
    });

    csvstream.on("end", () => {
        console.log("We are done!");
    });

    csvstream.on("error", (error) => {
        console.log(error);
    });


    Fast-CSV npm模块可以从csv文件逐行读取数据。

    这是一个例子:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    // fs is needed for createReadStream below; the snippet used it without requiring it.
    const fs = require('fs');
    const csv = require('fast-csv');

    var stream = fs.createReadStream("my.csv");

    // Rows are emitted one at a time through the "data" event.
    csv
     .fromStream(stream, {headers : true})
     .on("data", function(data){
         console.log('I am one line of data', data);
     })
     .on("error", function(error){
         // without a listener an "error" event would be thrown
         console.error(error);
     })
     .on("end", function(){
         console.log("done");
     });


    • 此解决方案使用的是csv-parser,而不是上面某些答案中使用的csv-parse。
    • csv-parser比csv-parse晚出现约2年。
    • 两者用途相同,但我个人觉得csv-parser更好,因为用它处理标题行很方便。

    首先安装csv-parser:

    1
    npm install csv-parser

    所以假设你有一个像这样的csv文件:

    1
    2
    3
    NAME, AGE
    Lionel Messi, 31
    Andres Iniesta, 34

    您可以执行以下所需的操作:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    const fs = require('fs');
    const csv = require('csv-parser');

    // A file written as "NAME, AGE" has a space after each comma. Trimming the
    // headers and values keeps the second column keyed as "AGE" rather than
    // " AGE", so data.AGE is defined below.
    const parser = csv({
        mapHeaders: ({ header }) => header.trim(),
        mapValues: ({ value }) => value.trim()
    });

    fs.createReadStream(inputFilePath)
    .on('error', function(err){
        // read failure (missing file, permissions, ...)
        console.error(err);
    })
    .pipe(parser)
    .on('data', function(data){
        try {
            console.log("Name is:"+data.NAME);
            console.log("Age is:"+data.AGE);

            //perform the operation
        }
        catch(err) {
            // report the failure for this row instead of silently dropping it
            console.error(err);
        }
    })
    .on('end',function(){
        //some final operation
    });

    如需进一步阅读参考


    我需要一个异步的csv读取器,最初尝试了@Pransh Tiwari的答案,但无法让它配合await和util.promisify()工作。最终我找到了node-csvtojson,它的功能与csv-parser基本相同,但支持promise。以下是csvtojson的示例用法:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    const csvToJson = require('csvtojson');

    /**
     * Load ./recipients.csv with csvToJson (trim enabled) and log the name and
     * email of every record once the whole file has been read.
     */
    const processRecipients = async () => {
        const converter = csvToJson({ trim: true });
        const recipients = await converter.fromFile('./recipients.csv');

        // Code executes after recipients are fully loaded.
        for (const { name, email } of recipients) {
            console.log(name, email);
        }
    };

    试试line-by-line这个npm包。

    1
    npm install line-by-line --save


    这是我从外部网址获取csv文件的解决方案

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    const parse = require( 'csv-parse/lib/sync' );
    const axios = require( 'axios' );

    /**
     * Download a CSV file over HTTP and parse it synchronously.
     *
     * @param {string} path - URL of the CSV file.
     * @returns {Promise<Array|undefined>} the parsed records, or undefined when
     *   the download or the parse fails (the error is logged, not rethrown).
     */
    const readCSV = ( module.exports.readCSV = async ( path ) => {
      try {
        // 'blob' is a browser-only responseType in axios; request plain text.
        const res = await axios( { url: path, method: 'GET', responseType: 'text' } );
        const records = parse( res.data, {
          columns: true,
          skip_empty_lines: true
        } );

        return records;
      } catch ( e ) {
        // Keep the best-effort contract but include the cause in the log.
        console.error( 'err', e );
      }
    } );
    readCSV('https://urltofilecsv');

    使用await / async执行此任务的解决方法:

    1
    2
    3
    const csv = require('csvtojson');
    const csvFilePath = 'data.csv';

    // `await` is only valid inside an async function in a script that uses
    // require(), so the read is wrapped in one.
    async function main() {
      const array = await csv().fromFile(csvFilePath);
      return array;
    }

    main().catch(function (err) {
      // surface read/parse failures instead of leaving the promise unhandled
      console.error(err);
    });


    您可以使用csv-to-json模块将csv转换为json格式,然后您可以在程序中轻松使用json文件


    1
    2
    3
    4
    5
    6
    7
    // Declared with const so that `fs` is not created as an implicit global.
    const fs = require('fs');

    // Read the whole file as UTF-8 text and log its content.
    fs.readFile('FILENAME WITH PATH','utf8', function(err,content){
    if(err){
        console.log('error occured ' +JSON.stringify(err));
        // stop here: content is undefined when the read fails
        return;
     }
     console.log('Fileconetent are ' + JSON.stringify(content));
    })

    npm install csv

    示例CSV文件
    你需要一个CSV文件来解析,所以要么你已经有一个,或者你可以复制下面的文本并将其粘贴到一个新文件中并调用该文件"mycsv.csv"

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    ABC, 123, Fudge
    532, CWE, ICECREAM
    8023, POOP, DOGS
    441, CHEESE, CARMEL
    221, ABC, HOUSE
    1
    ABC, 123, Fudge
    2
    532, CWE, ICECREAM
    3
    8023, POOP, DOGS
    4
    441, CHEESE, CARMEL
    5
    221, ABC, HOUSE

    示例代码读取和解析CSV文件

    创建一个新文件,并将以下代码插入其中。务必仔细阅读幕后发生的事情。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
        var csv = require('csv');
        // loads the csv module referenced above.

        var obj = csv();
        // gets the csv module to access the required functionality

        function MyCSV(Fone, Ftwo, Fthree) {
            this.FieldOne = Fone;
            this.FieldTwo = Ftwo;
            this.FieldThree = Fthree;
        };
        // Define the MyCSV object with parameterized constructor, this will be used for storing the data read from the csv into an array of MyCSV. You will need to define each field as shown above.

        var MyData = [];
        // MyData array will contain the data from the CSV file and it will be sent to the clients request over HTTP.

        obj.from.path('../THEPATHINYOURPROJECT/TOTHE/csv_FILE_YOU_WANT_TO_LOAD.csv').to.array(function (data) {
            for (var index = 0; index < data.length; index++) {
                MyData.push(new MyCSV(data[index][0], data[index][1], data[index][2]));
            }
            console.log(MyData);
        });
        //Reads the CSV file from the path you specify, and the data is stored in the array we specified using callback function.  This function iterates through an array and each line from the CSV file will be pushed as a record to another array called MyData , and logs the data into the console to ensure it worked.

    // Node's built-in HTTP module.
    var http = require('http');

    // Every request is answered with status 200 and the MyData array
    // serialized as JSON.
    var server = http.createServer((req, resp) => {
        const headers = { 'content-type': 'application/json' };
        resp.writeHead(200, headers);
        resp.end(JSON.stringify(MyData));
    });

    // Listen on port 8080 (any other port may be used instead).
    server.listen(8080);
    1
    var csv = require('csv');
    2
    // loads the csv module referenced above.
    3
    ?
    4
    var obj = csv();
    5
    // gets the csv module to access the required functionality
    6
    ?
    7
    function MyCSV(Fone, Ftwo, Fthree) {
    8
        this.FieldOne = Fone;
    9
        this.FieldTwo = Ftwo;
    10
        this.FieldThree = Fthree;
    11
    };
    12
    // Define the MyCSV object with parameterized constructor, this will be used for storing the data read from the csv into an array of MyCSV. You will need to define each field as shown above.
    13
    ?
    14
    var MyData = [];
    15
    // MyData array will contain the data from the CSV file and it will be sent to the clients request over HTTP.
    16
    ?
    17
    obj.from.path('../THEPATHINYOURPROJECT/TOTHE/csv_FILE_YOU_WANT_TO_LOAD.csv').to.array(function (data) {
    18
        for (var index = 0; index < data.length; index++) {
    19
            MyData.push(new MyCSV(data[index][0], data[index][1], data[index][2]));
    20
        }
    21
        console.log(MyData);
    22
    });
    23
    //Reads the CSV file from the path you specify, and the data is stored in the array we specified using callback function.  This function iterates through an array and each line from the CSV file will be pushed as a record to another array called MyData , and logs the data into the console to ensure it worked.
    24
    ?
    25
    var http = require('http');
    26
    //Load the http module.
    27
    ?
    28
    var server = http.createServer(function (req, resp) {
    29
        resp.writeHead(200, { 'content-type': 'application/json' });
    30
        resp.end(JSON.stringify(MyData));
    31
    });
    32
    // Create a webserver with a request listener callback.  This will write the response header with the content type as json, and end the response by sending the MyData array in JSON format.
    33
    ?
    34
    server.listen(8080);
    35
    // Tells the webserver to listen on port 8080(obviously this may be whatever port you want.)
    Things to be aware of in your app.js code
    In lines 7 through 11, we define the function called 'MyCSV' and the field names.

    If your CSV file has multiple columns make sure you define this correctly to match your file.

    On line 17 we define the location of the CSV file of which we are loading.  Make sure you use the correct path here.

    启动您的应用程序并验证功能
    打开控制台并键入以下命令:

    node app
    1
    node app
    您应该在控制台中看到以下输出:

    1
    2
    3
    4
    5
    [  MyCSV { FieldOne: 'ABC', FieldTwo: '123', FieldThree: 'Fudge' },
       MyCSV { FieldOne: '532', FieldTwo: 'CWE', FieldThree: 'ICECREAM' },
       MyCSV { FieldOne: '8023', FieldTwo: 'POOP', FieldThree: 'DOGS' },
       MyCSV { FieldOne: '441', FieldTwo: 'CHEESE', FieldThree: 'CARMEL' },
       MyCSV { FieldOne: '221', FieldTwo: 'ABC', FieldThree: 'HOUSE' }, ]

    1
    [MYCSV {Fieldone:'ABC',Fieldtwo:'123',Fieldthree:'Fudge'},
    2
    MYCSV {Fieldone:'532',Fieldtwo:'CWE',Fieldthree:'ICECREAM'},
    3
    MYCSV {Fieldone:'8023',Fieldtwo:'POOP',Fieldthree:'DOGS'},
    4
    MYCSV {Fieldone:'441',Fieldtwo:'CHEESE',Fieldthree:'CARMEL'},

    MYCSV {Fieldone:'221',Fieldtwo:'ABC',Fieldthree:'HOUSE'},]
    现在您应该打开Web浏览器并导航到您的服务器。您应该看到它以JSON格式输出数据。

    结论
    使用node.js及其CSV模块,我们可以快速,轻松地读取和使用存储在服务器上的数据,并根据请求将其提供给客户端