Reading CSV Files into Objects with Node.js

Full source code available here.

As I am learning Node.js I am constantly surprised by how easy it is to do some things, how difficult it is to do others, and how poor the examples out there are.

Case in point, I want to read some data from a CSV file and then send it off to ElasticSearch for indexing. The ElasticSearch part I have figured out already, and in Node.js that is easy.
I expected it to be trivial to find an example of reading a file, creating objects based on the rows, processing the objects, and doing all that in chunks of say 1000 rows. This kind of stuff is easy in C# and there are plenty of examples. Not the case in Node.js.

Here are two ways of doing it.

You can download a CSV with all of Shakespeare’s plays, but the example you can download from above has just the first 650 lines from that file.

I want to read the file 100 rows at a time, put the rows into objects and then process the rows.

csv-parse
The first module I came across was csv-parse.

Run this from the console –

npm install csv-parse

I have an Entry class that represents a row from the CSV file. Its constructor takes six parameters that represent each of the columns in the CSV file.

In this example processing the data in chunks is not necessary, but when I process the full file with over 130,000 rows, chunking becomes important.

var fs = require('fs');
var parse = require('csv-parse');


// Streams the CSV file, converts each row to an Entry, and hands the
// entries to processEntries in chunks of 100.
// NOTE(review): `require('csv-parse')` returning a callable parser assumes
// csv-parse v4; v5 exports `{ parse }` — confirm the installed version.
function readCSV() {
    let entries = [];
    let count = 0;

    fs.createReadStream('shakespeare_plays_sample.csv')
        .pipe(parse({ delimiter: ';', from_line: 2 })) // from_line: 2 skips the header row
        .on('data', function (row) {
            count++;
            entries.push(new Entry(row[0], row[1], row[2], row[3], row[4], row[5]));

            if (count % 100 === 0) {
                processEntries(entries);
                count = 0;
                entries = []; // start a fresh chunk
            }
        })
        .on('error', function (err) {
            // Without this handler a read/parse error would crash the
            // process as an unhandled stream error.
            console.error('Failed to read CSV:', err);
        })
        .on('end', function () {
            // Flush the final partial chunk — but only if it is non-empty;
            // when the row count is an exact multiple of 100 the array is
            // already empty here.
            if (entries.length > 0) {
                processEntries(entries);
            }
        });
}

// Logs the range of Ids covered by this chunk of entries.
// Guards against an empty chunk — the original read entries[0].Id
// unconditionally and threw a TypeError on an empty array.
function processEntries(entries) {
    if (entries.length === 0) {
        return; // nothing to report
    }
    console.log(entries[0].Id + "  to " + entries[entries.length - 1].Id);
}

// One row of the plays CSV, one property per column.
class Entry {
    constructor(id, play, characterLineNumber, actSceneLine, character, line) {
        // PascalCase property names are part of the contract — processEntries
        // reads entry.Id directly.
        Object.assign(this, {
            Id: id,
            Play: play,
            CharacterLineNumber: characterLineNumber,
            ActSceneLine: actSceneLine,
            Character: character,
            Line: line,
        });
    }
}

readCSV();

Note how I have to make sure that the last chunk is also processed on line 22.

This approach seems fine, but then I found the csvtojson module.

csvtojson
This module makes what I’m trying to do a little easier by skipping over the need to explicitly construct an object with the data from the rows in the file.

First install the module –

npm install csvtojson

This is the code –

const csv=require('csvtojson');

// Loads the whole CSV into an array of plain objects via csvtojson, then
// processes them in chunks of 100.
// Returns the promise chain so callers can await completion. The original
// `return entries;` executed before the async .then() callback ran, so it
// always handed back an empty array.
function readCSV() {
    let entries = [];
    let count = 0;

    return csv({ delimiter: ';' })
        .fromFile('./shakespeare_plays_sample.csv')
        .then((json) => {
            json.forEach((row) => {
                count++;
                entries.push(row);
                if (count % 100 === 0) {
                    processEntries(entries);
                    count = 0;
                    entries = []; // start a fresh chunk
                }
            });
            // Flush the final partial chunk — only if non-empty; when the
            // row count is an exact multiple of 100 the array is empty here.
            if (entries.length > 0) {
                processEntries(entries);
            }
        })
        .catch((err) => {
            // Surface file/parse errors instead of leaving an unhandled rejection.
            console.error('Failed to read CSV:', err);
            throw err;
        });
}

// Logs the Id range of a chunk of row objects.
// Skips empty chunks — the original dereferenced entries[0].Id and threw
// a TypeError when called with an empty array.
function processEntries(entries) {
    if (entries.length === 0) {
        return; // nothing to report
    }
    console.log(entries[0].Id + "  to " + entries[entries.length - 1].Id);
}

readCSV();

Again note how I process the chunks of 100, and then final chunk on line 20.

All of this is the aim of indexing all of Shakespeare’s works in ElasticSearch, and that I will show in another post.

Full source code available here.

Leave a Reply

Your email address will not be published. Required fields are marked *