-
Notifications
You must be signed in to change notification settings - Fork 0
/
load-csv.js
69 lines (58 loc) · 1.72 KB
/
load-csv.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
const fs = require('fs');
const _ = require('lodash');
const shuffleSeed = require('shuffle-seed');
/**
 * Picks the named columns out of a table whose first row holds the headers.
 *
 * The input table is left untouched: every returned row is a new array.
 * (The previous implementation used `_.pullAt`, which removes the selected
 * cells from the caller's rows, so a later extraction over the same table
 * saw shifted headers and could not find a column that had already been
 * pulled out.)
 *
 * @param {Array<Array<*>>} data - Rows of cells; `data[0]` is the header row.
 * @param {string[]} columnNames - Header names to extract, in output order.
 * @returns {Array<Array<*>>} One row per input row (header row included),
 *   holding only the requested columns. A name that is not present in the
 *   header row yields `undefined` in every row.
 */
function extractColumns(data, columnNames) {
  // An empty table has no header row; treat it as having no columns.
  const headers = data.length > 0 ? data[0] : [];
  const indexes = columnNames.map(column => headers.indexOf(column));

  // Read by index instead of pulling, so the source rows are not modified.
  return data.map(row => indexes.map(index => row[index]));
}
module.exports = function loadCSV(
filename,
{
dataColumns = [],
labelColumns = [],
converters = {},
shuffle = false,
splitTest = false
}
) {
let data = fs.readFileSync(filename, { encoding: 'utf-8' });
data = _.map(data.split('\n'), d => d.split(','));
data = _.dropRightWhile(data, val => _.isEqual(val, ['']));
const headers = _.first(data);
data = _.map(data, (row, index) => {
if (index === 0) {
return row;
}
return _.map(row, (element, index) => {
if (converters[headers[index]]) {
const converted = converters[headers[index]](element);
return _.isNaN(converted) ? element : converted;
}
const result = parseFloat(element.replace('"', ''));
return _.isNaN(result) ? element : result;
});
});
let labels = extractColumns(data, labelColumns);
data = extractColumns(data, dataColumns);
data.shift();
labels.shift();
if (shuffle) {
data = shuffleSeed.shuffle(data, 'phrase');
labels = shuffleSeed.shuffle(labels, 'phrase');
}
if (splitTest) {
const trainSize = _.isNumber(splitTest)
? splitTest
: Math.floor(data.length / 2);
return {
features: data.slice(trainSize),
labels: labels.slice(trainSize),
testFeatures: data.slice(0, trainSize),
testLabels: labels.slice(0, trainSize)
};
} else {
return { features: data, labels };
}
};