-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
94 lines (83 loc) · 2.48 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
const csv = require('@fast-csv/parse');
const path = require('path');
const fs = require('fs');
const _ = require('lodash');
const rules = require('./rules');
// ---------------------------------------------------------------
// Script-wide state and configuration
// ---------------------------------------------------------------

// Tag -> occurrence count. Updated by counter() for every kept row and
// serialized by handleEOF() once the input has been fully read.
let tagCounter = {};

// Input CSV (resolved next to this script) and the output CSV that the
// processed rows are appended to.
const csvPath = path.resolve(__dirname, 'vi_blog_train.csv');
const after_fileName = 'vi_blog_train-after.csv';

// Options handed to csv.parse() when the input stream is set up.
const pathOptions = {
    headers: true,
    delimiter: ',',
    quote: '"',
    ignoreEmpty: true,
    discardUnmappedColumns: true,
    strictColumnHandling: true,
    trim: true,
};

// Write the CSV header line before any row is appended.
// NOTE(review): this appends, so running the script again against an
// existing output file adds a second header - confirm that is intended.
fs.appendFileSync(after_fileName, 'title,tags\n');
/**
 * Adds one occurrence to the module-level `tagCounter` for every tag in
 * the given collection.
 *
 * @param {Set<string>|string[]} setArray - Tags to tally; anything that
 *   exposes `forEach` works.
 * @returns {void}
 */
const counter = (setArray) => {
    const bump = (tag) => {
        const alreadyCounted = tagCounter[tag] >= 1;
        if (!alreadyCounted) {
            // First sighting (or a non-numeric slot): start the tally at 1.
            tagCounter[tag] = 1;
            return;
        }
        tagCounter[tag]++;
    };
    setArray.forEach(bump);
};
// Group names are the keys of the rules module; convertTag() walks them
// in this order.
const tagNames = Object.keys(rules);
// Number of groups. Not referenced elsewhere in this file.
const { length: tagNamesLength } = tagNames;
/**
 * Maps a raw tag onto one of the group names defined in `rules`.
 *
 * Every keyword of every group is checked; each hit is logged, and when
 * several groups match, the one visited last is returned.
 *
 * @param {string} tag - Raw tag text to classify.
 * @returns {string|undefined} The matching group name, or `undefined`
 *   when no keyword is contained in the tag.
 */
const convertTag = (tag) => {
    let matchedGroup;
    for (const tagName of tagNames) {
        for (const key of rules[tagName]) {
            if (!tag.includes(key)) {
                continue;
            }
            console.log({ tag, tagName });
            // No early exit: a later match overrides an earlier one.
            matchedGroup = tagName;
        }
    }
    return matchedGroup;
};
/**
 * Converts a list of raw tags into their group names, dropping every tag
 * that convertTag() cannot classify. Duplicates are kept.
 *
 * @param {Iterable<string>} tags - Raw tags of one row.
 * @returns {string[]} Group names in input order (possibly empty).
 */
const groupTags = (tags) => {
    const converted = [];
    for (const tag of tags) {
        const groupName = convertTag(tag);
        if (!groupName) {
            continue;
        }
        converted.push(groupName);
    }
    console.log(converted);
    // Only truthy names were collected, so this returns an equal copy.
    return converted.filter((entry) => entry !== null);
};
/**
 * 'error' listener for the CSV stream: prints the error and carries on.
 *
 * @param {*} error - Value emitted with the 'error' event.
 * @returns {void}
 */
const handleError = (error) => {
    console.log(error);
};
/**
 * 'data' listener: processes one parsed CSV row.
 *
 * The `tags` cell is read as a single-quoted list literal (for example
 * "['a', 'b']"), de-duplicated, mapped to group names with groupTags(),
 * and - if at least one group remains - the row is appended to the output
 * file as `title,"['group1','group2']"`. The de-duplicated raw tags of
 * every kept row are tallied through counter().
 *
 * Rows whose `tags` cell cannot be parsed are reported on stderr and
 * skipped instead of aborting the whole run.
 *
 * @param {{title: string, tags: string}} row - Parsed CSV row.
 * @returns {void}
 */
const handleData = ({ title, tags }) => {
    // Swap ' and " in a single pass. A placeholder character is not used
    // because it would corrupt tags that contain that character.
    const swapQuotes = (text) =>
        text.replace(/["']/g, (ch) => (ch === '"' ? "'" : '"'));

    // Quote a CSV field only when it contains a character that would
    // otherwise break the row (comma, quote, line break).
    const toCsvField = (value) => {
        const text = String(value);
        return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    };

    let rawArray;
    try {
        rawArray = Array.from(JSON.parse(swapQuotes(tags)));
    } catch (err) {
        console.error(err);
        return;
    }

    const setArray = new Set(rawArray);
    const temp = {
        content: title,
        tags: groupTags(Array.from(setArray)),
    };
    if (temp.tags.length === 0) return;

    counter(setArray);
    if (!temp.content) return;

    const quotedTags = temp.tags.map((s) => `'${s}'`).join(',');
    const str = `"[${quotedTags}]"`;
    try {
        // Synchronous append keeps rows in input order; overlapping
        // asynchronous appends to the same file are not ordered.
        fs.appendFileSync(after_fileName, `${toCsvField(temp.content)},${str}\n`);
    } catch (err) {
        console.error(err);
    }
    console.log(temp);
};
/**
 * 'end' listener: writes the accumulated tag counts to
 * vi_blog_train-report.json as JSON.
 *
 * @param {number} rowCount - Row total passed with the 'end' event; unused.
 * @returns {void}
 * @throws Rethrows (from the write callback) any error reported by
 *   fs.writeFile.
 */
const handleEOF = (rowCount) => {
    const report = JSON.stringify(tagCounter);
    const onWritten = (err) => {
        if (err) throw err;
    };
    fs.writeFile('vi_blog_train-report.json', report, onWritten);
};
// Entry point: stream the input CSV through the parser and dispatch each
// parsed row to handleData(); handleEOF() runs once the input is exhausted.
const inputStream = fs.createReadStream(csvPath);

// pipe() does not forward errors from the source stream, so a missing or
// unreadable input file must be handled on the read stream itself.
inputStream.on('error', handleError);

inputStream
    .pipe(csv.parse(pathOptions))
    .on('error', handleError)
    .on('data', handleData)
    .on('end', handleEOF);