forked from aessam/ikhbr-SmartArabicNewsAggregator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
71 lines (61 loc) · 2.57 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
var fs = require("fs");
var async = require('async');
var Utility = require("./Utility");
var iKhbrParser = require("./ParseAndStructurizeFeed");
var db = require("./MongoDBHandler").db;
var crypto = require('crypto');
var fileDownloader = require("./FileDownloadQueue")
var logger = require("./logger");
var prepare = require("./PrepareArticles");
// Load the configured news sources and turn each one into a request descriptor.
// Descriptors are first bucketed by host and then flattened into `allSources`,
// so descriptors that share a host sit next to each other in the final list.
var arabicSourcesFilename = "ArabicSources.json";
var arabicSources = JSON.parse(fs.readFileSync(arabicSourcesFilename));
var sourceGroups = {};
var allSources = [];
arabicSources.forEach(function(item){
    var requestInfo = Utility.prepareRequestOptionsForURL(item.feedUrl,"GET");
    // Keep a back-reference to the source entry; it is read again later as
    // `requestInfo.owner` when the downloaded feed is parsed.
    requestInfo["owner"] = item;
    if(sourceGroups[requestInfo.host] == undefined){
        sourceGroups[requestInfo.host] = [];
    }
    sourceGroups[requestInfo.host].push(requestInfo);
});
// The loop variable is declared explicitly: assigning to an undeclared name
// creates an implicit global and throws a ReferenceError in strict mode.
for(var groupID in sourceGroups) {
    allSources = allSources.concat(sourceGroups[groupID]);
}
// Download every feed in `allSources`, running at most 5 downloads at a time.
// For each feed that returns data: parse it into articles, prepare each article
// for storage, queue its image for download when applicable, and insert it.
async.forEachLimit(allSources, 5, function (requestInfo, groupCallBack) {
    Utility.downloadFeed(requestInfo, function (feedData, feedResponse, err) {
        // Signal completion immediately, whatever the outcome, so the next
        // source can start while this feed is still being processed.
        groupCallBack();
        if (err) {
            logger.log("feedDownloadError", err);
        }
        if (feedData != undefined) {
            var articles = iKhbrParser.parseIncomingContent(feedResponse["headers"]["content-type"], requestInfo.owner, feedData);
            if (articles && articles.length > 0) {
                // Indexed loop instead of for...in: for...in yields string keys and
                // also visits any enumerable non-index or inherited properties.
                for (var index = 0; index < articles.length; index++) {
                    var article = Utility.prepareArticleToDB(articles[index], feedResponse["requestInfo"]["owner"]);
                    // NOTE(review): the `> 7` threshold presumably rejects values too short
                    // to be a real URL (e.g. a bare "http://") - confirm.
                    if (article.media && article.media.length > 7 && Utility.prepMediaForDownload(article)) {
                        fileDownloader.addImageToDownloadQueue(article.media, "images/" + article.localMedia);
                    }
                    prepare.preProcessEntry(article);
                    db.insertArticlesArray(article);
                }
            } else {
                // Data arrived but no articles came out of the parser: record the
                // response headers and a hash of the payload for later diagnosis.
                logger.log("FeedDownload", {
                    "HTTP Header": feedResponse["headers"],
                    "DataHash": crypto.createHash('md5').update(feedData).digest('hex')
                });
            }
        } else {
            logger.log("FeedWithZeroLength", "No comment");
        }
    }, function (err) {
        // NOTE(review): this function is the third argument of Utility.downloadFeed,
        // not the completion callback of async.forEachLimit. Confirm that
        // downloadFeed actually invokes it; otherwise db.closeDB() never runs.
        if (!err) {
            db.closeDB();
        }
    });
});
// Last-resort handler: any exception that nothing else caught is handed to
// the logger under the "uncaughtException" tag.
function logUncaughtException(error) {
    logger.log("uncaughtException", error);
}
process.on("uncaughtException", logUncaughtException);