forked from aessam/ikhbr-SmartArabicNewsAggregator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ParseAndStructurizeFeed.js
74 lines (62 loc) · 2.17 KB
/
ParseAndStructurizeFeed.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
var Utility = require("./Utility");
var FeedParserCenter = require("./FeedParserCenter");
var DataUtility = require("./DataUtility");
var logger = require("./logger");
/**
 * Parses a raw feed response body into structured feed items.
 *
 * The body is trimmed to its first "<" byte, decoded to a string, and pushed
 * through a non-strict sax stream. The first opened tag is handed to
 * FeedParserCenter.findMatchingParser to select a feed parser; every later
 * tag, text and CDATA event is forwarded to that parser.
 *
 * @param {string} contentType - Content-Type value of the response. Used to
 *     detect a "windows-1256" charset and "html" responses. A missing value
 *     is treated as an empty string.
 * @param {Object} itemFeedInfo - Key/value pairs copied onto every completed
 *     feed item before the date and image fix-ups run.
 * @param {Buffer} data - Raw response body. Assumed to be a Buffer (it is
 *     read byte-wise and decoded with toString) - confirm against callers.
 * @returns {(boolean|undefined|*)} `false` when the body is missing or
 *     shorter than 1024 bytes; `undefined` when an HTML response does not
 *     start with an XML declaration; otherwise whatever the selected parser's
 *     getParsedFeed() returns, or `[]` when no parser was selected.
 */
module.exports.parseIncomingContent = function (contentType, itemFeedInfo, data) {
    // Bodies under 1 KiB are rejected outright.
    if (!data || data.length < 1024) return false;

    // Drop anything preceding the first "<" (byte 60), e.g. a BOM or stray whitespace.
    var LESS_THAN = 60;
    if (data[0] !== LESS_THAN) {
        var start = 0;
        while (start < data.length && data[start] !== LESS_THAN) {
            start++;
        }
        data = data.slice(start);
    }

    var normalizedType = String(contentType || "").toLowerCase();
    if (normalizedType.indexOf("windows-1256") >= 0) {
        data = Utility.convertWindows1256EncodingToUTF8(data);
    } else {
        data = data.toString("UTF8");
    }

    // Some servers answer with an HTML page instead of a feed; skip those
    // unless the body really begins with an XML declaration.
    if (normalizedType.indexOf("html") >= 0 && data.indexOf('<?xml version=') !== 0) {
        return;
    }

    var saxStream = require("sax").createStream(false);
    var parserForData;
    var rootTagSeen = false;

    saxStream.onerror = function (e) {
        logger.log("ParserError", e);
    };

    saxStream.ontext = function (text) {
        if (parserForData) {
            parserForData.addTextToLastOpenedTag(text);
        }
    };

    saxStream.oncdata = function (text) {
        // CDATA can be reported before any tag has selected a parser.
        if (parserForData) {
            parserForData.addTextToLastOpenedTag(text);
        }
    };

    saxStream.onclosetag = function (node) {
        if (parserForData) {
            parserForData.tagClosed(node);
        }
    };

    saxStream.onopentag = function (node) {
        if (rootTagSeen) {
            if (parserForData) {
                parserForData.tagOpened(node);
            }
            return;
        }

        // The first tag decides which feed parser handles the document.
        rootTagSeen = true;
        parserForData = FeedParserCenter.findMatchingParser(node);
        // NOTE(review): guards against findMatchingParser returning nothing
        // for an unrecognised root tag - confirm whether that can happen.
        if (!parserForData) {
            return;
        }

        // Post-processing for each completely parsed feed item.
        parserForData.setItemCompeletionCallBack(function (item) {
            // Copy the identifying keys of the feed onto the item.
            for (var key in itemFeedInfo) {
                item[key] = itemFeedInfo[key];
            }
            // Fix the date and images.
            DataUtility.FixPubDate(item);
            DataUtility.setLargeImageForFeed(item);
        });
    };

    saxStream.write(data);

    if (parserForData) {
        return parserForData.getParsedFeed();
    }
    return [];
};