forked from n1k0/readable-proxy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.js
45 lines (43 loc) · 1.36 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
var childProcess = require("child_process");
var phantomjs = require("phantomjs");
var binPath = phantomjs.path;
var path = require("path");
var Promise = require("bluebird");
var objectAssign = require("object-assign");
// Location of the Readability.js library handed to the PhantomJS script.
// The READABILITY_LIB_PATH environment variable wins when it is set to a
// non-empty value; otherwise fall back to the copy under ./vendor next to
// this file.
var readabilityPath = process.env.READABILITY_LIB_PATH;
if (!readabilityPath) {
  readabilityPath = path.normalize(path.join(__dirname, "vendor", "Readability.js"));
}
module.exports = function scrape(url, options) {
options = options || {};
if (!url) throw new Error("Missing url.");
return new Promise(function(fulfill, reject) {
var childArgs = [path.join(__dirname, "phantom-scrape.js"), url, readabilityPath];
if (options.userAgent) {
childArgs.push(options.userAgent);
}
childProcess.execFile(binPath, childArgs, function(err, stdout, stderr) {
if (err) {
return reject(err);
}
var response, error;
try {
response = JSON.parse(stdout);
} catch (e) {
error = {
message: "Unable to parse JSON proxy response.",
line: e.line,
stack: e.stack
};
}
if (response && response.error) {
error = response.error;
}
if (error) {
reject(objectAssign(new Error(error.message), error));
} else if (!response) {
reject(new Error("Empty scraped response."));
} else {
fulfill(response);
}
});
});
};