Skip to content

Commit

Permalink
Merge branch 'master' of github.com:sylvinus/node-crawler
Browse files Browse the repository at this point in the history
paulvalla committed Nov 14, 2014

Verified

This commit was signed with the committer’s verified signature. The key has expired.
WoozyMasta Maxim Levchenko
2 parents 5b17aee + b46dd84 commit 6fa2fec
Showing 3 changed files with 35 additions and 20 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
[![Build Status](https://travis-ci.org/sylvinus/node-crawler.svg?branch=master)](https://travis-ci.org/sylvinus/node-crawler)

Current Goal
------
Refactoring the code to be more maintainable; it's spaghetti code in there!

node-crawler
------------

23 changes: 10 additions & 13 deletions lib/crawler.js
Original file line number Diff line number Diff line change
@@ -262,22 +262,19 @@ Crawler.prototype._makeCrawlerRequest = function _makeCrawlerRequest (options) {

Crawler.prototype._executeCrawlerRequest = function _executeCrawlerRequest (options) {
var self = this;
var cacheData = self.cache[options.uri];

if (useCache(options)) {
//If a query has already been made to self URL, don't callback again
if (useCache(options) && cacheData) {

var cacheData = self.cache[options.uri];

//If a query has already been made to self URL, don't callback again
if (cacheData) {

// Make sure we actually have cached data, and not just a note
// that the page was already crawled
if (_.isArray(cacheData)) {
self._onContent(null, options, cacheData[0], true);
} else {
self.emit('pool:release', options);
}
// Make sure we actually have cached data, and not just a note
// that the page was already crawled
if (_.isArray(cacheData)) {
self._onContent(null, options, cacheData[0], true);
} else {
self.emit('pool:release', options);
}

} else {
self._buildHttpRequest(options);
}
28 changes: 21 additions & 7 deletions tests/cacheOption.test.js
Original file line number Diff line number Diff line change
@@ -31,12 +31,26 @@ describe('Cache features tests', function() {
});
});

//describe('Skip Duplicate', function() {
// afterEach(function () {
// c = {};
// });
// it('should skip previous crawled urls', function (done) {});
// it('should not skip one single url', function (done) {});
//});
describe('Skip Duplicate active', function() {
    // Reset the shared crawler variable after every test case.
    afterEach(function () {
        c = {};
    });

    it('should not skip one single url', function (done) {
        var targetUrl = 'http://' + httpbinHost + '/status/200';

        // The crawl callback must report no error and an HTTP 200 status,
        // then signal mocha that the asynchronous test has finished.
        function onCrawled(err, res) {
            expect(err).to.be.null;
            expect(res.statusCode).to.equal(200);
            done();
        }

        c = new Crawler({
            jquery: false,
            skipDuplicates: true,
            callback: onCrawled
        });

        // Queue the URL exactly once with skipDuplicates enabled.
        c.queue(targetUrl);
    });

    // TODO: add a case for 'should skip previous crawled urls'.
});
});

0 comments on commit 6fa2fec

Please sign in to comment.