Skip to content

Commit

Permalink
real v1.0 now?
Browse files Browse the repository at this point in the history
- moved scrape limit from static number to config file
  • Loading branch information
aacid committed Jul 31, 2016
1 parent 08ddb6b commit fd1b6b8
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
3 changes: 2 additions & 1 deletion Crawler4000/Crawler4000/Crawler4000.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def initScraper(self):
return

if self.scraper.login(login, password):
self.scraper.scrapeFriendsRecursively(10)
limit = self.config.getScrapeLimit()
self.scraper.scrapeFriendsRecursively(limit)
self.scraper.scrapeProfiles()

diplo = Crawler4000()
28 changes: 20 additions & 8 deletions Crawler4000/Crawler4000/Source/ConfigManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
class ConfigManager(object):
"""manages config file"""
MAIN_FILE = 'main.cfg'
FB_SECTION = 'FacebookLogin'
FB_CONFIG = 'FacebookLogin'
FB_USERNAME = 'username'
FB_PASSWORD = 'password'
MN_CONFIG = 'MainConfig'
MN_SCRAPE_LIMIT = 'scrape_limit'

config_read = False

Expand All @@ -28,17 +30,27 @@ def isLoaded(self):
return self.config_read

# Fill the in-memory config with placeholder Facebook credentials and a
# default scrape limit of '1000', then write it out to MAIN_FILE.
# NOTE(review): this text comes from a rendered diff with the +/- markers
# stripped. The three FB_SECTION lines look like the removed side and the
# FB_CONFIG lines like their replacements -- confirm against the real file.
def createConfig(self):
self.config.add_section(self.FB_SECTION)
self.config.set(self.FB_SECTION, self.FB_USERNAME, '_DUMMY_USER_')
self.config.set(self.FB_SECTION, self.FB_PASSWORD, '_DUMMY_PASS_')
self.config.add_section(self.FB_CONFIG)
self.config.set(self.FB_CONFIG, self.FB_USERNAME, '_DUMMY_USER_')
self.config.set(self.FB_CONFIG, self.FB_PASSWORD, '_DUMMY_PASS_')
self.config.add_section(self.MN_CONFIG)
self.config.set(self.MN_CONFIG, self.MN_SCRAPE_LIMIT, '1000')

# The file is opened in binary write mode; the config object serialises
# itself into the open handle.
with open(self.MAIN_FILE, 'wb') as configfile:
self.config.write(configfile)

# Return the Facebook username stored in the config when that option
# exists; otherwise fall through without an explicit return value.
# NOTE(review): rendered diff with +/- markers stripped -- the FB_SECTION
# pair is presumably the removed side and the FB_CONFIG pair the added
# side; confirm against the real file.
def getFBLogin(self):
if self.config.has_option(self.FB_SECTION, self.FB_USERNAME):
return self.config.get(self.FB_SECTION, self.FB_USERNAME)
if self.config.has_option(self.FB_CONFIG, self.FB_USERNAME):
return self.config.get(self.FB_CONFIG, self.FB_USERNAME)

# Return the Facebook password stored in the config when that option
# exists; otherwise fall through without an explicit return value.
# NOTE(review): rendered diff with +/- markers stripped -- the FB_SECTION
# pair is presumably the removed side and the FB_CONFIG pair the added
# side; confirm against the real file.
def getFBPassword(self):
if self.config.has_option(self.FB_SECTION, self.FB_PASSWORD):
return self.config.get(self.FB_SECTION, self.FB_PASSWORD)
if self.config.has_option(self.FB_CONFIG, self.FB_PASSWORD):
return self.config.get(self.FB_CONFIG, self.FB_PASSWORD)

def getScrapeLimit(self):
if self.config.has_option(self.MN_CONFIG, self.MN_SCRAPE_LIMIT):
value = self.config.get(self.MN_CONFIG, self.MN_SCRAPE_LIMIT)
try:
return int(value)
except:
print 'Error reading scrape limit from config, not an number'

0 comments on commit fd1b6b8

Please sign in to comment.