main.py
import os, time, re
import urlparse, random, getpass
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
def getEmail():
    '''Simple function to get the email input.'''
    email_verified = False
    while not email_verified:
        email = raw_input('Linkedin Email: ')  # Ask for user input
        # Check the email format
        if re.match(r"[^@]+@[^@]+\.[^@]+", email):
            email_verified = True
            return email
        else:
            print "Email not valid. Please try again."
def getPassword():
    '''Simple function to get the password input.'''
    pw_verified = False
    while not pw_verified:
        password = getpass.getpass('Password:')  # Ask for user input
        # Check the password length
        if len(password) >= 6:
            pw_verified = True
            return password
        else:
            print "Password must have 6 or more characters."
def getPeopleLinks(page):
    '''Return a list of all profile links found on the page.'''
    links = []
    for link in page.find_all('a'):  # Look at every <a ...></a> tag
        # Get the href attribute
        url = link.get('href')
        if url:  # Make sure there is an href
            if 'profile/view?id' in url:  # Only keep links to other profiles
                links.append(url)
    return links
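# Hypothetical example (markup invented for illustration, not from the repo):
#   page = BeautifulSoup('<a href="/profile/view?id=12345">Jane</a>', 'html.parser')
#   getPeopleLinks(page)  # -> ['/profile/view?id=12345']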
def getJobLinks(page):
    '''Return a list of all job links found on the page.'''
    links = []
    for link in page.find_all('a'):  # Look at every <a ...></a> tag
        # Get the href attribute
        url = link.get('href')
        if url:  # Make sure there is an href
            if '/jobs' in url:
                links.append(url)
                print "jobs is in url:", url
    return links
def getID(url):
    '''Extract the "id" query parameter from a profile URL.'''
    pURL = urlparse.urlparse(url)
    return urlparse.parse_qs(pURL.query)['id'][0]
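# Hypothetical example (URL invented for illustration, not from the repo):
#   getID('https://www.linkedin.com/profile/view?id=12345&trk=pp')
#   -> '12345'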
def ViewBot(browser):
    visited = {}
    pList = []
    counter = 0
    person = None
    while True:
        time.sleep(random.uniform(3.4, 9.5))
        page = BeautifulSoup(browser.page_source, 'html.parser')
        people = getPeopleLinks(page)
        for person in people:
            ID = getID(person)
            if ID not in visited:
                pList.append(person)
                visited[ID] = 1
        if pList:
            person = pList.pop()
            browser.get(person)
            # Disabled attempt to collect company links from the profile page:
            '''
            jobi = []
            for job in page.find_all('a'):
                url2 = job.get('href')
                if url2:  # Make sure there is an href
                    if 'company/' in url2:  # Locate the link to the company page
                        jobi.append(url2)
            '''
            counter += 1
        else:
            jobs = getJobLinks(page)
            if jobs:
                job = random.choice(jobs)
                root = "http://www.linkedin.com"
                roots = "https://www.linkedin.com"
                if root not in job and roots not in job:
                    job = "https://www.linkedin.com" + job
                browser.get(job)
            else:
                print("I'm lost in this sea of jobs!!!")
                print("I'm giving up....")
                break
        print("[+] " + browser.title + " Visited!\n(" + str(counter) + "/" + str(len(pList)) + ") Visited/Queue")
        if person:
            print person
        print ""
def Main():
    email = getEmail()
    password = getPassword()
    print 'Logging in with %s' % email
    browser = webdriver.Firefox()
    browser.get("https://www.linkedin.com/uas/login")
    emailElement = browser.find_element_by_id("session_key-login")
    emailElement.send_keys(email)
    passwordElement = browser.find_element_by_id("session_password-login")
    passwordElement.send_keys(password)
    passwordElement.submit()
    os.system('clear')
    print("[+] Success! You are now logged in with %s." % email)
    print("[+] The bot is starting!")
    '''
    time.sleep(random.uniform(3.4, 9.5))
    # Indicate the search you want to initiate crawling
    browser.get("https://www.linkedin.com/vsearch/p?keywords=developeur")
    '''
    ViewBot(browser)
    browser.close()
if __name__ == "__main__":
    Main()
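# A minimal way to run the script (assumed setup, not stated in the original
# repo): Python 2.7 with the selenium and beautifulsoup4 packages installed,
# and Firefox set up so that webdriver.Firefox() can start (newer Selenium
# versions need geckodriver on PATH).
#
#   pip install selenium beautifulsoup4
#   python main.py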