-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapping.py
145 lines (112 loc) · 4.39 KB
/
scrapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import json
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
# Request headers shared by the profile fetchers in this module: a single
# User-Agent string identifying as mobile Chrome in an Android WebView.
headers = {
    'User-Agent': (
        "Mozilla/5.0 (Linux; Android 12; SM-S906N Build/QP1A.190711.020; wv) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 "
        "Chrome/80.0.3987.119 Mobile Safari/537.36"
    ),
}
def getGFGProfile(username, platform, timeout=10):
    """Fetch a GeeksforGeeks practice page and report the solved-problem count.

    Args:
        username: GeeksforGeeks handle to look up.
        platform: Label copied unchanged into the returned profile.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``{'status': False, 'message': "user not found"}`` when the page has
        no ``profile_name`` div; otherwise a dict with ``username``,
        ``platform``, ``problems`` and ``status`` set to ``True``.

    Raises:
        requests.exceptions.RequestException: The request failed or timed out.
        IndexError: Fewer than two ``score_card_value`` spans were found.
        ValueError: The score card text is not an integer.
    """
    # Percent-encode the handle so it cannot alter the URL's path or query.
    handle = quote(str(username), safe='')
    url = f'https://auth.geeksforgeeks.org/user/{handle}/practice'
    r = requests.get(url=url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(r.content, 'html5lib')

    # A page without the profile_name div is treated as a missing user.
    if soup.find('div', attrs={'class': 'profile_name'}) is None:
        return {
            'status': False,
            'message': "user not found",
        }

    # The second score_card_value span is read as the solved-problem count.
    scoreCards = soup.find_all('span', attrs={'class': 'score_card_value'})
    problemsSolved = int(scoreCards[1].text)

    return {
        "username": username,
        "platform": platform,
        "problems": problemsSolved,
        "status": True,
    }
def getLeetCodeProfile(username, platform, timeout=10):
    """Query LeetCode's GraphQL endpoint for a user's accepted-submission count.

    Args:
        username: LeetCode handle to look up.
        platform: Label copied unchanged into the returned profile.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``{'status': False, 'message': "user not found"}`` when the response
        carries a null ``matchedUser``; otherwise a dict with ``username``,
        ``platform``, ``problems`` and ``status`` set to ``True``.

    Raises:
        requests.exceptions.RequestException: The request failed or timed out.
        ValueError: The response body is not valid JSON.
        KeyError, IndexError: The JSON does not have the expected layout.
    """
    # The handle ends up inside a double-quoted string literal of the query.
    # json.dumps escapes quotes and backslashes; its outer quotes are dropped
    # because the template already supplies them (%22). Percent-encoding the
    # result keeps a crafted handle from rewriting the query or the URL.
    safeName = quote(json.dumps(str(username))[1:-1], safe='')
    url = (
        'https://leetcode.com/graphql?query={matchedUser(username:%20%22'
        + safeName
        + '%22)%20{username%20submitStats:submitStatsGlobal%20'
        '{acSubmissionNum%20{difficulty%20count%20submissions}}}}'
    )
    r = requests.get(url=url, headers=headers, timeout=timeout)
    data = r.json()

    matchedUser = data['data']['matchedUser']
    if matchedUser is None:
        return {
            'status': False,
            'message': "user not found",
        }

    # Only the first acSubmissionNum entry is used.
    # NOTE(review): presumably that entry is the all-difficulties total - confirm.
    problemsSolved = int(matchedUser['submitStats']['acSubmissionNum'][0]['count'])

    return {
        "username": username,
        "platform": platform,
        "problems": problemsSolved,
        "status": True,
    }
def getCodeChefProfile(username, platform, timeout=10):
    """Scrape a CodeChef user page for the rating and star badge.

    Args:
        username: CodeChef handle to look up.
        platform: Label copied unchanged into the returned profile.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``{'status': False, 'message': "user not found"}`` when the page has
        no ``m-username--link`` span or its text differs from ``username``;
        otherwise a dict with ``username``, ``platform``, ``stars`` (number of
        spans inside the ``rating-star`` div), ``rating``, ``icon`` (that
        div's HTML as a string) and ``status`` set to ``True``.

    Raises:
        requests.exceptions.RequestException: The request failed or timed out.
        AttributeError: The ``rating-number`` or ``rating-star`` div is missing.
        ValueError: The rating text is not an integer.
    """
    # Percent-encode the handle so it cannot alter the URL's path or query.
    handle = quote(str(username), safe='')
    url = f'https://www.codechef.com/users/{handle}'
    r = requests.get(url=url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(r.content, 'html5lib')

    # The page must show exactly the requested handle; anything else
    # (no username span, or a different name) counts as "not found".
    nameTag = soup.find('span', attrs={'class': 'm-username--link'})
    if nameTag is None or nameTag.text != username:
        return {
            'status': False,
            'message': "user not found",
        }

    # After the addition of provisional ratings on 02-08-2022 the rating text
    # may carry a "?" suffix, so only the part before it is parsed.
    ratingText = soup.find('div', attrs={'class': 'rating-number'}).text
    rating = int(ratingText.split("?")[0])

    # Look the star container up once and derive both values from it.
    starDiv = soup.find('div', attrs={'class': 'rating-star'})
    stars = len(starDiv.find_all('span'))
    icon = str(starDiv)

    return {
        "username": username,
        "platform": platform,
        "stars": stars,
        "rating": rating,
        "icon": icon,
        "status": True,
    }
def getHackerRankProfile(username, platform, timeout=10):
    """Scrape a HackerRank profile page for the user's badges.

    Args:
        username: HackerRank handle to look up.
        platform: Label copied unchanged into the returned profile.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``{'status': False, 'message': "user not found"}`` when the page has
        no ``profile-username-heading`` paragraph; otherwise a dict with
        ``username``, ``platform``, ``badges`` and ``status`` set to ``True``.
        ``badges`` is a list of single-key dicts mapping a badge title to
        ``{'stars': <count>, 'icon': <HTML string>}``; it is empty when the
        page has no ``badges-list`` div.

    Raises:
        requests.exceptions.RequestException: The request failed or timed out.
        AttributeError: A badge has no ``badge-title`` element.
    """
    # Percent-encode the handle so it cannot alter the URL's path or query.
    handle = quote(str(username), safe='')
    url = f'https://www.hackerrank.com/{handle}'
    r = requests.get(url=url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(r.content, 'html5lib')

    # A page without the username heading is treated as a missing user.
    if soup.find('p', attrs={'class': 'profile-username-heading'}) is None:
        return {
            'status': False,
            'message': "user not found",
        }

    # No badges container simply means there is nothing to list.
    badgesContainer = soup.find('div', attrs={'class': 'badges-list'})
    if badgesContainer is None:
        badgesList = []
    else:
        badgesList = badgesContainer.find_all('div', attrs={'class': 'hacker-badge'})

    badgeData = []
    for badge in badgesList:
        name = badge.find('text', attrs={'class': 'badge-title'}).text
        # attrs has to be a dict; a set literal here would not express
        # "class equals badge-star".
        stars = len(badge.find_all('svg', attrs={'class': 'badge-star'}))
        icon = str(badge.find('div', attrs={'class': 'ui-badge-wrap'}))
        badgeData.append({
            name: {
                'stars': stars,
                'icon': icon,
            }
        })

    return {
        "username": username,
        "platform": platform,
        "badges": badgeData,
        "status": True,
    }