1
+ # @author:九世
2
+ # @time:2019/6/1
3
+
4
+ from gevent import monkey ;monkey .patch_all ()
5
+ from aiohttp import ClientSession
6
+ from bs4 import BeautifulSoup
7
+ from multiprocessing import Process
8
+ import asyncio
9
+ import gevent
10
+ import requests
11
+ import re
12
+ import json
13
+
14
# Module-level result lists appended to by Search.searcs and read together,
# by index, when the results are printed.
aq = []  # owner names extracted from each repository URL
x = []  # text of the description paragraphs scraped from a result page
us = []  # repository URLs taken from the JSON embedded in <h3> elements
17
+
18
class Search:
    """Interactive command-line client that scrapes GitHub repository search.

    ``main`` shows the menu, ``search`` asks for a query and a page count,
    fetches the first result page to learn how many results/pages exist, and
    then fans the result-page URLs out to worker processes (``xc``), each of
    which downloads its pages concurrently with gevent greenlets (``searcs``).
    """

    # Browser-like User-Agent sent with every HTTP request.
    _HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}

    # Maximum number of result-page URLs handed to one worker process.
    _BATCH_SIZE = 50

    def version(self):
        """Print the banner, version, author, project URL and usage warning."""
        # Raw string: the ASCII art is full of backslashes, which must stay
        # literal (no invalid-escape warnings, no backslash-newline joining).
        ver = {
            'banner': r'''
__ __
__/\ \__ /\ \
__ /\_\ \ ,_\ ____ __ __ _ __ ___\ \ \___
/'_ `\/\ \ \ \/ /',__\ /'__`\ /'__`\ /\`'__\/'___\ \ _ `\
/\ \L\ \ \ \ \ \_ /\__, `\/\ __//\ \L\.\_\ \ \//\ \__/\ \ \ \ \
\ \____ \ \_\ \__\ \/\____/\ \____\ \__/.\_|| \_|| \____|| \_\ \_|
\/___L\ \/_/\/__/ _______\/___/ \/____/\/__/\/_/ \/_/ \/____/ \/_/\/_/
/\____/ /\______\
\_/__/ \/______/
''',
            'version': '0.1',
            'author': '九世',
            'github': 'https://github.com/422926799',
            'waring': '[!] 由于个别人不喜欢写README.md或者github上显示不出来导致README为空,抓取的时候抓不到。只能添加空白凑和着了,所以会有README混乱例子搜索:Metasploit即可体会到',
        }
        for key, value in ver.items():
            print('{}:{}'.format(key, value))

    @staticmethod
    def _search_url(query, page=None):
        """Return the GitHub repository-search URL for *query* (and *page*)."""
        from urllib.parse import quote_plus

        # Encode the query so characters such as '&', '#', '>' or spaces
        # cannot break the query string.
        encoded = quote_plus(query)
        if page is None:
            return 'https://github.com/search?q={}&type=Repositories'.format(encoded)
        return 'https://github.com/search?p={}&q={}&type=Repositories'.format(page, encoded)

    @staticmethod
    def _total_pages(text):
        """Return the highest page number linked from *text*, or 0 if none."""
        # Compare numerically: sorting the captured strings would rank
        # '10' before '2'.
        numbers = [int(n) for n in re.findall(r'/search\?p=([0-9]+)', text)]
        return max(numbers, default=0)

    @staticmethod
    def _page_batches(query, pages, batch_size=50):
        """Split the result-page URLs for *query* into lists of *batch_size*.

        ``pages == 0`` means no pagination links were found (single result
        page); a single URL with ``p=0`` is produced in that case.  A
        negative *pages* yields no batches.
        """
        if pages == 0:
            return [[Search._search_url(query, 0)]]
        urls = [Search._search_url(query, p) for p in range(1, pages + 1)]
        return [urls[i:i + batch_size] for i in range(0, len(urls), batch_size)]

    def _prompt_query(self):
        """Ask for the search text; typing 'set' opens the advanced menu."""
        query = input('搜索的内容 (输入set进入高级搜索模式)>')
        if query != 'set':
            return query

        setting = {'1': '星星大于指定数量', '2': '搜索某个时间段之后创建出来的代码', '3': '获取星星数目介于某段区域', '4': '输出高级搜索语法帮助,用户自定义混搭'}
        hd = ['星星大于指定数量:stars>200:', '搜索某个时间段之后创建出来的代码:ios created:"2019-01-02 ..* "', '获取星星数目介于某段区域:stars:"10..100"', '按照文件搜索:android in:file', '按照路径检索:andrioid in:path', '按照语言检索:android language:java', '按照文件大小:android size:>100', '按照后缀名检索:android extention:css', '按照是否被fork过:android fork:true', '按照地域检索(这个猎头和hr应该用得着):android location:beijing']
        templates = {'1': 'stars:>{}', '2': '{} created:"{} ..* "', '3': '{} stars:"{}..{} "'}

        for key, label in setting.items():
            print('{}:{}'.format(key, label))

        xw = input('>')
        if xw == '1':
            ts = input('星星的数量>')
            query = templates[xw].format(ts)
        elif xw == '2':
            ts = input('什么时间创建的代码 (输入例如:2019-01-02)>')
            gjz = input('关键字>')
            query = templates[xw].format(gjz, ts)
        elif xw == '3':
            gjz = input('关键字>')
            ks = input('星星最小数量>')
            kd = input('星星最大数量>')
            query = templates[xw].format(gjz, ks, kd)
        elif xw == '4':
            for h in hd:
                print(h)
            query = input('自定义构建语法>')
        # Any other choice leaves the query as the literal text 'set'.
        return query

    async def search(self, id):
        """Run one interactive search and start the page-scraping workers.

        Args:
            id: Search mode; only ``1`` (single search) does anything.

        Raises:
            ValueError: If the page count typed by the user is not an integer.
            SystemExit: If GitHub reports no matching repositories.
        """
        print(' ')
        if id != 1:
            return

        print('[+] 单个搜索')
        query = self._prompt_query()
        print(query)
        pags = int(input('页数>'))

        async with ClientSession() as rqt:
            async with rqt.get(url=self._search_url(query), headers=self._HEADERS) as respone:
                text = await respone.text()

        sl = re.findall('.*? repository results', text)
        if not sl:
            print('[-] 没有你要找的结果')
            raise SystemExit

        search_sessus = {
            '搜索结果数量': str(sl[0]).lstrip(),
            '总页数': self._total_pages(text),
        }
        for key, value in search_sessus.items():
            print('[+] {}:{}'.format(key, value))

        if pags > search_sessus['总页数']:
            pags = search_sessus['总页数']
            print('[!] 要搜索的页数大于总页数,自动设置总页数为要搜索的页数')

        # One worker process per batch of at most _BATCH_SIZE page URLs.
        for batch in self._page_batches(query, pags, self._BATCH_SIZE):
            Process(target=self.xc, args=(batch,)).start()

    def xc(self, op):
        """Fetch every URL in *op* concurrently, one gevent greenlet per URL."""
        headers = dict(self._HEADERS)
        reg = [gevent.spawn(self.searcs, o, headers) for o in op]
        gevent.joinall(reg)

    def searcs(self, url, headers):
        """Download one result page and print owner, URL and description.

        The rows found on this page are also appended to the module-level
        lists ``aq`` (owners), ``us`` (URLs) and ``x`` (descriptions), always
        the same number of entries to each.
        """
        rqt = requests.get(url=url, headers=headers)
        red = BeautifulSoup(rqt.text, 'html.parser')

        urls = []
        authors = []
        for h in red.find_all('h3'):
            for j in re.findall('{.*}', str(h)):
                try:
                    git_url = json.loads(j)['payload']['result']['url']
                except (ValueError, KeyError, TypeError):
                    # Brace-delimited text that is not the expected JSON
                    # payload: not a repository entry, skip it.
                    continue
                urls.append(git_url)
                # Exactly one owner per URL keeps the lists index-aligned.
                owner = re.search('github.com/.*[/]', git_url)
                if owner:
                    authors.append(owner.group(0).replace('github.com/', '').replace('/', ''))
                else:
                    authors.append('')

        descriptions = [
            BeautifulSoup(str(r), 'html.parser').get_text().strip()
            for r in re.findall(r' <p class="col-.*">\s.*', rqt.text)
        ]
        # Repositories without a description leave gaps: pad with blanks.
        # Truncate if the pattern matched more paragraphs than repositories
        # (this case used to spin forever).
        descriptions = (descriptions + [''] * len(urls))[:len(urls)]

        aq.extend(authors)
        us.extend(urls)
        x.extend(descriptions)

        # Print only this page's rows; the shared lists also hold rows from
        # pages fetched by other greenlets.
        for author, git_url, description in zip(authors, urls, descriptions):
            print('作者:{} 仓库地址:{} 简介:{}'.format(author, git_url, description))

    async def main(self):
        """Show the menu and run the search mode the user selects."""
        suomin = ['1.单项搜索']
        for s in suomin:
            print(s)
        xw = input('git_search>')
        if xw == '1':
            # Await directly so an exception raised by search() propagates.
            await self.search(int(xw))
178
if __name__ == '__main__':
    # Print the banner and version details, then start the interactive prompt.
    cli = Search()
    cli.version()
    asyncio.run(cli.main())
0 commit comments