Skip to content

Commit 2cf4ed2

Browse files
authored
Add files via upload
1 parent b050970 commit 2cf4ed2

File tree

2 files changed

+193
-0
lines changed

2 files changed

+193
-0
lines changed

github搜索工具/README.md

+12
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,12 @@
1+
## gitsearch ##
2+
- [x] 基础搜索
3+
- [x] 语法搜索
4+
- [ ] ~~git clone下载~~
5+
- [ ] ~~批量搜索~~
6+
7+
## 测试结果 ##
8+
![](https://s2.ax1x.com/2019/06/02/V8ZOFf.md.png)
9+
10+
![](https://s2.ax1x.com/2019/06/02/V8ZzlQ.png)
11+
12+
![](https://s2.ax1x.com/2019/06/02/V8eCmn.png)

github搜索工具/git_search.py

+181
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,181 @@
1+
# @author:九世
2+
# @time:2019/6/1
3+
4+
from gevent import monkey;monkey.patch_all()
5+
from aiohttp import ClientSession
6+
from bs4 import BeautifulSoup
7+
from multiprocessing import Process
8+
import asyncio
9+
import gevent
10+
import requests
11+
import re
12+
import json
13+
14+
# Module-level accumulators appended to by Search.searcs while it parses
# result pages.
us = list()  # repository URLs taken from each result's JSON payload
aq = list()  # owner names cut out of those repository URLs
x = list()   # description text scraped from the result page
17+
18+
class Search:
    """Interactive command-line scraper for GitHub repository search pages.

    The flow is: ``main`` shows the menu, ``search`` builds the query and
    works out how many result pages to fetch, ``xc`` runs in a child process
    and fans a batch of page URLs out to greenlets, and ``searcs`` downloads
    and parses a single result page.
    """

    # Browser-like User-Agent sent with every request.
    _HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}
    # Maximum number of result-page URLs handed to one worker process.
    _BATCH_SIZE = 50

    def version(self):
        """Print the banner and the version/author/github/warning entries.

        Each entry is written as ``key:value`` on its own line, in insertion
        order.
        """
        ver = {}
        # Raw string: the ASCII art is full of backslashes. In a normal string
        # they are invalid escape sequences, and a backslash at the end of a
        # line would swallow the newline that follows it.
        ver['banner'] = r'''
__ __
__/\ \__ /\ \
__ /\_\ \ ,_\ ____ __ __ _ __ ___\ \ \___
/'_ `\/\ \ \ \/ /',__\ /'__`\ /'__`\ /\`'__\/'___\ \ _ `\
/\ \L\ \ \ \ \ \_ /\__, `\/\ __//\ \L\.\_\ \ \//\ \__/\ \ \ \ \
\ \____ \ \_\ \__\ \/\____/\ \____\ \__/.\_|| \_|| \____|| \_\ \_|
\/___L\ \/_/\/__/ _______\/___/ \/____/\/__/\/_/ \/_/ \/____/ \/_/\/_/
/\____/ /\______\
\_/__/ \/______/
'''
        ver['version'] = '0.1'
        ver['author'] = '九世'
        ver['github'] = 'https://github.com/422926799'
        ver['waring'] = '[!] 由于个别人不喜欢写README.md或者github上显示不出来导致README为空,抓取的时候抓不到。只能添加空白凑和着了,所以会有README混乱例子搜索:Metasploit即可体会到'
        for key, value in ver.items():
            print('{}:{}'.format(key, value))

    @staticmethod
    def _search_url(query, page=None):
        """Return the repository-search URL for *query*.

        The query is percent-encoded so characters such as ``&``, ``#``,
        ``"`` or spaces cannot break the query string. When *page* is given
        it is sent as the ``p`` parameter.
        """
        from urllib.parse import quote_plus

        encoded = quote_plus(query)
        if page is None:
            return 'https://github.com/search?q={}&type=Repositories'.format(encoded)
        return 'https://github.com/search?p={}&q={}&type=Repositories'.format(page, encoded)

    @staticmethod
    def _total_pages(text):
        """Return the highest page number linked from *text*, or 1 if none.

        Page numbers are compared as integers. A result that fits on one page
        has no pagination links for the pattern to match, so it is reported
        as a single page.
        """
        numbers = [int(n) for n in re.findall(r'/search\?p=([0-9]+)', text)]
        return max(numbers, default=1)

    @staticmethod
    def _batches(items, size):
        """Split *items* into consecutive lists of at most *size* elements."""
        return [items[i:i + size] for i in range(0, len(items), size)]

    @staticmethod
    def _owner(git_url):
        """Return the part of *git_url* between ``github.com/`` and the last ``/``.

        Any slashes inside that part are removed. Returns ``''`` when the URL
        does not have that shape.
        """
        found = re.search(r'github\.com/(.*)/', git_url)
        if found is None:
            return ''
        return found.group(1).replace('/', '')

    def _advanced_query(self):
        """Show the advanced-search menu and return the query the user builds."""
        setting = {'1':'星星大于指定数量','2':'搜索某个时间段之后创建出来的代码','3':'获取星星数目介于某段区域','4':'输出高级搜索语法帮助,用户自定义混搭'}
        hd = ['星星大于指定数量:stars>200:','搜索某个时间段之后创建出来的代码:ios created:"2019-01-02 ..* "','获取星星数目介于某段区域:stars:"10..100"','按照文件搜索:android in:file','按照路径检索:andrioid in:path','按照语言检索:android language:java','按照文件大小:android size:>100','按照后缀名检索:android extention:css','按照是否被fork过:android fork:true','按照地域检索(这个猎头和hr应该用得着):android location:beijing']
        templates = {'1':'stars:>{}','2':'{} created:"{} ..* "','3':'{} stars:"{}..{} "'}
        for key, label in setting.items():
            print('{}:{}'.format(key, label))

        xw = input('>')
        if xw == '1':
            ts = input('星星的数量>')
            return templates[xw].format(ts)
        if xw == '2':
            ts = input('什么时间创建的代码 (输入例如:2019-01-02)>')
            gjz = input('关键字>')
            return templates[xw].format(gjz, ts)
        if xw == '3':
            gjz = input('关键字>')
            ks = input('星星最小数量>')
            kd = input('星星最大数量>')
            return templates[xw].format(gjz, ks, kd)
        if xw == '4':
            for h in hd:
                print(h)
            return input('自定义构建语法>')
        # Unrecognised choice: fall back to searching for the literal word
        # the user typed to enter this menu.
        return 'set'

    async def search(self, id):
        """Run one interactive search and start workers for the result pages.

        Only ``id == 1`` (single search) does anything; any other value prints
        a blank line and returns. The user is asked for a query (``set`` opens
        the advanced menu) and a page count. The first result page is fetched
        to read the result summary and the number of pages, the requested
        count is clamped to ``1..total``, and the page URLs are handed to
        ``xc`` in child processes, at most ``_BATCH_SIZE`` URLs per process.
        """
        print(' ')
        if id != 1:
            return

        print('[+] 单个搜索')
        query = input('搜索的内容 (输入set进入高级搜索模式)>')
        if query == 'set':
            query = self._advanced_query()

        print(query)
        pags = input('页数>')
        try:
            wanted = int(pags)
        except ValueError:
            # Reject a non-numeric page count before spending a request on it.
            print('[-] 页数必须是整数')
            return

        async with ClientSession() as rqt:
            async with rqt.get(url=self._search_url(query), headers=self._HEADERS) as respone:
                text = await respone.text()

        sl = re.findall('.*? repository results', text)
        if not sl:
            print('[-] 没有你要找的结果')
            return

        total = self._total_pages(text)
        search_sessus = {'搜索结果数量': str(sl[0]).lstrip(), '总页数': total}
        for key, value in search_sessus.items():
            print('[+] {}:{}'.format(key, value))

        if wanted > total:
            wanted = total
            print('[!] 要搜索的页数大于总页数,自动设置总页数为要搜索的页数')
        # Always fetch at least the first page.
        wanted = max(wanted, 1)

        urls = [self._search_url(query, page) for page in range(1, wanted + 1)]
        for batch in self._batches(urls, self._BATCH_SIZE):
            Process(target=self.xc, args=(batch,)).start()

    def xc(self,op,):
        """Fetch every URL in *op* concurrently, one greenlet per URL.

        Blocks until all greenlets have finished.
        """
        headers = dict(self._HEADERS)
        reg = [gevent.spawn(self.searcs, o, headers) for o in op]
        gevent.joinall(reg)

    def searcs(self,url,headers,):
        """Download one result page and print its repositories.

        Repository URLs come from JSON objects embedded in the page's ``<h3>``
        elements, and descriptions from ``<p class="col-...">`` paragraphs.
        The two are paired by position, with missing descriptions padded by
        empty strings. One line is printed per repository found on this page,
        and the rows are also appended to the module-level ``aq``/``us``/``x``
        lists.
        """
        rqt = requests.get(url=url, headers=headers)
        red = BeautifulSoup(rqt.text, 'html.parser')

        urls = []
        for h in red.find_all('h3'):
            for j in re.findall('{.*}', str(h)):
                try:
                    urls.append(json.loads(j)['payload']['result']['url'])
                except (ValueError, KeyError, TypeError):
                    # Brace-delimited text that is not the expected payload;
                    # skip it instead of aborting the whole page.
                    continue
        owners = [self._owner(git_url) for git_url in urls]

        descriptions = [
            str(BeautifulSoup(str(r), 'html.parser').get_text()).strip()
            for r in re.findall(r' <p class="col-.*">\s.*', rqt.text)
        ]
        # Pad only when descriptions are missing; never loop trying to shrink.
        descriptions += [''] * (len(urls) - len(descriptions))
        descriptions = descriptions[:len(urls)]

        # Keep the module-level accumulators populated as before.
        us.extend(urls)
        aq.extend(owners)
        x.extend(descriptions)

        # Print only this page's rows, so pages handled by other greenlets
        # are not printed again.
        for owner, git_url, intro in zip(owners, urls, descriptions):
            print('作者:{} 仓库地址:{} 简介:{}'.format(owner, git_url, intro))

    async def main(self):
        """Show the top-level menu and run the chosen action."""
        suomin = ['1.单项搜索']
        for s in suomin:
            print(s)
        xw = input('git_search>')
        if xw == '1':
            # Await directly so an exception raised in search() propagates
            # instead of being left unretrieved on a task.
            await self.search(int(xw))
178+
if __name__ == '__main__':
    # Print the banner/version block first, then run the interactive menu
    # coroutine to completion.
    searcher = Search()
    searcher.version()
    asyncio.run(searcher.main())

0 commit comments

Comments
 (0)