Skip to content

Commit fac4abf

Browse files
umangahuja1 authored and bhaveshAn committed
Addresses #320 Add video support for Google search (#426)
* video-search : google
* solve build error
1 parent c3aa71d commit fac4abf

File tree

6 files changed: +93 -57 lines changed

.gitignore

+2
Original file line number | Diff line number | Diff line change
@@ -10,3 +10,5 @@ app/static/bower_components/*
1010
*.swp
1111
Pipfile
1212
Pipfile.lock
13+
.vscode/*
14+

app/scrapers/__init__.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,10 @@
3030

3131

3232
def small_test():
33-
assert isinstance(scrapers['google'].search('fossasia', 1), list)
33+
assert isinstance(scrapers['google'].search('fossasia', 1), list)
3434

3535

36-
def feed_gen(query, engine, count=10):
36+
def feed_gen(query, engine, count=10, qtype=''):
3737
engine = engine.lower()
3838
# provide temporary backwards compatibility for old names
3939
old_names = {'ubaidu': 'baidu',
@@ -43,5 +43,5 @@ def feed_gen(query, engine, count=10):
4343
if engine in ('quora', 'youtube'):
4444
urls = scrapers[engine].search_without_count(query)
4545
else:
46-
urls = scrapers[engine].search(query, count)
46+
urls = scrapers[engine].search(query, count, qtype)
4747
return urls

app/scrapers/generalized.py

+8-5
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ class Scraper:
99
startKey = ''
1010
queryKey = 'q'
1111
defaultStart = 0
12+
qtype = ''
1213
headers = {
1314
'User-Agent': (
1415
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) '
@@ -20,12 +21,14 @@ class Scraper:
2021
def __init__(self):
2122
pass
2223

23-
def get_page(self, query, startIndex=0):
24+
def get_page(self, query, startIndex=0, qtype=''):
2425
""" Fetch the google search results page
2526
Returns : Results Page
2627
"""
27-
payload = {self.queryKey: query, self.startKey: startIndex}
28+
payload = {self.queryKey: query, self.startKey: startIndex,
29+
self.qtype: qtype}
2830
response = requests.get(self.url, headers=self.headers, params=payload)
31+
print(response.url)
2932
return response
3033

3134
def parse_response(self, soup):
@@ -34,16 +37,16 @@ def parse_response(self, soup):
3437
def next_start(self, current_start, prev_results):
3538
return current_start + len(prev_results)
3639

37-
def search(self, query, num_results):
40+
def search(self, query, num_results, qtype=''):
3841
"""
3942
Search for the query and return set of urls
4043
Returns: list
4144
"""
4245
urls = []
4346
current_start = self.defaultStart
4447

45-
while(len(urls) < num_results):
46-
response = self.get_page(query, current_start)
48+
while (len(urls) < num_results):
49+
response = self.get_page(query, current_start, qtype)
4750
soup = BeautifulSoup(response.text, 'html.parser')
4851
new_results = self.parse_response(soup)
4952
if new_results is None:

app/scrapers/google.py

+1
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ def __init__(self):
1010
self.url = 'https://www.google.com/search'
1111
self.defaultStart = 0
1212
self.startKey = 'start'
13+
self.qtype = 'tbm'
1314

1415
def next_start(self, current_start, prev_results):
1516
return current_start + len(prev_results)

app/server.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ def search(search_engine):
4949
try:
5050
count = int(request.args.get('num', 10))
5151
qformat = request.args.get('format', 'json').lower()
52+
qtype = request.args.get('type', '')
5253
if qformat not in ('json', 'xml', 'csv'):
5354
abort(400, 'Not Found - undefined format')
5455

@@ -68,7 +69,7 @@ def search(search_engine):
6869
if result:
6970
print("cache hit: {}".format(engine_and_query))
7071
else:
71-
result = feed_gen(query, engine, count)
72+
result = feed_gen(query, engine, count, qtype)
7273
if result:
7374
# store the result in the cache to speed up future searches
7475
store(engine_and_query, result)

app/templates/index.html

+77-48
Original file line number | Diff line number | Diff line change
@@ -64,52 +64,70 @@ <h1><code>query-server</code></h1>
6464
</div>
6565
</div>
6666
<br/>
67-
<div class="col-sm-4 col-xs-6" style="padding:0; margin: 0 auto;">
68-
<div class="dropdown">
69-
<label>Engine:</label><br/>
70-
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
71-
<span id="drop_down_text">
72-
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
73-
</span>
74-
&nbsp;
75-
<span class="caret"></span>
76-
</button>
77-
<input type="hidden" id="engine" value="google">
78-
<ul class="dropdown-menu">
79-
{% for engine in engines_list %}
80-
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
81-
<a style="cursor:pointer">
82-
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
83-
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
84-
{% else %}
85-
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
86-
{% endif %}
87-
{{ engine }}
88-
</a>
89-
</li>
90-
{% endfor %}
91-
</ul>
92-
</div>
93-
</div>
94-
<div class="col-sm-4 col-xs-4">
95-
<div class="form-group" style="display:inline-block; margin: 0 auto; min-width:50%;">
96-
<label for="resp">Max:</label>
97-
<select class="form-control" id="resp">
98-
<option>10</option>
99-
<option>20</option>
100-
<option>30</option>
101-
<option>40</option>
102-
<option>50</option>
103-
<option>60</option>
104-
<option>70</option>
105-
<option>80</option>
106-
<option>90</option>
107-
<option>100</option>
108-
</select>
67+
68+
<div class="row">
69+
<div class="col-sm-3 col-xs-6" style="padding:0; margin: 0 auto;">
70+
<div class="dropdown">
71+
<label>Engine:</label><br/>
72+
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
73+
<span id="drop_down_text">
74+
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
75+
</span>
76+
&nbsp;
77+
<span class="caret"></span>
78+
</button>
79+
<input type="hidden" id="engine" value="google">
80+
<ul class="dropdown-menu">
81+
{% for engine in engines_list %}
82+
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
83+
<a style="cursor:pointer">
84+
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
85+
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
86+
{% else %}
87+
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
88+
{% endif %}
89+
{{ engine }}
90+
</a>
91+
</li>
92+
{% endfor %}
93+
</ul>
94+
</div>
95+
</div>
96+
<div class="col-sm-5 col-xs-6">
97+
<label>Type:</label><br/>
98+
<div id="type" class="btn-group btn-group-vertical" style="display:inline-flex;padding:0; margin: 0 auto;" data-toggle="buttons">
99+
<label class=" active typeButton" style="padding:10px;">General<br/>
100+
<input type="radio" name = "stype" value="" autocomplete="off" checked>
101+
</label>
102+
<label class=" typeButton" style="padding:10px;">Images<br/>
103+
<input type="radio" name = "stype" value="isch" autocomplete="off">
104+
</label>
105+
<label class=" typeButton" style="padding:10px;">
106+
Video<br/>
107+
<input type="radio" name = "stype" value="vid" autocomplete="off">
108+
</label>
109+
</div>
110+
</div>
111+
<div class="col-sm-2 col-xs-6">
112+
<div class="form-group" style="display:inline-block; margin: 0 ; min-width:50%;">
113+
<label for="resp">Max:</label>
114+
<select class="form-control" id="resp">
115+
<option>10</option>
116+
<option>20</option>
117+
<option>30</option>
118+
<option>40</option>
119+
<option>50</option>
120+
<option>60</option>
121+
<option>70</option>
122+
<option>80</option>
123+
<option>90</option>
124+
<option>100</option>
125+
</select>
126+
</div>
127+
</div>
128+
<div class="col-sm-2 col-xs-6" style="margin-top:20px;">
129+
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
109130
</div>
110-
</div>
111-
<div class="col-sm-4 col-xs-s6" style="margin-top:20px;">
112-
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
113131
</div>
114132
</div>
115133
</form>
@@ -177,10 +195,10 @@ <h1><code>query-server</code></h1>
177195
$('#feed').hide();
178196
var sengine = $("#engine").val();
179197
var squery = $('#query').val();
198+
var stype = $("input[name=stype]:checked").val()
180199
var sformat = $(' #format label.active input').val();
181200
var count = $('#resp').val();
182-
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine +
183-
"?query=" + squery + "&format=" + sformat + "&num=" + count;
201+
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&type=" + stype + "&format=" + sformat + "&num=" + count;
184202
$.ajax({
185203
url: urlloc,
186204
type: 'GET',
@@ -209,10 +227,21 @@ <h1><code>query-server</code></h1>
209227
$('.formatButton').click(function (e) {
210228
e.preventDefault();
211229
if (!$(this).hasClass('active')) {
212-
$(".active").removeClass("active");
230+
$(".formatButton .active").removeClass("active");
231+
$(this).addClass('active')
232+
}
233+
});
234+
235+
/*
236+
$('.typeButton').click(function (e) {
237+
e.preventDefault();
238+
if (!$(this).hasClass('active')) {
239+
$(".typeButton .active").removeClass("active");
213240
$(this).addClass('active')
214241
}
215242
});
243+
*/
244+
216245

217246
$(window).keydown(function (event) {
218247
if (event.keyCode == 13) {

0 commit comments

Comments
 (0)