Skip to content

Video search #425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ app/static/bower_components/*
*.swp
Pipfile
Pipfile.lock
.vscode/*

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ The API(s) provided by query-server are as follows:

> *query* : query can be any string

> *format* : [`json`, `xml`]
> *format* : [`json`, `xml`, `csv`]

A sample query : `/api/v1/search/bing?query=fossasia&format=xml&num=10`

Expand Down
4 changes: 2 additions & 2 deletions app/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def small_test():
assert isinstance(scrapers['google'].search('fossasia', 1), list)


def feed_gen(query, engine, count=10):
def feed_gen(query, engine, count=10, qtype=''):
engine = engine.lower()
# provide temporary backwards compatibility for old names
old_names = {'ubaidu': 'baidu',
Expand All @@ -45,5 +45,5 @@ def feed_gen(query, engine, count=10):
if engine in ('quora', 'youtube'):
urls = scrapers[engine].search_without_count(query)
else:
urls = scrapers[engine].search(query, count)
urls = scrapers[engine].search(query, count,qtype)
return urls
10 changes: 6 additions & 4 deletions app/scrapers/generalized.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ class Scraper:
def __init__(self):
pass

def get_page(self, query, startIndex=0):
def get_page(self, query, startIndex=0, qtype = ''):
""" Fetch the google search results page
Returns : Results Page
"""
payload = {self.queryKey: query, self.startKey: startIndex}
payload = {self.queryKey: query, self.startKey: startIndex,
self.qtype : qtype}
response = requests.get(self.url, headers=self.headers, params=payload)
print(response.url)
return response

def parse_response(self, soup):
Expand All @@ -34,7 +36,7 @@ def parse_response(self, soup):
def next_start(self, current_start, prev_results):
return current_start + len(prev_results)

def search(self, query, num_results):
def search(self, query, num_results, qtype=''):
"""
Search for the query and return set of urls
Returns: list
Expand All @@ -43,7 +45,7 @@ def search(self, query, num_results):
current_start = self.defaultStart

while(len(urls) < num_results):
response = self.get_page(query, current_start)
response = self.get_page(query, current_start, qtype)
soup = BeautifulSoup(response.text, 'html.parser')
new_results = self.parse_response(soup)
if new_results is None:
Expand Down
1 change: 1 addition & 0 deletions app/scrapers/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def __init__(self):
self.url = 'https://www.google.com/search'
self.defaultStart = 0
self.startKey = 'start'
self.qtype = 'tbm'

def next_start(self, current_start, prev_results):
return current_start + len(prev_results)
Expand Down
3 changes: 2 additions & 1 deletion app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def search(search_engine):
try:
count = int(request.args.get('num', 10))
qformat = request.args.get('format', 'json').lower()
qtype = request.args.get('type', '')
if qformat not in ('json', 'xml', 'csv'):
abort(400, 'Not Found - undefined format')

Expand All @@ -63,7 +64,7 @@ def search(search_engine):
if result:
print("cache hit: {}".format(engine_and_query))
else:
result = feed_gen(query, engine, count)
result = feed_gen(query, engine, count, qtype)
if result:
# store the result in the cache to speed up future searches
store(engine_and_query, result)
Expand Down
125 changes: 77 additions & 48 deletions app/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -64,52 +64,70 @@ <h1><code>query-server</code></h1>
</div>
</div>
<br/>
<div class="col-sm-4 col-xs-6" style="padding:0; margin: 0 auto;">
<div class="dropdown">
<label>Engine:</label><br/>
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
<span id="drop_down_text">
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
</span>
&nbsp;
<span class="caret"></span>
</button>
<input type="hidden" id="engine" value="google">
<ul class="dropdown-menu">
{% for engine in engines_list %}
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
<a style="cursor:pointer">
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
{% else %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
{% endif %}
{{ engine }}
</a>
</li>
{% endfor %}
</ul>
</div>
</div>
<div class="col-sm-4 col-xs-4">
<div class="form-group" style="display:inline-block; margin: 0 auto; min-width:50%;">
<label for="resp">Max:</label>
<select class="form-control" id="resp">
<option>10</option>
<option>20</option>
<option>30</option>
<option>40</option>
<option>50</option>
<option>60</option>
<option>70</option>
<option>80</option>
<option>90</option>
<option>100</option>
</select>

<div class="row"></div>
<div class="col-sm-3 col-xs-6" style="padding:0; margin: 0 auto;">
<div class="dropdown">
<label>Engine:</label><br/>
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
<span id="drop_down_text">
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
</span>
&nbsp;
<span class="caret"></span>
</button>
<input type="hidden" id="engine" value="google">
<ul class="dropdown-menu">
{% for engine in engines_list %}
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
<a style="cursor:pointer">
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
{% else %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
{% endif %}
{{ engine }}
</a>
</li>
{% endfor %}
</ul>
</div>
</div>
<div class="col-sm-5 col-xs-6">
<label>Type:</label><br/>
<div id="type" class="btn-group btn-group-vertical" style="display:inline-flex;padding:0; margin: 0 auto;" data-toggle="buttons">
<label class=" active typeButton" style="padding:10px;">General<br/>
<input type="radio" name = "stype" value="" autocomplete="off" checked>
</label>
<label class=" typeButton" style="padding:10px;">Images<br/>
<input type="radio" name = "stype" value="isch" autocomplete="off">
</label>
<label class=" typeButton" style="padding:10px;">
Video<br/>
<input type="radio" name = "stype" value="vid" autocomplete="off">
</label>
</div>
</div>
<div class="col-sm-2 col-xs-6">
<div class="form-group" style="display:inline-block; margin: 0 ; min-width:50%;">
<label for="resp">Max:</label>
<select class="form-control" id="resp">
<option>10</option>
<option>20</option>
<option>30</option>
<option>40</option>
<option>50</option>
<option>60</option>
<option>70</option>
<option>80</option>
<option>90</option>
<option>100</option>
</select>
</div>
</div>
<div class="col-sm-2 col-xs-6" style="margin-top:20px;">
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
</div>
</div>
<div class="col-sm-4 col-xs-s6" style="margin-top:20px;">
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
</div>
</div>
</form>
Expand Down Expand Up @@ -179,10 +197,10 @@ <h1><code>query-server</code></h1>
$('#feed').hide();
var sengine = $("#engine").val();
var squery = $('#query').val();
var stype = $("input[name=stype]:checked").val()
var sformat = $(' #format label.active input').val();
var count = $('#resp').val();
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine +
"?query=" + squery + "&format=" + sformat + "&num=" + count;
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&type=" + stype + "&format=" + sformat + "&num=" + count;
$.ajax({
url: urlloc,
type: 'GET',
Expand Down Expand Up @@ -211,10 +229,21 @@ <h1><code>query-server</code></h1>
$('.formatButton').click(function (e) {
e.preventDefault();
if (!$(this).hasClass('active')) {
$(".active").removeClass("active");
$(".formatButton .active").removeClass("active");
$(this).addClass('active')
}
});

/*
$('.typeButton').click(function (e) {
e.preventDefault();
if (!$(this).hasClass('active')) {
$(".typeButton .active").removeClass("active");
$(this).addClass('active')
}
});
*/


$(window).keydown(function (event) {
if (event.keyCode == 13) {
Expand Down