Skip to content

video-search : google #426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ app/static/bower_components/*
*.swp
Pipfile
Pipfile.lock
.vscode/*

6 changes: 3 additions & 3 deletions app/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@


def small_test():
assert isinstance(scrapers['google'].search('fossasia', 1), list)
assert isinstance(scrapers['google'].search('fossasia', 1), list)


def feed_gen(query, engine, count=10):
def feed_gen(query, engine, count=10, qtype=''):
engine = engine.lower()
# provide temporary backwards compatibility for old names
old_names = {'ubaidu': 'baidu',
Expand All @@ -43,5 +43,5 @@ def feed_gen(query, engine, count=10):
if engine in ('quora', 'youtube'):
urls = scrapers[engine].search_without_count(query)
else:
urls = scrapers[engine].search(query, count)
urls = scrapers[engine].search(query, count, qtype)
return urls
13 changes: 8 additions & 5 deletions app/scrapers/generalized.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class Scraper:
startKey = ''
queryKey = 'q'
defaultStart = 0
qtype = ''
headers = {
'User-Agent': (
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) '
Expand All @@ -20,12 +21,14 @@ class Scraper:
def __init__(self):
pass

def get_page(self, query, startIndex=0):
def get_page(self, query, startIndex=0, qtype=''):
""" Fetch the google search results page
Returns : Results Page
"""
payload = {self.queryKey: query, self.startKey: startIndex}
payload = {self.queryKey: query, self.startKey: startIndex,
self.qtype: qtype}
response = requests.get(self.url, headers=self.headers, params=payload)
print(response.url)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we do this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, that was left in from testing; I forgot to remove it 😅
I don't know about the live link. Can you tell me more?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@umangahuja1 Deploy it on Heroku and share the link here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not able to deploy. It is asking for card details, even though I have deployed my apps for free earlier.

return response

def parse_response(self, soup):
Expand All @@ -34,16 +37,16 @@ def parse_response(self, soup):
def next_start(self, current_start, prev_results):
return current_start + len(prev_results)

def search(self, query, num_results):
def search(self, query, num_results, qtype=''):
"""
Search for the query and return set of urls
Returns: list
"""
urls = []
current_start = self.defaultStart

while(len(urls) < num_results):
response = self.get_page(query, current_start)
while (len(urls) < num_results):
response = self.get_page(query, current_start, qtype)
soup = BeautifulSoup(response.text, 'html.parser')
new_results = self.parse_response(soup)
if new_results is None:
Expand Down
1 change: 1 addition & 0 deletions app/scrapers/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def __init__(self):
self.url = 'https://www.google.com/search'
self.defaultStart = 0
self.startKey = 'start'
self.qtype = 'tbm'

def next_start(self, current_start, prev_results):
return current_start + len(prev_results)
Expand Down
3 changes: 2 additions & 1 deletion app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def search(search_engine):
try:
count = int(request.args.get('num', 10))
qformat = request.args.get('format', 'json').lower()
qtype = request.args.get('type', '')
if qformat not in ('json', 'xml', 'csv'):
abort(400, 'Not Found - undefined format')

Expand All @@ -68,7 +69,7 @@ def search(search_engine):
if result:
print("cache hit: {}".format(engine_and_query))
else:
result = feed_gen(query, engine, count)
result = feed_gen(query, engine, count, qtype)
if result:
# store the result in the cache to speed up future searches
store(engine_and_query, result)
Expand Down
125 changes: 77 additions & 48 deletions app/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -64,52 +64,70 @@ <h1><code>query-server</code></h1>
</div>
</div>
<br/>
<div class="col-sm-4 col-xs-6" style="padding:0; margin: 0 auto;">
<div class="dropdown">
<label>Engine:</label><br/>
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
<span id="drop_down_text">
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
</span>
&nbsp;
<span class="caret"></span>
</button>
<input type="hidden" id="engine" value="google">
<ul class="dropdown-menu">
{% for engine in engines_list %}
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
<a style="cursor:pointer">
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
{% else %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
{% endif %}
{{ engine }}
</a>
</li>
{% endfor %}
</ul>
</div>
</div>
<div class="col-sm-4 col-xs-4">
<div class="form-group" style="display:inline-block; margin: 0 auto; min-width:50%;">
<label for="resp">Max:</label>
<select class="form-control" id="resp">
<option>10</option>
<option>20</option>
<option>30</option>
<option>40</option>
<option>50</option>
<option>60</option>
<option>70</option>
<option>80</option>
<option>90</option>
<option>100</option>
</select>

<div class="row">
<div class="col-sm-3 col-xs-6" style="padding:0; margin: 0 auto;">
<div class="dropdown">
<label>Engine:</label><br/>
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
<span id="drop_down_text">
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
</span>
&nbsp;
<span class="caret"></span>
</button>
<input type="hidden" id="engine" value="google">
<ul class="dropdown-menu">
{% for engine in engines_list %}
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
<a style="cursor:pointer">
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
{% else %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
{% endif %}
{{ engine }}
</a>
</li>
{% endfor %}
</ul>
</div>
</div>
<div class="col-sm-5 col-xs-6">
<label>Type:</label><br/>
<div id="type" class="btn-group btn-group-vertical" style="display:inline-flex;padding:0; margin: 0 auto;" data-toggle="buttons">
<label class=" active typeButton" style="padding:10px;">General<br/>
<input type="radio" name = "stype" value="" autocomplete="off" checked>
</label>
<label class=" typeButton" style="padding:10px;">Images<br/>
<input type="radio" name = "stype" value="isch" autocomplete="off">
</label>
<label class=" typeButton" style="padding:10px;">
Video<br/>
<input type="radio" name = "stype" value="vid" autocomplete="off">
</label>
</div>
</div>
<div class="col-sm-2 col-xs-6">
<div class="form-group" style="display:inline-block; margin: 0 ; min-width:50%;">
<label for="resp">Max:</label>
<select class="form-control" id="resp">
<option>10</option>
<option>20</option>
<option>30</option>
<option>40</option>
<option>50</option>
<option>60</option>
<option>70</option>
<option>80</option>
<option>90</option>
<option>100</option>
</select>
</div>
</div>
<div class="col-sm-2 col-xs-6" style="margin-top:20px;">
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
</div>
</div>
<div class="col-sm-4 col-xs-s6" style="margin-top:20px;">
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
</div>
</div>
</form>
Expand Down Expand Up @@ -177,10 +195,10 @@ <h1><code>query-server</code></h1>
$('#feed').hide();
var sengine = $("#engine").val();
var squery = $('#query').val();
var stype = $("input[name=stype]:checked").val()
var sformat = $(' #format label.active input').val();
var count = $('#resp').val();
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine +
"?query=" + squery + "&format=" + sformat + "&num=" + count;
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&type=" + stype + "&format=" + sformat + "&num=" + count;
$.ajax({
url: urlloc,
type: 'GET',
Expand Down Expand Up @@ -209,10 +227,21 @@ <h1><code>query-server</code></h1>
$('.formatButton').click(function (e) {
e.preventDefault();
if (!$(this).hasClass('active')) {
$(".active").removeClass("active");
$(".formatButton .active").removeClass("active");
$(this).addClass('active')
}
});

/*
$('.typeButton').click(function (e) {
e.preventDefault();
if (!$(this).hasClass('active')) {
$(".typeButton .active").removeClass("active");
$(this).addClass('active')
}
});
*/


$(window).keydown(function (event) {
if (event.keyCode == 13) {
Expand Down