update logging to std out
gaspa93 committed Nov 28, 2021
1 parent 1d4c189 commit 73438f6
Showing 2 changed files with 22 additions and 7 deletions.
20 changes: 17 additions & 3 deletions googlemaps.py
@@ -56,7 +56,7 @@ def sort_by(self, url, ind):
time.sleep(3)
except Exception as e:
tries += 1
- self.logger.warn('Failed to click recent button')
+ self.logger.warn('Failed to click sorting button')

# failed to open the dropdown
if tries == MAX_RETRY:
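
For context, the hunk above sits inside sort_by's retry loop: the click is attempted repeatedly, a warning is logged on each failure, and the loop gives up once tries reaches MAX_RETRY. A minimal standalone sketch of that pattern is shown below; the helper name click_with_retry and its parameters are illustrative only, and it assumes the Selenium 3 API (find_element_by_xpath) used elsewhere in googlemaps.py.

    import time

    def click_with_retry(driver, xpath, max_retry=5, wait=3):
        # keep trying to click the element, sleeping between attempts,
        # and report whether a click eventually succeeded
        tries = 0
        while tries < max_retry:
            try:
                driver.find_element_by_xpath(xpath).click()
                return True
            except Exception:
                tries += 1
                time.sleep(wait)
        return False
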
@@ -92,6 +92,8 @@ def get_reviews(self, offset):
for index, review in enumerate(rblock):
if index >= offset:
parsed_reviews.append(self.__parse(review))

+ # logging to std out
+ print(self.__parse(review))

return parsed_reviews
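
Since the commit's goal is logging to std out, the bare print() added above could also be routed through the self.logger that sort_by already uses, provided that logger writes to standard output. A minimal sketch of such a setup follows; the function name build_stdout_logger and the format string are illustrative, not taken from the repository.

    import logging
    import sys

    def build_stdout_logger(name='googlemaps-scraper'):
        # send log records to stdout instead of the default stderr
        logger = logging.getLogger(name)
        logger.setLevel(logging.INFO)
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        logger.addHandler(handler)
        return logger

With a logger configured this way, the added line could read self.logger.info(self.__parse(review)) and still end up on stdout.
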
@@ -192,14 +194,14 @@ def more_reviews(self):
#<button ved="1i:1,t:18519,e:0,p:kPkcYIz-Dtql-QaL1YawDw:1969" jstcache="1202" jsaction="pane.reviewChart.moreReviews" class="gm2-button-alt jqnFjrOWMVU__button-blue" jsan="7.gm2-button-alt,7.jqnFjrOWMVU__button-blue,0.ved,22.jsaction">14 reviews</button>
#<button aria-label="14 reviews" vet="3648" jsaction="pane.rating.moreReviews" jstcache="1010" class="widget-pane-link" jsan="7.widget-pane-link,0.aria-label,0.vet,0.jsaction">14 reviews</button>
links = self.driver.find_elements_by_xpath('//button[@jsaction=\'pane.reviewChart.moreReviews\']')
print('LINKS HERE', links)

for l in links:
l.click()
time.sleep(2)


def __scroll(self):
- scrollable_div = self.driver.find_element_by_css_selector('div.section-layout.section-scrollbox.cYB2Ge-oHo7ed.cYB2Ge-ti6hGc')
+ scrollable_div = self.driver.find_element_by_css_selector('div.siAUzd-neVct.section-scrollbox.cYB2Ge-oHo7ed.cYB2Ge-ti6hGc')
self.driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
#self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
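
The selector change in __scroll above targets the scrollable reviews pane, and scrolling it to the bottom is what triggers Google Maps to lazy-load more reviews. A sketch of a scroll-until-stable loop built on the same calls follows; the helper name, pause and round limit are illustrative, and the class names are the ones from this commit (Google changes them frequently).

    import time

    def scroll_reviews_pane(driver, pause=2, max_rounds=10):
        # scroll the reviews container to its bottom until its height stops growing
        pane = driver.find_element_by_css_selector(
            'div.siAUzd-neVct.section-scrollbox.cYB2Ge-oHo7ed.cYB2Ge-ti6hGc')
        last_height = 0
        for _ in range(max_rounds):
            driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', pane)
            time.sleep(pause)
            height = driver.execute_script('return arguments[0].scrollHeight', pane)
            if height == last_height:
                break
            last_height = height
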

@@ -237,6 +239,18 @@ def __get_driver(self, debug=False):
options.add_argument("--lang=en-GB")
input_driver = webdriver.Chrome(chrome_options=options)

+ # first lets click on google agree button so we can continue
+ try:
+ input_driver.get(GM_WEBPAGE)
+ agree = WebDriverWait(input_driver, 10).until(
+ EC.element_to_be_clickable((By.XPATH, '//span[contains(text(), "I agree")]')))
+ agree.click()
+
+ # back to the main page
+ input_driver.switch_to_default_content()
+ except:
+ pass

return input_driver
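
The consent-click block added above relies on Selenium's explicit-wait helpers (WebDriverWait, expected_conditions and By). If googlemaps.py does not already import them, the standard Selenium 3 imports would be:

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

Note that input_driver.switch_to_default_content() is the older spelling; newer Selenium releases expose the same behaviour as input_driver.switch_to.default_content().
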


9 changes: 5 additions & 4 deletions scraper.py
@@ -7,7 +7,7 @@
import time


- ind = {'most_relevant' : 1 , 'newest' : 2, 'highest_rating' : 3, 'lowest_rating' : 4 }
+ ind = {'most_relevant' : 0 , 'newest' : 1, 'highest_rating' : 2, 'lowest_rating' : 3 }
HEADER = ['id_review', 'caption', 'relative_date', 'retrieval_date', 'rating', 'username', 'n_review_user', 'n_photo_user', 'url_user']
HEADER_W_SOURCE = ['id_review', 'caption', 'relative_date','retrieval_date', 'rating', 'username', 'n_review_user', 'n_photo_user', 'url_user', 'url_source']

@@ -47,7 +47,6 @@ def csv_writer(source_field, ind_sort_by, path='data/'):
print(scraper.get_account(url))
else:
error = scraper.sort_by(url, ind[args.sort_by])
- print(error)

if error == 0:

@@ -57,9 +56,11 @@ def csv_writer(source_field, ind_sort_by, path='data/'):
scraper.more_reviews()

while n < args.N:

+ # logging to std out
+ print(colored('[Review ' + str(n) + ']', 'cyan'))
+ reviews = scraper.get_reviews(n)

- reviews = scraper.get_reviews(n)
if len(reviews) == 0:
break

@@ -70,4 +71,4 @@ def csv_writer(source_field, ind_sort_by, path='data/'):

writer.writerow(row_data)

- n += len(reviews)
+ n += len(reviews)
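
Read together, the scraper.py changes keep the paging structure of the main loop: print a progress marker to stdout, fetch reviews starting from the current offset, write them out, and advance the offset by however many reviews came back, stopping when a fetch returns nothing. A compact sketch of that loop follows; the function name, the target_count parameter and the row layout are illustrative assumptions.

    from termcolor import colored

    def collect_reviews(scraper, writer, target_count):
        n = 0
        while n < target_count:
            # logging to std out
            print(colored('[Review ' + str(n) + ']', 'cyan'))
            reviews = scraper.get_reviews(n)
            if len(reviews) == 0:          # nothing new loaded: stop paging
                break
            for r in reviews:
                writer.writerow(list(r.values()))   # row layout is an assumption
            n += len(reviews)
        return n
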
