Commit
Release (#9)
* Added Badges (#5)

* Update Doc

* Add Codacy badge (#6)

* Add Support for gh-actions (#8)
ParthS007 authored Dec 29, 2020
1 parent 1668c01 commit 8b3bf7a
Showing 9 changed files with 132 additions and 108 deletions.
3 changes: 1 addition & 2 deletions .flake8
@@ -1,7 +1,6 @@
[flake8]
max-line-length = 130
ignore = W191,
F841
ignore = W191, F841, W503
exclude =
.git,
__pycache__
28 changes: 28 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,28 @@
name: CI

on:
  push:
    branches: [master, development]
  pull_request:
    branches: [master, development]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.9

      - name: Check python code formatting
        run: |
          pip install black
          black --check .
      - name: Check compliance with pep8, pyflakes and cyclomatic complexity
        run: |
          pip install flake8
          flake8 .
19 changes: 0 additions & 19 deletions .travis.yml

This file was deleted.

6 changes: 6 additions & 0 deletions README.md
@@ -1,7 +1,13 @@
# Ali Scraper

![Ali Scraper](/other/Scraper-artwork.png)

A scraper that scrapes AliExpress and puts the product details in a Google spreadsheet.

[![Build Status](https://github.com/ParthS007/Ali-Scraper/workflows/CI/badge.svg)](https://github.com/ParthS007/Ali-Scraper/actions)
![Lines of code](https://tokei.rs/b1/github/ParthS007/Ali-Scraper)
[![HitCount](http://hits.dwyl.io/ParthS007/Ali-Scraper.svg)](http://hits.dwyl.io/ParthS007/Ali-Scraper)

## Technology

- Python 3
12 changes: 8 additions & 4 deletions __init__.py
@@ -3,9 +3,13 @@
max_orders = 100
threshold = 5

scope = ['https://spreadsheets.google.com/feeds',
'https://www.googleapis.com/auth/drive']

credentials = ServiceAccountCredentials.from_json_keyfile_name('ALI_Scraper-3.json', scope)
scope = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/drive",
]

credentials = ServiceAccountCredentials.from_json_keyfile_name(
    "ALI_Scraper-3.json", scope
)

base_url = "https://www.aliexpress.com/wholesale?SortType=total_tranpro_desc"
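For orientation, here is a minimal sketch of how these module-level `scope` and `credentials` values are typically handed to gspread to open the target worksheet. The spreadsheet title used below is only an illustrative placeholder, not necessarily the one this project uses:

```python
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Same scope list and service-account key file as in __init__.py above.
scope = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/drive",
]
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    "ALI_Scraper-3.json", scope
)

# Authorize a gspread client and open the first worksheet of the spreadsheet.
client = gspread.authorize(credentials)
sheet = client.open("Ali Scraper").sheet1  # placeholder spreadsheet title
```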
91 changes: 47 additions & 44 deletions function.py
@@ -7,8 +7,8 @@
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS

price_patt = re.compile(r'.*\$(.*)')
orders_patt = re.compile(r'.*\((.*)\)')
price_patt = re.compile(r".*\$(.*)")
orders_patt = re.compile(r".*\((.*)\)")


def get_end_page(url):
@@ -17,50 +17,45 @@ def get_end_page(url):
with contextlib.closing(urlopen(url)) as page:
data = page.read()
Soup = BS(data, "lxml")
totalResult = Soup.find('strong', {'class': 'search-count'})
totalResult = Soup.find("strong", {"class": "search-count"})
print("Sleeping for 25 seconds for end page")
time.sleep(25)

results = int(totalResult.text.replace(',', ''))
if (results >= 4800):
results = int(totalResult.text.replace(",", ""))
if results >= 4800:
endPage = 100
elif (results > 0 and results < 4800):
elif results > 0 and results < 4800:
endPage = math.ceil((results / 48))
return endPage


def get_items_on_page(url, page_no):
print("Page: " + str(page_no))
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)\
AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'}
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)\
AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36"
}
with contextlib.closing(urlopen(url + "&page=" + str(page_no))) as page:
data = page.read()
soup = BS(data, "lxml")
list_ele = soup.find('ul', {'id': 'hs-below-list-items'})
items_ele = list_ele.find_all('div', {'class': 'item'})
list_ele = soup.find("ul", {"id": "hs-below-list-items"})
items_ele = list_ele.find_all("div", {"class": "item"})
items = {}

for i, ele in enumerate(items_ele):
info = ele.find('div', {'class': 'info'})
link_ele = info.find('a', {'class': 'history-item'})
link = link_ele['href']
info = ele.find("div", {"class": "info"})
link_ele = info.find("a", {"class": "history-item"})
link = link_ele["href"]
name = link_ele.text.strip()
price_text = info.find('span', {'class': 'value'}).text
price_text = info.find("span", {"class": "value"}).text
price = price_patt.search(price_text).groups()[0]
orders_ele = info.find('span', {'class': 'order-num'})
orders_string = orders_ele.find('em').text
orders_raw = orders_patt.search(
orders_string
).groups()[0].replace(',', '')
orders_ele = info.find("span", {"class": "order-num"})
orders_string = orders_ele.find("em").text
orders_raw = orders_patt.search(orders_string).groups()[0].replace(",", "")
orders = int(orders_raw)
tokens = link.split("?")[0].split("/")
id = int(tokens[-1].split(".")[0])
items[id] = {
"name": name,
"price": price,
"link": link,
"orders": orders
}
items[id] = {"name": name, "price": price, "link": link, "orders": orders}

return items

@@ -99,13 +94,13 @@ def put_items(sheet, items, diff):
for i, id in enumerate(items.keys()):
item = items[id]
cell_range[j].value = id
cell_range[j + 1].value = item['name']
cell_range[j + 2].value = item['price']
cell_range[j + 3].value = item['link']
cell_range[j + 4].value = item['orders']
cell_range[j + 5].value = item['prev_orders']
cell_range[j + 6].value = item['delta']
cell_range[j + 7].value = item['interesting']
cell_range[j + 1].value = item["name"]
cell_range[j + 2].value = item["price"]
cell_range[j + 3].value = item["link"]
cell_range[j + 4].value = item["orders"]
cell_range[j + 5].value = item["prev_orders"]
cell_range[j + 6].value = item["delta"]
cell_range[j + 7].value = item["interesting"]
j += 8
if diff > 0:
last_row = len(items) + 1
@@ -119,29 +114,35 @@ def send_msg(items, item_name):

def send_msg(items, item_name):
# reply to thread or post an article in the newsgroup
SMTPSVR = 'smtp.gmail.com'
who = '[email protected]'
SMTPSVR = "smtp.gmail.com"
who = "[email protected]"
msg = """Subject: Hot items: {item_name}
Hello Nat,
Here are some interesting items:
""".format(item_name=item_name)
""".format(
item_name=item_name
)
"""
with open('message', 'w') as msg:
msg.write('From: YOUR_NAME_HERE <[email protected]>\n')
msg.write('Newsgroups: %s\n' % group_name)
msg.write('Subject: %s\n' % subject)
subprocess.call(['nano', 'message'])
"""
recipients = ['[email protected]'] # Add Reciepent Mail
recipients = ["[email protected]"]  # Add recipient mail
item_list = []
for id in items:
item_list.append("{name} - {link} - increased by {delta}(From {prev_orders} to {orders})".format(name=items[id]['name'],
link=items[id]['link'],
delta=items[id]['delta'],
prev_orders=items[id]['prev_orders'],
orders=items[id]['orders']))
msg += '\n\n'.join(item_list)
item_list.append(
"{name} - {link} - increased by {delta}(From {prev_orders} to {orders})".format(
name=items[id]["name"],
link=items[id]["link"],
delta=items[id]["delta"],
prev_orders=items[id]["prev_orders"],
orders=items[id]["orders"],
)
)
msg += "\n\n".join(item_list)
msg += """
Regards,
@@ -154,7 +155,7 @@ def send_msg(items, item_name):
exit()
sendSvr.ehlo()
try:
sendSvr.login('[email protected]', 'xxxxxx') # Add Your Email ID and Password
sendSvr.login("[email protected]", "xxxxxx") # Add Your Email ID and Password
except SMTPAuthenticationError:
print("Invalid SMTP credentials.")
exit()
@@ -165,7 +166,9 @@


def next_available_row(worksheet):
str_list = list(filter(None, worksheet.col_values(1))) # fastest but perhaps stupid :)
str_list = list(
filter(None, worksheet.col_values(1))
) # fastest but perhaps stupid :)
return len(str_list)
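As a quick, hedged illustration of the two module-level regexes reformatted at the top of this file, here is how `price_patt` and `orders_patt` pull the price and order count out of listing text. The sample strings are invented for illustration and only approximate what AliExpress actually renders:

```python
import re

price_patt = re.compile(r".*\$(.*)")
orders_patt = re.compile(r".*\((.*)\)")

# Hypothetical snippets shaped like the text the scraper reads.
price_text = "US $12.99"
orders_string = "Orders (1,234)"

price = price_patt.search(price_text).groups()[0]  # "12.99"
orders = int(orders_patt.search(orders_string).groups()[0].replace(",", ""))  # 1234
print(price, orders)
```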


Binary file added other/Scraper-artwork.png
62 changes: 31 additions & 31 deletions requirements.txt
@@ -1,34 +1,34 @@
asn1crypto==0.24.0
astroid==1.6.5
beautifulsoup4==4.6.0
asn1crypto==1.4.0
astroid==2.4.2
beautifulsoup4==4.9.3
bs4==0.0.1
cachetools==2.1.0
certifi==2018.4.16
cffi==1.11.5
chardet==3.0.4
cryptography>=2.3
google-api-python-client==1.7.3
google-auth==1.5.0
google-auth-httplib2==0.0.3
gspread==3.0.0
html5lib==1.0.1
httplib2==0.11.3
idna==2.6
isort==4.3.4
lazy-object-proxy==1.3.1
lxml==4.2.1
cachetools==4.2.0
certifi==2020.12.5
cffi==1.14.4
chardet==4.0.0
cryptography>=3.3.1
google-api-python-client==1.12.8
google-auth==1.24.0
google-auth-httplib2==0.0.4
gspread==3.6.0
html5lib==1.1
httplib2==0.18.1
idna==2.10
isort==5.6.4
lazy-object-proxy==1.5.2
lxml==4.6.2
mccabe==0.6.1
oauth2client==4.1.2
pyasn1==0.4.3
pyasn1-modules==0.2.1
pycparser==2.18
pylint==1.9.2
pyOpenSSL==18.0.0
requests>=2.20.0
rsa==3.4.2
selenium==3.12.0
six==1.11.0
uritemplate==3.0.0
urllib3>=1.23
oauth2client==4.1.3
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
pylint==2.6.0
pyOpenSSL==20.0.1
requests>=2.25.1
rsa==4.6
selenium==3.141.0
six==1.15.0
uritemplate==3.0.1
urllib3>=1.26.2
webencodings==0.5.1
wrapt==1.10.11
wrapt==1.12.1
19 changes: 11 additions & 8 deletions run.py
@@ -30,18 +30,21 @@ def main_search(sheet, query):
for id in items:
prev_orders = prev_items_orders.get(id)
if prev_orders is not None:
items[id]['prev_orders'] = prev_orders
items[id]['delta'] = items[id]['orders'] - items[id]['prev_orders']
items[id]["prev_orders"] = prev_orders
items[id]["delta"] = items[id]["orders"] - items[id]["prev_orders"]

if items[id]['delta'] >= threshold and items[id]['prev_orders'] <= max_orders:
if (
items[id]["delta"] >= threshold
and items[id]["prev_orders"] <= max_orders
):
interesting[id] = items[id]
items[id]['interesting'] = True
items[id]["interesting"] = True
else:
items[id]['interesting'] = False
items[id]["interesting"] = False
else:
items[id]['prev_orders'] = None
items[id]['delta'] = None
items[id]['interesting'] = None # new item!
items[id]["prev_orders"] = None
items[id]["delta"] = None
items[id]["interesting"] = None # new item!

if interesting:
function.send_msg(interesting, item_name=query)
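To make the selection rule in this hunk concrete, here is a small worked sketch of the `threshold` / `max_orders` filter. The item numbers are invented, while `threshold = 5` and `max_orders = 100` come from `__init__.py`:

```python
threshold = 5     # minimum jump in orders worth reporting (from __init__.py)
max_orders = 100  # skip items that were already selling heavily

# Hypothetical item: previously 40 orders, now 48.
item = {"orders": 48, "prev_orders": 40}
item["delta"] = item["orders"] - item["prev_orders"]  # 8

# Same condition as in main_search(): a big enough jump on a still-small item.
item["interesting"] = (
    item["delta"] >= threshold and item["prev_orders"] <= max_orders
)
print(item["interesting"])  # True
```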
