Commit
Release (#9)
* Added Badges (#5)

* Update Doc

* Add Codacy badge (#6)

* Add Support for gh-actions (#8)
ParthS007 authored Dec 29, 2020
1 parent 1668c01 commit 8b3bf7a
Showing 9 changed files with 132 additions and 108 deletions.
3 changes: 1 addition & 2 deletions .flake8
@@ -1,7 +1,6 @@
[flake8]
max-line-length = 130
ignore = W191,
F841
ignore = W191, F841, W503
exclude =
.git,
__pycache__
28 changes: 28 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,28 @@
name: CI

on:
  push:
    branches: [master, development]
  pull_request:
    branches: [master, development]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.9

      - name: Check python code formatting
        run: |
          pip install black
          black --check .
      - name: Check compliance with pep8, pyflakes and cyclomatic complexity
        run: |
          pip install flake8
          flake8 .
19 changes: 0 additions & 19 deletions .travis.yml

This file was deleted.

6 changes: 6 additions & 0 deletions README.md
@@ -1,7 +1,13 @@
# Ali Scraper

![Ali Scraper](/other/Scraper-artwork.png)

A scraper that scrapes AliExpress and puts the product details in a Google spreadsheet.

[![Build Status](https://github.com/ParthS007/Ali-Scraper/workflows/CI/badge.svg)](https://github.com/ParthS007/Ali-Scraper/actions)
![Lines of code](https://tokei.rs/b1/github/ParthS007/Ali-Scraper)
[![HitCount](http://hits.dwyl.io/ParthS007/Ali-Scraper.svg)](http://hits.dwyl.io/ParthS007/Ali-Scraper)

## Technology

- Python 3
12 changes: 8 additions & 4 deletions __init__.py
@@ -3,9 +3,13 @@
max_orders = 100
threshold = 5

scope = ['https://spreadsheets.google.com/feeds',
'https://www.googleapis.com/auth/drive']

credentials = ServiceAccountCredentials.from_json_keyfile_name('ALI_Scraper-3.json', scope)
scope = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/drive",
]

credentials = ServiceAccountCredentials.from_json_keyfile_name(
    "ALI_Scraper-3.json", scope
)

base_url = "https://www.aliexpress.com/wholesale?SortType=total_tranpro_desc"
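For orientation, here is a minimal sketch of how these module-level `scope` and `credentials` values are typically handed to gspread to open the target worksheet. The spreadsheet title used below is only an illustrative placeholder, not necessarily the one this project uses:

```python
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Same scope list and service-account key file as in __init__.py above.
scope = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/drive",
]
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    "ALI_Scraper-3.json", scope
)

# Authorize a gspread client and open the first worksheet of the spreadsheet.
client = gspread.authorize(credentials)
sheet = client.open("Ali Scraper").sheet1  # placeholder spreadsheet title
```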
91 changes: 47 additions & 44 deletions function.py
@@ -7,8 +7,8 @@
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS

price_patt = re.compile(r'.*\$(.*)')
orders_patt = re.compile(r'.*\((.*)\)')
price_patt = re.compile(r".*\$(.*)")
orders_patt = re.compile(r".*\((.*)\)")


def get_end_page(url):
@@ -17,50 +17,45 @@ def get_end_page(url):
with contextlib.closing(urlopen(url)) as page:
data = page.read()
Soup = BS(data, "lxml")
totalResult = Soup.find('strong', {'class': 'search-count'})
totalResult = Soup.find("strong", {"class": "search-count"})
print("Sleeping for 25 seconds for end page")
time.sleep(25)

results = int(totalResult.text.replace(',', ''))
if (results >= 4800):
results = int(totalResult.text.replace(",", ""))
if results >= 4800:
endPage = 100
elif (results > 0 and results < 4800):
elif results > 0 and results < 4800:
endPage = math.ceil((results / 48))
return endPage


def get_items_on_page(url, page_no):
print("Page: " + str(page_no))
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)\
AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'}
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)\
AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36"
}
with contextlib.closing(urlopen(url + "&page=" + str(page_no))) as page:
data = page.read()
soup = BS(data, "lxml")
list_ele = soup.find('ul', {'id': 'hs-below-list-items'})
items_ele = list_ele.find_all('div', {'class': 'item'})
list_ele = soup.find("ul", {"id": "hs-below-list-items"})
items_ele = list_ele.find_all("div", {"class": "item"})
items = {}

for i, ele in enumerate(items_ele):
info = ele.find('div', {'class': 'info'})
link_ele = info.find('a', {'class': 'history-item'})
link = link_ele['href']
info = ele.find("div", {"class": "info"})
link_ele = info.find("a", {"class": "history-item"})
link = link_ele["href"]
name = link_ele.text.strip()
price_text = info.find('span', {'class': 'value'}).text
price_text = info.find("span", {"class": "value"}).text
price = price_patt.search(price_text).groups()[0]
orders_ele = info.find('span', {'class': 'order-num'})
orders_string = orders_ele.find('em').text
orders_raw = orders_patt.search(
orders_string
).groups()[0].replace(',', '')
orders_ele = info.find("span", {"class": "order-num"})
orders_string = orders_ele.find("em").text
orders_raw = orders_patt.search(orders_string).groups()[0].replace(",", "")
orders = int(orders_raw)
tokens = link.split("?")[0].split("/")
id = int(tokens[-1].split(".")[0])
items[id] = {
"name": name,
"price": price,
"link": link,
"orders": orders
}
items[id] = {"name": name, "price": price, "link": link, "orders": orders}

return items

@@ -99,13 +94,13 @@ def put_items(sheet, items, diff):
for i, id in enumerate(items.keys()):
item = items[id]
cell_range[j].value = id
cell_range[j + 1].value = item['name']
cell_range[j + 2].value = item['price']
cell_range[j + 3].value = item['link']
cell_range[j + 4].value = item['orders']
cell_range[j + 5].value = item['prev_orders']
cell_range[j + 6].value = item['delta']
cell_range[j + 7].value = item['interesting']
cell_range[j + 1].value = item["name"]
cell_range[j + 2].value = item["price"]
cell_range[j + 3].value = item["link"]
cell_range[j + 4].value = item["orders"]
cell_range[j + 5].value = item["prev_orders"]
cell_range[j + 6].value = item["delta"]
cell_range[j + 7].value = item["interesting"]
j += 8
if diff > 0:
last_row = len(items) + 1
@@ -119,29 +114,35 @@ def send_msg(items, item_name):

def send_msg(items, item_name):
# reply to thread or post an article in the newsgroup
SMTPSVR = 'smtp.gmail.com'
who = '[email protected]'
SMTPSVR = "smtp.gmail.com"
who = "[email protected]"
msg = """Subject: Hot items: {item_name}
Hello Nat,
Here are some interesting items:
""".format(item_name=item_name)
""".format(
item_name=item_name
)
"""
with open('message', 'w') as msg:
msg.write('From: YOUR_NAME_HERE <[email protected]>\n')
msg.write('Newsgroups: %s\n' % group_name)
msg.write('Subject: %s\n' % subject)
subprocess.call(['nano', 'message'])
"""
recipients = ['[email protected]'] # Add Reciepent Mail
recipients = ["[email protected]"]  # Add recipient mail
item_list = []
for id in items:
item_list.append("{name} - {link} - increased by {delta}(From {prev_orders} to {orders})".format(name=items[id]['name'],
link=items[id]['link'],
delta=items[id]['delta'],
prev_orders=items[id]['prev_orders'],
orders=items[id]['orders']))
msg += '\n\n'.join(item_list)
item_list.append(
"{name} - {link} - increased by {delta}(From {prev_orders} to {orders})".format(
name=items[id]["name"],
link=items[id]["link"],
delta=items[id]["delta"],
prev_orders=items[id]["prev_orders"],
orders=items[id]["orders"],
)
)
msg += "\n\n".join(item_list)
msg += """
Regards,
@@ -154,7 +155,7 @@ def send_msg(items, item_name):
exit()
sendSvr.ehlo()
try:
sendSvr.login('[email protected]', 'xxxxxx') # Add Your Email ID and Password
sendSvr.login("[email protected]", "xxxxxx") # Add Your Email ID and Password
except SMTPAuthenticationError:
print("Invalid SMTP credentials.")
exit()
@@ -165,7 +166,9 @@


def next_available_row(worksheet):
str_list = list(filter(None, worksheet.col_values(1))) # fastest but perhaps stupid :)
str_list = list(
filter(None, worksheet.col_values(1))
) # fastest but perhaps stupid :)
return len(str_list)
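As a quick, hedged illustration of the two module-level regexes reformatted at the top of this file, here is how `price_patt` and `orders_patt` pull the price and order count out of listing text. The sample strings are invented for illustration and only approximate what AliExpress actually renders:

```python
import re

price_patt = re.compile(r".*\$(.*)")
orders_patt = re.compile(r".*\((.*)\)")

# Hypothetical snippets shaped like the text the scraper reads.
price_text = "US $12.99"
orders_string = "Orders (1,234)"

price = price_patt.search(price_text).groups()[0]  # "12.99"
orders = int(orders_patt.search(orders_string).groups()[0].replace(",", ""))  # 1234
print(price, orders)
```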


Binary file added other/Scraper-artwork.png
62 changes: 31 additions & 31 deletions requirements.txt
@@ -1,34 +1,34 @@
asn1crypto==0.24.0
astroid==1.6.5
beautifulsoup4==4.6.0
asn1crypto==1.4.0
astroid==2.4.2
beautifulsoup4==4.9.3
bs4==0.0.1
cachetools==2.1.0
certifi==2018.4.16
cffi==1.11.5
chardet==3.0.4
cryptography>=2.3
google-api-python-client==1.7.3
google-auth==1.5.0
google-auth-httplib2==0.0.3
gspread==3.0.0
html5lib==1.0.1
httplib2==0.11.3
idna==2.6
isort==4.3.4
lazy-object-proxy==1.3.1
lxml==4.2.1
cachetools==4.2.0
certifi==2020.12.5
cffi==1.14.4
chardet==4.0.0
cryptography>=3.3.1
google-api-python-client==1.12.8
google-auth==1.24.0
google-auth-httplib2==0.0.4
gspread==3.6.0
html5lib==1.1
httplib2==0.18.1
idna==2.10
isort==5.6.4
lazy-object-proxy==1.5.2
lxml==4.6.2
mccabe==0.6.1
oauth2client==4.1.2
pyasn1==0.4.3
pyasn1-modules==0.2.1
pycparser==2.18
pylint==1.9.2
pyOpenSSL==18.0.0
requests>=2.20.0
rsa==3.4.2
selenium==3.12.0
six==1.11.0
uritemplate==3.0.0
urllib3>=1.23
oauth2client==4.1.3
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
pylint==2.6.0
pyOpenSSL==20.0.1
requests>=2.25.1
rsa==4.6
selenium==3.141.0
six==1.15.0
uritemplate==3.0.1
urllib3>=1.26.2
webencodings==0.5.1
wrapt==1.10.11
wrapt==1.12.1
19 changes: 11 additions & 8 deletions run.py
@@ -30,18 +30,21 @@ def main_search(sheet, query):
for id in items:
prev_orders = prev_items_orders.get(id)
if prev_orders is not None:
items[id]['prev_orders'] = prev_orders
items[id]['delta'] = items[id]['orders'] - items[id]['prev_orders']
items[id]["prev_orders"] = prev_orders
items[id]["delta"] = items[id]["orders"] - items[id]["prev_orders"]

if items[id]['delta'] >= threshold and items[id]['prev_orders'] <= max_orders:
if (
items[id]["delta"] >= threshold
and items[id]["prev_orders"] <= max_orders
):
interesting[id] = items[id]
items[id]['interesting'] = True
items[id]["interesting"] = True
else:
items[id]['interesting'] = False
items[id]["interesting"] = False
else:
items[id]['prev_orders'] = None
items[id]['delta'] = None
items[id]['interesting'] = None # new item!
items[id]["prev_orders"] = None
items[id]["delta"] = None
items[id]["interesting"] = None # new item!

if interesting:
function.send_msg(interesting, item_name=query)
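To make the selection rule in this hunk concrete, here is a small worked sketch of the `threshold` / `max_orders` filter. The item numbers are invented, while `threshold = 5` and `max_orders = 100` come from `__init__.py`:

```python
threshold = 5     # minimum jump in orders worth reporting (from __init__.py)
max_orders = 100  # skip items that were already selling heavily

# Hypothetical item: previously 40 orders, now 48.
item = {"orders": 48, "prev_orders": 40}
item["delta"] = item["orders"] - item["prev_orders"]  # 8

# Same condition as in main_search(): a big enough jump on a still-small item.
item["interesting"] = (
    item["delta"] >= threshold and item["prev_orders"] <= max_orders
)
print(item["interesting"])  # True
```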
