-
Notifications
You must be signed in to change notification settings - Fork 0
/
TianMaoReader.py
105 lines (99 loc) · 4.36 KB
/
TianMaoReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from Store import Store
from Food import Food
from pyquery import PyQuery as pq
from selenium import webdriver as wd
import xml.dom.minidom as dom
import re
import threading
#
# This module searches for stores selling a concrete fruit and retrieves a list of stores WITH OR WITHOUT ASSERT.
# Make sure the method "Calculate()" has been called on every store before you sort the list.
# Of course, I will automate this step soon.
class TianMaoReader:
    """Scrape a Tmall search-result page for a fruit and build Store objects.

    The only public entry point is :meth:`Reading`; the underscore-prefixed
    helpers below are implementation details.
    """

    # A weight such as "5斤" or "2.5斤" embedded in a product title.
    _WEIGHT_RE = re.compile(r'(\d+(?:\.\d+)?)斤')
    # Sales figures: a decimal number is preferred, otherwise the first integer.
    _DECIMAL_RE = re.compile(r'\d+\.\d+')
    _INTEGER_RE = re.compile(r'\d+')

    @classmethod
    def Reading(cls, fruit, con, warp, storage):
        """Search Tmall for ``fruit`` and collect the stores that were found.

        Args:
            fruit: object whose ``attr['name']`` is used as the search keyword;
                it is also passed through to every ``Store``.
            con: condition object (``acquire``/``notify_all``/``release``, e.g.
                ``threading.Condition``) used to signal that this reader is done.
            warp: list that receives a ``1`` once the results are available.
            storage: list that is extended with every store that was found.

        Returns:
            The list of ``Store`` objects (``Calculate()`` already called on
            each), or ``None`` when the page could not be fetched.
        """
        try:
            page_xml = cls._fetch_page_xml(fruit)
        except Exception:
            # NOTE(review): on this path ``con`` is never notified, exactly as
            # before; confirm that waiters do not depend on a notification.
            print("Please check your network,and please don't bother the browser!")
            return None

        # Write the page to disk and close the file *before* parsing it, so
        # the parser never reads a partially flushed file.
        with open('TianMao.html', 'w', encoding='utf-8') as page_file:
            page_file.write(page_xml)
        document = dom.parse("TianMao.html")

        stores = []
        for item in document.documentElement.getElementsByTagName('div'):
            # Only this element carries the information about one product.
            if item.getAttribute('class') != 'product-iWrap':
                continue
            store = cls._parse_product(item, fruit)
            if store is not None:
                stores.append(store)

        for shop in stores:
            shop.Calculate()

        # Publish the results before waking the waiters, so that whoever is
        # notified already sees a fully populated ``storage``.
        with con:
            storage.extend(stores)
            warp.append(1)
            con.notify_all()
        return stores

    @staticmethod
    def _fetch_page_xml(fruit):
        """Load the search page in Chrome and return it serialised by PyQuery."""
        chrome_options = wd.ChromeOptions()
        # Do not load images.
        prefs = {"profile.managed_default_content_settings.images": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
        browser = wd.Chrome(options=chrome_options)
        try:
            browser.get(
                'https://list.tmall.com/search_product.htm?q=' + fruit.attr[
                    'name'] + '&type=p&spm=a220m.1000858.a2227oh.d100&xl=%C6%BB%B9%FB_2&from=.list.pc_1_suggest')
            return str(pq(browser.page_source))
        finally:
            # Always end the browser session, even when loading failed.
            browser.quit()

    @classmethod
    def _parse_product(cls, item, fruit):
        """Build a Store from one 'product-iWrap' element, or None if unusable."""
        raw_price = None
        name = ''
        sale = 0
        for paragraph in item.getElementsByTagName('p'):
            css_class = paragraph.getAttribute('class')
            if css_class == 'productPrice':
                # Price of the whole package; the last <em> wins.
                for em in paragraph.getElementsByTagName('em'):
                    raw_price = em.getAttribute('title')
            elif css_class == 'productTitle':
                # The title also tells how much fruit is in the package.
                links = paragraph.getElementsByTagName('a')
                if links:
                    name = links[0].getAttribute('title')
            elif css_class == 'productStatus':
                # How many were sold per month.
                sale = cls._parse_sales(cls._status_text(paragraph))

        # Normalise the price after all fields were read, so the result does
        # not depend on the order of the <p> elements.
        try:
            price = float(raw_price) / cls._parse_amount(name)
        except (TypeError, ValueError):
            return None  # no usable price for this product

        inner_divs = item.getElementsByTagName('div')
        anchors = inner_divs[0].getElementsByTagName('a') if inner_divs else []
        if not anchors:
            return None  # no link to the product page
        url = 'http:' + anchors[0].getAttribute('href')
        return Store(name, price, 0, sale, fruit, url)

    @staticmethod
    def _status_text(paragraph):
        """Return the text of the first <em> in the first <span>, or ''."""
        spans = paragraph.getElementsByTagName('span')
        if not spans:
            return ''
        ems = spans[0].getElementsByTagName('em')
        if not ems or not ems[0].childNodes:
            return ''
        return ems[0].childNodes[0].nodeValue or ''

    @staticmethod
    def _parse_amount(title):
        """Return the package weight (the number before '斤') found in ``title``.

        Falls back to 1.0 when no weight is written in digits or it is zero,
        which keeps the later price division well defined.
        """
        match = TianMaoReader._WEIGHT_RE.search(title or '')
        if match is None:
            return 1.0
        amount = float(match.group(1))
        return amount if amount > 0 else 1.0

    @staticmethod
    def _parse_sales(text):
        """Return the sales figure in ``text`` as an int (0 if none is found).

        A value containing '万' is multiplied by 10000, e.g. "1.2万" -> 12000.
        """
        if not text:
            return 0
        match = (TianMaoReader._DECIMAL_RE.search(text)
                 or TianMaoReader._INTEGER_RE.search(text))
        if match is None:
            return 0
        value = float(match.group())
        if '万' in text:
            value *= 10000
        return int(round(value))