investing.py
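"""Scrape the NYSE and NASDAQ composite-components tables from investing.com.

For each market the script walks every page of the components table, reads the
quote columns plus the market cap from the FundamentalInstrument AJAX endpoint,
and writes one CSV per market into output/. Ticker symbols are resolved from a
local <market>-symbols.csv cache; missing symbols are fetched from the
instrument detail page and appended to that cache.

Expects the output/ directory and the <market>-symbols.csv files to exist.
Third-party dependencies: requests, lxml.
"""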
import threading
from lxml import html
import requests
import csv
from datetime import date, timedelta
import logging
import time
format = "%(asctime)s: %(message)s"
logging.basicConfig(format=format, level=logging.INFO,
                    datefmt="%H:%M:%S")
MainURL = 'https://www.investing.com'
def thread_function(market):
    today = date.today()
    filename = market + "-" + str(today - timedelta(days=1))
    # create the output CSV for this market and write the header row
    f = open('output/' + filename + '.csv', 'w', newline='\n')
    with f:
        fnames = ['PairID', 'Name', 'Symbol', 'Last', 'Chg.', 'Chg.%', 'Vol', 'Mcap']
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36',
        'Host': 'www.investing.com',
        'Referer': 'https://www.investing.com'
    }
    # same headers plus X-Requested-With for the AJAX fundamentals endpoint
    fheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36',
        'Host': 'www.investing.com',
        'Referer': 'https://www.investing.com',
        'X-Requested-With': 'XMLHttpRequest'
    }
    # discover how many pages the components table spans
    pagecounURL = 'https://www.investing.com/indices/' + market + '-composite-components'
    pagecountreq = requests.get(pagecounURL, headers=headers)
    pagecounttree = html.fromstring(pagecountreq.content)
    pagecount = len(pagecounttree.xpath('//*[@id="paginationWrap"]/div[2]/a')) + 1
    logging.info("%s pages found in %s." % ((pagecount - 1), market.title()))
    for i in range(1, pagecount):
        URL = 'https://www.investing.com/indices/' + market + '-composite-components'
        logging.info("%s page %s scraping started." % (market.title(), str(i)))
        if i == 1:
            page = requests.get(URL, headers=headers)
            tree = html.fromstring(page.content)
        else:
            URL = URL + '/' + str(i)
            page = requests.get(URL, headers=headers)
            tree = html.fromstring(page.content)
        # pull the IDs needed by the FundamentalInstrument AJAX endpoint
        # from the filter button's onclick attribute and the inline script
        mcap = tree.xpath('//*[@id="filter_fundamental"]/@onclick')[0]
        getsmlID = tree.xpath('//*[@id="leftColumn"]/script[1]/text()')[0]
        cookieID = mcap.split(",")[5].split("\'")[1]
        pairID = mcap.split(",")[4].split("\'")[0]
        for line in getsmlID.split("\n"):
            if "window.siteData.smlID" in line:
                line = line.replace(';', '').strip().split()
                smlID = line[2]
        fURL = 'https://www.investing.com/indices/Service/FundamentalInstrument?pairid=' + pairID + '&sid=' + cookieID + '&filterParams=&smlID=' + smlID + '&page=%s' % i
        fpage = requests.get(fURL, headers=fheaders)
        ftree = html.fromstring(fpage.content)
        rowcount = len(tree.xpath('//*[@id="cr1"]/tbody/tr')) + 1
        for order in range(1, rowcount):
            pairid = tree.xpath('//*[@id="cr1"]/tbody/tr[%s]/td[2]/span/@data-id' % order)[0]
            name = tree.xpath('//*[@id="cr1"]/tbody/tr[%s]/td[2]' % order)[0].text_content()
            last = tree.xpath('//*[@id="cr1"]/tbody/tr[%s]/td[3]' % order)[0].text_content()
            chg = tree.xpath('//*[@id="cr1"]/tbody/tr[%s]/td[6]' % order)[0].text_content()
            chgpercent = tree.xpath('//*[@id="cr1"]/tbody/tr[%s]/td[7]' % order)[0].text_content()
            vol = tree.xpath('//*[@id="cr1"]/tbody/tr[%s]/td[8]' % order)[0].text_content()
            mcap = ftree.xpath('//*[@id="fundamental"]/tbody/tr[%s]/td[4]' % order)[0].text_content()
            # look up the ticker symbol in the local per-market cache
            symbol = None
            with open(market + '-symbols.csv', encoding='utf-8') as csvDataFile:
                csvReader = csv.reader(csvDataFile)
                for row in csvReader:
                    if row[0] == name:
                        symbol = row[1]
            if symbol is None:
                # not cached yet: fetch the instrument detail page and cache the symbol
                logging.error("\"%s\" Symbol Not Found in %s! Researching started." % (name, market.title()))
                href = tree.xpath('//a[text()="%s"]/@href' % name)[0]
                time.sleep(3)
                detail = requests.get(MainURL + href, headers=headers)
                detailtree = html.fromstring(detail.content)
                symbol = detailtree.xpath('//meta[@itemprop="tickerSymbol"]/@content')[0]
                f = open(market + '-symbols.csv', 'a', newline='\n')
                with f:
                    fnames = ['Name', 'Symbol']
                    writer = csv.DictWriter(f, fieldnames=fnames)
                    writer.writerow({'Name': name, 'Symbol': symbol})
                logging.info("\"%s\" Symbol found in %s : %s" % (name, market.title(), symbol))
            # append the row for this instrument to the market's output CSV
            f = open('output/' + filename + '.csv', 'a', newline='\n')
            with f:
                fnames = ['PairID', 'Name', 'Symbol', 'Last', 'Chg.', 'Chg.%', 'Vol', 'Mcap']
                writer = csv.DictWriter(f, fieldnames=fnames)
                writer.writerow({'PairID': pairid, 'Name': name, 'Symbol': symbol, 'Last': last, 'Chg.': chg, 'Chg.%': chgpercent, 'Vol': vol, 'Mcap': mcap})
        logging.info("%s page %s scraping Finished." % (market.title(), str(i)))
    logging.info("Finish the Investing '%s' Job! :)" % market.title())
if __name__ == "__main__":
    nysethread = threading.Thread(target=thread_function, args=("nyse",))
    nasdaqthread = threading.Thread(target=thread_function, args=("nasdaq",))
    nysethread.start()
    nasdaqthread.start()