-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathtokens.py
32 lines (26 loc) · 1.11 KB
/
tokens.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# -*- coding: utf-8 -*-
import scrapy
class TokensSpider(scrapy.Spider):
    """Crawl the etherscan.io token listing and scrape each token's address page.

    ``parse`` walks the paginated listing that starts at ``start_urls`` and
    schedules one request per distinct token address found on each page.
    ``getCode`` handles those address pages and yields one item per token
    with the keys ``name``, ``address`` and ``code``.
    """

    name = "tokens"
    allowed_domains = ["etherscan.io"]
    start_urls = ["https://etherscan.io/tokens/"]

    # URL templates; "{}" is filled with a token address / a "Next" link href.
    addressUrl = "https://etherscan.io/address/{}"
    nextPageUrl = "https://etherscan.io/{}"

    # Enable Scrapy's AutoThrottle extension for this spider.
    custom_settings = {"AUTOTHROTTLE_ENABLED": True}

    def parse(self, response):
        """Yield a request per token on a listing page, then follow "Next".

        Args:
            response: Scrapy response for one page of the token listing.

        Yields:
            scrapy.Request: one request per distinct token address (handled
            by ``getCode``, with the address passed in ``meta["address"]``),
            followed by a request for the next listing page when the page
            has a link whose text is exactly "Next".
        """
        # Raw string: "\w" is not a valid escape in a normal string literal.
        addresses = response.css("a").re(r"/token/(0x\w+)")

        # A token can be linked several times on a page; dict.fromkeys()
        # drops the duplicates while keeping first-seen (document) order.
        for address in dict.fromkeys(addresses):
            yield scrapy.Request(
                self.addressUrl.format(address),
                callback=self.getCode,
                meta={"address": address},
            )

        next_href = response.xpath('//a[text()="Next"]/@href').extract_first()
        if next_href:
            # The callback is an argument of Request, not of str.format().
            yield scrapy.Request(
                self.nextPageUrl.format(next_href),
                callback=self.parse,
            )

    def getCode(self, response):
        """Extract the token's name and code from its address page.

        Args:
            response: Scrapy response for an address page; ``response.meta``
                must contain the ``"address"`` entry set by ``parse``.

        Yields:
            dict: ``{"name": ..., "address": ..., "code": ...}``. ``name``
            and ``code`` are ``None`` when the corresponding element is not
            present on the page.
        """
        # Text of the first <a data-placement="bottom"> element.
        name = response.xpath('//a[@data-placement="bottom"]/text()').extract_first()
        address = response.meta["address"]
        # Text of the element with id "editor" -- presumably the contract
        # source shown on the page; verify against the live markup.
        code = response.css("#editor::text").extract_first()
        yield {"name": name, "address": address, "code": code}