-
Notifications
You must be signed in to change notification settings - Fork 1
/
Crawler.fs
131 lines (117 loc) · 5.12 KB
/
Crawler.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
module MinhaCarteira.Crawler
open System
open System.Threading.Tasks
open PuppeteerSharp
open Models
/// Downloads the default Chromium revision through a `BrowserFetcher` and
/// launches a Puppeteer browser with `Headless` set to `false`.
/// Returns the task produced by `Puppeteer.LaunchAsync`.
let private getBrowser() =
    task {
        // The fetcher is only needed for the download; it is disposed when this task completes.
        use downloader = new BrowserFetcher()
        let! _ = downloader.DownloadAsync(BrowserFetcher.DefaultChromiumRevision)
        return! Puppeteer.LaunchAsync(LaunchOptions(Headless = false))
    }
/// Opens `url` in a fresh browser and page, waits until the JavaScript
/// function `waitUntil` is satisfied, evaluates the JavaScript function
/// `script` (expected to yield an array of strings) and returns the
/// whitespace-trimmed results as a sequence.
let private find url waitUntil script =
    task {
        // Browser and page are both released when the task finishes.
        use! browser = getBrowser()
        use! page = browser.NewPageAsync()
        let! _ = page.GoToAsync(url)
        let! _ = page.WaitForFunctionAsync(waitUntil)
        let! rawTickers = page.EvaluateFunctionAsync<string[]>(script)
        let trimmed = rawTickers |> Array.map (fun ticker -> ticker.Trim())
        return Seq.ofArray trimmed
    }
/// Scrapes tickers from the fiis.com.br fund listing via `find`.
/// Waits until more than 290 `span.ticker` elements are present and returns
/// the `innerHTML` of each.
let private getFIIs1() =
    // NOTE(review): `$` is presumably jQuery exposed by the page itself - confirm.
    let readyWhen = "() => $('span.ticker').length > 290"
    let extract = "() => [...document.querySelectorAll('span.ticker')].map(x => x.innerHTML)"
    find "https://fiis.com.br/lista-de-fundos-imobiliarios" readyWhen extract
/// Scrapes tickers from the clubefii.com.br fund listing via `find`.
/// Waits until more than 300 first-column links exist in `tr.tabela_principal`
/// rows and returns the `innerHTML` of each link.
let private getFIIs2() =
    // NOTE(review): `$` is presumably jQuery exposed by the page itself - confirm.
    let readyWhen = "() => $('tr.tabela_principal td:first-child a').length > 300"
    let extract = "() => [...document.querySelectorAll('tr.tabela_principal td:first-child a')].map(x => x.innerHTML)"
    find "https://www.clubefii.com.br/fundo_imobiliario_lista" readyWhen extract
/// Runs both FII scrapers (`getFIIs1`, `getFIIs2`) through `Task.Sequential`
/// and merges their results into a single sequence without duplicates
/// (the merge goes through an F# `Set`).
let private getFIIs() =
    task {
        // NOTE(review): `Task.Sequential` is not defined in this file - presumably a project helper; confirm.
        let! batches = [ getFIIs1; getFIIs2 ] |> Task.Sequential
        let unique = batches |> Seq.concat |> Set.ofSeq
        return Set.toSeq unique
    }
/// Scrapes tickers from the clubefii.com.br "Agronegocio" listing via `find`.
/// Waits until more than 10 first-column links exist in `tr.tabela_principal`
/// rows and returns the `innerHTML` of each link.
let private getFiagro() =
    // NOTE(review): `$` is presumably jQuery exposed by the page itself - confirm.
    let readyWhen = "() => $('tr.tabela_principal td:first-child a').length > 10"
    let extract = "() => [...document.querySelectorAll('tr.tabela_principal td:first-child a')].map(x => x.innerHTML)"
    find "https://www.clubefii.com.br/fundos-imobiliarios/51639/Agronegocio" readyWhen extract
/// Scrapes the second column of the table inside `#conteudo-principal` on the
/// B3 "units" page via `find`, returning each cell's `innerHTML`.
let private getUnits() =
    // The wait predicate is always true, so no extra wait condition is imposed.
    let readyWhen = "() => true"
    let extract = "() => [...document.querySelectorAll('#conteudo-principal table tbody td:nth-child(2)')].map(x => x.innerHTML)"
    find "https://www.b3.com.br/pt_br/market-data-e-indices/servicos-de-dados/market-data/consultas/mercado-a-vista/units/" readyWhen extract
/// Scrapes the Brazilian ETF listing on br.investing.com via `find`.
/// Waits until more than 20 `#etfs td[title]` cells are present and returns
/// the `title` attribute of each.
let private getETFs() =
    // NOTE(review): `$` is presumably jQuery exposed by the page itself - confirm.
    let readyWhen = "() => $('#etfs td[title]').length > 20"
    let extract = "() => [...document.querySelectorAll('#etfs td[title]')].map(x => x.title)"
    find "https://br.investing.com/etfs/brazil-etfs" readyWhen extract
/// Scrapes the first column of a table on dividendosfiis.com.br/firf via `find`.
/// Waits until the selector matches more than 8 cells and returns the
/// `innerText` of each.
let private getFiInfra() =
    // Both scripts target the same cells; the selector is spelled out once and spliced into each.
    let cells = "#g-mainbar > div:nth-child(3) > div > div > div.blog-header > table:nth-child(9) > tbody > tr > td:nth-child(1)"
    let readyWhen = "() => [...document.querySelectorAll('" + cells + "')].length > 8"
    let extract = "() => [...document.querySelectorAll('" + cells + "')].map(x => x.innerText)"
    find "https://dividendosfiis.com.br/firf" readyWhen extract
/// Pairs each asset category with the function that scrapes its tickers.
/// NOTE(review): the category tags (`FiInfra`, `Fiagro`, `FII`, `ETF`, `Acao`)
/// are not defined in this file - presumably they come from `Models`; confirm.
let tickerFactories =
    [ (FiInfra, getFiInfra)
      (Fiagro, getFiagro)
      (FII, getFIIs)
      (ETF, getETFs)
      (Acao, getUnits) ]
/// Converts a scraped price string (for example "R$ 10,50") into a decimal.
/// Removes the "R$" prefix, rewrites "," as ".", trims whitespace and parses
/// the result. Returns `None` for null or unparseable input.
let private parseQuote (raw: string) =
    if isNull raw then
        None
    else
        let normalized =
            raw
                .Replace("R$", String.Empty)
                .Replace(",", ".")
                .Trim()
        // Parse with the invariant culture: the text now uses "." as the decimal
        // separator, and a culture-sensitive parse (e.g. pt-BR, where "." groups
        // thousands) would read "10.50" as 1050.
        match Decimal.TryParse(
                normalized,
                System.Globalization.NumberStyles.Number,
                System.Globalization.CultureInfo.InvariantCulture) with
        | true, value -> Some value
        | _ -> None

/// Looks up the current quote of every ticker in `ativos`.
///
/// For each ticker a new page is opened in a shared browser and the sources
/// in `searchIn` are tried in order (Google, investidor10 "etfs",
/// investidor10 "fiis", Bing) until one yields a text without raising.
/// The text is then parsed with `parseQuote`.
///
/// Returns a task producing a map from ticker to `Some price`, or `None` when
/// no source succeeded or the scraped text was not a number. If `ativos`
/// contains duplicates, the map keeps a single entry per ticker.
let getCotacao ativos =
    /// Navigates `page` to `url` and returns the `innerText` of the first
    /// element matching `selector`.
    let fetchText (page: IPage) (url: string) (selector: string) = task {
        let! _ = page.GoToAsync(url)
        return! page.QuerySelectorAsync(selector).EvaluateFunctionAsync<string>("_ => _.innerText")
    }
    let getFromGoogle (page: IPage) ativo =
        fetchText
            page
            $"http://www.google.com/search?q=%s{ativo}"
            "div[eid] div[data-ved] span[jscontroller] span[jsname]"
    let getFromBing (page: IPage) ativo =
        fetchText
            page
            $"https://www.bing.com/search?q=%s{ativo}"
            "#Finance_Quote"
    let getFromInvestidor category (page: IPage) ativo =
        fetchText
            page
            $"https://investidor10.com.br/%s{category}/%s{ativo}/"
            "#cards-ticker > div._card.cotacao > div._card-body > div > span"
    // Sources in the order they are attempted.
    let searchIn = [
        getFromGoogle
        getFromInvestidor "etfs"
        getFromInvestidor "fiis"
        getFromBing
    ]
    task {
        // Materialise the input once: it is needed both to start the lookups and
        // to pair results back with their tickers, and a lazy sequence
        // enumerated twice could yield misaligned pairs.
        let tickers = Seq.toArray ativos
        if Array.isEmpty tickers then
            // Nothing to look up: avoid launching a browser at all.
            return Map.empty
        else
            use! browser = getBrowser()
            let! quotes =
                tickers
                |> Seq.map (fun ativo -> task {
                    use! page = browser.NewPageAsync()
                    let mutable found = false
                    let mutable i = 0
                    let mutable quote = "not found"
                    while not found && i < searchIn.Length do
                        try
                            let source = searchIn[i]
                            let! text = source page ativo
                            quote <- text
                            found <- true
                        with
                        | _ ->
                            // Deliberate best effort: any failure from this source
                            // (navigation error, selector not matching, ...) just
                            // falls through to the next one.
                            ()
                        i <- i + 1
                    // "not found" is not numeric, so it ends up as None after parsing.
                    return quote
                })
                |> Task.WhenAll
            return
                quotes
                |> Seq.map parseQuote
                |> Seq.zip tickers
                |> Map.ofSeq
    }