forked from get-set-fetch/scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
infinite-scrolling.ts
66 lines (60 loc) · 1.75 KB
/
infinite-scrolling.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/* eslint-disable object-curly-newline */
import { KnexStorage, PuppeteerClient, Scraper, setLogger, ScrapeEvent, encode } from '../src/index';
// Configure the library logger before any scraper component is constructed.
setLogger({ level: 'info' });

// SQLite file handed to knex through the connection settings below.
const sqliteFilename = './examples/data/infinite-scrolling.sqlite';

// Knex settings: sqlite3 client, with `useNullAsDefault` switched on.
const knexConfig = {
  client: 'sqlite3',
  useNullAsDefault: true,
  connection: { filename: sqliteFilename },
};

// Storage instance later passed to the Scraper constructor and closed after the CSV export.
const storage = new KnexStorage(knexConfig);
// Command-line switches forwarded to the browser through the client's `args` option.
const browserArgs = [
  '--disable-gpu',
  '--disable-dev-shm-usage',
  '--disable-setuid-sandbox',
  '--no-first-run',
  '--no-sandbox',
  '--no-zygote',
  '--single-process',
];

// Puppeteer-backed browser client configured with the switches above.
const client = new PuppeteerClient({ args: browserArgs });

// The scraper ties the storage and the browser client together.
const scraper = new Scraper(storage, client);
// NOTE(review): `scrapeHash` is not referenced anywhere in the visible script.
// Presumably it is the encoded form of `scrapeConfig` below (the otherwise unused
// `encode` import hints at this) — TODO confirm before relying on it.
const scrapeHash = 'ePnXQdMJjZJRDoIwDIbv4rOjHsV4ATJhDiLbSOtcvL3tJBAyCbz1pe3fft88LLKnV95q8Kb9s/eWjni6J+XKhwXrpTTnmL6ls23/rnIm4lik+IlOqTz6PMs+5rtOsvT/gI67A36Uj+4ufJdWG/RA8qlNdvrxMrhFo7ROmO4xSilVwiOzkqLptBvFgMFoGw1MeQEnVPC7kCDHrakJaFqQ2F/CU61j';
// Selector/label pairs handed to ExtractHtmlContentPlugin: one for the player
// name element, one for the goals element.
const playerSelectorPairs = [
  { contentSelector: 'div.statistics-item--name', label: 'player' },
  { contentSelector: 'div.history-numbers', label: 'goals' },
];

// Per-plugin option objects, listed in the same order they appear in `pluginOpts`.
const extractUrlsOpts = { name: 'ExtractUrlsPlugin', maxDepth: 0 };
const extractHtmlContentOpts = { name: 'ExtractHtmlContentPlugin', selectorPairs: playerSelectorPairs };
// NOTE(review): `stabilityCheck` is presumably a duration in milliseconds — confirm against the plugin docs.
const scrollOpts = { name: 'ScrollPlugin', after: 'UpsertResourcePlugin', stabilityCheck: 1000 };

// Project definition passed to `scraper.scrape`: project name, pipeline, plugin
// option overrides and the single start URL.
const scrapeConfig = {
  name: 'uefaPlayerRankings',
  pipeline: 'browser-static-content',
  pluginOpts: [extractUrlsOpts, extractHtmlContentOpts, scrollOpts],
  resources: [
    { url: 'https://www.uefa.com/uefachampionsleague/history/rankings/players/goals_scored/' },
  ],
};
// When the project has been scraped, export the results as CSV and then close
// the storage. The close sits in `finally` so the storage is released even when
// the export rejects; the rejection itself is not swallowed and still propagates.
scraper.on(ScrapeEvent.ProjectScraped, async () => {
  try {
    await scraper.export('./examples/data/infinite-scrolling.csv', { type: 'csv' });
  } finally {
    await storage.close();
  }
});
// Start scraping with the project definition above. Completion is handled by the
// ProjectScraped listener registered earlier, so the return value is not used here.
// NOTE(review): `domain.delay` is presumably the delay in milliseconds between
// requests to the same domain — confirm against the library docs.
scraper.scrape(scrapeConfig, { domain: { delay: 1000 } });