forked from NikolaiT/scrapeulous
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathamazon.js
58 lines (56 loc) · 1.91 KB
/
amazon.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/**
* @author Nikolai Tschacher
* @version 1.0
* @last_modified March 2020
* @website: scrapeulous.com
*
* Searches for a product on Amazon and obtains product metadata
* such as price and ASIN.
*
* Supported options:
*
* @param options.amazon_domain: string, the domain of Amazon
* @param keyword: The keyword that is searched on Amazon
*/
class Amazon extends BrowserWorker {
  /**
   * Searches Amazon for `keyword` and scrapes the resulting result list.
   *
   * Reads `this.options.amazon_domain` (falls back to 'www.amazon.com') and
   * drives `this.page`: opens the home page, fills the search box, submits it
   * with Enter and then parses the result list inside the page.
   *
   * @param {string} keyword - The term written into the Amazon search box.
   * @returns {Promise<Array<Object>>} One object per result item that has a
   *   price element. Each object carries `asin` (the `data-asin` attribute)
   *   and `price`; `url` (the raw `href` attribute) and `title` are set when
   *   the title link is found; `stars` and `num_ratings` are set when the
   *   corresponding rating elements are found.
   */
  async crawl(keyword) {
    const amazonDomain = this.options.amazon_domain || 'www.amazon.com';

    await this.page.goto(`https://${amazonDomain}/`);
    await this.page.waitForSelector('#nav-search');
    // NOTE(review): `this.page` looks like a Puppeteer Page; `page.waitFor` is
    // deprecated in newer Puppeteer releases — confirm the runtime's version.
    await this.page.waitFor(200);

    const input = await this.page.$('input[name="field-keywords"]');
    // The keyword is assigned to the input's value inside the page rather
    // than being typed key by key.
    await this.page.evaluate((value) => {
      document.querySelector('input[name="field-keywords"]').value = value;
    }, keyword);
    await input.focus();
    await this.page.keyboard.press("Enter");

    await this.page.waitForSelector('.s-result-list');
    await this.page.waitFor(500);

    // Parse product information. This callback is executed by
    // `page.evaluate`, so it only relies on `document` and its own locals.
    return this.page.evaluate(() => {
      const data = [];
      const products = document.querySelectorAll('.s-result-list .s-result-item');

      for (const el of products) {
        const obj = {
          asin: el.getAttribute('data-asin'),
        };

        const linkElement = el.querySelector('div > h2 > a');
        if (linkElement) {
          const titleElement = linkElement.querySelector('span');
          if (!titleElement) {
            // A title link without a <span> means the item is dropped.
            continue;
          }
          obj.url = linkElement.getAttribute('href');
          obj.title = titleElement.innerText;
        }

        const priceElement = el.querySelector('.a-price span');
        if (!priceElement) {
          // Result items without a price element are not reported.
          continue;
        }
        obj.price = priceElement.innerText;

        // Ratings are optional; the rating count is only looked up once the
        // star label has been found.
        const starsElement = el.querySelector('.a-size-small [aria-label]:nth-child(1)');
        if (starsElement) {
          obj.stars = starsElement.innerText.trim();
          const ratingsElement = el.querySelector('.a-size-small [aria-label]:nth-child(2)');
          if (ratingsElement) {
            obj.num_ratings = ratingsElement.innerText.trim();
          }
        }

        data.push(obj);
      }

      return data;
    });
  }
}