-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathholex.js
376 lines (319 loc) · 15.8 KB
/
holex.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
const puppeteer = require('puppeteer');
// ---------------------------------------------------------------------------
// Command-line handling
//   --deliveryDate YYYY-MM-DD      e.g. "2025-01-22"
//   --flowerNames  NAME1,NAME2,... e.g. "STOCK,ROSE"
// ---------------------------------------------------------------------------
const args = process.argv.slice(2);
let deliveryDate = '';
let flowerNames = [];

/**
 * Returns the value that follows `flag` on the command line.
 *
 * @param {string} flag - the flag to look up, e.g. '--deliveryDate'
 * @returns {string|null} the token after the flag, or null when the flag is
 *   absent, is the last token, or is directly followed by another `--flag`
 */
function getArgValue(flag) {
  const index = args.indexOf(flag);
  if (index === -1) {
    return null;
  }
  const value = args[index + 1];
  // a flag with nothing after it (or with another flag after it) has no value
  if (value === undefined || value.startsWith('--')) {
    return null;
  }
  return value;
}

/**
 * Turns the raw `--flowerNames` value into the list of names to scrape.
 *
 * Names are trimmed and upper-cased because product names are compared in
 * upper case by a substring test; empty entries are dropped because an empty
 * string is a substring of every product name.
 *
 * @param {string|null} raw - comma separated names, or null/empty for defaults
 * @returns {string[]} cleaned names, or the default list when none remain
 */
function parseFlowerNames(raw) {
  const defaults = [
    "STOCK", "SNAPDRAGON", "SALAL", "DELPHINIUM", "ROSE", "CARNATION", "LISIANTHUS",
    "SCABIOSA", "MUMS", "RANUNCULUS", "ANEMONE", "EUCALYPTUS", "RUSCUS"
  ];
  if (!raw) {
    return defaults;
  }
  const names = raw
    .split(',')
    .map((name) => name.trim().toUpperCase())
    .filter((name) => name !== '');
  return names.length > 0 ? names : defaults;
}

// extract values
deliveryDate = getArgValue('--deliveryDate') || '';
flowerNames = parseFlowerNames(getArgValue('--flowerNames'));
let numPages = 0;
/**
 * Script entry point.
 *
 * Logs in to holex.com, selects the requested delivery date, walks every page
 * of the flower catalogue and collects the products whose names match
 * `flowerNames`. Whatever was collected (possibly an empty array) is always
 * printed to stdout as a single JSON array; diagnostics go to stderr.
 */
(async () => {
  const flowers = [];
  let browser = null;
  try {
    // convert the requested delivery date before doing any network work
    // input  - YYYY-MM-DD (command line)
    // output - MM/DD/YYYY (the format compared against the calendar's data-day values)
    const dateParts = /^(\d{4})-(\d{1,2})-(\d{1,2})$/.exec(deliveryDate);
    if (!dateParts) {
      console.error(`invalid or missing --deliveryDate "${deliveryDate}" (expected YYYY-MM-DD)`);
      return; // the finally block still prints the (empty) result
    }
    const requestedDate = new Date(Number(dateParts[1]), Number(dateParts[2]) - 1, Number(dateParts[3]));
    const monthString = String(requestedDate.getMonth() + 1).padStart(2, '0');
    const dayString = String(requestedDate.getDate()).padStart(2, '0');
    deliveryDate = `${monthString}/${dayString}/${requestedDate.getFullYear()}`;

    // assign to the outer binding (no `const`) so the finally block can close the browser
    browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const page = await browser.newPage(); // opens new browser tab

    // login to holex
    // NOTE(security): credentials are hard-coded in source. They can be supplied
    // through HOLEX_USERNAME / HOLEX_PASSWORD instead; the literals remain only as
    // a fallback so existing invocations keep working and should be removed.
    const loginUrl = "https://holex.com/en_US/login";
    const username = process.env.HOLEX_USERNAME || "[email protected]";
    const password = process.env.HOLEX_PASSWORD || "HEXhappydayz5!";
    await page.goto(loginUrl);
    await page.waitForSelector('#j_username'); // wait for load
    await page.type('#j_username', username);
    await page.waitForSelector('#j_password'); // wait for load
    await page.type('#j_password', password);
    // submit the form directly; start waiting for the navigation before
    // triggering it so a fast navigation cannot be missed
    await Promise.all([
      page.waitForNavigation(),
      page.evaluate(() => {
        document.querySelector('#loginForm').submit();
      }),
    ]);

    // navigate to product page
    const productPageUrl = "https://holex.com/en_US/All-products/Flowers/c/Flowers";
    await page.goto(productPageUrl);

    // close the delivery date popup if it is showing
    const popupHandle = await page.$('#cboxContent');
    if (popupHandle) {
      await page.click('#cboxClose');
    }

    // only scrape when the requested date is listed in the calendar and selectable
    const { inputDateFound, inputDateAvail } = await findInputDate(page, deliveryDate);
    if (!inputDateFound || !inputDateAvail) {
      return;
    }
    await selectDeliveryDate(page, deliveryDate);

    // loop through each page of results
    const productGridSelector = 'section.version_two.product_grid_page.plus_font[page-name="productGridPage"]';
    const nextPageSelector = 'li.pagination-next.hidden-xs a';
    let hasNextPage = true;
    while (hasNextPage) {
      try {
        await page.waitForSelector(productGridSelector); // wait for the product list to load
        const newFlowers = await extractFlowerData(page, flowerNames, deliveryDate);
        flowers.push(...newFlowers);

        const nextPageLink = await page.$(nextPageSelector);
        if (nextPageLink) {
          numPages += 1;
          // click inside the page (a direct handle click failed previously) and
          // wait for the resulting navigation without racing it
          await Promise.all([
            page.waitForNavigation(),
            page.$eval(nextPageSelector, el => el.click()),
          ]);
        } else {
          hasNextPage = false;
        }
      } catch (err) {
        // keep what was scraped so far and stop paginating
        console.error("error during pagination or scraping:", err);
        hasNextPage = false;
      }
    }
  } catch (err) {
    console.error("error during login or page load:", err);
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (err) {
        // a failed shutdown must not prevent the result from being printed
        console.error("error closing browser:", err);
      }
    }
    console.log(JSON.stringify(flowers));
  }
})();
/**
 * Opens the delivery-date picker and looks for `deliveryDate` among the day
 * cells the calendar widget currently renders (no month navigation is done
 * here).
 *
 * @param {object} page - Puppeteer page showing the product listing
 * @param {string} deliveryDate - date to look for, compared verbatim with each
 *   cell's `data-day` attribute
 * @returns {Promise<{inputDateFound: boolean, inputDateAvail: boolean}>}
 *   `inputDateFound` is true when a cell carries that date; `inputDateAvail`
 *   is true when that cell does not have the `disabled` class
 */
async function findInputDate(page, deliveryDate) {
  // open the calendar and wait for the widget to be rendered
  await page.waitForSelector('.js-custom_datepicker');
  await page.click('.js-custom_datepicker');
  await page.waitForSelector('.bootstrap-datetimepicker-widget');

  // read the date and disabled state of every day cell in one browser round trip
  const cells = await page.$$eval(
    '.bootstrap-datetimepicker-widget .datepicker-days td[data-action="selectDay"]',
    (tds) => tds.map((td) => ({
      dayDate: td.getAttribute('data-day'),
      isDisabled: td.classList.contains('disabled'),
    }))
  );

  // the first cell carrying the requested date decides the outcome
  const match = cells.find((cell) => cell.dayDate === deliveryDate);
  if (match === undefined) {
    return { inputDateFound: false, inputDateAvail: false };
  }
  return { inputDateFound: true, inputDateAvail: !match.isDisabled };
}
/**
 * Picks `deliveryDate` in the delivery-date calendar and confirms the change.
 *
 * Opens the picker, moves the calendar to the date's month via
 * `navigateCalendar`, clicks the day cell whose `data-day` equals
 * `deliveryDate`, and, if the confirmation popup shows up within 5 seconds
 * and contains the confirm button, clicks it and waits for the page to
 * navigate.
 *
 * @param {object} page - Puppeteer page showing the product listing
 * @param {string} deliveryDate - date in MM/DD/YYYY form, compared verbatim
 *   with each cell's `data-day` attribute
 * @returns {Promise<boolean>} true when a matching day cell was clicked,
 *   false when no cell carries that date
 */
async function selectDeliveryDate(page, deliveryDate) {
  const deliveryDateSelector = '.js-custom_datepicker';
  const popupSelector = '#cboxLoadedContent';
  const confirmButtonSelector = '.confirm_select_date';

  // open the calendar and wait for the widget to be rendered
  await page.waitForSelector(deliveryDateSelector);
  await page.click(deliveryDateSelector);
  await page.waitForSelector('.bootstrap-datetimepicker-widget');

  // navigate to correct month and year in calendar
  await navigateCalendar(page, deliveryDate);

  const days = await page.$$('.bootstrap-datetimepicker-widget .datepicker-days td[data-action="selectDay"]');
  for (const day of days) {
    const dayValue = await page.evaluate(el => el.getAttribute('data-day'), day);
    if (dayValue !== deliveryDate) {
      continue;
    }

    await day.click();

    // the confirmation popup is optional: a timeout simply means it did not appear
    const popupHandle = await page
      .waitForSelector(popupSelector, { timeout: 5000 })
      .catch(() => null);
    if (!popupHandle) {
      return true;
    }

    const confirmButton = await page.$(confirmButtonSelector);
    if (confirmButton) {
      // register the navigation wait BEFORE clicking; waiting only after the
      // click can miss a navigation that completes quickly and then time out
      await Promise.all([
        page.waitForNavigation(),
        confirmButton.click(),
      ]);
    }
    return true; // date selected and popup handled
  }

  return false; // no day cell carries the requested date
}
/**
 * Moves the date-picker calendar to the month and year of `deliveryDate`.
 *
 * The displayed month is read from the calendar title ("<Month> <Year>") and
 * the calendar is stepped with its next/previous arrows until the title
 * matches. Navigation works in both directions and is bounded, so an
 * unreachable month raises an error instead of looping forever.
 *
 * @param {object} page - Puppeteer page containing the date picker
 * @param {string} deliveryDate - target date in MM/DD/YYYY form
 * @returns {Promise<void>}
 * @throws {Error} when `deliveryDate` or the calendar title cannot be parsed,
 *   or when the target month is not reached within the step limit
 */
async function navigateCalendar(page, deliveryDate) {
  const calendarTitleSelector = '.bootstrap-datetimepicker-widget .picker-switch';
  const nextSelector = '.bootstrap-datetimepicker-widget .next';
  const prevSelector = '.bootstrap-datetimepicker-widget .prev';
  // upper bound on arrow clicks (10 years of months)
  const maxMonthSteps = 120;
  // English month names are matched on their first three letters
  const monthPrefixes = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'];

  // open the calendar and wait for the widget to be rendered
  await page.waitForSelector('.js-custom_datepicker');
  await page.click('.js-custom_datepicker');
  await page.waitForSelector('.bootstrap-datetimepicker-widget');

  // parse month and year from the delivery date (the day is not needed here)
  const [month, , year] = deliveryDate.split('/');
  const deliveryMonth = parseInt(month, 10);
  const deliveryYear = parseInt(year, 10);
  if (!Number.isInteger(deliveryMonth) || !Number.isInteger(deliveryYear)
    || deliveryMonth < 1 || deliveryMonth > 12) {
    throw new Error(`navigateCalendar: invalid delivery date "${deliveryDate}" (expected MM/DD/YYYY)`);
  }
  // months are compared as a single running index: year * 12 + zero-based month
  const targetIndex = deliveryYear * 12 + (deliveryMonth - 1);

  // reads the calendar title and converts it to the same running month index
  const readCalendarIndex = async () => {
    const title = await page.$eval(calendarTitleSelector, el => el.textContent.trim());
    const [monthName = '', yearText = ''] = title.split(/\s+/);
    const monthIndex = monthPrefixes.indexOf(monthName.slice(0, 3).toLowerCase());
    const yearNumber = parseInt(yearText, 10);
    if (monthIndex === -1 || Number.isNaN(yearNumber)) {
      throw new Error(`navigateCalendar: unexpected calendar title "${title}"`);
    }
    return yearNumber * 12 + monthIndex;
  };

  let currentIndex = await readCalendarIndex();
  for (let steps = 0; currentIndex !== targetIndex; steps += 1) {
    if (steps >= maxMonthSteps) {
      throw new Error(`navigateCalendar: could not reach ${month}/${year} within ${maxMonthSteps} steps`);
    }
    // step towards the target month, forwards or backwards
    await page.click(currentIndex < targetIndex ? nextSelector : prevSelector);
    currentIndex = await readCalendarIndex();
  }
}
/**
 * Scrapes the product tiles on the current listing page.
 *
 * Every `.product_list_item` whose upper-cased name contains one of
 * `flowerNames` is turned into a plain record; other tiles are skipped.
 *
 * @param {object} page - Puppeteer page showing a product listing
 * @param {string[]} flowerNames - names matched as substrings of the
 *   upper-cased product name
 * @param {string} currDeliveryDate - copied into each record's `delivery` field
 * @returns {Promise<object[]>} records with the keys flowerName, flowerImage,
 *   prices, stemPrice, color, height, stemsPer, seller, farm, available and
 *   delivery; an empty array when waiting for or reading the page fails
 *   (the error is logged)
 */
async function extractFlowerData(page, flowerNames, currDeliveryDate) {
  // Runs inside the browser page, so it must not reference anything from this
  // file other than its own arguments.
  const scrapeTiles = (wantedNames, deliveryLabel) => {
    // trimmed text of a node, or '' when the node is missing
    const textOf = (node) => (node ? node.textContent.trim() : '');

    const records = [];
    for (const tile of Array.from(document.querySelectorAll('.product_list_item'))) {
      // product name, upper-cased for comparison with the wanted names
      const flowerName = textOf(tile.querySelector('.name_fav a')).toUpperCase();
      if (!wantedNames.some((wanted) => flowerName.includes(wanted))) {
        continue;
      }

      // product image
      const imageNode = tile.querySelector('img');
      const flowerImage = imageNode ? imageNode.getAttribute('src') : '';

      // pair each price with the quantity found at the same position
      const quantityNodes = tile.querySelectorAll('.stock_unit');
      const allPrices = [];
      for (const [position, priceNode] of Array.from(tile.querySelectorAll('.price_text')).entries()) {
        const price = textOf(priceNode);
        const quantity = textOf(quantityNodes[position]).replace('x', '').trim();
        if (price && quantity) {
          allPrices.push(`${price.replace('$ ', '$').trim()}/${quantity} ST`);
        }
      }
      const prices = allPrices.join(', ');

      // the stem price is the numeric part of the first price/quantity pair
      const firstPriceMatch = allPrices.length > 0 ? allPrices[0].match(/\$([\d.]+)/) : null;
      const stemPrice = firstPriceMatch ? firstPriceMatch[1] : '0';

      // a conic-gradient swatch is reported as 'assorted'
      const swatch = tile.querySelector('.hlx_plp_color');
      const rawColor = swatch ? swatch.style.background : '';
      const color = rawColor.includes('conic-gradient') ? 'assorted' : rawColor;

      const height = textOf(tile.querySelector('.classification_attributes_block_details p'));

      const farmNode = tile.querySelector('.country_icon_outer .text');
      const farm = farmNode ? farmNode.innerText.trim() : '';

      records.push({
        flowerName,
        flowerImage,
        prices,
        stemPrice,
        color,
        height,
        stemsPer: ' ',
        seller: "Holex",
        farm,
        available: ' ',
        delivery: deliveryLabel, // delivery is the date passed in
      });
    }
    return records;
  };

  try {
    await page.waitForSelector('.product_list_item');
    return await page.evaluate(scrapeTiles, flowerNames, currDeliveryDate);
  } catch (err) {
    console.error("error during data extraction:", err);
    return [];
  }
}