From 4ab826d1227688db9c3ea4ff6db1e35fbc8d0a8e Mon Sep 17 00:00:00 2001
From: centerfield77
Date: Thu, 15 Feb 2024 23:23:50 +0900
Subject: [PATCH] fix: parser processing

Stop hard-coding https://www.amazon.com in the book metadata scraper and
build URLs from the hostname of the currently selected Amazon region.

- parseAuthorUrl: look the author link up with the
  'a.a-size-base.a-link-normal.a-text-normal' selector and resolve its
  href against the region origin. Fall back to the bare region origin
  when no link is found.
- scrapeBookMetadata: load the /dp/<asin> product page from the region
  host.
---
 src/scraper/scrapeBookMetadata.ts | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/scraper/scrapeBookMetadata.ts b/src/scraper/scrapeBookMetadata.ts
index d0da261..bb13c0d 100644
--- a/src/scraper/scrapeBookMetadata.ts
+++ b/src/scraper/scrapeBookMetadata.ts
@@ -1,5 +1,6 @@
 import type { Root } from 'cheerio';
 
+import { currentAmazonRegion } from '~/amazonRegion';
 import type { Book, BookMetadata } from '~/models';
 
 import { loadRemoteDom } from './loadRemoteDom';
@@ -58,8 +59,11 @@ const parseIsbn = ($: Root): string | null => {
 };
 
 const parseAuthorUrl = ($: Root): string | null => {
-  const href = $('.contributorNameID').attr('href');
-  return `https://www.amazon.com${href}`;
+  const region = currentAmazonRegion();
+  const domainURL = `https://${region.hostname}`;
+  const href = $('a.a-size-base.a-link-normal.a-text-normal').attr('href');
+  // Resolve via URL so a root-relative href does not produce a double slash.
+  return href ? new URL(href, domainURL).toString() : domainURL;
 };
 
 export const parseBookMetadata = ($: Root): BookMetadata => {
@@ -73,7 +77,9 @@ export const parseBookMetadata = ($: Root): BookMetadata => {
 };
 
 const scrapeBookMetadata = async (book: Book): Promise<BookMetadata> => {
-  const { dom } = await loadRemoteDom(`https://www.amazon.com/dp/${book.asin}`, 1000);
+  const region = currentAmazonRegion();
+  const domainURL = `https://${region.hostname}`;
+  const { dom } = await loadRemoteDom(`${domainURL}/dp/${book.asin}`, 1000);
   return parseBookMetadata(dom);
 };
 