diff --git a/src/main/java/com/jisungin/infra/crawler/CrawlingBook.java b/src/main/java/com/jisungin/infra/crawler/CrawledBook.java similarity index 72% rename from src/main/java/com/jisungin/infra/crawler/CrawlingBook.java rename to src/main/java/com/jisungin/infra/crawler/CrawledBook.java index 123ddd0..49fef35 100644 --- a/src/main/java/com/jisungin/infra/crawler/CrawlingBook.java +++ b/src/main/java/com/jisungin/infra/crawler/CrawledBook.java @@ -4,9 +4,12 @@ import java.time.LocalDateTime; import lombok.Builder; import lombok.Getter; +import lombok.NoArgsConstructor; +import net.minidev.json.annotate.JsonIgnore; @Getter -public class CrawlingBook { +@NoArgsConstructor +public class CrawledBook { private String title; private String content; @@ -18,8 +21,8 @@ public class CrawlingBook { private LocalDateTime dateTime; @Builder - private CrawlingBook(String title, String content, String isbn, String publisher, String imageUrl, String thumbnail, - String authors, LocalDateTime dateTime) { + private CrawledBook(String title, String content, String isbn, String publisher, String imageUrl, String thumbnail, + String authors, LocalDateTime dateTime) { this.title = title; this.content = content; this.isbn = isbn; @@ -30,9 +33,9 @@ private CrawlingBook(String title, String content, String isbn, String publisher this.dateTime = dateTime; } - public static CrawlingBook of(String title, String content, String isbn, String publisher, String imageUrl, - String thumbnail, String authors, LocalDateTime dateTime) { - return CrawlingBook.builder() + public static CrawledBook of(String title, String content, String isbn, String publisher, String imageUrl, + String thumbnail, String authors, LocalDateTime dateTime) { + return CrawledBook.builder() .title(title) .content(content) .isbn(isbn) @@ -57,6 +60,10 @@ public BookCreateServiceRequest toServiceRequest() { .build(); } + public boolean isBlankIsbn() { + return isbn == null || isbn.isBlank(); + } + private String[] convertAuthorsToArr(String authors) { return authors.split(" 저| 공저| 글| 편저| 원저| 기획|&")[0].split(","); } diff --git a/src/main/java/com/jisungin/infra/crawler/Crawler.java b/src/main/java/com/jisungin/infra/crawler/Crawler.java index 5ffc118..0a2fb9b 100644 --- a/src/main/java/com/jisungin/infra/crawler/Crawler.java +++ b/src/main/java/com/jisungin/infra/crawler/Crawler.java @@ -4,7 +4,7 @@ public interface Crawler { - CrawlingBook crawlBook(String isbn); - Map crawlBestSellerBook(); + CrawledBook crawlBook(String isbn); + Map crawlBestSellerBook(); } diff --git a/src/main/java/com/jisungin/infra/crawler/Parser.java b/src/main/java/com/jisungin/infra/crawler/Parser.java index 5d61182..0a5f502 100644 --- a/src/main/java/com/jisungin/infra/crawler/Parser.java +++ b/src/main/java/com/jisungin/infra/crawler/Parser.java @@ -6,7 +6,7 @@ public interface Parser { String parseIsbn(Document doc); - CrawlingBook parseBook(Document doc); + CrawledBook parseBook(Document doc); Map parseBestSellerBookId(Document doc); } diff --git a/src/main/java/com/jisungin/infra/crawler/Yes24Crawler.java b/src/main/java/com/jisungin/infra/crawler/Yes24Crawler.java index 15e1225..55d52f9 100644 --- a/src/main/java/com/jisungin/infra/crawler/Yes24Crawler.java +++ b/src/main/java/com/jisungin/infra/crawler/Yes24Crawler.java @@ -5,8 +5,10 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; +@Slf4j @Component @RequiredArgsConstructor public class Yes24Crawler implements Crawler { @@ -15,26 +17,34 @@ public class Yes24Crawler implements Crawler { private final Parser parser; @Override - public CrawlingBook crawlBook(String isbn) { + public CrawledBook crawlBook(String isbn) { String bookId = parser.parseIsbn(fetcher.fetchIsbn(isbn)); return parser.parseBook(fetcher.fetchBook(bookId)); } @Override - public Map crawlBestSellerBook() { - Map bestSellerBookIds = parser.parseBestSellerBookId(fetcher.fetchBestSellerBookId()); - Map bestSellerBooks = new ConcurrentHashMap<>(); + public Map crawlBestSellerBook() { + Map crawledBookIds = parser.parseBestSellerBookId(fetcher.fetchBestSellerBookId()); + Map crawledBookMap = new ConcurrentHashMap<>(); - List> futures = bestSellerBookIds.entrySet().stream() + List> futures = crawledBookIds.entrySet().stream() .map(entry -> CompletableFuture.supplyAsync(() -> parser.parseBook(fetcher.fetchBook(entry.getValue()))) - .thenAccept(crawlingBook -> bestSellerBooks.put(entry.getKey(), crawlingBook)) - .exceptionally(throwable -> null)) + .thenAccept(crawledBook -> { + if (!crawledBook.isBlankIsbn()) { + crawledBookMap.put(entry.getKey(), crawledBook); + } + }) + .exceptionally(throwable -> { + log.warn("[WARN] 19세 이상 도서는 조회할 수 없습니다."); + + return null; + })) .toList(); CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); - return bestSellerBooks; + return crawledBookMap; } } diff --git a/src/main/java/com/jisungin/infra/crawler/Yes24Parser.java b/src/main/java/com/jisungin/infra/crawler/Yes24Parser.java index 86b0619..04eb805 100644 --- a/src/main/java/com/jisungin/infra/crawler/Yes24Parser.java +++ b/src/main/java/com/jisungin/infra/crawler/Yes24Parser.java @@ -35,7 +35,7 @@ public String parseIsbn(Document doc) { } @Override - public CrawlingBook parseBook(Document doc) { + public CrawledBook parseBook(Document doc) { String json = doc.select(bookJsonCss).html(); String title = parseJsonToString(json, "$.name"); @@ -47,7 +47,7 @@ public CrawlingBook parseBook(Document doc) { String content = Jsoup.clean(doc.select(bookContentCss).text(), Safelist.none()); LocalDateTime dateTime = parseDate(parseJsonToString(json, "$.workExample[0].datePublished")); - return CrawlingBook.of(title, content, isbn, publisher, imageUrl, thumbnail, authors, dateTime); + return CrawledBook.of(title, content, isbn, publisher, imageUrl, thumbnail, authors, dateTime); } @Override