Skip to content

Commit

Permalink
refactor: Crawler 성인 도서 오류 처리 (#115)
Browse files Browse the repository at this point in the history
  • Loading branch information
jwooo committed Jun 7, 2024
1 parent b3bc7ff commit b93856a
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
import java.time.LocalDateTime;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import net.minidev.json.annotate.JsonIgnore;

@Getter
public class CrawlingBook {
@NoArgsConstructor
public class CrawledBook {

private String title;
private String content;
Expand All @@ -18,8 +21,8 @@ public class CrawlingBook {
private LocalDateTime dateTime;

@Builder
private CrawlingBook(String title, String content, String isbn, String publisher, String imageUrl, String thumbnail,
String authors, LocalDateTime dateTime) {
private CrawledBook(String title, String content, String isbn, String publisher, String imageUrl, String thumbnail,
String authors, LocalDateTime dateTime) {
this.title = title;
this.content = content;
this.isbn = isbn;
Expand All @@ -30,9 +33,9 @@ private CrawlingBook(String title, String content, String isbn, String publisher
this.dateTime = dateTime;
}

public static CrawlingBook of(String title, String content, String isbn, String publisher, String imageUrl,
String thumbnail, String authors, LocalDateTime dateTime) {
return CrawlingBook.builder()
public static CrawledBook of(String title, String content, String isbn, String publisher, String imageUrl,
String thumbnail, String authors, LocalDateTime dateTime) {
return CrawledBook.builder()
.title(title)
.content(content)
.isbn(isbn)
Expand All @@ -57,6 +60,10 @@ public BookCreateServiceRequest toServiceRequest() {
.build();
}

/**
 * Reports whether this book lacks a usable ISBN.
 *
 * @return {@code true} when {@code isbn} is {@code null} or {@link String#isBlank() blank};
 *         {@code false} otherwise
 */
public boolean isBlankIsbn() {
    // A missing value counts as blank; check it first so isBlank() is never called on null.
    if (isbn == null) {
        return true;
    }
    return isbn.isBlank();
}

/**
 * Splits a raw author string into individual author names.
 *
 * <p>The string is cut at the first role marker ({@code " 저"}, {@code " 공저"}, {@code " 글"},
 * {@code " 편저"}, {@code " 원저"}, {@code " 기획"} or {@code "&"}); the part before it is then
 * split on commas. Each name is stripped of surrounding whitespace and empty entries are dropped.
 *
 * @param authors raw author text; may be {@code null} or blank
 * @return the author names in their original order; an empty array when {@code authors} is
 *         {@code null}, blank, or contains no name before the first role marker
 */
private String[] convertAuthorsToArr(String authors) {
    if (authors == null || authors.isBlank()) {
        return new String[0];
    }

    // A positive limit keeps trailing empty strings, so the result always has an element 0.
    // Without it, an input consisting only of a role marker (e.g. "&") splits into an empty
    // array and indexing [0] throws ArrayIndexOutOfBoundsException.
    String names = authors.split(" 저| 공저| 글| 편저| 원저| 기획|&", 2)[0];

    return java.util.Arrays.stream(names.split(","))
            .map(String::strip)
            .filter(name -> !name.isEmpty())
            .toArray(String[]::new);
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/jisungin/infra/crawler/Crawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

public interface Crawler {

CrawlingBook crawlBook(String isbn);
Map<Long, CrawlingBook> crawlBestSellerBook();
CrawledBook crawlBook(String isbn);
Map<Long, CrawledBook> crawlBestSellerBook();

}
2 changes: 1 addition & 1 deletion src/main/java/com/jisungin/infra/crawler/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
public interface Parser {

String parseIsbn(Document doc);
CrawlingBook parseBook(Document doc);
CrawledBook parseBook(Document doc);
Map<Long, String> parseBestSellerBookId(Document doc);

}
26 changes: 18 additions & 8 deletions src/main/java/com/jisungin/infra/crawler/Yes24Crawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;

@Slf4j
@Component
@RequiredArgsConstructor
public class Yes24Crawler implements Crawler {
Expand All @@ -15,26 +17,34 @@ public class Yes24Crawler implements Crawler {
private final Parser parser;

@Override
public CrawlingBook crawlBook(String isbn) {
public CrawledBook crawlBook(String isbn) {
String bookId = parser.parseIsbn(fetcher.fetchIsbn(isbn));

return parser.parseBook(fetcher.fetchBook(bookId));
}

@Override
public Map<Long, CrawlingBook> crawlBestSellerBook() {
Map<Long, String> bestSellerBookIds = parser.parseBestSellerBookId(fetcher.fetchBestSellerBookId());
Map<Long, CrawlingBook> bestSellerBooks = new ConcurrentHashMap<>();
public Map<Long, CrawledBook> crawlBestSellerBook() {
Map<Long, String> crawledBookIds = parser.parseBestSellerBookId(fetcher.fetchBestSellerBookId());
Map<Long, CrawledBook> crawledBookMap = new ConcurrentHashMap<>();

List<CompletableFuture<Void>> futures = bestSellerBookIds.entrySet().stream()
List<CompletableFuture<Void>> futures = crawledBookIds.entrySet().stream()
.map(entry -> CompletableFuture.supplyAsync(() -> parser.parseBook(fetcher.fetchBook(entry.getValue())))
.thenAccept(crawlingBook -> bestSellerBooks.put(entry.getKey(), crawlingBook))
.exceptionally(throwable -> null))
.thenAccept(crawledBook -> {
if (!crawledBook.isBlankIsbn()) {
crawledBookMap.put(entry.getKey(), crawledBook);
}
})
.exceptionally(throwable -> {
log.warn("[WARN] 19세 이상 도서는 조회할 수 없습니다.");

return null;
}))
.toList();

CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join();

return bestSellerBooks;
return crawledBookMap;
}

}
4 changes: 2 additions & 2 deletions src/main/java/com/jisungin/infra/crawler/Yes24Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public String parseIsbn(Document doc) {
}

@Override
public CrawlingBook parseBook(Document doc) {
public CrawledBook parseBook(Document doc) {
String json = doc.select(bookJsonCss).html();

String title = parseJsonToString(json, "$.name");
Expand All @@ -47,7 +47,7 @@ public CrawlingBook parseBook(Document doc) {
String content = Jsoup.clean(doc.select(bookContentCss).text(), Safelist.none());
LocalDateTime dateTime = parseDate(parseJsonToString(json, "$.workExample[0].datePublished"));

return CrawlingBook.of(title, content, isbn, publisher, imageUrl, thumbnail, authors, dateTime);
return CrawledBook.of(title, content, isbn, publisher, imageUrl, thumbnail, authors, dateTime);
}

@Override
Expand Down

0 comments on commit b93856a

Please sign in to comment.