diff --git a/back/build.gradle b/back/build.gradle index aede553..c7d84e2 100644 --- a/back/build.gradle +++ b/back/build.gradle @@ -28,6 +28,9 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-validation' implementation 'org.seleniumhq.selenium:selenium-java:4.1.0' implementation 'io.github.bonigarcia:webdrivermanager:5.0.3' + implementation group: 'org.apache.poi', name: 'poi', version: '5.0.0' + implementation group: 'org.apache.poi', name: 'poi-ooxml', version: '5.0.0' + implementation 'commons-codec:commons-codec:1.15' runtimeOnly 'com.mysql:mysql-connector-j:8.1.0' compileOnly 'org.projectlombok:lombok' annotationProcessor 'org.projectlombok:lombok' diff --git a/back/src/main/java/CPR/NLP/service/CourseService.java b/back/src/main/java/CPR/NLP/service/CourseService.java index 6bc891e..2667bba 100644 --- a/back/src/main/java/CPR/NLP/service/CourseService.java +++ b/back/src/main/java/CPR/NLP/service/CourseService.java @@ -5,10 +5,26 @@ import CPR.NLP.dto.CourseResponseDTO; import CPR.NLP.repository.CourseRepository; import lombok.RequiredArgsConstructor; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.poi.xssf.usermodel.XSSFCell; +import org.apache.poi.xssf.usermodel.XSSFRow; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.DefaultResourceLoader; +import org.springframework.core.io.Resource; +import org.springframework.core.io.support.ResourcePatternUtils; +import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; import java.util.List; import java.util.Optional; import 
java.util.stream.Collectors;
 
@@ -20,25 +36,145 @@ public class CourseService {
 
     private final CourseRepository courseRepository;
 
-    public void saveOrUpdateCourse(CourseRequestDTO courseDTO) {
-        Optional existingCourse = courseRepository.findByNameAndProfessor(courseDTO.getName(), courseDTO.getProfessor());
-
-        if (existingCourse.isEmpty()) {
-            courseRepository.save(courseDTO.toEntity());
-        } else { //어차피 과목정보에 올라오는 과목들은 거의 다 변경된 과목들이므로 따로 수정됐는지 여부는 확인하지 않아도 될 듯: 그런데 같은 교수님이 같은 과목을 두 반 이상 강의하는 경우에 location과 time 어떻게 저장할지 고려해야 할듯
-            Course existing = existingCourse.get();
-            Course updatedCourse = Course.builder()
-                    .courseId(existing.getCourseId())
-                    .code(courseDTO.getCode())
-                    .name(courseDTO.getName())
-                    .professor(courseDTO.getProfessor())
-                    .location(courseDTO.getLocation())
-                    .time(courseDTO.getTime())
-                    .createdAt(existing.getCreatedAt())
-                    .updatedAt(LocalDateTime.now())
-                    .build();
-
-            courseRepository.save(updatedCourse);
+    // Workbook of offered courses, bundled on the classpath (src/main/resources).
+    // Loaded through ClassPathResource because the "classpath*:" prefix is only resolved by
+    // ResourcePatternResolver#getResources(String); handed to getResource(String) it does not
+    // resolve to the bundled file, so the workbook could not be opened.
+    private final Resource resource = new ClassPathResource("개설교과목정보.xlsx");
+
+    // MD5 digest of the workbook at the last successful import; null until one has succeeded.
+    // Held in memory only, so the workbook is imported again after an application restart.
+    private String lastHash = null;
+
+    /**
+     * Scheduled job that re-imports the course workbook when its content has changed.
+     *
+     * <p>The workbook's MD5 digest is compared with the digest recorded after the last successful
+     * import; the import runs only when the two differ. I/O failures are printed to stderr and
+     * leave the recorded digest untouched, so the next run tries again.
+     */
+    // Spring cron fields are "second minute hour day-of-month month day-of-week", so this
+    // expression fires every Monday at 00:12:00.
+    // NOTE(review): the previous comment said "every Monday at 12 PM"; noon would be
+    // "0 0 12 * * MON" - confirm which schedule is intended.
+    @Scheduled(cron = "0 12 0 * * MON")
+    public void checkAndUpdateExcel() {
+        try {
+            String currentHash;
+            try (InputStream inputStream = resource.getInputStream()) {
+                currentHash = calculateMD5(inputStream);
+            }
+
+            if (currentHash.equals(lastHash)) {
+                return; // unchanged since the last successful import
+            }
+
+            crawlExcelFile(resource);
+            // Recorded only after the import completed, so a failed import is not skipped next time.
+            lastHash = currentHash;
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Computes the MD5 digest of everything readable from the stream, as a hex string.
+     * The digest is used for change detection only. The caller closes the stream.
+     *
+     * @throws IOException if reading the stream fails
+     */
+    private String calculateMD5(InputStream inputStream) throws IOException {
+        return DigestUtils.md5Hex(inputStream);
+    }
+
+    /**
+     * Inserts the course described by one workbook row, or updates the stored course that has
+     * the same name and professor.
+     *
+     * <p>A stored course created or updated very recently is treated as another section of the
+     * same course taught by the same professor, so the new time slot is appended to the stored
+     * one; an older record has its time replaced.
+     */
+    private void saveOrMergeCourse(String code, String name, String time, String professor) {
+        // Look up by the same name that is stored below. The lookup used to pass the course-code
+        // column as the name, which never matches what is saved in the name field.
+        Optional<Course> found = courseRepository.findByNameAndProfessor(name, professor);
+        if (found.isEmpty()) {
+            // NOTE(review): timestamps are not set here; presumably the entity maintains them - confirm.
+            courseRepository.save(Course.builder()
+                    .code(code)
+                    .name(name)
+                    .professor(professor)
+                    .time(time)
+                    .build());
+            return;
+        }
+
+        Course existing = found.get();
+        LocalDateTime now = LocalDateTime.now();
+        boolean touchedRecently = isRecent(existing.getUpdatedAt(), now)
+                || isRecent(existing.getCreatedAt(), now);
+        String storedTime = existing.getTime();
+        // Appending to a missing time would store the literal text "null / ...".
+        boolean canAppend = touchedRecently && storedTime != null && !storedTime.isEmpty();
+
+        courseRepository.save(Course.builder()
+                .courseId(existing.getCourseId())
+                .code(code)
+                .name(name)
+                .professor(professor)
+                .location(existing.getLocation())
+                .time(canAppend ? storedTime + " / " + time : time)
+                .createdAt(existing.getCreatedAt())
+                .build());
+    }
+
+    /**
+     * Tells whether {@link ChronoUnit#DAYS} counts at most one whole day between the timestamp
+     * and {@code now} (i.e. fewer than two full days); a missing timestamp is never recent.
+     */
+    private static boolean isRecent(LocalDateTime timestamp, LocalDateTime now) {
+        return timestamp != null && ChronoUnit.DAYS.between(timestamp, now) <= 1;
+    }
+
+    /**
+     * Imports every data row of the workbook's first sheet into the course repository.
+     *
+     * <p>Row 0 is treated as the header. Columns 5, 7, 9 and 11 (1-based) are read as the course
+     * code, name, time and professor.
+     *
+     * @param resource the .xlsx workbook to read
+     * @throws IOException if the workbook cannot be opened or read; propagated so that the
+     *                     caller does not record the file as imported
+     */
+    private void crawlExcelFile(Resource resource) throws IOException {
+        // Both the stream and the workbook are closed even when a row fails part-way through.
+        try (InputStream inputStream = resource.getInputStream();
+             XSSFWorkbook workbook = new XSSFWorkbook(inputStream)) {
+            XSSFSheet sheet = workbook.getSheetAt(0);
+            // Renders any cell type as text and returns "" for a missing cell, whereas
+            // getStringCellValue() throws on numeric cells and getCell() may return null.
+            org.apache.poi.ss.usermodel.DataFormatter formatter =
+                    new org.apache.poi.ss.usermodel.DataFormatter();
+
+            // getLastRowNum() is the zero-based index of the last row, hence the inclusive bound;
+            // getPhysicalNumberOfRows() would stop early when the sheet contains empty rows.
+            for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
+                XSSFRow currentRow = sheet.getRow(rowIndex);
+                if (currentRow == null) {
+                    continue; // a row that was never written
+                }
+
+                String code = formatter.formatCellValue(currentRow.getCell(4));
+                String name = formatter.formatCellValue(currentRow.getCell(6));
+                String time = formatter.formatCellValue(currentRow.getCell(8));
+                String professor = formatter.formatCellValue(currentRow.getCell(10));
+                if (name.isEmpty() && professor.isEmpty()) {
+                    continue; // nothing to identify a course by
+                }
+
+                saveOrMergeCourse(code, name, time, professor);
+            }
         }
     }
 
diff --git a/back/src/main/java/CPR/NLP/service/CrawlingService.java b/back/src/main/java/CPR/NLP/service/CrawlingService.java
index f480e1b..00433bf 100644
--- a/back/src/main/java/CPR/NLP/service/CrawlingService.java
+++ b/back/src/main/java/CPR/NLP/service/CrawlingService.java
@@ -6,16 +6,19 @@ import CPR.NLP.repository.CourseRepository;
 import CPR.NLP.repository.ResultRepository;
 import CPR.NLP.repository.ReviewRepository;
 
-import com.google.gson.Gson;
-import com.google.gson.JsonObject;
+import com.google.gson.*;
 import lombok.RequiredArgsConstructor;
 import org.openqa.selenium.*;
+import org.openqa.selenium.support.ui.ExpectedConditions;
+import org.openqa.selenium.support.ui.WebDriverWait;
 import org.springframework.beans.factory.annotation.Value;
 import org.openqa.selenium.chrome.ChromeDriver;
 import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 
+import java.time.Duration;
+import java.time.LocalDateTime;
 import java.util.*;
 import java.util.concurrent.TimeUnit;
 
@@ -51,7 +54,7 @@ public boolean isEnoughWords(String text) {
         return words.length >= 5;
     }
 
-    @Scheduled(cron = "0 0 0 * * *") //반환타입이 void고, 매개변수가 없는 메소드여야 함
+    @Scheduled(cron = "0 7 16 * * *") //반환타입이 void고, 매개변수가 없는 메소드여야 함
     public void saveReviews() {
         List courses = courseRepository.findAll();
         WebDriver driver = new ChromeDriver();
@@ -63,12 +66,16 @@ public void saveReviews() {
 
             List> reviews = executeCrawlingScript(driver, name, professor); //crawling 함수 호출 -> rating과 content가 담긴 reviews list 받아옴, 차례로 course_id와 함께 save
             float size = reviews.size();
+            if (size == 0)
+                size = 1;
             reviewRepository.deleteByCourseCourseId(courseId); //기존 해당 course의 review들 삭제
 
             String text = "";
             String data = "";
             String feeling = "";
             String allReviews = "";
+            String sentiment = "";
+            String confidence = "";
             float averageRating =
0; for (Map review: reviews) { @@ -102,12 +109,17 @@ public void saveReviews() { if (isEnoughWords(text)) //남은 text 처리 data += pythonServiceCaller.callSummarizeFunction(text, clientId, clientSecret); - feeling = pythonServiceCaller.callSentimentFunction(allReviews, clientId, clientSecret); + /*if (allReviews.trim() != ""){ + feeling = pythonServiceCaller.callSentimentFunction(allReviews, clientId, clientSecret); - Gson gson = new Gson(); - JsonObject documentObject = gson.fromJson(feeling, JsonObject.class).get("document").getAsJsonObject(); - String sentiment = documentObject.get("sentiment").getAsString(); - String confidence = documentObject.get("confidence").toString(); + //Gson gson = new Gson(); + Gson gson = new GsonBuilder().setLenient().create(); + JsonElement feelingElement = gson.fromJson(feeling, JsonElement.class); + + JsonObject documentObject = gson.fromJson(feeling, JsonObject.class).get("document").getAsJsonObject(); + sentiment = documentObject.get("sentiment").getAsString(); + confidence = documentObject.get("confidence").toString(); + }*/ int resultId = -1; Optional result = resultRepository.findByCourse(course); @@ -131,6 +143,7 @@ public void saveReviews() { .confidence(confidence) .sentiment(sentiment) .averageRating(averageRating/size) + //.createdAt(LocalDateTime.now()) .build(); resultRepository.save(newResult); @@ -185,7 +198,7 @@ public List> executeCrawlingScript(WebDriver driver, String System.out.println("No reviews found for the professor's lecture."); return reviews; } - moreElement.click(); //더보기 메뉴 + moreElement.click(); //더보기 메뉴*/ // Retrieve and print the reviews List starElements = driver.findElements(By.cssSelector("body > div > div > div.pane > div > div.articles > div.article > div.article_header > div.title > div.rate > span.star > span.on")); diff --git "a/back/src/main/resources/\352\260\234\354\204\244\352\265\220\352\263\274\353\252\251\354\240\225\353\263\264.xlsx" 
"b/back/src/main/resources/\352\260\234\354\204\244\352\265\220\352\263\274\353\252\251\354\240\225\353\263\264.xlsx" new file mode 100644 index 0000000..ad1e4fc Binary files /dev/null and "b/back/src/main/resources/\352\260\234\354\204\244\352\265\220\352\263\274\353\252\251\354\240\225\353\263\264.xlsx" differ