Skip to content

Commit

Permalink
[BE] Course 데이터 저장 및 업데이트 (#71)
Browse files Browse the repository at this point in the history
Refactor: courseService에서 local 파일 경로 jar에서도 돌아가도록 수정
  • Loading branch information
lee0594 authored Mar 10, 2024
1 parent 7f2fb1b commit 0df11a8
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 28 deletions.
3 changes: 3 additions & 0 deletions back/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ dependencies {
implementation 'org.springframework.boot:spring-boot-starter-validation'
implementation 'org.seleniumhq.selenium:selenium-java:4.1.0'
implementation 'io.github.bonigarcia:webdrivermanager:5.0.3'
implementation group: 'org.apache.poi', name: 'poi', version: '5.0.0'
implementation group: 'org.apache.poi', name: 'poi-ooxml', version: '5.0.0'
implementation 'commons-codec:commons-codec:1.15'
runtimeOnly 'com.mysql:mysql-connector-j:8.1.0'
compileOnly 'org.projectlombok:lombok'
annotationProcessor 'org.projectlombok:lombok'
Expand Down
130 changes: 111 additions & 19 deletions back/src/main/java/CPR/NLP/service/CourseService.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,26 @@
import CPR.NLP.dto.CourseResponseDTO;
import CPR.NLP.repository.CourseRepository;
import lombok.RequiredArgsConstructor;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.ResourcePatternUtils;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
Expand All @@ -20,25 +36,101 @@ public class CourseService {

private final CourseRepository courseRepository;

public void saveOrUpdateCourse(CourseRequestDTO courseDTO) {
Optional<Course> existingCourse = courseRepository.findByNameAndProfessor(courseDTO.getName(), courseDTO.getProfessor());

if (existingCourse.isEmpty()) {
courseRepository.save(courseDTO.toEntity());
} else { //어차피 과목정보에 올라오는 과목들은 거의 다 변경된 과목들이므로 따로 수정됐는지 여부는 확인하지 않아도 될 듯: 그런데 같은 교수님이 같은 과목을 두 반 이상 강의하는 경우에 location과 time 어떻게 저장할지 고려해야 할듯
Course existing = existingCourse.get();
Course updatedCourse = Course.builder()
.courseId(existing.getCourseId())
.code(courseDTO.getCode())
.name(courseDTO.getName())
.professor(courseDTO.getProfessor())
.location(courseDTO.getLocation())
.time(courseDTO.getTime())
.createdAt(existing.getCreatedAt())
.updatedAt(LocalDateTime.now())
.build();

courseRepository.save(updatedCourse);
/*
ClassPathResource resource = new ClassPathResource("개설교과목정보.xlsx");
String pythonScriptPath = resource.getFile().getAbsolutePath();
*/

// The course workbook is bundled with the application's resources, so it is read from the
// classpath through an InputStream (this works both from an IDE and from a packaged jar).
// The "classpath*:" prefix is only interpreted by getResources(...); when handed to
// getResource(...) it is treated as part of the path and the file is never found, so a plain
// ClassPathResource is used instead.
private Resource resource = new ClassPathResource("개설교과목정보.xlsx");
// MD5 hex digest recorded by the most recent scheduled run that detected a change;
// null until the first run, so the first run always imports the workbook.
private String lastHash = null;

/**
 * Scheduled job that re-imports the course workbook when its content has changed.
 *
 * <p>The MD5 digest of the workbook is compared with the digest remembered in
 * {@code lastHash}. When they differ (always the case on the first run, because
 * {@code lastHash} starts as {@code null}) the workbook is parsed again via
 * {@code crawlExcelFile} and the new digest is remembered.
 *
 * <p>An {@link IOException} while reading the workbook is printed and otherwise ignored, so
 * a failed run does not propagate out of the scheduled task.
 *
 * <p>NOTE(review): Spring cron fields are second, minute, hour, day-of-month, month,
 * day-of-week, so the expression below fires at 00:12:00 every Monday. The previous comment
 * described "every Monday at 12:00 noon", which would be {@code "0 0 12 * * MON"} - confirm
 * which schedule is intended.
 */
@Scheduled(cron = "0 12 0 * * MON")
public void checkAndUpdateExcel() {
    try {
        final String currentHash;
        // try-with-resources: release the stream as soon as the digest has been computed.
        try (InputStream hashInput = resource.getInputStream()) {
            currentHash = calculateMD5(hashInput);
        }

        if (!currentHash.equals(lastHash)) {
            // The file content changed since the last recorded digest -> import it again.
            crawlExcelFile(resource);
            lastHash = currentHash;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Computes the MD5 digest of the bytes read from the given stream.
 *
 * @param inputStream stream whose content is hashed
 * @return the digest as a hexadecimal string, as produced by {@code DigestUtils.md5Hex}
 * @throws IOException if reading from the stream fails
 */
private String calculateMD5(InputStream inputStream) throws IOException {
    final String hexDigest = DigestUtils.md5Hex(inputStream);
    return hexDigest;
}

/**
 * Imports course rows from the first sheet of the given workbook into the course repository.
 *
 * <p>For every row from index 1 onwards (index 0 is skipped, presumably a header row - TODO
 * confirm) the 5th, 7th, 9th and 11th columns are read as course code, course name, time and
 * professor. A course already stored under the same name and professor is overwritten
 * (keeping its id, location and creation timestamp); otherwise a new course is inserted.
 *
 * <p>When the stored course was created or updated recently ({@code ChronoUnit.DAYS.between}
 * yields at most 1), the row is assumed to be another section of the same course taught by
 * the same professor, and its time is appended to the stored time with {@code " / "} instead
 * of replacing it.
 *
 * <p>An {@link IOException} while opening or reading the workbook is printed and otherwise
 * ignored.
 *
 * @param resource the .xlsx workbook to read
 */
private void crawlExcelFile(Resource resource) {
    // try-with-resources closes the workbook and the stream even when a row fails mid-import.
    try (InputStream inputStream = resource.getInputStream();
         XSSFWorkbook workbook = new XSSFWorkbook(inputStream)) {
        XSSFSheet sheet = workbook.getSheetAt(0);

        // getLastRowNum() is the index of the last row. getPhysicalNumberOfRows() only counts
        // rows that exist, so on a sheet with gaps it is not a valid bound for getRow(index).
        for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
            XSSFRow currentRow = sheet.getRow(rowIndex);
            if (currentRow == null) {
                continue; // gap in the sheet: no row object at this index
            }

            String code = stringCellOrEmpty(currentRow, 4);       // 5th column
            String name = stringCellOrEmpty(currentRow, 6);       // 7th column
            String time = stringCellOrEmpty(currentRow, 8);       // 9th column
            String professor = stringCellOrEmpty(currentRow, 10); // 11th column
            if (name.isEmpty()) {
                continue; // a row without a course name cannot be matched or identified
            }

            // Look the course up by the same value that is stored as its name; looking it up
            // by the code column never matches a stored course and inserts duplicates.
            Optional<Course> course = courseRepository.findByNameAndProfessor(name, professor);
            if (course.isPresent()) {
                Course existing = course.get();
                LocalDateTime now = LocalDateTime.now();
                long daysSinceUpdate = ChronoUnit.DAYS.between(existing.getUpdatedAt(), now);
                long daysSinceCreation = ChronoUnit.DAYS.between(existing.getCreatedAt(), now);

                // Recently touched => treat this row as an additional section and append its
                // time; otherwise the stored time is stale and is replaced.
                boolean touchedRecently = daysSinceUpdate <= 1 || daysSinceCreation <= 1;
                String mergedTime = touchedRecently ? existing.getTime() + " / " + time : time;

                Course updatedCourse = Course.builder()
                        .courseId(existing.getCourseId())
                        .code(code)
                        .name(name)
                        .professor(professor)
                        .location(existing.getLocation())
                        .time(mergedTime)
                        .createdAt(existing.getCreatedAt())
                        .build();

                courseRepository.save(updatedCourse);
            } else {
                Course newCourse = Course.builder()
                        .code(code)
                        .name(name)
                        .professor(professor)
                        .time(time)
                        .build();

                courseRepository.save(newCourse);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Returns the string value of the cell at {@code columnIndex}, or {@code ""} when the row has no such cell. */
private static String stringCellOrEmpty(XSSFRow row, int columnIndex) {
    XSSFCell cell = row.getCell(columnIndex);
    return cell == null ? "" : cell.getStringCellValue();
}

Expand Down
31 changes: 22 additions & 9 deletions back/src/main/java/CPR/NLP/service/CrawlingService.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@
import CPR.NLP.repository.CourseRepository;
import CPR.NLP.repository.ResultRepository;
import CPR.NLP.repository.ReviewRepository;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.*;
import lombok.RequiredArgsConstructor;
import org.openqa.selenium.*;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.beans.factory.annotation.Value;
import org.openqa.selenium.chrome.ChromeDriver;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.time.Duration;
import java.time.LocalDateTime;
import java.util.*;
import java.util.concurrent.TimeUnit;

Expand Down Expand Up @@ -51,7 +54,7 @@ public boolean isEnoughWords(String text) {
return words.length >= 5;
}

@Scheduled(cron = "0 0 0 * * *") //반환타입이 void고, 매개변수가 없는 메소드여야 함
@Scheduled(cron = "0 7 16 * * *") //반환타입이 void고, 매개변수가 없는 메소드여야 함
public void saveReviews() {
List<Course> courses = courseRepository.findAll();
WebDriver driver = new ChromeDriver();
Expand All @@ -63,12 +66,16 @@ public void saveReviews() {

List<Map<String, Object>> reviews = executeCrawlingScript(driver, name, professor); //crawling 함수 호출 -> rating과 content가 담긴 reviews list 받아옴, 차례로 course_id와 함께 save
float size = reviews.size();
if (size == 0)
size = 1;
reviewRepository.deleteByCourseCourseId(courseId); //기존 해당 course의 review들 삭제

String text = "";
String data = "";
String feeling = "";
String allReviews = "";
String sentiment = "";
String confidence = "";
float averageRating = 0;

for (Map<String, Object> review: reviews) {
Expand Down Expand Up @@ -102,12 +109,17 @@ public void saveReviews() {
if (isEnoughWords(text)) //남은 text 처리
data += pythonServiceCaller.callSummarizeFunction(text, clientId, clientSecret);

feeling = pythonServiceCaller.callSentimentFunction(allReviews, clientId, clientSecret);
/*if (allReviews.trim() != ""){
feeling = pythonServiceCaller.callSentimentFunction(allReviews, clientId, clientSecret);
Gson gson = new Gson();
JsonObject documentObject = gson.fromJson(feeling, JsonObject.class).get("document").getAsJsonObject();
String sentiment = documentObject.get("sentiment").getAsString();
String confidence = documentObject.get("confidence").toString();
//Gson gson = new Gson();
Gson gson = new GsonBuilder().setLenient().create();
JsonElement feelingElement = gson.fromJson(feeling, JsonElement.class);
JsonObject documentObject = gson.fromJson(feeling, JsonObject.class).get("document").getAsJsonObject();
sentiment = documentObject.get("sentiment").getAsString();
confidence = documentObject.get("confidence").toString();
}*/

int resultId = -1;
Optional<Result> result = resultRepository.findByCourse(course);
Expand All @@ -131,6 +143,7 @@ public void saveReviews() {
.confidence(confidence)
.sentiment(sentiment)
.averageRating(averageRating/size)
//.createdAt(LocalDateTime.now())
.build();

resultRepository.save(newResult);
Expand Down Expand Up @@ -185,7 +198,7 @@ public List<Map<String, Object>> executeCrawlingScript(WebDriver driver, String
System.out.println("No reviews found for the professor's lecture.");
return reviews;
}
moreElement.click(); //더보기 메뉴
moreElement.click(); //더보기 메뉴*/

// Retrieve and print the reviews
List<WebElement> starElements = driver.findElements(By.cssSelector("body > div > div > div.pane > div > div.articles > div.article > div.article_header > div.title > div.rate > span.star > span.on"));
Expand Down
Binary file not shown.

0 comments on commit 0df11a8

Please sign in to comment.