-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
129 changed files
with
2,683 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,251 @@ | ||
from flask import Flask, request, redirect, url_for | ||
|
||
import os | ||
import time | ||
import json | ||
import shutil | ||
import datetime | ||
import pandas as pd | ||
|
||
from pathlib import Path | ||
from natsort import os_sorted | ||
|
||
from main import pointchecker | ||
from qna import categorize_qna | ||
from mul import detect_multiple | ||
from sub import detect_subjective | ||
from path import * | ||
from utils import * | ||
|
||
app = Flask(__name__) | ||
|
||
# Extensions (lower-case, without the dot) that may be uploaded.
ALLOWED_FILE_EXTENSIONS = {'pdf', 'png', 'jpg', 'jpeg'}


def allowed_file(filename):
    """Return True when *filename* ends in one of the allowed extensions.

    The text after the last dot is compared case-insensitively, so
    ``scan.PDF`` is accepted just like ``scan.pdf``. A name with no dot, or
    with nothing after the last dot, is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_FILE_EXTENSIONS
|
||
|
||
@app.route("/")
def hello():
    """Return a small HTML greeting that shows the id produced by getId()."""
    # ID generation rule: client IP + access time.
    client_id = getId()
    return f'Hello World! <br><br> id : {client_id}'
|
||
@app.route("/id/<client_id>", methods=["GET"])
def get_json(client_id):
    """Return the stored grading result for *client_id*.

    The result is read from ``<UPLOAD_FOLDER>/<client_id>/data.json``.

    The URL variable is named ``client_id`` so that it matches the view's
    parameter; Flask passes URL variables as keyword arguments, so a
    mismatched name makes every request fail.

    Returns:
        ``(payload, 200)`` on success, or ``("Not Found", 404)`` when the id
        is not a plain folder name or no result file exists for it.
    """
    # The id comes straight from the URL: refuse anything that is not a
    # single path component so it cannot escape UPLOAD_FOLDER.
    if client_id in (".", "..") or os.path.basename(client_id) != client_id:
        return "Not Found", 404

    id_path = UPLOAD_FOLDER + "/" + client_id
    json_path = id_path + "/" + "data.json"

    try:
        with open(json_path) as f:
            json_data = json.load(f)
    except FileNotFoundError:
        return "Not Found", 404

    return json_data, 200
|
||
|
||
@app.route("/upload", methods=["POST"])
def upload_files():
    """Store an uploaded test file and redirect to the multi-testee grader.

    Expects a multipart POST with:
      * a ``data`` form field holding a JSON object with the keys
        ``client_id``, ``test_name``, ``copy_num``, ``total_qna_num``,
        ``testee_num`` and ``test_category``;
      * a ``pdf`` file part whose extension passes ``allowed_file``.

    The file is saved under ``<UPLOAD_FOLDER>/<client_id>/`` and the request
    is redirected to ``plural_check`` with the metadata as query parameters.

    Returns:
        A redirect response on success, or a ``(message, 400)`` tuple when
        the ``data`` field is missing or malformed, the client id is not a
        plain folder name, or the file type is not allowed.
    """
    # The route only accepts POST, so no request.method check is needed.
    raw_data = request.form.get("data")
    if raw_data is None:
        return "Missing 'data' form field", 400
    try:
        data = json.loads(raw_data)
    except json.JSONDecodeError:
        return "Invalid JSON in 'data' form field", 400

    client_id = data["client_id"]
    # client_id is client-supplied and becomes a directory name: accept only
    # a single, non-empty path component so it cannot escape UPLOAD_FOLDER.
    if (not isinstance(client_id, str)
            or not client_id
            or client_id in (".", "..")
            or os.path.basename(client_id) != client_id):
        return "Invalid client_id", 400
    id_path = UPLOAD_FOLDER + "/" + client_id

    print(client_id)
    print(id_path)

    # Create the upload folder and the per-client folder. exist_ok avoids the
    # check-then-create race without hiding real errors (e.g. permissions).
    os.makedirs(id_path, exist_ok=True)

    pdf = request.files["pdf"]
    # Drop any directory part of the client-supplied file name.
    pdf_name = os.path.basename(pdf.filename or "")
    if not allowed_file(pdf_name):
        # Previously a rejected file was skipped silently and the request
        # was still reported as a successful upload.
        return "File type not allowed", 400
    pdf.save(os.path.join(id_path, pdf_name))

    test_name = data["test_name"]
    copy_num = data["copy_num"]
    total_qna_num = data["total_qna_num"]
    testee_num = data["testee_num"]
    test_category = data["test_category"]

    print("파일 업로드 성공")

    return redirect(url_for("plural_check",
                            client_id=client_id,
                            test_name=test_name,
                            copy_num=copy_num,
                            total_qna_num=total_qna_num,
                            testee_num=testee_num,
                            test_category=test_category))
|
||
|
||
# Multi-testee grading
@app.route("/plural", methods=["GET"])
def plural_check():
    """Grade the files uploaded for a client and return the result as JSON.

    Query parameters: ``client_id``, ``test_name``, ``copy_num``,
    ``total_qna_num``, ``testee_num`` and (repeatable) ``test_category``.
    They are forwarded to ``pointchecker`` together with the client's upload
    folder. The JSON result is also written to ``data.json`` in that folder.

    Returns:
        ``(json_string, 200)`` on success, ``("Error Occured", 200)`` when
        ``pointchecker`` yields an empty frame, or a ``(message, 400)`` tuple
        when ``client_id`` is missing.
    """
    client_id = request.args.get("client_id", type=str)
    if client_id is None:
        # Without this guard the path concatenation below raises TypeError.
        return "Missing 'client_id' query parameter", 400

    # NOTE(review): test_name is parsed with type=int, so a non-numeric name
    # would come through as None -- confirm what pointchecker expects.
    test_name = request.args.get("test_name", type=int)
    copy_num = request.args.get("copy_num", type=int)
    total_qna_num = request.args.get("total_qna_num", type=int)
    testee_num = request.args.get("testee_num", type=int)
    test_category = request.args.getlist("test_category")

    id_path = UPLOAD_FOLDER + "/" + client_id

    start = time.time()
    df = pointchecker(id_path, test_name, copy_num, total_qna_num, testee_num, test_category)
    point_eta = time.time() - start

    # Check for an empty result before touching its columns: set_index on a
    # frame that lacks "testee_id"/"file" would raise instead of reaching
    # the error response.
    if len(df) == 0:
        print("point_eta: " + f"{point_eta:.2f} sec")
        return "Error Occured", 200

    print()
    print(df.set_index(keys=["testee_id", "file"], drop=True))
    print()
    print("point_eta: " + f"{point_eta:.2f} sec")

    json_data = df.to_json(orient="records")
    json_path = id_path + "/" + "data.json"

    # json_data is already a JSON string, so the file ends up holding a
    # JSON-encoded string. Kept as-is so existing readers of data.json keep
    # getting the same content.
    with open(json_path, 'w') as f:
        json.dump(json_data, f)

    return json_data, 200
|
||
|
||
# Single-testee grading
@app.route("/single")
def single_check():
    """Grade one testee's upload (multiple-choice and subjective answers).

    Reads the ``id`` query parameter, converts the PDFs in that client's
    upload folder to JPG, categorises the questions, runs both detectors,
    merges their results, writes them to ``df.xlsx`` in the client folder and
    returns them as a JSON string with status 200.
    """
    client_id = request.args.get("id", type=str)
    id_path = UPLOAD_FOLDER + "/" + client_id

    # Create the folders the results are written to.
    for folder in (id_path, id_path + "/mul", id_path + "/sub"):
        makeFolder(folder)

    print_intro()

    pdf_paths = os_sorted(Path(id_path).glob('*.pdf'))
    convertPdfToJpg(pdf_paths, id_path)

    # Question categorisation (timed).
    started = time.time()
    categorize_qna(id_path)
    qna_eta = time.time() - started

    # Multiple-choice detection (timed, including the sort).
    started = time.time()
    mul_df = detect_multiple(id_path)
    mul_df.sort_values(by=["num"], inplace=True)
    mul_eta = time.time() - started
    print()
    print_full(mul_df)

    # Subjective-answer detection (timed, including the sort).
    started = time.time()
    sub_df = detect_subjective(id_path)
    sub_df.sort_values(by=["num"], inplace=True)
    sub_eta = time.time() - started
    print()
    print_full(sub_df)

    # Merge both result sets and order them by question number.
    merged = pd.concat([mul_df, sub_df], axis=0, ignore_index=True)
    merged.sort_values(by=["num"], inplace=True)
    final_df = dfToFinalDf(merged)
    print()
    print_full(final_df)

    final_df.to_excel(excel_writer=id_path + "/df.xlsx")

    print(f"qna_eta: {qna_eta:.2f} sec")
    print(f"mul_eta: {mul_eta:.2f} sec")
    print(f"sub_eta: {sub_eta:.2f} sec")

    print_outro()

    return final_df.to_json(orient="records"), 200
|
||
|
||
# mul_test
@app.route("/mul_test")
def mul_check():
    """Run only the multiple-choice pipeline for one client (test endpoint).

    Reads the ``id`` query parameter, converts the PDFs in that client's
    upload folder to JPG, categorises the questions, runs the multiple-choice
    detector, writes the final table to ``df.xlsx`` in the client folder and
    returns it as a JSON string with status 200.
    """
    client_id = request.args.get("id", type=str)
    id_path = UPLOAD_FOLDER + "/" + client_id
    mul_path = id_path + "/mul"

    # Create the folders the results are written to.
    makeFolder(id_path)
    makeFolder(mul_path)

    print_intro()

    original_pdf_file_path_list = os_sorted(Path(id_path).glob('*.pdf'))
    convertPdfToJpg(original_pdf_file_path_list, id_path)

    start = time.time()
    categorize_qna(id_path)
    end = time.time()

    print("\ncategorize_qna eta: " + f"{end - start:.2f} sec")

    mul_df = detect_multiple(id_path)
    mul_df.sort_values(by=["num"], inplace=True)
    print()
    print_full(mul_df)

    # Only multiple-choice results exist here, so the combined frame is the
    # same object as mul_df; the sort and print are kept from the original.
    df = mul_df
    df.sort_values(by=["num"], inplace=True)
    print()
    print_full(df)

    final_df = dfToFinalDf(df)
    print()
    # Show the converted table (the original printed `df` a second time).
    print_full(final_df)

    final_df.to_excel(excel_writer=id_path + "/df.xlsx")

    print_outro()

    json_data = final_df.to_json(orient="records")
    return json_data, 200
|
||
|
||
if __name__ == "__main__":
    # Started directly as a script: serve on port 8080 with host 0.0.0.0.
    app.run(port=8080, host="0.0.0.0")
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import os | ||
import sys | ||
import easyocr | ||
|
||
import numpy as np | ||
|
||
from PIL import Image | ||
|
||
from utils import getNumText | ||
from utils import * | ||
|
||
sys.path.append(os.path.dirname(os.getcwd() + "/models/tamil_ocr/ocr_tamil")) | ||
from ocr_tamil.ocr import OCR | ||
|
||
|
||
### 텍스트 부분 잘라내기 함수 ### | ||
|
||
# 각 jpg에 적힌 코드 인식해서 이름 매칭 | ||
# testee jpg df 생성 | ||
# - testee_jpg_df = pd.DataFrame(columns=["file", "testee_id", "page"]) | ||
# - name은 page가 1일 때만 인식 (학생 이름은 각 시험지 첫 장에만 적혀 있기 때문임) | ||
# 식별코드: testee_id - page (ex. 3-2라면, testee_id=3, page=2) | ||
|
||
### Read the testee name from the top-right corner of a page ###

# Lazily created, shared OCR instance: building OCR() once per call repeats
# the model set-up for every first page that is processed.
_tamil_ocr = None


def _getTamilOcr():
    """Return the shared OCR instance, creating it on first use."""
    global _tamil_ocr
    if _tamil_ocr is None:
        _tamil_ocr = OCR()
    return _tamil_ocr


def readTesteeName(img, reader):
    """Return the text recognised in the top-right name box of *img*.

    Args:
        img: page image; the region (610, 30)-(750, 90) is cropped from it.
        reader: unused; kept so existing callers keep working.

    Returns:
        The result of ``getNumText`` applied to the OCR prediction for the
        cropped region.
    """
    x1, y1, x2, y2 = (610, 30, 750, 90)
    cropped_img = img.crop((x1, y1, x2, y2))
    image_np = np.array(cropped_img)

    # Uses the tamil OCR model rather than the easyocr reader.
    ocr_text = _getTamilOcr().predict(image_np)
    return getNumText(ocr_text)
|
||
|
||
### Extract testee_id and page from recognised text ###
def extractTesteeId(text):
    """Split the first recognised string into ``(testee_id, page)``.

    The first element of *text* is split on ``'-'``; e.g. ``"3-2"`` gives
    ``("3", "2")``. Both parts are stripped of surrounding whitespace.

    Returns:
        ``("", "")`` when *text* is empty or None, ``(testee_id, None)`` when
        the string contains no ``'-'``, otherwise ``(testee_id, page)``.
    """
    if not text:
        return "", ""

    parts = text[0].split('-')
    page = None
    if len(parts) > 1:
        page = parts[1].strip()
    return parts[0].strip(), page
|
||
|
||
### testee_jpg_df에 id_match 추가 | ||
def testeeIdJpgDf(df, testee_jpg_df, id_match): | ||
# df = pd.DataFrame(columns=["index_id", "testee_id", "testee_name", "file", "page"]) | ||
for testee_jpg_df_idx, testee_jpg_df_row in testee_jpg_df.iterrows(): | ||
index_id = testee_jpg_df_row["index_id"] | ||
testee_id = testee_jpg_df_row["testee_id"] | ||
testee_name = id_match.loc[index_id, "testee_name"] | ||
file = testee_jpg_df_row["file"] | ||
page = testee_jpg_df_row["page"] | ||
df.loc[len(df)] = [index_id, testee_id, testee_name, file, page] | ||
|
||
return df | ||
|
||
|
||
### Crop and read the identification text of every page (main) ###
def testeeCodeRecognition(jpg_file_path_list, testee_jpg_df):
    """Read the id code of every page image and collect testee names.

    Each file is resized to 794x1123 so the crop boxes land on the same
    region, the top-left code box is read with easyocr and split into
    ``testee_id`` and ``page``. When the page is ``"1"`` the name box is read
    as well, a ``[testee_id, testee_name]`` row is added to the match table
    and the running ``index_id`` is increased. Every file then gets a row
    ``[index_id, file, testee_id, page]`` appended to *testee_jpg_df*.

    Returns:
        ``(testee_jpg_df, id_match)`` where ``id_match`` has the columns
        ``testee_id`` and ``testee_name`` and an index shifted up by one.
    """
    # easyocr reader for the id code.
    reader = easyocr.Reader(['ko', 'en'])

    # Match table between testee_id and testee_name.
    id_match = pd.DataFrame(columns=["testee_id", "testee_name"])

    page_size = (794, 1123)
    code_box = (35, 35, 160, 90)  # top-left box with the id code and page
    index_id = 0

    for file in jpg_file_path_list:
        # Normalise the image size so the crop position is the same.
        page_img = Image.open(file).resize(page_size, Image.LANCZOS)

        code_np = np.array(page_img.crop(code_box))
        recognised = reader.readtext(code_np, detail=0)
        testee_id, page = extractTesteeId(recognised)

        # On page "1" the name is read from the top-right corner and
        # recorded in the match table.
        if page == "1":
            testee_name = readTesteeName(page_img, reader)
            id_match.loc[len(id_match)] = [testee_id, testee_name]
            index_id += 1

        testee_jpg_df.loc[len(testee_jpg_df)] = [index_id, file, testee_id, page]

    # Shift the index up by one; index_id was incremented before each page
    # row was stored.
    id_match.index += 1

    return testee_jpg_df, id_match
Oops, something went wrong.