Skip to content

Commit

Permalink
GPU 프로젝트 업로드
Browse files Browse the repository at this point in the history
  • Loading branch information
gongkeo committed May 24, 2024
1 parent 1b00ede commit 2f44511
Show file tree
Hide file tree
Showing 129 changed files with 2,683 additions and 0 deletions.
251 changes: 251 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
from flask import Flask, request, redirect, url_for

import os
import time
import json
import shutil
import datetime
import pandas as pd

from pathlib import Path
from natsort import os_sorted

from main import pointchecker
from qna import categorize_qna
from mul import detect_multiple
from sub import detect_subjective
from path import *
from utils import *

# Flask application object; the @app.route decorators in this file register their views on it.
app = Flask(__name__)

# Extensions accepted for uploads; kept lowercase because allowed_file()
# lowercases the candidate extension before the membership test.
ALLOWED_FILE_EXTENSIONS = {'pdf', 'png', 'jpg', 'jpeg'}


def allowed_file(filename):
    """Return True when *filename* ends in one of ALLOWED_FILE_EXTENSIONS.

    The extension is everything after the last dot and is compared
    case-insensitively, so ``scan.PDF`` is accepted just like ``scan.pdf``.
    Empty/None names and names without a dot are rejected.
    """
    if not filename or '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_FILE_EXTENSIONS


@app.route("/")
def hello():
    """Landing page: greet the caller and show the id returned by getId()."""
    # id generation rule (per the original note): client IP + access time.
    generated_id = getId()
    return f'Hello World! <br><br> id : {generated_id}'

@app.route("/id/<client_id>", methods=["GET"])
def get_json(client_id):
    """Return the stored result ``data.json`` for *client_id*.

    The URL variable is named ``client_id`` so that it matches the view's
    parameter; Flask passes URL variables as keyword arguments, so the former
    ``<id>`` rule raised TypeError on every request.

    Args:
        client_id: name of the sub-folder under ``UPLOAD_FOLDER``.

    Returns:
        ``(json_text, 200)`` on success, or a 404 when no result file exists.
    """
    json_path = os.path.join(UPLOAD_FOLDER, client_id, "data.json")

    try:
        with open(json_path) as f:
            payload = json.load(f)
    except FileNotFoundError:
        return "Result not found", 404

    # plural_check writes the already-serialised records text through
    # json.dump, so loading normally yields a str. Re-serialise anything else
    # so the response body is always JSON text.
    if not isinstance(payload, str):
        payload = json.dumps(payload)

    return payload, 200


@app.route("/upload", methods=["POST"])
def upload_files():
    """Store an uploaded file for a client and redirect to the grading view.

    Expects a multipart POST with:
        * form field ``data``: a JSON object containing ``client_id``,
          ``test_name``, ``copy_num``, ``total_qna_num``, ``testee_num`` and
          ``test_category``;
        * file part ``pdf``: the upload, whose extension must pass
          ``allowed_file``.

    Returns:
        A redirect to ``plural_check`` carrying the metadata as query
        parameters, or a 400 response when the request is malformed.
    """
    raw_data = request.form.get("data")
    if raw_data is None:
        return "Missing 'data' form field", 400
    try:
        data = json.loads(raw_data)
    except json.JSONDecodeError:
        return "'data' is not valid JSON", 400

    required_keys = ("client_id", "test_name", "copy_num",
                     "total_qna_num", "testee_num", "test_category")
    if not isinstance(data, dict) or any(key not in data for key in required_keys):
        return "'data' is missing required fields", 400

    # client_id becomes a directory name, so refuse anything that could step
    # outside UPLOAD_FOLDER (path separators, "." or "..").
    client_id = str(data["client_id"])
    if client_id in ("", ".", "..") or os.path.basename(client_id) != client_id:
        return "Invalid client_id", 400
    id_path = UPLOAD_FOLDER + "/" + client_id

    print(client_id)
    print(id_path)

    # Keep only the final path component of the client-supplied filename.
    # NOTE(review): werkzeug.utils.secure_filename would be a stricter choice.
    pdf = request.files.get("pdf")
    pdf_name = os.path.basename(pdf.filename) if pdf else ""
    if not pdf_name or not allowed_file(pdf_name):
        # Previously a rejected file was skipped silently and the request
        # still reported success and redirected to grading.
        return "Missing or unsupported file", 400

    # Creates UPLOAD_FOLDER and the per-client folder in one step; real
    # filesystem errors now surface instead of being swallowed.
    os.makedirs(id_path, exist_ok=True)
    pdf.save(os.path.join(id_path, pdf_name))

    print("파일 업로드 성공")

    return redirect(url_for("plural_check",
                            client_id=client_id,
                            test_name=data["test_name"],
                            copy_num=data["copy_num"],
                            total_qna_num=data["total_qna_num"],
                            testee_num=data["testee_num"],
                            test_category=data["test_category"]))


# 다인용
@app.route("/plural", methods=["GET"])
def plural_check():
    """Run ``pointchecker`` on a client's upload folder and return the result.

    Query parameters:
        client_id: sub-folder of ``UPLOAD_FOLDER`` holding the upload.
        test_name: name of the test (text).
        copy_num, total_qna_num, testee_num: integers.
        test_category: may be repeated; collected into a list.

    Returns:
        ``(json_text, 200)`` with the records of the resulting DataFrame,
        ``("Error Occured", 200)`` when the result is empty, or a 400 when
        ``client_id`` is missing.

    Side effects:
        Writes the result to ``<id_path>/data.json``.
    """
    client_id = request.args.get("client_id", type=str)
    if not client_id:
        # Without this guard the path concatenation below raises TypeError.
        return "Missing client_id", 400

    # Parsed as text: with type=int any non-numeric name fails conversion and
    # silently becomes None.
    test_name = request.args.get("test_name", type=str)
    copy_num = request.args.get("copy_num", type=int)
    total_qna_num = request.args.get("total_qna_num", type=int)
    testee_num = request.args.get("testee_num", type=int)
    test_category = request.args.getlist("test_category")

    id_path = UPLOAD_FOLDER + "/" + client_id

    start = time.time()
    df = pointchecker(id_path, test_name, copy_num, total_qna_num, testee_num, test_category)
    point_eta = time.time() - start

    # Only index/print a non-empty result: set_index raises KeyError when the
    # frame lacks the key columns, which would mask the error reply below.
    result_is_empty = len(df) == 0
    if not result_is_empty:
        print()
        print(df.set_index(keys=["testee_id", "file"], drop=True))
        print()
    print("point_eta: " + f"{point_eta:.2f} sec")

    if result_is_empty:
        return "Error Occured", 200

    json_data = df.to_json(orient="records")
    json_path = id_path + "/" + "data.json"

    # json_data is already JSON text, so json.dump stores it as one JSON
    # string; kept as-is because get_json reads the file back with that format.
    with open(json_path, 'w') as f:
        json.dump(json_data, f)

    return json_data, 200


# 1인용
@app.route("/single")
def single_check():
    """Grade one uploaded exam folder and return the merged answers as JSON.

    Query parameters:
        id: sub-folder of ``UPLOAD_FOLDER`` to process.

    Steps: create the result folders, convert the folder's PDFs with
    ``convertPdfToJpg``, run ``categorize_qna``, then ``detect_multiple`` and
    ``detect_subjective``; both frames are concatenated, sorted by ``num``,
    passed through ``dfToFinalDf`` and written to ``<id_path>/df.xlsx``.

    Returns:
        ``(json_text, 200)`` with the final frame's records, or a 400 when
        ``id`` is missing.
    """
    client_id = request.args.get("id", type=str)
    if not client_id:
        # Without this guard the path concatenation below raises TypeError.
        return "Missing id", 400

    id_path = UPLOAD_FOLDER + "/" + client_id
    mul_path = id_path + "/mul"
    sub_path = id_path + "/sub"

    # Create the folders the results are stored in.
    for folder in (id_path, mul_path, sub_path):
        makeFolder(folder)

    print_intro()

    original_pdf_file_path_list = os_sorted(Path(id_path).glob('*.pdf'))
    convertPdfToJpg(original_pdf_file_path_list, id_path)

    started = time.time()
    categorize_qna(id_path)
    qna_eta = time.time() - started

    started = time.time()
    mul_df = detect_multiple(id_path)
    mul_df.sort_values(by=["num"], inplace=True)
    mul_eta = time.time() - started
    print()
    print_full(mul_df)

    started = time.time()
    sub_df = detect_subjective(id_path)
    sub_df.sort_values(by=["num"], inplace=True)
    sub_eta = time.time() - started
    print()
    print_full(sub_df)

    df = pd.concat([mul_df, sub_df], axis=0, ignore_index=True)
    df.sort_values(by=["num"], inplace=True)
    final_df = dfToFinalDf(df)
    print()
    print_full(final_df)

    final_df.to_excel(excel_writer=id_path+"/df.xlsx")

    print("qna_eta: " + f"{qna_eta:.2f} sec")
    print("mul_eta: " + f"{mul_eta:.2f} sec")
    print("sub_eta: " + f"{sub_eta:.2f} sec")

    print_outro()

    json_data = final_df.to_json(orient="records")
    return json_data, 200


# mul_test
@app.route("/mul_test")
def mul_check():
    """Test endpoint: run only the ``detect_multiple`` stage on a folder.

    Query parameters:
        id: sub-folder of ``UPLOAD_FOLDER`` to process.

    Steps: create the result folders, convert the folder's PDFs with
    ``convertPdfToJpg``, run ``categorize_qna`` and ``detect_multiple``, sort
    by ``num``, pass the frame through ``dfToFinalDf`` and write it to
    ``<id_path>/df.xlsx``.

    Returns:
        ``(json_text, 200)`` with the final frame's records, or a 400 when
        ``id`` is missing.
    """
    client_id = request.args.get("id", type=str)
    if not client_id:
        # Without this guard the path concatenation below raises TypeError.
        return "Missing id", 400

    id_path = UPLOAD_FOLDER + "/" + client_id
    mul_path = id_path + "/mul"

    # Create the folders the results are stored in.
    makeFolder(id_path)
    makeFolder(mul_path)

    print_intro()

    original_pdf_file_path_list = os_sorted(Path(id_path).glob('*.pdf'))
    convertPdfToJpg(original_pdf_file_path_list, id_path)

    start = time.time()
    categorize_qna(id_path)
    end = time.time()

    print("\ncategorize_qna eta: " + f"{end - start:.2f} sec")

    # The original aliased this frame as ``df`` and then sorted and printed
    # the same object a second time; one pass is enough.
    mul_df = detect_multiple(id_path)
    mul_df.sort_values(by=["num"], inplace=True)
    print()
    print_full(mul_df)

    final_df = dfToFinalDf(mul_df)
    print()
    # Show the converted frame (as single_check does); the original printed
    # the pre-conversion frame here.
    print_full(final_df)

    final_df.to_excel(excel_writer=id_path+"/df.xlsx")

    print_outro()

    json_data = final_df.to_json(orient="records")
    return json_data, 200


if __name__ == "__main__":
    # Script entry point: start the server bound to 0.0.0.0 on port 8080.
    app.run(
        port=8080,
        host="0.0.0.0",
    )
Binary file added assets/answersheet_test.xlsx
Binary file not shown.
Binary file added assets/btn_img/button_1_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/button_1_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/button_2_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/button_2_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/button_2_3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/button_2_4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/button_3_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/tool1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/btn_img/tool2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/malgun.ttf
Binary file not shown.
Binary file added assets/pointchecker.ico
Binary file not shown.
100 changes: 100 additions & 0 deletions id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import os
import sys
import easyocr

import numpy as np

from PIL import Image

from utils import getNumText
from utils import *

sys.path.append(os.path.dirname(os.getcwd() + "/models/tamil_ocr/ocr_tamil"))
from ocr_tamil.ocr import OCR


### 텍스트 부분 잘라내기 함수 ###

# 각 jpg에 적힌 코드 인식해서 이름 매칭
# testee jpg df 생성
# - testee_jpg_df = pd.DataFrame(columns=["index_id", "file", "testee_id", "page"])
# - name은 page가 1일 때만 인식 (학생 이름은 각 시험지 첫 장에만 적혀 있기 때문임)
# 식별코드: testee_id - page (ex. 3-2라면, testee_id=3, page=2)

### 오른쪽 상단 testee_name 인식 함수 ###
def readTesteeName(img, reader):
    """Read the testee name from the top-right region of a page image.

    Args:
        img: PIL image; the fixed box (610, 30, 750, 90) is cropped from it.
        reader: accepted for call compatibility; not used in this function.

    Returns:
        Whatever ``getNumText`` produces from the tamil-ocr prediction.
    """
    name_box = (610, 30, 750, 90)
    name_region = np.array(img.crop(name_box))

    # Recognition is done with tamil-ocr.
    # NOTE(review): a new OCR() is constructed on every call; if construction
    # is costly, consider sharing one instance.
    prediction = OCR().predict(name_region)
    return getNumText(prediction)


### 텍스트에서 testee_id와 page를 추출하는 함수 ###
def extractTesteeId(text):
    """Split the first recognised string into ``(testee_id, page)``.

    Args:
        text: sequence of recognised strings; only the first one is inspected
            and it is expected to look like ``"<testee_id>-<page>"``.

    Returns:
        ``(testee_id, page)`` with surrounding whitespace stripped; ``page``
        is None when the string contains no ``-``. An empty or None *text*
        yields ``("", "")``.
    """
    if not text:
        return "", ""

    id_part, *remaining = text[0].split('-')
    page = remaining[0].strip() if remaining else None
    return id_part.strip(), page


### testee_jpg_df에 id_match 추가
def testeeIdJpgDf(df, testee_jpg_df, id_match):
# df = pd.DataFrame(columns=["index_id", "testee_id", "testee_name", "file", "page"])
for testee_jpg_df_idx, testee_jpg_df_row in testee_jpg_df.iterrows():
index_id = testee_jpg_df_row["index_id"]
testee_id = testee_jpg_df_row["testee_id"]
testee_name = id_match.loc[index_id, "testee_name"]
file = testee_jpg_df_row["file"]
page = testee_jpg_df_row["page"]
df.loc[len(df)] = [index_id, testee_id, testee_name, file, page]

return df


### 텍스트 부분 잘라내기 함수 (메인) ###
def testeeCodeRecognition(jpg_file_path_list, testee_jpg_df):
    """Read the id code on every page image and collect testee names.

    For each image the top-left box is OCR'd with easyOCR and parsed by
    ``extractTesteeId`` into ``(testee_id, page)``. When ``page == "1"`` the
    name is read with ``readTesteeName`` and recorded in ``id_match``, and the
    running ``index_id`` is incremented, so later pages share the index_id of
    the most recent first page.

    Args:
        jpg_file_path_list: iterable of image paths, processed in order.
        testee_jpg_df: frame that receives one row
            ``[index_id, file, testee_id, page]`` per image (mutated in
            place). Assumes its four columns are in that order; verify
            against the caller.

    Returns:
        ``(testee_jpg_df, id_match)``; ``id_match`` has columns
        ["testee_id", "testee_name"] and its index is shifted by +1 so that it
        lines up with the index_id values.
    """
    # Local import: none of this file's visible imports binds ``pd``, so the
    # function otherwise depends on ``from utils import *`` providing it.
    import pandas as pd

    # easyOCR is used for the id code.
    reader = easyocr.Reader(['ko', 'en'])

    id_match = pd.DataFrame(columns=["testee_id", "testee_name"])

    # Loop invariants: the normalised page size and the top-left code box.
    page_size = (794, 1123)
    code_box = (35, 35, 160, 90)

    index_id = 0

    for file in jpg_file_path_list:
        # Resize every page to one size so the crop boxes land on the same
        # spot. resize() returns a new image, so the opened file can be closed
        # right away instead of staying open.
        with Image.open(file) as raw_img:
            img = raw_img.resize(page_size, Image.LANCZOS)

        # Top-left corner holds "<testee_id>-<page>".
        image_np = np.array(img.crop(code_box))
        text = reader.readtext(image_np, detail=0)
        testee_id, page = extractTesteeId(text)

        # The name is read only on first pages; record it in id_match.
        if page == "1":
            testee_name = readTesteeName(img, reader)
            id_match.loc[len(id_match)] = [testee_id, testee_name]
            index_id += 1

        testee_jpg_df.loc[len(testee_jpg_df)] = [index_id, file, testee_id, page]

    # index_id starts counting at 1, so shift id_match's index to match.
    id_match.index += 1

    return testee_jpg_df, id_match
Loading

0 comments on commit 2f44511

Please sign in to comment.