-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
118 lines (96 loc) · 3.67 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import tempfile
from tempfile import NamedTemporaryFile

import img2pdf
from fastapi import BackgroundTasks, FastAPI, File, UploadFile
from fastapi.responses import FileResponse, JSONResponse
from PIL import Image
from vision_parse import VisionParser
# FastAPI application instance; the @app.post / @app.get decorators in this
# module register their handlers on it.
app = FastAPI(title="PDF Parser API")
def format_markdown_content(markdown_pages: list[str]) -> str:
    """Merge per-page markdown into one document with page headers.

    Each page is emitted as a ``## Page N`` heading (1-based), a blank line,
    the page text, a blank line and a ``---`` rule.

    Args:
        markdown_pages: Markdown text of each page, in page order.

    Returns:
        The concatenated document; an empty string when there are no pages.
    """
    # str.join builds the result in one pass instead of repeated `+=`.
    return "".join(
        f"\n## Page {page_number}\n\n{page_content}\n\n---\n"
        for page_number, page_content in enumerate(markdown_pages, start=1)
    )
def convert_image_to_pdf(image_path: str) -> str:
    """Convert an image file to a PDF and return the path to the PDF.

    Images whose Pillow mode is not ``"RGB"`` are first re-encoded as an RGB
    JPEG written to ``<image_path>_rgb.jpg`` (lossy; any alpha channel is
    dropped), and that copy is what gets embedded. The temporary copy is
    always removed before returning, even when the conversion fails.

    Args:
        image_path: Path to the source image on disk.

    Returns:
        Path of the written PDF: ``<image_path>.pdf`` for RGB inputs, or
        ``<image_path>_rgb.jpg.pdf`` when an RGB copy had to be made. The
        caller owns this file and is responsible for deleting it.

    Raises:
        Whatever Pillow, img2pdf or the file system raise; nothing is caught
        here.
    """
    # Track the temporary copy explicitly instead of guessing from the file
    # name, so an input that happens to end in "_rgb.jpg" is never deleted.
    rgb_copy_path = None
    try:
        with Image.open(image_path) as img:
            if img.mode != "RGB":
                rgb_copy_path = image_path + "_rgb.jpg"
                img.convert("RGB").save(rgb_copy_path, "JPEG")

        source_path = rgb_copy_path if rgb_copy_path is not None else image_path

        # Convert before opening the output so a failed conversion does not
        # leave an empty PDF behind.
        pdf_bytes = img2pdf.convert(
            source_path, rotation=img2pdf.Rotation.ifvalid
        )
        pdf_path = source_path + ".pdf"
        with open(pdf_path, "wb") as pdf_file:
            pdf_file.write(pdf_bytes)
    finally:
        # The save may have failed before the file was created, hence the
        # existence check.
        if rgb_copy_path is not None and os.path.exists(rgb_copy_path):
            os.unlink(rgb_copy_path)

    return pdf_path
@app.post("/parse")
async def parse_pdf(file: UploadFile = File(...)):
    """Convert an uploaded PDF or image to markdown and return it as a file.

    The upload is written to a temporary file; images are first converted to
    a PDF with ``convert_image_to_pdf``. The PDF is passed to
    ``VisionParser.convert_pdf`` and the resulting pages are merged with
    ``format_markdown_content``. All temporary files are removed: the inputs
    as soon as parsing finishes, the markdown file after the response has
    been sent.

    Args:
        file: The uploaded document. Its filename must end in ``.pdf``,
            ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive).

    Returns:
        A ``FileResponse`` carrying the markdown as ``<upload name>.md``, or
        a 400 ``JSONResponse`` when the filename is missing or has an
        unsupported extension.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")

    # The filename can be absent on a malformed upload; treat that as an
    # unsupported file instead of failing with AttributeError.
    filename = file.filename or ""
    lowered_name = filename.lower()
    if not lowered_name.endswith((".pdf",) + image_suffixes):
        return JSONResponse(
            status_code=400,
            content={"error": "File must be a PDF or image (jpg, jpeg, png)"},
        )
    is_image = lowered_name.endswith(image_suffixes)

    # Read the upload before creating the temp file so a failed read cannot
    # leave an orphaned file behind.
    content = await file.read()
    with NamedTemporaryFile(
        delete=False, suffix=os.path.splitext(filename)[1]
    ) as temp_file:
        temp_file.write(content)
        temp_file_path = temp_file.name

    file_to_parse = temp_file_path
    try:
        # Convert image to PDF if necessary
        if is_image:
            file_to_parse = convert_image_to_pdf(temp_file_path)

        parser = VisionParser(
            model_name="gemini-1.5-flash",
            api_key=os.getenv("GEMINI_API_KEY"),
            temperature=0.9,
            top_p=0.4,
            max_output_tokens=8192,
            image_mode="url",
            detailed_extraction=True,
        )

        # NOTE(review): this is a synchronous call inside an async handler
        # and presumably blocks the event loop while the document is parsed;
        # confirm whether it should be moved to a worker thread.
        markdown_pages = parser.convert_pdf(file_to_parse)
        markdown_content = format_markdown_content(markdown_pages)

        # Explicit UTF-8 so non-ASCII text does not depend on the platform's
        # default encoding.
        with NamedTemporaryFile(
            delete=False, mode="w", suffix=".md", encoding="utf-8"
        ) as md_file:
            md_file.write(markdown_content)
            md_file_path = md_file.name
    finally:
        # Best-effort cleanup of the upload and, for images, the derived PDF.
        # A file that is already gone must not mask the real exception.
        for path in {temp_file_path, file_to_parse}:
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass

    # Strip any directory components the client put in the filename.
    output_filename = os.path.splitext(os.path.basename(filename))[0] + ".md"

    # Delete the markdown temp file once the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.unlink, md_file_path)
    return FileResponse(
        md_file_path,
        media_type="text/markdown",
        filename=output_filename,
        background=cleanup,
    )
@app.get("/")
async def root():
    """Return a welcome message and a short index of the API's endpoints."""
    endpoint_index = {
        "POST /parse": "Upload a PDF or image file to convert it to markdown"
    }
    return {"message": "Welcome to PDF Parser API", "endpoints": endpoint_index}