-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_bookmarks_to_pdf.py
138 lines (117 loc) · 5.4 KB
/
add_bookmarks_to_pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import tkinter as tk
from tkinter import filedialog
import shutil
from pypdf import PdfWriter
import pymupdf
from PIL import Image, ImageTk
def get_file_path(prompt):
    """Ask the user to pick a file through an "open file" dialog.

    Args:
        prompt: Text echoed to the console and used as the dialog title.

    Returns:
        The value returned by ``filedialog.askopenfilename``. A falsy value
        is reported on the console as "No file selected."
    """
    print(prompt)

    # The dialog needs a Tk root; keep it hidden and dispose of it right
    # after the dialog returns.
    hidden_window = tk.Tk()
    hidden_window.withdraw()
    chosen = filedialog.askopenfilename(title=prompt)
    hidden_window.destroy()

    status = f"Selected file: {chosen}" if chosen else "No file selected."
    print(status)
    return chosen
def read_bookmarks(file_path):
    """Parse a bookmarks text file into a list of bookmark tuples.

    Every non-blank line is expected to hold at least three comma-separated
    fields: ``hierarchy, name, page``. Surrounding whitespace of each field
    is stripped, ``page`` is read as a 1-based integer, and any fields after
    the third are ignored.

    Args:
        file_path: Path of the UTF-8 encoded bookmarks file.

    Returns:
        A list of ``(hierarchy, name, page_index)`` tuples in file order,
        where ``page_index`` is the zero-based page number.

    Raises:
        ValueError: If a non-blank line has fewer than three fields or its
            page field is not an integer. The message names the line.
    """
    print(f"Reading bookmarks from {file_path}")
    bookmarks = []
    # "utf-8-sig" also reads plain UTF-8, but drops a leading byte-order mark
    # so it cannot end up inside the first hierarchy string.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. trailing newlines) carry no bookmark.
                continue
            parts = [part.strip() for part in stripped.split(',')]
            if len(parts) < 3:
                raise ValueError(
                    f"Line {line_number}: expected 'hierarchy, name, page' but got {stripped!r}"
                )
            hierarchy, name = parts[0], parts[1]
            try:
                page_number = int(parts[2]) - 1  # Convert to zero-based index
            except ValueError as err:
                raise ValueError(
                    f"Line {line_number}: page number {parts[2]!r} is not an integer"
                ) from err
            bookmarks.append((hierarchy, name, page_number))
            print(f"Read bookmark: hierarchy: {hierarchy}, name: {name}, page: {page_number}")
    print(f"Total bookmarks read: {len(bookmarks)}")
    return bookmarks
def update_pdf_page_display(pdf_page, root, label):
    """Render a PDF page and show it in the given label.

    Args:
        pdf_page: Page object providing ``get_pixmap()``.
        root: Tk window that is refreshed after the image is swapped.
        label: Widget whose ``image`` option is set to the rendered page.
    """
    pixmap = pdf_page.get_pixmap()

    # NOTE(review): the samples are interpreted as packed RGB - assumes the
    # pixmap has no alpha channel; confirm against get_pixmap() defaults.
    rendered = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)
    photo = ImageTk.PhotoImage(rendered)

    label.config(image=photo)
    # Store the photo on the widget as well; presumably this keeps the image
    # object referenced for as long as the label displays it.
    label.image = photo

    root.update()
def _backup_path_for(pdf_path):
    """Return the backup path for *pdf_path*: ``_ORIGINAL`` inserted before the extension."""
    import os  # Local import keeps this helper self-contained.

    # splitext only touches the final extension, whatever its case, so the
    # backup path can never equal the source path.
    stem, extension = os.path.splitext(pdf_path)
    return f"{stem}_ORIGINAL{extension}"


def _confirm_page_number(pdf_document, root, label, hierarchy, name, page_number):
    """Show pages until the user confirms one; return its zero-based index."""
    page_count = pdf_document.page_count
    while True:
        # Display the page currently proposed for this bookmark
        update_pdf_page_display(pdf_document.load_page(page_number), root, label)
        # Ask the user to confirm the page number or provide an offset
        user_input = input(f"Is this page ({page_number + 1}) correct for bookmark '{hierarchy} {name}'?\nIf so, enter 'yes'/'y'. Otherwise enter a page offset to find the actual page: ").strip().lower()
        if user_input in ('yes', 'y'):
            return page_number
        try:
            additional_offset = int(user_input)
        except ValueError:
            print("Invalid input. Please enter 'yes', 'y', or a valid (signed) integer offset.")
            continue
        candidate = page_number + additional_offset
        if 0 <= candidate < page_count:
            page_number = candidate
        else:
            # Reject the offset instead of letting the next load_page call
            # fail or show an unintended page.
            print(f"Offset {additional_offset} leads to page {candidate + 1}, which is outside the document (1-{page_count}). Please try again.")


def add_bookmarks_to_pdf(pdf_path, bookmarks):
    """Interactively add outline entries (bookmarks) to a PDF, in place.

    A backup copy of the PDF is written next to it first. For every bookmark
    the proposed page is displayed in a Tk window and the user either
    confirms it on the console or enters a signed page offset. Accepted
    offsets accumulate and are applied to all following bookmarks. Finally
    the PDF at ``pdf_path`` is overwritten with the bookmarked version.

    Args:
        pdf_path: Path of the PDF to modify.
        bookmarks: Iterable of ``(hierarchy, name, page_index)`` tuples.
            ``hierarchy`` is a dot-separated position such as ``"2.1"``; an
            entry without a dot goes to the outline root, any other entry is
            nested under the entry whose hierarchy equals everything before
            the last dot. ``page_index`` is zero-based.

    Raises:
        ValueError: If a bookmark's page (after applying the accumulated
            offset) is outside the document, or if the parent of a nested
            bookmark has not been added before it.
    """
    print(f"Adding bookmarks to PDF: {pdf_path}")

    # Copy the input pdf to a backup file
    destination_path = _backup_path_for(pdf_path)
    shutil.copyfile(pdf_path, destination_path)
    print(f"Backup of original PDF created at {destination_path}")

    print("Opening PDF (This may take some time for large PDFs)...")
    # Open the PDF file to display the pages
    pdf_document = pymupdf.open(pdf_path)
    try:
        # Create a PdfWriter object to write the bookmarks
        pdf_writer = PdfWriter(fileobj=pdf_path, full=True)
        page_count = pdf_document.page_count
        print(f"PDF opened successfully. Total pages: {page_count}")

        # Create a tkinter window to display the pages
        root = tk.Tk()
        try:
            root.title("PDF Page Display")
            # Create a label to display the image
            label = tk.Label(root)
            label.pack()

            print("Adding bookmarks...")
            # Maps a hierarchy string to the outline reference created for it
            references = {}
            current_offset = 0
            for hierarchy, name, original_page_number in bookmarks:
                proposed_page_number = original_page_number + current_offset
                # Ensure the page number is within the valid range
                if proposed_page_number < 0 or proposed_page_number > page_count - 1:
                    raise ValueError(f"Page number {proposed_page_number + 1} for bookmark '{hierarchy} {name}' is out of range.")

                # Let user confirm the page number
                new_page_number = _confirm_page_number(
                    pdf_document, root, label, hierarchy, name, proposed_page_number
                )
                # Carry the user's correction over to the following bookmarks
                current_offset += new_page_number - proposed_page_number

                print(f"Adding bookmark: hierarchy: {hierarchy}, name: {name}, page: {new_page_number + 1}...")
                levels = hierarchy.split('.')
                if len(levels) == 1:
                    # Add bookmark to root
                    print("Adding bookmark to root")
                    current_reference = pdf_writer.add_outline_item(name, new_page_number)
                else:
                    # Find parent: the entry whose hierarchy is this one minus its last level
                    parent_hierarchy = '.'.join(levels[:-1])
                    parent = references.get(parent_hierarchy)
                    if parent is None:
                        # Parent not found
                        raise ValueError(f"Parent not found for bookmark: {name}")
                    # Add bookmark with reference to parent
                    print(f"Adding bookmark under parent with hierarchy '{parent_hierarchy}'")
                    current_reference = pdf_writer.add_outline_item(name, new_page_number, parent)
                # setdefault keeps the first entry when a hierarchy repeats
                references.setdefault(hierarchy, current_reference)
        finally:
            # Close the window showing the PDF page, also when an error occurred
            try:
                root.destroy()
            except tk.TclError:
                # The window is already gone (e.g. closed by the user)
                pass
    finally:
        # Release the viewer's handle on the file before it is overwritten
        pdf_document.close()

    # Save the PDF with bookmarks
    print("Saving PDF with bookmarks...")
    with open(pdf_path, 'wb') as output_pdf:
        pdf_writer.write(output_pdf)
    print(f"PDF with bookmarks written to {pdf_path}")
if __name__ == "__main__":
    # Script entry point: pick both input files, then run the interactive
    # bookmarking workflow.
    print("Starting the bookmark adding process...")
    bookmarks_file = get_file_path("Select the bookmarks file")
    input_pdf = get_file_path("Select the input PDF file")
    # get_file_path returns a falsy value when no file was selected; stop
    # here with a clear message instead of failing later while opening it.
    if not bookmarks_file or not input_pdf:
        raise SystemExit("Both a bookmarks file and an input PDF must be selected. Aborting.")
    bookmarks = read_bookmarks(bookmarks_file)
    add_bookmarks_to_pdf(input_pdf, bookmarks)
    print("Bookmark adding process completed.")