-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf_bookmarking.py
218 lines (178 loc) · 5.9 KB
/
pdf_bookmarking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#! python3
# -*- coding: utf-8 -*-
"""
Created on Tue May 26 15:35:56 2020
@author: Lala Samprit Ray
"""
import os
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger
class MyDictionary(dict):
    """
    A plain ``dict`` extended with a single convenience method.

    Inheritance
    -----------
    dict : every standard dictionary operation is inherited unchanged.

    Methods
    -----------
    add : stores one entry, so callers can write MyDictionary.add(key, value)
    """

    def add(self, key, value):
        """
        Store ``value`` under ``key``, replacing any existing entry.

        Parameters
        ----------
        key : string preferred
            The dictionary key.
        value : int
            The value to store (a page number where this file uses it).

        Returns
        -------
        None.
        """
        self.__setitem__(key, value)
def title_page(string):
    """
    Parse a bookmark list (.txt file) into a {title: pagenum} dictionary.

    Every line of the file must have the form ``<title> @ <page number>``.
    The line is split on its LAST "@", so a title may itself contain "@".

    Parameters
    ----------
    string : string
        Path of the .txt file.

    Returns
    -------
    res_dct : MyDictionary
        Dictionary in the format {title: pagenum}.
        The title is a whitespace-stripped string; the pagenum is the
        integer written in the file minus one. If a title appears on
        several lines, the last line wins.

    Raises
    ------
    ValueError
        If the text after the "@" is not an integer.
    Exception
        If a line is empty or does not contain "@".
    """
    res_dct = MyDictionary()
    # The context manager closes the file even when a bad line raises.
    with open(string, "r") as bookmark_file:
        for raw_line in bookmark_file:
            line = raw_line.strip()
            if "@" not in line:
                raise Exception(
                    "There are empty lines and/or lines without '@' character in the supplied text file. Please remove the empty lines and correct the lines by using @ to separate bookmark title and page num"
                )
            title, _, page = line.rpartition("@")
            try:
                # Stored as page number minus one (zero-based index).
                page_index = int(page.strip()) - 1
            except ValueError as err:
                # int() signals a non-numeric string with ValueError (not
                # TypeError); re-raise the same type with a helpful message.
                raise ValueError(
                    "Check all values after @ perhaps there are non integer values, that is, alphabetical characters present. Remove those characters to proceed."
                ) from err
            res_dct.add(title.strip(), page_index)
    return res_dct
def filesplitter(src, dft):
    """
    Split a PDF into one single-page PDF per page.

    Page ``i`` (counting from 0) of the source is written to
    ``page<i>.pdf`` inside the destination folder.

    Parameters
    ----------
    src : string
        Path of the input pdf file, like "c://tepelasticity.pdf".
        This is the file which has to be split/bookmarked.
    dft : string
        The folder where the single-page files are written.

    Returns
    -------
    None.
    """
    # Keep the source stream open for the whole loop (the reader is handed
    # the open file object), and close it reliably afterwards.
    with open(src, "rb") as source_stream:
        inputpdf = PdfFileReader(source_stream)
        for i in range(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            # os.path.join picks the right separator on every platform.
            page_path = os.path.join(dft, "page%s.pdf" % i)
            with open(page_path, "wb") as output_stream:
                output.write(output_stream)
def bookmark(srcff, book):
    """
    Merge the page files in a folder into one PDF, adding bookmarks.

    The current working directory is changed to ``srcff``. The files
    ``page0.pdf``, ``page1.pdf``, ... are appended in order; a page whose
    index appears as a value in ``book`` gets a bookmark with that title.
    The user is then prompted for the output file name, and the merged
    PDF is written into ``srcff``.

    Parameters
    ----------
    srcff : string
        Folder that holds the page<N>.pdf files. Every entry in the folder
        is counted as a page, so it must contain nothing else.
    book : dict
        keys store the titles in string format.
        values store the page index (N of page<N>.pdf) in integer format.
        {key:value}

    Returns
    -------
    string
        Name of the merged file (the user's answer plus ".pdf").
    """
    os.chdir(srcff)
    merger = PdfFileMerger()
    visual_feedback = "creating bookmark..."

    # Invert {title: page} into {page: title} for direct lookup. setdefault
    # keeps the first title when several titles point at the same page.
    titles_by_page = {}
    for title, page_index in book.items():
        titles_by_page.setdefault(page_index, title)

    # Nothing is written to the folder until merger.write(), so the entry
    # count is taken once instead of on every iteration.
    page_count = len(os.listdir("."))
    try:
        for num in range(page_count):
            pdf = "page%s.pdf" % num
            # Membership test (not truthiness of num) so that a bookmark
            # on the very first page, index 0, is not skipped.
            if num in titles_by_page:
                title = titles_by_page[num]
                merger.append(pdf, title)
                # Report the title just added with its 1-based page number.
                print(visual_feedback, "%s @ page number %d" % (title, num + 1))
            else:
                merger.append(pdf)
        merger.setPageMode("/UseOutlines")
        print("your bookmarked pdf will be stored at...", srcff)
        print("what do you want to call this pdf file?(do not use .pdf at end)\n")
        result = input()
        merger.write(result + ".pdf")
    finally:
        # Close the merger even if appending or writing fails.
        merger.close()
    return result + ".pdf"
def main(destination="C:\\Users\\User_name\\pdfJatra"):
    """
    Interactive entry point: split a PDF, bookmark it, and clean up.

    Prompts for the source pdf path and the bookmark text file path,
    splits the pdf into single pages inside ``destination``, merges them
    back with bookmarks, and finally deletes every file in ``destination``
    except the merged result.

    Parameters
    ----------
    destination : string, optional
        Existing, empty folder used as working space and where the final
        pdf file is created. Defaults to the path previously hard-coded.

    Raises
    ------
    Exception
        If there is any file in the destination folder this program will
        not work, hence it stops and gives the user a chance to clear the
        space before running the program again.

    Returns
    -------
    None.

    Example line in the text file
    -----------------------------
    Differential Equation of Equilibrium @ 12
    The part before @ is the title and the part after @ is the page number.
    Only an integer may follow the @ symbol.
    """
    source = input("Paste the source pdf file location here(include.pdf)-->")
    text_file = input("Paste the text file location here(include.txt)------>")
    book = title_page(text_file)
    # Make the folder absolute up front: the working directory is changed
    # to it later, which would break a relative path on the second use.
    destination = os.path.abspath(destination)
    if os.listdir(destination):
        raise Exception(
            "There are files in destination folder please move or delete files in >>"
            + destination
            + "<< this location and run the program again"
        )
    print(book)
    print("Hoping that you used only positive integers to denote page number.")
    filesplitter(source, destination)
    final_file = bookmark(destination, book)
    os.chdir(destination)
    # Remove the intermediate single-page files, keeping only the result.
    for file in os.listdir(destination):
        if file != final_file:
            os.remove(os.path.join(destination, file))
    print("bookmarked pdf at" + " ---> " + destination)
# Start the interactive workflow only when this file is run as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()