-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmulti.py
309 lines (258 loc) · 12.2 KB
/
multi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
import argparse
import itertools
from pathlib import Path
from typing import Literal, Dict, List, Iterable, Sequence
import PyPDF2
TARGET_FOLDER = "../single"
NO_PAGE: int = -1
def chunker(seq: Sequence, size: int) -> Iterable:
"""
Returns an iterator that goes over a list in chunks of size <size>.
From: https://stackoverflow.com/a/434328
:param seq: A Sequence (e.g. list, tuple) that will be split into chunks
:param size: Number of elements per chunk
:return: An Iterable that contains the chunks
"""
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
def invert_list_dict(input_dict: Dict[int, List[Path]]) -> Dict[Path, int]:
"""
Inverts a dictionary of the Type:
{
Hashable: [
Hashable,
Hashable,
...
Hashable
],
}
All Items in the List will have the key as their value.
:param input_dict:
:return:
"""
inv_dict = {}
for key, values in input_dict.items():
for value in values:
inv_dict.update({value: key})
# sort the dictionary
inv_dict = dict(sorted(inv_dict.items()))
return inv_dict
def convert_page_numbers_to_indices(page_no_dict: Dict[int, List[Path]]) -> Dict[Path, List[int]]:
inv_dict = invert_list_dict(page_no_dict)
counting_index = 1
for key, value in inv_dict.items():
# FIXME: check if it is fine that we change the type here from int to List[int]:
inv_dict[key] = list(range(counting_index, counting_index + value))
counting_index += value
return inv_dict
def get_page_indices(single_pdf_path: Path, full_pdf_path: Path):
# get the lengths for all single_files:
single_list = [file for file in single_pdf_path.iterdir() if file.suffix == ".pdf"]
single_list.sort()
length_dict = {}
total_page_count = 0
for file in single_list:
reader = PyPDF2.PdfReader(str(file))
length = len(reader.pages)
total_page_count += length
if length in length_dict.keys():
length_dict[length].append(file)
else:
length_dict.update({length: [file]})
full_pdf_length = len(PyPDF2.PdfReader(str(full_pdf_path)).pages)
if not full_pdf_length == total_page_count + 1:
raise ValueError(f"Expected exactly one more page in the full pdf, but got {full_pdf_length - total_page_count}\
more pages.\n\tFull pdf: {full_pdf_length},\n\tCumulative Single Page Count: {total_page_count}.")
return length_dict
class NupTexDocument:
NUP_HEAD_CODE = r"""\documentclass[12pt,a4paper,landscape]{scrartcl}
\usepackage[a4paper]{geometry}
\usepackage{pdfpages}
\usepackage{xcolor}
%\geometry{showframe}
%\geometry{left=1.2cm,right=1.2cm,top=1.5cm,bottom=1.5cm}
\pagestyle{empty}
% Define very light gray color so as to be nearly invisible
\definecolor{ultra-light-gray}{gray}{0.9}
% since pdfpages uses fbox to draw the frames lets change the stroke color
% to the desired value
\renewcommand\fbox{\fcolorbox{ultra-light-gray}{white}}
\begin{document}"""
NUP_FOOT_CODE = r"""\end{document}
"""
def __init__(self, dance_dict: Dict[int, List[Path]], nup_pdf_source: Path):
self.dance_dict = dance_dict
self.source_pdf_path = nup_pdf_source
self.empty_page_idx = self.get_last_page_idx(nup_pdf_source)
@classmethod
def get_last_page_idx(cls, pdf_path: Path) -> int:
reader = PyPDF2.PdfReader(str(pdf_path))
return len(reader.pages)
@classmethod
def split_into_front_back_pairs(cls, dance_idxs: List[int]) -> List[List[int]]:
num_pages = len(dance_idxs)
# make page count even
if num_pages % 2 == 1:
dance_idxs.append(NO_PAGE)
num_pages += 1
pages = []
for idx in range(int(num_pages / 2)):
pages.append([dance_idxs[idx], dance_idxs[idx+int(num_pages / 2)]])
return pages
def layout_dances(self, output_file: Path, dance_list: List[Path], fold_edge, nup_factor: int):
# TODO: proper type for fold_edge, pull out to
# separate type def since it's used twice.
page_list = []
# create working copy of dance_dict with only the selected dances
sel_dance_dict = self.copy_dict_and_drop_unlisted(self.dance_dict, dance_list)
# we want to process dances from large to small
# TODO: refactoring and cleanup
sel_dance_dict = dict(reversed(sorted(sel_dance_dict.items())))
all_dance_dict = dict(reversed(sorted(self.dance_dict.items())))
# now let's iterate over the dances
dance_idx_dict = convert_page_numbers_to_indices(all_dance_dict)
for page_count, dances in sel_dance_dict.items():
for dance in dances:
split_pages = self.split_into_front_back_pairs(dance_idx_dict[dance])
page_list.extend(split_pages)
with open(output_file, "w") as out_file:
out_file.write(self.NUP_HEAD_CODE)
for chunk in chunker(page_list, int(nup_factor ** 2)):
page = NupPage(fold_edge=fold_edge, nup_factor=nup_factor)
for pair in chunk:
page.add_single_front_back(pair)
page.replace_none_pages(self.empty_page_idx)
out_file.write(page.get_tex_code(self.source_pdf_path))
out_file.write(self.NUP_FOOT_CODE)
@classmethod
def copy_dict_and_drop_unlisted(cls, input_dict: Dict[int, List[Path]], target_list: List[Path]) -> Dict[int, List[Path]]:
# TODO: create a class for dance_dict
# it is such an unwieldy data structure that it probably should be encapsulated in a class
working_dict = input_dict.copy()
for key, values in working_dict.items():
# keep only the dances that also occur in the target_list
# FIXME: the use of sets currently doesn't allow for the layouting
# of the same dance twice! fix this if necessary.
new_values = list(set(values) & (set(target_list)))
working_dict[key] = new_values
return working_dict
class NupPage:
NUP_TEX_START = r"\includepdf[pages={"
# double braces (<{{> and <}}>) here for when it is actually tex code,
# to make it work with python string formatting (the second brace is the escape sequence)
NUP_TEX_END = "}}, frame=true, nup={nup_factor}x{nup_factor}, angle={angle}]{{{pdfpath}}}\n"
def __init__(self, fold_edge: Literal['long', 'short'], nup_factor: int = 2):
self.nup_factor = nup_factor
self.fold_edge = fold_edge
self.slots_front: List[int] = [NO_PAGE] * int(nup_factor ** 2)
self.slots_back: List[int] = [NO_PAGE] * int(nup_factor ** 2)
self.next_free_slot: int = 0
def __len__(self) -> int:
return int(self.nup_factor ** 2)
def add_single_front_back(self, fb_pair: List[int]):
self.slots_front[self.next_free_slot] = fb_pair[0]
self.slots_back[self.next_free_slot] = fb_pair[1]
self.next_free_slot += 1
def replace_none_pages(self, empty_page_idx: int) -> None:
"""
Replaces all nones in slots with the empty page index given.
:param empty_page_idx:
:return:
"""
for idx in range(len(self.slots_front)):
if self.slots_front[idx] == NO_PAGE:
self.slots_front[idx] = empty_page_idx
for idx in range(len(self.slots_back)):
if self.slots_back[idx] == NO_PAGE:
self.slots_back[idx] = empty_page_idx
def get_arranged_back_numbers(self):
arranged_numbers = []
if self.fold_edge == "short":
# Goal: Flip the indices horizontally:
# 1 2 3 -> 3 2 1
# 4 5 6 -> 6 5 4
# 7 8 9 -> 9 8 7
#
# 1 2 3 4 -> 4 3 2 1
# 5 6 7 8 -> 8 7 6 5
# 9 10 11 12 -> 12 11 10 9
# 13 14 15 16 -> 16 15 14 13
# iterate over lines:
for chunk in chunker(self.slots_back, self.nup_factor):
# flip each line
arranged_numbers.extend(reversed(chunk))
else:
# Goal: Flip the indices vertically:
# Also: the pages need to be flipped by 180° degrees
# 1 2 3 -> 7 8 9
# 4 5 6 -> 4 5 6
# 7 8 9 -> 1 2 3
#
# 1 2 3 4 -> 13 14 15 16
# 5 6 7 8 -> 9 10 11 12
# 9 10 11 12 -> 5 6 7 8
# 13 14 15 16 -> 1 2 3 4
for chunk in chunker(self.slots_back, self.nup_factor):
arranged_numbers = chunk + arranged_numbers
return arranged_numbers
def get_tex_code(self, pdf_path: Path) -> str:
"""
This method outputs pdfpages code to plot on front and one back of a page.
:param pdf_path: path to the pdf file from which to take pages
:return:
"""
angle = 0 if self.fold_edge == 'short' else 180
tex_str = ""
tex_str += self.NUP_TEX_START
tex_str += ", ".join([str(x) for x in self.slots_front])
# We have an angle of 0 here (normal orientation):
tex_str += self.NUP_TEX_END.format(nup_factor=self.nup_factor, angle=0, pdfpath=str(pdf_path.as_posix()))
tex_str += self.NUP_TEX_START
tex_str += ", ".join([str(x) for x in self.get_arranged_back_numbers()])
# apply angle here because the back side needs to be in the same format as the front
tex_str += self.NUP_TEX_END.format(nup_factor=self.nup_factor, angle=angle, pdfpath=str(pdf_path.as_posix()))
return tex_str
def create_nup_tex_from_pdfs(
single_pdf_path: Path,
full_pdf_path: Path,
nup_factor: int,
output_path: Path,
fold_edge: str) -> None:
dance_dict = get_page_indices(single_pdf_path=single_pdf_path, full_pdf_path=full_pdf_path)
ntc = NupTexDocument(dance_dict=dance_dict, nup_pdf_source=full_pdf_path)
flat_dance_list = list(dance_dict.values())
# flatten code from: https://stackoverflow.com/a/953097
flat_dance_list = list(itertools.chain.from_iterable(flat_dance_list))
flat_dance_list.sort()
#p = Path("multipage-tool/")
#p.mkdir(exist_ok=True, parents=False)
Path(output_path.parent).mkdir(exist_ok=True, parents=False)
ntc.layout_dances(output_file=output_path, dance_list=flat_dance_list, fold_edge=fold_edge,
nup_factor=nup_factor)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--single_pdfs", type=Path, help="the path to the directory of all split-pdfs", required=True)
parser.add_argument("--full_pdf", type=Path, help="the path to the full pdf with all dances", required=True)
parser.add_argument("--nup_factor", type=int, help="How many rows and columns should there be on one a4 page.", required=True)
parser.add_argument("--fold_edge", type=str, help="Either 'short' or 'long' depending on which edge you want to fold the pages for double sided printing.")
parser.add_argument("--output_tex", type=Path, help="path to where the output tex file should be created.", required=True)
args = parser.parse_args()
# dance_dict = get_page_indices(single_pdf_path=SINGLE_PATH, full_pdf_path=FULL_PATH)
# ntc = NupTexDocument(dance_dict=dance_dict, nup_pdf_source=FULL_PATH)
# flat_dance_list = list(dance_dict.values())
# # flatten code from: https://stackoverflow.com/a/953097
# flat_dance_list = list(itertools.chain.from_iterable(flat_dance_list))
# flat_dance_list.sort()
#
# p = Path("./")
# p.mkdir(exist_ok=True, parents=False)
out_path = Path(args.output_tex)#.resolve()
single_path = Path(args.single_pdfs)#.resolve()
full_path = Path(args.full_pdf)#.resolve()
# TODO: make a check that ensures that the full file is correctly placed relative to the output file!
create_nup_tex_from_pdfs(
single_pdf_path=single_path,
full_pdf_path=full_path,
nup_factor=args.nup_factor,
output_path=out_path,
fold_edge=args.fold_edge
)