-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
51 lines (30 loc) · 853 Bytes
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pymupdf
import pprint
# Interactive overview of a PDF: prompts for a path, then prints the document
# metadata, the page count, and a per-page report of tables, images and links.
print("Get an overview of a PDF file")
filename = input("Filename: ")

# Open the document as a context manager so it is closed even when one of the
# page-analysis calls below raises.
with pymupdf.open(filename) as doc:
    print("\n", "Metadata: ")
    pprint.pp(doc.metadata)
    print("\n")
    print("Pages: ", doc.page_count, "\n")
    print("Analyzing pages...", "\n")

    imgCount = 0  # running total of the per-page image entries
    for index, page in enumerate(doc):
        tbls = page.find_tables()
        imgs = page.get_images()
        lnks = page.get_links()

        # index comes from enumerate(), so the first page is reported as 0.
        print("pg#", index, ": ")
        print("Rect x1,y1:", page.rect.x1, ",", page.rect.y1)

        if tbls.tables:
            print("This page contains tables")
            for t in tbls.tables:
                print(t.extract())

        if imgs:
            print("This page contains images")
            imgCount += len(imgs)

        if lnks:
            print("This page contains links")
            print(lnks)

        # Blank separator between consecutive page reports.
        print("\n")

    # Surface the total gathered by the loop so the count is not discarded.
    print("Images: ", imgCount, "\n")

print("Done")