From 8d7bdd44cc3f41b1b69f6b3c5f548c9cadb96457 Mon Sep 17 00:00:00 2001 From: yh202109 Date: Mon, 8 Jul 2024 16:11:18 -0400 Subject: [PATCH] v0.2.15 --- README.md | 2 +- docs/index.md | 2 +- docs/{st_cdisc.ipynb => stcdisc.ipynb} | 0 mtbp3/{stdcdisc => stcdisc}/__init__.py | 0 mtbp3/stdiso/pdfsummary.py | 46 ----------------- mtbp3/{stdiso => stiso}/__init__.py | 0 mtbp3/stiso/pdfsummary.py | 69 +++++++++++++++++++++++++ 7 files changed, 71 insertions(+), 48 deletions(-) rename docs/{st_cdisc.ipynb => stcdisc.ipynb} (100%) rename mtbp3/{stdcdisc => stcdisc}/__init__.py (100%) delete mode 100644 mtbp3/stdiso/pdfsummary.py rename mtbp3/{stdiso => stiso}/__init__.py (100%) create mode 100644 mtbp3/stiso/pdfsummary.py diff --git a/README.md b/README.md index f7f2c554..b29c314d 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Functions are grouped into: - Graphs: `util.cdtg` - System: `util.lsr` - Standard: - - ISO: `stdiso.pdfsummary` + - ISO: `stiso.pdfsummary` - CDISC: - StatLab: - Correlation: `statlab.corr` diff --git a/docs/index.md b/docs/index.md index 73244ce3..529c4e19 100755 --- a/docs/index.md +++ b/docs/index.md @@ -16,7 +16,7 @@ statlab_kappa.rst statlab_kappa2.rst statlab_corr_tau.rst std_iso_pdf.ipynb -st_cdisc.ipynb +stcdisc.ipynb example_cdtg.ipynb example_files.ipynb diff --git a/docs/st_cdisc.ipynb b/docs/stcdisc.ipynb similarity index 100% rename from docs/st_cdisc.ipynb rename to docs/stcdisc.ipynb diff --git a/mtbp3/stdcdisc/__init__.py b/mtbp3/stcdisc/__init__.py similarity index 100% rename from mtbp3/stdcdisc/__init__.py rename to mtbp3/stcdisc/__init__.py diff --git a/mtbp3/stdiso/pdfsummary.py b/mtbp3/stdiso/pdfsummary.py deleted file mode 100644 index d5a0b5e9..00000000 --- a/mtbp3/stdiso/pdfsummary.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2023-2024 Y Hsu -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public license as published by -# the Free software Foundation, either version 3 of the License, or -# any later version. -#j -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details -# -# You should have received a copy of the GNU General Public license -# along with this program. If not, see - -import pypdf -import mtbp3 -import os - -class pdfSummary: - def __init__(self, path = None): - - if not isinstance(path, str) or len(path) == 0: - self.pdf_path = mtbp3.get_data('attention.pdf') - self.demo = True - else: - self.pdf_path = path - self.demo = False - - try: - file = open(self.pdf_path, 'rb') - self.pp = pypdf.PdfReader(file) - except FileNotFoundError: - raise ValueError("File not found") - - self.n_page = self.pp.get_num_pages() - self.file_size = os.path.getsize(self.pdf_path) - self.n_image_in_page = [len(self.pp.pages[i].images) for i in range(self.n_page)] - self.n_image = sum(self.n_image_in_page) - self.meta = self.pp.metadata - -if __name__ == "__main__": - - pdf_obj = pdfSummary("/Users/yh2020/dt2/proj/mtbp3/mtbp3/data/attention.pdf") - print(pdf_obj.meta.creation_date) - diff --git a/mtbp3/stdiso/__init__.py b/mtbp3/stiso/__init__.py similarity index 100% rename from mtbp3/stdiso/__init__.py rename to mtbp3/stiso/__init__.py diff --git a/mtbp3/stiso/pdfsummary.py b/mtbp3/stiso/pdfsummary.py new file mode 100644 index 00000000..8b0e239b --- /dev/null +++ b/mtbp3/stiso/pdfsummary.py @@ -0,0 +1,69 @@ +# Copyright (C) 2023-2024 Y Hsu +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public license as published by +# the Free software Foundation, either version 3 of the License, or +# any later version. +#j +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details +# +# You should have received a copy of the GNU General Public license +# along with this program. If not, see + +from pypdf import PdfReader +import mtbp3 +import os +import pandas as pd + +class pdfSummary: + def __init__(self, path = None): + + if not isinstance(path, str) or len(path) == 0: + self.pdf_path = mtbp3.get_data('attention.pdf') + self.demo = True + else: + self.pdf_path = path + self.demo = False + + try: + file = open(self.pdf_path, 'rb') + self.pp = PdfReader(file) + except FileNotFoundError: + raise ValueError("File not found") + + tmp0 = self.pp.get_num_pages() + tmp1 = [len(self.pp.pages[i].images) for i in range(tmp0)] + self.summary = { + 'pdf_version': self.pp.pdf_header.split('-')[1], + 'n_page': tmp0, + 'file_size': os.path.getsize(self.pdf_path), + 'n_image_in_page': tmp1, + 'n_image_in_file': sum(tmp1) + } + self.summary_label = { + 'pdf_version': "PDF version", + 'n_page': "Number of pages: ", + 'file_size': "File size (byte): ", + 'n_image_in_page': "Number of images in individual pages: ", + 'n_image_in_file': "Number of images total: ", + } + + def get_summary_string(self): + return "\n".join([self.summary_label[key] + ": " + str(self.summary[key]) for key in self.summary.keys()]) + + def get_summary_df(self): + data = { + 'Summary Label': list(self.summary_label.values()), + 'Summary Value': list(self.summary.values()) + } + df = pd.DataFrame(data) + return df + +if __name__ == "__main__": + + pfr = pdfSummary("/Users/yh2020/dt2/proj/mtbp3/mtbp3/data/attention.pdf") + print('outline',pfr.pp.outline) +