v0.2.21

yh202109 · Jul 26, 2024 · ac358ea · ac358ea
1 parent 1612fa9
commit ac358ea
Show file tree

Hide file tree

Showing 10 changed files with 71 additions and 46 deletions.
diff --git a/docs/statlab_kappa.rst b/docs/statlab_kappa.rst
@@ -149,6 +149,11 @@ The SE of :math:`\kappa` is calculated as
 Interpretation of Cohen's Kappa Suggested in Literature
 *************
 
+Interpretations of Cohen's Kappa can be grouped in several ways. Some roughly (not strictly) defined types are listed below:
+
+1. Table-based interpretation: a shared interpretation table simplifies the application process and makes values easy to compare.
+2. Interval-based interpretation: uses approximate model-based confidence intervals or bootstrap confidence intervals together with a preselected criterion.
+3. Bayesian-inference-based interpretation [8]_
 
 Cohen (1960) [4]_ suggested the Kappa result be interpreted as follows: 
 
@@ -486,6 +491,10 @@ The weighted :math:`\kappa` is calculated as
 .. math::
   \kappa = 1- \frac{\sum_{j_1=1}^J\sum_{j_2=1}^J w_{j_1,j_2}N_{j_1,j_2}}{\sum_{j_1=1}^J\sum_{j_2=1}^J w_{j_1,j_2}\hat{E}_{j_1, j_2}}.
 
+(Weighted methods and their variations will be discussed on a separate page.)
+
+
+
 *************
 Reference
 *************

diff --git a/docs/std_iso_idmp.ipynb b/docs/std_iso_idmp.ipynb
@@ -42,7 +42,10 @@
                 "- ISO 11239 and ISO/TS 20440 / **Dosage Form and Route** of Administration\n",
                 "- ISO 11240 / **Units** of Measurement (UoM)\n",
                 "\n",
-                "\n"
+                "Related ISO standards:\n",
+                "\n",
+                "- ISO Technical Specification (TS) 16791:2020 (which complements ISO 11615) supports Automatic Identification and Data Capture (AIDC) in health informatics.\n",
+                "- ISO Technical Report (TR) 14872:2019 describes core principles for the maintenance of identifiers and terms supporting IDMP.\n"
             ]
         },
         {

diff --git a/mtbp3/__init__.py b/mtbp3/__init__.py
@@ -9,9 +9,3 @@
 from importlib.metadata import version
 __version__ = version(__package__)
 
-import os
-
-_ROOT = os.path.abspath(os.path.dirname(__file__))
-def get_data(path):
-    return os.path.join(_ROOT, 'data', path)
-
diff --git a/mtbp3/health/ectd.py b/mtbp3/health/ectd.py
@@ -15,16 +15,15 @@
 
 import pandas as pd
 import os
-from mtbp3.util.cdt import ListTree
-import mtbp3
+from mtbp3.util import cdt, util
 
 class ctoc_by_fda:
     def __init__(self, ectd_version="3.2.2", ctoc_version="2.3.3"):
         assert isinstance(ectd_version, str) and all(char.isdigit() or char == '.' for char in ectd_version), "Version must be a string with integers and dots"
         assert isinstance(ctoc_version, str) and all(char.isdigit() or char == '.' for char in ctoc_version), "Version must be a string with integers and dots"
         self.ectd_version = ectd_version
         self.ctoc_version = ctoc_version
-        self.folder_name = mtbp3.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt')
+        self.folder_name = util.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt')
         self.ctoc = self.__load_list()
 
     def __load_list(self):
@@ -44,7 +43,7 @@ def show_ctoc_tree(self, module=None, to_right=False):
             module = 1
 
         filtered_ctoc = [item for item in self.ctoc if item.startswith(str(module))]
-        tree = ListTree(lst=filtered_ctoc, infmt='dotspace')
+        tree = cdt.ListTree(lst=filtered_ctoc, infmt='dotspace')
         return tree.list_tree(to_right=to_right)
 
     @staticmethod
@@ -110,9 +109,9 @@ def find_section_given_words(self, words, outfmt='simple', include='up', to_righ
                         out_colored.append(f"{first_part} {colored_second_part}")
                     else:
                         out_colored.append(row)
-                out_tree = ListTree(lst=out_colored, infmt='dotspace')
+                out_tree = cdt.ListTree(lst=out_colored, infmt='dotspace')
             else:
-                out_tree = ListTree(lst=out, infmt='dotspace')
+                out_tree = cdt.ListTree(lst=out, infmt='dotspace')
             return out_tree.list_tree(to_right=to_right)
         else:
             raise ValueError("Invalid value for outfmt. Supported values are 'simple' and 'tree'.")

diff --git a/mtbp3/health/emt.py b/mtbp3/health/emt.py
@@ -1,4 +1,4 @@
-#  Copyright (C) 2023 Y Hsu <[email protected]>
+#  Copyright (C) 2023-2024 Y Hsu <[email protected]>
 #
 #  This program is free software: you can redistribute it and/or modify
 #  it under the terms of the GNU General Public license as published by
@@ -19,8 +19,7 @@
 import re
 import numpy as np
 import pandas as pd
-from mtbp3.util.lsr import LsrTree
-import mtbp3
+from mtbp3.util import util, lsr, cdt 
 
 class Emt:
     """A class representing MedDRA terms.
@@ -30,7 +29,7 @@ class Emt:
 
     Attributes:
         folder_name (str): The folder name associated with the Emt.
-        lsr (LsrTree): An instance of the LsrTree class for listing files.
+        lsrt (LsrTree): An instance of the LsrTree class for listing files.
         month (str): The month of the version published.
         year (str): The year of the version published.
     """
@@ -47,7 +46,7 @@ def __init__(self, folder_name=''):
             self.folder_name = folder_name
             self.demo = False
         else:
-            self.folder_name = mtbp3.get_data('test_emt/MedDRA')
+            self.folder_name = util.get_data('test_emt/MedDRA')
             self.demo = True
 
         self.version_number = "00.0"
@@ -127,8 +126,8 @@ def find_files(self):
         Returns:
             list: A list of missing file names.
         """
-        lsr = LsrTree(self.folder_name, outfmt="list")
-        lsr_files = lsr.list_files()
+        lsrt = lsr.LsrTree(self.folder_name, outfmt="list")
+        lsr_files = lsrt.list_files()
         support_doc_files, med_ascii_files, seq_ascii_files = self.expected_file_lists()
 
         missing_files = []
@@ -161,8 +160,8 @@ def list_files(self):
         Returns:
             list: A list of file names.
         """
-        lsr = LsrTree(self.folder_name, outfmt="tree", with_counts=True)
-        lsr_files = lsr.list_files()
+        lsrt = lsr.LsrTree(self.folder_name, outfmt="tree", with_counts=True)
+        lsr_files = lsrt.list_files()
         return lsr_files
 
     def find_soc(self, terms=[], ignore_case=False):
@@ -625,7 +624,7 @@ def find_terms_given_smq_sub(self, subset_df, keep_columns, llt_only, llt_curren
     def load_fmq_default(self):
         if self.fmq_list_default is None:
             try:
-                tmp = pd.read_csv(os.path.join(mtbp3.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0)
+                tmp = pd.read_csv(os.path.join(util.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0)
                 tmp = tmp.iloc[:, :-1]
                 tmp.columns = ['fmq', 'pt', 'fmq_pt', 'classification']
                 self.fmq_list_default = tmp
@@ -759,7 +758,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals
             list1 = pt_df['fmq_class'].unique().tolist()
             lists = pt_df['fmq_class_soc'].unique().tolist()
             list2 = pt_df['fmq_class_soc_pt'].unique().tolist()
-            tree = mtbp3.util.cdt.ListTree(lst = ['FMQ/']+list0+list1+lists+list2)
+            tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+lists+list2)
             return tree.list_tree(to_right=to_right)
         else:
             pt_df = pt_df.sort_values(by=['fmq', 'classification'])
@@ -776,7 +775,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals
 
             list1 = pt_df['fmq_class'].unique().tolist()
             list2 = pt_df['fmq_class_pt'].unique().tolist()
-            tree = mtbp3.util.cdt.ListTree(lst = ['FMQ/']+list0+list1+list2)
+            tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+list2)
             return tree.list_tree(to_right=to_right)
 
 if __name__ == "__main__":

diff --git a/mtbp3/stdiso/pdfsummary.py b/mtbp3/stdiso/pdfsummary.py
@@ -14,10 +14,9 @@
 #  along with this program. If not, see <https://www.gnu.org/license/>
 
 from pypdf import PdfReader
-import mtbp3
 import os
 import pandas as pd
-from mtbp3.util.cdt import ListTree
+# cdt and util are submodules of the mtbp3.util subpackage, not of mtbp3 itself.
+from mtbp3.util import cdt, util
 
 class pdfSummary:
     """
@@ -46,7 +45,7 @@ def __init__(self, path=None):
             path (str, optional): The path to the PDF file. If not provided, a demo file will be used.
         """
         if not isinstance(path, str) or len(path) == 0:
-            self.pdf_path = mtbp3.get_data('attention.pdf')
+            self.pdf_path = util.get_data('attention.pdf')
             self.demo = True
         else:
             self.pdf_path = path
@@ -187,7 +186,7 @@ def show_outline_tree(self, max_itr=5, to_right=False):
         if len(self.outline_list) != 2:
             raise ValueError("self.outline_list should be a length 2 list")
 
-        tree = ListTree(lst=self.outline_list[0], infmt='dotspace')
+        tree = cdt.ListTree(lst=self.outline_list[0], infmt='dotspace')
         return '\n'.join(tree.list_tree(to_right=to_right))
 
 if __name__ == "__main__":

diff --git a/mtbp3/util/__init__.py b/mtbp3/util/__init__.py
@@ -1,3 +1,4 @@
 from .lsr import *
 from .cdt import *
 from .cdtg import *
+from .util import *
diff --git a/mtbp3/util/util.py b/mtbp3/util/util.py
@@ -0,0 +1,23 @@
+#  Copyright (C) 2023-2024 Y Hsu <[email protected]>
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program. If not, see <https://www.gnu.org/licenses/>
+
+import os
+
+# Root of the ``mtbp3`` package. This module lives in ``mtbp3/util``, so the
+# package root is one level above this file's directory. The bundled ``data``
+# folder was resolved relative to the package root before this helper moved
+# here from ``mtbp3/__init__.py``.
+_ROOT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+
+
+def get_data(path):
+    """Return the absolute path of a file or folder under the package ``data`` directory.
+
+    Args:
+        path (str): Path relative to the ``data`` directory,
+            e.g. ``'test_emt/MedDRA'`` or ``'attention.pdf'``.
+
+    Returns:
+        str: ``<package root>/data/<path>``. The path is only composed;
+        its existence is not checked.
+    """
+    return os.path.join(_ROOT, 'data', path)
+
+if __name__ == "__main__":
+    pass
diff --git a/tests/test_lsr.py b/tests/test_lsr.py
@@ -1,50 +1,49 @@
 import warnings
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 import unittest
-from mtbp3.util.lsr import LsrTree
-import mtbp3
+from mtbp3.util import lsr, util
 import os
 
 
 class TestLsrTree(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(TestLsrTree, self).__init__(*args, **kwargs)
-        self.test_folder = mtbp3.get_data('test_lsr')
+        self.test_folder = util.get_data('test_lsr')
 
     def test_list_files_list(self):
-        lsr = LsrTree(self.test_folder, outfmt="list")
-        files = lsr.list_files()
+        lsrt = lsr.LsrTree(self.test_folder, outfmt="list")
+        files = lsrt.list_files()
         expected_files = ['/testfolder1/testfile10', '/testfolder1/testfile11', '/testfolder1/testfile12', '/testfolder2/testfile20', '/testfolder2/testfile3']
         self.assertCountEqual(files, expected_files)
 
     def test_list_files_json(self):
-        lsr = LsrTree(self.test_folder, outfmt="json")
-        files = lsr.list_files()
+        lsrt = lsr.LsrTree(self.test_folder, outfmt="json")
+        files = lsrt.list_files()
         expected_files = '{"0": {"path": "", "level": 0, "folders": ["testfolder1", "testfolder2"], "files": []}, "1": {"path": "/testfolder1", "level": 1, "folders": [], "files": ["testfile10", "testfile11", "testfile12"]}, "2": {"path": "/testfolder2", "level": 1, "folders": [], "files": ["testfile20", "testfile3"]}}'
         self.assertCountEqual(files, expected_files)
 
     def test_list_files_dataframe(self):
-        lsr = LsrTree(self.test_folder, outfmt="dataframe")
-        files = lsr.list_files()['file'].tolist()
+        lsrt = lsr.LsrTree(self.test_folder, outfmt="dataframe")
+        files = lsrt.list_files()['file'].tolist()
         expected_files = ['testfile10', 'testfile11', 'testfile12', 'testfile20', 'testfile3']
         self.assertCountEqual(files, expected_files)
 
     def test_list_files_string(self):
-        lsr = LsrTree(os.path.join(self.test_folder, 'testfolder1'), outfmt="string")
-        files = lsr.list_files()
+        lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder1'), outfmt="string")
+        files = lsrt.list_files()
         expected_files = 'testfolder1/\n... testfile10\n... testfile11\n... testfile12'
         self.assertEqual(files, expected_files)
 
     def test_list_files_tree(self):
-        #lsr = LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree")
-        lsr = LsrTree(self.test_folder, outfmt="tree")
-        files = lsr.list_files()
+        #lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree")
+        lsrt = lsr.LsrTree(self.test_folder, outfmt="tree")
+        files = lsrt.list_files()
         expected_files = 'test_lsr/\n├── testfolder1/\n│   ├── testfile10\n│   ├── testfile11\n│   └── testfile12\n└── testfolder2/\n    ├── testfile20\n    └── testfile3'
         self.assertEqual(files, expected_files)
 
     def test_list_files_tree2(self):
-        lsr = LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree", with_counts=True)
-        files = lsr.list_files()
+        lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree", with_counts=True)
+        files = lsrt.list_files()
         expected_files = 'testfolder2/  <<<((( F=2; D=0 )))>>>\n├── testfile20\n└── testfile3'
         self.assertEqual(files, expected_files)
 

diff --git a/tests/test_stdiso_pdfsummary.py b/tests/test_stdiso_pdfsummary.py
@@ -1,6 +1,5 @@
 import unittest
 import pandas as pd
-import mtbp3
 from mtbp3.stdiso.pdfsummary import pdfSummary
 
 class TestPdfSummary(unittest.TestCase):