diff --git a/docs/statlab_kappa.rst b/docs/statlab_kappa.rst index 8da5acc1..a2823b92 100644 --- a/docs/statlab_kappa.rst +++ b/docs/statlab_kappa.rst @@ -149,6 +149,11 @@ The SE of :math:`\kappa` is calculated as Interpretation of Cohen's Kappa Suggested in Literature ************* +There are several groups of interpretations. Some roughly (not strictly) defined types are listed below: + +1. Table-based interpretation: a shared interpretation simplifies the application process and provides easy-to-compare values. +2. Interpretation based on approximate model-based confidence intervals or bootstrap confidence intervals with a preselected criterion. +3. Bayesian-inference-based interpretation [8]_ Cohen (1960) [4]_ suggested the Kappa result be interpreted as follows: @@ -486,6 +491,10 @@ The weighted :math:`\kappa` is calculated as .. math:: \kappa = 1- \frac{\sum_{j_1=1}^J\sum_{j_2=1}^J w_{j_1,j_2}N_{j_1,j_2}}{\sum_{j_1=1}^J\sum_{j_2=1}^J w_{j_1,j_2}\hat{E}_{j_1, j_2}}. +(Weighted methods and their variations will be discussed on a separate page.) + + + ************* Reference ************* diff --git a/docs/std_iso_idmp.ipynb b/docs/std_iso_idmp.ipynb index cc5b99e7..6a2c3334 100644 --- a/docs/std_iso_idmp.ipynb +++ b/docs/std_iso_idmp.ipynb @@ -42,7 +42,10 @@ "- ISO 11239 and ISO/TS 20440 / **Dosage Form and Route** of Administration\n", "- ISO 11240 / **Units** of Measurement (UoM)\n", "\n", - "\n" + "Related ISO standards:\n", + "\n", + "- ISO Technical Specification (TS) 16791:2020 (which complements ISO 11615) assists with Automatic Identification and Data Capture (AIDC) related to health informatics.\n", + "- ISO Technical Report (TR) 14872:2019 includes core principles for maintenance of identifiers and terms supporting IDMP.\n" ] }, { diff --git a/mtbp3/__init__.py b/mtbp3/__init__.py index 7a656427..1220f781 100644 --- a/mtbp3/__init__.py +++ b/mtbp3/__init__.py @@ -9,9 +9,3 @@ from importlib.metadata import version __version__ = version(__package__) -import os 
- -_ROOT = os.path.abspath(os.path.dirname(__file__)) -def get_data(path): - return os.path.join(_ROOT, 'data', path) - diff --git a/mtbp3/health/ectd.py b/mtbp3/health/ectd.py index d76bb98d..0fee2a73 100644 --- a/mtbp3/health/ectd.py +++ b/mtbp3/health/ectd.py @@ -15,8 +15,7 @@ import pandas as pd import os -from mtbp3.util.cdt import ListTree -import mtbp3 +from mtbp3.util import cdt, util class ctoc_by_fda: def __init__(self, ectd_version="3.2.2", ctoc_version="2.3.3"): @@ -24,7 +23,7 @@ def __init__(self, ectd_version="3.2.2", ctoc_version="2.3.3"): assert isinstance(ctoc_version, str) and all(char.isdigit() or char == '.' for char in ctoc_version), "Version must be a string with integers and dots" self.ectd_version = ectd_version self.ctoc_version = ctoc_version - self.folder_name = mtbp3.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt') + self.folder_name = util.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt') self.ctoc = self.__load_list() def __load_list(self): @@ -44,7 +43,7 @@ def show_ctoc_tree(self, module=None, to_right=False): module = 1 filtered_ctoc = [item for item in self.ctoc if item.startswith(str(module))] - tree = ListTree(lst=filtered_ctoc, infmt='dotspace') + tree = cdt.ListTree(lst=filtered_ctoc, infmt='dotspace') return tree.list_tree(to_right=to_right) @staticmethod @@ -110,9 +109,9 @@ def find_section_given_words(self, words, outfmt='simple', include='up', to_righ out_colored.append(f"{first_part} {colored_second_part}") else: out_colored.append(row) - out_tree = ListTree(lst=out_colored, infmt='dotspace') + out_tree = cdt.ListTree(lst=out_colored, infmt='dotspace') else: - out_tree = ListTree(lst=out, infmt='dotspace') + out_tree = cdt.ListTree(lst=out, infmt='dotspace') return out_tree.list_tree(to_right=to_right) else: raise ValueError("Invalid value for outfmt. 
Supported values are 'simple' and 'tree'.") diff --git a/mtbp3/health/emt.py b/mtbp3/health/emt.py index 2254d7c6..b66f4c3f 100644 --- a/mtbp3/health/emt.py +++ b/mtbp3/health/emt.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Y Hsu +# Copyright (C) 2023-2024 Y Hsu # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public license as published by @@ -19,8 +19,7 @@ import re import numpy as np import pandas as pd -from mtbp3.util.lsr import LsrTree -import mtbp3 +from mtbp3.util import util, lsr, cdt class Emt: """A class representing MedDRA terms. @@ -30,7 +29,7 @@ class Emt: Attributes: folder_name (str): The folder name associated with the Emt. - lsr (LsrTree): An instance of the LsrTree class for listing files. + lsrt (LsrTree): An instance of the LsrTree class for listing files. month (str): The month of the version published. year (str): The year of the version published. """ @@ -47,7 +46,7 @@ def __init__(self, folder_name=''): self.folder_name = folder_name self.demo = False else: - self.folder_name = mtbp3.get_data('test_emt/MedDRA') + self.folder_name = util.get_data('test_emt/MedDRA') self.demo = True self.version_number = "00.0" @@ -127,8 +126,8 @@ def find_files(self): Returns: list: A list of missing file names. """ - lsr = LsrTree(self.folder_name, outfmt="list") - lsr_files = lsr.list_files() + lsrt = lsr.LsrTree(self.folder_name, outfmt="list") + lsr_files = lsrt.list_files() support_doc_files, med_ascii_files, seq_ascii_files = self.expected_file_lists() missing_files = [] @@ -161,8 +160,8 @@ def list_files(self): Returns: list: A list of file names. 
""" - lsr = LsrTree(self.folder_name, outfmt="tree", with_counts=True) - lsr_files = lsr.list_files() + lsrt = lsr.LsrTree(self.folder_name, outfmt="tree", with_counts=True) + lsr_files = lsrt.list_files() return lsr_files def find_soc(self, terms=[], ignore_case=False): @@ -625,7 +624,7 @@ def find_terms_given_smq_sub(self, subset_df, keep_columns, llt_only, llt_curren def load_fmq_default(self): if self.fmq_list_default is None: try: - tmp = pd.read_csv(os.path.join(mtbp3.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0) + tmp = pd.read_csv(os.path.join(util.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0) tmp = tmp.iloc[:, :-1] tmp.columns = ['fmq', 'pt', 'fmq_pt', 'classification'] self.fmq_list_default = tmp @@ -759,7 +758,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals list1 = pt_df['fmq_class'].unique().tolist() lists = pt_df['fmq_class_soc'].unique().tolist() list2 = pt_df['fmq_class_soc_pt'].unique().tolist() - tree = mtbp3.util.cdt.ListTree(lst = ['FMQ/']+list0+list1+lists+list2) + tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+lists+list2) return tree.list_tree(to_right=to_right) else: pt_df = pt_df.sort_values(by=['fmq', 'classification']) @@ -776,7 +775,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals list1 = pt_df['fmq_class'].unique().tolist() list2 = pt_df['fmq_class_pt'].unique().tolist() - tree = mtbp3.util.cdt.ListTree(lst = ['FMQ/']+list0+list1+list2) + tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+list2) return tree.list_tree(to_right=to_right) if __name__ == "__main__": diff --git a/mtbp3/stdiso/pdfsummary.py b/mtbp3/stdiso/pdfsummary.py index 67b6207b..2dd2f388 100644 --- a/mtbp3/stdiso/pdfsummary.py +++ b/mtbp3/stdiso/pdfsummary.py @@ -14,10 +14,9 @@ # along with this program. 
If not, see from pypdf import PdfReader -import mtbp3 import os import pandas as pd -from mtbp3.util.cdt import ListTree +from mtbp3 import cdt, util class pdfSummary: """ @@ -46,7 +45,7 @@ def __init__(self, path=None): path (str, optional): The path to the PDF file. If not provided, a demo file will be used. """ if not isinstance(path, str) or len(path) == 0: - self.pdf_path = mtbp3.get_data('attention.pdf') + self.pdf_path = util.get_data('attention.pdf') self.demo = True else: self.pdf_path = path @@ -187,7 +186,7 @@ def show_outline_tree(self, max_itr=5, to_right=False): if len(self.outline_list) != 2: raise ValueError("self.outline_list should be a length 2 list") - tree = ListTree(lst=self.outline_list[0], infmt='dotspace') + tree = cdt.ListTree(lst=self.outline_list[0], infmt='dotspace') return '\n'.join(tree.list_tree(to_right=to_right)) if __name__ == "__main__": diff --git a/mtbp3/util/__init__.py b/mtbp3/util/__init__.py index ae827e5a..f7ba4474 100644 --- a/mtbp3/util/__init__.py +++ b/mtbp3/util/__init__.py @@ -1,3 +1,4 @@ from .lsr import * from .cdt import * from .cdtg import * +from .util import * \ No newline at end of file diff --git a/mtbp3/util/util.py b/mtbp3/util/util.py new file mode 100644 index 00000000..fdded919 --- /dev/null +++ b/mtbp3/util/util.py @@ -0,0 +1,23 @@ +# Copyright (C) 2023-2024 Y Hsu +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public license as published by +# the Free software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details +# +# You should have received a copy of the GNU General Public license +# along with this program. 
If not, see + +import os + +_ROOT = os.path.abspath(os.path.dirname(__file__)) +def get_data(path): + return os.path.join(_ROOT, 'data', path) + +if __name__ == "__main__": + pass \ No newline at end of file diff --git a/tests/test_lsr.py b/tests/test_lsr.py index fd5ffa02..4cfc1ff6 100644 --- a/tests/test_lsr.py +++ b/tests/test_lsr.py @@ -1,50 +1,49 @@ import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) import unittest -from mtbp3.util.lsr import LsrTree -import mtbp3 +from mtbp3.util import lsr, util import os class TestLsrTree(unittest.TestCase): def __init__(self, *args, **kwargs): super(TestLsrTree, self).__init__(*args, **kwargs) - self.test_folder = mtbp3.get_data('test_lsr') + self.test_folder = util.get_data('test_lsr') def test_list_files_list(self): - lsr = LsrTree(self.test_folder, outfmt="list") - files = lsr.list_files() + lsrt = lsr.LsrTree(self.test_folder, outfmt="list") + files = lsrt.list_files() expected_files = ['/testfolder1/testfile10', '/testfolder1/testfile11', '/testfolder1/testfile12', '/testfolder2/testfile20', '/testfolder2/testfile3'] self.assertCountEqual(files, expected_files) def test_list_files_json(self): - lsr = LsrTree(self.test_folder, outfmt="json") - files = lsr.list_files() + lsrt = lsr.LsrTree(self.test_folder, outfmt="json") + files = lsrt.list_files() expected_files = '{"0": {"path": "", "level": 0, "folders": ["testfolder1", "testfolder2"], "files": []}, "1": {"path": "/testfolder1", "level": 1, "folders": [], "files": ["testfile10", "testfile11", "testfile12"]}, "2": {"path": "/testfolder2", "level": 1, "folders": [], "files": ["testfile20", "testfile3"]}}' self.assertCountEqual(files, expected_files) def test_list_files_dataframe(self): - lsr = LsrTree(self.test_folder, outfmt="dataframe") - files = lsr.list_files()['file'].tolist() + lsrt = lsr.LsrTree(self.test_folder, outfmt="dataframe") + files = lsrt.list_files()['file'].tolist() expected_files = ['testfile10', 'testfile11', 
'testfile12', 'testfile20', 'testfile3'] self.assertCountEqual(files, expected_files) def test_list_files_string(self): - lsr = LsrTree(os.path.join(self.test_folder, 'testfolder1'), outfmt="string") - files = lsr.list_files() + lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder1'), outfmt="string") + files = lsrt.list_files() expected_files = 'testfolder1/\n... testfile10\n... testfile11\n... testfile12' self.assertEqual(files, expected_files) def test_list_files_tree(self): - #lsr = LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree") - lsr = LsrTree(self.test_folder, outfmt="tree") - files = lsr.list_files() + #lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree") + lsrt = lsr.LsrTree(self.test_folder, outfmt="tree") + files = lsrt.list_files() expected_files = 'test_lsr/\n├── testfolder1/\n│ ├── testfile10\n│ ├── testfile11\n│ └── testfile12\n└── testfolder2/\n ├── testfile20\n └── testfile3' self.assertEqual(files, expected_files) def test_list_files_tree2(self): - lsr = LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree", with_counts=True) - files = lsr.list_files() + lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree", with_counts=True) + files = lsrt.list_files() expected_files = 'testfolder2/ <<<((( F=2; D=0 )))>>>\n├── testfile20\n└── testfile3' self.assertEqual(files, expected_files) diff --git a/tests/test_stdiso_pdfsummary.py b/tests/test_stdiso_pdfsummary.py index 75dee431..4d3eccb5 100644 --- a/tests/test_stdiso_pdfsummary.py +++ b/tests/test_stdiso_pdfsummary.py @@ -1,6 +1,5 @@ import unittest import pandas as pd -import mtbp3 from mtbp3.stdiso.pdfsummary import pdfSummary class TestPdfSummary(unittest.TestCase):