Skip to content

Commit

Permalink
v0.2.21
Browse files Browse the repository at this point in the history
  • Loading branch information
yh202109 committed Jul 26, 2024
1 parent 1612fa9 commit ac358ea
Show file tree
Hide file tree
Showing 10 changed files with 71 additions and 46 deletions.
9 changes: 9 additions & 0 deletions docs/statlab_kappa.rst
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ The SE of :math:`\kappa` is calculated as
Interpretation of Cohen's Kappa Suggested in Literature
*************

There are several approaches to interpreting Kappa. Some roughly (not strictly) defined types are listed below:

1. Table-based interpretation: a shared interpretation table simplifies the application process and provides easy-to-compare values.
2. Interpretation based on approximate model-based confidence intervals or bootstrap confidence intervals, combined with a preselected criterion.
3. Bayesian inference based interpretation [8]_

Cohen (1960) [4]_ suggested the Kappa result be interpreted as follows:

Expand Down Expand Up @@ -486,6 +491,10 @@ The weighted :math:`\kappa` is calculated as
.. math::
\kappa = 1- \frac{\sum_{j_1=1}^J\sum_{j_2=1}^J w_{j_1,j_2}N_{j_1,j_2}}{\sum_{j_1=1}^J\sum_{j_2=1}^J w_{j_1,j_2}\hat{E}_{j_1, j_2}}.
(Weighted methods and their variations will be discussed on a separate page.)



*************
Reference
*************
Expand Down
5 changes: 4 additions & 1 deletion docs/std_iso_idmp.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@
"- ISO 11239 and ISO/TS 20440 / **Dosage Form and Route** of Administration\n",
"- ISO 11240 / **Units** of Measurement (UoM)\n",
"\n",
"\n"
"Related ISO standards:\n",
"\n",
"- ISO Technical Specification (TS) 16791:2020 (which complements ISO 11615) supports Automatic Identification and Data Capture (AIDC) in health informatics.\n",
"- ISO Technical Report (TR) 14872:2019 includes core principles for the maintenance of identifiers and terms supporting IDMP.\n"
]
},
{
Expand Down
6 changes: 0 additions & 6 deletions mtbp3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,3 @@
from importlib.metadata import version
__version__ = version(__package__)

import os

_ROOT = os.path.abspath(os.path.dirname(__file__))
def get_data(path):
return os.path.join(_ROOT, 'data', path)

11 changes: 5 additions & 6 deletions mtbp3/health/ectd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,15 @@

import pandas as pd
import os
from mtbp3.util.cdt import ListTree
import mtbp3
from mtbp3.util import cdt, util

class ctoc_by_fda:
def __init__(self, ectd_version="3.2.2", ctoc_version="2.3.3"):
assert isinstance(ectd_version, str) and all(char.isdigit() or char == '.' for char in ectd_version), "Version must be a string with integers and dots"
assert isinstance(ctoc_version, str) and all(char.isdigit() or char == '.' for char in ctoc_version), "Version must be a string with integers and dots"
self.ectd_version = ectd_version
self.ctoc_version = ctoc_version
self.folder_name = mtbp3.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt')
self.folder_name = util.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt')
self.ctoc = self.__load_list()

def __load_list(self):
Expand All @@ -44,7 +43,7 @@ def show_ctoc_tree(self, module=None, to_right=False):
module = 1

filtered_ctoc = [item for item in self.ctoc if item.startswith(str(module))]
tree = ListTree(lst=filtered_ctoc, infmt='dotspace')
tree = cdt.ListTree(lst=filtered_ctoc, infmt='dotspace')
return tree.list_tree(to_right=to_right)

@staticmethod
Expand Down Expand Up @@ -110,9 +109,9 @@ def find_section_given_words(self, words, outfmt='simple', include='up', to_righ
out_colored.append(f"{first_part} {colored_second_part}")
else:
out_colored.append(row)
out_tree = ListTree(lst=out_colored, infmt='dotspace')
out_tree = cdt.ListTree(lst=out_colored, infmt='dotspace')
else:
out_tree = ListTree(lst=out, infmt='dotspace')
out_tree = cdt.ListTree(lst=out, infmt='dotspace')
return out_tree.list_tree(to_right=to_right)
else:
raise ValueError("Invalid value for outfmt. Supported values are 'simple' and 'tree'.")
Expand Down
23 changes: 11 additions & 12 deletions mtbp3/health/emt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2023 Y Hsu <[email protected]>
# Copyright (C) 2023-2024 Y Hsu <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -19,8 +19,7 @@
import re
import numpy as np
import pandas as pd
from mtbp3.util.lsr import LsrTree
import mtbp3
from mtbp3.util import util, lsr, cdt

class Emt:
"""A class representing MedDRA terms.
Expand All @@ -30,7 +29,7 @@ class Emt:
Attributes:
folder_name (str): The folder name associated with the Emt.
lsr (LsrTree): An instance of the LsrTree class for listing files.
lsrt (LsrTree): An instance of the LsrTree class for listing files.
month (str): The month of the version published.
year (str): The year of the version published.
"""
Expand All @@ -47,7 +46,7 @@ def __init__(self, folder_name=''):
self.folder_name = folder_name
self.demo = False
else:
self.folder_name = mtbp3.get_data('test_emt/MedDRA')
self.folder_name = util.get_data('test_emt/MedDRA')
self.demo = True

self.version_number = "00.0"
Expand Down Expand Up @@ -127,8 +126,8 @@ def find_files(self):
Returns:
list: A list of missing file names.
"""
lsr = LsrTree(self.folder_name, outfmt="list")
lsr_files = lsr.list_files()
lsrt = lsr.LsrTree(self.folder_name, outfmt="list")
lsr_files = lsrt.list_files()
support_doc_files, med_ascii_files, seq_ascii_files = self.expected_file_lists()

missing_files = []
Expand Down Expand Up @@ -161,8 +160,8 @@ def list_files(self):
Returns:
list: A list of file names.
"""
lsr = LsrTree(self.folder_name, outfmt="tree", with_counts=True)
lsr_files = lsr.list_files()
lsrt = lsr.LsrTree(self.folder_name, outfmt="tree", with_counts=True)
lsr_files = lsrt.list_files()
return lsr_files

def find_soc(self, terms=[], ignore_case=False):
Expand Down Expand Up @@ -625,7 +624,7 @@ def find_terms_given_smq_sub(self, subset_df, keep_columns, llt_only, llt_curren
def load_fmq_default(self):
if self.fmq_list_default is None:
try:
tmp = pd.read_csv(os.path.join(mtbp3.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0)
tmp = pd.read_csv(os.path.join(util.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0)
tmp = tmp.iloc[:, :-1]
tmp.columns = ['fmq', 'pt', 'fmq_pt', 'classification']
self.fmq_list_default = tmp
Expand Down Expand Up @@ -759,7 +758,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals
list1 = pt_df['fmq_class'].unique().tolist()
lists = pt_df['fmq_class_soc'].unique().tolist()
list2 = pt_df['fmq_class_soc_pt'].unique().tolist()
tree = mtbp3.util.cdt.ListTree(lst = ['FMQ/']+list0+list1+lists+list2)
tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+lists+list2)
return tree.list_tree(to_right=to_right)
else:
pt_df = pt_df.sort_values(by=['fmq', 'classification'])
Expand All @@ -776,7 +775,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals

list1 = pt_df['fmq_class'].unique().tolist()
list2 = pt_df['fmq_class_pt'].unique().tolist()
tree = mtbp3.util.cdt.ListTree(lst = ['FMQ/']+list0+list1+list2)
tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+list2)
return tree.list_tree(to_right=to_right)

if __name__ == "__main__":
Expand Down
7 changes: 3 additions & 4 deletions mtbp3/stdiso/pdfsummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>

from pypdf import PdfReader
import mtbp3
import os
import pandas as pd
from mtbp3.util.cdt import ListTree
from mtbp3 import cdt, util

class pdfSummary:
"""
Expand Down Expand Up @@ -46,7 +45,7 @@ def __init__(self, path=None):
path (str, optional): The path to the PDF file. If not provided, a demo file will be used.
"""
if not isinstance(path, str) or len(path) == 0:
self.pdf_path = mtbp3.get_data('attention.pdf')
self.pdf_path = util.get_data('attention.pdf')
self.demo = True
else:
self.pdf_path = path
Expand Down Expand Up @@ -187,7 +186,7 @@ def show_outline_tree(self, max_itr=5, to_right=False):
if len(self.outline_list) != 2:
raise ValueError("self.outline_list should be a length 2 list")

tree = ListTree(lst=self.outline_list[0], infmt='dotspace')
tree = cdt.ListTree(lst=self.outline_list[0], infmt='dotspace')
return '\n'.join(tree.list_tree(to_right=to_right))

if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions mtbp3/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .lsr import *
from .cdt import *
from .cdtg import *
from .util import *
23 changes: 23 additions & 0 deletions mtbp3/util/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright (C) 2023-2024 Y Hsu <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os

# This module lives in the ``util`` sub-package, while the bundled data
# directory sits one level up, next to the package ``__init__.py`` (where this
# helper was defined before it moved here). Anchor the root at the parent of
# this file's directory so existing ``data/...`` lookups keep resolving.
_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def get_data(path):
    """Return the absolute path of a bundled data file or folder.

    Args:
        path (str): Location relative to the package ``data`` directory,
            for example ``'test_lsr'`` or ``'attention.pdf'``.

    Returns:
        str: ``<package root>/data/<path>``. The path is only composed;
        its existence is not checked.
    """
    return os.path.join(_ROOT, 'data', path)

if __name__ == "__main__":
pass
31 changes: 15 additions & 16 deletions tests/test_lsr.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,49 @@
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import unittest
from mtbp3.util.lsr import LsrTree
import mtbp3
from mtbp3.util import lsr, util
import os


class TestLsrTree(unittest.TestCase):
def __init__(self, *args, **kwargs):
super(TestLsrTree, self).__init__(*args, **kwargs)
self.test_folder = mtbp3.get_data('test_lsr')
self.test_folder = util.get_data('test_lsr')

def test_list_files_list(self):
lsr = LsrTree(self.test_folder, outfmt="list")
files = lsr.list_files()
lsrt = lsr.LsrTree(self.test_folder, outfmt="list")
files = lsrt.list_files()
expected_files = ['/testfolder1/testfile10', '/testfolder1/testfile11', '/testfolder1/testfile12', '/testfolder2/testfile20', '/testfolder2/testfile3']
self.assertCountEqual(files, expected_files)

def test_list_files_json(self):
lsr = LsrTree(self.test_folder, outfmt="json")
files = lsr.list_files()
lsrt = lsr.LsrTree(self.test_folder, outfmt="json")
files = lsrt.list_files()
expected_files = '{"0": {"path": "", "level": 0, "folders": ["testfolder1", "testfolder2"], "files": []}, "1": {"path": "/testfolder1", "level": 1, "folders": [], "files": ["testfile10", "testfile11", "testfile12"]}, "2": {"path": "/testfolder2", "level": 1, "folders": [], "files": ["testfile20", "testfile3"]}}'
self.assertCountEqual(files, expected_files)

def test_list_files_dataframe(self):
lsr = LsrTree(self.test_folder, outfmt="dataframe")
files = lsr.list_files()['file'].tolist()
lsrt = lsr.LsrTree(self.test_folder, outfmt="dataframe")
files = lsrt.list_files()['file'].tolist()
expected_files = ['testfile10', 'testfile11', 'testfile12', 'testfile20', 'testfile3']
self.assertCountEqual(files, expected_files)

def test_list_files_string(self):
lsr = LsrTree(os.path.join(self.test_folder, 'testfolder1'), outfmt="string")
files = lsr.list_files()
lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder1'), outfmt="string")
files = lsrt.list_files()
expected_files = 'testfolder1/\n... testfile10\n... testfile11\n... testfile12'
self.assertEqual(files, expected_files)

def test_list_files_tree(self):
#lsr = LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree")
lsr = LsrTree(self.test_folder, outfmt="tree")
files = lsr.list_files()
#lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree")
lsrt = lsr.LsrTree(self.test_folder, outfmt="tree")
files = lsrt.list_files()
expected_files = 'test_lsr/\n├── testfolder1/\n│ ├── testfile10\n│ ├── testfile11\n│ └── testfile12\n└── testfolder2/\n ├── testfile20\n └── testfile3'
self.assertEqual(files, expected_files)

def test_list_files_tree2(self):
lsr = LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree", with_counts=True)
files = lsr.list_files()
lsrt = lsr.LsrTree(os.path.join(self.test_folder, 'testfolder2'), outfmt="tree", with_counts=True)
files = lsrt.list_files()
expected_files = 'testfolder2/ <<<((( F=2; D=0 )))>>>\n├── testfile20\n└── testfile3'
self.assertEqual(files, expected_files)

Expand Down
1 change: 0 additions & 1 deletion tests/test_stdiso_pdfsummary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import unittest
import pandas as pd
import mtbp3
from mtbp3.stdiso.pdfsummary import pdfSummary

class TestPdfSummary(unittest.TestCase):
Expand Down

0 comments on commit ac358ea

Please sign in to comment.