diff --git a/mtbp3/__init__.py b/mtbp3/__init__.py index 1220f78..655a01e 100644 --- a/mtbp3/__init__.py +++ b/mtbp3/__init__.py @@ -9,3 +9,9 @@ from importlib.metadata import version __version__ = version(__package__) + +import os + +_ROOT = os.path.abspath(os.path.dirname(__file__)) +def get_data(path): + return os.path.join(_ROOT, 'data', path) \ No newline at end of file diff --git a/mtbp3/health/ectd.py b/mtbp3/health/ectd.py index 0fee2a7..afd2bbc 100644 --- a/mtbp3/health/ectd.py +++ b/mtbp3/health/ectd.py @@ -15,7 +15,8 @@ import pandas as pd import os -from mtbp3.util import cdt, util +import mtbp3 +from mtbp3 import util class ctoc_by_fda: def __init__(self, ectd_version="3.2.2", ctoc_version="2.3.3"): @@ -23,8 +24,9 @@ def __init__(self, ectd_version="3.2.2", ctoc_version="2.3.3"): assert isinstance(ctoc_version, str) and all(char.isdigit() or char == '.' for char in ctoc_version), "Version must be a string with integers and dots" self.ectd_version = ectd_version self.ctoc_version = ctoc_version - self.folder_name = util.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt') + self.folder_name = mtbp3.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt') self.ctoc = self.__load_list() + print(self.folder_name) def __load_list(self): #file_path = f'./mtbp3/data/supp_ectd/fda_ectd{self.ectd_version}_ctocv{ctoc_version}.txt' @@ -43,7 +45,7 @@ def show_ctoc_tree(self, module=None, to_right=False): module = 1 filtered_ctoc = [item for item in self.ctoc if item.startswith(str(module))] - tree = cdt.ListTree(lst=filtered_ctoc, infmt='dotspace') + tree = mtbp3.util.cdt.ListTree(lst=filtered_ctoc, infmt='dotspace') return tree.list_tree(to_right=to_right) @staticmethod @@ -109,9 +111,9 @@ def find_section_given_words(self, words, outfmt='simple', include='up', to_righ out_colored.append(f"{first_part} {colored_second_part}") else: out_colored.append(row) - out_tree = cdt.ListTree(lst=out_colored, infmt='dotspace') + out_tree = mtbp3.util.cdt.ListTree(lst=out_colored, infmt='dotspace') else: - out_tree = cdt.ListTree(lst=out, infmt='dotspace') + out_tree = mtbp3.util.cdt.ListTree(lst=out, infmt='dotspace') return out_tree.list_tree(to_right=to_right) else: raise ValueError("Invalid value for outfmt. Supported values are 'simple' and 'tree'.") diff --git a/mtbp3/health/emt.py b/mtbp3/health/emt.py index b66f4c3..dc8a76d 100644 --- a/mtbp3/health/emt.py +++ b/mtbp3/health/emt.py @@ -19,7 +19,8 @@ import re import numpy as np import pandas as pd -from mtbp3.util import util, lsr, cdt +import mtbp3 +from mtbp3 import util class Emt: """A class representing MedDRA terms. @@ -46,7 +47,7 @@ def __init__(self, folder_name=''): self.folder_name = folder_name self.demo = False else: - self.folder_name = util.get_data('test_emt/MedDRA') + self.folder_name = mtbp3.get_data('test_emt/MedDRA') self.demo = True self.version_number = "00.0" @@ -126,7 +127,7 @@ def find_files(self): Returns: list: A list of missing file names. """ - lsrt = lsr.LsrTree(self.folder_name, outfmt="list") + lsrt = util.lsr.LsrTree(self.folder_name, outfmt="list") lsr_files = lsrt.list_files() support_doc_files, med_ascii_files, seq_ascii_files = self.expected_file_lists() @@ -160,7 +161,7 @@ def list_files(self): Returns: list: A list of file names. """ - lsrt = lsr.LsrTree(self.folder_name, outfmt="tree", with_counts=True) + lsrt = util.lsr.LsrTree(self.folder_name, outfmt="tree", with_counts=True) lsr_files = lsrt.list_files() return lsr_files @@ -624,7 +625,7 @@ def find_terms_given_smq_sub(self, subset_df, keep_columns, llt_only, llt_curren def load_fmq_default(self): if self.fmq_list_default is None: try: - tmp = pd.read_csv(os.path.join(util.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0) + tmp = pd.read_csv(os.path.join(mtbp3.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0) tmp = tmp.iloc[:, :-1] tmp.columns = ['fmq', 'pt', 'fmq_pt', 'classification'] self.fmq_list_default = tmp @@ -758,7 +759,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals list1 = pt_df['fmq_class'].unique().tolist() lists = pt_df['fmq_class_soc'].unique().tolist() list2 = pt_df['fmq_class_soc_pt'].unique().tolist() - tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+lists+list2) + tree = util.lsr.ListTree(lst = ['FMQ/']+list0+list1+lists+list2) return tree.list_tree(to_right=to_right) else: pt_df = pt_df.sort_values(by=['fmq', 'classification']) @@ -775,7 +776,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals list1 = pt_df['fmq_class'].unique().tolist() list2 = pt_df['fmq_class_pt'].unique().tolist() - tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+list2) + tree = util.lsr.ListTree(lst = ['FMQ/']+list0+list1+list2) return tree.list_tree(to_right=to_right) if __name__ == "__main__": diff --git a/mtbp3/util/__init__.py b/mtbp3/util/__init__.py index f7ba447..a9fd9f1 100644 --- a/mtbp3/util/__init__.py +++ b/mtbp3/util/__init__.py @@ -1,4 +1,9 @@ from .lsr import * from .cdt import * from .cdtg import * -from .util import * \ No newline at end of file + +import os + +_ROOT = os.path.abspath(os.path.dirname(__file__)) +def get_data(path): + return os.path.join(_ROOT, '../data', path) diff --git a/mtbp3/util/cdt.py b/mtbp3/util/cdt.py index 6a88554..b60d338 100644 --- a/mtbp3/util/cdt.py +++ b/mtbp3/util/cdt.py @@ -172,7 +172,7 @@ def __init__(self, lst=[], label=[], infmt='path'): self.infmt = infmt self.df = pd.DataFrame() self.prelst = pd.DataFrame() - self.out = pd.DataFrame() + self.tree = pd.DataFrame() def __list_tree_df(self): if not isinstance(self.lst, list): @@ -290,11 +290,20 @@ def __list_tree_pre(self, to_right=False): self.prelst = prelst def list_tree(self, to_right=False): + """ + Returns a DataFrame representing the tree structure of the object. + + Parameters: + - to_right (bool): If True, aligns the tree structure to the right by padding with spaces. + + Returns: + - tree (DataFrame): DataFrame representing the tree structure. + """ self.__list_tree_pre(to_right=to_right) if self.prelst.empty: - self.out = pd.DataFrame() - return self.out + self.tree = pd.DataFrame() + return self.tree out_joined = self.prelst.apply(lambda row: ''.join(row), axis=1) @@ -302,8 +311,34 @@ def list_tree(self, to_right=False): max_length = out_joined.str.len().max() out_joined = out_joined.apply(lambda x: x.rjust(max_length)) - self.out = out_joined - return self.out + self.tree = out_joined + return self.tree + + def list_tree_with_keyword(self, keywords, neighbor = 0, outfmt='simple'): + """ + Search for keywords in the list of items. + + Args: + keywords (list): List of keywords to search for. + + Returns: + list: List of items that contain the keywords. + + """ + if not isinstance(keywords, list): + raise ValueError("keywords should be a list.") + + if not self.lst: + return [] + + if outfmt == 'simple': + result = [item for item in self.lst if any(keyword in item for keyword in keywords)] + elif outfmt == 'subtree': + for item in result: + if any(keyword in item for keyword in keywords): + result.append(item) + else: + return [] if __name__ == "__main__": pass diff --git a/mtbp3/util/cdtg.py b/mtbp3/util/cdtg.py index 03620ad..23bb1d1 100644 --- a/mtbp3/util/cdtg.py +++ b/mtbp3/util/cdtg.py @@ -77,6 +77,7 @@ def __init__(self, df, y_col, group_col=None, grid_col=None, grid_wrap=None, x_c self.x_order = [] if grid_kws is None: grid_kws = {} + self.grid_kws = grid_kws sns.set_style("ticks", {'axes.grid': True}) diff --git a/mtbp3/util/util.py b/mtbp3/util/util.py index fdded91..5f2645a 100644 --- a/mtbp3/util/util.py +++ b/mtbp3/util/util.py @@ -17,7 +17,7 @@ _ROOT = os.path.abspath(os.path.dirname(__file__)) def get_data(path): - return os.path.join(_ROOT, 'data', path) + return os.path.join(_ROOT, '../data', path) if __name__ == "__main__": pass \ No newline at end of file diff --git a/tests/test_health_emt.py b/tests/test_health_emt.py index dd5ea55..4d639fa 100644 --- a/tests/test_health_emt.py +++ b/tests/test_health_emt.py @@ -3,12 +3,12 @@ import unittest import pandas as pd -from mtbp3.health.emt import Emt +from mtbp3.health import emt class TestEmt(unittest.TestCase): def setUp(self): - self.emt = Emt() + self.emt = emt.Emt() self.emt.find_files() def test_expected_file_lists(self):