Skip to content

Commit

Permalink
v0.2.21
Browse files Browse the repository at this point in the history
  • Loading branch information
yh202109 committed Jul 28, 2024
1 parent 01bc109 commit 7f0e60f
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 21 deletions.
6 changes: 6 additions & 0 deletions mtbp3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@
from importlib.metadata import version
__version__ = version(__package__)


import os

_ROOT = os.path.abspath(os.path.dirname(__file__))
def get_data(path):
    """Return the location of a bundled data resource.

    Args:
        path: Path of the resource relative to the package's ``data``
            directory.

    Returns:
        ``path`` joined onto the ``data`` directory that sits next to this
        module (``_ROOT`` is this module's absolute directory).
    """
    data_dir = os.path.join(_ROOT, 'data')
    return os.path.join(data_dir, path)
12 changes: 7 additions & 5 deletions mtbp3/health/ectd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@

import pandas as pd
import os
from mtbp3.util import cdt, util
import mtbp3
from mtbp3 import util

class ctoc_by_fda:
def __init__(self, ectd_version="3.2.2", ctoc_version="2.3.3"):
assert isinstance(ectd_version, str) and all(char.isdigit() or char == '.' for char in ectd_version), "Version must be a string with integers and dots"
assert isinstance(ctoc_version, str) and all(char.isdigit() or char == '.' for char in ctoc_version), "Version must be a string with integers and dots"
self.ectd_version = ectd_version
self.ctoc_version = ctoc_version
self.folder_name = util.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt')
self.folder_name = mtbp3.get_data(f'supp_ectd/fda_ectd{ectd_version}_ctocv{ctoc_version}.txt')
self.ctoc = self.__load_list()
print(self.folder_name)

def __load_list(self):
#file_path = f'./mtbp3/data/supp_ectd/fda_ectd{self.ectd_version}_ctocv{ctoc_version}.txt'
Expand All @@ -43,7 +45,7 @@ def show_ctoc_tree(self, module=None, to_right=False):
module = 1

filtered_ctoc = [item for item in self.ctoc if item.startswith(str(module))]
tree = cdt.ListTree(lst=filtered_ctoc, infmt='dotspace')
tree = mtbp3.util.cdt.ListTree(lst=filtered_ctoc, infmt='dotspace')
return tree.list_tree(to_right=to_right)

@staticmethod
Expand Down Expand Up @@ -109,9 +111,9 @@ def find_section_given_words(self, words, outfmt='simple', include='up', to_righ
out_colored.append(f"{first_part} {colored_second_part}")
else:
out_colored.append(row)
out_tree = cdt.ListTree(lst=out_colored, infmt='dotspace')
out_tree = mtbp3.util.cdt.ListTree(lst=out_colored, infmt='dotspace')
else:
out_tree = cdt.ListTree(lst=out, infmt='dotspace')
out_tree = mtbp3.util.cdt.ListTree(lst=out, infmt='dotspace')
return out_tree.list_tree(to_right=to_right)
else:
raise ValueError("Invalid value for outfmt. Supported values are 'simple' and 'tree'.")
Expand Down
15 changes: 8 additions & 7 deletions mtbp3/health/emt.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
import re
import numpy as np
import pandas as pd
from mtbp3.util import util, lsr, cdt
import mtbp3
from mtbp3 import util

class Emt:
"""A class representing MedDRA terms.
Expand All @@ -46,7 +47,7 @@ def __init__(self, folder_name=''):
self.folder_name = folder_name
self.demo = False
else:
self.folder_name = util.get_data('test_emt/MedDRA')
self.folder_name = mtbp3.get_data('test_emt/MedDRA')
self.demo = True

self.version_number = "00.0"
Expand Down Expand Up @@ -126,7 +127,7 @@ def find_files(self):
Returns:
list: A list of missing file names.
"""
lsrt = lsr.LsrTree(self.folder_name, outfmt="list")
lsrt = util.lsr.LsrTree(self.folder_name, outfmt="list")
lsr_files = lsrt.list_files()
support_doc_files, med_ascii_files, seq_ascii_files = self.expected_file_lists()

Expand Down Expand Up @@ -160,7 +161,7 @@ def list_files(self):
Returns:
list: A list of file names.
"""
lsrt = lsr.LsrTree(self.folder_name, outfmt="tree", with_counts=True)
lsrt = util.lsr.LsrTree(self.folder_name, outfmt="tree", with_counts=True)
lsr_files = lsrt.list_files()
return lsr_files

Expand Down Expand Up @@ -624,7 +625,7 @@ def find_terms_given_smq_sub(self, subset_df, keep_columns, llt_only, llt_curren
def load_fmq_default(self):
if self.fmq_list_default is None:
try:
tmp = pd.read_csv(os.path.join(util.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0)
tmp = pd.read_csv(os.path.join(mtbp3.get_data('test_emt/FMQ'), "FMQ_Consolidated_List.csv"), delimiter=',', header=0)
tmp = tmp.iloc[:, :-1]
tmp.columns = ['fmq', 'pt', 'fmq_pt', 'classification']
self.fmq_list_default = tmp
Expand Down Expand Up @@ -758,7 +759,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals
list1 = pt_df['fmq_class'].unique().tolist()
lists = pt_df['fmq_class_soc'].unique().tolist()
list2 = pt_df['fmq_class_soc_pt'].unique().tolist()
tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+lists+list2)
tree = util.lsr.ListTree(lst = ['FMQ/']+list0+list1+lists+list2)
return tree.list_tree(to_right=to_right)
else:
pt_df = pt_df.sort_values(by=['fmq', 'classification'])
Expand All @@ -775,7 +776,7 @@ def show_fmq_tree(self, fmq=[], with_soc=False, ignore_case=False, to_right=Fals

list1 = pt_df['fmq_class'].unique().tolist()
list2 = pt_df['fmq_class_pt'].unique().tolist()
tree = lsr.ListTree(lst = ['FMQ/']+list0+list1+list2)
tree = util.lsr.ListTree(lst = ['FMQ/']+list0+list1+list2)
return tree.list_tree(to_right=to_right)

if __name__ == "__main__":
Expand Down
7 changes: 6 additions & 1 deletion mtbp3/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from .lsr import *
from .cdt import *
from .cdtg import *
from .util import *

import os

_ROOT = os.path.abspath(os.path.dirname(__file__))
def get_data(path):
    """Return the location of a bundled data resource.

    Args:
        path: Path of the resource relative to the ``data`` directory.

    Returns:
        ``path`` joined onto ``<_ROOT>/../data``, where ``_ROOT`` is this
        module's absolute directory. The result is not normalized, so the
        ``..`` component is kept verbatim in the returned string.
    """
    data_dir = os.path.join(_ROOT, '../data')
    return os.path.join(data_dir, path)
45 changes: 40 additions & 5 deletions mtbp3/util/cdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def __init__(self, lst=[], label=[], infmt='path'):
self.infmt = infmt
self.df = pd.DataFrame()
self.prelst = pd.DataFrame()
self.out = pd.DataFrame()
self.tree = pd.DataFrame()

def __list_tree_df(self):
if not isinstance(self.lst, list):
Expand Down Expand Up @@ -290,20 +290,55 @@ def __list_tree_pre(self, to_right=False):
self.prelst = prelst

def list_tree(self, to_right=False):
"""
Builds the text rendering of the tree, one string per row.
Parameters:
- to_right (bool): If True, right-justifies every row by left-padding it with spaces to the width of the longest row.
Returns:
- tree (Series): pandas Series holding one joined string per row; an empty DataFrame is returned instead when there are no rows to render.
"""
self.__list_tree_pre(to_right=to_right)

if self.prelst.empty:
self.out = pd.DataFrame()
return self.out
self.tree = pd.DataFrame()
return self.tree

out_joined = self.prelst.apply(lambda row: ''.join(row), axis=1)

if to_right:
max_length = out_joined.str.len().max()
out_joined = out_joined.apply(lambda x: x.rjust(max_length))

self.out = out_joined
return self.out
self.tree = out_joined
return self.tree

def list_tree_with_keyword(self, keywords, neighbor = 0, outfmt='simple'):
    """
    Search the stored list of items for any of the given keywords.

    Args:
        keywords (list): Substrings to look for. An item matches when it
            contains at least one of them (case-sensitive ``in`` test).
        neighbor (int): Accepted for interface compatibility; not used by
            the current implementation.
        outfmt (str): ``'simple'`` or ``'subtree'``. Both currently return
            the matching items in their original order.
            NOTE(review): ``'subtree'`` presumably should also include
            related entries of each match -- confirm the intended output.

    Returns:
        list: Items of ``self.lst`` that contain at least one keyword.
        An empty list is returned when ``self.lst`` is empty, when nothing
        matches, or when ``outfmt`` is not a supported value.

    Raises:
        ValueError: If ``keywords`` is not a list.
    """
    if not isinstance(keywords, list):
        raise ValueError("keywords should be a list.")

    if not self.lst:
        return []

    # Unsupported formats keep the original "return nothing" behavior.
    if outfmt not in ('simple', 'subtree'):
        return []

    # The previous version never returned the 'simple' result and, for
    # 'subtree', iterated over `result` before it was assigned. Build the
    # match list once and return it for both formats.
    return [
        item for item in self.lst
        if any(keyword in item for keyword in keywords)
    ]

if __name__ == "__main__":
pass
Expand Down
1 change: 1 addition & 0 deletions mtbp3/util/cdtg.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(self, df, y_col, group_col=None, grid_col=None, grid_wrap=None, x_c
self.x_order = []
if grid_kws is None:
grid_kws = {}

self.grid_kws = grid_kws
sns.set_style("ticks", {'axes.grid': True})

Expand Down
2 changes: 1 addition & 1 deletion mtbp3/util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

_ROOT = os.path.abspath(os.path.dirname(__file__))
def get_data(path):
return os.path.join(_ROOT, 'data', path)
return os.path.join(_ROOT, '../data', path)

if __name__ == "__main__":
pass
4 changes: 2 additions & 2 deletions tests/test_health_emt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import unittest
import pandas as pd
from mtbp3.health.emt import Emt
from mtbp3.health import emt

class TestEmt(unittest.TestCase):

def setUp(self):
self.emt = Emt()
self.emt = emt.Emt()
self.emt.find_files()

def test_expected_file_lists(self):
Expand Down

0 comments on commit 7f0e60f

Please sign in to comment.