diff --git a/docs/std_cdisc.ipynb b/docs/std_cdisc.ipynb index f39c871f..baac1a9f 100644 --- a/docs/std_cdisc.ipynb +++ b/docs/std_cdisc.ipynb @@ -226,12 +226,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To get the newest protocol ct package information:\n", + "To get the newest Protocol CT package in DataFrame format:\n", "\n", "```python\n", - "title = 'Protocol'\n", - "cl = accessLib(\"/Users/yh2020/cdisc.txt\")\n", + "from mtbp3.stdcdisc.lib import accessLib\n", + "cl = accessLib(\"/user/home/name/cdisc.txt\")\n", "cl.get_ct_list()\n", + "title = 'Protocol'\n", "cl.get_ct_package(title)\n", "cl.get_ct_codelists_df(title)\n", "print(cl.ct_package[title]['ct_df'][['conceptId', 'name', 'preferredTerm']].head())\n", @@ -249,6 +250,31 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To get the newest Glossary CT package in DataFrame format:\n", + "\n", + "```python\n", + "title = 'Glossary'\n", + "cl.get_ct_package(title)\n", + "cl.get_ct_codelists_df(title)\n", + "print(cl.ct_package[title]['ct_df'][['conceptId', 'name', 'group', 'preferredTerm', 'definition']].head())\n", + "```\n", + "\n", + "Output:\n", + "\n", + "```\n", + " conceptId name group preferredTerm definition\n", + "0 C67497 CDISC Glossary CDISC Glossary Terminology The terminology of the Clinical Data Interchan...\n", + "1 C80442 CDISC Glossary Premarket Device Notification 510(k). Premarket Notification (PMN) required ...\n", + "2 C42610 CDISC Glossary Abbreviation A set of letters that are drawn from a word or...\n", + "3 C71733 CDISC Glossary Biological Absorption The process by which medications reach the blo...\n", + "4 C156638 CDISC Glossary Accelerated Approval Regulatory mechanism by which new drugs meant ...\n", + "```" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/mtbp3/stdcdisc/lib.py b/mtbp3/stdcdisc/lib.py index 5e9a158b..48840c1e 100644 --- a/mtbp3/stdcdisc/lib.py +++ b/mtbp3/stdcdisc/lib.py @@ -178,21 +178,25 @@ def get_ct_codelists_df(self, title="", max_level = 3): remaining_list_label = [] for index, item in enumerate(codelists): if isinstance(item, dict): - if 'conceptId' in item.keys() and 'name' not in item.keys(): - item['name']="" + if 'group' not in item.keys(): + item['group']="" if 'synonyms' not in item.keys(): item['synonyms']=[] if 'terms' in item.keys(): + for i in range(len(item['terms'])): + if 'name' not in item['terms'][i].keys(): + item['terms'][i]['name'] = "" + item['terms'][i]['group'] = item['name'] nterms = len(item['terms']) remaining_list.extend(item['terms']) remaining_list_label.extend([codelists_label[index]+'.pseudo'+item['conceptId']]*nterms) else: nterms = 0 - data.append([codelists_label[index], level, item['conceptId'], item['name'], item['preferredTerm'], item['submissionValue'], '; '.join(item['synonyms']), item['definition'], nterms]) + data.append([codelists_label[index], level, item['conceptId'], item['name'], item['group'], item['preferredTerm'], item['submissionValue'], '; '.join(item['synonyms']), item['definition'], nterms]) else: not_processed.append(item) - df = pd.DataFrame(data, columns=['label', 'level', 'conceptId', 'name', 'preferredTerm', 'submissionValue', 'synonyms', 'definition', 'terms']) + df = pd.DataFrame(data, columns=['label', 'level', 'conceptId', 'name', 'group', 'preferredTerm', 'submissionValue', 'synonyms', 'definition', 'terms']) label = df[['label', 'preferredTerm']] df = df.drop('label', axis=1) @@ -202,4 +206,4 @@ def get_ct_codelists_df(self, title="", max_level = 3): return if __name__ == "__main__": - pass + pass \ No newline at end of file