-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path211225_EDA02_yn.py
39 lines (32 loc) · 1.56 KB
/
211225_EDA02_yn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python
# coding: utf-8
# EDA: count the unique patients (MRNs) found across the yearly lab-data CSV files.
import pandas as pd
import numpy as np
import math
import statistics
import glob
import tqdm
# Yearly lab-data CSV exports: one file per calendar year, 2009 through 2020.
listOfYearlyImportFiles = [
    'Data/lab_data/req1512_lab_{}.csv'.format(year)
    for year in range(2009, 2021)
]
# Gather the distinct MRNs seen in each yearly file, then count distinct
# patients across all years.
listOfMRNs = []
for eachYearlyImportFile in listOfYearlyImportFiles:
    # Only the MRN column is needed for a patient count, so the other lab
    # columns are not parsed. Reading MRN as text keeps each identifier exactly
    # as written in the file, so the same MRN compares equal across years no
    # matter how pandas would otherwise have inferred the column's dtype.
    workingDataFrame = pd.read_csv(
        eachYearlyImportFile,
        usecols=['MRN'],
        dtype={'MRN': str},
    )
    # Rows without an MRN do not identify a patient; leave them out so a
    # missing value is not counted as an extra "nan" patient.
    thisListOfMRNs = workingDataFrame['MRN'].dropna().unique().tolist()
    # Number of distinct MRNs in this year's file.
    print(len(thisListOfMRNs))
    listOfMRNs += thisListOfMRNs
# Sum of the per-year distinct counts; patients seen in several years are
# counted once per year here.
print('total before removal of duplicates:', len(listOfMRNs))
# remove duplicates
listOfMRNs = list(set(listOfMRNs))
print('total after removal of duplicates:', len(listOfMRNs))