-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path210123_EDA04.py
55 lines (45 loc) · 2.34 KB
/
210123_EDA04.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
# coding: utf-8
# This EDA explores patient counts.
import pandas as pd
import numpy as np
import math
import statistics
import glob
import tqdm
# Years for which a yearly lab export is expected to exist.
listOfIncludedYears = [2009, 2016, 2017, 2018, 2019, 2020]

# Path template; the literal substring 'year' is replaced by the actual year.
dataFilePath = 'Data/lab_data/req1512_lab_year.csv'

# Only these columns are loaded from each yearly CSV file.
labColumnsToLoad = [
    'MRN', 'PAT_ENC_CSN_ID', 'ORDER_PROC_ID', 'ORDERING_DATE', 'COMMON_NAME',
    'COMPONENT_NAME', 'BASE_NAME', 'ORD_VALUE', 'REFERENCE_LOW',
    'REFERENCE_HIGH', 'REFERENCE_UNIT',
]


def loadLabDataFrame(filePath):
    """Read one yearly lab CSV file and return it as a DataFrame.

    Only the columns named in ``labColumnsToLoad`` are read, and the ``MRN``
    column is converted to strings after loading.

    Args:
        filePath: Path of the CSV file to read.

    Returns:
        The loaded DataFrame with ``MRN`` cast to ``str``.

    Raises:
        FileNotFoundError: If ``filePath`` does not exist.
        ValueError: If one of the expected columns is missing from the file.
    """
    labDataFrame = pd.read_csv(filePath, usecols=labColumnsToLoad)
    labDataFrame['MRN'] = labDataFrame['MRN'].astype('str')
    return labDataFrame


def filterByCommonName(labDataFrame, commonName):
    """Return the rows of ``labDataFrame`` whose COMMON_NAME equals ``commonName``."""
    return labDataFrame[labDataFrame['COMMON_NAME'] == commonName]


# The file-reading loop only runs when the file is executed as a script, so the
# helpers above can be imported without touching the data files. The variables
# assigned below are still module-level names, exactly as before.
if __name__ == '__main__':
    # The [:1] slice restricts the run to the first listed year only.
    for eachYear in listOfIncludedYears[:1]:
        filePath = dataFilePath.replace('year', str(eachYear))
        workingDataFrame = loadLabDataFrame(filePath)
        thisListOfMRNs = workingDataFrame['MRN'].unique().tolist()
        filteredMrnDataFrame = filterByCommonName(workingDataFrame, 'ALT')
        # Series has no ``toList`` method; the correct spelling is ``tolist``.
        listOfReference = filteredMrnDataFrame['REFERENCE_LOW'].tolist()
# NOTE: everything between the triple quotes below is a plain string literal that
# is evaluated and discarded, so none of the code inside it runs. Its text reads
# the yearly lab files for 2009-2020, prints the number of unique MRNs per file,
# and prints the combined total before and after removing duplicates.
# It appears to be a disabled earlier draft of this analysis (it does not load
# the REFERENCE_LOW / REFERENCE_HIGH columns) - confirm before deleting.
'''
listOfYearlyImportFiles = ['Data/lab_data/req1512_lab_2009.csv',
'Data/lab_data/req1512_lab_2010.csv',
'Data/lab_data/req1512_lab_2011.csv',
'Data/lab_data/req1512_lab_2012.csv',
'Data/lab_data/req1512_lab_2013.csv',
'Data/lab_data/req1512_lab_2014.csv',
'Data/lab_data/req1512_lab_2015.csv',
'Data/lab_data/req1512_lab_2016.csv',
'Data/lab_data/req1512_lab_2017.csv',
'Data/lab_data/req1512_lab_2018.csv',
'Data/lab_data/req1512_lab_2019.csv',
'Data/lab_data/req1512_lab_2020.csv']
listOfMRNs = []
for eachYearlyImportFile in listOfYearlyImportFiles:
workingDataFrame = pd.read_csv(eachYearlyImportFile, usecols = ['MRN', 'PAT_ENC_CSN_ID', 'ORDER_PROC_ID','ORDERING_DATE','COMMON_NAME','COMPONENT_NAME','BASE_NAME','ORD_VALUE','REFERENCE_UNIT'])
workingDataFrame['MRN'] = workingDataFrame['MRN'].astype('str')
thisListOfMRNs = workingDataFrame['MRN'].unique().tolist()
print(len(thisListOfMRNs))
listOfMRNs += thisListOfMRNs
print('total before removal of duplicates:',len(listOfMRNs))
# remove duplicates
listOfMRNs = list(set(listOfMRNs))
print('total after removal of duplicates:', len(listOfMRNs))
'''