-
Notifications
You must be signed in to change notification settings - Fork 0
/
211225_EDA03_yn.py
58 lines (49 loc) · 2.59 KB
/
211225_EDA03_yn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# coding: utf-8
# This EDA script converts the original yearly lab data into patient-level files so the data is easier to pull.
import glob
import os
import statistics

import numpy as np
import pandas as pd
import tqdm
# Years whose yearly lab extract is split into one CSV per patient (MRN).
# The commented-out list is the full 2009-2020 range; the active list limits
# a run to a subset of those years.
#listOfIncludedYears = [2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020]
listOfIncludedYears = [2019, 2020]

# Path templates: the literal token 'year' (and 'mrn' for output files) is
# replaced with the actual values further down.
dataFilePath = 'Data/lab_data/req1512_lab_year.csv'
saveFilePath = 'Data/lab_data_patientLvl/mrn_year.csv'

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the first file is written.
os.makedirs(os.path.dirname(saveFilePath), exist_ok=True)

for eachYear in listOfIncludedYears:
    print('working on:', eachYear)
    filePath = dataFilePath.replace('year', str(eachYear))
    workingDataFrame = pd.read_csv(filePath)
    # Columns listed by the original author:
    #['MRN', 'PAT_ENC_CSN_ID', 'ORDER_PROC_ID', 'ORDERING_DATE', 'COMMON_NAME',
    # 'COMPONENT_NAME', 'BASE_NAME', 'ORD_VALUE', 'REFERENCE_UNIT']

    # MRNs are used as file-name fragments, so they must be strings.
    # NOTE(review): if read_csv parses MRN as a numeric column, leading zeros
    # are already lost before this cast - consider dtype={'MRN': str} in
    # read_csv after confirming against the data.
    workingDataFrame['MRN'] = workingDataFrame['MRN'].astype('str')

    # Substitute the year once per file (it is invariant across patients) and
    # before the MRN, so the MRN text itself is never re-scanned for 'year'.
    yearSaveFilePath = saveFilePath.replace('year', str(eachYear))

    # One grouping pass instead of a full boolean-mask scan per patient.
    # sort=False keeps groups in first-appearance order (the order unique()
    # would give); each group keeps its original row order and index, so the
    # written files match what per-MRN filtering would produce.
    groupedByMRN = workingDataFrame.groupby('MRN', sort=False)
    for eachMRN, mrnDataFrame in tqdm.tqdm(groupedByMRN, total=groupedByMRN.ngroups):
        thisSaveFilePath = yearSaveFilePath.replace('mrn', eachMRN)
        mrnDataFrame.to_csv(thisSaveFilePath)
# Disabled code: the triple-quoted string below is never assigned or used, so
# nothing inside it runs. It holds an earlier snippet that reads every yearly
# lab file (2009-2020), collects the MRNs of each file as strings, and prints
# the total number of MRNs before and after removing duplicates.
'''
listOfYearlyImportFiles = ['Data/lab_data/req1512_lab_2009.csv',
'Data/lab_data/req1512_lab_2010.csv',
'Data/lab_data/req1512_lab_2011.csv',
'Data/lab_data/req1512_lab_2012.csv',
'Data/lab_data/req1512_lab_2013.csv',
'Data/lab_data/req1512_lab_2014.csv',
'Data/lab_data/req1512_lab_2015.csv',
'Data/lab_data/req1512_lab_2016.csv',
'Data/lab_data/req1512_lab_2017.csv',
'Data/lab_data/req1512_lab_2018.csv',
'Data/lab_data/req1512_lab_2019.csv',
'Data/lab_data/req1512_lab_2020.csv']
listOfMRNs = []
for eachYearlyImportFile in listOfYearlyImportFiles:
workingDataFrame = pd.read_csv(eachYearlyImportFile, usecols = ['MRN', 'PAT_ENC_CSN_ID', 'ORDER_PROC_ID','ORDERING_DATE','COMMON_NAME','COMPONENT_NAME','BASE_NAME','ORD_VALUE','REFERENCE_UNIT'])
workingDataFrame['MRN'] = workingDataFrame['MRN'].astype('str')
thisListOfMRNs = workingDataFrame['MRN'].unique().tolist()
print(len(thisListOfMRNs))
listOfMRNs += thisListOfMRNs
print('total before removal of duplicates:',len(listOfMRNs))
# remove duplicates
listOfMRNs = list(set(listOfMRNs))
print('total after removal of duplicates:', len(listOfMRNs))
'''