# -*- coding: utf-8 -*-
"""
Created on Thu Mar 12 16:08:14 2020

@author: rushn

Example query to get a BARCODE from a human name:
https://jacksonlabstest.platformforscience.com/TEST/odata/EMPLOYEE?$filter=Name eq 'Daniel Pfalzer'

    from requests.auth import HTTPBasicAuth
    import requests

    my_auth = HTTPBasicAuth(email, password)
    query = queryString
    result = requests.get(query, auth=my_auth)
"""
import os
from datetime import datetime
from statistics import mean

import numpy as np
import pandas as pd


def listFiles(importPath):
    """Return the names of the regular files directly inside importPath."""
    fileList = [f for f in os.listdir(importPath) if os.path.isfile(os.path.join(importPath, f))]
    return fileList


def get_field_sub(x):
    """Return the portion of x before the first underscore."""
    return x.split('_')[0]


def best_fit_slope(ys):
    """Least-squares slope of ys against the four 5-minute time bins, scaled by 100.

    ys is expected to hold one value per 300 s bin (t = 300, 600, 900, 1200 s).
    """
    xs = np.array([300, 600, 900, 1200])
    ys = np.asarray(ys, dtype=float)  # accept a Series or an array
    # Closed-form simple linear regression slope:
    # m = (mean(x)*mean(y) - mean(x*y)) / (mean(x)^2 - mean(x^2))
    m = (((mean(xs) * mean(ys)) - mean(xs * ys)) /
         ((mean(xs) ** 2) - mean(xs ** 2)))
    return m * 100
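
# Illustrative check (kept as a comment so the module imports cleanly): the
# closed-form slope above should agree with numpy's least-squares fit over the
# same four bins. The sample values here are made up, not real assay data.
#
#     demo_xs = np.array([300, 600, 900, 1200])
#     demo_ys = np.array([10.0, 8.0, 6.0, 4.0])
#     assert np.isclose(best_fit_slope(demo_ys), np.polyfit(demo_xs, demo_ys, 1)[0] * 100)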


def readcsv(importPath, skiprows, nrow=None):
    """Read a CSV file, skipping skiprows header lines and reading at most nrow rows."""
    data = pd.read_csv(str(importPath), skiprows=skiprows, nrows=nrow)
    return data


def readxlsx(importPath, skiprows, nrow=None):
    """Read an Excel file, skipping skiprows header lines and reading at most nrow rows."""
    data = pd.read_excel(str(importPath), skiprows=skiprows, nrows=nrow)
    return data


# Accepts a date in the format m/d/yyyy.
def corePfsDateTimeKluge(timestr):
    """Round-trip a m/d/yyyy date through strptime/strftime, zero-padding it to mm/dd/yyyy."""
    dt = datetime.strptime(timestr, '%m/%d/%Y')
    corrected = datetime.strftime(dt, '%m/%d/%Y')
    return corrected
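
# Illustrative only: the kluge normalizes the zero-padding but otherwise
# leaves the date unchanged.
#
#     corePfsDateTimeKluge('3/5/2020')    # -> '03/05/2020'
#     corePfsDateTimeKluge('12/25/2020')  # -> '12/25/2020'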


def calculations_LD(data):
    """Derive light/dark box measures for each animal from its per-zone rows.

    Assumes each EXPT_SAMPLE_BARCODE contributes two rows from a 1200 s trial:
    row 0 for the Dark zone and row 1 for the Light zone.
    """
    # Row 0 (Dark zone) carries latency, entry count, and dark-side times;
    # row 1 (Light zone) carries the light-side times.
    data['Latency to first transition into dark (sec)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['LATENCY (CENTROID)'].transform('nth', 0)
    data['Side changes'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['ENTRY COUNT (CENTROID)'].transform('nth', 0)
    data['Light side time spent (sec)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION (CENTROID)'].transform('nth', 1)
    data['Time mobile light side (sec)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['AMBULATORY TIME (CENTROID)'].transform('nth', 1)
    data['Dark side time spent (sec)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION (CENTROID)'].transform('nth', 0)
    data['Time mobile dark side (sec)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['AMBULATORY TIME (CENTROID)'].transform('nth', 0)
    # Percentages are taken over the portion of the trial after the first
    # transition into the dark side.
    data['Percent time in dark (%)'] = data['Dark side time spent (sec)']/(1200 - data['Latency to first transition into dark (sec)'])*100
    data['Percent time in light (%)'] = (data['Light side time spent (sec)'] - data['Latency to first transition into dark (sec)'])/(1200 - data['Latency to first transition into dark (sec)'])*100
    # An animal that spent no time in one zone never transitioned.
    data.loc[(data['DURATION (CENTROID)'] == 0) & (data['ZONE'] == 'Dark'), 'Transition'] = 'No'
    data.loc[(data['DURATION (CENTROID)'] == 1200) & (data['ZONE'] == 'Light'), 'Transition'] = 'No'
    data.loc[(data['Transition'].isnull()), 'Transition'] = 'Yes'
    # Animals that spent the whole trial on the light side: overwrite the
    # latency and the percentages (which divided by zero above) with the
    # defined values for a no-transition trial.
    data.loc[(data['Light side time spent (sec)'] == 1200), 'Latency to first transition into dark (sec)'] = 1200
    data.loc[(data['Light side time spent (sec)'] == 1200), 'Percent time in dark (%)'] = 0
    data.loc[(data['Light side time spent (sec)'] == 1200), 'Percent time in light (%)'] = 100
    return data
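
# Illustrative input shape assumed by calculations_LD (column subset only;
# the barcode and values are made up):
#
#     EXPT_SAMPLE_BARCODE  ZONE   DURATION (CENTROID)  LATENCY (CENTROID)
#     12345                Dark   900.0                35.2
#     12345                Light  300.0                0.0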


def final_data_LD(data):
    colNameNew = ['EXPT_SAMPLE_BARCODE',
                  'Test Date',
                  'Tester Name',
                  'Arena ID',
                  'Start time',
                  'Sample Duration',
                  'Latency to first transition into dark (sec)',
                  'Side changes',
                  'Light side time spent (sec)',
                  'Time mobile light side (sec)',
                  'Dark side time spent (sec)',
                  'Time mobile dark side (sec)',
                  'Percent time in dark (%)',
                  'Percent time in light (%)',
                  'Transition',
                  'Comment']
    # Selecting the final table of columns for LIMS
    data_final = data[colNameNew].reset_index(drop=True)
    data_final = data_final.drop_duplicates()
    return data_final


def calculations_CDO_OF(data):
    """Whole-arena open field measures.

    Assumes each EXPT_SAMPLE_BARCODE contributes four rows, one per 300 s
    (five-minute) bin of a 1200 s trial, in chronological order.
    """
    # nth 0..3 select the first through fourth five-minute bins.
    data['Start Date and Time'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['START TIME'].transform('nth', 0)
    data['Whole Arena Permanence Time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['Sample Duration'].transform('sum')
    data['Distance Traveled Total (cm)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].transform('sum')
    data['Distance Traveled First Five Minutes (cm)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].transform('nth', 0)
    data['Distance Traveled Second Five Minutes (cm)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].transform('nth', 1)
    data['Distance Traveled Third Five Minutes (cm)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].transform('nth', 2)
    data['Distance Traveled Fourth Five Minutes (cm)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].transform('nth', 3)
    data['Whole arena average speed (cm/s)'] = data['Distance Traveled Total (cm)']/1200
    data['Whole arena resting time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['REST TIME (s)'].transform('sum')
    data['Time Spent Mobile (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['AMBULATORY TIME (s)'].transform('sum')
    data['Number of Rears Total'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['VERTICAL EPISODE COUNT'].transform('sum')
    data['Number of Rears First Five Minutes'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['VERTICAL EPISODE COUNT'].transform('nth', 0)
    data['Number of Rears Second Five Minutes'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['VERTICAL EPISODE COUNT'].transform('nth', 1)
    data['Number of Rears Third Five Minutes'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['VERTICAL EPISODE COUNT'].transform('nth', 2)
    data['Number of Rears Fourth Five Minutes'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['VERTICAL EPISODE COUNT'].transform('nth', 3)
    data['Vertical time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['VERTICAL TIME (s)'].transform('sum')
    data['Clockwise'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['CLOCKWISE REVOLUTIONS'].transform('sum')
    data['Counter Clockwise'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['COUNTER-CLOCKWISE REVOLUTIONS'].transform('sum')
    # Habituation ratio: difference between the fourth- and first-bin rates
    # (each bin scaled by /300*100), spread over the three bin-to-bin steps.
    data['Distance Traveled Habituation Ratio (cm)'] = ((data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].transform('nth', 3)/300*100)-(data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].transform('nth', 0)/300*100))/3
    # Least-squares slope of distance across the four bins, merged back in on barcode.
    distance_avg = data.groupby(['EXPT_SAMPLE_BARCODE'])['TOTAL DISTANCE (cm)'].apply(best_fit_slope)
    distance_avg = distance_avg.rename('Distance Traveled Slope (%)')
    data = pd.merge(data, distance_avg, left_on='EXPT_SAMPLE_BARCODE', right_index=True)
    return data


def calculations_ZDS_OF(data):
    """Zone-based (periphery/corner/center) open field measures.

    The per-zone rows for each (barcode, sample) are collapsed into one
    comma-joined text row, then split back out into per-zone numeric columns.
    Assumes the zone rows arrive in the order periphery, corner, center, and
    that the trial lasts 1200 s.
    """
    # Stringify every column so agg(','.join) can collapse the zone rows.
    data[list(data)] = data[list(data)].astype(str)
    data = data.groupby(['EXPT_SAMPLE_BARCODE', 'SAMPLE', 'Test Date', 'Tester Name', 'Comments'],
                        as_index=False, sort=False).agg(','.join)
    # Split each joined measure back into periphery/corner/center columns.
    data[['DURATION_P', 'DURATION_CORNER', 'DURATION_CENTER']] = data['DURATION (CENTROID)'].str.split(',', expand=True).apply(pd.to_numeric)
    data[['ENTRY_P', 'ENTRY_CORNER', 'ENTRY_CENTER']] = data['ENTRY COUNT (CENTROID)'].str.split(',', expand=True).apply(pd.to_numeric)
    data[['DISTANCE_P', 'DISTANCE_CORNER', 'DISTANCE_CENTER']] = data['TOTAL DISTANCE (CENTROID)'].str.split(',', expand=True).apply(pd.to_numeric)
    data[['REST_P', 'REST_CORNER', 'REST_CENTER']] = data['REST TIME (CENTROID)'].str.split(',', expand=True).apply(pd.to_numeric)
    data[['AMBULATORY_P', 'AMBULATORY_CORNER', 'AMBULATORY_CENTER']] = data['AMBULATORY TIME (CENTROID)'].str.split(',', expand=True).apply(pd.to_numeric)
    data[['EXPT_SAMPLE_BARCODE', 'SAMPLE']] = data[['EXPT_SAMPLE_BARCODE', 'SAMPLE']].apply(pd.to_numeric)
    # Periphery measures combine the periphery and corner zones.
    data['Periphery distance traveled (cm)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['DISTANCE_P'].transform('sum')+data.groupby(['EXPT_SAMPLE_BARCODE'])['DISTANCE_CORNER'].transform('sum')
    data['Periphery resting time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['REST_P'].transform('sum')+data.groupby(['EXPT_SAMPLE_BARCODE'])['REST_CORNER'].transform('sum')
    data['Periphery permanence time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_P'].transform('sum')+data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CORNER'].transform('sum')
    data['Periphery average speed (cm/s)'] = data['Periphery distance traveled (cm)']/data['Periphery permanence time (s)']
    data['Center distance travelled (cm)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['DISTANCE_CENTER'].transform('sum')
    data['Center permanence time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CENTER'].transform('sum')
    data['Center resting time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['REST_CENTER'].transform('sum')
    data['Center average speed (cm/s)'] = data['Center distance travelled (cm)']/data['Center permanence time (s)']
    data['Number of center entries'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['ENTRY_CENTER'].transform('sum')
    data['PctTime Center (%)'] = data['Center permanence time (s)']/1200*100
    data['Corners Permanence Time (s)'] = data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CORNER'].transform('sum')
    data['PctTime Corners (%)'] = data['Corners Permanence Time (s)']/1200*100
    # Habituation ratios: difference between the fourth- and first-bin values
    # (each scaled by /300*100), spread over the three bin-to-bin steps.
    data['PctTime Corners Habituation Ratio (cm)'] = ((data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CORNER'].transform('nth', 3)/300*100)-(data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CORNER'].transform('nth', 0)/300*100))/3
    data['PctTime Center Habituation Ratio (cm)'] = ((data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CENTER'].transform('nth', 3)/300*100)-(data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CENTER'].transform('nth', 0)/300*100))/3
    # Least-squares slopes across the four bins, merged back in on barcode.
    corner_avg = data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CORNER'].apply(best_fit_slope)
    corner_avg = corner_avg.rename('PctTime Corner Slope (%)')
    center_avg = data.groupby(['EXPT_SAMPLE_BARCODE'])['DURATION_CENTER'].apply(best_fit_slope)
    center_avg = center_avg.rename('PctTime Center Slope (%)')
    avgs = pd.merge(corner_avg, center_avg, left_index=True, right_index=True)
    data = pd.merge(data, avgs, left_on='EXPT_SAMPLE_BARCODE', right_index=True)
    return data
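
# Illustrative sketch of the collapse-then-split pattern used above, on a toy
# frame (column names and values are made up):
#
#     toy = pd.DataFrame({'key': [1, 1, 1], 'v': [10.0, 20.0, 30.0]})
#     toy = toy.astype(str).groupby('key', as_index=False, sort=False).agg(','.join)
#     toy[['v_a', 'v_b', 'v_c']] = toy['v'].str.split(',', expand=True).apply(pd.to_numeric)
#     # toy now has one row per key, with the three zone values as columns.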


def data_CDO_OF(data):
    colNameNew = ['EXPT_SAMPLE_BARCODE',
                  'Test Date',
                  'Tester Name',
                  'Arena ID',
                  'Start Date and Time',
                  'Sample Duration',
                  'Whole Arena Permanence Time (s)',
                  'Distance Traveled Total (cm)',
                  'Distance Traveled First Five Minutes (cm)',
                  'Distance Traveled Second Five Minutes (cm)',
                  'Distance Traveled Third Five Minutes (cm)',
                  'Distance Traveled Fourth Five Minutes (cm)',
                  'Whole arena average speed (cm/s)',
                  'Whole arena resting time (s)',
                  'Time Spent Mobile (s)',
                  'Number of Rears Total',
                  'Number of Rears First Five Minutes',
                  'Number of Rears Second Five Minutes',
                  'Number of Rears Third Five Minutes',
                  'Number of Rears Fourth Five Minutes',
                  'Vertical time (s)',
                  'Clockwise',
                  'Counter Clockwise',
                  'Distance Traveled Habituation Ratio (cm)',
                  'Distance Traveled Slope (%)',
                  'Comments']
    # Selecting the final table of columns for LIMS
    data_CDO = data[colNameNew].reset_index(drop=True)
    return data_CDO


def data_ZDS_OF(data):
    colNameNew = ['EXPT_SAMPLE_BARCODE',
                  'Periphery distance traveled (cm)',
                  'Periphery resting time (s)',
                  'Periphery permanence time (s)',
                  'Periphery average speed (cm/s)',
                  'Center distance travelled (cm)',
                  'Center resting time (s)',
                  'Center permanence time (s)',
                  'Center average speed (cm/s)',
                  'Number of center entries',
                  'PctTime Center (%)',
                  'PctTime Center Habituation Ratio (cm)',
                  'PctTime Corners (%)',
                  'PctTime Corners Habituation Ratio (cm)',
                  'Corners Permanence Time (s)',
                  'PctTime Center Slope (%)',
                  'PctTime Corner Slope (%)']
    data_ZDS = data[colNameNew].reset_index(drop=True)
    return data_ZDS


def final_data_Hem(data):
    colNameNew = ['EXPT_SAMPLE_BARCODE',
                  'White Blood Cells (WBC)',
                  'Red Blood Cells (RBC)',
                  'Measured Hemoglobin (mHGB)',
                  'Hematocrit (HCT)',
                  'Mean Cell Volume (MCV)',
                  'Mean Corpuscular hemoglobin (CHg)',
                  'Mean Cell Hemoglobin Concentration (MCHC)',
                  'Platelet Count (PLT) (Optical)',
                  'Red Cell Distr. Width (RDW)',
                  'Mean Platelet Volume (MPV)',
                  'Nucleated Red Blood Cell Count',
                  'Nucleated Red Blood Cell Percentage',
                  'Neurophil Cell Count',
                  'Lymphocyte Cell Count',
                  'Monocyte Cell Count',
                  'Eosinophil Cell Count',
                  'Basophil Cell Count',
                  'Neurophils (NEUT)',
                  'Lymphocytes (LYM)',
                  'Monocytes (MONO)',
                  'Eosinophils (EOS)',
                  'Basophils (BASO)',
                  'Percent Retic',
                  'Reticulocytes (Retic)',
                  'Percent Immature Retic',
                  'Percent Low Fluorescing Retic',
                  'Percent Middle Fluorescing Retic',
                  'Percent High Fluorescing Retic',
                  'Retic Hemoglobin (CHr)',
                  'Platelet Count (Impedance)',
                  'Platelet Count (Optical)',
                  'Experimenter ID',
                  'Date and Time of Blood Collection',
                  'Date of Measurement',
                  'Analyst ID',
                  'Comments',
                  'Date and time of sacrifice']
    # Selecting the final table of columns for LIMS (labels are emitted
    # verbatim, including their existing spellings).
    data['Date and time of sacrifice'] = data['Date-Time Sacrifice/Collection']
    # Positional rename: assumes the incoming columns (plus the column just
    # added, which lands last) line up one-to-one with colNameNew.
    data.columns = colNameNew
    data_final = data[colNameNew].reset_index(drop=True)
    data_final = data_final.drop_duplicates()
    return data_final


def calculations_HB(data):
    """Hole poke measures: per-hole totals and the full poke sequence per animal."""
    # Count how many times each hole was poked, per animal.
    values = dict(data.groupby('EXPT_SAMPLE_BARCODE')['HOLE NAME'].apply(list))
    poke_counts = {}
    for key in values:
        test = {x: values[key].count(x) for x in values[key]}
        poke_counts.update({key: test})
    counts = pd.DataFrame.from_dict(poke_counts, orient='index')
    counts = counts.reindex(sorted(counts.columns), axis=1)
    counts.columns = ['Total Hole Pokes Hole ' + str(col) for col in counts.columns]
    data = pd.merge(data, counts, left_on=['EXPT_SAMPLE_BARCODE'], right_index=True)
    # Record the order of pokes as a dash-separated sequence, then collapse to
    # one row per animal. Holes an animal never poked end up as NaN counts,
    # which are filled with 0.
    data['HOLE NAME'] = data['HOLE NAME'].astype(str)
    sequence = data.groupby(['EXPT_SAMPLE_BARCODE'])['HOLE NAME'].apply('-'.join).reset_index()
    sequence = sequence.rename(columns={'HOLE NAME': 'Holepoke sequence'})
    data = pd.merge(data, sequence, on=['EXPT_SAMPLE_BARCODE'])
    data = data.drop(columns=['HOLE NAME']).drop_duplicates().reset_index(drop=True)
    data.fillna(0, inplace=True)
    return data
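
# A minimal end-to-end sketch of how these helpers chain together for one
# assay. The path, skiprows value, and the loop itself are assumptions for
# illustration, not part of this module:
#
#     import_path = r'C:\data\light_dark'  # hypothetical folder
#     for name in listFiles(import_path):
#         raw = readcsv(os.path.join(import_path, name), skiprows=0)
#         out = final_data_LD(calculations_LD(raw))
#         # out is one deduplicated row per EXPT_SAMPLE_BARCODE, ready for LIMS.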