Skip to content

Commit

Permalink
added demographics.py for open house to put code to load demographics…
Browse files Browse the repository at this point in the history
… into a background script'
  • Loading branch information
Rahul-Brito committed Apr 17, 2024
1 parent 33bb6c6 commit a0cc2d9
Showing 1 changed file with 100 additions and 0 deletions.
100 changes: 100 additions & 0 deletions src/b2aiprep/demographics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import pandas as pd
from pandas import DataFrame
from enum import Enum
import json
import pickle


class RepeatInstrument(Enum):
SESSION = 'Session'
ACOUSTIC_TASK = 'Acoustic Task'
RECORDING = 'Recording'
GENERIC_DEMOGRAPHICS = 'Q Generic Demographics'
GENERIC_CONFOUNDERS = 'Q Generic Confounders'
GENERIC_VOICE_PERCEPTION = 'Q Generic Voice Perception'
GENERIC_VOICE_HANDICAP = 'Q Generic Voice Handicap Index Vhi10'
GENERIC_PHQ9_DEPRESSION = 'Q Generic Patient Health Questionnaire9'
GENERIC_GAD7_ANXIETY = 'Q - Generic - Gad7 Anxiety'
RESP_DYSPNEA_INDEX = 'Q - Resp - Dyspnea Index Di'
RESP_LEICESTER_COUGH = 'Q - Resp - Leicester Cough Questionnaire Lcq'
VOICE_VOICE_PROBLEM_SEVERITY = 'Q - Voice - Voice Problem Severity'
MOOD_PANAS = 'Q - Mood - Panas'
MOOD_CUSTOM_AFFECT = 'Q - Mood - Custom Affect Scale'
MOOD_DSM5_ADULT = 'Q - Mood - Dsm5 Adult'
MOOD_PTSD_ADULT = 'Q - Mood - Ptsd Adult'
MOOD_ADHD_ADULT = 'Q - Mood - Adhd Adult'
NEURO_WINOGRAD_SCHEMAS = 'Q - Neuro Winograd Schemas'
NEURO_WORDCOLOR_STROOP = 'Q - Neuro - Wordcolor Stroop'
NEURO_PRODUCTIVE_VOCABULARY = 'Q - Neuro - Productive Vocabulary'
NEURO_RANDOM_ITEM_GENERATION = 'Q - Neuro - Random Item Generation'

def load_csv_file(file_path):
try:
data = pd.read_csv(file_path, low_memory=False, na_values='')
return data
except FileNotFoundError:
print('File not found.')
return None
except pd.errors.ParserError:
print('Error parsing CSV file.')
return None

def pickle_new_data_columns(category: str, column_names: list):

out_file = f'{category}.pkl'
with open(out_file, "wb") as f:
pickle.dump(column_names, f)
print(f'New column names saved to: {out_file}')
return

def load_data_columns():
data_columns = {}
with open('participant_columns.pkl', "rb") as f:
data_columns['participant_columns'] = pickle.load(f)

with open('demographics_columns.pkl', "rb") as f:
data_columns['demographics_columns'] = pickle.load(f)

with open('confounders_columns.pkl', "rb") as f:
data_columns['confounders_columns'] = pickle.load(f)

with open('phq9_columns.pkl', "rb") as f:
data_columns['phq9_columns'] = pickle.load(f)

with open('gad7_columns.pkl', "rb") as f:
data_columns['gad7_columns'] = pickle.load(f)

return data_columns

def get_columns_of_repeat_instrument(df: DataFrame, repeat_instrument: RepeatInstrument):
columns = pd.Index(['record_id'])

data_columns = load_data_columns()
participant_columns=data_columns['participant_columns']
demographics_columns=data_columns['demographics_columns']
confounders_columns=data_columns['confounders_columns']
phq9_columns=data_columns['phq9_columns']
gad7_columns=data_columns['gad7_columns']

if repeat_instrument == RepeatInstrument.SESSION:
columns = columns.append(df.columns[pd.Series(df.columns).str.startswith('session_')])
elif repeat_instrument == RepeatInstrument.ACOUSTIC_TASK:
columns = columns.append(df.columns[pd.Series(df.columns).str.startswith('acoustic_task_')])
elif repeat_instrument == RepeatInstrument.RECORDING:
columns = columns.append(df.columns[pd.Series(df.columns).str.startswith('recording_')])
elif repeat_instrument == RepeatInstrument.GENERIC_DEMOGRAPHICS:
columns = columns.append(pd.Index(demographics_columns))
elif repeat_instrument == RepeatInstrument.GENERIC_CONFOUNDERS:
columns = columns.append(pd.Index(confounders_columns))
elif repeat_instrument == RepeatInstrument.GENERIC_PHQ9_DEPRESSION:
columns = columns.append(pd.Index(phq9_columns))
elif repeat_instrument == RepeatInstrument.GENERIC_GAD7_ANXIETY:
columns = columns.append(pd.Index(gad7_columns))
else:
# add rest of columns for other repeat instruments
raise NotImplementedError('Repeat Instrument not implemented.')
return columns

def get_df_of_repeat_instrument(df: DataFrame, repeat_instrument: RepeatInstrument):
return df[df['redcap_repeat_instrument'] == repeat_instrument.value][get_columns_of_repeat_instrument(df, repeat_instrument)]

0 comments on commit a0cc2d9

Please sign in to comment.