-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.py
86 lines (63 loc) · 2.79 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from __future__ import annotations
import logging
import sys
from typing import TypedDict
import yaml
class Config(TypedDict):
    """Structure of the config dict passed to ``extract_UKBB_tabular_data``.

    The keys fall into three groups: filters selecting what to extract,
    output controls applied to the extracted data, and controls specific to
    the wide (pivoted) output.

    Note: ``load_config`` removes ``None`` entries from every list value, so
    a list written as ``[null]`` in the YAML file arrives as an empty list.
    """

    ## Filtering section
    # List of FieldIDs to extract; null to extract all.
    # See Data_Dictionary_Showcase.tsv for a concise list.
    # Example: Sex = 31
    FieldIDs: list[int] | list[None]
    # Instances (aka timepoints) to extract, 1-4; null for all.
    InstanceIDs: list[int] | list[None]
    # Specific subjects to extract; null for all.
    SubjectIDs: list[int] | list[None]
    # NOTE(review): presumably paths to files listing subject IDs, used
    # alongside SubjectIDs -- confirm against the consumer of this config.
    SubjectIDFiles: list[str] | list[None]
    # Array indices to extract for fields with array components; null for all.
    ArrayIDs: list[int] | list[None]
    # Pre-defined Categories of FieldIDs; their FieldIDs are added to the
    # FieldIDs list above. Null for none.
    Categories: list[int] | list[None]

    ## Output control section
    # Replicate non-instanced data (e.g. Sex and other single-point
    # measurements) across all instances.
    replicate_non_instanced: bool
    # Use the data dictionary to rename FieldIDs as <Name>_<FieldID>.
    recode_field_names: bool
    # Use the data dictionary and coding file to replace FieldValues with
    # their decoded entries.
    recode_data_values: bool
    # Some FieldValues were saved as empty strings instead of NA; drop these.
    drop_empty_strings: bool
    # List of string responses to map to null.
    drop_null_strings: list[str] | list[None]
    # NOTE(review): presumably the numeric counterpart of drop_null_strings
    # (numeric responses to map to null) -- confirm.
    drop_null_numerics: list[float] | list[None]

    ## Wide output control
    # Produce a wide (pivoted) DataFrame in addition to the filtered narrow
    # frame.
    wide: bool
    # Use the data dictionary to assign proper datatypes to columns in the
    # wide output. Only applies to the binary arrow format.
    recode_wide_column_valuetypes: bool
    # Attempt to split compound-type FieldValues into a list in the wide
    # output.
    convert_compound_to_list: bool
    # When recode_wide_column_valuetypes is true, some of the string values
    # produced by recode_data_values=true will break setting the column
    # datatypes. Those strings are substituted with the values set below.
    convert_less_than_value_integer: int
    convert_less_than_value_continuous: int | float
def load_config(config_file: str) -> Config:
    """Load the extraction config from a YAML file.

    ``None`` entries are removed from every top-level list value, so a list
    written as ``[null]`` in the file is returned as an empty list. All other
    values are returned exactly as parsed; keys and value types are NOT
    validated against ``Config``.

    Args:
        config_file: Path to the YAML config file.

    Returns:
        The parsed top-level mapping, with ``None`` entries stripped from its
        list values.

    Raises:
        SystemExit: With status 1, after logging the problem, if the file
            does not exist, cannot be parsed as YAML, or does not contain a
            mapping at the top level (this includes an empty file).
    """
    try:
        # YAML streams are Unicode; do not depend on the platform's locale
        # encoding when decoding the file.
        with open(config_file, "r", encoding="utf-8") as stream:
            loaded = yaml.safe_load(stream)
    except FileNotFoundError as exc:
        logging.exception(exc)
        sys.exit(1)
    except yaml.YAMLError as exc:
        logging.exception(exc)
        sys.exit(1)

    # safe_load returns None for an empty document and a list/scalar for a
    # non-mapping one; fail the same way as the other load errors instead of
    # crashing with an AttributeError further down.
    if not isinstance(loaded, dict):
        logging.error(
            "Config file %s does not contain a YAML mapping at the top level",
            config_file,
        )
        sys.exit(1)

    # Drop null placeholders from list values; leave every other value as-is.
    return {
        key: [item for item in value if item is not None]
        if isinstance(value, list)
        else value
        for key, value in loaded.items()
    }