-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsynthea_data.py
52 lines (48 loc) · 2.06 KB
/
synthea_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import random
import sqlite3
from datetime import datetime
from typing import Any, Dict, Optional, Tuple

import pandas as pd
from dateutil.relativedelta import relativedelta
class SyntheaData:
    """
    Handles operations related to processing and retrieving patient data from a consolidated dataset.

    The configuration mapping is read for two keys: ``'database_path'``
    (passed to ``sqlite3.connect``) and ``'cleaned_data_csv_path'``
    (passed to ``pandas.read_csv``).
    """

    def __init__(self, config: Dict[str, Any]) -> None:
        """
        Initializes with configuration settings.

        Args:
            config: Settings mapping; stored unchanged on ``self.config``.
        """
        self.config = config

    def import_cleaned_data_to_sqlite(self) -> None:
        """
        Imports cleaned medical data from a CSV file into an SQLite database.

        The CSV at ``config['cleaned_data_csv_path']`` is loaded and written to
        the ``cleaned_medical_data`` table of the database at
        ``config['database_path']``. An existing table of that name is
        replaced, and the DataFrame index is not stored.

        Raises:
            KeyError: If a required configuration key is missing.
            Exception: Whatever ``pandas.read_csv`` or ``DataFrame.to_sql``
                raise (e.g. ``FileNotFoundError`` for a missing CSV).
        """
        conn = sqlite3.connect(self.config['database_path'])
        try:
            df = pd.read_csv(self.config['cleaned_data_csv_path'])
            df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
        finally:
            # Release the database handle even when reading or writing fails.
            conn.close()

    def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
        """
        Retrieves a random patient's data who has been diagnosed with a specified condition.

        Rows of ``cleaned_medical_data`` are matched when ``diagnosis`` occurs
        as a substring (SQL ``LIKE``) of either ``description_cond`` or
        ``reasondescription``; one matching row is then sampled at random.

        Args:
            diagnosis: Text to search for. It is bound as a query parameter,
                but ``%`` and ``_`` inside it still act as LIKE wildcards.

        Returns:
            A tuple ``(patient_data, modality, body_area)``. ``patient_data``
            has the keys ``'age'``, ``'gender'``, ``'conditions'``,
            ``'observations'`` and ``'care_plans'``. ``modality`` and
            ``body_area`` come from the columns of the same name and are
            ``None`` when the column is absent. When no row matches, the
            result is ``({}, None, None)``.
        """
        query = """
            SELECT * FROM cleaned_medical_data
            WHERE description_cond LIKE ? OR reasondescription LIKE ?
        """
        like_pattern = f'%{diagnosis}%'

        conn = sqlite3.connect(self.config['database_path'])
        try:
            df = pd.read_sql_query(query, conn, params=(like_pattern, like_pattern))
        finally:
            # Release the database handle even when the query fails.
            conn.close()

        if df.empty:
            return {}, None, None

        selected_row = df.sample(n=1).iloc[0]

        # Age in whole years between now and the row's BIRTHDATE (YYYY-MM-DD).
        birth_date = datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')
        patient_data = {
            'age': relativedelta(datetime.now(), birth_date).years,
            'gender': selected_row['GENDER'],
            'conditions': [selected_row['description_cond']],
            # Optional columns fall back to an empty string when absent.
            'observations': [selected_row.get('observation', '')],
            'care_plans': [selected_row.get('DESCRIPTION_careplan', '')],
        }
        modality = selected_row.get('modality', None)
        body_area = selected_row.get('body_area', None)
        return patient_data, modality, body_area