From 8440ae44b6c77c393c4db0577780348618bf372e Mon Sep 17 00:00:00 2001 From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com> Date: Sun, 7 Apr 2024 23:10:43 +0100 Subject: [PATCH 1/9] see changes on config.yaml file and also main.py --- .idea/vcs.xml | 6 ++ Prompt_engineer/config.yaml | 2 + Prompt_engineer/main.py | 77 +++++++++++++++++++ .../synthea_data.py | 28 +++---- config.yaml | 2 - main.py | 65 ---------------- 6 files changed, 100 insertions(+), 80 deletions(-) create mode 100644 .idea/vcs.xml create mode 100644 Prompt_engineer/config.yaml create mode 100644 Prompt_engineer/main.py rename synthea_data.py => Prompt_engineer/synthea_data.py (67%) delete mode 100644 config.yaml delete mode 100644 main.py diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml new file mode 100644 index 0000000..4f320e9 --- /dev/null +++ b/Prompt_engineer/config.yaml @@ -0,0 +1,2 @@ +database_path: '/Users/ayodejioyesanya/Desktop/sqlite-tools-osx-x86-3420000/health_data.db' +cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv' diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py new file mode 100644 index 0000000..e863073 --- /dev/null +++ b/Prompt_engineer/main.py @@ -0,0 +1,77 @@ +import pandas as pd +import sqlite3 +from datetime import datetime +from dateutil.relativedelta import relativedelta +from typing import Dict, Any, Tuple, Optional +import yaml +import argparse + + +class SyntheaData: + """ + Handles operations related to processing and retrieving patient data from a consolidated dataset. + """ + + def __init__(self, config: Dict[str, Any]) -> None: + self.config = config + + def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]: + conn = sqlite3.connect(self.config['database_path']) + query = """ + SELECT * FROM cleaned_medical_data + WHERE description_cond LIKE ? OR reasondescription LIKE ? + """ + df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%')) + conn.close() + + if not df.empty: + selected_row = df.sample(n=1).iloc[0] + patient_data = { + 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years, + 'gender': selected_row['GENDER'], + 'conditions': [selected_row['description_cond']], + 'observations': [selected_row.get('observation', '')], + 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')], + } + modality = selected_row.get('modality', None) + body_area = selected_row.get('body_area', None) + return patient_data, modality, body_area + else: + return {}, None, None + + def import_cleaned_data_to_sqlite(self) -> None: + conn = sqlite3.connect(self.config['database_path']) + df = pd.read_csv(self.config['cleaned_data_csv_path']) + df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False) + conn.close() + + +def load_config(config_path: str) -> Dict[str, Any]: + with open(config_path, 'r') as file: + config = yaml.safe_load(file) + return config + + +def parse_arguments() -> argparse.Namespace: + parser = argparse.ArgumentParser(description='Medical Report Generator') + # Update the default path to the full path of your config.yaml file + parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.') + return parser.parse_args() + + +def main(): + args = parse_arguments() + config = load_config(args.config_path) + + data_processor = SyntheaData(config) + # Call import_cleaned_data_to_sqlite to ensure the database is populated before making queries + data_processor.import_cleaned_data_to_sqlite() # Make sure this line is uncommented and called here + + # Example usage + diagnosis = "Example Diagnosis" + patient_data, modality, body_area = data_processor.get_patient_data_by_diagnosis(diagnosis) + print(patient_data, modality, body_area) + + +if __name__ == '__main__': + main() diff --git a/synthea_data.py b/Prompt_engineer/synthea_data.py similarity index 67% rename from synthea_data.py rename to Prompt_engineer/synthea_data.py index 34e32e8..3e901f7 100644 --- a/synthea_data.py +++ b/Prompt_engineer/synthea_data.py @@ -2,8 +2,8 @@ import sqlite3 from datetime import datetime from dateutil.relativedelta import relativedelta -from typing import Dict, Any, Tuple -import random +from typing import Dict, Any, Tuple, Optional + class SyntheaData: """ @@ -15,15 +15,6 @@ def __init__(self, config: Dict[str, Any]) -> None: """ self.config = config - def import_cleaned_data_to_sqlite(self) -> None: - """ - Imports cleaned medical data from a CSV file into an SQLite database. - """ - conn = sqlite3.connect(self.config['database_path']) - df = pd.read_csv(self.config['cleaned_data_csv_path']) - df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False) - conn.close() - def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]: """ Retrieves a random patient's data who has been diagnosed with a specified condition. @@ -45,8 +36,19 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], 'observations': [selected_row.get('observation', '')], 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')], } - modality = selected_row.get('modality', None) - body_area = selected_row.get('body_area', None) + modality = selected_row.get('modality', None) # Assuming your database includes this column + body_area = selected_row.get('body_area', None) # Assuming your database includes this column return patient_data, modality, body_area else: return {}, None, None + + def import_cleaned_data_to_sqlite(self) -> None: + """ + Imports cleaned medical data from a CSV file into an SQLite database. + """ + conn = sqlite3.connect(self.config['database_path']) + df = pd.read_csv(self.config['cleaned_data_csv_path']) + # Assuming the table name you want to use is 'cleaned_medical_data' + df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False) + conn.close() + diff --git a/config.yaml b/config.yaml deleted file mode 100644 index c28a9ca..0000000 --- a/config.yaml +++ /dev/null @@ -1,2 +0,0 @@ -database_path: 'path/to/health_data.db' -cleaned_data_csv_path: '/path/to/cleaned_medical_data.csv' diff --git a/main.py b/main.py deleted file mode 100644 index 8288db7..0000000 --- a/main.py +++ /dev/null @@ -1,65 +0,0 @@ -import argparse -import yaml -from typing import Dict, Any, Optional -from transformers import AutoTokenizer, pipeline -from synthea_data import SyntheaData #SyntheaData class is in synthea_data.py - -def load_config(config_path: str) -> Dict[str, Any]: - """ - Load the YAML configuration file. - """ - with open(config_path, 'r') as file: - config = yaml.safe_load(file) - return config - -def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modality: Optional[str], body_area: Optional[str]) -> str: - """ - Generate a medical prompt for the LLAMA model based on patient data, a given diagnosis, and imaging details. - """ - prompt_template = ( - "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. " - "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. " - "Imaging modality: {modality}. Body area: {body_area}. " - "Please provide a comprehensive report based on the patient's data and imaging results." - ) - - prompt = prompt_template.format( - diagnosis=diagnosis, - age=patient_data['age'], - gender=patient_data['gender'], - conditions=', '.join(patient_data['conditions']), - observations=', '.join(patient_data['observations']), - care_plans=', '.join(patient_data['care_plans']), - modality=modality or "Not specified", - body_area=body_area or "Not specified" - ) - return prompt - -def parse_arguments() -> argparse.Namespace: - """ - Parse command line arguments. - """ - parser = argparse.ArgumentParser(description='Medical Report Generator') - parser.add_argument('--config_path', type=str, default='config.yaml', help='Path to the YAML configuration file.') - return parser.parse_args() - -def main() -> None: - """ - Main function to orchestrate the workflow. - """ - args = parse_arguments() - config = load_config(args.config_path) - - synthea_data = SyntheaData(config) - diagnosis = "lung cancer" # Example diagnosis - patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis) - - if patient_data: - medical_prompt = generate_medical_prompt(patient_data, diagnosis, modality, body_area) - print(medical_prompt) - # Further processing with LLAMA or other models can be done here - else: - print("No patient data found for the given diagnosis.") - -if __name__ == '__main__': - main() From 03ae720d58338722be1d0ad4f78f5d80eb03a154 Mon Sep 17 00:00:00 2001 From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:43:03 +0100 Subject: [PATCH 2/9] see changes on config.yaml file and also main.py --- Prompt_engineer/main.py | 30 ++++++++++++++++++++++-------- Prompt_engineer/synthea_data.py | 26 +++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py index e863073..153366d 100644 --- a/Prompt_engineer/main.py +++ b/Prompt_engineer/main.py @@ -6,7 +6,6 @@ import yaml import argparse - class SyntheaData: """ Handles operations related to processing and retrieving patient data from a consolidated dataset. @@ -19,7 +18,7 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], conn = sqlite3.connect(self.config['database_path']) query = """ SELECT * FROM cleaned_medical_data - WHERE description_cond LIKE ? OR reasondescription LIKE ? + WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ? """ df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%')) conn.close() @@ -29,7 +28,7 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], patient_data = { 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years, 'gender': selected_row['GENDER'], - 'conditions': [selected_row['description_cond']], + 'conditions': [selected_row['DESCRIPTION_cond']], 'observations': [selected_row.get('observation', '')], 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')], } @@ -45,19 +44,29 @@ def import_cleaned_data_to_sqlite(self) -> None: df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False) conn.close() - def load_config(config_path: str) -> Dict[str, Any]: with open(config_path, 'r') as file: config = yaml.safe_load(file) return config - def parse_arguments() -> argparse.Namespace: parser = argparse.ArgumentParser(description='Medical Report Generator') - # Update the default path to the full path of your config.yaml file parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.') return parser.parse_args() +def generate_report(patient_data: Dict[str, Any]) -> str: + """ + Generates a textual report based on the patient data. + """ + report_lines = [ + f"Patient Age: {patient_data['age']}", + f"Gender: {patient_data['gender']}", + f"Conditions: {', '.join(patient_data['conditions'])}", + f"Observations: {', '.join(patient_data['observations'])}", + f"Care Plans: {', '.join(patient_data['care_plans'])}", + ] + report = "\n".join(report_lines) + return report def main(): args = parse_arguments() @@ -67,11 +76,16 @@ def main(): # Call import_cleaned_data_to_sqlite to ensure the database is populated before making queries data_processor.import_cleaned_data_to_sqlite() # Make sure this line is uncommented and called here - # Example usage - diagnosis = "Example Diagnosis" + # Use "Prediabetes" as the example diagnosis to query + diagnosis = "Prediabetes" patient_data, modality, body_area = data_processor.get_patient_data_by_diagnosis(diagnosis) print(patient_data, modality, body_area) + if patient_data: # Ensure there is data to generate a report from + report = generate_report(patient_data) + print("Generated Report:", report) + else: + print("No patient data found for the given diagnosis.") if __name__ == '__main__': main() diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py index 3e901f7..3cf87eb 100644 --- a/Prompt_engineer/synthea_data.py +++ b/Prompt_engineer/synthea_data.py @@ -22,11 +22,12 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], conn = sqlite3.connect(self.config['database_path']) query = """ SELECT * FROM cleaned_medical_data - WHERE description_cond LIKE ? OR reasondescription LIKE ? + WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ? """ df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%')) conn.close() + if not df.empty: selected_row = df.sample(n=1).iloc[0] patient_data = { @@ -52,3 +53,26 @@ def import_cleaned_data_to_sqlite(self) -> None: df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False) conn.close() + +def generate_report(patient_data: dict, + model_path: str = '/Users/ayodejioyesanya/.cache/lm-studio/models/TheBloke/medicine-chat-GGUF/medicine-chat.Q3_K_M.gguf') -> str: + # Load the tokenizer and model from the specified path + tokenizer = AutoTokenizer.from_pretrained(model_path) + model = AutoModelForCausalLM.from_pretrained(model_path) + + # Prepare the prompt from patient data + prompt = f"Based on the following patient information, generate a medical report:\n" \ + f"Age: {patient_data['age']}, Gender: {patient_data['gender']}, " \ + f"Conditions: {', '.join(patient_data['conditions'])}, Observations: {', '.join(patient_data['observations'])}, " \ + f"Care Plans: {', '.join(patient_data['care_plans'])}.\n" + + # Encode the prompt and generate text + inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512) + output_sequences = model.generate(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'], + max_length=512, temperature=1.0, top_p=0.95, top_k=50) + + # Decode the generated text + report = tokenizer.decode(output_sequences[0], skip_special_tokens=True) + + return report + From a54bfc884e430f7e870dacc5b40139b20d013e79 Mon Sep 17 00:00:00 2001 From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com> Date: Sun, 14 Apr 2024 18:36:47 +0100 Subject: [PATCH 3/9] changed without SQL directory --- Prompt_engineer/.DS_Store | Bin 0 -> 6148 bytes .../__pycache__/synthea_data.cpython-37.pyc | Bin 0 -> 1717 bytes Prompt_engineer/config.yaml | 2 +- Prompt_engineer/main.py | 112 +++++++----------- Prompt_engineer/synthea_data.py | 91 +++++--------- 5 files changed, 72 insertions(+), 133 deletions(-) create mode 100644 Prompt_engineer/.DS_Store create mode 100644 Prompt_engineer/__pycache__/synthea_data.cpython-37.pyc diff --git a/Prompt_engineer/.DS_Store b/Prompt_engineer/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f165c78f1ae7e178822be07fa49e619bb86d2d40 GIT binary patch literal 6148 zcmeHK%S{6@5S@VtEy^WFj&30xm}sdW1uSHP$RW}0g#$NtkY11|feNUAPI&Vnuwuyt z2_ZBi+0UOh_Ux0^_7D*-?$>jo84*<|f-H>@li|^+6DLoAtZTG%LoIFSmYR`;{^F3t zK1H+*_4I()KjLq;ck^Z2?RpFQynN5xb#=Ykb}KNdvzPtj{`L9t7<+wSgY|Tr-ETfe z0e)C05DJ6>p+G3`GX-#Gi&ay@s6&BJAQU(#Am>9u5zLOoP>&8&ngRgj8Lfu4|7eFYr-*w|@CNxoZ>3HHw(ZK_Mc$&MrCNjbbCgNNDIxx5u;0kGH#> z$d)4^B@&#FxaW`@c>>;nN9ZdjUSUsEk8L(w5Ixh?HPwIheN{Ejn@t}<`{U8h@OKBH zKlR6|d7yj&U3?FMB8oGV;}GXYXyj&S=2mE7Z8tJIw?n(q&CJQ&(5-YU^Foi>)S)i* zXpQ=`P8+vwg2UP?M4NQ&2GO-^BlM}&N3He0;1<-fl+{gRsm!Bds_emJoH6CS9?P^W zNY=v2qeL>9<_z>EXBm;{1*0sJq&M@Q?eFjH?mdGyw7w!4o7J;PkpwX*V#a6dX%TZq z1fxM*PLk1Vle1)!5gz1ak;;;%MKZI$**zoE(0(yuLbhC0dwy~@__BL6I9AT9<6igp zOl?pmVxCq$MR8eBRWHv4;}_sdS*Wem>nv$d_ zMJkj7dnUYyjitXjIkd|rbny#_pU@CfOpO~)qAt3|SH|4nKT3QD<2}AIr75jD`yPRG z<|fsyTldDb@y5Nf<`!7&Z3NQUMhS+|U5=g}Y36Y;%2k%9v|B|wc z&M5?OEF)GVX~7uph(-J(9qaaZ`p!SdQXyklbg2Cj+EWM3K7WuW?S#o_=|iC=dixrPUaO`Wr$>X6=wEl*j3Wy6Zs@KH`=Jc2@BffeBf%(7TAx-#*{`ozXVHr_i`MP?PH zExqvL|D^5-ED;}qK-MOumAEQN;>iQM_T9$9$Z@aP!Z`JEYKY4iV`{qPjCYs L&o=P;$G86m*bd3N literal 0 HcmV?d00001 diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml index 4f320e9..817c6f9 100644 --- a/Prompt_engineer/config.yaml +++ b/Prompt_engineer/config.yaml @@ -1,2 +1,2 @@ -database_path: '/Users/ayodejioyesanya/Desktop/sqlite-tools-osx-x86-3420000/health_data.db' +# Configuration file for the medical report generator cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv' diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py index 153366d..6fccc66 100644 --- a/Prompt_engineer/main.py +++ b/Prompt_engineer/main.py @@ -1,89 +1,63 @@ -import pandas as pd -import sqlite3 -from datetime import datetime -from dateutil.relativedelta import relativedelta -from typing import Dict, Any, Tuple, Optional -import yaml import argparse +import yaml +from typing import Dict, Any, Optional +from synthea_data import SyntheaData -class SyntheaData: +def load_config(config_path: str) -> Dict[str, Any]: """ - Handles operations related to processing and retrieving patient data from a consolidated dataset. + Load the YAML configuration file. """ - - def __init__(self, config: Dict[str, Any]) -> None: - self.config = config - - def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]: - conn = sqlite3.connect(self.config['database_path']) - query = """ - SELECT * FROM cleaned_medical_data - WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ? - """ - df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%')) - conn.close() - - if not df.empty: - selected_row = df.sample(n=1).iloc[0] - patient_data = { - 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years, - 'gender': selected_row['GENDER'], - 'conditions': [selected_row['DESCRIPTION_cond']], - 'observations': [selected_row.get('observation', '')], - 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')], - } - modality = selected_row.get('modality', None) - body_area = selected_row.get('body_area', None) - return patient_data, modality, body_area - else: - return {}, None, None - - def import_cleaned_data_to_sqlite(self) -> None: - conn = sqlite3.connect(self.config['database_path']) - df = pd.read_csv(self.config['cleaned_data_csv_path']) - df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False) - conn.close() - -def load_config(config_path: str) -> Dict[str, Any]: with open(config_path, 'r') as file: - config = yaml.safe_load(file) + config = yaml.safe_load(file) or {} return config +def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modality: Optional[str], body_area: Optional[str]) -> str: + """ + Generate a medical prompt for the medical report based on patient data and diagnosis details. + """ + prompt_template = ( + "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. " + "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. " + "Imaging modality: {modality}. Body area: {body_area}. " + "Please provide a comprehensive report based on the patient's data and imaging results." + ) + + prompt = prompt_template.format( + diagnosis=diagnosis, + age=patient_data['age'], + gender=patient_data['gender'], + conditions=', '.join(patient_data['conditions']), + observations=', '.join(patient_data['observations']), + care_plans=', '.join(patient_data['care_plans']), + modality=modality or "Not specified", + body_area=body_area or "Not specified" + ) + return prompt + def parse_arguments() -> argparse.Namespace: + """ + Parse command line arguments. + """ parser = argparse.ArgumentParser(description='Medical Report Generator') - parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.') + # Update this path to where your config.yaml actually resides + default_config_path = '/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml' + parser.add_argument('--config_path', type=str, default=default_config_path, help='Path to the YAML configuration file.') return parser.parse_args() -def generate_report(patient_data: Dict[str, Any]) -> str: +def main() -> None: """ - Generates a textual report based on the patient data. + Main function to orchestrate the workflow. """ - report_lines = [ - f"Patient Age: {patient_data['age']}", - f"Gender: {patient_data['gender']}", - f"Conditions: {', '.join(patient_data['conditions'])}", - f"Observations: {', '.join(patient_data['observations'])}", - f"Care Plans: {', '.join(patient_data['care_plans'])}", - ] - report = "\n".join(report_lines) - return report - -def main(): args = parse_arguments() config = load_config(args.config_path) - data_processor = SyntheaData(config) - # Call import_cleaned_data_to_sqlite to ensure the database is populated before making queries - data_processor.import_cleaned_data_to_sqlite() # Make sure this line is uncommented and called here - - # Use "Prediabetes" as the example diagnosis to query - diagnosis = "Prediabetes" - patient_data, modality, body_area = data_processor.get_patient_data_by_diagnosis(diagnosis) - print(patient_data, modality, body_area) + synthea_data = SyntheaData() + diagnosis = "lung cancer" # Example diagnosis + patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis) - if patient_data: # Ensure there is data to generate a report from - report = generate_report(patient_data) - print("Generated Report:", report) + if patient_data: + medical_prompt = generate_medical_prompt(patient_data, diagnosis, modality, body_area) + print(medical_prompt) else: print("No patient data found for the given diagnosis.") diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py index 3cf87eb..ae96fb4 100644 --- a/Prompt_engineer/synthea_data.py +++ b/Prompt_engineer/synthea_data.py @@ -1,78 +1,43 @@ -import pandas as pd -import sqlite3 +from typing import Dict, Any, Tuple, Optional from datetime import datetime from dateutil.relativedelta import relativedelta -from typing import Dict, Any, Tuple, Optional - +import random + +# Example static data, simulating a patient database +PATIENT_DATA = [ + { + 'BIRTHDATE': '1990-01-01', + 'GENDER': 'Male', + 'description_cond': 'lung cancer', + 'observation': 'increased cough', + 'DESCRIPTION_careplan': 'regular monitoring', + 'modality': 'X-Ray', + 'body_area': 'Chest' + }, + # Additional records can be added here. +] class SyntheaData: """ - Handles operations related to processing and retrieving patient data from a consolidated dataset. + Simulates data retrieval from a static dataset. """ - def __init__(self, config: Dict[str, Any]) -> None: - """ - Initializes with configuration settings. - """ - self.config = config - def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]: """ Retrieves a random patient's data who has been diagnosed with a specified condition. """ - conn = sqlite3.connect(self.config['database_path']) - query = """ - SELECT * FROM cleaned_medical_data - WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ? - """ - df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%')) - conn.close() - - - if not df.empty: - selected_row = df.sample(n=1).iloc[0] + matching_patients = [patient for patient in PATIENT_DATA if diagnosis in patient['description_cond']] + if matching_patients: + selected_patient = random.choice(matching_patients) + age = relativedelta(datetime.now(), datetime.strptime(selected_patient['BIRTHDATE'], '%Y-%m-%d')).years patient_data = { - 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years, - 'gender': selected_row['GENDER'], - 'conditions': [selected_row['description_cond']], - 'observations': [selected_row.get('observation', '')], - 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')], + 'age': age, + 'gender': selected_patient['GENDER'], + 'conditions': [selected_patient['description_cond']], + 'observations': [selected_patient['observation']], + 'care_plans': [selected_patient['DESCRIPTION_careplan']], } - modality = selected_row.get('modality', None) # Assuming your database includes this column - body_area = selected_row.get('body_area', None) # Assuming your database includes this column + modality = selected_patient.get('modality') + body_area = selected_patient.get('body_area') return patient_data, modality, body_area else: return {}, None, None - - def import_cleaned_data_to_sqlite(self) -> None: - """ - Imports cleaned medical data from a CSV file into an SQLite database. - """ - conn = sqlite3.connect(self.config['database_path']) - df = pd.read_csv(self.config['cleaned_data_csv_path']) - # Assuming the table name you want to use is 'cleaned_medical_data' - df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False) - conn.close() - - -def generate_report(patient_data: dict, - model_path: str = '/Users/ayodejioyesanya/.cache/lm-studio/models/TheBloke/medicine-chat-GGUF/medicine-chat.Q3_K_M.gguf') -> str: - # Load the tokenizer and model from the specified path - tokenizer = AutoTokenizer.from_pretrained(model_path) - model = AutoModelForCausalLM.from_pretrained(model_path) - - # Prepare the prompt from patient data - prompt = f"Based on the following patient information, generate a medical report:\n" \ - f"Age: {patient_data['age']}, Gender: {patient_data['gender']}, " \ - f"Conditions: {', '.join(patient_data['conditions'])}, Observations: {', '.join(patient_data['observations'])}, " \ - f"Care Plans: {', '.join(patient_data['care_plans'])}.\n" - - # Encode the prompt and generate text - inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512) - output_sequences = model.generate(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'], - max_length=512, temperature=1.0, top_p=0.95, top_k=50) - - # Decode the generated text - report = tokenizer.decode(output_sequences[0], skip_special_tokens=True) - - return report - From 5cff70f8bdea3120235e66f30285b3330badd22a Mon Sep 17 00:00:00 2001 From: Dominic Date: Mon, 15 Apr 2024 07:10:33 +0100 Subject: [PATCH 4/9] Update add Llama-2-Chat --- Prompt_engineer/main.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py index 6fccc66..d73d868 100644 --- a/Prompt_engineer/main.py +++ b/Prompt_engineer/main.py @@ -11,17 +11,11 @@ def load_config(config_path: str) -> Dict[str, Any]: config = yaml.safe_load(file) or {} return config -def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modality: Optional[str], body_area: Optional[str]) -> str: +def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str: """ - Generate a medical prompt for the medical report based on patient data and diagnosis details. + Generate a medical prompt for the medical report based on patient data and diagnosis details using a template from the configuration. """ - prompt_template = ( - "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. " - "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. " - "Imaging modality: {modality}. Body area: {body_area}. " - "Please provide a comprehensive report based on the patient's data and imaging results." - ) - + prompt_template = config.get('prompt_template', "Default template if not specified in config.") prompt = prompt_template.format( diagnosis=diagnosis, age=patient_data['age'], @@ -29,8 +23,8 @@ def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modali conditions=', '.join(patient_data['conditions']), observations=', '.join(patient_data['observations']), care_plans=', '.join(patient_data['care_plans']), - modality=modality or "Not specified", - body_area=body_area or "Not specified" + modality=patient_data.get('modality', "Not specified"), + body_area=patient_data.get('body_area', "Not specified") ) return prompt @@ -39,9 +33,7 @@ def parse_arguments() -> argparse.Namespace: Parse command line arguments. """ parser = argparse.ArgumentParser(description='Medical Report Generator') - # Update this path to where your config.yaml actually resides - default_config_path = '/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml' - parser.add_argument('--config_path', type=str, default=default_config_path, help='Path to the YAML configuration file.') + parser.add_argument('--config_path', type=str, default='./config.yaml', help='Path to the YAML configuration file.') return parser.parse_args() def main() -> None: @@ -56,7 +48,7 @@ def main() -> None: patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis) if patient_data: - medical_prompt = generate_medical_prompt(patient_data, diagnosis, modality, body_area) + medical_prompt = generate_medical_prompt(patient_data, diagnosis, config) print(medical_prompt) else: print("No patient data found for the given diagnosis.") From 003274f0c1bd8dce88daf362b34a13814a03fcaf Mon Sep 17 00:00:00 2001 From: Dominic Date: Mon, 15 Apr 2024 07:11:08 +0100 Subject: [PATCH 5/9] Llama-2-Chat format --- Prompt_engineer/config.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml index 817c6f9..16a4ae8 100644 --- a/Prompt_engineer/config.yaml +++ b/Prompt_engineer/config.yaml @@ -1,2 +1,16 @@ # Configuration file for the medical report generator cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv' + +# Configuration file for the medical report generator +cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv' + +prompt_template: | + [INST] <> You are an experienced medical AI assistant trained to provide helpful and accurate + information to patients. You have extensive knowledge of human anatomy, common medical conditions, and + evidence-based treatments. Your responses should be empathetic, informative, and adhere to medical best + practices. You will not provide any medical advice that could be harmful. [/INST] + + Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. + Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. + Imaging modality: {modality}. Body area: {body_area}. + Please provide a comprehensive report based on the patient's data and imaging results. From 041085b0ff560cd0e9cbd678a15968be0150560f Mon Sep 17 00:00:00 2001 From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com> Date: Mon, 22 Apr 2024 19:31:09 +0100 Subject: [PATCH 6/9] dynamic with csv included --- .../__pycache__/synthea_data.cpython-310.pyc | Bin 0 -> 1737 bytes Prompt_engineer/config.yaml | 4 -- Prompt_engineer/main.py | 43 ++++++++++++------ Prompt_engineer/synthea_data.py | 10 ++-- 4 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc diff --git a/Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc b/Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d7c514d1d951e612e8e234a601a7a84b96428f0 GIT binary patch literal 1737 zcma)6&2Aev5GMCuE6H};w2c7=*o#~2!IE<{;2LowC-tFnGEqd!c2m6>O=sWZgcic;cmc1X_7_sz^!?Y56#{Q6*X_=|(k zZ~9?*c<}HPruZHNMHJ_#z!5Ht$SBOnEUd`F+HT}_VMlhYo4HfCkz4Cl?nNH8sY6}r z(FXNtleXTvDUKR15N*?yYeZMBjL4_f0EMf+!&^{jDXW(yQke(kRN1GKan6+YaxAl| zBzcIHM~P%ID;Vf)&T=BNb4FP%Nq^=)+1=gQ-gyH5$oiV(Y}U*tWf~-;OccKd`(Bm3Ei2^qSo@#WFU=~umj(?jLFIPCWh zPt+P^BH>vbQ=C*KRn6*5Fn$iPRHb^@I~+VaIXXT)dfAT?!r3?{rSgi3l01`B)i|r@ zGzL3KG~(s)p7A}fG8AEoB7@?{qz1F7$!z8@i@8H%12KC5Q{Pbynvt}u zL?)C2HzvGHjHQ@$$WT1UgeiUk@dFxSim7q!Nz_AE_|ljg{Heq@u-@WJQ<~Dcv2PJb zXKqpmom#iXmGRoWwB{CA>b7$@j z%>l{~ZPAw2!uD)fW6;=7?!W?e08UB@fX9HAmGbii9T%f27!eViF;)f(&`>HDnH*_u z#w^K(SzS9N&2*;k2-40I48ZS)X2*v45lpcOB1Lob%D==vqB$gV1Dzvo%<<308v|m* zTsPr66dKCt>_qM&sCSsJft{~|`15)98z?W)CDV$s*I6}Xf|OIz?N!O700g2tcuv`Q z_ZZSRmN6^StYnOL#a#u&I@_J`RQY>(CS(GK_H|rBN4kk-Uo2HhCuK5T#8BUh&!+MF z3h3fKh4O^t z(47TS_LLDWlnJOpPg#QH!WZEhsty~u=|4a*AWlKT>;;SGuJfV5@kcD^5+7>pI zBd24aZpl^cx%O9Q;S>Flp2#fk{Ob#Sm#4?tyRN_5-O_98C~7jjy*oVny02>99-$U) R0bhn~;156R59{ys_CLgb&KCdx literal 0 HcmV?d00001 diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml index 16a4ae8..a950330 100644 --- a/Prompt_engineer/config.yaml +++ b/Prompt_engineer/config.yaml @@ -1,9 +1,5 @@ # Configuration file for the medical report generator cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv' - -# Configuration file for the medical report generator -cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv' - prompt_template: | [INST] <> You are an experienced medical AI assistant trained to provide helpful and accurate information to patients. You have extensive knowledge of human anatomy, common medical conditions, and diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py index d73d868..3d00d69 100644 --- a/Prompt_engineer/main.py +++ b/Prompt_engineer/main.py @@ -1,7 +1,8 @@ +# main.py import argparse import yaml +import pandas as pd from typing import Dict, Any, Optional -from synthea_data import SyntheaData def load_config(config_path: str) -> Dict[str, Any]: """ @@ -18,22 +19,37 @@ def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config prompt_template = config.get('prompt_template', "Default template if not specified in config.") prompt = prompt_template.format( diagnosis=diagnosis, - age=patient_data['age'], - gender=patient_data['gender'], - conditions=', '.join(patient_data['conditions']), - observations=', '.join(patient_data['observations']), - care_plans=', '.join(patient_data['care_plans']), - modality=patient_data.get('modality', "Not specified"), + age=patient_data['BIRTHDATE'], + gender=patient_data['GENDER'], + conditions=patient_data['DESCRIPTION_cond'], + observations=', '.join(patient_data.get('observations', [])), # Assuming this needs a similar change + care_plans=patient_data['DESCRIPTION_careplan'], + modality=patient_data.get('modality', "Not specified"), # Assuming a similar field needs to be added or changed body_area=patient_data.get('body_area', "Not specified") ) return prompt +def load_patient_data(csv_path: str) -> pd.DataFrame: + """ + Load patient data from a CSV file. + """ + return pd.read_csv(csv_path) + +def select_random_patient_data(patient_data: pd.DataFrame, diagnosis: str) -> Optional[Dict[str, Any]]: + """ + Select random patient data with a specific diagnosis. + """ + matching_patients = patient_data[patient_data['REASONDESCRIPTION'] == diagnosis] + if not matching_patients.empty: + return matching_patients.sample(n=1).iloc[0].to_dict() + return None + def parse_arguments() -> argparse.Namespace: """ Parse command line arguments. """ parser = argparse.ArgumentParser(description='Medical Report Generator') - parser.add_argument('--config_path', type=str, default='./config.yaml', help='Path to the YAML configuration file.') + parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.') return parser.parse_args() def main() -> None: @@ -42,13 +58,12 @@ def main() -> None: """ args = parse_arguments() config = load_config(args.config_path) + patient_data = load_patient_data(config['cleaned_data_csv_path']) + diagnosis = "Lung Cancer" # Example diagnosis + random_patient_data = select_random_patient_data(patient_data, diagnosis) - synthea_data = SyntheaData() - diagnosis = "lung cancer" # Example diagnosis - patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis) - - if patient_data: - medical_prompt = generate_medical_prompt(patient_data, diagnosis, config) + if random_patient_data: + medical_prompt = generate_medical_prompt(random_patient_data, diagnosis, config) print(medical_prompt) else: print("No patient data found for the given diagnosis.") diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py index ae96fb4..929d6fb 100644 --- a/Prompt_engineer/synthea_data.py +++ b/Prompt_engineer/synthea_data.py @@ -32,12 +32,14 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], patient_data = { 'age': age, 'gender': selected_patient['GENDER'], - 'conditions': [selected_patient['description_cond']], - 'observations': [selected_patient['observation']], - 'care_plans': [selected_patient['DESCRIPTION_careplan']], + 'conditions': selected_patient['description_cond'], + 'observations': selected_patient['observation'], + 'care_plans': selected_patient['DESCRIPTION_careplan'], + 'modality': selected_patient.get('modality', "Not specified"), + 'body_area': selected_patient.get('body_area', "Not specified") } modality = selected_patient.get('modality') body_area = selected_patient.get('body_area') return patient_data, modality, body_area else: - return {}, None, None + return None, None, None From 2d567efdd590fefab91c8e694ce5cc35fb4a7b89 Mon Sep 17 00:00:00 2001 From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com> Date: Mon, 29 Apr 2024 09:19:21 +0100 Subject: [PATCH 7/9] update DOB, observation and not related to diagnosis --- Prompt_engineer/main.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py index 3d00d69..95301b4 100644 --- a/Prompt_engineer/main.py +++ b/Prompt_engineer/main.py @@ -1,7 +1,7 @@ -# main.py import argparse import yaml import pandas as pd +from datetime import datetime from typing import Dict, Any, Optional def load_config(config_path: str) -> Dict[str, Any]: @@ -12,19 +12,28 @@ def load_config(config_path: str) -> Dict[str, Any]: config = yaml.safe_load(file) or {} return config +def calculate_age(birthdate: str) -> int: + """ + Calculate age given the birthdate. + """ + birth_date = datetime.strptime(birthdate, "%Y-%m-%d") + today = datetime.now() + return today.year - birth_date.year - ((today.month, today.day) < (birth_date.month, birth_date.day)) + def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str: """ Generate a medical prompt for the medical report based on patient data and diagnosis details using a template from the configuration. """ prompt_template = config.get('prompt_template', "Default template if not specified in config.") + age = calculate_age(patient_data['BIRTHDATE']) # Calculating age using the BIRTHDATE field prompt = prompt_template.format( diagnosis=diagnosis, - age=patient_data['BIRTHDATE'], + age=age, gender=patient_data['GENDER'], conditions=patient_data['DESCRIPTION_cond'], - observations=', '.join(patient_data.get('observations', [])), # Assuming this needs a similar change - care_plans=patient_data['DESCRIPTION_careplan'], - modality=patient_data.get('modality', "Not specified"), # Assuming a similar field needs to be added or changed + observations=patient_data.get('REASONDESCRIPTION', 'No reason description provided'), # Using REASONDESCRIPTION for observations + care_plans=patient_data.get('DESCRIPTION_careplan', 'No care plans recorded'), + modality=patient_data.get('modality', "Not specified"), body_area=patient_data.get('body_area', "Not specified") ) return prompt @@ -35,14 +44,11 @@ def load_patient_data(csv_path: str) -> pd.DataFrame: """ return pd.read_csv(csv_path) -def select_random_patient_data(patient_data: pd.DataFrame, diagnosis: str) -> Optional[Dict[str, Any]]: +def select_random_patient_data(patient_data: pd.DataFrame) -> Dict[str, Any]: """ - Select random patient data with a specific diagnosis. + Select random patient data from the entire dataset, independent of the diagnosis. """ - matching_patients = patient_data[patient_data['REASONDESCRIPTION'] == diagnosis] - if not matching_patients.empty: - return matching_patients.sample(n=1).iloc[0].to_dict() - return None + return patient_data.sample(n=1).iloc[0].to_dict() def parse_arguments() -> argparse.Namespace: """ @@ -50,6 +56,8 @@ def parse_arguments() -> argparse.Namespace: """ parser = argparse.ArgumentParser(description='Medical Report Generator') parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.') + parser.add_argument('--csv_path', type=str, default='/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv', help='Path to the CSV file containing patient data.') + parser.add_argument('--diagnosis', type=str, required=True, help='Diagnosis determined by the image classifier.') return parser.parse_args() def main() -> None: @@ -58,15 +66,15 @@ def main() -> None: """ args = parse_arguments() config = load_config(args.config_path) - patient_data = load_patient_data(config['cleaned_data_csv_path']) - diagnosis = "Lung Cancer" # Example diagnosis - random_patient_data = select_random_patient_data(patient_data, diagnosis) + patient_data = load_patient_data(args.csv_path) + diagnosis = args.diagnosis # Diagnosis passed from the command line + random_patient_data = select_random_patient_data(patient_data) if random_patient_data: medical_prompt = generate_medical_prompt(random_patient_data, diagnosis, config) print(medical_prompt) else: - print("No patient data found for the given diagnosis.") + print("No patient data available.") if __name__ == '__main__': main() From 5c7fe1adb38bc7a07ffeaf2c1bebdd068e363eda Mon Sep 17 00:00:00 2001 From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com> Date: Mon, 13 May 2024 07:50:42 +0100 Subject: [PATCH 8/9] synthea data output updated to REASONDESCRIPTION --- Prompt_engineer/synthea_data.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py index 929d6fb..b591636 100644 --- a/Prompt_engineer/synthea_data.py +++ b/Prompt_engineer/synthea_data.py @@ -8,12 +8,21 @@ { 'BIRTHDATE': '1990-01-01', 'GENDER': 'Male', - 'description_cond': 'lung cancer', + 'REASONDESCRIPTION': 'lung cancer', 'observation': 'increased cough', 'DESCRIPTION_careplan': 'regular monitoring', 'modality': 'X-Ray', 'body_area': 'Chest' }, + { + 'BIRTHDATE': '1985-05-15', + 'GENDER': 'Female', + 'REASONDESCRIPTION': '', # This patient has no diagnosis specified. + 'observation': 'shortness of breath', + 'DESCRIPTION_careplan': 'oxygen therapy', + 'modality': 'CT Scan', + 'body_area': 'Chest' + }, # Additional records can be added here. ] @@ -23,16 +32,19 @@ class SyntheaData: """ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]: """ - Retrieves a random patient's data who has been diagnosed with a specified condition. + Retrieves a random patient's data who has been diagnosed with a specified condition using the REASONDESCRIPTION. + If no diagnosis is provided in REASONDESCRIPTION, it defaults to "Nil significant past medical history." """ - matching_patients = [patient for patient in PATIENT_DATA if diagnosis in patient['description_cond']] + # Filtering patients based on diagnosis, considering those with empty or null diagnosis as having no significant history. + matching_patients = [patient for patient in PATIENT_DATA if patient['REASONDESCRIPTION'].strip().lower() == diagnosis.lower()] if diagnosis.strip() else [patient for patient in PATIENT_DATA if not patient['REASONDESCRIPTION'].strip()] + if matching_patients: selected_patient = random.choice(matching_patients) age = relativedelta(datetime.now(), datetime.strptime(selected_patient['BIRTHDATE'], '%Y-%m-%d')).years patient_data = { 'age': age, 'gender': selected_patient['GENDER'], - 'conditions': selected_patient['description_cond'], + 'conditions': selected_patient['REASONDESCRIPTION'] if selected_patient['REASONDESCRIPTION'].strip() else "Nil significant past medical history", 'observations': selected_patient['observation'], 'care_plans': selected_patient['DESCRIPTION_careplan'], 'modality': selected_patient.get('modality', "Not specified"), @@ -42,4 +54,13 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], body_area = selected_patient.get('body_area') return patient_data, modality, body_area else: - return None, None, None + # Default case when no patients match the criteria, including empty diagnosis search + return ({ + 'age': None, + 'gender': None, + 'conditions': "Nil significant past medical history", + 'observations': None, + 'care_plans': None, + 'modality': None, + 'body_area': None + }, None, None) From 1858de4c67d194934f52ea8eb34b8a68a63914eb Mon Sep 17 00:00:00 2001 From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com> Date: Mon, 13 May 2024 08:02:37 +0100 Subject: [PATCH 9/9] further synthea changes --- Prompt_engineer/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py index 95301b4..54f9c86 100644 --- a/Prompt_engineer/main.py +++ b/Prompt_engineer/main.py @@ -23,15 +23,17 @@ def calculate_age(birthdate: str) -> int: def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str: """ Generate a medical prompt for the medical report based on patient data and diagnosis details using a template from the configuration. + Uses REASONDESCRIPTION as the source for the patient's conditions. """ prompt_template = config.get('prompt_template', "Default template if not specified in config.") age = calculate_age(patient_data['BIRTHDATE']) # Calculating age using the BIRTHDATE field + conditions = patient_data.get('REASONDESCRIPTION', 'No reason description provided') # Default message if not specified prompt = prompt_template.format( diagnosis=diagnosis, age=age, gender=patient_data['GENDER'], - conditions=patient_data['DESCRIPTION_cond'], - observations=patient_data.get('REASONDESCRIPTION', 'No reason description provided'), # Using REASONDESCRIPTION for observations + conditions=conditions, # Correct field used + observations=patient_data.get('observation', 'No observations recorded'), # Use correct field for observations care_plans=patient_data.get('DESCRIPTION_careplan', 'No care plans recorded'), modality=patient_data.get('modality', "Not specified"), body_area=patient_data.get('body_area', "Not specified")