From 8440ae44b6c77c393c4db0577780348618bf372e Mon Sep 17 00:00:00 2001
From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com>
Date: Sun, 7 Apr 2024 23:10:43 +0100
Subject: [PATCH 1/9] see changes on config.yaml file and also main.py
---
.idea/vcs.xml | 6 ++
Prompt_engineer/config.yaml | 2 +
Prompt_engineer/main.py | 77 +++++++++++++++++++
.../synthea_data.py | 28 +++----
config.yaml | 2 -
main.py | 65 ----------------
6 files changed, 100 insertions(+), 80 deletions(-)
create mode 100644 .idea/vcs.xml
create mode 100644 Prompt_engineer/config.yaml
create mode 100644 Prompt_engineer/main.py
rename synthea_data.py => Prompt_engineer/synthea_data.py (67%)
delete mode 100644 config.yaml
delete mode 100644 main.py
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
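+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>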
\ No newline at end of file
diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml
new file mode 100644
index 0000000..4f320e9
--- /dev/null
+++ b/Prompt_engineer/config.yaml
@@ -0,0 +1,2 @@
+database_path: '/Users/ayodejioyesanya/Desktop/sqlite-tools-osx-x86-3420000/health_data.db'
+cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
new file mode 100644
index 0000000..e863073
--- /dev/null
+++ b/Prompt_engineer/main.py
@@ -0,0 +1,77 @@
+import pandas as pd
+import sqlite3
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from typing import Dict, Any, Tuple, Optional
+import yaml
+import argparse
+
+
+class SyntheaData:
+ """
+ Handles operations related to processing and retrieving patient data from a consolidated dataset.
+ """
+
+ def __init__(self, config: Dict[str, Any]) -> None:
+ self.config = config
+
+ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
+ conn = sqlite3.connect(self.config['database_path'])
+ query = """
+ SELECT * FROM cleaned_medical_data
+ WHERE description_cond LIKE ? OR reasondescription LIKE ?
+ """
+ df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%'))
+ conn.close()
+
+ if not df.empty:
+ selected_row = df.sample(n=1).iloc[0]
+ patient_data = {
+ 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years,
+ 'gender': selected_row['GENDER'],
+ 'conditions': [selected_row['description_cond']],
+ 'observations': [selected_row.get('observation', '')],
+ 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')],
+ }
+ modality = selected_row.get('modality', None)
+ body_area = selected_row.get('body_area', None)
+ return patient_data, modality, body_area
+ else:
+ return {}, None, None
+
+ def import_cleaned_data_to_sqlite(self) -> None:
+ conn = sqlite3.connect(self.config['database_path'])
+ df = pd.read_csv(self.config['cleaned_data_csv_path'])
+ df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
+ conn.close()
+
+
+def load_config(config_path: str) -> Dict[str, Any]:
+ with open(config_path, 'r') as file:
+ config = yaml.safe_load(file)
+ return config
+
+
+def parse_arguments() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description='Medical Report Generator')
+ # Update the default path to the full path of your config.yaml file
+ parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.')
+ return parser.parse_args()
+
+
+def main():
+ args = parse_arguments()
+ config = load_config(args.config_path)
+
+ data_processor = SyntheaData(config)
+ # Call import_cleaned_data_to_sqlite to ensure the database is populated before making queries
+ data_processor.import_cleaned_data_to_sqlite() # Make sure this line is uncommented and called here
+
+ # Example usage
+ diagnosis = "Example Diagnosis"
+ patient_data, modality, body_area = data_processor.get_patient_data_by_diagnosis(diagnosis)
+ print(patient_data, modality, body_area)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/synthea_data.py b/Prompt_engineer/synthea_data.py
similarity index 67%
rename from synthea_data.py
rename to Prompt_engineer/synthea_data.py
index 34e32e8..3e901f7 100644
--- a/synthea_data.py
+++ b/Prompt_engineer/synthea_data.py
@@ -2,8 +2,8 @@
import sqlite3
from datetime import datetime
from dateutil.relativedelta import relativedelta
-from typing import Dict, Any, Tuple
-import random
+from typing import Dict, Any, Tuple, Optional
+
class SyntheaData:
"""
@@ -15,15 +15,6 @@ def __init__(self, config: Dict[str, Any]) -> None:
"""
self.config = config
- def import_cleaned_data_to_sqlite(self) -> None:
- """
- Imports cleaned medical data from a CSV file into an SQLite database.
- """
- conn = sqlite3.connect(self.config['database_path'])
- df = pd.read_csv(self.config['cleaned_data_csv_path'])
- df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
- conn.close()
-
def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
"""
Retrieves a random patient's data who has been diagnosed with a specified condition.
@@ -45,8 +36,19 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any],
'observations': [selected_row.get('observation', '')],
'care_plans': [selected_row.get('DESCRIPTION_careplan', '')],
}
- modality = selected_row.get('modality', None)
- body_area = selected_row.get('body_area', None)
+ modality = selected_row.get('modality', None) # Assuming your database includes this column
+ body_area = selected_row.get('body_area', None) # Assuming your database includes this column
return patient_data, modality, body_area
else:
return {}, None, None
+
+ def import_cleaned_data_to_sqlite(self) -> None:
+ """
+ Imports cleaned medical data from a CSV file into an SQLite database.
+ """
+ conn = sqlite3.connect(self.config['database_path'])
+ df = pd.read_csv(self.config['cleaned_data_csv_path'])
+ # Assuming the table name you want to use is 'cleaned_medical_data'
+ df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
+ conn.close()
+
diff --git a/config.yaml b/config.yaml
deleted file mode 100644
index c28a9ca..0000000
--- a/config.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-database_path: 'path/to/health_data.db'
-cleaned_data_csv_path: '/path/to/cleaned_medical_data.csv'
diff --git a/main.py b/main.py
deleted file mode 100644
index 8288db7..0000000
--- a/main.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import argparse
-import yaml
-from typing import Dict, Any, Optional
-from transformers import AutoTokenizer, pipeline
-from synthea_data import SyntheaData #SyntheaData class is in synthea_data.py
-
-def load_config(config_path: str) -> Dict[str, Any]:
- """
- Load the YAML configuration file.
- """
- with open(config_path, 'r') as file:
- config = yaml.safe_load(file)
- return config
-
-def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modality: Optional[str], body_area: Optional[str]) -> str:
- """
- Generate a medical prompt for the LLAMA model based on patient data, a given diagnosis, and imaging details.
- """
- prompt_template = (
- "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. "
- "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. "
- "Imaging modality: {modality}. Body area: {body_area}. "
- "Please provide a comprehensive report based on the patient's data and imaging results."
- )
-
- prompt = prompt_template.format(
- diagnosis=diagnosis,
- age=patient_data['age'],
- gender=patient_data['gender'],
- conditions=', '.join(patient_data['conditions']),
- observations=', '.join(patient_data['observations']),
- care_plans=', '.join(patient_data['care_plans']),
- modality=modality or "Not specified",
- body_area=body_area or "Not specified"
- )
- return prompt
-
-def parse_arguments() -> argparse.Namespace:
- """
- Parse command line arguments.
- """
- parser = argparse.ArgumentParser(description='Medical Report Generator')
- parser.add_argument('--config_path', type=str, default='config.yaml', help='Path to the YAML configuration file.')
- return parser.parse_args()
-
-def main() -> None:
- """
- Main function to orchestrate the workflow.
- """
- args = parse_arguments()
- config = load_config(args.config_path)
-
- synthea_data = SyntheaData(config)
- diagnosis = "lung cancer" # Example diagnosis
- patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis)
-
- if patient_data:
- medical_prompt = generate_medical_prompt(patient_data, diagnosis, modality, body_area)
- print(medical_prompt)
- # Further processing with LLAMA or other models can be done here
- else:
- print("No patient data found for the given diagnosis.")
-
-if __name__ == '__main__':
- main()
From 03ae720d58338722be1d0ad4f78f5d80eb03a154 Mon Sep 17 00:00:00 2001
From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com>
Date: Tue, 9 Apr 2024 15:43:03 +0100
Subject: [PATCH 2/9] see changes on config.yaml file and also main.py
---
Prompt_engineer/main.py | 30 ++++++++++++++++++++++--------
Prompt_engineer/synthea_data.py | 26 +++++++++++++++++++++++++-
2 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
index e863073..153366d 100644
--- a/Prompt_engineer/main.py
+++ b/Prompt_engineer/main.py
@@ -6,7 +6,6 @@
import yaml
import argparse
-
class SyntheaData:
"""
Handles operations related to processing and retrieving patient data from a consolidated dataset.
@@ -19,7 +18,7 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any],
conn = sqlite3.connect(self.config['database_path'])
query = """
SELECT * FROM cleaned_medical_data
- WHERE description_cond LIKE ? OR reasondescription LIKE ?
+ WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ?
"""
df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%'))
conn.close()
@@ -29,7 +28,7 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any],
patient_data = {
'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years,
'gender': selected_row['GENDER'],
- 'conditions': [selected_row['description_cond']],
+ 'conditions': [selected_row['DESCRIPTION_cond']],
'observations': [selected_row.get('observation', '')],
'care_plans': [selected_row.get('DESCRIPTION_careplan', '')],
}
@@ -45,19 +44,29 @@ def import_cleaned_data_to_sqlite(self) -> None:
df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
conn.close()
-
def load_config(config_path: str) -> Dict[str, Any]:
with open(config_path, 'r') as file:
config = yaml.safe_load(file)
return config
-
def parse_arguments() -> argparse.Namespace:
parser = argparse.ArgumentParser(description='Medical Report Generator')
- # Update the default path to the full path of your config.yaml file
parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.')
return parser.parse_args()
+def generate_report(patient_data: Dict[str, Any]) -> str:
+ """
+ Generates a textual report based on the patient data.
+ """
+ report_lines = [
+ f"Patient Age: {patient_data['age']}",
+ f"Gender: {patient_data['gender']}",
+ f"Conditions: {', '.join(patient_data['conditions'])}",
+ f"Observations: {', '.join(patient_data['observations'])}",
+ f"Care Plans: {', '.join(patient_data['care_plans'])}",
+ ]
+ report = "\n".join(report_lines)
+ return report
def main():
args = parse_arguments()
@@ -67,11 +76,16 @@ def main():
# Call import_cleaned_data_to_sqlite to ensure the database is populated before making queries
data_processor.import_cleaned_data_to_sqlite() # Make sure this line is uncommented and called here
- # Example usage
- diagnosis = "Example Diagnosis"
+ # Use "Prediabetes" as the example diagnosis to query
+ diagnosis = "Prediabetes"
patient_data, modality, body_area = data_processor.get_patient_data_by_diagnosis(diagnosis)
print(patient_data, modality, body_area)
+ if patient_data: # Ensure there is data to generate a report from
+ report = generate_report(patient_data)
+ print("Generated Report:", report)
+ else:
+ print("No patient data found for the given diagnosis.")
if __name__ == '__main__':
main()
diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py
index 3e901f7..3cf87eb 100644
--- a/Prompt_engineer/synthea_data.py
+++ b/Prompt_engineer/synthea_data.py
@@ -22,11 +22,12 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any],
conn = sqlite3.connect(self.config['database_path'])
query = """
SELECT * FROM cleaned_medical_data
- WHERE description_cond LIKE ? OR reasondescription LIKE ?
+ WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ?
"""
df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%'))
conn.close()
+
if not df.empty:
selected_row = df.sample(n=1).iloc[0]
patient_data = {
@@ -52,3 +53,26 @@ def import_cleaned_data_to_sqlite(self) -> None:
df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
conn.close()
+
+def generate_report(patient_data: dict,
+ model_path: str = '/Users/ayodejioyesanya/.cache/lm-studio/models/TheBloke/medicine-chat-GGUF/medicine-chat.Q3_K_M.gguf') -> str:
+ # Load the tokenizer and model from the specified path
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForCausalLM.from_pretrained(model_path)
+
+ # Prepare the prompt from patient data
+ prompt = f"Based on the following patient information, generate a medical report:\n" \
+ f"Age: {patient_data['age']}, Gender: {patient_data['gender']}, " \
+ f"Conditions: {', '.join(patient_data['conditions'])}, Observations: {', '.join(patient_data['observations'])}, " \
+ f"Care Plans: {', '.join(patient_data['care_plans'])}.\n"
+
+ # Encode the prompt and generate text
+ inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
+ output_sequences = model.generate(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'],
+ max_length=512, temperature=1.0, top_p=0.95, top_k=50)
+
+ # Decode the generated text
+ report = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
+
+ return report
+
From a54bfc884e430f7e870dacc5b40139b20d013e79 Mon Sep 17 00:00:00 2001
From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com>
Date: Sun, 14 Apr 2024 18:36:47 +0100
Subject: [PATCH 3/9] changed without SQL directory
---
Prompt_engineer/.DS_Store | Bin 0 -> 6148 bytes
.../__pycache__/synthea_data.cpython-37.pyc | Bin 0 -> 1717 bytes
Prompt_engineer/config.yaml | 2 +-
Prompt_engineer/main.py | 112 +++++++-----------
Prompt_engineer/synthea_data.py | 91 +++++---------
5 files changed, 72 insertions(+), 133 deletions(-)
create mode 100644 Prompt_engineer/.DS_Store
create mode 100644 Prompt_engineer/__pycache__/synthea_data.cpython-37.pyc
diff --git a/Prompt_engineer/.DS_Store b/Prompt_engineer/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..f165c78f1ae7e178822be07fa49e619bb86d2d40
GIT binary patch
literal 6148
zcmeHK%S{6@5S@VtEy^WFj&30xm}sdW1uSHP$RW}0g#$NtkY11|feNUAPI&Vnuwuyt
z2_ZBi+0UOh_Ux0^_7D*-?$>jo84*<|f-H>@li|^+6DLoAtZTG%LoIFSmYR`;{^F3t
zK1H+*_4I()KjLq;ck^Z2?RpFQynN5xb#=Ykb}KNdvzPtj{`L9t7<+wSgY|Tr-ETfe
z0e)C05DJ6>p+G3`GX-#Gi&ay@s6&BJAQU(#Am>9u5zLOoP>&8&ngRgj8Lfu4|7eFYr-*w|@CNxoZ>3HHw(ZK_Mc$&MrCNjbbCgNNDIxx5u;0kGH#>
z$d)4^B@FxaW`@c>>;nN9ZdjUSUsEk8L(w5Ixh?HPwIheN{Ejn@t}<`{U8h@OKBH
zKlR6|d7yj&U3?FMB8oGV;}GXYXyj&S=2mE7Z8tJIw?n(q&CJQ&(5-YU^Foi>)S)i*
zXpQ=`P8+vwg2UP?M4NQ&2GO-^BlM}&N3He0;1<-fl+{gRsm!Bds_emJoH6CS9?P^W
zNY=v2qeL>9<_z>EXBm;{1*0sJq&M@Q?eFjH?mdGyw7w!4o7J;PkpwX*V#a6dX%TZq
z1fxM*PLk1Vle1)!5gz1ak;;;%MKZI$**zoE(0(yuLbhC0dwy~@__BL6I9AT9<6igp
zOl?pmVxCq$MR8eBRWHv4;}_sdS*Wem>nv$d_
zMJkj7dnUYyjitXjIkd|rbny#_pU@CfOpO~)qAt3|SH|4nKT3QD<2}AIr75jD`yPRG
z<|fsyTldDb@y5Nf<`!7&Z3NQUMhS+|U5=g}Y36Y;%2k%9v|B|wc
z&M5?OEF)GVX~7uph(-J(9qaaZ`p!SdQXyklbg2Cj+EWM3K7WuW?S#o_=|iC=dixrPUaO`Wr$>X6=wEl*j3Wy6Zs@KH`=Jc2@BffeBf%(7TAx-#*{`ozXVHr_i`MP?PH
zExqvL|D^5-ED;}qK-MOumAEQN;>iQM_T9$9$Z@aP!Z`JEYKY4iV`{qPjCYs
L&o=P;$G86m*bd3N
literal 0
HcmV?d00001
diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml
index 4f320e9..817c6f9 100644
--- a/Prompt_engineer/config.yaml
+++ b/Prompt_engineer/config.yaml
@@ -1,2 +1,2 @@
-database_path: '/Users/ayodejioyesanya/Desktop/sqlite-tools-osx-x86-3420000/health_data.db'
+# Configuration file for the medical report generator
cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
index 153366d..6fccc66 100644
--- a/Prompt_engineer/main.py
+++ b/Prompt_engineer/main.py
@@ -1,89 +1,63 @@
-import pandas as pd
-import sqlite3
-from datetime import datetime
-from dateutil.relativedelta import relativedelta
-from typing import Dict, Any, Tuple, Optional
-import yaml
import argparse
+import yaml
+from typing import Dict, Any, Optional
+from synthea_data import SyntheaData
-class SyntheaData:
+def load_config(config_path: str) -> Dict[str, Any]:
"""
- Handles operations related to processing and retrieving patient data from a consolidated dataset.
+ Load the YAML configuration file.
"""
-
- def __init__(self, config: Dict[str, Any]) -> None:
- self.config = config
-
- def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
- conn = sqlite3.connect(self.config['database_path'])
- query = """
- SELECT * FROM cleaned_medical_data
- WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ?
- """
- df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%'))
- conn.close()
-
- if not df.empty:
- selected_row = df.sample(n=1).iloc[0]
- patient_data = {
- 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years,
- 'gender': selected_row['GENDER'],
- 'conditions': [selected_row['DESCRIPTION_cond']],
- 'observations': [selected_row.get('observation', '')],
- 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')],
- }
- modality = selected_row.get('modality', None)
- body_area = selected_row.get('body_area', None)
- return patient_data, modality, body_area
- else:
- return {}, None, None
-
- def import_cleaned_data_to_sqlite(self) -> None:
- conn = sqlite3.connect(self.config['database_path'])
- df = pd.read_csv(self.config['cleaned_data_csv_path'])
- df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
- conn.close()
-
-def load_config(config_path: str) -> Dict[str, Any]:
with open(config_path, 'r') as file:
- config = yaml.safe_load(file)
+ config = yaml.safe_load(file) or {}
return config
+def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modality: Optional[str], body_area: Optional[str]) -> str:
+ """
+ Generate a medical prompt for the medical report based on patient data and diagnosis details.
+ """
+ prompt_template = (
+ "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. "
+ "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. "
+ "Imaging modality: {modality}. Body area: {body_area}. "
+ "Please provide a comprehensive report based on the patient's data and imaging results."
+ )
+
+ prompt = prompt_template.format(
+ diagnosis=diagnosis,
+ age=patient_data['age'],
+ gender=patient_data['gender'],
+ conditions=', '.join(patient_data['conditions']),
+ observations=', '.join(patient_data['observations']),
+ care_plans=', '.join(patient_data['care_plans']),
+ modality=modality or "Not specified",
+ body_area=body_area or "Not specified"
+ )
+ return prompt
+
def parse_arguments() -> argparse.Namespace:
+ """
+ Parse command line arguments.
+ """
parser = argparse.ArgumentParser(description='Medical Report Generator')
- parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.')
+ # Update this path to where your config.yaml actually resides
+ default_config_path = '/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml'
+ parser.add_argument('--config_path', type=str, default=default_config_path, help='Path to the YAML configuration file.')
return parser.parse_args()
-def generate_report(patient_data: Dict[str, Any]) -> str:
+def main() -> None:
"""
- Generates a textual report based on the patient data.
+ Main function to orchestrate the workflow.
"""
- report_lines = [
- f"Patient Age: {patient_data['age']}",
- f"Gender: {patient_data['gender']}",
- f"Conditions: {', '.join(patient_data['conditions'])}",
- f"Observations: {', '.join(patient_data['observations'])}",
- f"Care Plans: {', '.join(patient_data['care_plans'])}",
- ]
- report = "\n".join(report_lines)
- return report
-
-def main():
args = parse_arguments()
config = load_config(args.config_path)
- data_processor = SyntheaData(config)
- # Call import_cleaned_data_to_sqlite to ensure the database is populated before making queries
- data_processor.import_cleaned_data_to_sqlite() # Make sure this line is uncommented and called here
-
- # Use "Prediabetes" as the example diagnosis to query
- diagnosis = "Prediabetes"
- patient_data, modality, body_area = data_processor.get_patient_data_by_diagnosis(diagnosis)
- print(patient_data, modality, body_area)
+ synthea_data = SyntheaData()
+ diagnosis = "lung cancer" # Example diagnosis
+ patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis)
- if patient_data: # Ensure there is data to generate a report from
- report = generate_report(patient_data)
- print("Generated Report:", report)
+ if patient_data:
+ medical_prompt = generate_medical_prompt(patient_data, diagnosis, modality, body_area)
+ print(medical_prompt)
else:
print("No patient data found for the given diagnosis.")
diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py
index 3cf87eb..ae96fb4 100644
--- a/Prompt_engineer/synthea_data.py
+++ b/Prompt_engineer/synthea_data.py
@@ -1,78 +1,43 @@
-import pandas as pd
-import sqlite3
+from typing import Dict, Any, Tuple, Optional
from datetime import datetime
from dateutil.relativedelta import relativedelta
-from typing import Dict, Any, Tuple, Optional
-
+import random
+
+# Example static data, simulating a patient database
+PATIENT_DATA = [
+ {
+ 'BIRTHDATE': '1990-01-01',
+ 'GENDER': 'Male',
+ 'description_cond': 'lung cancer',
+ 'observation': 'increased cough',
+ 'DESCRIPTION_careplan': 'regular monitoring',
+ 'modality': 'X-Ray',
+ 'body_area': 'Chest'
+ },
+ # Additional records can be added here.
+]
class SyntheaData:
"""
- Handles operations related to processing and retrieving patient data from a consolidated dataset.
+ Simulates data retrieval from a static dataset.
"""
- def __init__(self, config: Dict[str, Any]) -> None:
- """
- Initializes with configuration settings.
- """
- self.config = config
-
def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
"""
Retrieves a random patient's data who has been diagnosed with a specified condition.
"""
- conn = sqlite3.connect(self.config['database_path'])
- query = """
- SELECT * FROM cleaned_medical_data
- WHERE DESCRIPTION_cond LIKE ? OR REASONDESCRIPTION LIKE ?
- """
- df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%'))
- conn.close()
-
-
- if not df.empty:
- selected_row = df.sample(n=1).iloc[0]
+ matching_patients = [patient for patient in PATIENT_DATA if diagnosis in patient['description_cond']]
+ if matching_patients:
+ selected_patient = random.choice(matching_patients)
+ age = relativedelta(datetime.now(), datetime.strptime(selected_patient['BIRTHDATE'], '%Y-%m-%d')).years
patient_data = {
- 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years,
- 'gender': selected_row['GENDER'],
- 'conditions': [selected_row['description_cond']],
- 'observations': [selected_row.get('observation', '')],
- 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')],
+ 'age': age,
+ 'gender': selected_patient['GENDER'],
+ 'conditions': [selected_patient['description_cond']],
+ 'observations': [selected_patient['observation']],
+ 'care_plans': [selected_patient['DESCRIPTION_careplan']],
}
- modality = selected_row.get('modality', None) # Assuming your database includes this column
- body_area = selected_row.get('body_area', None) # Assuming your database includes this column
+ modality = selected_patient.get('modality')
+ body_area = selected_patient.get('body_area')
return patient_data, modality, body_area
else:
return {}, None, None
-
- def import_cleaned_data_to_sqlite(self) -> None:
- """
- Imports cleaned medical data from a CSV file into an SQLite database.
- """
- conn = sqlite3.connect(self.config['database_path'])
- df = pd.read_csv(self.config['cleaned_data_csv_path'])
- # Assuming the table name you want to use is 'cleaned_medical_data'
- df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
- conn.close()
-
-
-def generate_report(patient_data: dict,
- model_path: str = '/Users/ayodejioyesanya/.cache/lm-studio/models/TheBloke/medicine-chat-GGUF/medicine-chat.Q3_K_M.gguf') -> str:
- # Load the tokenizer and model from the specified path
- tokenizer = AutoTokenizer.from_pretrained(model_path)
- model = AutoModelForCausalLM.from_pretrained(model_path)
-
- # Prepare the prompt from patient data
- prompt = f"Based on the following patient information, generate a medical report:\n" \
- f"Age: {patient_data['age']}, Gender: {patient_data['gender']}, " \
- f"Conditions: {', '.join(patient_data['conditions'])}, Observations: {', '.join(patient_data['observations'])}, " \
- f"Care Plans: {', '.join(patient_data['care_plans'])}.\n"
-
- # Encode the prompt and generate text
- inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
- output_sequences = model.generate(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'],
- max_length=512, temperature=1.0, top_p=0.95, top_k=50)
-
- # Decode the generated text
- report = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
-
- return report
-
From 5cff70f8bdea3120235e66f30285b3330badd22a Mon Sep 17 00:00:00 2001
From: Dominic
Date: Mon, 15 Apr 2024 07:10:33 +0100
Subject: [PATCH 4/9] Update add Llama-2-Chat
---
Prompt_engineer/main.py | 22 +++++++---------------
1 file changed, 7 insertions(+), 15 deletions(-)
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
index 6fccc66..d73d868 100644
--- a/Prompt_engineer/main.py
+++ b/Prompt_engineer/main.py
@@ -11,17 +11,11 @@ def load_config(config_path: str) -> Dict[str, Any]:
config = yaml.safe_load(file) or {}
return config
-def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modality: Optional[str], body_area: Optional[str]) -> str:
+def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str:
"""
- Generate a medical prompt for the medical report based on patient data and diagnosis details.
+ Generate a medical prompt for the medical report based on patient data and diagnosis details using a template from the configuration.
"""
- prompt_template = (
- "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. "
- "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. "
- "Imaging modality: {modality}. Body area: {body_area}. "
- "Please provide a comprehensive report based on the patient's data and imaging results."
- )
-
+ prompt_template = config.get('prompt_template', "Default template if not specified in config.")
prompt = prompt_template.format(
diagnosis=diagnosis,
age=patient_data['age'],
@@ -29,8 +23,8 @@ def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modali
conditions=', '.join(patient_data['conditions']),
observations=', '.join(patient_data['observations']),
care_plans=', '.join(patient_data['care_plans']),
- modality=modality or "Not specified",
- body_area=body_area or "Not specified"
+ modality=patient_data.get('modality', "Not specified"),
+ body_area=patient_data.get('body_area', "Not specified")
)
return prompt
@@ -39,9 +33,7 @@ def parse_arguments() -> argparse.Namespace:
Parse command line arguments.
"""
parser = argparse.ArgumentParser(description='Medical Report Generator')
- # Update this path to where your config.yaml actually resides
- default_config_path = '/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml'
- parser.add_argument('--config_path', type=str, default=default_config_path, help='Path to the YAML configuration file.')
+ parser.add_argument('--config_path', type=str, default='./config.yaml', help='Path to the YAML configuration file.')
return parser.parse_args()
def main() -> None:
@@ -56,7 +48,7 @@ def main() -> None:
patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis)
if patient_data:
- medical_prompt = generate_medical_prompt(patient_data, diagnosis, modality, body_area)
+ medical_prompt = generate_medical_prompt(patient_data, diagnosis, config)
print(medical_prompt)
else:
print("No patient data found for the given diagnosis.")
From 003274f0c1bd8dce88daf362b34a13814a03fcaf Mon Sep 17 00:00:00 2001
From: Dominic
Date: Mon, 15 Apr 2024 07:11:08 +0100
Subject: [PATCH 5/9] Llama-2-Chat format
---
Prompt_engineer/config.yaml | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml
index 817c6f9..16a4ae8 100644
--- a/Prompt_engineer/config.yaml
+++ b/Prompt_engineer/config.yaml
@@ -1,2 +1,16 @@
# Configuration file for the medical report generator
cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
+
+# Configuration file for the medical report generator
+cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
+
+prompt_template: |
+ [INST] <<SYS>> You are an experienced medical AI assistant trained to provide helpful and accurate
+ information to patients. You have extensive knowledge of human anatomy, common medical conditions, and
+ evidence-based treatments. Your responses should be empathetic, informative, and adhere to medical best
+ practices. You will not provide any medical advice that could be harmful. [/INST]
+
+ Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}.
+ Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}.
+ Imaging modality: {modality}. Body area: {body_area}.
+ Please provide a comprehensive report based on the patient's data and imaging results.
From 041085b0ff560cd0e9cbd678a15968be0150560f Mon Sep 17 00:00:00 2001
From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com>
Date: Mon, 22 Apr 2024 19:31:09 +0100
Subject: [PATCH 6/9] dynamic with csv included
---
.../__pycache__/synthea_data.cpython-310.pyc | Bin 0 -> 1737 bytes
Prompt_engineer/config.yaml | 4 --
Prompt_engineer/main.py | 43 ++++++++++++------
Prompt_engineer/synthea_data.py | 10 ++--
4 files changed, 35 insertions(+), 22 deletions(-)
create mode 100644 Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc
diff --git a/Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc b/Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9d7c514d1d951e612e8e234a601a7a84b96428f0
GIT binary patch
literal 1737
zcma)6&2Aev5GMCuE6H};w2c7=*o#~2!IE<{;2LowC-tFnGEqd!c2m6>O=sWZgcic;cmc1X_7_sz^!?Y56#{Q6*X_=|(k
zZ~9?*c<}HPruZHNMHJ_#z!5Ht$SBOnEUd`F+HT}_VMlhYo4HfCkz4Cl?nNH8sY6}r
z(FXNtleXTvDUKR15N*?yYeZMBjL4_f0EMf+!&^{jDXW(yQke(kRN1GKan6+YaxAl|
zBzcIHM~P%ID;Vf)&T=BNb4FP%Nq^=)+1=gQ-gyH5$oiV(Y}U*tWf~-;OccKd`(Bm3Ei2^qSo@#WFU=~umj(?jLFIPCWh
zPt+P^BH>vbQ=C*KRn6*5Fn$iPRHb^@I~+VaIXXT)dfAT?!r3?{rSgi3l01`B)i|r@
zGzL3KG~(s)p7A}fG8AEoB7@?{qz1F7$!z8@i@8H%12KC5Q{Pbynvt}u
zL?)C2HzvGHjHQ@$$WT1UgeiUk@dFxSim7q!Nz_AE_|ljg{Heq@u-@WJQ<~Dcv2PJb
zXKqpmom#iXmGRoWwB{CA>b7$@j
z%>l{~ZPAw2!uD)fW6;=7?!W?e08UB@fX9HAmGbii9T%f27!eViF;)f(&`>HDnH*_u
z#w^K(SzS9N&2*;k2-40I48ZS)X2*v45lpcOB1Lob%D==vqB$gV1Dzvo%<<308v|m*
zTsPr66dKCt>_qM&sCSsJft{~|`15)98z?W)CDV$s*I6}Xf|OIz?N!O700g2tcuv`Q
z_ZZSRmN6^StYnOL#a#u&I@_J`RQY>(CS(GK_H|rBN4kk-Uo2HhCuK5T#8BUh&!+MF
z3h3fKh4O^t
z(47TS_LLDWlnJOpPg#QH!WZEhsty~u=|4a*AWlKT>;;SGuJfV5@kcD^5+7>pI
zBd24aZpl^cx%O9Q;S>Flp2#fk{Ob#Sm#4?tyRN_5-O_98C~7jjy*oVny02>99-$U)
R0bhn~;156R59{ys_CLgb&KCdx
literal 0
HcmV?d00001
diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml
index 16a4ae8..a950330 100644
--- a/Prompt_engineer/config.yaml
+++ b/Prompt_engineer/config.yaml
@@ -1,9 +1,5 @@
# Configuration file for the medical report generator
cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
-
-# Configuration file for the medical report generator
-cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
-
prompt_template: |
[INST] <> You are an experienced medical AI assistant trained to provide helpful and accurate
information to patients. You have extensive knowledge of human anatomy, common medical conditions, and
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
index d73d868..3d00d69 100644
--- a/Prompt_engineer/main.py
+++ b/Prompt_engineer/main.py
@@ -1,7 +1,8 @@
+# main.py
import argparse
import yaml
+import pandas as pd
from typing import Dict, Any, Optional
-from synthea_data import SyntheaData
def load_config(config_path: str) -> Dict[str, Any]:
"""
@@ -18,22 +19,37 @@ def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config
prompt_template = config.get('prompt_template', "Default template if not specified in config.")
prompt = prompt_template.format(
diagnosis=diagnosis,
- age=patient_data['age'],
- gender=patient_data['gender'],
- conditions=', '.join(patient_data['conditions']),
- observations=', '.join(patient_data['observations']),
- care_plans=', '.join(patient_data['care_plans']),
- modality=patient_data.get('modality', "Not specified"),
+ age=patient_data['BIRTHDATE'],
+ gender=patient_data['GENDER'],
+ conditions=patient_data['DESCRIPTION_cond'],
+ observations=', '.join(patient_data.get('observations', [])), # Assuming this needs a similar change
+ care_plans=patient_data['DESCRIPTION_careplan'],
+ modality=patient_data.get('modality', "Not specified"), # Assuming a similar field needs to be added or changed
body_area=patient_data.get('body_area', "Not specified")
)
return prompt
+def load_patient_data(csv_path: str) -> pd.DataFrame:
+ """
+ Load patient data from a CSV file.
+ """
+ return pd.read_csv(csv_path)
+
+def select_random_patient_data(patient_data: pd.DataFrame, diagnosis: str) -> Optional[Dict[str, Any]]:
+ """
+ Select random patient data with a specific diagnosis.
+ """
+ matching_patients = patient_data[patient_data['REASONDESCRIPTION'] == diagnosis]
+ if not matching_patients.empty:
+ return matching_patients.sample(n=1).iloc[0].to_dict()
+ return None
+
def parse_arguments() -> argparse.Namespace:
"""
Parse command line arguments.
"""
parser = argparse.ArgumentParser(description='Medical Report Generator')
- parser.add_argument('--config_path', type=str, default='./config.yaml', help='Path to the YAML configuration file.')
+ parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.')
return parser.parse_args()
def main() -> None:
@@ -42,13 +58,12 @@ def main() -> None:
"""
args = parse_arguments()
config = load_config(args.config_path)
+ patient_data = load_patient_data(config['cleaned_data_csv_path'])
+ diagnosis = "Lung Cancer" # Example diagnosis
+ random_patient_data = select_random_patient_data(patient_data, diagnosis)
- synthea_data = SyntheaData()
- diagnosis = "lung cancer" # Example diagnosis
- patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis)
-
- if patient_data:
- medical_prompt = generate_medical_prompt(patient_data, diagnosis, config)
+ if random_patient_data:
+ medical_prompt = generate_medical_prompt(random_patient_data, diagnosis, config)
print(medical_prompt)
else:
print("No patient data found for the given diagnosis.")
diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py
index ae96fb4..929d6fb 100644
--- a/Prompt_engineer/synthea_data.py
+++ b/Prompt_engineer/synthea_data.py
@@ -32,12 +32,14 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any],
patient_data = {
'age': age,
'gender': selected_patient['GENDER'],
- 'conditions': [selected_patient['description_cond']],
- 'observations': [selected_patient['observation']],
- 'care_plans': [selected_patient['DESCRIPTION_careplan']],
+ 'conditions': selected_patient['description_cond'],
+ 'observations': selected_patient['observation'],
+ 'care_plans': selected_patient['DESCRIPTION_careplan'],
+ 'modality': selected_patient.get('modality', "Not specified"),
+ 'body_area': selected_patient.get('body_area', "Not specified")
}
modality = selected_patient.get('modality')
body_area = selected_patient.get('body_area')
return patient_data, modality, body_area
else:
- return {}, None, None
+ return None, None, None
From 2d567efdd590fefab91c8e694ce5cc35fb4a7b89 Mon Sep 17 00:00:00 2001
From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com>
Date: Mon, 29 Apr 2024 09:19:21 +0100
Subject: [PATCH 7/9] Compute age from BIRTHDATE, update observations, and select patient independently of diagnosis
---
Prompt_engineer/main.py | 38 +++++++++++++++++++++++---------------
1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
index 3d00d69..95301b4 100644
--- a/Prompt_engineer/main.py
+++ b/Prompt_engineer/main.py
@@ -1,7 +1,7 @@
-# main.py
import argparse
import yaml
import pandas as pd
+from datetime import datetime
from typing import Dict, Any, Optional
def load_config(config_path: str) -> Dict[str, Any]:
@@ -12,19 +12,28 @@ def load_config(config_path: str) -> Dict[str, Any]:
config = yaml.safe_load(file) or {}
return config
+def calculate_age(birthdate: str) -> int:
+ """
+ Calculate age given the birthdate.
+ """
+ birth_date = datetime.strptime(birthdate, "%Y-%m-%d")
+ today = datetime.now()
+ return today.year - birth_date.year - ((today.month, today.day) < (birth_date.month, birth_date.day))
+
def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str:
"""
Generate a medical prompt for the medical report based on patient data and diagnosis details using a template from the configuration.
"""
prompt_template = config.get('prompt_template', "Default template if not specified in config.")
+ age = calculate_age(patient_data['BIRTHDATE']) # Calculating age using the BIRTHDATE field
prompt = prompt_template.format(
diagnosis=diagnosis,
- age=patient_data['BIRTHDATE'],
+ age=age,
gender=patient_data['GENDER'],
conditions=patient_data['DESCRIPTION_cond'],
- observations=', '.join(patient_data.get('observations', [])), # Assuming this needs a similar change
- care_plans=patient_data['DESCRIPTION_careplan'],
- modality=patient_data.get('modality', "Not specified"), # Assuming a similar field needs to be added or changed
+ observations=patient_data.get('REASONDESCRIPTION', 'No reason description provided'), # Using REASONDESCRIPTION for observations
+ care_plans=patient_data.get('DESCRIPTION_careplan', 'No care plans recorded'),
+ modality=patient_data.get('modality', "Not specified"),
body_area=patient_data.get('body_area', "Not specified")
)
return prompt
@@ -35,14 +44,11 @@ def load_patient_data(csv_path: str) -> pd.DataFrame:
"""
return pd.read_csv(csv_path)
-def select_random_patient_data(patient_data: pd.DataFrame, diagnosis: str) -> Optional[Dict[str, Any]]:
+def select_random_patient_data(patient_data: pd.DataFrame) -> Dict[str, Any]:
"""
- Select random patient data with a specific diagnosis.
+ Select random patient data from the entire dataset, independent of the diagnosis.
"""
- matching_patients = patient_data[patient_data['REASONDESCRIPTION'] == diagnosis]
- if not matching_patients.empty:
- return matching_patients.sample(n=1).iloc[0].to_dict()
- return None
+ return patient_data.sample(n=1).iloc[0].to_dict()
def parse_arguments() -> argparse.Namespace:
"""
@@ -50,6 +56,8 @@ def parse_arguments() -> argparse.Namespace:
"""
parser = argparse.ArgumentParser(description='Medical Report Generator')
parser.add_argument('--config_path', type=str, default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml', help='Path to the YAML configuration file.')
+ parser.add_argument('--csv_path', type=str, default='/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv', help='Path to the CSV file containing patient data.')
+ parser.add_argument('--diagnosis', type=str, required=True, help='Diagnosis determined by the image classifier.')
return parser.parse_args()
def main() -> None:
@@ -58,15 +66,15 @@ def main() -> None:
"""
args = parse_arguments()
config = load_config(args.config_path)
- patient_data = load_patient_data(config['cleaned_data_csv_path'])
- diagnosis = "Lung Cancer" # Example diagnosis
- random_patient_data = select_random_patient_data(patient_data, diagnosis)
+ patient_data = load_patient_data(args.csv_path)
+ diagnosis = args.diagnosis # Diagnosis passed from the command line
+ random_patient_data = select_random_patient_data(patient_data)
if random_patient_data:
medical_prompt = generate_medical_prompt(random_patient_data, diagnosis, config)
print(medical_prompt)
else:
- print("No patient data found for the given diagnosis.")
+ print("No patient data available.")
if __name__ == '__main__':
main()
From 5c7fe1adb38bc7a07ffeaf2c1bebdd068e363eda Mon Sep 17 00:00:00 2001
From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com>
Date: Mon, 13 May 2024 07:50:42 +0100
Subject: [PATCH 8/9] synthea data output updated to REASONDESCRIPTION
---
Prompt_engineer/synthea_data.py | 31 ++++++++++++++++++++++++++-----
1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py
index 929d6fb..b591636 100644
--- a/Prompt_engineer/synthea_data.py
+++ b/Prompt_engineer/synthea_data.py
@@ -8,12 +8,21 @@
{
'BIRTHDATE': '1990-01-01',
'GENDER': 'Male',
- 'description_cond': 'lung cancer',
+ 'REASONDESCRIPTION': 'lung cancer',
'observation': 'increased cough',
'DESCRIPTION_careplan': 'regular monitoring',
'modality': 'X-Ray',
'body_area': 'Chest'
},
+ {
+ 'BIRTHDATE': '1985-05-15',
+ 'GENDER': 'Female',
+ 'REASONDESCRIPTION': '', # This patient has no diagnosis specified.
+ 'observation': 'shortness of breath',
+ 'DESCRIPTION_careplan': 'oxygen therapy',
+ 'modality': 'CT Scan',
+ 'body_area': 'Chest'
+ },
# Additional records can be added here.
]
@@ -23,16 +32,19 @@ class SyntheaData:
"""
def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
"""
- Retrieves a random patient's data who has been diagnosed with a specified condition.
+ Retrieves data for a random patient whose REASONDESCRIPTION matches the specified diagnosis.
+ If the selected patient's REASONDESCRIPTION is blank, or no patient matches, 'conditions' defaults to "Nil significant past medical history".
"""
- matching_patients = [patient for patient in PATIENT_DATA if diagnosis in patient['description_cond']]
+ # Match REASONDESCRIPTION against the diagnosis case-insensitively; an empty diagnosis selects patients whose REASONDESCRIPTION is blank.
+ matching_patients = [patient for patient in PATIENT_DATA if patient['REASONDESCRIPTION'].strip().lower() == diagnosis.lower()] if diagnosis.strip() else [patient for patient in PATIENT_DATA if not patient['REASONDESCRIPTION'].strip()]
+
if matching_patients:
selected_patient = random.choice(matching_patients)
age = relativedelta(datetime.now(), datetime.strptime(selected_patient['BIRTHDATE'], '%Y-%m-%d')).years
patient_data = {
'age': age,
'gender': selected_patient['GENDER'],
- 'conditions': selected_patient['description_cond'],
+ 'conditions': selected_patient['REASONDESCRIPTION'] if selected_patient['REASONDESCRIPTION'].strip() else "Nil significant past medical history",
'observations': selected_patient['observation'],
'care_plans': selected_patient['DESCRIPTION_careplan'],
'modality': selected_patient.get('modality', "Not specified"),
@@ -42,4 +54,13 @@ def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any],
body_area = selected_patient.get('body_area')
return patient_data, modality, body_area
else:
- return None, None, None
+ # Default case when no patients match the criteria, including empty diagnosis search
+ return ({
+ 'age': None,
+ 'gender': None,
+ 'conditions': "Nil significant past medical history",
+ 'observations': None,
+ 'care_plans': None,
+ 'modality': None,
+ 'body_area': None
+ }, None, None)
From 1858de4c67d194934f52ea8eb34b8a68a63914eb Mon Sep 17 00:00:00 2001
From: Ayodeji Oyesanya <117065443+Drswitch49@users.noreply.github.com>
Date: Mon, 13 May 2024 08:02:37 +0100
Subject: [PATCH 9/9] Use REASONDESCRIPTION for conditions and 'observation' for observations in main.py
---
Prompt_engineer/main.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
index 95301b4..54f9c86 100644
--- a/Prompt_engineer/main.py
+++ b/Prompt_engineer/main.py
@@ -23,15 +23,17 @@ def calculate_age(birthdate: str) -> int:
def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str:
"""
Generate a medical prompt for the medical report based on patient data and diagnosis details using a template from the configuration.
+ Uses REASONDESCRIPTION as the source for the patient's conditions.
"""
prompt_template = config.get('prompt_template', "Default template if not specified in config.")
age = calculate_age(patient_data['BIRTHDATE']) # Calculating age using the BIRTHDATE field
+ conditions = patient_data.get('REASONDESCRIPTION', 'No reason description provided') # Default message if not specified
prompt = prompt_template.format(
diagnosis=diagnosis,
age=age,
gender=patient_data['GENDER'],
- conditions=patient_data['DESCRIPTION_cond'],
- observations=patient_data.get('REASONDESCRIPTION', 'No reason description provided'), # Using REASONDESCRIPTION for observations
+ conditions=conditions, # Correct field used
+ observations=patient_data.get('observation', 'No observations recorded'), # Use correct field for observations
care_plans=patient_data.get('DESCRIPTION_careplan', 'No care plans recorded'),
modality=patient_data.get('modality', "Not specified"),
body_area=patient_data.get('body_area', "Not specified")