diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Prompt_engineer/.DS_Store b/Prompt_engineer/.DS_Store
new file mode 100644
index 0000000..f165c78
Binary files /dev/null and b/Prompt_engineer/.DS_Store differ
diff --git a/Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc b/Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc
new file mode 100644
index 0000000..9d7c514
Binary files /dev/null and b/Prompt_engineer/__pycache__/synthea_data.cpython-310.pyc differ
diff --git a/Prompt_engineer/__pycache__/synthea_data.cpython-37.pyc b/Prompt_engineer/__pycache__/synthea_data.cpython-37.pyc
new file mode 100644
index 0000000..0a5c570
Binary files /dev/null and b/Prompt_engineer/__pycache__/synthea_data.cpython-37.pyc differ
diff --git a/Prompt_engineer/config.yaml b/Prompt_engineer/config.yaml
new file mode 100644
index 0000000..a950330
--- /dev/null
+++ b/Prompt_engineer/config.yaml
@@ -0,0 +1,17 @@
+# Configuration file for the medical report generator
+# NOTE: machine-specific absolute path; adjust it for your environment.
+cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
+# NOTE(review): assumes a Llama 2 chat model - system prompt between <<SYS>> and
+# <</SYS>>, user turn before the closing [/INST]. Confirm against the target model.
+prompt_template: |
+  [INST] <<SYS>>
+  You are an experienced medical AI assistant trained to provide helpful and accurate
+  information to patients. You have extensive knowledge of human anatomy, common medical conditions, and
+  evidence-based treatments. Your responses should be empathetic, informative, and adhere to medical best
+  practices. You will not provide any medical advice that could be harmful.
+  <</SYS>>
+
+  Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}.
+  Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}.
+  Imaging modality: {modality}. Body area: {body_area}.
+  Please provide a comprehensive report based on the patient's data and imaging results. [/INST]
diff --git a/Prompt_engineer/main.py b/Prompt_engineer/main.py
new file mode 100644
index 0000000..54f9c86
--- /dev/null
+++ b/Prompt_engineer/main.py
@@ -0,0 +1,139 @@
+"""Build an LLM prompt for a medical report from a randomly chosen patient record."""
+
+import argparse
+from datetime import date, datetime
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import pandas as pd
+import yaml
+
+# Fallback used when the configuration has no usable `prompt_template`; it must
+# carry the same placeholders that generate_medical_prompt() fills in.
+DEFAULT_PROMPT_TEMPLATE = (
+    "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. "
+    "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. "
+    "Imaging modality: {modality}. Body area: {body_area}. "
+    "Please provide a comprehensive report based on the patient's data and imaging results."
+)
+
+# Default to the config.yaml that sits next to this script rather than a
+# machine-specific absolute path.
+DEFAULT_CONFIG_PATH = Path(__file__).resolve().parent / 'config.yaml'
+
+
+def load_config(config_path: str) -> Dict[str, Any]:
+    """
+    Load the YAML configuration file.
+
+    Returns an empty dict for an empty file. Raises ValueError when the
+    top-level YAML node is not a mapping.
+    """
+    with open(config_path, 'r', encoding='utf-8') as file:
+        config = yaml.safe_load(file) or {}
+    if not isinstance(config, dict):
+        raise ValueError(f"Expected a mapping at the top level of {config_path}")
+    return config
+
+
+def calculate_age(birthdate: str, today: Optional[date] = None) -> int:
+    """
+    Calculate age in whole years given a birthdate in YYYY-MM-DD format.
+
+    `today` defaults to the current local date; pass it explicitly for a
+    reproducible result.
+    """
+    birth_date = datetime.strptime(birthdate, "%Y-%m-%d").date()
+    if today is None:
+        today = date.today()
+    # The tuple comparison is True (== 1) when this year's birthday is still ahead.
+    return today.year - birth_date.year - ((today.month, today.day) < (birth_date.month, birth_date.day))
+
+
+def _field(patient_data: Dict[str, Any], key: str, default: str) -> Any:
+    """
+    Return patient_data[key], or `default` when the value is absent, None, NaN or blank.
+
+    pandas represents empty CSV cells as NaN, which a plain dict.get() would
+    pass through and render as "nan" in the prompt.
+    """
+    value = patient_data.get(key)
+    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
+        return default
+    if isinstance(value, str) and not value.strip():
+        return default
+    return value
+
+
+def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str:
+    """
+    Generate a medical prompt for the medical report based on patient data and diagnosis details using a template from the configuration.
+    Uses REASONDESCRIPTION as the source for the patient's conditions.
+
+    BIRTHDATE and GENDER are required keys (KeyError if absent); every other
+    field falls back to a placeholder sentence when missing or empty.
+    """
+    prompt_template = config.get('prompt_template') or DEFAULT_PROMPT_TEMPLATE
+    return prompt_template.format(
+        diagnosis=diagnosis,
+        age=calculate_age(patient_data['BIRTHDATE']),
+        gender=patient_data['GENDER'],
+        conditions=_field(patient_data, 'REASONDESCRIPTION', 'No reason description provided'),
+        observations=_field(patient_data, 'observation', 'No observations recorded'),
+        care_plans=_field(patient_data, 'DESCRIPTION_careplan', 'No care plans recorded'),
+        modality=_field(patient_data, 'modality', "Not specified"),
+        body_area=_field(patient_data, 'body_area', "Not specified"),
+    )
+
+
+def load_patient_data(csv_path: str) -> pd.DataFrame:
+    """
+    Load patient data from a CSV file.
+    """
+    return pd.read_csv(csv_path)
+
+
+def select_random_patient_data(patient_data: pd.DataFrame) -> Dict[str, Any]:
+    """
+    Select random patient data from the entire dataset, independent of the diagnosis.
+
+    Returns an empty dict when the dataset has no rows (DataFrame.sample(n=1)
+    would raise ValueError on an empty frame).
+    """
+    if patient_data.empty:
+        return {}
+    return patient_data.sample(n=1).iloc[0].to_dict()
+
+
+def parse_arguments() -> argparse.Namespace:
+    """
+    Parse command line arguments.
+    """
+    parser = argparse.ArgumentParser(description='Medical Report Generator')
+    parser.add_argument('--config_path', type=str, default=str(DEFAULT_CONFIG_PATH), help='Path to the YAML configuration file.')
+    parser.add_argument('--csv_path', type=str, default=None, help='Path to the CSV file containing patient data. Defaults to cleaned_data_csv_path from the configuration file.')
+    parser.add_argument('--diagnosis', type=str, required=True, help='Diagnosis determined by the image classifier.')
+    return parser.parse_args()
+
+
+def main() -> None:
+    """
+    Main function to orchestrate the workflow.
+    """
+    args = parse_arguments()
+    config = load_config(args.config_path)
+    # An explicit --csv_path wins; otherwise use the path recorded in the config.
+    csv_path = args.csv_path or config.get('cleaned_data_csv_path')
+    if not csv_path:
+        raise SystemExit('No CSV path given: pass --csv_path or set cleaned_data_csv_path in the configuration file.')
+    patient_data = load_patient_data(csv_path)
+    random_patient_data = select_random_patient_data(patient_data)
+
+    if random_patient_data:
+        print(generate_medical_prompt(random_patient_data, args.diagnosis, config))
+    else:
+        print("No patient data available.")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/Prompt_engineer/synthea_data.py b/Prompt_engineer/synthea_data.py
new file mode 100644
index 0000000..b591636
--- /dev/null
+++ b/Prompt_engineer/synthea_data.py
@@ -0,0 +1,91 @@
+"""Static in-memory stand-in for a Synthea patient database."""
+
+from typing import Dict, Any, Tuple, Optional
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+import random
+
+# Example static data, simulating a patient database
+PATIENT_DATA = [
+    {
+        'BIRTHDATE': '1990-01-01',
+        'GENDER': 'Male',
+        'REASONDESCRIPTION': 'lung cancer',
+        'observation': 'increased cough',
+        'DESCRIPTION_careplan': 'regular monitoring',
+        'modality': 'X-Ray',
+        'body_area': 'Chest'
+    },
+    {
+        'BIRTHDATE': '1985-05-15',
+        'GENDER': 'Female',
+        'REASONDESCRIPTION': '',  # This patient has no diagnosis specified.
+        'observation': 'shortness of breath',
+        'DESCRIPTION_careplan': 'oxygen therapy',
+        'modality': 'CT Scan',
+        'body_area': 'Chest'
+    },
+    # Additional records can be added here.
+]
+
+# Text reported as the patient's conditions when REASONDESCRIPTION is empty.
+NO_HISTORY = "Nil significant past medical history"
+
+
+def _normalize(text: Optional[str]) -> str:
+    """
+    Return `text` stripped and lower-cased; None becomes the empty string.
+    """
+    return (text or '').strip().lower()
+
+
+class SyntheaData:
+    """
+    Simulates data retrieval from a static dataset.
+    """
+    def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
+        """
+        Retrieves a random patient's data who has been diagnosed with a specified condition using the REASONDESCRIPTION.
+        If no diagnosis is provided in REASONDESCRIPTION, it defaults to "Nil significant past medical history."
+
+        The comparison ignores case and surrounding whitespace on both sides.
+        An empty `diagnosis` selects patients whose REASONDESCRIPTION is empty.
+        Returns (patient_data, modality, body_area); when nothing matches,
+        patient_data holds None for every field except 'conditions' and both
+        modality and body_area are None.
+        """
+        wanted = _normalize(diagnosis)
+        # Both sides go through _normalize(), so an empty query matches exactly
+        # the patients with an empty or missing REASONDESCRIPTION.
+        matching_patients = [
+            patient for patient in PATIENT_DATA
+            if _normalize(patient.get('REASONDESCRIPTION')) == wanted
+        ]
+
+        if not matching_patients:
+            # Default case when no patients match the criteria
+            return ({
+                'age': None,
+                'gender': None,
+                'conditions': NO_HISTORY,
+                'observations': None,
+                'care_plans': None,
+                'modality': None,
+                'body_area': None
+            }, None, None)
+
+        selected_patient = random.choice(matching_patients)
+        age = relativedelta(datetime.now(), datetime.strptime(selected_patient['BIRTHDATE'], '%Y-%m-%d')).years
+        reason = selected_patient.get('REASONDESCRIPTION') or ''
+        modality = selected_patient.get('modality')
+        body_area = selected_patient.get('body_area')
+        patient_data = {
+            'age': age,
+            'gender': selected_patient['GENDER'],
+            'conditions': reason if reason.strip() else NO_HISTORY,
+            'observations': selected_patient['observation'],
+            'care_plans': selected_patient['DESCRIPTION_careplan'],
+            'modality': selected_patient.get('modality', "Not specified"),
+            'body_area': selected_patient.get('body_area', "Not specified")
+        }
+        return patient_data, modality, body_area
diff --git a/config.yaml b/config.yaml
deleted file mode 100644
index c28a9ca..0000000
--- a/config.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-database_path: 'path/to/health_data.db'
-cleaned_data_csv_path: '/path/to/cleaned_medical_data.csv'
diff --git a/main.py b/main.py
deleted file mode 100644
index 8288db7..0000000
--- a/main.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import argparse
-import yaml
-from typing import Dict, Any, Optional
-from transformers import AutoTokenizer, pipeline
-from synthea_data import SyntheaData #SyntheaData class is in synthea_data.py
-
-def load_config(config_path: str) -> Dict[str, Any]:
- """
- Load the YAML configuration file.
- """
- with open(config_path, 'r') as file:
- config = yaml.safe_load(file)
- return config
-
-def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, modality: Optional[str], body_area: Optional[str]) -> str:
- """
- Generate a medical prompt for the LLAMA model based on patient data, a given diagnosis, and imaging details.
- """
- prompt_template = (
- "Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}. "
- "Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}. "
- "Imaging modality: {modality}. Body area: {body_area}. "
- "Please provide a comprehensive report based on the patient's data and imaging results."
- )
-
- prompt = prompt_template.format(
- diagnosis=diagnosis,
- age=patient_data['age'],
- gender=patient_data['gender'],
- conditions=', '.join(patient_data['conditions']),
- observations=', '.join(patient_data['observations']),
- care_plans=', '.join(patient_data['care_plans']),
- modality=modality or "Not specified",
- body_area=body_area or "Not specified"
- )
- return prompt
-
-def parse_arguments() -> argparse.Namespace:
- """
- Parse command line arguments.
- """
- parser = argparse.ArgumentParser(description='Medical Report Generator')
- parser.add_argument('--config_path', type=str, default='config.yaml', help='Path to the YAML configuration file.')
- return parser.parse_args()
-
-def main() -> None:
- """
- Main function to orchestrate the workflow.
- """
- args = parse_arguments()
- config = load_config(args.config_path)
-
- synthea_data = SyntheaData(config)
- diagnosis = "lung cancer" # Example diagnosis
- patient_data, modality, body_area = synthea_data.get_patient_data_by_diagnosis(diagnosis)
-
- if patient_data:
- medical_prompt = generate_medical_prompt(patient_data, diagnosis, modality, body_area)
- print(medical_prompt)
- # Further processing with LLAMA or other models can be done here
- else:
- print("No patient data found for the given diagnosis.")
-
-if __name__ == '__main__':
- main()
diff --git a/synthea_data.py b/synthea_data.py
deleted file mode 100644
index 34e32e8..0000000
--- a/synthea_data.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import pandas as pd
-import sqlite3
-from datetime import datetime
-from dateutil.relativedelta import relativedelta
-from typing import Dict, Any, Tuple
-import random
-
-class SyntheaData:
- """
- Handles operations related to processing and retrieving patient data from a consolidated dataset.
- """
- def __init__(self, config: Dict[str, Any]) -> None:
- """
- Initializes with configuration settings.
- """
- self.config = config
-
- def import_cleaned_data_to_sqlite(self) -> None:
- """
- Imports cleaned medical data from a CSV file into an SQLite database.
- """
- conn = sqlite3.connect(self.config['database_path'])
- df = pd.read_csv(self.config['cleaned_data_csv_path'])
- df.to_sql('cleaned_medical_data', conn, if_exists='replace', index=False)
- conn.close()
-
- def get_patient_data_by_diagnosis(self, diagnosis: str) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
- """
- Retrieves a random patient's data who has been diagnosed with a specified condition.
- """
- conn = sqlite3.connect(self.config['database_path'])
- query = """
- SELECT * FROM cleaned_medical_data
- WHERE description_cond LIKE ? OR reasondescription LIKE ?
- """
- df = pd.read_sql_query(query, conn, params=(f'%{diagnosis}%', f'%{diagnosis}%'))
- conn.close()
-
- if not df.empty:
- selected_row = df.sample(n=1).iloc[0]
- patient_data = {
- 'age': relativedelta(datetime.now(), datetime.strptime(selected_row['BIRTHDATE'], '%Y-%m-%d')).years,
- 'gender': selected_row['GENDER'],
- 'conditions': [selected_row['description_cond']],
- 'observations': [selected_row.get('observation', '')],
- 'care_plans': [selected_row.get('DESCRIPTION_careplan', '')],
- }
- modality = selected_row.get('modality', None)
- body_area = selected_row.get('body_area', None)
- return patient_data, modality, body_area
- else:
- return {}, None, None