Skip to content

Working with correct csv path but no output #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added Prompt_engineer/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file not shown.
12 changes: 12 additions & 0 deletions Prompt_engineer/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Configuration file for the medical report generator
#
# NOTE(review): this is a machine-specific absolute path, and the main.py shown
# alongside this file takes the CSV location from --csv_path without reading
# this key -- confirm whether it is still needed.
cleaned_data_csv_path: '/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv'
# Prompt passed through str.format(); every {placeholder} below must be supplied
# by generate_medical_prompt(). The layout follows the Llama 2 chat convention:
# the system text is wrapped in <<SYS>> ... <</SYS>> inside the [INST] block,
# the user content follows, and [/INST] closes the instruction. No end-of-
# sequence token is appended, because the model's answer comes after [/INST].
prompt_template: |
  <s>[INST] <<SYS>>
  You are an experienced medical AI assistant trained to provide helpful and accurate
  information to patients. You have extensive knowledge of human anatomy, common medical conditions, and
  evidence-based treatments. Your responses should be empathetic, informative, and adhere to medical best
  practices. You will not provide any medical advice that could be harmful.
  <</SYS>>

  Diagnosis: {diagnosis}. Patient's age: {age}. Gender: {gender}.
  Conditions: {conditions}. Observations: {observations}. Care plans: {care_plans}.
  Imaging modality: {modality}. Body area: {body_area}.
  Please provide a comprehensive report based on the patient's data and imaging results. [/INST]
82 changes: 82 additions & 0 deletions Prompt_engineer/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import argparse
import yaml
import pandas as pd
from datetime import datetime
from typing import Dict, Any, Optional

def load_config(config_path: str) -> Dict[str, Any]:
    """
    Load the YAML configuration file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed document, or an empty dict when parsing yields a falsy
        value (for example an empty file).

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the prompt template is read the same way
    # on every platform instead of depending on the locale's default encoding.
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file) or {}

def calculate_age(birthdate: str, today: Optional[datetime] = None) -> int:
    """
    Calculate age in completed years given the birthdate.

    Args:
        birthdate: Date of birth formatted as ``YYYY-MM-DD``.
        today: Date to measure the age against. Defaults to the current
            local date and time; pass an explicit value for reproducible
            results.

    Returns:
        The number of whole years between ``birthdate`` and ``today``.

    Raises:
        ValueError: If ``birthdate`` does not match ``%Y-%m-%d``.
    """
    birth_date = datetime.strptime(birthdate, "%Y-%m-%d")
    if today is None:
        today = datetime.now()
    # Tuple comparison is True while this year's birthday is still ahead,
    # in which case one year has to be taken off the plain year difference.
    birthday_pending = (today.month, today.day) < (birth_date.month, birth_date.day)
    return today.year - birth_date.year - int(birthday_pending)

def _field_or_default(record: Dict[str, Any], key: str, default: str) -> Any:
    """Return ``record[key]``, or ``default`` when the value is missing or blank."""
    value = record.get(key)
    if value is None:
        return default
    # NaN is the only float that is not equal to itself; pandas uses it for
    # empty CSV cells, and a plain dict.get() default never replaces it
    # because the key is present.
    if isinstance(value, float) and value != value:
        return default
    if isinstance(value, str) and not value.strip():
        return default
    return value


def generate_medical_prompt(patient_data: Dict[str, Any], diagnosis: str, config: Dict[str, Any]) -> str:
    """
    Generate a medical prompt for the medical report based on patient data and
    diagnosis details using a template from the configuration.
    Uses REASONDESCRIPTION as the source for the patient's conditions.

    Args:
        patient_data: One patient record. ``BIRTHDATE`` and ``GENDER`` are
            required; ``REASONDESCRIPTION``, ``observation``,
            ``DESCRIPTION_careplan``, ``modality`` and ``body_area`` are
            optional and fall back to a placeholder text when absent, None,
            NaN or blank.
        diagnosis: Diagnosis text inserted into the prompt.
        config: Loaded configuration; ``prompt_template`` is used when present.

    Returns:
        The template with every placeholder filled in.

    Raises:
        KeyError: If ``BIRTHDATE`` or ``GENDER`` is missing from
            ``patient_data``, or the template names an unknown placeholder.
    """
    prompt_template = config.get('prompt_template', "Default template if not specified in config.")
    age = calculate_age(patient_data['BIRTHDATE'])  # Age is derived from the BIRTHDATE field
    return prompt_template.format(
        diagnosis=diagnosis,
        age=age,
        gender=patient_data['GENDER'],
        conditions=_field_or_default(patient_data, 'REASONDESCRIPTION', 'No reason description provided'),
        observations=_field_or_default(patient_data, 'observation', 'No observations recorded'),
        care_plans=_field_or_default(patient_data, 'DESCRIPTION_careplan', 'No care plans recorded'),
        modality=_field_or_default(patient_data, 'modality', "Not specified"),
        body_area=_field_or_default(patient_data, 'body_area', "Not specified"),
    )

def load_patient_data(csv_path: str) -> pd.DataFrame:
    """
    Read the patient records CSV into a DataFrame.

    Args:
        csv_path: Location of the CSV file, handed straight to
            ``pandas.read_csv``.

    Returns:
        The parsed table, one row per CSV record.
    """
    frame = pd.read_csv(csv_path)
    return frame

def select_random_patient_data(patient_data: pd.DataFrame) -> Dict[str, Any]:
    """
    Select random patient data from the entire dataset, independent of the diagnosis.

    Args:
        patient_data: Table of patient records, one row per patient.

    Returns:
        One randomly chosen row as a ``{column: value}`` dict, or an empty
        dict when the table is empty.
    """
    # Sampling one row from an empty frame raises, which would keep callers
    # from ever seeing a falsy "no data" result; return an empty dict instead.
    if patient_data.empty:
        return {}
    return patient_data.sample(n=1).iloc[0].to_dict()

def parse_arguments(argv: Optional[list] = None) -> argparse.Namespace:
    """
    Parse command line arguments.

    Args:
        argv: Argument strings to parse, without the program name. When
            omitted, argparse reads them from ``sys.argv``.

    Returns:
        Namespace with ``config_path``, ``csv_path`` and ``diagnosis``.
    """
    parser = argparse.ArgumentParser(description='Medical Report Generator')
    # NOTE(review): both defaults are absolute paths on one developer's
    # machine; they are kept unchanged for backward compatibility.
    parser.add_argument(
        '--config_path',
        type=str,
        default='/Users/ayodejioyesanya/Documents/SFdev/Prompt_engineer/config.yaml',
        help='Path to the YAML configuration file.',
    )
    parser.add_argument(
        '--csv_path',
        type=str,
        default='/Users/ayodejioyesanya/Desktop/Tdata/cleaned_medical_data.csv',
        help='Path to the CSV file containing patient data.',
    )
    parser.add_argument(
        '--diagnosis',
        type=str,
        required=True,
        help='Diagnosis determined by the image classifier.',
    )
    return parser.parse_args(argv)

def main() -> None:
    """
    Run the report-prompt workflow end to end.

    Reads the command line, loads the configuration and the patient CSV,
    picks one random patient record and prints the prompt built for the
    diagnosis given on the command line. Prints a notice instead when the
    selected record is empty.
    """
    cli = parse_arguments()
    settings = load_config(cli.config_path)
    records = load_patient_data(cli.csv_path)
    chosen = select_random_patient_data(records)

    # Guard clause: nothing to format when no record came back.
    if not chosen:
        print("No patient data available.")
        return

    # The diagnosis comes straight from the command line.
    print(generate_medical_prompt(chosen, cli.diagnosis, settings))


if __name__ == '__main__':
    main()
66 changes: 66 additions & 0 deletions Prompt_engineer/synthea_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from typing import Dict, Any, Tuple, Optional
from datetime import datetime
from dateutil.relativedelta import relativedelta
import random

# Small in-memory stand-in for a patient database. Every record uses the same
# field names that SyntheaData reads below.
PATIENT_DATA = [
    dict(
        BIRTHDATE="1990-01-01",
        GENDER="Male",
        REASONDESCRIPTION="lung cancer",
        observation="increased cough",
        DESCRIPTION_careplan="regular monitoring",
        modality="X-Ray",
        body_area="Chest",
    ),
    dict(
        BIRTHDATE="1985-05-15",
        GENDER="Female",
        REASONDESCRIPTION="",  # Blank on purpose: no diagnosis recorded for this patient.
        observation="shortness of breath",
        DESCRIPTION_careplan="oxygen therapy",
        modality="CT Scan",
        body_area="Chest",
    ),
    # Append further records here.
]

class SyntheaData:
    """
    Simulates data retrieval from a static dataset.
    """

    # Text reported as the patient's conditions when no diagnosis is recorded.
    _NO_HISTORY = "Nil significant past medical history"

    @staticmethod
    def _age_in_years(birthdate: str, today: datetime) -> int:
        """Return the completed years between a ``YYYY-MM-DD`` birthdate and ``today``."""
        born = datetime.strptime(birthdate, '%Y-%m-%d')
        # Take one year off while this year's birthday is still ahead.
        return today.year - born.year - int((today.month, today.day) < (born.month, born.day))

    def get_patient_data_by_diagnosis(
        self,
        diagnosis: str,
        records: Optional[list] = None,
    ) -> Tuple[Dict[str, Any], Optional[str], Optional[str]]:
        """
        Retrieves a random patient's data who has been diagnosed with a specified condition using the REASONDESCRIPTION.
        If no diagnosis is provided in REASONDESCRIPTION, it defaults to "Nil significant past medical history."

        Args:
            diagnosis: Condition to look for. Matching ignores case and
                surrounding whitespace; a blank (or None) diagnosis selects
                the patients whose REASONDESCRIPTION is blank.
            records: Patient dicts to search. Defaults to the module-level
                ``PATIENT_DATA``.

        Returns:
            A ``(patient_data, modality, body_area)`` tuple. ``patient_data``
            holds ``age``, ``gender``, ``conditions``, ``observations``,
            ``care_plans``, ``modality`` and ``body_area``. When nothing
            matches, every field is None except ``conditions``, and both
            trailing tuple items are None.
        """
        if records is None:
            records = PATIENT_DATA

        # Normalise once so the query is compared the same way it is checked
        # for blankness: a padded query like " Lung Cancer " must still match.
        wanted = (diagnosis or "").strip().lower()
        # A blank query compares equal to blank reasons, so a single
        # comparison covers both the "named diagnosis" and "no diagnosis" cases.
        matching_patients = [
            patient
            for patient in records
            if (patient.get('REASONDESCRIPTION') or "").strip().lower() == wanted
        ]

        if not matching_patients:
            # Default case when no patients match the criteria, including empty diagnosis search
            return ({
                'age': None,
                'gender': None,
                'conditions': self._NO_HISTORY,
                'observations': None,
                'care_plans': None,
                'modality': None,
                'body_area': None
            }, None, None)

        selected_patient = random.choice(matching_patients)
        reason = selected_patient.get('REASONDESCRIPTION') or ""
        patient_data = {
            'age': self._age_in_years(selected_patient['BIRTHDATE'], datetime.now()),
            'gender': selected_patient['GENDER'],
            'conditions': reason if reason.strip() else self._NO_HISTORY,
            'observations': selected_patient['observation'],
            'care_plans': selected_patient['DESCRIPTION_careplan'],
            'modality': selected_patient.get('modality', "Not specified"),
            'body_area': selected_patient.get('body_area', "Not specified")
        }
        # The trailing tuple items stay None when the record lacks the field,
        # unlike the "Not specified" placeholders stored inside patient_data.
        modality = selected_patient.get('modality')
        body_area = selected_patient.get('body_area')
        return patient_data, modality, body_area
2 changes: 0 additions & 2 deletions config.yaml

This file was deleted.

65 changes: 0 additions & 65 deletions main.py

This file was deleted.

52 changes: 0 additions & 52 deletions synthea_data.py

This file was deleted.