|
| 1 | + |
| 2 | +import streamlit as st |
| 3 | +import pandas as pd |
| 4 | +from PIL import Image |
| 5 | +import subprocess |
| 6 | +import os |
| 7 | +import base64 |
| 8 | +import pickle |
| 9 | + |
| 10 | +# Molecular descriptor calculator |
def desc_calc():
    """Run PaDEL-Descriptor over the working directory and clean up the input.

    Launches the PaDEL-Descriptor JAR with the ``PubchemFingerprinter.xml``
    descriptor types, pointing it at ``./`` and asking it to write
    ``descriptors_output.csv``. The ``molecule.smi`` input file is removed
    afterwards, whether or not the calculation succeeded.

    Raises:
        RuntimeError: If the Java process exits with a non-zero status.
        OSError: If the ``java`` executable cannot be launched.
    """
    command = [
        "java",
        "-Xms2G",
        "-Xmx2G",
        "-Djava.awt.headless=true",
        "-jar",
        "./PaDEL-Descriptor/PaDEL-Descriptor.jar",
        "-removesalt",
        "-standardizenitro",
        "-2d",
        "-descriptortypes",
        "./PaDEL-Descriptor/PubchemFingerprinter.xml",
        "-dir",
        "./",
        "-file",
        "descriptors_output.csv",
    ]
    try:
        # Capture stderr as well so a failure can be reported with its cause.
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    finally:
        # Always drop the temporary input, even if Java could not be started,
        # and tolerate it having been removed already.
        try:
            os.remove('molecule.smi')
        except FileNotFoundError:
            pass

    # A failed run must not fall through silently: the caller would otherwise
    # read a missing or stale descriptors_output.csv.
    if completed.returncode != 0:
        details = completed.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            f"PaDEL-Descriptor exited with status {completed.returncode}: {details}"
        )
| 17 | + |
| 18 | +# File download |
def filedownload(df):
    """Build an HTML download link that embeds *df* as a base64-encoded CSV.

    Args:
        df: DataFrame to serialise; the index column is not written.

    Returns:
        An ``<a>`` tag string whose ``href`` is a ``data:`` URI holding the
        CSV and whose ``download`` attribute is ``prediction.csv``.
    """
    csv_text = df.to_csv(index=False)
    # base64 operates on bytes: encode the CSV text, then decode the result
    # back to str so it can be interpolated into the link.
    encoded = base64.b64encode(csv_text.encode()).decode()
    # text/csv is the registered media type for CSV data (RFC 4180).
    return (
        f'<a href="data:text/csv;base64,{encoded}" '
        f'download="prediction.csv">Download Predictions</a>'
    )
| 24 | + |
| 25 | +# Model building |
def build_model(input_data):
    """Predict pIC50 values for *input_data* and render them in the app.

    Loads the pickled model from ``ML_model_EGFR.pkl``, calls its ``predict``
    method on *input_data*, shows the predictions next to the molecule names
    and adds a download link for the resulting table.

    Args:
        input_data: Descriptor table passed unchanged to the model's
            ``predict`` method.

    Note:
        Molecule names are taken from column ``1`` of the module-level
        ``load_data`` frame, which must be assigned before this is called.
    """
    # Use a context manager so the model file handle is closed promptly.
    # NOTE: unpickling executes code from the file; only load a trusted model.
    with open('ML_model_EGFR.pkl', 'rb') as model_file:
        load_model = pickle.load(model_file)

    # Apply the model to make predictions
    prediction = load_model.predict(input_data)

    st.header('**Prediction output**')
    prediction_output = pd.Series(prediction, name='pIC50')
    molecule_name = pd.Series(load_data[1], name='molecule_name')
    df = pd.concat([molecule_name, prediction_output], axis=1)
    st.write(df)
    # The link is raw HTML, so Streamlit must be told to render it unescaped.
    st.markdown(filedownload(df), unsafe_allow_html=True)
| 37 | + |
| 38 | +# Logo image |
# Logo banner at the top of the page
image = Image.open('App_Logo.jpg')
st.image(image, use_column_width=True)

# Page title
st.markdown("""
# Bioactivity Prediction App """)

# Sidebar: upload widget plus a link to an example input file
with st.sidebar.header('1. Upload your data'):
    uploaded_file = st.sidebar.file_uploader(
        "Upload your input file(The input file should contain SMILES notations for the small molecules)",
        type=['txt'],
    )
    st.sidebar.markdown("""
[Example input file](Input_file_example.txt)
""")

# Main panel: nothing runs until the Predict button is pressed, and a
# prediction is only attempted once a file has been uploaded.
if not st.sidebar.button('Predict'):
    st.info('Upload input data in the sidebar to start!')
elif uploaded_file is None:
    st.warning('Please upload a file before predicting.')
else:
    # Parse the space-separated upload and re-save it tab-separated as
    # molecule.smi; build_model() later reads column 1 of load_data.
    load_data = pd.read_table(uploaded_file, sep=' ', header=None)
    load_data.to_csv('molecule.smi', sep='\t', header=False, index=False)

    st.header('**Original input data**')
    st.write(load_data)

    with st.spinner("Calculating descriptors..."):
        desc_calc()

    # Show the descriptors produced by the calculation step
    st.header('**Calculated molecular descriptors**')
    desc = pd.read_csv('descriptors_output.csv')
    st.write(desc)
    st.write(desc.shape)

    # Keep only the descriptor columns listed in descriptor_list.csv
    st.header('**Subset of descriptors from previously built models**')
    Xlist = list(pd.read_csv('descriptor_list.csv').columns)
    desc_subset = desc[Xlist]
    st.write(desc_subset)
    st.write(desc_subset.shape)

    # Run the trained model on the query compounds
    build_model(desc_subset)
0 commit comments