amarinderthind
diff --git a/‎App_Logo.jpg
161 KB b/‎App_Logo.jpg
161 KB
diff --git a/‎EGFR_app.py
+83 b/‎EGFR_app.py
+83
diff --git a/‎Input_file_example.txt
+5 b/‎Input_file_example.txt
+5
@@ -0,0 +1,83 @@
+
+import streamlit as st
+import pandas as pd
+from PIL import Image
+import subprocess
+import os
+import base64
+import pickle
+
+# Molecular descriptor calculator
+def desc_calc():
+    # Performs the descriptor calculation
+    bashCommand = "java -Xms2G -Xmx2G -Djava.awt.headless=true -jar ./PaDEL-Descriptor/PaDEL-Descriptor.jar -removesalt -standardizenitro -2d -descriptortypes ./PaDEL-Descriptor/PubchemFingerprinter.xml -dir ./ -file descriptors_output.csv"
+    process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
+    output, error = process.communicate()
+    os.remove('molecule.smi')
+
+# File download
+def filedownload(df):
+    csv = df.to_csv(index=False)
+    b64 = base64.b64encode(csv.encode()).decode()  # strings <-> bytes conversions
+    href = f'<a href="data:file/csv;base64,{b64}" download="prediction.csv">Download Predictions</a>'
+    return href
+
+# Model building
+def build_model(input_data):
+    # Reads in saved regression model
+    load_model = pickle.load(open('ML_model_EGFR.pkl', 'rb'))
+    # Apply model to make predictions
+    prediction = load_model.predict(input_data)
+    st.header('**Prediction output**')
+    prediction_output = pd.Series(prediction, name='pIC50')
+    molecule_name = pd.Series(load_data[1], name='molecule_name')
+    df = pd.concat([molecule_name, prediction_output], axis=1)
+    st.write(df)
+    st.markdown(filedownload(df), unsafe_allow_html=True)
+
+# Logo image
+image = Image.open('App_Logo.jpg')
+
+st.image(image, use_column_width=True)
+
+# Page title
+st.markdown("""
+# Bioactivity Prediction App """)
+
+# Sidebar
+with st.sidebar.header('1. Upload your data'):
+    uploaded_file = st.sidebar.file_uploader("Upload your input file(The input file should contain SMILES notations for the small molecules)", type=['txt'])
+    st.sidebar.markdown("""
+[Example input file](Input_file_example.txt)
+""")
+
+if st.sidebar.button('Predict'):
+    if uploaded_file is not None:
+        load_data = pd.read_table(uploaded_file, sep=' ', header=None)
+        load_data.to_csv('molecule.smi', sep='\t', header=False, index=False)
+
+        st.header('**Original input data**')
+        st.write(load_data)
+
+        with st.spinner("Calculating descriptors..."):
+            desc_calc()
+
+        # Read in calculated descriptors and display the dataframe
+        st.header('**Calculated molecular descriptors**')
+        desc = pd.read_csv('descriptors_output.csv')
+        st.write(desc)
+        st.write(desc.shape)
+
+        # Read descriptor list used in previously built model
+        st.header('**Subset of descriptors from previously built models**')
+        Xlist = list(pd.read_csv('descriptor_list.csv').columns)
+        desc_subset = desc[Xlist]
+        st.write(desc_subset)
+        st.write(desc_subset.shape)
+
+        # Apply trained model to make a prediction on query compounds
+        build_model(desc_subset)
+    else:
+        st.warning('Please upload a file before predicting.')
+else:
+    st.info('Upload input data in the sidebar to start!')
@@ -0,0 +1,5 @@
+CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1 CHEMBL133897
+O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1 CHEMBL336398
+CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1 CHEMBL131588
+O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F CHEMBL130628
+CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C CHEMBL130478