Skip to content

Commit 6ea330a

Browse files
Add files via upload
0 parents  commit 6ea330a

File tree

4 files changed

+8187
-0
lines changed

4 files changed

+8187
-0
lines changed

App_Logo.jpg

161 KB
Loading

EGFR_app.py

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
2+
import streamlit as st
3+
import pandas as pd
4+
from PIL import Image
5+
import subprocess
6+
import os
7+
import base64
8+
import pickle
9+
10+
# Molecular descriptor calculator
11+
def desc_calc():
    """Run PaDEL-Descriptor to compute PubChem fingerprint descriptors.

    Launches the PaDEL-Descriptor jar against the current working directory
    (``-dir ./``) and asks it to write ``descriptors_output.csv``. The
    ``molecule.smi`` file in the working directory is deleted afterwards,
    even if the Java process could not be started.

    Raises:
        OSError: If the ``java`` executable cannot be launched.
    """
    # Explicit argv list instead of whitespace-splitting a command string,
    # so an argument can never be split by accident.
    command = [
        "java",
        "-Xms2G",
        "-Xmx2G",
        "-Djava.awt.headless=true",
        "-jar",
        "./PaDEL-Descriptor/PaDEL-Descriptor.jar",
        "-removesalt",
        "-standardizenitro",
        "-2d",
        "-descriptortypes",
        "./PaDEL-Descriptor/PubchemFingerprinter.xml",
        "-dir",
        "./",
        "-file",
        "descriptors_output.csv",
    ]
    try:
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        # Drain stdout so the child cannot block on a full pipe, then wait
        # for it to exit.
        process.communicate()
    finally:
        # Always clean up the input file; it may already be gone.
        try:
            os.remove('molecule.smi')
        except FileNotFoundError:
            pass
17+
18+
# File download
19+
def filedownload(df):
    """Return an HTML anchor that downloads *df* as ``prediction.csv``.

    The frame is serialised to CSV without its index, base64-encoded and
    embedded in a ``data:`` URI, so no file has to be written server-side.

    Args:
        df: A pandas DataFrame (or any object with a compatible ``to_csv``).

    Returns:
        str: Markup for an ``<a>`` element labelled "Download Predictions".
    """
    csv_text = df.to_csv(index=False)
    # base64 operates on bytes; decode back to str to embed it in the markup.
    payload = base64.b64encode(csv_text.encode()).decode()
    # "text/csv" is the registered media type for CSV; "file/csv" is not one.
    return (
        f'<a href="data:text/csv;base64,{payload}" '
        'download="prediction.csv">Download Predictions</a>'
    )
24+
25+
# Model building
26+
def build_model(input_data):
    """Apply the saved model to *input_data* and render the predictions.

    Loads the pickled model from ``ML_model_EGFR.pkl``, predicts one value per
    row of *input_data*, and shows a two-column table (``molecule_name``,
    ``pIC50``) in the Streamlit page together with a CSV download link.

    Args:
        input_data: Descriptor table passed straight to the model's
            ``predict`` method.

    Note:
        Reads the module-level ``load_data`` frame for the molecule names,
        so it must be called after the uploaded file has been parsed.
    """
    # NOTE(review): unpickling can execute arbitrary code; the model file
    # must come from a trusted source.
    # The context manager closes the file handle as soon as the model is read.
    with open('ML_model_EGFR.pkl', 'rb') as model_file:
        load_model = pickle.load(model_file)

    # Apply model to make predictions
    prediction = load_model.predict(input_data)
    st.header('**Prediction output**')
    prediction_output = pd.Series(prediction, name='pIC50')
    # Column 1 of the uploaded table is used as the molecule name
    # (the second whitespace-separated field of each input line).
    molecule_name = pd.Series(load_data[1], name='molecule_name')
    df = pd.concat([molecule_name, prediction_output], axis=1)
    st.write(df)
    st.markdown(filedownload(df), unsafe_allow_html=True)
37+
38+
# Logo image
39+
image = Image.open('App_Logo.jpg')
st.image(image, use_column_width=True)

# Page title (rendered as a level-1 markdown heading)
st.markdown("""
# Bioactivity Prediction App """)

# Sidebar: upload widget for the input file plus a link to a sample file
with st.sidebar.header('1. Upload your data'):
    uploaded_file = st.sidebar.file_uploader(
        "Upload your input file(The input file should contain SMILES notations for the small molecules)",
        type=['txt'],
    )
    st.sidebar.markdown("""
[Example input file](Input_file_example.txt)
""")
53+
54+
if not st.sidebar.button('Predict'):
    # Nothing requested yet: show the initial hint.
    st.info('Upload input data in the sidebar to start!')
elif uploaded_file is None:
    # Predict was pressed without an input file.
    st.warning('Please upload a file before predicting.')
else:
    # Parse the space-separated upload and re-write it as the tab-separated
    # molecule.smi file. `load_data` stays a module-level name because
    # build_model() reads it.
    load_data = pd.read_table(uploaded_file, sep=' ', header=None)
    load_data.to_csv('molecule.smi', sep='\t', header=False, index=False)

    st.header('**Original input data**')
    st.write(load_data)

    with st.spinner("Calculating descriptors..."):
        desc_calc()

    # Show everything the descriptor calculation produced
    st.header('**Calculated molecular descriptors**')
    descriptors = pd.read_csv('descriptors_output.csv')
    st.write(descriptors)
    st.write(descriptors.shape)

    # Keep only the descriptor columns named in descriptor_list.csv
    st.header('**Subset of descriptors from previously built models**')
    model_columns = pd.read_csv('descriptor_list.csv').columns.tolist()
    desc_subset = descriptors[model_columns]
    st.write(desc_subset)
    st.write(desc_subset.shape)

    # Run the trained model on the selected descriptors
    build_model(desc_subset)

Input_file_example.txt

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1 CHEMBL133897
2+
O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1 CHEMBL336398
3+
CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1 CHEMBL131588
4+
O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F CHEMBL130628
5+
CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C CHEMBL130478

0 commit comments

Comments
 (0)