Skip to content

Commit b87a95b

Browse files
authored
Add files via upload
0 parents  commit b87a95b

File tree

4 files changed

+263
-0
lines changed

4 files changed

+263
-0
lines changed

Diff for: Analysis.py

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import pandas as pd
2+
import numpy as np
3+
import matplotlib.pyplot as plt
4+
# Load the QikProp table (semicolon-separated) and keep only the columns used
# below.  '#' in header names is replaced by '_' so that the '_rtvFG' and
# '_stars' selections work (presumably '#rtvFG' / '#stars' in the raw CSV —
# TODO confirm against the file).
data1 = pd.read_csv('D:\\New2\\QikProp_PROJECT.CSV', header=0, sep=';')
data1.columns = data1.columns.str.replace('#', '_', regex=False)
data1 = data1[['Title', 'QPlogHERG', 'CNS', 'PercentHumanOralAbsorption',
               '_rtvFG', 'docking score', 'HumanOralAbsorption', '_stars',
               'RuleOfFive', 'RuleOfThree']]

# Second docking run: only the ligand title and its docking score are needed.
data2 = pd.read_csv('D:\\New2\\XP_resist_residue.csv', header=0)[['Title', 'docking score']]

# Inner join on 'Title'.  Both frames carry 'docking score', so pandas adds
# the '_x' (data2) and '_y' (data1) suffixes, which are renamed right away.
data3 = data2.merge(data1, on='Title')
data3.rename(columns={'docking score_x': 'resist_score',
                      'docking score_y': 'score'}, inplace=True)
data3['sum'] = data3[['resist_score', 'score']].sum(axis=1)
data3.sort_values(by=['sum'], inplace=True)

# Work on an explicit copy: `data3[:]` is only a slice of data3, and adding
# columns / dropping rows on it in place triggers SettingWithCopyWarning.
rate = data3.copy()
# NOTE(review): -4.412 and -1.527 look like reference docking scores used to
# normalise each run — confirm their origin.
rate['6BKL_rate'] = rate['score'] / -4.412
rate['2LY0_rate'] = rate['resist_score'] / -1.527
rate.dropna(inplace=True)
20+
21+
22+
def Plot_violin(data, columns):
    """Draw one horizontal violin plot per column, stacked in a single figure.

    Parameters
    ----------
    data : DataFrame-like
        Object indexable by column name (``data[column]``).
    columns : sequence of str
        Columns to plot, one subplot row each.

    Returns
    -------
    list
        One ``[violin_parts, xlabel_text, yticks]`` list per column, i.e. the
        return values of ``violinplot``, ``set_xlabel`` and ``set_yticks``.
    """
    # squeeze=False always yields a 2-D array of Axes; without it a single
    # column makes plt.subplots return a bare Axes that cannot be indexed.
    fig, axes = plt.subplots(nrows=len(columns), squeeze=False)
    drawn = []
    for ax, column in zip(axes[:, 0], columns):
        parts = ax.violinplot(data[column], showmedians=True, vert=False)
        label = ax.set_xlabel(column, color='b')
        ticks = ax.set_yticks([])  # the y axis carries no information here
        drawn.append([parts, label, ticks])
    return drawn
27+
28+
29+
# Show the two normalised docking-score columns as stacked violin plots.
violin_columns = ['6BKL_rate', '2LY0_rate']
Plot_violin(rate, violin_columns)
plt.tight_layout()
plt.show()
32+
33+
34+
35+
# The bare 'seaborn' style sheet name was deprecated in Matplotlib 3.6 and
# removed in later releases; the same style is shipped as 'seaborn-v0_8'.
# Keep the original name when the installed Matplotlib still lists it.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
36+
37+
38+
# fig, (axe1, axe2, axe3, axe4) = plt.subplots(1, 4, gridspec_kw = {"width_ratios": [np.max(dropnan['_stars'])-np.min(dropnan['_stars']), np.max(dropnan['_rtvFG'])-np.min(dropnan['_rtvFG']), np.max(dropnan['RuleOfThree']) - np.mean(dropnan['RuleOfThree']), np.max(dropnan['RuleOfFive']) - np.min(dropnan['RuleOfFive'])]})
39+
40+
# N1, bins1, patches1 = axe1.hist(dropnan['_stars'])
41+
# bins1 = range(8)
42+
# [patches1[i].set_fc('r') for i in bins1 if i > 5]
43+
# axe1.set_title('stars')
44+
# axe1.set_xticks(range(8))
45+
# plt.xticks(bins1)
46+
47+
# N2, bins2, patches2 = axe2.hist(dropnan['_rtvFG'], bins = 5)
48+
# bins2 = range(7)
49+
# [patches2[i].set_fc('r') for i in bins2 if i > 2]
50+
# axe2.set_title('rtvFG')
51+
# axe2.set_xticks(range(7))
52+
# plt.xticks(bins2)
53+
54+
# N3, bins3, patches3 = axe3.hist(dropnan['RuleOfFive'])
55+
# bins3 = range(3)
56+
# [patches3[i].set_fc('r') for i in bins3 if i > 4]
57+
# axe3.set_title('RuleOfFive')
58+
# # axe3.set_xticks(range(3))
59+
# plt.xticks(bins3)
60+
61+
# N4, bins4, patches4 = axe4.hist(dropnan['RuleOfThree'])
62+
# bins = range(3)
63+
# [patches4[i].set_fc('r') for i in bins4 if i > 2]
64+
# axe4.set_title('RuleOfThree')
65+
# # axe4.set_xticks(range(3))
66+
67+
# [ok[column].value_counts().plot(kind = 'bar', ax = axes[x], title = column) for x, column in enumerate(ok.columns)]
68+
69+
# [print(column, axe) for column, axe in zip(ok.columns,(axe1, axe2, axe3, axe4))]
70+
71+
# ok['_stars'].value_counts().plot(kind = 'bar', ax = axes[1])
72+
# One bar chart of value counts per descriptor, side by side.
fig, axes = plt.subplots(ncols=4)
rate = rate[['_stars', 'RuleOfFive', 'RuleOfThree', '_rtvFG']]
for ax, column in zip(axes, rate.columns):
    # Count each value, then reindex on 0..max so values that never occur
    # still get an (empty) slot on the x axis.  Sorting/reindexing the counts
    # directly avoids relying on the column name produced by reset_index(),
    # which changed in pandas 2.0 and made sort_values(by='index') fail.
    counts = rate[column].value_counts().sort_index()
    counts = counts.reindex(range(int(rate[column].max()) + 1))
    counts.plot(kind='bar', ax=ax, legend=False, fontsize=13)
    ax.set_xlabel('')
    ax.set_title(column, color='b', fontsize=15)

# Highlight the 4th and 5th bars of the last chart ('_rtvFG') in yellow.
# Using ax.patches addresses the bars directly instead of depending on their
# position inside get_children(); the slice is a no-op if fewer bars exist.
for bar in axes[3].patches[3:5]:
    bar.set_color('y')
plt.tight_layout()
plt.show()
81+

Diff for: MERGED.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import pandas as pd
2+
import numpy as np
3+
import matplotlib.pyplot as plt
4+
plt.style.use('ggplot')

# Load the QikProp table (semicolon-separated); '#' in header names is
# replaced by '_' so the '_rtvFG' / '_stars' selections below work.
data1 = pd.read_csv('D:\\New2\\QikProp_PROJECT.CSV', header=0, sep=';')
data1.columns = data1.columns.str.replace('#', '_', regex=False)
data1 = data1[['Title', 'QPlogHERG', 'CNS', 'PercentHumanOralAbsorption',
               '_rtvFG', 'docking score', 'HumanOralAbsorption', '_stars',
               'RuleOfFive', 'RuleOfThree', 'FOSA', 'FISA']]
data2 = pd.read_csv('D:\\New2\\XP_resist_residue.csv', header=0)[['Title', 'docking score']]

# Inner join on 'Title'; the duplicated 'docking score' column receives the
# '_x' (data2) and '_y' (data1) suffixes, which are renamed immediately.
data3 = data2.merge(data1, on='Title')
data3.rename(columns={'docking score_x': 'resist_score',
                      'docking score_y': 'score'}, inplace=True)
data3['sum'] = data3[['resist_score', 'score']].sum(axis=1)
data3.sort_values(by=['sum'], inplace=True)

# DataFrame.to_excel returns None, so keep the ten lowest-'sum' rows in
# `top10` first and export them in a separate step.
top10 = data3.head(10)
top10.to_excel('D:\\New2\\top10.xlsx')

# data3[['Title','sum','HumanOralAbsorption']].head(20).plot.scatter('sum','HumanOralAbsorption')

# Explicit copy: adding columns to the slice `data3[:]` triggers
# SettingWithCopyWarning.
rate = data3.copy()
# NOTE(review): -4.412 and -1.527 look like reference docking scores used to
# normalise each run — confirm their origin.
rate['6BKL_rate'] = rate['score'] / -4.412
rate['2LY0_rate'] = rate['resist_score'] / -1.527
23+
def Plot_violin(data, columns):
    """Draw one horizontal violin plot per column, stacked in a single figure.

    Parameters
    ----------
    data : DataFrame-like
        Object indexable by column name (``data[column]``).
    columns : sequence of str
        Columns to plot, one subplot row each.

    Returns
    -------
    list
        One ``[violin_parts, xlabel_text, yticks]`` list per column, i.e. the
        return values of ``violinplot``, ``set_xlabel`` and ``set_yticks``.
    """
    # squeeze=False always yields a 2-D array of Axes; without it a single
    # column makes plt.subplots return a bare Axes that cannot be indexed.
    fig, axes = plt.subplots(nrows=len(columns), squeeze=False)
    drawn = []
    for ax, column in zip(axes[:, 0], columns):
        parts = ax.violinplot(data[column], showmedians=True, vert=False)
        label = ax.set_xlabel(column, color='b')
        ticks = ax.set_yticks([])  # the y axis carries no information here
        drawn.append([parts, label, ticks])
    return drawn
28+
29+
rate.dropna(inplace=True)
Plot_violin(rate, ['6BKL_rate', '2LY0_rate', 'CNS', 'QPlogHERG', 'PercentHumanOralAbsorption'])
plt.tight_layout()
plt.show()

# NOTE(review): the original plotted rate['score_rate'], a column that is
# never created in this script (KeyError).  '6BKL_rate' is the column derived
# from 'score', so it is presumably what was meant — confirm.
plt.boxplot(rate['6BKL_rate'])

# Rows that still contain NaN; a bare expression, so it has no effect when
# the file is run as a script (only useful interactively).
rate.loc[rate.isnull().any(axis=1)]

# Same violin layout as Plot_violin, built inline because the Axes objects
# are needed afterwards to adjust ticks and tick-label colours.
data = rate
columns = ['6BKL_rate', '2LY0_rate', 'CNS', 'QPlogHERG', 'PercentHumanOralAbsorption']
n_axes = len(columns)
fig, axes = plt.subplots(nrows=n_axes)
for ax, column in zip(axes, columns):
    ax.violinplot(data[column], showmedians=True, vert=False)
    ax.set_xlabel(column, color='b')
    ax.set_yticks([])

# Colour selected tick labels yellow (presumably to flag values outside the
# desirable range — TODO confirm the intended thresholds).
axes[2].set_xticks(np.arange(-2, 3, 1))
for tick_label in axes[2].get_xticklabels()[3:5]:
    tick_label.set_color('y')
axes[3].get_xticklabels()[1].set_color('y')
axes[4].set_xticks(np.arange(0, 125, 25))
for tick_label in axes[4].get_xticklabels()[0:2]:
    tick_label.set_color('y')
plt.tight_layout()
plt.show()

# FOSA FISA
# Export FOSA, FISA and their ratio for the ten best-ranked rows of data3.
new = data3.head(10).reset_index()[['FOSA', 'FISA']]
new['FISA/FOSA'] = new['FISA'] / new['FOSA']
new.to_excel('D:/New2/FIFO.xlsx')

Diff for: RESIDUE - Plot - Dist.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import pandas as pd
2+
import matplotlib.pyplot as plt
3+
from adjustText import adjust_text
4+
import seaborn as sns
5+
6+
data1 = pd.read_csv('D:\\New2\\1_residue_dist.csv', header=0)
# Remove every row whose 'pep_only' contains 'HOH' (presumably water
# molecules) with one drop call instead of one in-place drop per row inside a
# throw-away list comprehension.
hoh_rows = data1.index[data1.pep_only.str.contains('HOH')]
data1.drop(index=hoh_rows, inplace=True)
data2_res = pd.read_csv('D:\\New2\\2_residue_resist_dist.csv', header=0)

# Export both tables to Excel.
data1.to_excel('D:\\New2\\1_residue_dist.xlsx')
data2_res.to_excel('D:\\New2\\2_residue_dist.xlsx')
12+
13+
def Plot(datas, title):
    """Draw one box plot of 'dist' per dataset and label notable residues.

    Outliers (box-plot fliers) are labelled with plain red text; the first
    rows of each table that are not already labelled as outliers receive an
    arrow annotation, which adjust_text then spreads apart.

    Parameters
    ----------
    datas : sequence of DataFrame
        Each frame must provide the columns 'dist' and 'pep_only'.
    title : sequence of str
        One subplot title per dataset.
    """
    # squeeze=False keeps `axes` indexable even for a single dataset.
    fig, axes = plt.subplots(ncols=len(datas), squeeze=False)
    for n, data in enumerate(datas):
        ax = axes[0, n]
        box_dict = ax.boxplot(data['dist'])
        # One box per Axes, so the first flier line holds all outlier points.
        outliers = box_dict['fliers'][0]
        post = list(zip(outliers.get_xdata(), outliers.get_ydata()))

        # Label each outlier with the residue whose distance matches it.
        pep_name = []
        for x, y in post:
            name = data[data['dist'] == y]['pep_only'].values[0]
            pep_name.append(name)
            ax.text(x + 0.02, y + 0.02, s=name, color='r')

        ax.set_xticks([])
        ax.set_title(title[n], color='b')
        ax.set_ylabel('Khoảng cách trung bình (Angstrom)')

        # Annotate up to the first four rows, skipping rows already labelled
        # as outliers.  The bound protects iloc on tables shorter than four.
        texts = []
        for i in range(min(4, len(data))):
            row = data.iloc[i]
            if row.isin(pep_name).any():
                continue
            texts.append(ax.annotate(
                row['pep_only'], color='r',
                xy=(1, row['dist']),
                xytext=(1.12, row['dist'] + 0.05),
                arrowprops=dict(arrowstyle="fancy", color='r',
                                connectionstyle="angle3,angleA=0,angleB=-90")))
        # Pass the Axes explicitly: adjust_text otherwise works on the
        # current Axes, which is not necessarily the one just annotated.
        if texts:
            adjust_text(texts, ax=ax)
27+
28+
def Boxplot(datas, title):
    """Draw a plain box plot of 'dist' for each dataset, side by side.

    Parameters
    ----------
    datas : sequence of DataFrame
        Each frame must provide a 'dist' column.
    title : sequence of str
        One subplot title per dataset.
    """
    # squeeze=False keeps `axes` indexable even for a single dataset.
    fig, axes = plt.subplots(ncols=len(datas), squeeze=False)
    for i, data in enumerate(datas):
        ax = axes[0, i]
        ax.boxplot(data['dist'])
        ax.set_xticks([])
        ax.set_title(title[i], color='b')
        ax.set_ylabel('Khoảng cách (Angstrom)')
    # NOTE(review): wspace=1000 is an unusually large spacing value; kept
    # as written — confirm it is intentional.
    plt.subplots_adjust(wspace=1000)
33+
34+
35+
def Plotsame(datas, title):
    """Draw the 'dist' columns of the first two datasets as two boxes on one Axes.

    Parameters
    ----------
    datas : sequence of DataFrame
        Only ``datas[0]`` and ``datas[1]`` are used; each needs a 'dist' column.
    title : sequence of str
        Tick labels for the two boxes.
    """
    fig, axes = plt.subplots()
    first_dist = datas[0]['dist']
    second_dist = datas[1]['dist']
    axes.boxplot([first_dist, second_dist])
    axes.set_xticklabels(title, color='b', fontsize=13)
    axes.set_ylabel('Khoảng cách trung bình (angstrom)', fontsize=13)
40+
41+
def Plotswarm(datas, title):
    """Draw a seaborn swarm plot comparing 'dist' of the first two datasets.

    Parameters
    ----------
    datas : sequence of DataFrame
        Only ``datas[0]`` and ``datas[1]`` are used; each needs a 'dist' column.
    title : sequence of str
        Category names; used as concat keys, plotting order and tick labels.
    """
    # Put the two series side by side (one column per title), then stack
    # them into long form: one row per point with its category and value.
    side_by_side = pd.concat([datas[0]['dist'], datas[1]['dist']], axis=1, keys=title)
    stacked = side_by_side.stack(0)
    data = stacked.reset_index(level=1)
    data.columns = ['index', 'value']
    axes = sns.swarmplot(x='index', y='value', data=data, order=title)
    # text1 = [axes.annotate(datas[n].iloc[i]['pep_only'], color = 'r', xy = (n, datas[n].iloc[i]['value']), xytext = (n + 0.05, datas[n].iloc[i]['value'] - 0.05), fontsize = 11) for n in range(1) for i in range(10) if datas[n].iloc[i]['value'] < -3]
    # text2 = [axes.annotate(datas[n].iloc[i]['pep_only'], color = 'r', xy = (n, datas[n].iloc[i]['value']), xytext = (n + 0.1, datas[n].iloc[i]['value'] - 0.05), arrowprops=dict(arrowstyle="fancy", color = 'grey', alpha = 0.3, connectionstyle="angle3,angleA=0,angleB=-90"), fontsize = 11) for n in range(1,2) for i in range(10) if datas[n].iloc[i]['value'] < -3]
    # text3 = [axes.annotate(datas[n].iloc[i]['pep_only'], color = 'g', xy = (n, datas[n].iloc[i]['value']), xytext = (n + 0.1, datas[n].iloc[i]['value'] - 0.05), arrowprops=dict(arrowstyle="fancy", color = 'grey', alpha = 0.3, connectionstyle="angle3,angleA=0,angleB=-90"), fontsize = 11) for n in range(1,2) for i in range(len(datas[1])) if datas[n].iloc[i]['value'] > 0]
    axes.set_xticklabels(title, color='b', fontsize=13)
    axes.set_ylabel('Khoảng cách trung bình (angstrom)', fontsize=13)
    axes.set_xlabel('')
    # adjust_text(text2)
    # adjust_text(text3)
53+
54+
# Plot(datas = [data1, data2_res], title = ['(1)','(2)'])
55+
# Boxplot(datas = [data1, data2_res], title = ['6BKL','2LY0'])
56+
# Swarm plot of the two residue-distance tables, then show the figure.
residue_tables = [data1, data2_res]
receptor_labels = ['6BKL', '2LY0']
Plotswarm(datas=residue_tables, title=receptor_labels)
plt.tight_layout()
plt.show()

# Convert the joined HTVS CSV to an Excel workbook in the same folder.
htvs_csv_path = r'D:\New2\1_HTVS\1st _ HTVS Joined.csv'
htvs_xlsx_path = r'D:\New2\1_HTVS\1st _ HTVS Joined.xlsx'
data = pd.read_csv(htvs_csv_path)
data.to_excel(htvs_xlsx_path)

Diff for: RESIDUE - Plot.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import pandas as pd
2+
import matplotlib.pyplot as plt
3+
from adjustText import adjust_text
4+
import seaborn as sns
5+
6+
data1 = pd.read_csv('D:\\New2\\1_residue.csv', header=0)
# Remove every row whose 'pep_only' contains 'HOH' (presumably water
# molecules) with one drop call instead of one in-place drop per row inside a
# throw-away list comprehension.
hoh_rows = data1.index[data1.pep_only.str.contains('HOH')]
data1.drop(index=hoh_rows, inplace=True)
data2_res = pd.read_csv('D:\\New2\\2_residue_resist.csv', header=0)

# Export both tables to Excel.
data1.to_excel('D:\\New2\\1_residue.xlsx')
data2_res.to_excel('D:\\New2\\2_residue.xlsx')
12+
13+
def Plot(datas, title):
    """Draw one box plot of 'value' per dataset and label notable residues.

    Outliers (box-plot fliers) are labelled with plain red text; the first
    rows of each table that are not already labelled as outliers receive an
    arrow annotation, which adjust_text then spreads apart.

    Parameters
    ----------
    datas : sequence of DataFrame
        Each frame must provide the columns 'value' and 'pep_only'.
    title : sequence of str
        One subplot title per dataset.
    """
    # squeeze=False keeps `axes` indexable even for a single dataset.
    fig, axes = plt.subplots(ncols=len(datas), squeeze=False)
    for n, data in enumerate(datas):
        ax = axes[0, n]
        box_dict = ax.boxplot(data['value'])
        # One box per Axes, so the first flier line holds all outlier points.
        outliers = box_dict['fliers'][0]
        post = list(zip(outliers.get_xdata(), outliers.get_ydata()))

        # Label each outlier with the residue whose value matches it.
        pep_name = []
        for x, y in post:
            name = data[data['value'] == y]['pep_only'].values[0]
            pep_name.append(name)
            ax.text(x + 0.02, y + 0.02, s=name, color='r')

        ax.set_xticks([])
        ax.set_title(title[n], color='b')
        ax.set_ylabel('Năng lượng tự do trung bình (kcal/mol)')

        # Annotate up to the first five rows, skipping rows already labelled
        # as outliers.  The bound protects iloc on tables shorter than five.
        texts = []
        for i in range(min(5, len(data))):
            row = data.iloc[i]
            if row.isin(pep_name).any():
                continue
            texts.append(ax.annotate(
                row['pep_only'], color='r',
                xy=(1, row['value']),
                xytext=(1, row['value'] + 0.05),
                arrowprops=dict(arrowstyle="fancy", color='r',
                                connectionstyle="angle3,angleA=0,angleB=-90")))
        # Pass the Axes explicitly: adjust_text otherwise works on the
        # current Axes, which is not necessarily the one just annotated.
        if texts:
            adjust_text(texts, ax=ax)
27+
28+
29+
def Plotsame(datas, title):
    """Draw both 'value' columns as two boxes on one Axes and label residues.

    Outliers of either box are labelled with plain red text; the first rows
    of each table that are not outliers receive an arrow annotation.

    Parameters
    ----------
    datas : sequence of DataFrame
        Only ``datas[0]`` and ``datas[1]`` are used; each needs the columns
        'value' and 'pep_only'.
    title : sequence of str
        Tick labels for the two boxes.
    """
    fig, axes = plt.subplots()
    # axes.violinplot([datas[0]['value'], datas[1]['value']])
    box_dict = axes.boxplot([datas[0]['value'], datas[1]['value']])

    # Gather the outlier points of every box.  The x coordinate is the box
    # position (1 or 2), which selects the matching dataset below.  Reading
    # only the first flier line would leave the second box's outliers
    # without a label.
    post = []
    for flier in box_dict['fliers']:
        post.extend(zip(flier.get_xdata(), flier.get_ydata()))

    for x, y in post:
        source = datas[int(x) - 1]
        name = source[source['value'] == y]['pep_only'].values[0]
        axes.text(x + 0.02, y + 0.02, s=name, color='r', fontsize=11)

    axes.set_xticklabels(title, color='b', fontsize=13)
    axes.set_ylabel('Năng lượng tự do trung bình (kcal/mol)', fontsize=13)

    # Arrow-annotate up to the first six rows of each dataset unless the
    # point is already labelled as an outlier.  The bound protects iloc on
    # tables shorter than six rows.
    texts = []
    for n in range(2):
        table = datas[n]
        for i in range(min(6, len(table))):
            row = table.iloc[i]
            if (n + 1, row['value']) in post:
                continue
            texts.append(axes.annotate(
                row['pep_only'], color='r',
                xy=(n + 1, row['value']),
                xytext=(n + 1, row['value'] + 0.05),
                arrowprops=dict(arrowstyle="fancy", color='r',
                                connectionstyle="angle3,angleA=0,angleB=-90"),
                fontsize=11))
    if texts:
        adjust_text(texts, ax=axes)
42+
43+
44+
def Plotswarm(datas, title):
    """Draw a swarm plot of 'value' for two datasets and label low values.

    Among the first ten rows of each dataset, rows with ``value < -3`` are
    labelled with their 'pep_only' text: plain text for the first dataset,
    arrow annotations (de-overlapped by adjust_text) for the second.

    Parameters
    ----------
    datas : sequence of DataFrame
        Only ``datas[0]`` and ``datas[1]`` are used; each needs the columns
        'value' and 'pep_only'.
    title : sequence of str
        Category names; used as concat keys, plotting order and tick labels.
    """
    # Two series side by side (one column per title), stacked to long form.
    data = pd.concat([datas[0]['value'], datas[1]['value']], axis=1, keys=title).stack(0).reset_index(level=1)
    data.columns = ['index', 'value']
    axes = sns.swarmplot(x='index', y='value', data=data, order=title)

    # First dataset (x position 0): plain labels next to the points.
    # min(...) keeps iloc in range for tables shorter than ten rows.
    first = datas[0]
    for i in range(min(10, len(first))):
        row = first.iloc[i]
        if row['value'] < -3:
            axes.annotate(row['pep_only'], color='r',
                          xy=(0, row['value']),
                          xytext=(0 + 0.05, row['value'] - 0.05),
                          fontsize=11)

    # Second dataset (x position 1): labels with a faint grey arrow.
    second = datas[1]
    text2 = []
    for i in range(min(10, len(second))):
        row = second.iloc[i]
        if row['value'] < -3:
            text2.append(axes.annotate(
                row['pep_only'], color='r',
                xy=(1, row['value']),
                xytext=(1 + 0.1, row['value'] - 0.05),
                arrowprops=dict(arrowstyle="fancy", color='grey', alpha=0.3,
                                connectionstyle="angle3,angleA=0,angleB=-90"),
                fontsize=11))
    # A disabled variant (text3) additionally annotated rows of the second
    # dataset with value > 0 in green.

    axes.set_xticklabels(title, color='b', fontsize=13)
    axes.set_ylabel('Năng lượng tự do trung bình (kcal/mol)', fontsize=13)
    axes.set_xlabel('')
    # Only the arrow annotations are de-overlapped, on this Axes explicitly.
    if text2:
        adjust_text(text2, ax=axes)
56+
57+
# Plot(datas = [data1, data2_res], title = ['6BKL','2LY0'])
58+
# Swarm plot of the two per-residue tables, then show the figure.
residue_tables = [data1, data2_res]
receptor_labels = ['6BKL', '2LY0']
Plotswarm(datas=residue_tables, title=receptor_labels)
plt.tight_layout()
plt.show()

0 commit comments

Comments
 (0)