Add files via upload

tithuytrang · web-flow · commit b87a95bd4eb9 · 2019-09-02T21:31:52.000+07:00
diff --git a/Analysis.py b/Analysis.py
@@ -0,0 +1,81 @@
+import pandas as pd 
+import numpy as np 
+import matplotlib.pyplot as plt 
+data1 = pd.read_csv('D:\\New2\\QikProp_PROJECT.CSV', header=0, sep=';')
+data1.columns = data1.columns.str.replace('#', '_', regex=False)  # literal '#' -> '_' so the '_rtvFG' / '_stars' selections below match
+data1 = data1[['Title', 'QPlogHERG', 'CNS', 'PercentHumanOralAbsorption', '_rtvFG', 'docking score', 'HumanOralAbsorption', '_stars', 'RuleOfFive', 'RuleOfThree']]
+data2 = pd.read_csv('D:\\New2\\XP_resist_residue.csv', header=0)[['Title', 'docking score']]
+
+# Both tables carry a 'docking score' column; merge() suffixes them '_x' (data2, left) and '_y' (data1, right).
+data3 = data2.merge(data1, on='Title')
+data3.rename(columns={'docking score_x': 'resist_score', 'docking score_y': 'score'}, inplace=True)
+data3['sum'] = data3[['resist_score', 'score']].sum(axis=1)
+data3.sort_values(by=['sum'], inplace=True)
+
+# Take a real copy: adding columns to the slice data3[:] can raise pandas' SettingWithCopyWarning.
+rate = data3.copy()
+rate['6BKL_rate'] = rate['score'] / -4.412  # NOTE(review): -4.412 and -1.527 look like reference docking scores -- confirm
+rate['2LY0_rate'] = rate['resist_score'] / -1.527
+rate.dropna(inplace=True)
+
+
+def Plot_violin(data, columns):
+    """Draw one horizontal violin (median marked) per entry of `columns`, stacked in rows; return, per row, [violinplot result, x-label Text, set_yticks result]."""
+    fig, axes = plt.subplots(nrows=len(columns), squeeze=False)  # squeeze=False keeps `axes` 2-D, so a single column no longer fails on axes[0]
+    return [[ax.violinplot(data[column], showmedians=True, vert=False), ax.set_xlabel(column, color='b'), ax.set_yticks([])]
+            for ax, column in zip(axes[:, 0], columns)]
+
+
+Plot_violin(rate, ['6BKL_rate', '2LY0_rate'])
+plt.tight_layout()
+plt.show()
+
+# matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and 3.8 removed the old name;
+# use whichever of the two this installation provides.
+plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
+
+
+# fig, (axe1, axe2, axe3, axe4) = plt.subplots(1, 4, gridspec_kw = {"width_ratios": [np.max(dropnan['_stars'])-np.min(dropnan['_stars']), np.max(dropnan['_rtvFG'])-np.min(dropnan['_rtvFG']), np.max(dropnan['RuleOfThree']) - np.mean(dropnan['RuleOfThree']), np.max(dropnan['RuleOfFive']) - np.min(dropnan['RuleOfFive'])]})
+
+# N1, bins1, patches1 = axe1.hist(dropnan['_stars'])
+# bins1 = range(8)
+# [patches1[i].set_fc('r') for i in bins1 if i > 5]
+# axe1.set_title('stars')
+# axe1.set_xticks(range(8))
+# plt.xticks(bins1)
+
+# N2, bins2, patches2 = axe2.hist(dropnan['_rtvFG'], bins = 5)
+# bins2 = range(7)
+# [patches2[i].set_fc('r') for i in bins2 if i > 2]
+# axe2.set_title('rtvFG')
+# axe2.set_xticks(range(7))
+# plt.xticks(bins2)
+
+# N3, bins3, patches3 = axe3.hist(dropnan['RuleOfFive'])
+# bins3 = range(3)
+# [patches3[i].set_fc('r') for i in bins3 if i > 4]
+# axe3.set_title('RuleOfFive')
+# # axe3.set_xticks(range(3))
+# plt.xticks(bins3)
+
+# N4, bins4, patches4 = axe4.hist(dropnan['RuleOfThree'])
+# bins = range(3)
+# [patches4[i].set_fc('r') for i in bins4 if i > 2]
+# axe4.set_title('RuleOfThree')
+# # axe4.set_xticks(range(3))
+
+# [ok[column].value_counts().plot(kind = 'bar', ax = axes[x], title = column) for x, column in enumerate(ok.columns)]
+
+# [print(column, axe) for column, axe in zip(ok.columns,(axe1, axe2, axe3, axe4))]
+
+# ok['_stars'].value_counts().plot(kind = 'bar', ax = axes[1])
+fig, axes = plt.subplots(ncols=4)
+rate = rate[['_stars', 'RuleOfFive', 'RuleOfThree', '_rtvFG']]
+for ax, column in zip(axes, rate.columns):  # one bar chart of value frequencies per selected column
+    counts = rate[column].value_counts().sort_index().reindex(range(int(rate[column].max()) + 1))  # sort_index() replaces reset_index().sort_values(by='index'), which raises KeyError on pandas >= 2.0; reindex() gives every integer 0..max a slot
+    counts.plot(kind='bar', ax=ax, fontsize=13).set_xlabel('')  # plot() returns the Axes it drew on
+    ax.set_title(column, color='b', fontsize=15)
+axes[3].get_children()[3].set_color('y')  # NOTE(review): index-based; presumably the bars for values 3 and 4 of '_rtvFG' -- confirm
+axes[3].get_children()[4].set_color('y')
+plt.tight_layout()
+plt.show()
diff --git a/MERGED.py b/MERGED.py
@@ -0,0 +1,60 @@
+import pandas as pd 
+import numpy as np 
+import matplotlib.pyplot as plt 
+plt.style.use('ggplot')
+
+data1 = pd.read_csv('D:\\New2\\QikProp_PROJECT.CSV', header=0, sep=';')
+data1.columns = data1.columns.str.replace('#', '_', regex=False)  # literal '#' -> '_' so the '_rtvFG' / '_stars' selections below match
+data1 = data1[['Title', 'QPlogHERG', 'CNS', 'PercentHumanOralAbsorption', '_rtvFG', 'docking score', 'HumanOralAbsorption', '_stars', 'RuleOfFive', 'RuleOfThree', 'FOSA', 'FISA']]
+data2 = pd.read_csv('D:\\New2\\XP_resist_residue.csv', header=0)[['Title', 'docking score']]
+
+data3 = data2.merge(data1, on='Title')  # the shared 'docking score' columns get suffixes '_x' (data2, left) and '_y' (data1, right)
+data3.rename(columns={'docking score_x': 'resist_score', 'docking score_y': 'score'}, inplace=True)
+data3['sum'] = data3[['resist_score', 'score']].sum(axis=1)
+data3.sort_values(by=['sum'], inplace=True)
+top10 = data3.head(10)  # bind the frame itself: to_excel() returns None, so the original left top10 set to None
+top10.to_excel('D:\\New2\\top10.xlsx')
+
+# data3[['Title','sum','HumanOralAbsorption']].head(20).plot.scatter('sum','HumanOralAbsorption')
+
+rate = data3.copy()  # real copy: adding columns to the slice data3[:] can raise pandas' SettingWithCopyWarning
+rate['6BKL_rate'] = rate['score'] / -4.412  # NOTE(review): -4.412 and -1.527 look like reference docking scores -- confirm
+rate['2LY0_rate'] = rate['resist_score'] / -1.527
+def Plot_violin(data, columns):
+    """Draw one horizontal violin (median marked) per entry of `columns`, stacked in rows; return, per row, [violinplot result, x-label Text, set_yticks result]."""
+    fig, axes = plt.subplots(nrows=len(columns), squeeze=False)  # squeeze=False keeps `axes` 2-D, so a single column no longer fails on axes[0]
+    return [[ax.violinplot(data[column], showmedians=True, vert=False), ax.set_xlabel(column, color='b'), ax.set_yticks([])]
+            for ax, column in zip(axes[:, 0], columns)]
+
+rate.dropna(inplace=True)  # drop rows with any missing value before plotting
+Plot_violin(rate, ['6BKL_rate', '2LY0_rate', 'CNS', 'QPlogHERG', 'PercentHumanOralAbsorption'])
+plt.tight_layout()
+plt.show()
+# 'score_rate' is never created (KeyError); the rate derived from 'score' is stored as '6BKL_rate'.
+plt.boxplot(rate['6BKL_rate'])  # NOTE(review): confirm this is the column that was meant
+# Interactive leftover: selects the rows that still contain NaN (none after dropna); the result is discarded.
+rate.loc[rate.isnull().any(axis=1)]
+
+
+data = rate  # five stacked violins, built inline so that individual tick labels can be recoloured
+columns = ['6BKL_rate', '2LY0_rate', 'CNS', 'QPlogHERG', 'PercentHumanOralAbsorption']
+n_axes = len(columns)
+fig, axes = plt.subplots(nrows=n_axes)
+for x, column in enumerate(columns):
+    axes[x].violinplot(data[column], showmedians=True, vert=False)
+    axes[x].set_xlabel(column, color='b')
+    axes[x].set_yticks([])
+axes[2].set_xticks(np.arange(-2, 3, 1))
+axes[4].set_xticks(np.arange(0, 125, 25))
+for row, tick in ((2, 3), (2, 4), (3, 1), (4, 0), (4, 1)):  # (panel, tick-label position) pairs to paint yellow
+    axes[row].get_xticklabels()[tick].set_color('y')
+plt.tight_layout()
+plt.show()
+
+
+
+
+# FOSA FISA: FISA/FOSA ratio for the ten rows with the lowest summed docking score
+new = data3[['FOSA', 'FISA']].head(10).reset_index(drop=True)
+new['FISA/FOSA'] = new['FISA'].div(new['FOSA'])
+new.to_excel('D:/New2/FIFO.xlsx')
diff --git a/RESIDUE - Plot - Dist.py b/RESIDUE - Plot - Dist.py
@@ -0,0 +1,62 @@
+import pandas as pd 
+import matplotlib.pyplot as plt  
+from adjustText import adjust_text 
+import seaborn as sns 
+
+data1 = pd.read_csv('D:\\New2\\1_residue_dist.csv', header=0)
+# Remove the rows whose pep_only contains 'HOH' with one boolean mask (na=False: a missing name is kept instead of raising).
+data1 = data1[~data1.pep_only.str.contains('HOH', na=False)]
+data2_res = pd.read_csv('D:\\New2\\2_residue_resist_dist.csv', header=0)
+data1.to_excel('D:\\New2\\1_residue_dist.xlsx')
+data2_res.to_excel('D:\\New2\\2_residue_dist.xlsx')
+
+def Plot(datas, title):
+	"""Box-plot each dataset's 'dist' column in its own panel, write the 'pep_only' name beside every outlier, and arrow-label those of the first four rows that share no value with the outlier names."""
+	fig, axes = plt.subplots(ncols=len(datas), squeeze=False)  # squeeze=False: a single dataset still yields an indexable array
+	for n, (ax, data) in enumerate(zip(axes[0], datas)):
+		outliers = ax.boxplot(data['dist'])['fliers'][0]  # one box per panel, hence one fliers line
+		post = list(zip(outliers.get_xdata(), outliers.get_ydata()))
+		pep_name = [data[data['dist'] == y]['pep_only'].values[0] for x, y in post]  # name of the first row whose dist equals the outlier value
+		for (x, y), name in zip(post, pep_name):
+			ax.text(x + 0.02, y + 0.02, s=name, color='r')
+		ax.set_xticks([])
+		ax.set_title(title[n], color='b')
+		ax.set_ylabel('Khoảng cách trung bình (Angstrom)')
+		texts = [ax.annotate(row['pep_only'], color='r', xy=(1, row['dist']), xytext=(1.12, row['dist'] + 0.05), arrowprops=dict(arrowstyle="fancy", color='r', connectionstyle="angle3,angleA=0,angleB=-90")) for _, row in data.head(4).iterrows() if all(~row.isin(pep_name))]  # head(4) tolerates tables with fewer than four rows
+		adjust_text(texts)
+
+def Boxplot(datas, title):  # bare box plot of each dataset's 'dist' column, one panel per dataset
+	fig, axes = plt.subplots(ncols = len(datas))
+	[axes[i].boxplot(data['dist']) for i, data in enumerate(datas)]
+	[(axes[i].set_xticks([],[]), axes[i].set_title(title[i], color = 'b'), axes[i].set_ylabel('Khoảng cách (Angstrom)')) for i, data in enumerate(datas)]  # hide x ticks; title[i] heads panel i
+	plt.subplots_adjust(wspace = 1000)  # NOTE(review): wspace is relative to the axes width, so 1000 looks unintended -- confirm
+
+
+def Plotsame(datas, title):  # both 'dist' distributions as two boxes on one shared axes
+	fig, ax = plt.subplots()
+	ax.boxplot([datas[k]['dist'] for k in (0, 1)])  # only the first two datasets are drawn
+	ax.set_ylabel('Khoảng cách trung bình (angstrom)', fontsize = 13)
+	ax.set_xticklabels(title, color = 'b', fontsize = 13)
+
+def Plotswarm(datas, title):  # swarm plot of the 'dist' columns of datas[0] and datas[1], one category per entry of title
+	data = pd.concat([datas[0]['dist'], datas[1]['dist']], axis = 1, keys = title).stack(0).reset_index(level = 1)  # wide (one column per title) -> long (one row per title/dist pair)
+	data.columns = ['index', 'value']  # 'index' holds the title key, 'value' the dist
+	axes = sns.swarmplot(x = 'index', y = 'value', data = data, order = title)  # order=title fixes the left-to-right category order
+	# text1 = [axes.annotate(datas[n].iloc[i]['pep_only'], color = 'r', xy = (n, datas[n].iloc[i]['value']), xytext = (n + 0.05, datas[n].iloc[i]['value'] - 0.05), fontsize = 11) for n in range(1) for i in range(10) if datas[n].iloc[i]['value'] < -3]
+	# text2 = [axes.annotate(datas[n].iloc[i]['pep_only'], color = 'r', xy = (n, datas[n].iloc[i]['value']), xytext = (n + 0.1, datas[n].iloc[i]['value'] - 0.05), arrowprops=dict(arrowstyle="fancy", color = 'grey', alpha = 0.3, connectionstyle="angle3,angleA=0,angleB=-90"), fontsize = 11) for n in range(1,2) for i in range(10) if datas[n].iloc[i]['value'] < -3]
+	# text3 = [axes.annotate(datas[n].iloc[i]['pep_only'], color = 'g', xy = (n, datas[n].iloc[i]['value']), xytext = (n + 0.1, datas[n].iloc[i]['value'] - 0.05), arrowprops=dict(arrowstyle="fancy", color = 'grey', alpha = 0.3, connectionstyle="angle3,angleA=0,angleB=-90"), fontsize = 11) for n in range(1,2) for i in range(len(datas[1])) if datas[n].iloc[i]['value'] > 0]
+	axes.set_xticklabels(title, color = 'b', fontsize = 13)
+	axes.set_ylabel('Khoảng cách trung bình (angstrom)', fontsize = 13)
+	axes.set_xlabel('')  # drop the automatic 'index' axis label
+	# adjust_text(text2)
+	# adjust_text(text3)
+
+# Plot(datas = [data1, data2_res], title = ['(1)','(2)'])
+# Boxplot(datas = [data1, data2_res], title = ['6BKL','2LY0'])
+Plotswarm(datas = [data1, data2_res], title = ['6BKL','2LY0'])
+plt.tight_layout()
+plt.show()
+
+# Separate from the plot above: re-save the '1st _ HTVS Joined' CSV as an Excel workbook.
+data = pd.read_csv(r'D:\New2\1_HTVS\1st _ HTVS Joined.csv')
+data.to_excel(r'D:\New2\1_HTVS\1st _ HTVS Joined.xlsx')
diff --git a/RESIDUE - Plot.py b/RESIDUE - Plot.py
@@ -0,0 +1,60 @@
+import pandas as pd 
+import matplotlib.pyplot as plt  
+from adjustText import adjust_text 
+import seaborn as sns
+
+data1 = pd.read_csv('D:\\New2\\1_residue.csv', header=0)
+# Remove the rows whose pep_only contains 'HOH' with one boolean mask (na=False: a missing name is kept instead of raising).
+data1 = data1[~data1.pep_only.str.contains('HOH', na=False)]
+data2_res = pd.read_csv('D:\\New2\\2_residue_resist.csv', header=0)
+data1.to_excel('D:\\New2\\1_residue.xlsx')
+data2_res.to_excel('D:\\New2\\2_residue.xlsx')
+
+def Plot(datas, title):
+	"""Box-plot each dataset's 'value' column in its own panel, write the 'pep_only' name beside every outlier, and arrow-label those of the first five rows that share no value with the outlier names."""
+	fig, axes = plt.subplots(ncols=len(datas), squeeze=False)  # squeeze=False: a single dataset still yields an indexable array
+	for n, (ax, data) in enumerate(zip(axes[0], datas)):
+		outliers = ax.boxplot(data['value'])['fliers'][0]  # one box per panel, hence one fliers line
+		post = list(zip(outliers.get_xdata(), outliers.get_ydata()))
+		pep_name = [data[data['value'] == y]['pep_only'].values[0] for x, y in post]  # name of the first row whose value equals the outlier value
+		for (x, y), name in zip(post, pep_name):
+			ax.text(x + 0.02, y + 0.02, s=name, color='r')
+		ax.set_xticks([])
+		ax.set_title(title[n], color='b')
+		ax.set_ylabel('Năng lượng tự do trung bình (kcal/mol)')
+		texts = [ax.annotate(row['pep_only'], color='r', xy=(1, row['value']), xytext=(1, row['value'] + 0.05), arrowprops=dict(arrowstyle="fancy", color='r', connectionstyle="angle3,angleA=0,angleB=-90")) for _, row in data.head(5).iterrows() if all(~row.isin(pep_name))]  # head(5) tolerates tables with fewer than five rows
+		adjust_text(texts)
+
+
+def Plotsame(datas, title):
+	"""Box-plot the 'value' columns of datas[0] and datas[1] on one axes, name every outlier of either box, and arrow-label the first six rows of each dataset that are not outliers."""
+	fig, axes = plt.subplots()
+	# axes.violinplot([datas[0]['value'], datas[1]['value']])
+	box_dict = axes.boxplot([datas[0]['value'], datas[1]['value']])
+	post = [(x, y) for line in box_dict['fliers'] for x, y in zip(line.get_xdata(), line.get_ydata())]  # outliers of BOTH boxes; previously only the first box's fliers were read
+	for x, y in post:
+		source = datas[int(x) - 1]  # the box at x-position k is drawn from datas[k - 1]
+		axes.text(x + 0.02, y + 0.02, s=source[source['value'] == y]['pep_only'].values[0], color='r', fontsize=11)
+	axes.set_xticklabels(title, color='b', fontsize=13)
+	axes.set_ylabel('Năng lượng tự do trung bình (kcal/mol)', fontsize=13)
+	texts = [axes.annotate(row['pep_only'], color='r', xy=(n + 1, row['value']), xytext=(n + 1, row['value'] + 0.05), arrowprops=dict(arrowstyle="fancy", color='r', connectionstyle="angle3,angleA=0,angleB=-90"), fontsize=11) for n in range(2) for _, row in datas[n].head(6).iterrows() if (n + 1, row['value']) not in post]  # head(6) tolerates tables with fewer than six rows
+	adjust_text(texts)
+
+
+def Plotswarm(datas, title):
+	"""Swarm-plot the 'value' columns of datas[0] and datas[1] side by side and label, among the first ten rows of each, the 'pep_only' names whose value is below -3."""
+	data = pd.concat([datas[0]['value'], datas[1]['value']], axis=1, keys=title).stack(0).reset_index(level=1)  # wide (one column per title) -> long
+	data.columns = ['index', 'value']
+	axes = sns.swarmplot(x='index', y='value', data=data, order=title)
+	text1 = [axes.annotate(datas[n].iloc[i]['pep_only'], color='r', xy=(n, datas[n].iloc[i]['value']), xytext=(n + 0.05, datas[n].iloc[i]['value'] - 0.05), fontsize=11) for n in range(1) for i in range(min(10, len(datas[n]))) if datas[n].iloc[i]['value'] < -3]  # min(): no IndexError when a table has fewer than ten rows
+	text2 = [axes.annotate(datas[n].iloc[i]['pep_only'], color='r', xy=(n, datas[n].iloc[i]['value']), xytext=(n + 0.1, datas[n].iloc[i]['value'] - 0.05), arrowprops=dict(arrowstyle="fancy", color='grey', alpha=0.3, connectionstyle="angle3,angleA=0,angleB=-90"), fontsize=11) for n in range(1, 2) for i in range(min(10, len(datas[n]))) if datas[n].iloc[i]['value'] < -3]
+	# text3 = [axes.annotate(datas[n].iloc[i]['pep_only'], color = 'g', xy = (n, datas[n].iloc[i]['value']), xytext = (n + 0.1, datas[n].iloc[i]['value'] - 0.05), arrowprops=dict(arrowstyle="fancy", color = 'grey', alpha = 0.3, connectionstyle="angle3,angleA=0,angleB=-90"), fontsize = 11) for n in range(1,2) for i in range(len(datas[1])) if datas[n].iloc[i]['value'] > 0]
+	axes.set_xticklabels(title, color='b', fontsize=13)
+	axes.set_ylabel('Năng lượng tự do trung bình (kcal/mol)', fontsize=13)
+	axes.set_xlabel('')
+	adjust_text(text2)  # only the arrowed labels of the second dataset are de-overlapped
+	# adjust_text(text3)
+# Plot(datas = [data1, data2_res], title = ['6BKL','2LY0'])
+Plotswarm(datas = [data1, data2_res], title = ['6BKL','2LY0'])  # data1 is plotted under '6BKL', data2_res under '2LY0'
+plt.tight_layout()
+plt.show()