From 0ddb7b301a1935302894a12b7c1fd0dec15b0e13 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 14 Jun 2016 16:43:03 -0500 Subject: [PATCH 01/55] initial copy of scripts. No changes yet made to them. --- .../analysis/plots/antibodies/SASA/.gitignore | 0 .../plots/antibodies/SASA/ab_cdr_SASA_den.R | 109 ++++ .../antibodies/SASA/ab_cdr_cluster_SASA_den.R | 0 .../antibodies/SASA/ab_paratope_SASA_den.R | 77 +++ .../SASA/ag_ab_cdr_cluster_dSASA_den.R | 0 .../antibodies/SASA/ag_ab_cdr_dSASA_den.R | 184 ++++++ .../antibodies/anchor_distances/.gitignore | 0 .../anchor_distances/ab_cdr_anchor_dis_den.R | 93 +++ .../plots/antibodies/charge/.gitignore | 0 .../charge/ab_cdr_cluster_charge_den.R | 111 ++++ .../plots/antibodies/charge/ab_charge_den.R | 233 +++++++ .../plots/antibodies/composition/.gitignore | 0 .../composition/ab_cdr_length_correlations.R | 156 +++++ .../composition/ab_composition_den.R | 190 ++++++ .../plots/antibodies/contacts/.gitignore | 0 .../contacts/ag_ab_cdr_cluster_contact_den.R | 0 .../antibodies/contacts/ag_ab_contact_den.R | 124 ++++ .../plots/antibodies/energies/.gitignore | 0 .../antibodies/energies/ab_cdr_energy_den.R | 59 ++ .../plots/antibodies/h3_kink/.gitignore | 0 .../plots/antibodies/h3_kink/h3_kink_den.R | 124 ++++ .../plots/antibodies/hbonds/.gitignore | 0 .../antibodies/hbonds/ab_cdr_cdr_hbonds_den.R | 590 ++++++++++++++++++ .../hbonds/ab_cdr_framework_hbonds_den.R | 236 +++++++ .../hbonds/ab_intra_cdr_hbonds_den.R | 228 +++++++ .../hbonds/ag_ab_cdr_cluster_hbonds_den.R | 0 .../antibodies/hbonds/ag_ab_cdr_hbonds_den.R | 410 ++++++++++++ .../plots/antibodies/packing_angle/.gitignore | 0 .../packing_angle/ab_packing_angle_den.R | 88 +++ .../plots/interfaces/SASA/int_SASA_den.R | 236 +++++++ .../SASA/int_SASA_residue_avgs_den.R | 86 +++ .../interfaces/SASA/int_SASA_residue_den.R | 126 ++++ .../interfaces/SASA/int_SASA_residue_vs.R | 140 +++++ .../plots/interfaces/SASA/int_dSASA_vs.R | 140 +++++ 
...nt_composition-dG_dSASA_stats_by_restype.R | 254 ++++++++ .../int_composition-hbond_stats_by_restype.R | 160 +++++ .../composition/int_composition_den.R | 196 ++++++ .../plots/interfaces/energies/int_dG_vs.R | 114 ++++ .../interfaces/energies/int_energies_den.R | 127 ++++ .../energies/int_energies_residue_den.R | 119 ++++ .../plots/interfaces/hbonds/int_hbonds_den.R | 151 +++++ .../interfaces/hbonds/int_unsat_polars_den.R | 90 +++ .../interfaces/hbonds/int_unsat_polars_vs.R | 96 +++ .../interfaces/packing/int_packing_den.R | 68 ++ .../interfaces/packing/int_packstat_vs.R | 84 +++ .../interfaces/packing/int_sc_value_vs.R | 112 ++++ .../secondary_structure/int_ss_den.R | 75 +++ inst/scripts/analysis/plots/interfaces/util.R | 8 + .../analysis/plots/loops/alpha_vm_estimates.R | 64 ++ .../analysis/plots/scores/total_score.R | 98 +++ .../antibodies/cdr_cluster_recovery.R | 230 +++++++ 51 files changed, 5786 insertions(+) create mode 100644 inst/scripts/analysis/plots/antibodies/SASA/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_cluster_SASA_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_cluster_dSASA_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/anchor_distances/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/charge/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/composition/.gitignore create mode 100644 
inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R create mode 100644 inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/contacts/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/contacts/ag_ab_cdr_cluster_contact_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/energies/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/h3_kink/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/hbonds/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_cluster_hbonds_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R create mode 100644 inst/scripts/analysis/plots/antibodies/packing_angle/.gitignore create mode 100644 inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R create mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R create mode 100644 
inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R create mode 100644 inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R create mode 100644 inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R create mode 100644 inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R create mode 100644 inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R create mode 100644 inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R create mode 100644 inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R create mode 100644 inst/scripts/analysis/plots/interfaces/util.R create mode 100644 inst/scripts/analysis/plots/loops/alpha_vm_estimates.R create mode 100644 inst/scripts/analysis/plots/scores/total_score.R create mode 100644 inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R diff --git a/inst/scripts/analysis/plots/antibodies/SASA/.gitignore b/inst/scripts/analysis/plots/antibodies/SASA/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R new file mode 100644 index 0000000..98e294a --- /dev/null +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R @@ -0,0 +1,109 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: 
set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "ab_SASA-CDR_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "CDR Sasas",
+feature_reporter_dependencies = c("AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+	# Plots per-CDR SASA from cdr_metrics for every sample source: a density
+	# faceted by CDR, the mean per CDR, and the mean per CDR length.
+	# The query is picked from opt$options$include_cdr4 / cdr4_only, which
+	# control whether rows whose CDR name contains 'Proto' are kept.
+	if ("FALSE" %in% opt$options$include_cdr4 && "FALSE" %in% opt$options$cdr4_only){
+	sele <- "
+	SELECT
+		SASA,
+		CDR,
+		length
+	FROM
+		cdr_metrics
+	WHERE
+		CDR NOT LIKE '%Proto%'
+	"
+	}
+	# include_cdr4: keep every CDR row.
+	if ("TRUE" %in% opt$options$include_cdr4){
+	sele <- "
+	SELECT
+		SASA,
+		CDR,
+		length
+	FROM
+		cdr_metrics"
+	}
+	# cdr4_only: tested last, so it wins when include_cdr4 is also TRUE.
+	if ("TRUE" %in% opt$options$cdr4_only){
+	sele <- "
+	SELECT
+		SASA,
+		CDR,
+		length
+	FROM
+		cdr_metrics
+	WHERE
+		CDR LIKE '%Proto%'"
+	}
+	if (!exists("sele", inherits = FALSE)) stop("include_cdr4 and cdr4_only must each be TRUE or FALSE", call. = FALSE)
+	data <- query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+
+	parts <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+	# Optionally facet `p` (3 columns), put the legend below for <= 3 sample sources, then save.
+	plot_field <- function(p, plot_id, grid = NULL){
+		# NOTE(review): save_plots() is not passed `p`; presumably it saves ggplot2's last plot -- confirm.
+		if (! is.null(grid)){
+			p <- p + facet_wrap(facets=grid, ncol=3)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+
+	#CDR SASA density, one facet per CDR
+	group <- c("sample_source", "CDR")
+	dens <- estimate_density_1d(data, group, c("SASA"))
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		xlab("SASA") +
+		ggtitle("CDR SASA")
+	plot_field(p, "cdr_sasa_den", ~CDR)
+	# Mean SASA per (sample source, CDR); ddply prepends the grouping columns.
+	cdr_avgs <- ddply(data, .(sample_source, CDR), function(d){
+		data.frame(m=mean(d$SASA))
+	})
+	# Mean SASA per (sample source, CDR, CDR length).
+	len_avgs <- ddply(data, .(sample_source, CDR, length), function(d){
+		data.frame(m=mean(d$SASA))
+	})
+
+	p <- ggplot(data=cdr_avgs) +
+		geom_bar(aes(x=CDR, y=m, fill=sample_source), position="dodge", stat='identity') +
+		xlab("CDR") +
+		ylab("SASA") +
+		ggtitle("Average CDR SASA")
+	plot_field(p, "avg_cdr_sasa_hist")
+
+	p <- ggplot(data=len_avgs) +
+		geom_bar(aes(x=length, y=m, fill=sample_source), position="dodge", stat='identity') +
+		xlab("CDR Length") +
+		ylab("SASA") +
+		ggtitle("Average CDR SASA")
+	plot_field(p, "avg_cdr_sasa_hist_by_length", grid= ~CDR)
+
+
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_cluster_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_cluster_SASA_den.R
new file mode 100644
index 0000000..e69de29
diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R
new file mode 100644
index 0000000..f7027f0
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R
@@ -0,0 +1,77 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "ab_SASA-paratope_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "Paratope SASA, hSASA and pSASA densities",
+feature_reporter_dependencies = c("AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+	# Plots, for every sample source, the density of three paratope SASA
+	# values read from ab_metrics, one plot per value: paratope_SASA,
+	# paratope_hSASA, and their difference (selected as paratope_pSASA).
+	# NOTE(review): h/p presumably mean hydrophobic/polar -- confirm.
+
+	sele <- "
+	SELECT
+		paratope_SASA,
+		paratope_hSASA,
+		paratope_SASA - paratope_hSASA as paratope_pSASA
+	FROM
+		ab_metrics
+	"
+
+	data <- query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+
+	parts <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+	# Optionally facet `p`, put the legend below for <= 3 sample sources, then save.
+	plot_field <- function(p, plot_id, grid = NULL){
+		# NOTE(review): save_plots() is not passed `p`; presumably it saves ggplot2's last plot -- confirm.
+		if (! is.null(grid)){
+			p <- p + facet_grid(facets=grid)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+
+	#Paratope SASA
+	group <- c("sample_source")
+	dens <- estimate_density_1d(data, group, c("paratope_SASA"))
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		xlab("SASA") +
+		ggtitle("CDR Paratope SASA")
+	plot_field(p, "paratope_sasa_den")
+
+	#Paratope hSASA
+	group <- c("sample_source")
+	dens <- estimate_density_1d(data, group, c("paratope_hSASA"))
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		xlab("SASA") +
+		ggtitle("CDR Paratope hSASA")
+	plot_field(p, "paratope_hsasa_den")
+
+	#Paratope pSASA (paratope_SASA - paratope_hSASA, computed in the query)
+	group <- c("sample_source")
+	dens <- estimate_density_1d(data, group, c("paratope_pSASA"))
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		xlab("SASA") +
+		ggtitle("CDR Paratope pSASA")
+	plot_field(p, "paratope_psasa_den")
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_cluster_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_cluster_dSASA_den.R
new file mode 100644
index 0000000..e69de29
diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R
new file mode 100644
index 0000000..2d58854
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R
@@ -0,0 +1,184 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "ab_dSASA-ab_cdr_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "CDR Sasas",
+feature_reporter_dependencies = c("AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+	# Plots the SASA each CDR buries against the antigen (cdr_metrics
+	# ag_ab_d* columns, rows with dSASA > 0 only): per-CDR means of total,
+	# polar (dSASA - dhSASA) and hydrophobic burial, plus densities.
+	# include_cdr4 / cdr4_only control whether 'Proto' CDR rows are kept.
+	if ("FALSE" %in% opt$options$include_cdr4 && "FALSE" %in% opt$options$cdr4_only){
+	sele <- "
+	SELECT
+		ag_ab_dSASA as dSASA,
+		ag_ab_dSASA_sc as dSASA_sc,
+		ag_ab_dhSASA as dhSASA,
+		ag_ab_dhSASA_sc as dhSASA_sc,
+		ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge,
+		struct_id,
+		CDR,
+		length
+	FROM
+		cdr_metrics
+	WHERE
+		dSASA > 0 and
+		CDR NOT LIKE '%Proto%'
+	"
+	}
+	# include_cdr4: keep every CDR row.
+	if ("TRUE" %in% opt$options$include_cdr4){
+	sele <- "
+	SELECT
+		ag_ab_dSASA as dSASA,
+		ag_ab_dSASA_sc as dSASA_sc,
+		ag_ab_dhSASA as dhSASA,
+		ag_ab_dhSASA_sc as dhSASA_sc,
+		ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge,
+		struct_id,
+		CDR,
+		length
+	FROM
+		cdr_metrics
+	WHERE
+		dSASA > 0
+	"
+	}
+	# cdr4_only: tested last, so it wins when include_cdr4 is also TRUE.
+	if ("TRUE" %in% opt$options$cdr4_only){
+	sele <- "
+	SELECT
+		ag_ab_dSASA as dSASA,
+		ag_ab_dSASA_sc as dSASA_sc,
+		ag_ab_dhSASA as dhSASA,
+		ag_ab_dhSASA_sc as dhSASA_sc,
+		ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge,
+		struct_id,
+		CDR,
+		length
+	FROM
+		cdr_metrics
+	WHERE
+		dSASA > 0 and
+		CDR LIKE '%Proto%'
+	"
+	}
+	if (!exists("sele", inherits = FALSE)) stop("include_cdr4 and cdr4_only must each be TRUE or FALSE", call. = FALSE)
+	data <- query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+
+	parts <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+	# Optionally facet `p` (3 columns), put the legend below for <= 3 sample sources, then save.
+	plot_field <- function(p, plot_id, grid = NULL){
+		# NOTE(review): save_plots() is not passed `p`; presumably it saves ggplot2's last plot -- confirm.
+		if (! is.null(grid)){
+			p <- p + facet_wrap(facets=grid, ncol=3)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+	# Per-row polar share of the buried SASA; the query only returns rows
+	# with dSASA > 0, so the division is defined.
+	data$polar_fraction <- (data$dSASA - data$dhSASA)/data$dSASA
+	# The summaries below return one row per group; ddply prepends the grouping columns.
+	#Avg CDR dSASA
+	means_save <- ddply(data, .(sample_source, CDR), function(d){
+		data.frame(m=mean(d$dSASA))
+	})
+	p <- ggplot(data=means_save, na.rm=TRUE) +
+		geom_bar(position="dodge", stat='identity', aes(x=CDR, y=m, fill=sample_source)) +
+		ggtitle("Average Antigen Buried SASA") +
+		xlab("CDR") +
+		ylab("dSASA") +
+		theme_bw()
+	plot_field(p, "avg_cdr_dSASA_hist")
+
+
+	#Avg CDR dSASA by length
+	means_save <- ddply(data, .(sample_source, CDR, length), function(d){
+		data.frame(m=mean(d$dSASA))
+	})
+	p <- ggplot(data=means_save, na.rm=TRUE) +
+		geom_bar(position="dodge", stat='identity', aes(x=length, y=m, fill=sample_source)) +
+		ggtitle("Average Antigen Buried SASA") +
+		xlab("CDR Length") +
+		ylab("dSASA") +
+		theme_bw()
+	plot_field(p, "avg_cdr_dSASA_hist_by_length", ~CDR)
+
+
+	#Avg CDR polar SASA
+	means <- ddply(data, .(sample_source, CDR), function(d){
+		data.frame(m=mean(d$dSASA-d$dhSASA))
+	})
+	p <- ggplot(data=means, na.rm=TRUE) +
+		geom_bar(position="dodge", stat='identity', aes(x=CDR, y=m, fill=sample_source)) +
+		ggtitle("Average Antigen Buried Polar SASA") +
+		xlab("CDR") +
+		ylab("dpSASA") +
+		theme_bw()
+	plot_field(p, "avg_cdr_dpSASA_hist")
+
+	#Avg CDR hSASA
+	means <- ddply(data, .(sample_source, CDR), function(d){
+		data.frame(m=mean(d$dhSASA))
+	})
+	p <- ggplot(data=means, na.rm=TRUE) +
+		geom_bar(position="dodge", stat='identity', aes(x=CDR, y=m, fill=sample_source)) +
+		ggtitle("Average Antigen Buried Hydrophobic SASA") +
+		xlab("CDR") +
+		ylab("dhSASA") +
+		theme_bw()
+	plot_field(p, "avg_cdr_dhSASA_hist")
+
+	#Avg CDR polar fraction (ratio of the group means, not the mean of per-row ratios)
+	means <- ddply(data, .(sample_source, CDR), function(d){
+		m_d <- mean(d$dSASA)
+		m_p <- mean(d$dSASA-d$dhSASA)
+		data.frame(m=m_p/m_d)
+	})
+	p <- ggplot(data=means, na.rm=TRUE) +
+		geom_bar(position="dodge", stat='identity', aes(x=CDR, y=m, fill=sample_source)) +
+		ggtitle("Average Antigen Buried Polar SASA Fraction") +
+		xlab("CDR") +
+		ylab("dSASA (polar) /dSASA") +
+		theme_bw()
+	plot_field(p, "avg_cdr_polar_fraction_hist")
+
+
+
+	#CDR dSASA
+	group <- c("sample_source", "CDR")
+	dens <- estimate_density_1d(data, group, c("dSASA"))
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		xlab("dSASA") +
+		ggtitle("Antigen Buried Solvent Accessible Surface Area")
+	plot_field(p, "cdr_dSASA_den", ~CDR)
+
+	#CDR polar density
+	group <- c("sample_source", "CDR")
+	has_dsasa_data <- data[data$dSASA != 0,]
+	dens <- estimate_density_1d(has_dsasa_data, group, c("polar_fraction"))
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x =x, y= y, colour=sample_source), size=1.2) +
+		scale_x_continuous("dSASA (polar) /dSASA", limits = c(0, 1.0)) +
+		ggtitle("Antigen Buried Polar SASA Fraction")
+	plot_field(p, "cdr_polar_fraction_den", ~CDR)
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/antibodies/anchor_distances/.gitignore b/inst/scripts/analysis/plots/antibodies/anchor_distances/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R
new file mode 100644
index 0000000..55e9abe
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R
@@ -0,0 +1,93 @@
+# 
-*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "ab_anchor_dis",
+author = "Jared Adolf-Bryfogle",
+brief_description = "CDR anchor C-N distance densities",
+feature_reporter_dependencies = c("AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+	# Plots the density of cdr_metrics.anchor_CN_distance per CDR for each
+	# sample source. include_cdr4 / cdr4_only control whether rows whose
+	# CDR name contains 'Proto' are kept.
+	if ("FALSE" %in% opt$options$include_cdr4 && "FALSE" %in% opt$options$cdr4_only){
+	sele <- "
+	SELECT
+		anchor_CN_distance,
+		CDR,
+		length
+	FROM
+		cdr_metrics where CDR NOT LIKE '%Proto%'"
+	}
+	# include_cdr4: keep every CDR row.
+	if ("TRUE" %in% opt$options$include_cdr4){
+	sele <- "
+	SELECT
+		anchor_CN_distance,
+		CDR,
+		length
+	FROM
+		cdr_metrics"
+	}
+	# cdr4_only: tested last, so it wins when include_cdr4 is also TRUE.
+	if ("TRUE" %in% opt$options$cdr4_only){
+	sele <- "
+	SELECT
+		anchor_CN_distance,
+		CDR,
+		length
+	FROM
+		cdr_metrics where CDR LIKE '%Proto%'"
+	}
+	if (!exists("sele", inherits = FALSE)) stop("include_cdr4 and cdr4_only must each be TRUE or FALSE", call. = FALSE)
+	data <- query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+
+	plot_parts <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+	# facet_grid variant of the save helper; not called below.
+	plot_field <- function(p, plot_id, grid = NULL){
+		# NOTE(review): save_plots() is not passed `p`; presumably it saves ggplot2's last plot -- confirm.
+		if (! is.null(grid)){
+			p <- p + facet_grid(facets=grid)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+	# Facet `p` with facet_wrap in `columns` columns, place the legend, then save.
+	plot_field_wrap <- function(p, plot_id, grid, columns = 3) {
+		p <- p + facet_wrap(grid, ncol=columns)
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+	#C-N distance density, one facet per CDR
+	parts <- list(plot_parts, xlab("Angstrom"))
+	field <- c("anchor_CN_distance")
+	group <- c("sample_source", "CDR")
+	# Only rows with a distance below 15 (labelled Angstrom on the axis) enter the density.
+	dens <- estimate_density_1d(data[data$anchor_CN_distance < 15, ], group, field)
+
+
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		ggtitle("C-N distance")
+	plot_field_wrap(p, "c_n_dis_by_cdr_den", ~ CDR)
+
+
+
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/antibodies/charge/.gitignore b/inst/scripts/analysis/plots/antibodies/charge/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R
new file mode 100644
index 0000000..d143a9a
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R
@@ -0,0 +1,111 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. 
Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "ab_charge-clusters_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "CDR charge by CDR cluster",
+feature_reporter_dependencies = c("AntibodyFeatures", "CDRClusterFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+	# Plots CDR charge broken down by CDR cluster (cdr_metrics joined to
+	# cdr_clusters on struct_id and CDR): mean charge per cluster for each
+	# CDR/length combination, then for every cluster a charge density and
+	# a histogram of charge as a fraction of each sample source.
+
+	sele <- "
+	SELECT
+		cdr_metrics.charge as charge,
+		cdr_metrics.CDR as CDR,
+		cdr_metrics.length as length,
+		cdr_clusters.fullcluster as cluster
+	FROM
+		cdr_metrics,
+		cdr_clusters
+	WHERE
+		cdr_clusters.struct_id = cdr_metrics.struct_id AND
+		cdr_clusters.CDR = cdr_metrics.CDR
+	"
+
+	plot_parts <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+	# Optionally facet `p`, put the legend below for <= 3 sample sources, then save.
+	plot_field <- function(p, plot_id, grid = NULL){
+		# NOTE(review): save_plots() is not passed `p`; presumably it saves ggplot2's last plot -- confirm.
+		if (! is.null(grid)){
+			p <- p + facet_grid(facets=grid)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+	# facet_wrap variant of the save helper; not called below.
+	plot_field_wrap <- function(p, plot_id, grid, columns = 3) {
+		p <- p + facet_wrap(grid, ncol=columns)
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+
+	#Average charge per cluster, one plot per CDR/length combination
+	data <- query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+	data$cdr_length <- paste(data$CDR, data$length, sep="_")
+
+	avgs <- ddply(data, .(sample_source, cdr_length, cluster), function(d) {
+		data.frame(m=mean(d$charge))
+	})
+
+	for (cdr_length in unique(avgs$cdr_length)){
+
+		p <- ggplot(data=avgs[avgs$cdr_length==cdr_length,]) +
+			geom_bar(position='dodge', stat='identity', aes(x=cluster, y=m, fill=sample_source)) +
+			theme_bw() +
+			xlab("Cluster") +
+			ylab("Charge") +
+			ggtitle(paste("Average CDR Charge", cdr_length))
+		plot_field(p, paste("cdr_charge_hist_lengths", cdr_length, sep="_"))
+
+	}
+
+
+	#Per-cluster charge density and charge histogram
+	for (cluster in unique(data$cluster)) {
+
+		clus_data <- data[data$cluster==cluster,]
+		# The density plot is skipped when the estimate comes back empty.
+		dens <- estimate_density_1d(clus_data, c("sample_source"), c("charge"))
+		if (nrow(dens)>=1){
+			p<- ggplot(data=dens, na.rm=TRUE)+
+				geom_line(aes(x, y, colour=sample_source), size = 1.2) +
+				geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) +
+				ggtitle(paste("CDR Charge", cluster)) +
+				xlab("Charge") +
+				scale_y_continuous("Feature Density")
+			plot_field(p, paste("cdr_charge_den", cluster, sep="_"))
+		}
+		# Fraction of each sample source's rows in this cluster that carry each charge value.
+		perc <- ddply(clus_data, .(sample_source, charge), function(d2){
+			data.frame(perc = nrow(d2)/nrow(clus_data[clus_data$sample_source == d2$sample_source[1],]))
+		})
+		p <- ggplot(data=perc) +
+			geom_bar(position='dodge', stat='identity', aes(x=charge, y=perc, fill=sample_source)) +
+			theme_bw() + xlab("Charge") + ggtitle(paste("CDR Charge", cluster)) +
+			scale_y_continuous("% of Sample Source", labels=percent)
+		plot_field(p, paste("cdr_charge_hist", cluster, sep="_"))
+
+	}
+
+})) # end FeaturesAnalysis
+
\ No newline at end of file diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R new file mode 100644 index 0000000..70e2545 --- /dev/null +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R @@ -0,0 +1,233 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "ab_charge_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic antibody composition densities", +feature_reporter_dependencies = c("AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #First we run on all the interfaces in the database + + sele = " + SELECT + net_charge, + paratope_charge + FROM + ab_metrics" + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + plot_field_wrap = function(p, plot_id, grid, columns = 3) { + p <- p + facet_wrap(grid, ncol=columns) + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + get_charge_seq <- function(d, spacer = 1){ + d2 = d[! is.nan(d$charge) || is.na(d$charge),] + r = seq(min(d2$charge), max(d2$charge), spacer) + r + } + #Net Charge density + parts = list(plot_parts, xlab("charge")) + field = c("net_charge") + group = c("sample_source") + + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + scale_x_continuous("charge", breaks = seq(min(data$net_charge), max(data$net_charge), 2)) + + ggtitle("Antibody Net Charge") + plot_field(p, "net_charge_den") + + perc <- ddply(data, .(sample_source, net_charge), function(d2){ + perc = nrow(d2)/nrow(data[data$sample_source == d2$sample_source[1],]) + data.frame(perc = perc) + }) + + #Net Charge histogram + p <- ggplot(data=perc ) + + geom_bar(position="dodge", stat='identity', aes(x = net_charge, y= perc , fill=sample_source)) + + theme_bw() + + scale_x_continuous("charge", breaks = seq(min(perc$net_charge), max(perc$net_charge), 2)) + + ggtitle("Antibody Net Charge") + + ylab("% of Sample Source") + + scale_y_continuous(label=percent) + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field(p, "net_charge_hist") + + + + #Avg Net Charge Histogram + avgs <- ddply(data, .(sample_source), function(d2){ + data.frame(m = mean(d2$net_charge)) + }) + p <- ggplot(data=avgs, ) + + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , 
fill=sample_source)) + + theme_bw() + + ggtitle("Average Antibody Net Charge") + + ylab("charge") + + xlab("Sample Source") + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field(p, "avg_net_charge_hist") + + #Paratope Charge density + parts = list(plot_parts, xlab("charge")) + field = c("paratope_charge") + group = c("sample_source") + + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + scale_x_continuous("charge", breaks = seq(min(data$paratope_charge), max(data$paratope_charge), 2)) + + ggtitle("Paratope Net Charge") + plot_field(p, "paratope_charge_den") + + perc <- ddply(data, .(sample_source, paratope_charge), function(d2){ + perc = nrow(d2)/nrow(data[data$sample_source == d2$sample_source[1],]) + data.frame(perc = perc) + }) + + #Paratope Charge histogram + p <- ggplot(data=perc, aes(x=paratope_charge )) + + geom_bar(position="dodge", stat='identity', aes( y= perc, fill=sample_source)) + + theme_bw() + + ggtitle("Paratope Net Charge") + + scale_x_continuous("charge", breaks = seq(min(perc$paratope_charge), max(perc$paratope_charge), 2)) + + ylab("% of Sample Source") + + scale_y_continuous(label=percent) + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field(p, "paratope_charge_hist") + + #Avg Paratope histogram + avgs <- ddply(data, .(sample_source), function(d2){ + data.frame(m = mean(d2$paratope_charge)) + }) + p <- ggplot(data=avgs ) + + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + + theme_bw() + + ggtitle("Average Paratope Net Charge") + + ylab("charge") + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field(p, "avg_paratope_charge_hist") + + if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + sele = " + SELECT + charge, + CDR, + length + FROM + cdr_metrics + WHERE + CDR NOT LIKE '%Proto%'" + } + + if ("TRUE" %in% 
opt$options$include_cdr4){ + sele = " + SELECT + charge, + CDR, + length + FROM + cdr_metrics" + } + + if ("TRUE" %in% opt$options$cdr4_only){ + sele = " + SELECT + charge, + CDR, + length + FROM + cdr_metrics + WHERE + CDR LIKE '%Proto%'" + } + + #CDR Charge Density + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + dens <- estimate_density_1d(data, c("sample_source", "CDR"), c("charge")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size = 1.2) + + #scale_x_continuous("Charge", breaks = get_charge_seq(data, 1)) + + ggtitle("CDR Charge") + plot_field_wrap(p, "cdr_charge_den", ~ CDR) + + + #CDR Charge Histogram + avgs = ddply(data, .(sample_source, CDR), function(data) { + data.frame(m=mean(data$charge)) + }) + perc = ddply(data, .(sample_source, CDR, charge), function(d2){ + perc = nrow(d2)/nrow(data[data$CDR == d2$CDR[1] & data$sample_source == d2$sample_source[1],]) *100 + data.frame(perc = perc) + }) + perc$chargec = as.character(perc$charge) + + p <- ggplot(data=perc, aes(x = charge)) + + geom_bar(position="dodge", stat='identity', aes(y = perc, fill=sample_source))+ + theme_bw() + + #scale_x_continuous("charge", breaks = get_charge_seq(perc, 1)) + + ggtitle("CDR Charge") + + scale_y_continuous("charge", label=percent) + + ylab("% of Sample Source") + plot_field_wrap(p, "cdr_charge_hist", ~ CDR) + + p <- ggplot(data=avgs, aes(x=charge)) + + geom_bar(position="dodge", aes(x= CDR, y=m, fill=sample_source), stat = 'identity')+ + theme_bw() + + xlab("CDR") + + #scale_y_continuous("Charge", breaks = get_charge_seq(avgs, 1)) + + ylab("charge") + + ggtitle("Average CDR Charge") + plot_field(p, "avg_cdr_charge_hist_by_cdr") + + avgs = ddply(data, .(sample_source, CDR, length), function(data) { + data.frame(m=mean(data$charge)) + }) + avgs$lengthc = as.character(avgs$length) + + p <- ggplot(data=avgs, aes(x=charge)) + + geom_bar(position="dodge", aes(x= length, y=m, fill=sample_source), stat = 'identity')+ 
+ theme_bw() + + xlab("CDR Length") + + ylab("charge") + + #scale_y_continuous("Charge", breaks = get_charge_seq(avgs, 1)) + + ggtitle("Average CDR Charge") + plot_field_wrap(p, "avg_cdr_charge_hist_by_length", ~ CDR) + + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/antibodies/composition/.gitignore b/inst/scripts/analysis/plots/antibodies/composition/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R new file mode 100644 index 0000000..4a23f02 --- /dev/null +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R @@ -0,0 +1,156 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "ab_composition-length_correlations", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic antibody composition densities", +feature_reporter_dependencies = c("AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #First we run on all the interfaces in the database + + + + sele = " + SELECT + length, + CDR, + struct_id + FROM + cdr_metrics" + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + plot_field_wrap = function(p, plot_id, grid, columns = 3) { + p <- p + facet_wrap(grid, ncol=columns) + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + #This does not assume 1 antibody per struct_id, which is currently how it works. + #It does assume all CDRs are present, which is also how AntibodyInfo currently works. + print("Calculating length pairs..") + cdrs = c("L1", "L2", "L3", "H1", "H2", "H3") + #cdrs = c("L1", "L3", "H3") + + data <- ddply(data, .(struct_id, sample_source), function(d2){ + pairs = data.frame() + for (outer_cdr in cdrs){ + for (inner_cdr in cdrs){ + if (inner_cdr == outer_cdr) {next} + + c = data.frame(CDR1 =d2[d2$CDR==outer_cdr,]$CDR, CDR2 = d2[d2$CDR==inner_cdr,]$CDR, CDR1_length = d2[d2$CDR==outer_cdr,]$length, CDR2_length = d2[d2$CDR==inner_cdr,]$length) + pairs = rbind(pairs, c) + } + } + pairs + }) + + print(head(data)) + + + perc <- ddply(data, .(sample_source, CDR1, CDR2, CDR1_length, CDR2_length), function(d2){ + total = nrow(data[ + data$sample_source==d2$sample_source & + data$CDR1 == d2$CDR1 & + data$CDR2 == d2$CDR2, + ]) + perc = nrow(d2)/total * 100 + data.frame(percent_sample_source = perc) + }) + + print(head(perc)) + + heat_map_parts = list( + geom_tile(aes(x=CDR1_length, y = CDR2_length, fill = percent_sample_source), stat='identity'), + scale_fill_gradient()) + + #This will be a bit difficult to see, but it's meant to get a rough idea of what is going on. Add to specific lengths to get more. 
+ for (cdr in cdrs){ + + d = perc[perc$CDR1==cdr,] + d_all = data[data$CDR1==cdr,] + + p <- ggplot(data=d) + + heat_map_parts + + scale_x_continuous(paste("CDR", cdr, "length"), breaks = seq(min(d$CDR1_length), max(d$CDR1_length), 4)) + + scale_y_continuous(paste("CDR", "length"), breaks = seq(min(d$CDR2_length), max(d$CDR2_length), 4)) + plot_field(p, paste("cdr_length_all_heat_map", cdr, "x_axis", sep="_"), sample_source ~ CDR2) + + #Points + p <- ggplot(d_all, aes(x=CDR1_length, y = CDR2_length)) + + #geom_point(size=1.5, position="jitter", aes(colour = percent_sample_source))+ + + stat_smooth(data=d_all, method=lm) + + scale_x_continuous(paste("CDR", cdr, "length"), breaks = seq(min(d$CDR1_length), max(d$CDR1_length), 4)) + + scale_y_continuous(paste("CDR","length"), breaks = seq(min(d$CDR2_length), max(d$CDR2_length), 4)) + + p_den = p + + geom_point(data = d, size=1.2, position="jitter", aes(colour=percent_sample_source)) + + p_all = p + + geom_point(data = d_all, size=1.2, position="jitter") + + plot_field(p_den, paste("cdr_length_all_regression_jitter_den_points", cdr, "x_axis",sep="_"), sample_source ~ CDR2) + plot_field(p_all, paste("cdr_length_all_regression_jitter_all_points", cdr, "x_axis", sep="_"), sample_source ~ CDR2) + + } + + + + #Specific length graphs. Could do all of them (36), but that is too overwhelming. 
Only writing these for ones we are interested in + pairs = list(c("L3", "L1"), c("L3", "H3"), c("H3", "H1"), c("L2", "L3"), c("L2", "H3"), c("H2", "H3")) + #pairs = list(c("L1", "L3")) + + for (pair in pairs){ + d = perc[perc$CDR1==pair[1] & perc$CDR2 == pair[2],] + p <- ggplot(data = d) + + heat_map_parts + + scale_x_continuous(paste("CDR", pair[1], "length"), breaks = seq(min(d$CDR1_length), max(d$CDR1_length), 4)) + + scale_y_continuous(paste("CDR", pair[2], "length"), breaks = seq(min(d$CDR2_length), max(d$CDR2_length), 4)) + plot_field(p, paste("cdr_length_ind_heat_map", pair[1], pair[2], sep="_"), ~ sample_source) + + #Points + d_all = data[data$CDR1==pair[1] & data$CDR2 == pair[2],] + p <- ggplot(d_all, aes(x=CDR1_length, y = CDR2_length)) + + #geom_point(size=1.5, position="jitter", aes(colour = percent_sample_source))+ + + stat_smooth(data=d_all, method=lm) + + scale_x_continuous(paste("CDR", pair[1], "length"), breaks = seq(min(d$CDR1_length), max(d$CDR1_length), 4)) + + scale_y_continuous(paste("CDR", pair[2], "length"), breaks = seq(min(d$CDR2_length), max(d$CDR2_length), 4)) + + p_den = p + + geom_point(data = d, size=1.2, position="jitter", aes(colour=percent_sample_source)) + + p_all = p + + geom_point(data = d_all, size=1.2, position="jitter") + + plot_field(p_den, paste("cdr_length_ind_regression_den_points_jitter", pair[1], pair[2], sep="_"), ~ sample_source) + plot_field(p_all, paste("cdr_length_ind_regression_all_points_jitter", pair[1], pair[2], sep="_"), ~ sample_source) + } + + + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R new file mode 100644 index 0000000..0763c4e --- /dev/null +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R @@ -0,0 +1,190 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set 
ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "ab_composition_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic antibody composition densities", +feature_reporter_dependencies = c("AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #First we run on all the interfaces in the database + + if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + sele = " + SELECT + length, + CDR, + aromatic_nres/length as aromatic_makeup + FROM + cdr_metrics + WHERE CDR NOT LIKE '%Proto%'" + } + + if ("TRUE" %in% opt$options$include_cdr4){ + sele = " + SELECT + length, + CDR, + aromatic_nres/length as aromatic_makeup + FROM + cdr_metrics" + } + + if ("TRUE" %in% opt$options$cdr4_only){ + sele = " + SELECT + length, + CDR, + aromatic_nres/length as aromatic_makeup + FROM + cdr_metrics + WHERE CDR LIKE '%Proto%'" + } + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + plot_field_wrap = function(p, plot_id, grid, columns = 3) { + p <- p + facet_wrap(grid, ncol=columns) + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + #Length density + parts = list(plot_parts, xlab("length"), xlim(5, 20)) + field = c("length") + group = c("sample_source", "CDR") + + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + scale_x_continuous("", breaks = seq(min(data$length), max(data$length), 4)) + ggtitle("CDR Lengths") + plot_field_wrap(p, "cdr_lengths_den",grid= ~ CDR) + + perc <- ddply(data, .(sample_source, CDR, length), function(d2){ + perc = nrow(d2)/nrow(data[data$sample_source == d2$sample_source[1] & data$CDR == d2$CDR[1],]) + data.frame(perc = perc) + }) + + #Length histogram + p <- ggplot(data=perc, aes(x=length)) + + geom_bar(position="dodge", stat = 'identity', aes(y= perc , fill=sample_source ))+ + theme_bw() + + ggtitle("CDR Lengths") + + ylab("% of Sample Source") + #scale_x_continuous("CDR Length", seq(min(perc$length), max(perc$length), 4)) + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field_wrap(p, "cdr_lengths_hist", grid= ~ CDR) + + avgs <- ddply(data, .(sample_source, CDR), function(d2){ + data.frame(m = mean(d2$length)) + }) + + p <- ggplot(data=avgs, aes(x=CDR)) + + geom_bar(position="dodge", stat = 'identity', aes(y= round(m) , fill=sample_source ))+ + theme_bw() + + ggtitle("Average CDR Lengths ") + + scale_y_continuous("Avg CDR Length (rounded)", breaks = seq(0, round(max(avgs$m)), 4)) + + xlab("CDR") + plot_field(p, 
"avg_cdr_lengths_hist") + + #Aromaticity + parts = list(plot_parts, xlab("% Aromatic Makeup")) + field = c("aromatic_makeup") + dens <- estimate_density_1d(data, group, field) + p<- ggplot(data = dens, na.rm = T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Aromatic Composition") + plot_field_wrap(p, "cdr_aromatic_den", grid = ~ CDR) + + avgs <- ddply(data, .(sample_source, CDR), function(d2){ + data.frame(m = mean(d2$aromatic_makeup)) + }) + + p <- ggplot(data=avgs, aes(x=CDR)) + + geom_bar(position="dodge", stat = 'identity', aes(y= m , fill=sample_source ))+ + theme_bw() + + ggtitle("Average Aromatic Composition") + + ylab("avg composition") + + xlab("CDR") + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field(p, "avg_cdr_aromatic_hist") + + avgs <- ddply(data, .(sample_source, CDR, length), function(d2){ + data.frame(m = mean(d2$aromatic_makeup)) + }) + + p <- ggplot(data=avgs, aes(x=length)) + + geom_bar(position="dodge", stat = 'identity', aes(y= m , fill=sample_source ))+ + theme_bw() + + ggtitle("Average Aromatic Composition") + + ylab("avg composition") + + scale_x_continuous("CDR Length", breaks = seq(min(avgs$length), max(avgs$length), 4)) + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field_wrap(p, "avg_cdr_aromatic_hist_by_length", ~ CDR) + + sele = " + SELECT + cdr_residues + FROM + ab_metrics" + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + #Total CDR residues density + + parts = list(plot_parts, xlab("n")) + field = c("cdr_residues") + group = c("sample_source") + + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Total CDR Residues") + plot_field(p, "total_cdr_res_den") + + #Total CDR residues histogram + perc <- ddply(data, .(sample_source, cdr_residues), function(d2){ + perc = nrow(d2)/nrow(data[data$sample_source == 
d2$sample_source[1],]) + data.frame(perc = perc) + }) + p <- ggplot(data=perc, aes(x=cdr_residues)) + + geom_bar(position="dodge", stat = 'identity', aes(y= perc , fill=sample_source)) + + theme_bw() + + ggtitle("Total CDR Residues") + + ylab("% of Sample Source") + + scale_x_continuous("", breaks = seq(min(perc$cdr_residues), max(perc$cdr_residues), 4)) + + #scale_x_continuous("restype") + + #scale_y_continuous("n") + plot_field(p, "total_cdr_res_hist") +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/antibodies/contacts/.gitignore b/inst/scripts/analysis/plots/antibodies/contacts/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_cdr_cluster_contact_den.R b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_cdr_cluster_contact_den.R new file mode 100644 index 0000000..e69de29 diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R new file mode 100644 index 0000000..7d1eee3 --- /dev/null +++ b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R @@ -0,0 +1,124 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
+ +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "ag_ab_contact_den", +author = "Jared Adolf-Bryfogle", +brief_description = "VL VH packing angle metrics", +feature_reporter_dependencies = c("AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #First we run on all the interfaces in the database + + + + sele = " + SELECT + CDR, + struct_id, + length, + ag_ab_contacts_total, + ag_ab_contacts_nres + FROM + cdr_metrics + " + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! is.null(grid)){ + p <- p+ facet_wrap(facets=grid, ncol=3) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + #Contacts Antigen + binary_data <- ddply(data, .(sample_source, CDR), function(data){ + percent = (nrow(data[data$ag_ab_contacts_total >= 1,]))/length(data$ag_ab_contacts_total) + data.frame(percent=percent) + }) + p <- ggplot(data=binary_data, na.rm=T, aes(x=CDR)) + + geom_bar(position="dodge", aes(y=percent, fill=sample_source), stat='identity') + + ggtitle("CDR Makes Antigen Contact") + + scale_y_continuous(label=percent) + + ylab("% of sample source") + plot_field(p, "cdr_makes_contact_hist") + save_tables(self, binary_data, "cdr_makes_contact_table", sample_sources, output_dir, output_formats, + caption="CDR makes contact", caption.placement="top", quote_strings=F) + + #Avg Contacts per Residue per CDR + #Testing to make sure this is working: + data$avg = data$ag_ab_contacts_total/data$length + means = ddply(data, .(sample_source, CDR), function(data){ + data.frame(m= mean(data$avg)) + }) + p <- ggplot(data=means, na.rm=T, aes(x=CDR)) 
+ + geom_bar(position="dodge", aes(y=m, fill=sample_source), stat='identity') + + ggtitle("Average Contacts per Residue") + + xlab("CDR") + + ylab("n") + plot_field(p, "avg_contacts_per_residue_per_cdr_hist") + #save_tables(self, means, "avg_contacts_per_residue_per_cdr_table", sample_sources, output_dir, output_formats, + # caption="Avg contacts per residue per cdr", caption.placement="top", quote_strings=F) + + #Residues in contact per CDR + means = ddply(data, .(sample_source, CDR), function(data){ + data.frame(m= mean(data$ag_ab_contacts_nres)) + }) + p <- ggplot(data=means, na.rm = T, aes(x=CDR)) + + geom_bar(position="dodge", aes(y=m, fill=sample_source), stat='identity') + + ggtitle("Average Residues in contact") + + xlab("CDR") + + ylab("n") + plot_field(p, "avg_residues_in_contact_per_cdr_hist") + save_tables(self, means, "avg_residues_in_contact_per_cdr_table", sample_sources, output_dir, output_formats, + caption="Avg residues in contact per cdr", caption.placement="top", quote_strings=F) + + #This is for Brain - average percentage of contacts that come from a CDR. If the antibody is not in contact with antigen, we skip it. 
+ data2 = ddply(data, .(sample_source, struct_id), function(d2){ + total_contacts = sum(d2$ag_ab_contacts_total) + data.frame(total_contacts = total_contacts, CDR = d2$CDR, ag_ab_contacts_total = d2$ag_ab_contacts_total) + }) + + avgs = ddply(data2[data2$total_contacts > 0,], .(sample_source, struct_id, CDR), function(d2){ + contacts = d2$ag_ab_contacts_total[1]/d2$total_contacts + print(paste(contacts, d2$total_contacts)) + perc = contacts + data.frame(perc = perc) + }) + print(head(avgs)) + + avg_perc = ddply(avgs, .(sample_source, CDR), function(d2){ + + data.frame(m_perc = mean(d2$perc)) + }) + print(head(avg_perc)) + + p <- ggplot(data=avg_perc, na.rm = T, aes(x=CDR)) + + geom_bar(position="dodge", aes(y=m_perc, fill=sample_source), stat='identity') + + ggtitle("Average Percent of total contacts") + + xlab("CDR") + + ylab("Avg %") + + scale_y_continuous(label=percent) + plot_field(p, "avg_perc_total_contacts_hist") + save_tables(self, avg_perc, "avg_perc_total_contacts_table", sample_sources, output_dir, output_formats, + caption="Avg Percent of total contacts", caption.placement="top", quote_strings=F) + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/antibodies/energies/.gitignore b/inst/scripts/analysis/plots/antibodies/energies/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R new file mode 100644 index 0000000..ae07ed0 --- /dev/null +++ b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R @@ -0,0 +1,59 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. 
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "ab_energies-CDR_den", +author = "Jared Adolf-Bryfogle", +brief_description = "CDR Energies", +feature_reporter_dependencies = c("AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #Query the energy of each CDR from cdr_metrics + + + + sele = " + SELECT + energy, + CDR + FROM + cdr_metrics + " + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! is.null(grid)){ + p <- p+ facet_wrap(facets=grid, ncol=3) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + + #CDR energy density + group = c("sample_source", "CDR") + dens <- estimate_density_1d(data, group, c("energy")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("REU") + + ggtitle("CDR Energy") + plot_field(p, "cdr_energy_den", ~CDR) + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/antibodies/h3_kink/.gitignore b/inst/scripts/analysis/plots/antibodies/h3_kink/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R new file mode 100644 index 0000000..348d572 --- /dev/null +++ 
b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R @@ -0,0 +1,124 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "H3_kink_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Various statistics on the H3 Kink", +feature_reporter_dependencies = c("AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #First we run on all the interfaces in the database + + + + sele = " + SELECT + kink_type, + RD_Hbond_dis, + bb_Hbond_dis, + Trp_Hbond_dis, + qdis, + qdih, + anion_res - cation_res as cation_res_sep + FROM + ab_h3_kink_metrics" + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + + #Kink vs No Kink Histogram + counts = ddply(data, .(sample_source), function(by_ss){ + result = ddply(by_ss, .(kink_type), function(by_kink) { + + df = data.frame(percent = length(by_kink$qdis)/length(by_ss$kink_type)) + }) + }) + + p <- ggplot(data=counts, na.rm=T) + + geom_bar(position="dodge", stat="identity", aes(x=kink_type, y= percent, fill=sample_source)) + + theme_bw() + + ggtitle("Kink Type Comparison") + + scale_y_continuous(label = percent) + + xlab("kink type") + plot_field(p, "kink_type_hist") + + #Anion - Cation res separation + group = c("sample_source", "kink_type") + dens <- estimate_density_1d(data, group, c("cation_res_sep")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("Residue Separation") + + ggtitle("Anion Cation Residue Separation") + plot_field(p, "anion_cation_res_sep_den", ~kink_type) + + #Hbond Distance RD + group = c("sample_source", "kink_type") + dens <- estimate_density_1d(data, group, c("RD_Hbond_dis")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("Angstroms") + + ggtitle("RD Hbond Distance") + plot_field(p, "hbond_dis_RD_den", ~kink_type) + + #Hbond Distance BB + group = c("sample_source", "kink_type") + dens <- estimate_density_1d(data, group, c("bb_Hbond_dis")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("Angstroms") + + ggtitle("BB Hbond Distance") + plot_field(p, "hbond_dis_BB_den", ~kink_type) + + #Hbond Distance Trp + group = c("sample_source", "kink_type") + dens <- estimate_density_1d(data, group, c("Trp_Hbond_dis")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, 
colour=sample_source), size=1.2) + + xlab("Angstroms") + + ggtitle("TRP Hbond Distance") + plot_field(p, "hbond_dis_TRP_den", ~kink_type) + + #qDis + group = c("sample_source", "kink_type") + dens <- estimate_density_1d(data, group, c("qdis")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("Angstroms") + + ggtitle("Q Distance") + plot_field(p, "qdis_den", ~kink_type) + + #qdih - This may be better off on a circular plot however ggplot2 implementation of this does not seem to exist + group = c("sample_source", "kink_type") + dens <- estimate_density_1d(data, group, c("qdih")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("Degrees") + + ggtitle("Q Dihedral") + plot_field(p, "qdih_den", ~kink_type) + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/.gitignore b/inst/scripts/analysis/plots/antibodies/hbonds/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R new file mode 100644 index 0000000..1cc20b9 --- /dev/null +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R @@ -0,0 +1,590 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 

check_setup()

feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
id = "ab_hbonds-cdr_cdr_den",
author = "Jared Adolf-Bryfogle",
brief_description = "CDR - CDR Hbonds",
feature_reporter_dependencies = c("AntibodyFeatures", "HBondFeatures"),
run=function(self, sample_sources, output_dir, output_formats){
  # Plots hydrogen bonds whose donor and acceptor residues belong to two
  # different CDRs: per-structure bond counts, average counts, and densities
  # of bond energy and A-H distance. Every bond is reported twice, once with
  # the donor's CDR as CDR1 (type "don") and once with the acceptor's CDR as
  # CDR1 (type "acc"), so each plot reads as "CDR1 bonding to CDR2".

  # CDR-name filter appended to the WHERE clause. cdr4_only takes precedence
  # over include_cdr4 (the last matching branch used to overwrite the
  # earlier ones). When neither option is "TRUE" the rows whose CDR name
  # matches '%Proto%' are excluded; that default now also covers unset
  # options, which previously left the queries undefined.
  if ("TRUE" %in% opt$options$cdr4_only) {
    cdr_filter <- " AND
  don_c.CDR LIKE '%Proto%' AND
  acc_c.CDR LIKE '%Proto%'"
  } else if ("TRUE" %in% opt$options$include_cdr4) {
    cdr_filter <- ""
  } else {
    cdr_filter <- " AND
  don_c.CDR NOT LIKE '%Proto%' AND
  acc_c.CDR NOT LIKE '%Proto%'"
  }

  # Builds the cross-CDR hbond query. cdr1 and cdr2 are the cdr_residues
  # aliases ("don_c" or "acc_c") reported as CDR1 and CDR2. The NOT (...)
  # clause drops backbone-backbone bonds between two residues whose DSSP
  # code is 'E'.
  cross_cdr_query <- function(cdr1, cdr2) {
    paste0("
SELECT
  hb.energy as energy,
  don.struct_id as struct_id,
  don.resNum as resnum,
  hb_geom.AHdist as distance,
  ", cdr1, ".CDR as CDR1,
  ", cdr2, ".CDR as CDR2,
  don.atmType as don_atm,
  acc.atmType as acc_atm,
  don.HBChemType as don_type,
  acc.HBChemType as acc_type,
  don_ss.dssp as don_dssp,
  acc_ss.dssp as acc_dssp
FROM
  hbond_sites AS don,
  hbond_sites AS acc,
  hbonds AS hb,
  hbond_geom_coords as hb_geom,
  cdr_residues as don_c,
  cdr_residues as acc_c,
  residue_secondary_structure as don_ss,
  residue_secondary_structure as acc_ss
WHERE
  acc.struct_id == don.struct_id AND
  don.struct_id == hb.struct_id AND
  hb_geom.struct_id = hb.struct_id AND
  don_c.struct_id = hb.struct_id AND
  acc_c.struct_id = hb.struct_id AND
  don_ss.struct_id = hb.struct_id AND
  acc_ss.struct_id = hb.struct_id AND
  don.site_id = hb.don_id AND
  acc.site_id = hb.acc_id AND
  hb_geom.hbond_id = hb.hbond_id AND
  don_c.resNum = don.resNum AND
  acc_c.resNum = acc.resNum AND
  acc_ss.resNum = acc.resNum AND
  don_ss.resNum = don.resNum AND
  don_c.CDR != acc_c.CDR AND
  NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND
    don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA')", cdr_filter, "
")
  }

  don_data <- query_sample_sources(
    sample_sources, cross_cdr_query("don_c", "acc_c"), char_as_factor=FALSE)
  acc_data <- query_sample_sources(
    sample_sources, cross_cdr_query("acc_c", "don_c"), char_as_factor=FALSE)

  plot_parts <- list(
    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
    scale_y_continuous("Feature Density"),
    theme_bw())

  # Applies the optional facet and the legend placement, then saves.
  # NOTE(review): p is not handed to save_plots; presumably it picks up
  # ggplot2's last plot, so build no other plot between constructing p and
  # calling plot_field - confirm against save_plots.
  plot_field <- function(p, plot_id, grid = NULL){
    if (!is.null(grid)){
      p <- p + facet_wrap(facets=grid, ncol=3)
    }
    if (nrow(sample_sources) <= 3){
      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
    }
    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
  }

  # rep() keeps the assignment valid when a query returns zero rows.
  don_data$type <- rep("don", nrow(don_data))
  don_data$pair <- paste(don_data$CDR1, don_data$CDR2, sep="_")
  acc_data$type <- rep("acc", nrow(acc_data))
  acc_data$pair <- paste(acc_data$CDR1, acc_data$CDR2, sep="_")
  data <- rbind(don_data, acc_data)

  if (nrow(data) == 0){
    print("No cross CDR hbonds found, nothing to plot")
    return(invisible(NULL))
  }

  # NOTE(review): the CDR names are fixed, so with include_cdr4/cdr4_only any
  # CDR whose name is not in this list is never counted or plotted below -
  # confirm whether that is intended.
  cdrs <- c("L1", "L2", "L3", "H1", "H2", "H3")
  types <- c("don", "acc")

  #Hbond Counts per CDR

  # Every ordered (CDR1, CDR2) pair of distinct CDRs; CDR2 varies fastest.
  cdr_pairs <- expand.grid(CDR2 = cdrs, CDR1 = cdrs, stringsAsFactors = FALSE)
  cdr_pairs <- cdr_pairs[cdr_pairs$CDR1 != cdr_pairs$CDR2, ]

  print("Calculating hbond counts")
  # One row per structure, type and CDR pair, including pairs with no bond,
  # so zero counts take part in the percentages and averages.
  counts <- ddply(data, .(sample_source, struct_id, type), function(d){
    n <- vapply(seq_len(nrow(cdr_pairs)), function(i){
      sum(d$energy < 0 &
          d$CDR1 == cdr_pairs$CDR1[i] &
          d$CDR2 == cdr_pairs$CDR2[i], na.rm = TRUE)
    }, integer(1))
    data.frame(n = n, CDR2 = cdr_pairs$CDR2, CDR1 = cdr_pairs$CDR1,
               stringsAsFactors = FALSE)
  })
  print(head(counts))

  # Fraction of the rows in d that show each distinct bond count n. The bars
  # are drawn with stat='identity' to get the spacing right.
  fraction_by_count <- function(d){
    ddply(d, .(n), function(same_n){
      data.frame(perc = nrow(same_n) / nrow(d))
    })
  }

  perc <- ddply(counts, .(sample_source, CDR1, CDR2, type), fraction_by_count)
  perc$nc <- as.character(perc$n)

  perc_all <- ddply(counts, .(sample_source, CDR1, CDR2), fraction_by_count)
  perc_all$nc <- as.character(perc_all$n)

  print(head(perc))

  count_hist <- function(rows, title){
    ggplot(data=rows) +
      geom_bar(aes(x=nc, y=perc, fill=sample_source), position="dodge", stat="identity") +
      xlab("hbonds") +
      ylab("% of Sample Source") +
      scale_y_continuous(labels=percent) +
      ggtitle(title)
  }

  for (outer_cdr in cdrs){
    rows <- perc_all[perc_all$CDR1 == outer_cdr, ]
    title <- paste("Cross CDR H-Bond Counts", outer_cdr)

    p <- count_hist(rows, title)
    plot_field(p, paste("hbond_counts", "hist_by_cdr", outer_cdr, "tog", sep="_"), ~ CDR2)

    p <- count_hist(rows, title)
    plot_field(p, paste("hbond_counts", "hist_by_all", outer_cdr, "tog", sep="_"))

    for (type in types){
      # The mask is taken from perc itself; it used to come from counts$type,
      # which has a different number of rows.
      rows <- perc[perc$CDR1 == outer_cdr & perc$type == type, ]
      title <- paste("Cross CDR H-Bond Counts,", outer_cdr, "as", type)

      p <- count_hist(rows, title)
      plot_field(p, paste("hbond_counts", "hist_by_cdr", outer_cdr, type, sep="_"), ~ CDR2)

      p <- count_hist(rows, title)
      plot_field(p, paste("hbond_counts", "hist_by_all", outer_cdr, type, sep="_"))
    }
  }

  #Total Averages for each cdr

  # One row per group; the earlier version repeated the mean on every input
  # row, which only drew identical bars on top of each other.
  avgs <- ddply(counts, .(sample_source, CDR1, CDR2, type), function(d){
    data.frame(m = mean(d$n))
  })

  pair_avg_hist <- function(rows, title){
    ggplot(data=rows) +
      geom_bar(aes(x=CDR2, y=m, fill=sample_source), position="dodge", stat="identity") +
      xlab("CDR2") +
      ylab("Avg hbonds") +
      ggtitle(title)
  }

  #Histograms
  for (outer_cdr in cdrs){
    p <- pair_avg_hist(
      avgs[avgs$CDR1 == outer_cdr, ],
      paste("Average Cross CDR H-Bond Counts", outer_cdr))
    plot_field(p, paste("avg_hbond_counts", "hist_by_cdr_as_tog", outer_cdr, sep="_"))

    for (type in types){
      p <- pair_avg_hist(
        avgs[avgs$CDR1 == outer_cdr & avgs$type == type, ],
        paste("Average Cross CDR H-Bond Counts,", outer_cdr, "as", type))
      plot_field(p, paste("avg_hbond_counts", "hist_by_cdr", outer_cdr, type, sep="_"))
    }
  }

  #Combined averages - CDR and everything else

  avgs <- ddply(counts, .(sample_source, CDR1, type), function(d){
    data.frame(m = mean(d$n))
  })

  cdr_avg_hist <- function(rows, title){
    # x is CDR1 here, so the axis is labelled "CDR" rather than "CDR2".
    ggplot(data=rows) +
      geom_bar(aes(x=CDR1, y=m, fill=sample_source), position="dodge", stat="identity") +
      xlab("CDR") +
      ylab("Avg hbonds") +
      ggtitle(title)
  }

  p <- cdr_avg_hist(avgs, paste("Average Cross CDR H-Bond Counts"))
  plot_field(p, paste("avg_hbond_counts", "hist_all_tog", sep="_"))

  for (type in types){
    p <- cdr_avg_hist(
      avgs[avgs$type == type, ],
      paste("Average Cross CDR H-Bond Counts as", type))
    plot_field(p, paste("avg_hbond_counts", "hist_all", type, sep="_"))
  }

  #Hbond Energies and Distances

  by_pair <- c("sample_source", "CDR1", "CDR2")
  by_cdr <- c("sample_source", "CDR1")

  # Estimates the density of `field` within `grouping` and saves the plot.
  # The emptiness check is made on the estimate that is about to be drawn;
  # two call sites used to test the previous estimate instead.
  density_plot <- function(rows, grouping, field, axis_label, title, plot_id, grid = NULL){
    dens <- estimate_density_1d(rows, grouping, field)
    if (nrow(dens) >= 1){
      p <- ggplot(data=dens) + plot_parts +
        xlab(axis_label) +
        geom_line(aes(x, y, colour=sample_source), size=1.2) +
        ggtitle(title)
      plot_field(p, plot_id, grid)
    }
  }

  # For every CDR1: both types together and each type on its own, faceted by
  # CDR2 ("den_by_cdr") and pooled over CDR2 ("den_by_all").
  plot_density_set <- function(field, axis_label, noun){
    title_stem <- paste("Cross CDR H-Bond", noun)
    for (outer_cdr in cdrs){
      cdr_rows <- data[data$CDR1 == outer_cdr, ]
      title <- paste(outer_cdr, title_stem, outer_cdr)

      density_plot(cdr_rows, by_pair, field, axis_label, title,
        paste("hbond", field, "den_by_cdr", outer_cdr, "tog", sep="_"), ~ CDR2)
      density_plot(cdr_rows, by_cdr, field, axis_label, title,
        paste("hbond", field, "den_by_all", outer_cdr, "tog", sep="_"))

      for (type in types){
        type_rows <- cdr_rows[cdr_rows$type == type, ]
        title <- paste(outer_cdr, paste0(title_stem, ","), outer_cdr, "as", type)

        density_plot(type_rows, by_pair, field, axis_label, title,
          paste("hbond", field, "den_by_cdr", outer_cdr, type, sep="_"), ~ CDR2)
        density_plot(type_rows, by_cdr, field, axis_label, title,
          paste("hbond", field, "den_by_all", outer_cdr, type, sep="_"))
      }
    }
  }

  plot_density_set("energy", "REU", "Energies")
  plot_density_set("distance", "Angstroms", "Distances")

})) # end FeaturesAnalysis
diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R
new file mode 100644
index 0000000..bef7d30
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R
@@ -0,0 +1,236 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

check_setup()

feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
id = "ab_hbonds-cdr_framework_den",
author = "Jared Adolf-Bryfogle",
brief_description = "CDR -> Framework Hbonds",
feature_reporter_dependencies = c("AntibodyFeatures", "HBondFeatures"),
run=function(self, sample_sources, output_dir, output_formats){
  # Plots hydrogen bonds between a CDR residue and a residue of chain L or H
  # that is not listed in cdr_residues: densities of energy, A-H distance and
  # per-structure bond count for each CDR, plus average counts. "don" rows
  # have the CDR residue as donor, "acc" rows have it as acceptor.
  #
  # NOTE(review): the acceptor query used to be a copy of the antigen
  # interface query (donor on interface side 'side2') with a different column
  # set, so rbind() of the two results failed. It is now the mirror image of
  # the donor query, following the analysis id - confirm this is the intent.

  # cdr_site / fw_site are the hbond_sites aliases ("don" or "acc") that must
  # lie inside a CDR and outside every CDR respectively.
  # Changes against the earlier donor query:
  #  * struct ids are compared pairwise; "a == b == c" compared the 0/1
  #    result of the first test with a struct id;
  #  * the framework side is excluded from CDRs with NOT EXISTS; the join
  #    "acc.resNum != cdr.resNum" matched any other CDR residue and so
  #    excluded nothing;
  #  * interface_residues is no longer listed in FROM, where it was never
  #    joined.
  framework_query <- function(cdr_site, fw_site){
    paste0("
SELECT
  DISTINCT
  hb.energy as energy,
  hb.struct_id as struct_id,
  cdr_pdb_info.residue_number as resnum,
  cdr_pdb_info.chain_id as chainid1,
  fw_pdb_info.residue_number as resnum2,
  fw_pdb_info.chain_id as chainid2,
  hb_geom.AHdist as distance,
  cdr_res.CDR as CDR
FROM
  hbond_sites AS don,
  hbond_sites AS acc,
  hbonds AS hb,
  hbond_geom_coords as hb_geom,
  cdr_residues as cdr_res,
  residue_pdb_identification as cdr_pdb_info,
  residue_pdb_identification as fw_pdb_info
WHERE
  don.struct_id = hb.struct_id AND
  acc.struct_id = hb.struct_id AND
  hb_geom.struct_id = hb.struct_id AND
  cdr_res.struct_id = hb.struct_id AND
  cdr_pdb_info.struct_id = hb.struct_id AND
  fw_pdb_info.struct_id = hb.struct_id AND
  don.site_id = hb.don_id AND
  acc.site_id = hb.acc_id AND
  hb_geom.hbond_id = hb.hbond_id AND
  cdr_res.resNum = ", cdr_site, ".resNum AND
  cdr_pdb_info.residue_number = ", cdr_site, ".resNum AND
  fw_pdb_info.residue_number = ", fw_site, ".resNum AND
  (fw_pdb_info.chain_id = 'L' OR fw_pdb_info.chain_id = 'H') AND
  NOT EXISTS (
    SELECT 1
    FROM cdr_residues AS fw_cdr
    WHERE
      fw_cdr.struct_id = hb.struct_id AND
      fw_cdr.resNum = ", fw_site, ".resNum)
")
  }

  don_data <- query_sample_sources(
    sample_sources, framework_query("don", "acc"), char_as_factor=FALSE)
  acc_data <- query_sample_sources(
    sample_sources, framework_query("acc", "don"), char_as_factor=FALSE)

  plot_parts <- list(
    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
    scale_y_continuous("Feature Density"),
    theme_bw())

  # Applies the optional facet and the legend placement, then saves.
  # NOTE(review): p is not handed to save_plots; presumably it picks up
  # ggplot2's last plot - confirm against save_plots.
  plot_field <- function(p, plot_id, grid = NULL){
    if (!is.null(grid)){
      p <- p + facet_wrap(facets=grid, ncol=3)
    }
    if (nrow(sample_sources) <= 3){
      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
    }
    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
  }

  # rep() keeps the assignment valid when a query returns zero rows.
  don_data$type <- rep("don", nrow(don_data))
  acc_data$type <- rep("acc", nrow(acc_data))
  data <- rbind(don_data, acc_data)

  # The three views every plot family is drawn for; the names are the
  # don/acc/tog part of the plot ids.
  views <- list(
    don = list(rows = don_data, suffix = ", CDR Donor"),
    acc = list(rows = acc_data, suffix = ", CDR Acceptor"),
    tog = list(rows = data, suffix = ""))

  group <- c("sample_source", "CDR")

  # Density of `field` per sample source and CDR, faceted by CDR. Views
  # without rows, or whose estimate comes back empty, are skipped.
  density_plot <- function(rows, field, axis_label, title, plot_id){
    if (nrow(rows) == 0){
      return(invisible(NULL))
    }
    dens <- estimate_density_1d(rows, group, field)
    if (nrow(dens) >= 1){
      p <- ggplot(data=dens) + plot_parts +
        xlab(axis_label) +
        geom_line(aes(x, y, colour=sample_source), size=1.2) +
        ggtitle(title)
      plot_field(p, plot_id, ~ CDR)
    }
  }

  #Hbond Energy and Distance densities
  density_fields <- list(
    list(field = "energy", axis = "REU", noun = "Energies"),
    list(field = "distance", axis = "Angstroms", noun = "Distances"))

  for (spec in density_fields){
    for (view_name in names(views)){
      view <- views[[view_name]]
      density_plot(
        view$rows, spec$field, spec$axis,
        paste0("CDR-Framework Hydrogen Bond ", spec$noun, view$suffix),
        paste("hbond", spec$field, paste0(view_name, "_den_by_cdr"), sep="_"))
    }
  }

  #Hbond Counts per CDR
  # Bonds with negative energy per structure and CDR. The earlier
  # length(energy > 0) returned the row count whatever the energies were.
  count_hbonds <- function(rows){
    if (nrow(rows) == 0){
      return(data.frame())
    }
    ddply(rows, .(sample_source, struct_id, CDR), function(d){
      data.frame(n = sum(d$energy < 0, na.rm = TRUE))
    })
  }
  counts <- lapply(views, function(view) count_hbonds(view$rows))

  for (view_name in names(views)){
    density_plot(
      counts[[view_name]], "n", "hbonds",
      paste0("CDR-Framework Hydrogen Bonds", views[[view_name]]$suffix),
      paste0("hbond_", view_name, "_den_by_cdr"))
  }

  #Histograms
  # Mean count per sample source and CDR, taken from the counts. The means
  # used to be read from a column `n` that the raw query results do not
  # have, and the bars were drawn from frames without the plotted column `m`.
  # NOTE(review): a structure with no bond in a CDR has no row in counts and
  # therefore does not pull the average down.
  for (view_name in names(views)){
    view_counts <- counts[[view_name]]
    if (nrow(view_counts) == 0){
      next
    }
    avgs <- ddply(view_counts, .(sample_source, CDR), function(d){
      data.frame(m = mean(d$n))
    })
    p <- ggplot(data=avgs) +
      geom_bar(aes(x=CDR, y=m, fill=sample_source), position="dodge", stat="identity") +
      xlab("CDR") +
      ylab("Avg hbonds") +
      ggtitle(paste0("Average CDR-Framework Hydrogen Bonds", views[[view_name]]$suffix))
    plot_field(p, paste0("hbond_", view_name, "_hist_by_cdr"))
  }

})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R
new file
mode 100644
index 0000000..0ad66f4
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R
@@ -0,0 +1,228 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

check_setup()

feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
id = "ab_hbonds-intra_cdr_den",
author = "Jared Adolf-Bryfogle",
brief_description = "Self-CDR Hbonds Excluding those arising from BB-BB beta sheet",
feature_reporter_dependencies = c("AntibodyFeatures", "HBondFeatures"),
run=function(self, sample_sources, output_dir, output_formats){
  # Plots hydrogen bonds whose donor and acceptor lie in the same CDR:
  # densities of energy, A-H distance and per-structure bond count for each
  # CDR, plus the average count per CDR.

  # CDR-name filter appended to the WHERE clause. cdr4_only takes precedence
  # over include_cdr4 (the last matching branch used to overwrite the
  # earlier ones). When neither option is "TRUE" the rows whose CDR name
  # matches '%Proto%' are excluded; that default now also covers unset
  # options, which previously left the query undefined.
  if ("TRUE" %in% opt$options$cdr4_only) {
    cdr_filter <- " AND
  don_c.CDR LIKE '%Proto%' AND
  acc_c.CDR LIKE '%Proto%'"
  } else if ("TRUE" %in% opt$options$include_cdr4) {
    cdr_filter <- ""
  } else {
    cdr_filter <- " AND
  don_c.CDR NOT LIKE '%Proto%' AND
  acc_c.CDR NOT LIKE '%Proto%'"
  }

  # The NOT (...) clause drops backbone-backbone bonds between two residues
  # whose DSSP code is 'E'.
  sele <- paste0("
SELECT
  DISTINCT
  hb.energy as energy,
  don.struct_id as struct_id,
  don.resNum as resnum1,
  acc.resNum as resnum2,
  hb_geom.AHdist as distance,
  don_c.CDR as CDR,
  don.atmType as don_atm,
  acc.atmType as acc_atm,
  don.HBChemType as don_type,
  acc.HBChemType as acc_type
FROM
  hbond_sites AS don,
  hbond_sites AS acc,
  hbonds AS hb,
  hbond_geom_coords as hb_geom,
  cdr_residues as don_c,
  cdr_residues as acc_c,
  residue_secondary_structure as don_ss,
  residue_secondary_structure as acc_ss
WHERE
  acc.struct_id == don.struct_id AND
  don.struct_id == hb.struct_id AND
  hb_geom.struct_id = hb.struct_id AND
  don_c.struct_id = hb.struct_id AND
  acc_c.struct_id = hb.struct_id AND
  don_ss.struct_id = hb.struct_id AND
  acc_ss.struct_id = hb.struct_id AND
  don.site_id = hb.don_id AND
  acc.site_id = hb.acc_id AND
  hb_geom.hbond_id = hb.hbond_id AND
  don_c.resNum = don.resNum AND
  acc_c.resNum = acc.resNum AND
  don_c.CDR == acc_c.CDR AND
  acc_ss.resNum = acc.resNum AND
  don_ss.resNum = don.resNum AND
  NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND
    don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA')", cdr_filter, "
")

  data <- query_sample_sources(sample_sources, sele, char_as_factor=FALSE)

  if (nrow(data) == 0){
    print("No intra CDR hbonds found, nothing to plot")
    return(invisible(NULL))
  }

  plot_parts <- list(
    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
    scale_y_continuous("Feature Density"),
    theme_bw())

  # Applies the optional facet and the legend placement, then saves.
  # NOTE(review): p is not handed to save_plots; presumably it picks up
  # ggplot2's last plot - confirm against save_plots.
  plot_field <- function(p, plot_id, grid = NULL){
    if (!is.null(grid)){
      p <- p + facet_wrap(facets=grid, ncol=3)
    }
    if (nrow(sample_sources) <= 3){
      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
    }
    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
  }

  group <- c("sample_source", "CDR")

  # Density of `field` per sample source and CDR, faceted by CDR.
  density_plot <- function(rows, field, axis_label, title, plot_id){
    dens <- estimate_density_1d(rows, group, field)
    p <- ggplot(data=dens) + plot_parts +
      xlab(axis_label) +
      geom_line(aes(x, y, colour=sample_source), size=1.2) +
      ggtitle(title)
    plot_field(p, plot_id, ~ CDR)
  }

  #Hbond Energy density
  density_plot(data, "energy", "REU", "Intra-CDR H-Bond Energies",
    paste("intra_cdr_hbonds", "energy", "tog_den_by_cdr", sep="_"))

  #Hbond Distances
  density_plot(data, "distance", "Angstroms", "Intra-CDR H-Bond Distances",
    paste("intra_cdr_hbonds", "distance", "tog_den_by_cdr", sep="_"))

  #Hbond Counts per CDR
  # Bonds with negative energy per structure and CDR. The earlier
  # length(energy > 0) returned the row count whatever the energies were.
  # NOTE(review): a structure with no bond in a CDR has no row here and
  # therefore does not pull the average below down.
  counts <- ddply(data, .(sample_source, struct_id, CDR), function(d){
    data.frame(n = sum(d$energy < 0, na.rm = TRUE))
  })

  density_plot(counts, "n", "hbonds", "Average Intra-CDR H-Bonds",
    "intra_cdr_hbond_counts_tog_den_by_cdr")

  # One row per sample source and CDR.
  avgs <- ddply(counts, .(sample_source, CDR), function(d){
    data.frame(m = mean(d$n))
  })

  #Histograms
  # ggtitle() is now part of the plot expression; it used to sit on its own
  # line after a missing `+`, so the title was never applied. The x axis
  # shows CDRs and is labelled accordingly.
  p <- ggplot(data=avgs) +
    geom_bar(aes(x=CDR, y=m, fill=sample_source), position="dodge", stat="identity") +
    xlab("CDR") +
    ylab("Avg hbonds") +
    ggtitle("Average Intra-CDR H-Bonds")
  plot_field(p, "intra_cdr_hbond_counts_tog_hist_by_cdr")

})) # end FeaturesAnalysis
\ No newline at end of file
diff --git
a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_cluster_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_cluster_hbonds_den.R
new file mode 100644
index 0000000..e69de29
diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R
new file mode 100644
index 0000000..1a696e4
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R
@@ -0,0 +1,213 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+# Density plots and histograms of hydrogen bonds between antibody CDR residues
+# and interface residues on side2 (presumably the antigen side of an LH_A
+# interface -- confirm against the features reporter setup).
+#
+# Which CDRs are analysed is controlled through opt$options:
+#   cdr4_only    contains "TRUE" : only CDRs matching '%Proto%'
+#   include_cdr4 contains "TRUE" : all CDRs
+#   otherwise                    : all CDRs except those matching '%Proto%'
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "ab_hbonds-cdr_ag_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "CDR - Antigen Hbonds.
+Must have LH_A analyzed by features reporter for this to work",
+feature_reporter_dependencies = c("AntibodyFeatures", "HBondFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+
+	# cdr4_only wins over include_cdr4, matching the original sequence of
+	# independent if-blocks in which the last assignment took effect.
+	if ("TRUE" %in% opt$options$cdr4_only){
+		cdr_filter <- " AND CDR LIKE '%Proto%'"
+		total_filter <- " where CDR = 'Proto_H4' or CDR = 'Proto_L4'"
+	} else if ("TRUE" %in% opt$options$include_cdr4){
+		cdr_filter <- ""
+		total_filter <- ""
+	} else {
+		# Also the fallback when neither option is set; previously the
+		# queries were left undefined in that case.
+		cdr_filter <- " AND CDR NOT LIKE '%Proto%'"
+		total_filter <- " where CDR NOT LIKE '%Proto%'"
+	}
+
+	# Builds the hydrogen-bond query. `cdr_site` ("don" or "acc") names the
+	# hbond site that must be a CDR residue; `partner_site` is the opposite
+	# site, which must be an interface residue on side2.
+	hbond_query <- function(cdr_site, partner_site){
+		paste0("
+		SELECT
+			hb.energy as energy,
+			hb.struct_id as struct_id,
+			", cdr_site, ".resNum as resnum,
+			hb_geom.AHdist as distance,
+			cdr_residues.CDR
+		FROM
+			interface_residues as partner_res,
+			hbond_sites AS don,
+			hbond_sites AS acc,
+			hbonds AS hb,
+			hbond_geom_coords as hb_geom,
+			cdr_residues
+		WHERE
+			acc.struct_id = hb.struct_id AND
+			don.struct_id = hb.struct_id AND
+			hb_geom.struct_id = hb.struct_id AND
+			cdr_residues.struct_id = hb.struct_id AND
+			partner_res.struct_id = hb.struct_id AND
+			don.site_id = hb.don_id AND
+			acc.site_id = hb.acc_id AND
+			hb_geom.hbond_id = hb.hbond_id AND
+			cdr_residues.resNum = ", cdr_site, ".resNum AND
+			partner_res.resNum = ", partner_site, ".resNum AND
+			partner_res.side = 'side2'", cdr_filter)
+	}
+
+	sele_don <- hbond_query("don", "acc")
+	sele_acc <- hbond_query("acc", "don")
+	sele_total_cdrs <- paste0("SELECT struct_id, CDR FROM cdr_residues", total_filter)
+
+	don_data <- query_sample_sources(sample_sources, sele_don, char_as_factor=FALSE)
+	acc_data <- query_sample_sources(sample_sources, sele_acc, char_as_factor=FALSE)
+	total_data <- query_sample_sources(sample_sources, sele_total_cdrs, char_as_factor=FALSE)
+
+	plot_parts <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+
+	# Adds optional CDR facets and legend placement to `p`, then saves under
+	# `plot_id`. NOTE(review): `p` is not handed to save_plots(); presumably
+	# save_plots() picks up ggplot2's last plot -- confirm.
+	plot_field = function(p, plot_id, grid = NULL){
+
+		if (! is.null(grid)){
+			p <- p+ facet_wrap(facets=grid, ncol=3)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+	# One density curve of `field` per sample source, faceted by CDR.
+	plot_cdr_density <- function(d, field, x_label, title, plot_id){
+		dens <- estimate_density_1d(d, c("sample_source", "CDR"), field)
+		p <- ggplot(data=dens, na.rm=TRUE) + plot_parts +
+			xlab(x_label) +
+			geom_line(aes(x, y, colour=sample_source), size=1.2) +
+			ggtitle(title)
+		plot_field(p, plot_id, ~ CDR)
+	}
+
+	# rep() keeps the assignment valid when a query returned no rows.
+	don_data$type <- rep("don", nrow(don_data))
+	acc_data$type <- rep("acc", nrow(acc_data))
+	data <- rbind(don_data, acc_data)
+
+	#Hbond Counts per CDR
+	print("Calculating hbond counts")
+	counts <- ddply(total_data, .(sample_source, struct_id, CDR), function(totals){
+		# Every row of `totals` carries the same keys, so compare against the
+		# first one instead of recycling the whole column against `data`.
+		in_cdr <- data$sample_source == totals$sample_source[1] &
+			data$struct_id == totals$struct_id[1] &
+			data$CDR == totals$CDR[1]
+		ndon <- nrow(data[in_cdr & data$type == "don",])
+		nacc <- nrow(data[in_cdr & data$type == "acc",])
+
+		don <- data.frame(n=ndon, sample_source = as.character(totals$sample_source[1]), struct_id = totals$struct_id[1], CDR = totals$CDR[1], type="don")
+		acc <- data.frame(n=nacc, sample_source = as.character(totals$sample_source[1]), struct_id = totals$struct_id[1], CDR = totals$CDR[1], type="acc")
+		rbind(don, acc)
+	})
+
+	types <- c("don", "acc")
+
+	plot_cdr_density(counts, "n", "hbonds", "Cross Interface hbonds", "hbond_counts_den_by_cdr_tog")
+	for (type in types){
+		plot_cdr_density(counts[counts$type == type,], "n", "hbonds",
+			paste("Cross Ab-Ag Hydrogen Bonds, CDR", type),
+			paste("hbond_counts_den_by_cdr", type, sep="_"))
+	}
+
+	avgs <- ddply(counts, .(sample_source, CDR, type), function(d){
+		data.frame( m=mean(d$n))
+	})
+
+	# Percentage of the rows in `d` that have exactly n hbonds, for each n.
+	percent_by_n <- function(d){
+		ddply(d, .(n), function(d2){
+			data.frame(perc = nrow(d2)/nrow(d) * 100)
+		})
+	}
+	perc <- ddply(counts, .(sample_source, CDR, type), percent_by_n)
+	perc_all <- ddply(counts, .(sample_source, CDR), percent_by_n)
+
+	#Histograms by number of hbonds:
+	for (type in types){
+
+		# Fixed: the original subset was perc[counts$perc == type,], which
+		# selects no rows because `counts` has no `perc` column.
+		p <- ggplot(data=perc[perc$type == type,], na.rm=TRUE) +
+			geom_bar(aes(x=as.character(n), y = perc, fill=sample_source), position="dodge", stat='identity') +
+			xlab("hbonds") +
+			ylab("% of Sample Source") +
+			ggtitle(paste("Cross Interface H-bonds, CDR", type))
+		plot_field(p, paste("hbond_counts_hist_by_cdr", type, sep="_"), ~ CDR)
+	}
+
+	p <- ggplot(data=perc_all, na.rm=TRUE) +
+		geom_bar(aes(x=as.character(n), y=perc, fill=sample_source), position="dodge", stat='identity') +
+		xlab("hbonds") +
+		ylab("% of Sample Source") +
+		ggtitle("Cross Interface H-bonds")
+	plot_field(p, "hbond_counts_hist_by_cdr_tog", ~CDR)
+
+	#Average Histograms
+	for (type in types){
+
+		p <- ggplot(data=avgs[avgs$type == type,], na.rm=TRUE) +
+			geom_bar(aes(x=CDR, y = m, fill=sample_source), position="dodge", stat='identity') +
+			xlab("CDR") +
+			ylab("avg n") +
+			ggtitle(paste("Average Cross Interface H-Bonds, CDR", type))
+		plot_field(p, paste("avg_hbond_counts_hist_by_cdr", type, sep="_"))
+	}
+
+	p <- ggplot(data=avgs, na.rm=TRUE) +
+		geom_bar(aes(x=CDR, y=m, fill=sample_source), position="dodge", stat='identity') +
+		xlab("CDR") +
+		ylab("avg n") +
+		ggtitle("Average Cross Interface H-Bonds")
+	plot_field(p, "avg_hbond_counts_hist_by_cdr_tog")
+
+	# Donor vs. acceptor averages within each CDR facet (x axis is `type`).
+	p <- ggplot(data=avgs, na.rm=TRUE) +
+		geom_bar(aes(x=type, y=m, fill=sample_source), position="dodge", stat='identity') +
+		xlab("type") +
+		ylab("avg n") +
+		ggtitle("Average Cross Interface H-Bonds")
+	plot_field(p, "avg_hbond_counts_hist_acc_vs_don_by_cdr", ~ CDR)
+
+	#Hbond energy and distance densities: CDR as donor, as acceptor, and both together
+	density_specs <- list(
+		list(field="energy", x_label="REU", noun="Energies"),
+		list(field="distance", x_label="Angstroms", noun="Distances"))
+	for (spec in density_specs){
+		title <- paste("Cross Ab-Ag Hydrogen Bond", spec$noun)
+		id_stem <- paste("hbond", "den_by_cdr", spec$field, sep="_")
+		plot_cdr_density(don_data, spec$field, spec$x_label, paste0(title, ", CDR Donor"), paste(id_stem, "don", sep="_"))
+		plot_cdr_density(acc_data, spec$field, spec$x_label, paste0(title, ", CDR Acceptor"), paste(id_stem, "acc", sep="_"))
+		plot_cdr_density(data, spec$field, spec$x_label, title, paste(id_stem, "tog", sep="_"))
+	}
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/antibodies/packing_angle/.gitignore b/inst/scripts/analysis/plots/antibodies/packing_angle/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R
new file mode 100644
index 0000000..8f990f0
--- /dev/null
+++ b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R
@@ -0,0 +1,88 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "ab_packing_angle_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "VL VH packing angle metrics",
+feature_reporter_dependencies = c("AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+
+	# Plots one density curve per sample source for each VL/VH orientation
+	# metric read from the ab_metrics table.
+	metrics_query <- "
+	SELECT
+		VL_VH_packing_angle,
+		VL_VH_distance,
+		VL_VH_opening_angle,
+		VL_VH_opposite_opening_angle
+	FROM
+		ab_metrics
+	"
+
+	metrics <- query_sample_sources(sample_sources, metrics_query, char_as_factor=FALSE)
+
+	# Layers shared by every plot below.
+	density_layers <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+
+	# Applies optional facets and legend placement to `p`, then saves the
+	# plot under `plot_id`. NOTE(review): `p` itself is not passed to
+	# save_plots(); presumably it saves ggplot2's last plot -- confirm.
+	facet_and_save <- function(p, plot_id, grid = NULL){
+		if (! is.null(grid)){
+			p <- p + facet_wrap(facets=grid, ncol=3)
+		}
+		few_sources <- nrow(sample_sources) <= 3
+		if (few_sources){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+	# Draws the density of one ab_metrics column and saves it as `plot_id`.
+	plot_metric_density <- function(column, x_label, title, plot_id){
+		density_group <- c("sample_source")
+		dens <- estimate_density_1d(metrics, density_group, c(column))
+		p <- ggplot(data=dens, na.rm=TRUE) + density_layers +
+			geom_line(aes(x, y, colour=sample_source), size=1.2) +
+			xlab(x_label) +
+			ggtitle(title)
+		facet_and_save(p, plot_id)
+	}
+
+	#Packing Angle
+	plot_metric_density("VL_VH_packing_angle",
+		x_label = "Degrees",
+		title = "VL VH Packing Angle",
+		plot_id = "vl_vh_packing_angle_den")
+
+	#Distance
+	plot_metric_density("VL_VH_distance",
+		x_label = "Angstroms",
+		title = "VL VH Distance",
+		plot_id = "vl_vh_distance_den")
+
+	#Opening Angle
+	plot_metric_density("VL_VH_opening_angle",
+		x_label = "Degrees",
+		title = "VL VH Opening Angle",
+		plot_id = "vl_vh_opening_angle_den")
+
+	#Opposite Opening angle
+	plot_metric_density("VL_VH_opposite_opening_angle",
+		x_label = "Degrees",
+		title = "VL VH Opposite Opening Angle",
+		plot_id = "vl_vh_opposite_opening_angle_den")
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R
new file mode 100644
index 0000000..fe3f515
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R
@@ -0,0 +1,236 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "int_SASA_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "Graphs basic dSASA and SASA information",
+feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+
+	# Per interface side: density plots of the dSASA/dhSASA columns of
+	# interface_sides and of the derived polar and aromatic fractions, then
+	# bar charts of each group's mean, mean of its first 10 rows, and first
+	# row. The interface-level block below is kept disabled, as found.
+#	sele = "
+#	SELECT
+#		dSASA,
+#		dSASA_hphobic,
+#		dSASA_polar,
+#		interface
+#	FROM
+#		interfaces"
+#
+#	data = query_sample_sources(sample_sources, sele, char_as_factor=F)
+#
+
+#	fields = c("dSASA", "dSASA_hphobic", "dSASA_polar")
+#	for(field in fields){
+#
+#		group = c("sample_source")
+#		dens <- estimate_density_1d(data, group, field)
+#		p <- ggplot(data=dens, na.rm=T) + parts +
+#			geom_line(aes(x, y, colour=sample_source), size=1.2) +
+#			ggtitle(field)
+#		plot_field(p, paste(field, "den_by_all", sep="_"))
+#
+#		group = c("sample_source", "interface")
+#		dens <- estimate_density_1d(data, group, field)
+#		p <- ggplot(data=dens, na.rm=T) + parts +
+#			geom_line(aes(x, y, colour=sample_source), size=1.2) +
+#			ggtitle(field)
+#		plot_field(p, paste(field, "den_by_interface", sep="_"),grid=~interface)
+#	}
+#
+#
+#	#dSASA sides
+#	int_data = data
+
+	plot_parts <- list(
+		geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+		scale_y_continuous("Feature Density"),
+		theme_bw())
+	# Facets `p` by `grid` when given, places the legend, then saves as `plot_id`.
+	plot_field = function(p, plot_id, grid = NULL){
+		# NOTE(review): `p` is not passed to save_plots(); presumably it saves ggplot2's last plot -- confirm.
+		if (! is.null(grid)){
+			p <- p+ facet_grid(facets=grid)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+	}
+
+	sele = "
+	SELECT
+		dSASA,
+		dSASA_sc,
+		dSASA - dSASA_sc as dSASA_bb,
+		dhSASA,
+		dhSASA_sc,
+		dhSASA - dhSASA_sc as dhSASA_bb,
+		dhSASA_rel_by_charge,
+		aromatic_dSASA_fraction,
+		interface,
+		side
+	FROM
+		interface_sides
+	ORDER BY dSASA DESC
+	"
+	# Rows come back largest dSASA first; the Means section relies on that order.
+	#Polar fraction - from Ben Strange's Paper
+
+
+	parts = list(plot_parts, xlab("SASA"))
+
+	data = query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+
+	data$polar_fraction = (data$dSASA - data$dhSASA)/data$dSASA
+	field = "polar_fraction"
+
+	group = c("sample_source", "side")
+	dens <- estimate_density_1d(data, group, field)
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		xlab("dSASA(Polar)/dSASA") +
+		ggtitle("Polar dSASA Fraction")
+	plot_field(p, "dSASA_polar_fraction_den_by_all", grid=side ~ .)
+
+	group = c("sample_source", "interface", "side")
+	dens <- estimate_density_1d(data, group, field)
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		xlab("dSASA(Polar)/dSASA") +
+		ggtitle("Polar dSASA Fraction")
+	plot_field(p, "dSASA_polar_fraction_den_by_interface", grid=side~interface)
+
+	#print(data)
+
+	#Backbone SASA may not be interesting, but I still want to know for now.
+	fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge")
+	for (field in fields){
+
+		parts = list(plot_parts, scale_x_continuous("SASA"))
+		group = c("sample_source", "side")
+		dens <- estimate_density_1d(data, group, field)
+		p <- ggplot(data=dens, na.rm=TRUE) + parts +
+			geom_line(aes(x, y, colour=sample_source), size=1.2) +
+			ggtitle(paste("Buried", field, sep=" "))
+		plot_field(p, paste(field, "den_sides_by_all", sep="_"), grid=side ~ .)
+
+		group = c("sample_source", "interface", "side")
+		dens <- estimate_density_1d(data, group, field)
+		p <- ggplot(data=dens, na.rm=TRUE) + parts +
+			geom_line(aes(x, y, colour=sample_source), size=1.2) +
+			ggtitle(paste("Buried", field, sep=" "))
+		plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface)
+
+
+	}
+
+
+# Plotting all together - Might look like crap, but lets try it.
+#	group = c("sample_source", "interface", "side")
+#	dens_dsasa <- estimate_density_1d(data, group, c("dSASA"))
+#	dens_dsasa_bb <- estimate_density_1d(data, group, c("dSASA_bb"))
+#	dens_dsasa_sc <- estimate_density_1d(data, group, c("dSASA_sc"))
+#
+#	p <- ggplot(data=dens_dsasa, na.rm=T) + parts +
+#		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+#		#geom_point(data=dens_dsasa, aes(x, y, colour=sample_source, size=.5, pch="o")) +
+#		geom_line(data=dens_dsasa_bb, aes(x, y, colour=sample_source, linetype= "dotted"), size=1.2) +
+#		geom_line(data=dens_dsasa_sc, aes(x, y, colour=sample_source, linetype= "dotdash"), size= 1.2) +
+#		ggtitle("dSASA Density") +
+#	plot_field(p, paste("dSASA_all", "den_sides","by_interface", sep="_"), grid=side~interface)
+
+	#### Means #########
+	fields = c("dSASA", "dhSASA")
+	for (field in fields){
+		# head() caps the "top 10" at the group size; d2[1:10, ] padded small groups with NA rows.
+		avgs <- ddply(data, .(sample_source, side, field), function(d2){
+			data.frame(m = mean(d2[,field]), std_dev = sd(d2[,field]), m_top10 = mean(head(d2[,field], 10)), std_dev_top_10 = sd(head(d2[,field], 10)), top = d2[1,field])
+		})
+		# Plot ids carry `field`; without it the dhSASA pass overwrote the dSASA plots.
+		p <- ggplot(data=avgs ) +
+			geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) +
+			theme_bw() +
+			ggtitle(paste("Buried", field,"Average", sep=" "))+
+			scale_x_discrete(labels = abbreviate)
+		plot_field(p, paste(field, "avg_sides_by_all", sep="_"), grid=side ~ .)
+
+		#Average Top 10
+		p <- ggplot(data=avgs ) +
+			geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) +
+			theme_bw() +
+			ggtitle(paste("Buried", field, "Average Best 10",sep=" ")) +
+			scale_x_discrete(labels = abbreviate)
+		plot_field(p, paste(field, "avg_sides_top_10_by_all", sep="_"), grid=side ~ .)
+
+		#Best
+		p <- ggplot(data=avgs ) +
+			geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top , fill=sample_source)) +
+			theme_bw() +
+			ggtitle(paste("Buried", field, "top", sep=" ")) +
+			scale_x_discrete(labels = abbreviate)
+		plot_field(p, paste(field, "sides_top_by_all", sep="_"), grid=side ~ .)
+
+		avgs <- ddply(data, .(sample_source, side, field, interface), function(d2){
+			data.frame(m = mean(d2[,field]), std_dev = sd(d2[,field]), m_top10 = mean(head(d2[,field], 10)), std_dev_top_10 = sd(head(d2[,field], 10)), top = d2[1,field])
+		})
+
+		p <- ggplot(data=avgs ) +
+			geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) +
+			theme_bw() +
+			ggtitle(paste("Buried", field,"Average", sep=" ")) +
+			scale_x_discrete(labels = abbreviate)
+		plot_field(p, paste(field, "avg_sides_by_interface", sep="_"), grid=side ~ interface)
+
+		#Average Top 10
+		p <- ggplot(data=avgs ) +
+			geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) +
+			theme_bw() +
+			ggtitle(paste("Buried", field, "Average Best 10",sep=" ")) +
+			scale_x_discrete(labels = abbreviate) +
+			ylab(field)
+		plot_field(p, paste(field, "avg_sides_top_10_by_interface", sep="_"), grid=side ~ interface)
+
+		#Best
+		p <- ggplot(data=avgs ) +
+			geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top , fill=sample_source)) +
+			theme_bw() +
+			ggtitle(paste("Buried", field, "top", sep=" ")) +
+			scale_x_discrete(labels = abbreviate) +
+			ylab(field)
+		plot_field(p, paste(field, "sides_top_by_interface", sep="_"), grid=side ~ interface)
+
+	}
+	#Fractions
+	field = "aromatic_dSASA_fraction"
+	parts = list(plot_parts, scale_x_continuous("fraction", limit=c(0, 1.0)))
+	group = c("sample_source", "side")
+	dens <- estimate_density_1d(data, group, field)
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		ggtitle("Aromatic dSASA Fraction")
+
+	plot_field(p, "dSASA_aromatic_fraction_den_by_all", grid=side ~ .)
+
+	group = c("sample_source", "interface", "side")
+	dens <- estimate_density_1d(data, group, field)
+	p <- ggplot(data=dens, na.rm=TRUE) + parts +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		ggtitle("Aromatic dSASA Fraction")
+	plot_field(p, "dSASA_aromatic_fraction_den_by_interface", grid=side~interface)
+
+
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R
new file mode 100644
index 0000000..a9bb62a
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R
@@ -0,0 +1,86 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+ +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_SASA-by_residue_avgs_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic dSASA and SASA information", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #First we run on all the interfaces in the database + + + + + + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + + sele = " + SELECT + avg_per_residue_dSASA, + avg_per_residue_SASA_int, + avg_per_residue_SASA_sep, + interface, + side + FROM + interface_sides + " + plot_field = function(p, plot_id, grid = NULL){ + + if (! is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + #AvgFields + fields = c("avg_per_residue_dSASA", + "avg_per_residue_SASA_int", + "avg_per_residue_SASA_sep") + + #parts = list(plot_parts, scale_x_continuous("SASA", limit=c(0, 100))) + parts = list(plot_parts, xlab("SASA")) + for(field in fields){ + fieldSP = unlist(strsplit(field, split="_")) + group = c("sample_source", "side") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste(capwords(fieldSP), collapse=" ")) + plot_field(p, paste(field, "den_sides_all", sep="_"), grid=side ~ .) 
+ + group=c("sample_source", "interface", "side") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste(capwords(fieldSP), collapse=" ")) + plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface) + + } + + + #aromatic dSASA fraction vs packstat + + #aromatic dSASA fraction vs sc_value + + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R new file mode 100644 index 0000000..8cb1c7c --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R @@ -0,0 +1,126 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_SASA-by_residue_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs all information of individual interface residues. 
+Should be same interface, same numbering scheme / decoy set for this to have any meaning.", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + sele <-" + SELECT + interface_residues.interface as interface, + interface_residues.relative_dSASA_fraction as dSASA_fraction, + interface_residues.dSASA as dSASA, + interface_residues.dSASA_sc as dSASA_sc, + (dSASA - dSASA_sc) as dSASA_bb, + interface_residues.dhSASA as dhSASA, + interface_residues.dhSASA_sc as dhSASA_sc, + (dhSASA - dhSASA_sc) as dhSASA_bb, + interface_residues.dhSASA_rel_by_charge as dhSASA_rel_by_charge + FROM + interface_residues" + + #Density plots + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + ##Overall plots for all residues: Add Side data once we have this. 
+ + #Densities + + #dSASA + fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") + for (field in fields){ + group = c("sample_source") + dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab(field) + plot_field(p, paste(field, "residue_>0dSASA_dens_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab(field) + plot_field(p, paste(field, "residue_>0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) + + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab(field) + plot_field(p, paste(field, "residue_dens_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab(field) + plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) 
+ } + #dSASA fraction + field = "dSASA_fraction" + group = c("sample_source") + dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + scale_x_continuous("SASA buried/SASA separated", limit = c(0, 1.0)) + plot_field(p, paste(field, "residue_>0dSASA_dens_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + scale_x_continuous("SASA buried/SASA separated", limit = c(0, 1.0)) + plot_field(p, paste(field, "residue_>0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) + + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + scale_x_continuous("SASA buried/SASA separated", limit = c(0, 1.0)) + plot_field(p, paste(field, "residue_dens_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + scale_x_continuous(field, limit = c(0, 1.0)) + plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) + + + #Per residue data. This may get crazy. 
+}))
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R
new file mode 100644
index 0000000..918f748
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R
@@ -0,0 +1,140 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "int_SASA-by_residue_vs",
+author = "Jared Adolf-Bryfogle",
+brief_description = "Graphs all information of individual interface residues.
+Should be same interface, same numbering scheme / decoy set for this to have any meaning.",
+feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+  # Scatter plots of per-residue dSASA, dSASA fraction and dG read from interface_residues.
+  plot_parts <- list(
+    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+    scale_y_continuous("Feature Density"),
+    theme_bw())
+  # NOTE(review): plot_parts is never referenced below. plot_field() adds optional facets and a bottom legend, then saves.
+  plot_field = function(p, plot_id, grid = NULL){
+    # save_plots() is given no plot object, so it presumably saves the most recently built ggplot - TODO confirm.
+    if (! is.null(grid)){
+      p <- p+ facet_grid(facets=grid)
+    }
+    if(nrow(sample_sources) <= 3){
+      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+    }
+    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+  }
+
+  sele <-"
+  SELECT
+    interface_residues.interface as interface,
+    interface_residues.relative_dSASA_fraction as dSASA_fraction,
+    interface_residues.dSASA as dSASA,
+    interface_residues.dG as dG,
+    interface_residues.energy_int as energy_int,
+    interface_residues.energy_sep as energy_sep
+  FROM
+    interface_residues"
+
+  #Density plots
+
+  data = query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+  ##Overall plots for all residues: Add Side data once we have this.
+
+  #Scatterplots
+
+  #dSASA vs dSASA fraction
+  parts = list(
+    geom_point(size=.5, pch="o"),
+    stat_smooth(method = lm),
+    geom_density2d(),
+    scale_x_continuous("dSASA"),
+    scale_y_continuous("dSASA fraction", limits = c(0, 1.0)),
+    theme_bw(),
+    ggtitle("Residue dSASA vs dSASA Fraction"))
+  # NOTE(review): the first plot id below starts with 'SASA_vs' while its siblings use 'dSASA_vs'; kept because it names output files.
+  #[data$dSASA > 0 & data$dSASA_fraction > 0,]
+  p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction)) +
+    parts
+  plot_field(p, "SASA_vs_dSASA_fraction_residue_by_all", grid = sample_source ~ .)
+
+  p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction)) +
+    parts
+  plot_field(p, "dSASA_vs_dSASA_fraction_residue_by_interface", grid = sample_source ~ interface)
+
+  #->ss_overlay functions complain cannot coerce type 'symbol' to vector of type 'double' for some reason
+
+  p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction, color=factor(sample_source))) +
+    parts
+  plot_field(p, "dSASA_vs_dSASA_fraction_residue_by_all_W_ss_overlay")
+
+  p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction, color=factor(sample_source))) +
+    parts
+  plot_field(p, "dSASA_vs_dSASA_fraction_residue_by_interface_W_ss_overlay", grid=interface ~ .)
+
+  #dSASA vs dG
+  parts = list(
+    xlab("dSASA"),
+    #scale_y_continuous("REU", limit = c(-15, 15)),
+    ylab("REU"),
+    geom_point(size=.5, pch="o"),
+    stat_smooth(method = lm),
+    stat_density2d(),
+    theme_bw(),
+    ggtitle("Residue dSASA vs dG"))
+
+  #[data$dSASA > 0 & -15 < data[field] & data[field] < 15,]
+  p <- ggplot(data=data, aes(x=dSASA, y=dG)) +
+    parts
+  plot_field(p, "dSASA_vs_dG_residue_by_all", grid = sample_source ~ .)
+
+  p <- ggplot(data=data, aes(x=dSASA, y=dG)) +
+    parts
+  plot_field(p, "dSASA_vs_dG_residue_by_interface", grid = sample_source ~ interface)
+
+  p <- ggplot(data=data, aes(x=dSASA, y=dG, color=sample_source)) +
+    parts
+  plot_field(p, "dSASA_vs_dG_residue_by_all_W_ss_overlay")
+
+  p <- ggplot(data=data, aes(x=dSASA, y=dG, color=sample_source)) +
+    parts
+  plot_field(p, "dSASA_vs_dG_by_residue_interface_W_ss_overlay", grid=interface ~ .)
+
+  #dG vs dSASA_fraction
+  parts = list(
+    scale_x_continuous("dSASA fraction", limits = c(0, 1.0)),
+    ylab("REU"),
+    #scale_y_continuous("REU", limit=c(-15, 15)),
+    geom_point(size=.5, pch="o"),
+    stat_smooth(method = lm),
+    stat_density2d(),
+    theme_bw(),
+    ggtitle("Residue dSASA fraction vs dG"))
+
+  #[data$dSASA_fraction > 0 & -10 < data[field] & data[field] < 15,]
+  p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction, color=sample_source)) +
+    parts
+  plot_field(p, "dSASA_fraction_vs_dG_residue_by_all_W_ss_overlay")
+
+  p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction, color=sample_source)) +
+    parts
+  plot_field(p, "dSASA_fraction_vs_dG_residue_by_interface_W_ss_overlay", grid=interface ~ .)
+
+  p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction)) +
+    parts
+  plot_field(p, "dSASA_fraction_vs_dG_residue_by_all", grid = sample_source ~ .)
+
+  p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction)) +
+    parts
+  plot_field(p, "dSASA_fraction_vs_dG_residue_by_interface", grid = sample_source ~ interface)
+
+  #Per residue data. This may get crazy.
+}))
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R
new file mode 100644
index 0000000..274c030
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R
@@ -0,0 +1,140 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "int_SASA-dSASA_vs",
+author = "Jared Adolf-Bryfogle",
+brief_description = "Graphs basic dSASA and SASA information",
+feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+
+  #First we run on all the interfaces in the database
+
+
+
+  sele = "
+  SELECT
+    dSASA,
+    dSASA_hphobic,
+    dSASA_polar,
+    dG,
+    interface
+  FROM
+    interfaces"
+
+  int_data = query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+
+  # NOTE(review): int_data is not used by any plot below; every plotted column comes from interface_sides.
+
+  #dSASA sides
+  sele = "
+  SELECT
+    dG,
+    dSASA,
+    dSASA_sc,
+    dSASA - dSASA_sc as dSASA_bb,
+    dhSASA,
+    dhSASA_sc,
+    dhSASA - dhSASA_sc as dhSASA_bb,
+    dSASA-dhSASA as dpSASA,
+    dhSASA_rel_by_charge,
+    aromatic_dSASA_fraction,
+    interface_nres,
+    interface,
+    side
+  FROM
+    interface_sides
+  "
+  plot_field = function(p, plot_id, grid = NULL){
+    # Adds optional facets and a bottom legend; save_plots() gets no plot object (presumably saves the last ggplot - TODO confirm).
+    if (! is.null(grid)){
+      p <- p+ facet_grid(facets=grid)
+    }
+    if(nrow(sample_sources) <= 3){
+      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+    }
+    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+  }
+
+  data = query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+  #print(data)
+
+
+
+  #ScatterPlots
+
+
+  parts = list(
+    geom_point(size=.75),
+    #stat_smooth(color="grey"),
+    stat_smooth(method=lm),
+    geom_density2d(size=.5),
+    #stat_density2d(aes(fill = ..level..), geom="polygon"),
+    #stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE),
+    theme_bw())
+
+  fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge")
+  for (f in fields){
+    # Plot from `data` (interface_sides): that query returns every column listed in `fields`; the interfaces query does not.
+    #dSASA vs dG. aes_string() maps the column named by f; aes(x = f) would map the literal string.
+    p <- ggplot(data=data[data$dG<=5000 & data$dG>-5000,], aes_string(x = f, y = "dG", colour="sample_source")) + parts +
+      ggtitle(paste(f,"vs dG")) +
+      scale_x_continuous("SASA") +
+      scale_y_continuous("REU")
+    plot_field(p, paste(f, "vs_dG_by_all", sep="_"))
+    plot_field(p, paste(f, "vs_dG_by_interface", sep="_"), grid=interface ~ .)
+
+#    #dSASA vs aromatic dSASA fraction. Shouldn't increase, but worth a plot
+#    p <- ggplot(data= data, aes(x = dSASA, y = aromatic_dSASA_fraction, colour=sample_source)) + parts +
+#      ggtitle(paste(field,"vs Aromatic dSASA Fraction")) +
+#      scale_x_continuous("SASA") +
+#      scale_y_continuous("fraction", limit=c(0, 1.0))
+#    plot_field(p, paste(field, "vs_aromatic_dSASA_fraction", sep="_"), grid= side ~ .)
+
+#    #dSASA vs interface nres. Again, shouldn't be interesting. Mainly a control.
+#    p <- ggplot(data = data, aes(y = interface_nres, x = dSASA, colour=sample_source)) + parts +
+#      ggtitle(paste(field, "vs Interface nres")) +
+#      scale_y_continuous("n") +
+#      scale_x_continuous("SASA")
+#    plot_field(p, paste("control", field, "vs_interface_nres", sep="_"), grid= side ~ .)
+
+    #dSASA vs 'energy density' from Ben Strange's paper - Should be pretty much flat for natives
+
+  }
+
+  data$e_density = data$dG/data$dSASA
+  field = c("dSASA")
+  p <- ggplot(data = data, aes(x = dSASA, y = e_density, colour=sample_source)) + parts +
+    ggtitle(paste(field, "vs Interface energy density")) +
+    scale_x_continuous("dSASA") +
+    scale_y_continuous("dG/dSASA")
+  plot_field(p, paste("control", field, "vs_energy_density", sep="_"), grid=side ~ .)
+
+#  #dhSASA vs dpSASA
+#  p <- ggplot(data = data, aes(y = dpSASA, x = dhSASA, colour=sample_source)) + parts +
+#    ggtitle("dhSASA vs dpSASA") +
+#    scale_y_continuous("dSASA") +
+#    scale_x_continuous("dSASA")
+#  plot_field(p, "dhSASA_vs_dpSASA", grid=side ~ .)
+
+  #Control - Should be flat?
+  p <- ggplot(data = data, aes(y = dSASA_bb, x = dSASA_sc, colour=sample_source)) + parts +
+    ggtitle("dSASA_sc vs dSASA_bb") +
+    scale_x_continuous("Sidechain dSASA") +
+    scale_y_continuous("Backbone dSASA")
+  plot_field(p, "control_dSASA_sc_vs_dSASA_bb", grid=side ~ .)
+
+  p <- ggplot(data = data, aes(y = dhSASA_sc, x = dhSASA_bb, colour=sample_source)) + parts +
+    ggtitle("dhSASA_sc vs dhSASA_bb") +
+    scale_y_continuous("Sidechain dhSASA") +
+    scale_x_continuous("Backbone dhSASA")
+  plot_field(p, "control_dhSASA_sc_vs_dhSASA_bb", grid=side ~ .)
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R
new file mode 100644
index 0000000..5db8b91
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R
@@ -0,0 +1,254 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "int_composition-dG_dSASA_stats_by_restype",
+author = "Jared Adolf-Bryfogle",
+brief_description = "Graphs basic composition of the interfaces, restypes, etc",
+feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures","ResidueFeatures", "ResidueTypesFeatures", "PdbDataFeatures"),
+
+
+run=function(self, sample_sources, output_dir, output_formats){
+
+  # Per-restype scatter and density plots of dSASA, dSASA fraction and dG for interface residues.
+  plot_parts <- list(
+    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+    scale_y_continuous("Feature Density"),
+    theme_bw())
+  # NOTE(review): plot_parts and plot_field are only referenced from commented-out code in this file.
+  plot_field = function(p, plot_id, grid = NULL, ssLegend=T){
+
+    if (! is.null(grid)){
+      p <- p+ facet_grid(facets=grid)
+    }
+    if(ssLegend){
+      if(nrow(sample_sources) <= 3){
+        p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+      }
+    }
+    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+  }
+
+  #Restype composition - Overall interface - this will change to side-chain only contribution.
+
+  sele <-"
+  SELECT DISTINCT
+    residues.name3 as restype,
+    interface_residues.interface as interface,
+    interface_residues.relative_dSASA_fraction as dSASA_fraction,
+    interface_residues.dSASA as dSASA,
+    interface_residues.dSASA_sc as dSASA_sc,
+    (dSASA - dSASA_sc) as dSASA_bb,
+    interface_residues.dhSASA as dhSASA,
+    interface_residues.dhSASA_sc as dhSASA_sc,
+    (dhSASA - dhSASA_sc) as dhSASA_bb,
+    interface_residues.dhSASA_rel_by_charge as dhSASA_rel_by_charge,
+    interface_residues.dG as dG
+  FROM
+    residues,
+    interface_residues
+  WHERE
+    interface_residues.struct_id == residues.struct_id and
+    interface_residues.resNum == residues.resNum"
+
+  res_data = query_sample_sources(sample_sources, sele, char_as_factor=F)
+
+  #Scatterplot of dSASA vs dSASA fraction for each restype.
+  #All interfaces - Coloring by Sample Source may be too damn confusing, so change this if need be.
+
+  #dSASA vs dSASA fraction per restype
+  p <- ggplot(data=res_data[res_data$dSASA > 0 & res_data$dSASA_fraction > 0,], aes(x = dSASA_fraction, y=dSASA, color=sample_source)) +
+    #geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) +
+    geom_point(size=.5, pch="o") +
+    stat_smooth(method=lm) +
+    geom_density2d() +
+    theme_bw() +
+    ggtitle("dSASA fraction vs dSASA per restype") +
+    facet_wrap(~ restype, ncol=4) +
+    xlab("SASA buried/SASA separated") +
+    ylab("dSASA")
+  if(nrow(sample_sources) <= 3){
+    p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+  }
+  save_plots(self, "dSASA_fraction_vs_dSASA_per_restype_>0dSASA", sample_sources, output_dir, output_formats)
+  # save_plots() is given no plot object; presumably it saves the most recently built ggplot - TODO confirm.
+
+#  #dSASA fractions bins per restype
+#  p <- ggplot(data=res_data[res_data$dSASA > 0,], aes(x=dSASA_fraction, fill=factor(restype))) +
+#    geom_bar() +
+#    ggtitle("dSASA fraction bins") +
+#    scale_fill_hue(l=40)+
+#    theme_bw()
+#  plot_field(p, "dSASA_fraction_by_restype_bins_>0dSASA_by_all", grid=sample_source ~ ., ssLegend=F)
+#  plot_field(p, "dSASA_fraction_by_restype_bins_>0dSASA_by_interface", grid=sample_source ~ interface, ssLegend=F)
+
+#  #dSASA bins per restype
+#  p <- ggplot(data=res_data[res_data$dSASA > 0,], aes(x=dSASA, fill=factor(restype))) +
+#    geom_bar() +
+#    ggtitle("dSASA bins") +
+#    scale_fill_hue(l=40)+
+#    theme_bw()
+#  plot_field(p, "dSASA_by_restype_bins_>0dSASA_by_all", grid=sample_source ~ ., ssLegend=F)
+#  plot_field(p, "dSASA_by_restype_bins_>0dSASA_by_interface", grid=sample_source ~ interface, ssLegend=F)
+
+  #dSASA density per restype
+  parts = list(
+    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+    ylab("Feature Density"),
+    xlab("SASA"),
+    theme_bw())
+
+  group = c("sample_source", "restype")
+  field = "dSASA_fraction"
+  dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field)
+  p <- ggplot(data=dens, na.rm=T) + parts +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    xlab("SASA buried/SASA separated") +
+    ggtitle("dSASA fraction per restype")
+  if(nrow(sample_sources) <= 3){
+    p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+  }
+  p <- p + facet_wrap(~ restype, ncol=4)
+  save_plots(self, "dSASA_fraction_by_restype_den_>0dSASA_by_all", sample_sources, output_dir, output_formats)
+
+  # One density plot per SASA column, using only residues with dSASA > 0.
+  fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge")
+  for (field in fields) {
+    group = c("sample_source", "restype")
+    dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field)
+    p <- ggplot(data=dens, na.rm=T) + parts +
+      geom_line(aes(x, y, colour=sample_source), size=1.2) +
+      ggtitle( paste(field,"per restype", sep=" "))
+    if(nrow(sample_sources) <= 3){
+      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+    }
+    p <- p + facet_wrap(~ restype, ncol=4)
+    save_plots(self, paste(field, "by_restype_den_>0dSASA_by_all", sep="_"), sample_sources, output_dir, output_formats)
+  }
+
+  #dG by restype
+  group = c("sample_source", "restype")
+  field = "dG"
+  dens <- estimate_density_1d(res_data, group, field)
+  p <- ggplot(data=dens, na.rm=T) + parts +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("dG per restype") +
+    xlab("REU")
+  if(nrow(sample_sources) <= 3){
+    p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+  }
+  p <- p + facet_wrap(~ restype, ncol=4)
+  save_plots(self, "dG_by_restype_den_by_all", sample_sources, output_dir, output_formats)
+  # Same dG density, restricted to residues with dSASA > 0.
+  group = c("sample_source", "restype")
+  field = "dG"
+  dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field)
+  p <- ggplot(data=dens, na.rm=T) + parts +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("dG per restype") +
+    xlab("REU")
+  if(nrow(sample_sources) <= 3){
+    p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+  }
+  p <- p + facet_wrap(~ restype, ncol=4)
+  save_plots(self, "dG_by_restype_den_>0dSASA_by_all", sample_sources, output_dir, output_formats)
+  # Same dG density, restricted to residues with dSASA == 0.
+  group = c("sample_source", "restype")
+  field = "dG"
+  dens <- estimate_density_1d(res_data[res_data$dSASA == 0,], group, field)
+  p <- ggplot(data=dens, na.rm=T) + parts +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("dG per restype") +
+    xlab("REU")
+  if(nrow(sample_sources) <= 3){
+    p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+  }
+  p <- p + facet_wrap(~ restype, ncol=4)
+  save_plots(self, "dG_by_restype_den_@0dSASA_by_all", sample_sources, output_dir, output_formats)
+
+  #Hard to see:
+  #group = c("sample_source", "restype")
+  #field = "dSASA_fraction"
+  #dens <- estimate_density_1d(res_data[res_data$dSASA_fraction > 0,], group, field)
+  #p <- ggplot(data=dens, na.rm=T) + parts +
+  # geom_line(aes(x, y, color=restype)) +
+  # ggtitle("dSASA fraction per restype") +
+  # facet_grid(sample_source ~ .)
+  #save_plots(self, "dSASA_fraction_per_restype_den_combined", sample_sources, output_dir, output_formats)
+
+  #group = c("sample_source", "restype")
+  #field = "dSASA"
+  #dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field)
+  #p <- ggplot(data=dens, na.rm=T) + parts +
+  # geom_line(aes(x, y, color=restype)) +
+  # ggtitle("dSASA per restype") +
+  # facet_grid(sample_source ~ .)
+  #save_plots(self, "dSASA_per_restype_den_combined", sample_sources, output_dir, output_formats)
+
+
+
+  #dG vs dSASA by restype
+  p <- ggplot(data=res_data[res_data$dSASA > 0,], aes(x = dG, y=dSASA, color=sample_source)) +
+    #geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) +
+    geom_point(size=.5, pch="o") +
+    stat_smooth(method=lm) +
+    geom_density2d() +
+    theme_bw() +
+    ggtitle("dG vs dSASA per restype") +
+    facet_wrap(~ restype, ncol=4) +
+    xlab("REU") +
+    ylab("dSASA")
+  if(nrow(sample_sources) <= 3){
+    p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+  }
+  save_plots(self, "dSASA_vs_dG_by_restype_>0dSASA_by_all", sample_sources, output_dir, output_formats)
+
+
+
+
+  # Everything below this point is commented-out experimental plotting code.
+  #group = c("sample_source", "restype", "interface")
+  #field = "dSASA_fraction"
+  #dens <- estimate_density_1d(res_data[res_data$dSASA_fraction > 0,], group, field)
+  #p <- ggplot(data=dens, na.rm=T, fill=restype) + parts +
+  # geom_line(aes(x, y, color=restype)) +
+  # ggtitle("Hotspot dSASA density") +
+  # facet_grid(sample_source ~ interface)
+
+  #save_plots(self, "dSASA_fraction_per_restype_den_combined_by_interface", sample_sources, output_dir, output_formats)
+
+
+  #group = c("sample_source", "restype", "interface")
+  #field = "dSASA_fraction"
+  #dens <- estimate_density_1d(res_data, group, field)
+  #p <- ggplot(data=dens, na.rm=T) + parts +
+  #geom_line(aes(x, y, colour=sample_source), size=1.2) +
+  #ggtitle("Hotspot dSASA density of Interface residue")
+  #plot_field(p, "dSASA_fraction_per_restype_by_interface", grid=restype ~ interface)
+  #p <- ggplot(data=res_data[res_data$dSASA_fraction > .05,], fill=sample_source,weight=dSASA_fraction) +
+  # geom_histogram(aes(x=restype), position="dodge")+
+  # theme_bw() +
+  # ggtitle("Interface ResType Composition")
+  #scale_x_continuous("restype") +
+  #scale_y_continuous("n")
+  #plot_field(p, "restype_composition_weighted_by_dSASA_frac")
+  #plot_field(p, "restype_composition_weighted_by_dSASA_frac_by_interface", grid=interface ~ .)
+
+  #restype vs avg dSASA
+  #p <- ggplot(data=res_data) +
+  #geom_histogram(aes(x=mean(restype)), position="dodge") +
+  # theme_bw() +
+  # ggtitle("Interface ResType Composition")
+  #scale_x_continuous("restype") +
+  #scale_y_continuous("n")
+  #plot_field(p, "restype_vs_avg_dSASA_fraction")
+  #plot_field(p, "restype_vs_avg_dSASA_fraction_by_interface", grid=interface ~ .)
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R
new file mode 100644
index 0000000..bab562c
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R
@@ -0,0 +1,160 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "int_composition-hbond_stats_by_restype",
+author = "Jared Adolf-Bryfogle",
+brief_description = "Graphs basic hbond densities for interface - interface hbonds",
+feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures", "HBondFeatures","ResidueFeatures", "ResidueTypesFeatures", "PdbDataFeatures"),
+run=function(self, sample_sources, output_dir, output_formats){
+
+  #Thanks to Matt O'Meara's help for this query - Very slow:
+  # This query was marked BROKEN: SQLite evaluates chained `a == b == c` as `(a == b) == c`, so the
+  # joins are now written pairwise, and struct_id is selected because the ddply() calls below group by it.
+  # NOTE(review): the rewritten query has not been run against a features database - confirm before relying on it.
+  sele = "
+  SELECT
+    hb.struct_id as struct_id,
+    hb.energy as energy,
+    don_res.interface as interface,
+    don_restype.name3 as don_name3,
+    acc_restype.name3 as acc_name3
+  FROM
+    interface_residues AS don_res,
+    interface_residues AS acc_res,
+    residues AS don_restype,
+    residues AS acc_restype,
+    hbond_sites AS don,
+    hbond_sites AS acc,
+    hbonds AS hb
+  WHERE
+    ((don_res.side == 'side1' AND
+    acc_res.side == 'side2') OR
+    (don_res.side =='side2' AND
+    acc_res.side =='side1')) AND
+    acc_res.interface == don_res.interface AND
+    don.resNum == don_res.resNum AND don_res.resNum == don_restype.resNum AND
+    acc.resNum == acc_res.resNum AND acc_res.resNum == acc_restype.resNum AND
+    hb.don_id == don.site_id AND
+    hb.acc_id == acc.site_id AND
+    don_res.struct_id == acc_res.struct_id AND acc_res.struct_id == acc.struct_id AND
+    acc.struct_id == don.struct_id AND
+    don.struct_id == hb.struct_id AND
+    don.struct_id == don_restype.struct_id AND
+    don_restype.struct_id == acc_restype.struct_id
+  "
+
+
+  data = query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+  data$pair_name = paste(data$don_name3, data$acc_name3, sep="-")
+
+  plot_parts <- list(
+    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+    scale_y_continuous("Feature Density"),
+    theme_bw())
+
+  plot_field = function(p, plot_id, grid = NULL){
+    # save_plots() is given no plot object; presumably it saves the most recently built ggplot - TODO confirm.
+    if (! is.null(grid)){
+      p <- p+ facet_grid(facets=grid)
+    }
+    if(nrow(sample_sources) <= 3){
+      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+    }
+    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+  }
+  # Same as plot_field, but facets with facet_wrap(grid, ncol=columns).
+  plot_field_wrap = function(p, plot_id, grid, columns = 4) {
+    p <- p + facet_wrap(grid, ncol=columns)
+    if(nrow(sample_sources) <= 3){
+      p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+    }
+    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+  }
+
+
+  #Hbond Energy density
+  field = "energy"
+  group = c("sample_source", "don_name3")
+  dens <- estimate_density_1d(data, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + plot_parts +
+    xlab("REU") +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface hbond energies by donor")
+  plot_field_wrap(p, paste("hbond", field, "den_by_donor_by_all", sep="_"), ~don_name3)
+
+  group = c("sample_source", "acc_name3")
+  dens <- estimate_density_1d(data, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + plot_parts +
+    xlab("REU") +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface hbond energies by acceptor")
+  plot_field_wrap(p, paste("hbond", field, "den_by_acceptor_by_all", sep="_"), ~acc_name3)
+
+  group = c("sample_source", "pair_name")
+  dens <- estimate_density_1d(data, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + plot_parts +
+    xlab("REU") +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface hbond energies by pair")
+  plot_field_wrap(p, paste("hbond", field, "den_by_don_acc_pair_by_all", sep="_"), ~pair_name)
+
+
+
+  #Hbonds/model or per interface
+  #There is probably a better way to do this.
+  # Each ddply() returns one row per group with n = number of hbond rows in that group.
+  acceptor_counts <- ddply(data, .(interface, sample_source, struct_id, acc_name3), function(int_data){
+    n = length(int_data$energy)
+    df = data.frame(n=n)
+  })
+
+  donor_counts <- ddply(data, .(interface, sample_source, struct_id, don_name3), function(int_data){
+    n = length(int_data$energy)
+    df = data.frame(n=n)
+  })
+
+  pair_counts <- ddply(data, .(interface, sample_source, struct_id, pair_name), function(int_data){
+    n = length(int_data$energy)
+    df = data.frame(n=n)
+  })
+
+
+  field = "n"
+  group = c("sample_source", "acc_name3")
+  dens <- estimate_density_1d(acceptor_counts, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + plot_parts +
+    xlab("n") + geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface hbonds by acceptor")
+  plot_field_wrap(p, paste("hbond_den_by_acceptor_by_all", sep="_"), ~acc_name3)
+
+  group = c("sample_source", "don_name3")
+  dens <- estimate_density_1d(donor_counts, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + plot_parts +
+    xlab("n") + geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface hbonds by donor")
+  plot_field_wrap(p, paste("hbond_den_by_donor_by_all", sep="_"), ~don_name3)
+
+  group = c("sample_source", "pair_name")
+  dens <- estimate_density_1d(pair_counts, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + plot_parts +
+    xlab("n") + geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface hbonds by pair")
+  plot_field_wrap(p, paste("hbond_den_by_don_acc_pair_by_all", sep="_"), ~pair_name)
+
+
+
+
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R
new file mode 100644
index 0000000..7a0db5e
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R
@@ -0,0 +1,196 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+
+check_setup()
+
+feature_analyses <- c(feature_analyses, new("FeaturesAnalysis",
+id = "int_composition_den",
+author = "Jared Adolf-Bryfogle",
+brief_description = "Graphs basic composition of the interfaces, restypes, etc",
+feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures","ResidueFeatures", "ResidueTypesFeatures", "PdbDataFeatures"),
+
+
+run=function(self, sample_sources, output_dir, output_formats){
+  # Density plots of per-side aromatic fraction and residue count, then a residue-type composition bar chart.
+  #Aromatic Composition
+  sele <- "
+  SELECT
+    aromatic_fraction,
+    interface_nres,
+    interface,
+    side
+  FROM
+    interface_sides
+    "
+  plot_parts <- list(
+    geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)),
+    scale_y_continuous("Feature Density"),
+    theme_bw())
+
+  plot_field = function(p, plot_id, grid = NULL, ssLegend=TRUE){
+    # save_plots() is given no plot object; presumably it saves the most recently built ggplot - TODO confirm.
+    if (! is.null(grid)){
+      p <- p+ facet_grid(facets=grid)
+    }
+    if(ssLegend){
+      if(nrow(sample_sources) <= 3){
+        p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+      }
+    }
+    save_plots(self, plot_id, sample_sources, output_dir, output_formats)
+  }
+
+  fields = c("aromatic_fraction")
+  data = query_sample_sources(sample_sources, sele, char_as_factor=FALSE)
+
+  for(field in fields){
+    fieldSP = unlist(strsplit(field, split="_"))
+    parts = list(plot_parts, xlab("fraction"))
+
+    group = c("sample_source", "side")
+    dens <- estimate_density_1d(data, group, field)
+    p <- ggplot(data=dens, na.rm=TRUE) + parts +
+      geom_line(aes(x, y, colour=sample_source), size=1.2) +
+      ggtitle(paste(capwords(fieldSP), collapse=" "))
+    plot_field(p, paste(field, "den_sides_by_all", sep="_"), grid=side ~ .)
+
+    group = c("sample_source", "interface", "side")
+    dens <- estimate_density_1d(data, group, field)
+    p <- ggplot(data=dens, na.rm=TRUE) + parts +
+      geom_line(aes(x, y, colour=sample_source), size=1.2) +
+      ggtitle(paste(capwords(fieldSP), collapse=" "))
+    plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface)
+  }
+
+  parts = list(plot_parts, scale_x_continuous("number of interface residues"))
+
+  field = "interface_nres"
+  group = c("sample_source", "side")
+  dens <- estimate_density_1d(data, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + parts +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface nres")
+  plot_field(p, paste(field, "den_sides_by_all", sep="_"), grid=side ~ .)
+
+  group = c("sample_source", "interface", "side")
+  dens <- estimate_density_1d(data, group, field)
+  p <- ggplot(data=dens, na.rm=TRUE) + parts +
+    geom_line(aes(x, y, colour=sample_source), size=1.2) +
+    ggtitle("Interface nres")
+  plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface)
+
+  #Restype composition - Overall interface
+  sele <-"
+  SELECT
+    interface_residues.interface as interface,
+    residues.name3 as restype,
+    residue_type.name1 as restype1,
+    interface_residues.SASA_int as SASA_int,
+    interface_residues.dSASA as dSASA,
+    interface_residues.dSASA - interface_residues.dSASA_sc as dSASA_bb,
+    interface_residues.dSASA_sc as dSASA_sc,
+    interface_residues.dhSASA as dhSASA,
+    interface_residues.dG as dG,
+
+    interface_residues.relative_dSASA_fraction as dSASA_fraction,
+    interface_residues.struct_id as struct_id
+  FROM
+    residues,
+    interface_residues,
+    residue_type
+  WHERE
+    interface_residues.struct_id == residues.struct_id and
+    interface_residues.resNum == residues.resNum and
+    residues.name3==residue_type.name3
+  "
+  res_data = query_sample_sources(sample_sources, sele, char_as_factor=TRUE)
+
+
+
+  ##Histogram - only plot residues that have a dSASA fraction > 5 % - change this for sidechains once we have that data
+
+  ##### Typical way is not working, so we will have to do it manually. #####
+
+  #Restype Composition - Classical, not working!
+#  p <- ggplot(data=res_data, aes(x=restype1)) +
+#    geom_bar(position="dodge", aes(y = ..density.., fill=sample_source), binwidth=1)+
+#    theme_bw() +
+#    ggtitle("Interface ResType Composition") +
+#    scale_y_continuous(label=percent)
+#  plot_field(p, "restype_composition_by_all_test")
+#  plot_field(p, "restype_composition_by_interface_test", grid=interface ~ .)
+
+  # For each (sample_source, interface, struct_id) group, computes the fraction of its rows
+  # that carry each restype1 value; the result has one row per group and restype1,
+  # with that fraction in column `perc`.
+  get_percent <- function(d) {
+    ddply(d, .(sample_source, interface, struct_id), function(per_struct_id){
+      ddply(per_struct_id, .(restype1), function(per_restype){
+        data.frame(perc = nrow(per_restype)/nrow(per_struct_id))
+      })
+    })
+  }
+
+
+  #Restype Composition
+
+  p <- ggplot(data=get_percent(res_data), aes(x=restype1)) +
+    geom_bar(position="dodge", stat="identity", aes(y=perc, fill=sample_source))+
+    theme_bw() +
+    ggtitle("Interface ResType Composition") +
+    scale_y_continuous(labels=percent) +
+    ylab("% of Sample Source")
+  plot_field(p, "restype_composition_by_all")
+  plot_field(p, "restype_composition_by_interface", grid=interface ~ .)
+
+  #Need to know how this compares relative to the overall restype composition!
+#  fields = c("dSASA", "dSASA_bb", "dSASA_sc", "SASA_int", "dSASA_fraction")
+#  for (field in fields){
+#
+#
+#    p <- ggplot(data=get_percent(res_data[res_data[field] == 0,]), aes(x = restype1, fill=sample_source)) +
+#      geom_bar(position="dodge", stat="identity", aes(y=perc))+
+#      theme_bw() +
+#      scale_y_continuous(label=percent) +
+#      ggtitle(paste("Interface ResType Composition @0", field))
+#      #scale_x_continuous("restype") +
+#
+#    plot_field(p, paste("restype_composition_@_0",field, "by_all", sep="_"))
+#    plot_field(p, paste("restype_composition_@_0",field, "by_interface", sep="_"), grid=interface ~ .)
+#
+#    v = 0
+#    p <- ggplot(data=get_percent(res_data[res_data[field] > v,]), aes(x = restype1, fill=sample_source)) +
+#      geom_bar(position="dodge", stat="identity", aes(y=perc))+
+#      theme_bw() +
+#      scale_y_continuous(label=percent) +
+#      ggtitle(paste("Interface ResType Composition >", v, field))
+#      #scale_x_continuous("restype") +
+#
+#    plot_field(p, paste("restype_composition_>",v, field, "by_all", sep="_"))
+#    plot_field(p, paste("restype_composition_>",v, field, "by_interface", sep="_"), grid=interface ~ .)
+#
+#  }
+
+#  field = "dSASA_fraction"
+#  for (v in c(.25, .75)){
+#
+#    p <- ggplot(data=get_percent(res_data[res_data[field] > v,]), aes(x = restype1, fill=sample_source)) +
+#      geom_bar(position="dodge", stat="identity", aes(y=perc))+
+#      theme_bw() +
+#      scale_y_continuous(label=percent) +
+#      ggtitle(paste("Interface ResType Composition >", v, field))
+#      #scale_x_continuous("restype") +
+#
+#    plot_field(p, paste("restype_composition_>",v, field, "by_all", sep="_"))
+#    plot_field(p, paste("restype_composition_>",v, field, "by_interface", sep="_"), grid=interface ~ .)
+
+
+
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R
new file mode 100644
index 0000000..7758a79
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R
@@ -0,0 +1,114 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
+# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
+ +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_energies-dG_vs", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic interface energy information", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures", "StructureScoreFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + sele <- " + SELECT + interfaces.dG as dG, + interfaces.dG_cross as dG_cross, + interfaces.delta_unsatHbonds as delta_unsatHbonds, + interfaces.hbond_E_fraction as hbond_E_fraction, + interfaces.dSASA as dSASA, + interfaces.interface as interface, + structure_scores.score_value as total_score + FROM + interfaces, + score_types, + structure_scores + WHERE + score_types.score_type_name='total_score' AND + structure_scores.score_type_id = score_types.score_type_id AND + structure_scores.struct_id = interfaces.struct_id + " + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + + data = query_sample_sources(sample_sources, sele) + data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + + parts = list( + geom_point(size=1.0, pch="o"), + #stat_smooth(color="grey"), + stat_smooth(method=lm), + geom_density2d(size=.1), + #stat_density2d(aes(fill = ..level..), geom="polygon"), + #stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE), + theme_bw()) + + parts_no_density = list( + geom_point(size=1.2, pch="o"), + stat_smooth(method=lm), + theme_bw() + ) + + #dG vs dSASA + p <- ggplot(data=data_rm_out, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs dSASA") + + ylab("SASA") + + xlab("REU") + plot_field(p, "dG_vs_dSASA_by_all") + plot_field(p, "dG_vs_dSASA_by_interface", grid=~ interface) + + #dG vs Total Energy + p <- ggplot(data=data_rm_out, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs total_score") + + ylab("REU") + + xlab("REU") + plot_field(p, "dG_vs_total_score_by_all") + plot_field(p, "dG_vs_total_score_by_interface", grid=~ interface) + + #dG vs dG_cross + p <- ggplot(data = data_rm_out, aes(x=dG, y=dG_cross, colour=sample_source)) + parts_no_density + + ggtitle("dG vs Crossterm dG") + + xlab("REU") + + ylab("REU") + plot_field(p, "dG_vs_dG_cross_by_all") + plot_field(p, "dG_vs_dG_cross_by_interface", grid= ~ interface) + + p <- ggplot(data = data_rm_out, aes(x=dG, y=dG_cross, color=dSASA)) + parts_no_density + + ggtitle("dG vs Crossterm dG") + + xlab("REU") + + ylab("REU") + + scale_fill_hue(l=40) + plot_field(p, "dG_vs_dG_cross_col_by_dSASA_by_all", grid=sample_source ~ .) 
+	plot_field(p, "dG_vs_dG_cross_col_by_dSASA_by_interface", grid=sample_source ~ interface)
+
+	#dG_cross vs dSASA: coloured by sample source, saved once overall and once faceted by interface
+	p <- ggplot(data = data_rm_out, aes(x=dG_cross, y=dSASA, colour=sample_source)) + parts_no_density +
+		ggtitle("dG_cross vs dSASA") +
+		xlab("REU") +
+		ylab("SASA")
+	plot_field(p, "dG_cross_vs_dSASA_by_all") #id now follows the <x>_vs_<y>_by_all pattern of the other plots in this script
+	plot_field(p, "dG_cross_vs_dSASA_by_interface", grid= ~ interface)
+
+
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R
new file mode 100644
index 0000000..042de0a
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R
@@ -0,0 +1,127 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org.
+ +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_energies_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic interface energy information", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + sele <- " + SELECT + interfaces.dG as dG, + interfaces.dG_cross as dG_cross, + interfaces.hbond_E_fraction as hbond_E_fraction, + interfaces.interface as interface, + structure_scores.score_value as total_score + FROM + interfaces, + score_types, + structure_scores + WHERE + score_types.score_type_name='total_score' AND + structure_scores.score_type_id = score_types.score_type_id AND + structure_scores.struct_id = interfaces.struct_id + ORDER BY dG; + " + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + + data = query_sample_sources(sample_sources, sele) + #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + + #Basic Densities + fields = c("dG", "dG_cross") + for(field in fields){ + parts = list(plot_parts, scale_x_continuous("Rosetta Energy")) + + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_all", sep="_"), ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_interface", sep="_"), grid=interface ~ .) + } + + + field = "hbond_E_fraction" + parts = list(plot_parts, xlab("fraction")) + + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Hbond Interface Energy Fraction") + plot_field(p, paste(field, "den_by_all", sep="_") ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Hbond Interface Energy Fraction") + plot_field(p, paste(field, "den_by_interface", sep="_"), grid=interface ~ .) + + #Side energies? 
+
+	#Averages per sample source and interface; the "top 10" columns use the 10 lowest dG values of each group.
+	avgs <- ddply(data, .(sample_source, interface), function(d2){
+		best <- head(sort(d2$dG), 10) #sorted here (sort() drops NA), so it no longer relies on the query's ORDER BY; head() never pads groups of < 10 rows with NA the way d2[1:10,] did
+		data.frame(m = mean(d2$dG), std_dev = sd(d2$dG), m_top10 = mean(best), std_dev_top_10 = sd(best), top = best[1])
+	})
+	p <- ggplot(data=avgs ) +
+		geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) +
+		theme_bw() +
+		ggtitle("Average Interface dG") +
+		ylab("REU") +
+		scale_x_discrete(labels = abbreviate)
+	plot_field(p, "avg_dG_by_interface", grid=interface ~ .)
+
+	#Average Top 10
+	p <- ggplot(data=avgs ) +
+		geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) +
+		theme_bw() +
+		ggtitle("Average Best 10 Interface dG") +
+		ylab("REU") +
+		scale_x_discrete(labels = abbreviate)
+	plot_field(p, "avg_dG_top_10_by_interface", grid=interface ~ .)
+
+	#Best
+	p <- ggplot(data=avgs ) +
+		geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top , fill=sample_source)) +
+		theme_bw() +
+		ggtitle("Top Interface dG") +
+		ylab("REU") +
+		scale_x_discrete(labels = abbreviate)
+	plot_field(p, "dG_top_by_interface", grid=interface ~ .)
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R
new file mode 100644
index 0000000..b91cfc5
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R
@@ -0,0 +1,119 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license.
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
+# (c) For more information, see http://www.rosettacommons.org.
Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_energies-by_residue_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs all information of individual interface residues. +Should be same interface, same numbering scheme / decoy set for this to have any meaning.", +feature_reporter_dependencies = c("InterfaceFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + sele <-" + SELECT + interface_residues.interface as interface, + interface_residues.dG as dG, + interface_residues.dSASA as dSASA, + interface_residues.energy_int as energy_int, + interface_residues.energy_sep as energy_sep + FROM + interface_residues" + + #Density plots + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + ##Overall plots for all residues: Add Side data once we have this. 
+ + #Densities + + + #Energies + fields = c("dG", "energy_int", "energy_sep") + for(field in fields){ + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "residue_dens_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) + } + + + #dG where dSASA is 0: + #data[-15 < data[field] & data[field] < 15,] + + field = "dG" + group = c("sample_source") + dens <- estimate_density_1d(data[data$dSASA == 0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous(field, limit=c(-15, 15)) + plot_field(p, paste(field, "residue_@0dSASA_dens", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data[data$dSASA == 0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous(field, limit=c(-15, 15)) + plot_field(p, paste(field, "residue_@0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) 
+ + #dG where dSASA > 0 : + field = "dG" + group = c("sample_source") + dens <- estimate_density_1d(data[data$dSASA >0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous(field, limit=c(-15, 15)) + plot_field(p, paste(field, "residue_>0dSASA_dens", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous(field, limit=c(-15, 15)) + plot_field(p, paste(field, "residue_>0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) + #Per residue data. This may get crazy. +})) \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R new file mode 100644 index 0000000..b08c009 --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -0,0 +1,151 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
+ +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_hbonds_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic hbond densities for interface - interface hbonds", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures", "HBondFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + #Thanks to Matt O'Meara's help for this query - Very slow: + + + sele = " + SELECT + hb.energy as energy, + don_res.interface as interface, + don_res.struct_id as struct_id, + hb_geom.AHdist as dis + FROM + interface_residues AS don_res, + interface_residues AS acc_res, + hbond_sites AS don, + hbond_sites AS acc, + hbonds AS hb, + hbond_geom_coords as hb_geom + WHERE + ((don_res.side== 'side1' AND + acc_res.side == 'side2') OR + (don_res.side=='side2' AND + acc_res.side=='side1')) AND + acc_res.interface == don_res.interface AND + don.resNum == don_res.resNum AND + acc.resNum == acc_res.resNum AND + hb.don_id == don.site_id AND + hb.acc_id == acc.site_id AND + hb.hbond_id == hb_geom.hbond_id AND + don_res.struct_id == acc_res.struct_id AND + acc_res.struct_id == acc.struct_id AND + acc.struct_id == don.struct_id AND + don.struct_id == hb.struct_id AND + hb.struct_id == hb_geom.struct_id + " + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + #print(sum(data$struct_id==1)) + #print(sum(data$struct_id==2)) + #print(sum(data$struct_id==3)) + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + + #Hbond Energy density + field = "energy" + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + plot_parts + + xlab("REU") + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Cross Interface Hydrogen Bond Energies") + plot_field(p, paste("hbond", field, "den_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + plot_parts + + xlab("REU") + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Cross Interface Hydrogen Bond Energies") + plot_field(p, paste("hbond", field, "den_by_interface", sep="_"),grid=~interface) + + #Hbond Distances + field = "dis" + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + plot_parts + + xlab("Angstroms") + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Cross Interface Hydrogen Bond Distances") + plot_field(p, paste("hbond", field, "den_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + plot_parts + + xlab("Angstroms") + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Cross Interface Hydrogen Bond Distances") + plot_field(p, paste("hbond", field, "den_by_interface", sep="_"),grid=~interface) + + #Hbonds/model or per interface + #There is probably a better way to do this. 
+
+	hbond_counts <- ddply(data, .(interface, sample_source, struct_id), function(int_data){
+		n <- nrow(int_data) #rows of the hbond query in this group; the old length(energy > 0) also counted every row, whatever its energy
+		data.frame(n=n)
+	})
+
+	#print(head(hbond_counts))
+	field = "n"
+	group = c("sample_source")
+	dens <- estimate_density_1d(hbond_counts, group, field)
+	p <- ggplot(data=dens, na.rm=T) + plot_parts +
+		xlab("hbonds") +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		ggtitle("Cross Interface Hydrogen Bonds")
+	plot_field(p, "hbond_den_by_all")
+
+	group = c("sample_source", "interface")
+	dens <- estimate_density_1d(hbond_counts, group, field)
+	p <- ggplot(data=dens, na.rm=T) + plot_parts +
+		xlab("hbonds") +
+		geom_line(aes(x, y, colour=sample_source), size=1.2) +
+		ggtitle("Cross Interface Hydrogen Bonds")
+	plot_field(p, "hbond_den_by_interface",grid=~interface)
+
+
+	#Histogram of the per-structure counts computed above (bin width of one hbond)
+	p <- ggplot(data=hbond_counts, na.rm=T) +
+		geom_bar(aes(x=n, y = ..density.. , fill=sample_source), position="dodge", binwidth=1) +
+		scale_y_continuous(label=percent) +
+		xlab("hbonds") +
+		ggtitle("Average Cross Interface Hydrogen Bonds")
+	plot_field(p, "hbond_hist_by_all")
+
+	p <- ggplot(data=hbond_counts, na.rm=T) +
+		geom_bar(aes(x=n, y = ..density.. , fill=sample_source), position="dodge", binwidth=1) +
+		scale_y_continuous(label=percent) +
+		xlab("hbonds") +
+		ggtitle("Average Cross Interface Hydrogen Bonds")
+	plot_field(p, "hbond_hist_by_interface", grid=~interface)
+})) # end FeaturesAnalysis
\ No newline at end of file
diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R
new file mode 100644
index 0000000..6f0f07b
--- /dev/null
+++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R
@@ -0,0 +1,90 @@
+# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
+# vi: set ts=2 noet:
+#
+# (c) Copyright Rosetta Commons Member Institutions.
+# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_hbonds-unsat_polars_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic interface energy information", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + sele <- " + SELECT + delta_unsatHbonds, + dSASA, + interface + FROM + interfaces + " + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + + data = query_sample_sources(sample_sources, sele) + + #Basic Densities + + field = "delta_unsatHbonds" + parts = list(plot_parts, scale_x_continuous("n")) + + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Delta Unsatisfied Polar Atoms") + plot_field(p, paste("delta_unsat_polars", "den_by_all", sep="_"), ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle("Delta Unsatisfied Polar Atoms") + plot_field(p, paste("delta_unsat_polars", "den_by_interface", sep="_"), grid=interface ~ .) + + #Reproduction of Ben Strange's plots within a density. + #"A comparison of successful and failed protein interface designs highlights the challenges of designing buried hydrogen bonds" + + #unsat per 1000 dSASA + data$unsat_per_thousand = (data$delta_unsatHbonds*1000)/data$dSASA + + group = c("sample_source") + field = "unsat_per_thousand" + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab(expression(paste("No. Unsatisfied /", A^2))) + + ggtitle("Delta Unsatisfied Polar Atoms per area") + plot_field(p, paste("delta_unsat_polars_per_1000_dSASA", "den_by_all", sep="_"), ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab(expression(paste("No. 
Unsatisfied /", A^2))) + + ggtitle("Delta Unsatisfied Polar Atoms per area") + plot_field(p, paste("delta_unsat_polars_per_1000_dSASA", "den_by_interface", sep="_"), grid=interface ~ .) + + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R new file mode 100644 index 0000000..93e2727 --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R @@ -0,0 +1,96 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_hbonds-unsat_polars_vs", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic interface energy information", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + sele <- " + SELECT + dG, + dG_cross, + delta_unsatHbonds, + hbond_E_fraction, + dSASA, + interface + FROM + interfaces + " + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){
+			p <- p+ facet_grid(facets=grid)
+		}
+		if(nrow(sample_sources) <= 3){
+			p <- p + theme(legend.position="bottom", legend.direction="horizontal")
+		}
+		save_plots(self, plot_id, sample_sources, output_dir, output_formats) #p is not passed on; presumably save_plots picks up the most recently built plot - TODO confirm
+	}
+
+
+	data = query_sample_sources(sample_sources, sele)
+	#data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers
+
+	parts = list(
+		geom_point(size=1.5, pch="o"),
+		#stat_smooth(color="grey"),
+		stat_smooth(method=lm),
+		geom_density2d(),
+		#stat_density2d(aes(fill = ..level..), geom="polygon"),
+		#stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE),
+		theme_bw())
+
+	#unsat hbonds vs hbond fraction (by_all: one row of panels per sample source; by_interface: interface x sample source grid)
+	p <- ggplot(data = data, aes(y=hbond_E_fraction, x=delta_unsatHbonds)) + parts +
+		ggtitle("Delta Unsatisfied Polar Atoms vs Hbond Energy fraction") +
+		ylab("fraction") +
+		xlab("atoms")
+	plot_field(p, "delta_unsat_polars_vs_hbond_E_fraction_by_all", grid=sample_source ~ .)
+	plot_field(p, "delta_unsat_polars_vs_hbond_E_fraction_by_interface", grid=interface ~ sample_source)
+
+	#unsat hbonds vs dG
+	p <- ggplot(data = data, aes(y=dG, x=delta_unsatHbonds)) + parts +
+		ggtitle("Delta unsatisfied Polar Atoms vs dG") +
+		ylab("REU") +
+		xlab("atoms")
+	plot_field(p, "delta_unsat_polars_vs_dG_by_all", grid=sample_source ~ .) #was saved under the by_interface id and then overwritten by the next call
+	plot_field(p, "delta_unsat_polars_vs_dG_by_interface", grid=interface ~ sample_source)
+
+	#unsat hbonds vs dSASA
+	#hbond fraction vs dunsat hbonds
+	p <- ggplot(data = data, aes(y=dSASA, x=delta_unsatHbonds)) + parts +
+		ggtitle("Delta unsatisfied Polar Atoms vs dSASA") +
+		ylab("SASA") +
+		xlab("atoms")
+	plot_field(p, "delta_unsat_polars_vs_dSASA_by_all", grid=sample_source ~ .)
+ plot_field(p, "delta_unsat_polars_vs_dSASA_by_interface", grid=interface ~ sample_source) + + #unsat hbonds vs hbond E fraction + + #unsat hbonds vs dG_Cross + p <- ggplot(data = data, aes(y=dG_cross, x=delta_unsatHbonds)) + parts + + ggtitle("Delta unsatisfied Polar Atoms vs Crossterm dG") + + ylab("REU") + + xlab("atoms") + plot_field(p, "delta_unsat_polars_vs_dG_cross_by_all", grid=sample_source ~ .) + plot_field(p, "delta_unsat_polars_vs_dG_cross_by_interface", grid=interface ~ sample_source) + + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R new file mode 100644 index 0000000..39c0e3b --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R @@ -0,0 +1,68 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_packing-den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs Interface metrics such as packstat and shape complementarity scores", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + sele <- " + SELECT + sc_value, + packstat, + interface + FROM + interfaces + " + + + + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + plot_field = function(p, plot_id, grid = NULL){ + + if (! 
is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + #Basic densities of sc_value and packstat + fields = c("sc_value", "packstat") + for(field in fields){ + parts = list(plot_parts, scale_x_continuous("value", limit = c(0, 1.0))) + + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_interface", sep="_"), grid=interface ~ .) + } + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R new file mode 100644 index 0000000..ee00a1e --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R @@ -0,0 +1,84 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. 
Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_packing-packstat_vs", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs Interface metrics such as packstat and shape complementarity scores", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + sele <- " + SELECT + packstat, + dSASA, + delta_unsatHbonds, + interface + FROM + interfaces + " + + plot_field = function(p, plot_id, grid = NULL){ + + if (! is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + + data = query_sample_sources(sample_sources, sele) + + + #Scatterplots + #sc_value vs packstat + parts = list( + geom_point(size=1.5, pch="o"), + #stat_smooth(color="grey"), + stat_smooth(method=lm), + geom_density2d(), + #stat_density2d(aes(fill = ..level..), geom="polygon"), + #stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE), + theme_bw()) + + #packstat vs dSASA + p <- ggplot(data = data, aes(x=packstat, y=dSASA)) + parts + + ggtitle("packstat vs dSASA") + + scale_x_continuous("packstat", limit=c(0,1.0)) + + scale_y_continuous("Buried SASA") + plot_field(p, "packstat_vs_dSASA_by_all", grid = sample_source ~.) 
+ plot_field(p, "packstat_vs_dSASA_by_interface", grid = interface ~ sample_source) + + + #deltaUnsatHbonds vs packstat + p <- ggplot(data = data, aes(x = delta_unsatHbonds, y=packstat)) + parts + + ggtitle("packstat vs interface unsatisfied polar atoms ") + + scale_x_continuous("n") + + scale_y_continuous("packstat", limit = c(0, 1.0)) + plot_field(p, "packstat_vs_delta_unsat_polars_by_all", grid=sample_source ~ .) + plot_field(p, "packstat_vs_delta_unsat_polars_by_interface", grid = interface ~ sample_source) + #3D Plots + + #sc_value vs dG vs dSASA + + #Sides: + + #sc_value vs interface_energy + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R new file mode 100644 index 0000000..800e827 --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R @@ -0,0 +1,112 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
+ +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_packing-sc_value_vs", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs Interface metrics such as packstat and shape complementarity scores", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + sele <- " + SELECT + sc_value, + packstat, + dSASA, + dG, + dG_cross, + delta_unsatHbonds, + interface + FROM + interfaces + " + + plot_field = function(p, plot_id, grid = NULL){ + + if (! is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + + data = query_sample_sources(sample_sources, sele) + + + #Scatterplots + #sc_value vs packstat + parts = list( + geom_point(size=1.5, pch="o"), + #stat_smooth(color="grey"), + stat_smooth(method=lm), + geom_density2d(), + #stat_density2d(aes(fill = ..level..), geom="polygon"), + #stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE), + theme_bw()) + + p <- ggplot(data=data, aes(x =sc_value, y=packstat)) + parts + + ggtitle("sc_value vs packstat") + + scale_x_continuous("sc_value", limit = c(0, 1.0)) + + scale_y_continuous("packstat", limit = c(0, 1.0)) + plot_field(p, "sc_value_vs_packstat_by_all", grid = sample_source ~ .) + plot_field(p, "sc_value_vs_packstat_by_interface", grid=interface ~ sample_source) + + #sc_value vs dSASA + p <- ggplot(data=data, aes(x =sc_value, y=dSASA)) + parts + + ggtitle("sc_value vs dSASA") + + scale_x_continuous("sc_value", limit = c(0, 1.0)) + + scale_y_continuous("Buried SASA") + plot_field(p, "sc_value_vs dSASA_all", grid = sample_source ~ .) 
+ plot_field(p, "sc_value_vs_dSASA_by_interface", grid=interface ~ sample_source) + + + #sc_value vs dG + p <- ggplot(data = data[data$dG<5000,], aes(x=sc_value, y=dG)) + parts + + ggtitle("sc_value_vs_dG") + + scale_x_continuous("sc_value", limit = c(0, 1.0)) + + scale_y_continuous("REU") + plot_field(p, "sc_value_vs_dG_by_all", grid=sample_source ~ .) + plot_field(p, "sc_value_vs_dG_by_interface", grid=interface ~ sample_source) + + #sc_value vs crossterm + p <- ggplot(data = data[data$dG<5000,], aes(x=sc_value, y=dG_cross)) + parts + + ggtitle("sc_value vs dG_cross") + + scale_x_continuous("sc_value", limit = c(0, 1.0)) + + scale_y_continuous("REU") + plot_field(p, "sc_value_vs_dG_cross_by_all", grid=sample_source ~ .) + plot_field(p, "sc_value_vs_dG_cross_by_interface", grid=interface ~ sample_source) + + #deltaUnsatHbonds vs sc_value + p <- ggplot(data = data, aes(x=sc_value, y=delta_unsatHbonds)) + parts + + ggtitle("sc_value vs Interface unsatisfied polar atoms") + + scale_y_continuous("n") + + scale_x_continuous("sc_value", limit = c(0, 1.0)) + plot_field(p, "sc_value_vs_delta_unsat_polars_by_all", grid=sample_source ~ .) + plot_field(p, "sc_value_vs_delta_unsat_polars_by_interface", grid=interface ~ sample_source) + + + #3D Plots + + #sc_value vs dG vs dSASA + + #Sides: + + #sc_value vs interface_energy + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R new file mode 100644 index 0000000..90730d8 --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R @@ -0,0 +1,75 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. 
+# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() + +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "int_ss_den", +author = "Jared Adolf-Bryfogle", +brief_description = "Graphs basic composition of the interfaces, restypes, etc", +feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), + + +run=function(self, sample_sources, output_dir, output_formats){ + + #Aromatic Composition + sele <- " + SELECT + ss_sheet_fraction, + ss_helix_fraction, + ss_loop_fraction, + interface, + side + FROM + interface_sides + " + plot_parts <- list( + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + scale_y_continuous("Feature Density"), + theme_bw()) + + plot_field = function(p, plot_id, grid = NULL, ssLegend=T){ + + if (! is.null(grid)){ + p <- p+ facet_grid(facets=grid) + } + if(ssLegend){ + if(nrow(sample_sources) <= 3){ + p <- p + theme(legend.position="bottom", legend.direction="horizontal") + } + } + save_plots(self, plot_id, sample_sources, output_dir, output_formats) + } + + fields = c("ss_sheet_fraction", "ss_helix_fraction", "ss_loop_fraction") + data = query_sample_sources(sample_sources, sele, char_as_factor=F) + + for(field in fields){ + fieldSP = unlist(strsplit(field, split="_")) + parts = list(plot_parts, xlab("fraction")) + + group = c("sample_source", "side") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste(capwords(fieldSP), collapse=" ")) + plot_field(p, paste(field, "den_sides_by_all", sep="_"), grid=side ~ .) 
+ + group = c("sample_source", "interface", "side") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste(capwords(fieldSP), collapse=" ")) + plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface) + } + + + + +})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/util.R b/inst/scripts/analysis/plots/interfaces/util.R new file mode 100644 index 0000000..4466c0d --- /dev/null +++ b/inst/scripts/analysis/plots/interfaces/util.R @@ -0,0 +1,8 @@ +// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +// vi: set ts=2 noet: +// +// (c) Copyright Rosetta Commons Member Institutions. +// (c) This file is part of the Rosetta software suite and is made available under license. +// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +// (c) For more information, see http://www.rosettacommons.org. Questions about this can be +// (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
diff --git a/inst/scripts/analysis/plots/loops/alpha_vm_estimates.R b/inst/scripts/analysis/plots/loops/alpha_vm_estimates.R new file mode 100644 index 0000000..edd5a0a --- /dev/null +++ b/inst/scripts/analysis/plots/loops/alpha_vm_estimates.R @@ -0,0 +1,64 @@ +library(RSQLite) + +db_path <- "/scratch/weitzner/loop_features_test/antibodies_f33ffc7_130904/features_antibodies_f33ffc7_130904.db3" + +sele <-paste("SELECT alpha101 FROM loop_anchor_transforms") + + +sqlite <- dbDriver("SQLite") +db <- dbConnect(sqlite, db_path) + +f <- dbGetQuery(db, sele) + +dbDisconnect(db) + +library(movMF) +#movMF(data.matrix(f$alpha101), 2) + +#library(mixtools) +library(ggthemes) +# f$alpha101_scaled <- ifelse(f$alpha101 < -100, f$alpha101 + 360, f$alpha101) +# +# expectation_maximization.model <- normalmixEM(x=f$alpha101_scaled) +# parameters <- expectation_maximization.model[c("lambda", "mu", "sigma")] +# +# f$curve.1 <- parameters$lambda[1] * dnorm(f$alpha101_scaled, parameters$mu[1], +# parameters$sigma[1]) +# +# f$curve.2 <- parameters$lambda[2] * dnorm(f$alpha101_scaled, parameters$mu[2], +# parameters$sigma[2]) +# +# f$combined <- f$curve.1 + f$curve.2 + +pts_on_unit_circle <- cbind(cos(f$alpha101 * pi / 180), + sin(f$alpha101 * pi / 180)) + +d <- movMF(pts_on_unit_circle, 2) + +# detect quadrant of theta for each component of mixture +norm_theta <- skmeans:::row_normalize(d$theta) + +# compute mean angles of distributions +mu <- atan2(norm_theta[,2], norm_theta[,1]) * 180 / pi + +# compute the standard deviations +kappa <- (d$theta / norm_theta)[,1] + + +# Because the data are laid out on a unit circle and we will be plotting +# on a domain of 360, divide the densities by 360. 
+f$curve.1 <- d$alpha[1] * dmovMF(pts_on_unit_circle, d$theta[1,]) / 360 +f$curve.2 <- d$alpha[2] * dmovMF(pts_on_unit_circle, d$theta[2,]) / 360 +f$combined <- f$curve.1 + f$curve.2 + +hline.data <- data.frame(z = seq(0.005, 0.025, by=0.005)) +p <- ggplot(f, aes(x=alpha101)) +p + geom_histogram(aes(y=..density..), fill="lightgrey", colour="white") + +# geom_line(aes(y=combined), size=1, colour="black") + + geom_hline(aes(yintercept = z), hline.data, colour="white") + + geom_line(aes(y=curve.1), size=1, colour="grey50") + + geom_line(aes(y=curve.2), size=1, colour="darkgrey") + + theme_tufte() + #geom_rangeframe() + + scale_x_continuous(expression(paste(alpha[101], " (degrees)")), + limits=c(-180, 180), breaks=seq(-180,180, by=45)) + + scale_y_continuous("Density", expand=c(0, 0)) diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R new file mode 100644 index 0000000..a6813fc --- /dev/null +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -0,0 +1,98 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
+ + +check_setup() +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "total_score", +author = "Jared Adolf-Bryfogle", +brief_description = "", +feature_reporter_dependencies = c("StructureScoreFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + +sele <-" +SELECT + structure_scores.struct_id as struct_id, + structure_scores.score_value as total_score, + structure_scores.score_type_id as score_type +FROM + structure_scores, + score_types + +WHERE + score_types.score_type_name='total_score' AND + structure_scores.score_type_id = score_types.score_type_id +ORDER BY score_value;" + +data <- query_sample_sources(sample_sources, sele) + +print(summary(data)) + +dens <- estimate_density_1d( + data = data, + ids = c("sample_source"), + variable = "total_score") + +plot_id <- "total_score" +p <- ggplot(data=dens) + theme_bw() + + geom_line(aes(x=x, y=y, colour=sample_source), size=1.4) + + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + ggtitle("Rosetta Structure Score") + + labs(x="Rosetta Energy Units") + + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) +save_plots(self, plot_id, sample_sources, output_dir, output_formats) + +f <- ddply(data, .(sample_source), function(d2){ + data.frame(total_score = d2[1:10,]$total_score) +}) + +dens <- estimate_density_1d(f, c("sample_source"), c("total_score")) + +plot_id <- "total_score_top_10" +p <- ggplot(data=dens) + theme_bw() + + geom_line(aes(x=x, y=y, colour=sample_source), size=1.4) + + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + ggtitle("Rosetta Structure Score - Top 10") + + labs(x="Rosetta Energy Units") + + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) +save_plots(self, plot_id, sample_sources, output_dir, output_formats) + +#Averages Scoring +avgs <- ddply(data, .(sample_source), function(d2){ + data.frame(m = mean(d2$total_score), std_dev = sd(d2$total_score), m_top10 = 
mean(d2[1:10,]$total_score), std_dev_top_10 = sd(d2[1:10,]$total_score), top = d2[1,]$total_score) +}) +print(avgs) +p <- ggplot(data=avgs ) + + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + + #geom_errorbar(aes(ymin = m-std_dev, ymax=m+std_dev) + + theme_bw() + + ggtitle("Average Score") + + ylab("REU") + + scale_x_discrete(labels = abbreviate) +save_plots(self, "avg_total_score", sample_sources, output_dir, output_formats) + +#Avg Top 10 Scoring +p <- ggplot(data=avgs ) + + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) + + theme_bw() + + ggtitle("Average Best 10 Score") + + ylab("REU") + + scale_x_discrete(labels = abbreviate) +save_plots(self, "avg_top_10_total_score", sample_sources, output_dir, output_formats) + +#Top Scoring +p <- ggplot(data=avgs ) + + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top, fill=sample_source)) + + theme_bw() + + ggtitle("Best Score") + + ylab("REU") + + scale_x_discrete(labels = abbreviate) +save_plots(self, "best_total_score", sample_sources, output_dir, output_formats) + +})) # end FeaturesAnalysis diff --git a/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R b/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R new file mode 100644 index 0000000..692900c --- /dev/null +++ b/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R @@ -0,0 +1,230 @@ +# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +# vi: set ts=2 noet: +# +# (c) Copyright Rosetta Commons Member Institutions. +# (c) This file is part of the Rosetta software suite and is made available under license. +# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +# (c) For more information, see http://www.rosettacommons.org. 
Questions about this can be +# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. + +check_setup() +feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +id = "cdr_cluster_recovery", +author = "Jared Adolf-Bryfogle", +brief_description = "Used for length and cluster recovery of CDRs. Mainly for AbDesign program based on North Clusters,", +long_description = "First sample source should be Natives. This is the reference. Other sample sources are collections of decoys from different experiments. +Decoys should have the native's name in input_tag.", +feature_reporter_dependencies = c("CDRClusterFeatures"), +run=function(self, sample_sources, output_dir, output_formats){ + + + len_sele <- " + SELECT + new.cdr_clusters.CDR as CDR, + new.cdr_clusters.length as length, + new.cdr_clusters.normDis_deg as normDis_deg + FROM + ref.cdr_clusters, new.cdr_clusters, + ref.structures, new.structures + WHERE + new.cdr_clusters.struct_id=new.structures.struct_id AND new.structures.input_tag LIKE :like_tag AND + ref.cdr_clusters.struct_id=ref.structures.struct_id AND ref.structures.input_tag = :tag AND + new.cdr_clusters.CDR = ref.cdr_clusters.CDR AND + new.cdr_clusters.length = ref.cdr_clusters.length; + " + + clu_sele <- " + SELECT + new.cdr_clusters.CDR as CDR, + new.cdr_clusters.fullcluster as cluster, + new.cdr_clusters.normDis_deg as normDis_deg + FROM + ref.cdr_clusters, new.cdr_clusters, + ref.structures, new.structures + WHERE + new.cdr_clusters.struct_id=new.structures.struct_id AND new.structures.input_tag LIKE :like_tag AND + ref.cdr_clusters.struct_id=ref.structures.struct_id AND ref.structures.input_tag = :tag AND + new.cdr_clusters.CDR = ref.cdr_clusters.CDR AND + new.cdr_clusters.fullcluster = ref.cdr_clusters.fullcluster; + " + + total_sele <- " + SELECT + cdr_clusters.fullcluster + FROM + structures, cdr_clusters + WHERE + structures.input_tag LIKE :like_tag AND structures.struct_id = cdr_clusters.struct_id AND + 
cdr_clusters.CDR= :c; + " + + #Need individual and combined data of CDR/PDB/sample_source. Next time, we do this the slower usual way. + + cdrs = c("L1", "L2", "L3", "H1", "H2", "H3") + + #This is a fixer function as I really should not have used ddply for everything. Inserts zeros into DF where needed. + create_zero_data = function(current_data){ + sele = "SELECT DISTINCT cdr_clusters.CDR FROM cdr_clusters" + #print(sample_sources) + for (i in 2:nrow(sample_sources)){ + #ss_id = as.character(ss["sample_source"]) + ss = sample_sources[i,] + summary(ss) + ss_id = as.character(ss$sample_source) + result = query_sample_source(ss, sele, char_as_factor=F) + for (cdr in result$CDR){ + if (! any(current_data$CDR==cdr & current_data$sample_source == ss_id)){ + new_df = data.frame(CDR=as.character(cdr), sample_source = ss_id, normDis_deg=0, fullcluster="NA", stringsAsFactors=F) + current_data = merge(current_data, new_df, all=T) + } + } + } + return(current_data) + } + #Get input_tags to match natives + result = query_sample_source(sample_sources[1,], "SELECT input_tag from structures", char_as_factor=F) + native_tags = result$input_tag + get_and_write_recovery = function(sele, type){ + all_data = adply(native_tags, 1, function(native_tag) { + + native_tag_sp = unlist(strsplit(native_tag, "/")) + pdb_sp = strsplit(native_tag_sp[length(native_tag_sp)], "\\.") + tag = unlist(pdb_sp)[1] #2J88.pdb -> take 2J88 + + cat("\nWorking on", tag, type, "recovery", "\n", sep=" ") + + match = paste("%", tag, "%", sep="") + tag_frame = data.frame(like_tag=match, tag=native_tag) + + data = query_sample_sources_against_ref(sample_sources, sele, sele_args_frame=tag_frame, char_as_factor=F) + data = create_zero_data(data) + + #Type is length or cluster here: + combine_data <- function(data, type){ + res_by_ss = ddply(data, "sample_source", function(data_by_ss, type){ + + res_by_cdr = ddply(data_by_ss, "CDR", function(data_by_cdr, type) { + + cdr = data_by_cdr$CDR[1] + total_data = 
query_sample_sources(sample_sources, total_sele, sele_args_frame=data.frame(like_tag=match, c=cdr), char_as_factor=F) + total_decoys = length(total_data$fullcluster[total_data$sample_source == as.character(data_by_ss$sample_source[1])]) + + if (length(data_by_cdr$normDis_deg[data_by_cdr$normDis_deg > 0]) == 0){ + recovery = 0 + recovery_total = 0 + angle_mean = 0 + angle_sd = 0 + } + else{ + recovery = length(data_by_cdr$CDR)/total_decoys + recovery_total = length(data_by_cdr$CDR) + angle_mean = mean(data_by_cdr$normDis_deg[data_by_cdr$normDis_deg > 0]) + angle_sd = sd(data_by_cdr$normDis_deg[data_by_cdr$normDis_deg >0]) + } + #cat("rec: ", recovery, "rec_total: ", recovery_total, "\n") + + result = data.frame(native=tag, sample_source=as.character(data_by_cdr$sample_source[1]), recovery = recovery, + total_rec=recovery_total, total=total_decoys, angle_mean=angle_mean, angle_sd = angle_sd) + return(result) + }) + }) + } #End combine_data + + rec_by_ss = combine_data(data, type) + return(rec_by_ss) + }) + + + #Recovery for CDR by individual PDB + #print(all_data) + all_data$X1 = NULL #Remove extra column + all_data = all_data[all_data$total != 0,] + grouped_data = sort(all_data, by = ~ native + CDR + sample_source) + + save_tables(self, + grouped_data, paste("cdr_recovery_by_pdb", type, sep="_"), sample_sources, output_dir, output_formats, + caption=paste("CDR", type, "recovery", sep=" "), caption.placement="top", quote_strings=F) + + grouped_data = sort(all_data, by = ~recovery + total_rec + CDR + sample_source) + + save_tables(self, + grouped_data, paste("cdr_recovery_by_pdb_best",type, sep="_"), sample_sources, output_dir, output_formats, + caption=paste("CDR", type, "recovery", sep=" "), caption.placement="top", quote_strings=F) + + calc_df = function(data){ + recovery = sum(data$total_rec)/sum(data$total) + recovery_total = sum(data$total_rec) + total = sum(data$total) + + #Note not totally correct mean and SD here. 
(mean of mean) + angle_mean = mean(data$angle_mean[data$angle_mean>0]) + angle_sd = sd(data$angle_mean[data$angle_mean>0]) + df = data.frame(recovery = recovery, + total_rec=recovery_total, total=total, angle_mean=angle_mean, angle_sd = angle_sd) + } + + + #Recovery by CDR by source + grouped_data = ddply(all_data, "sample_source", function(ss_data){ + f = ddply(ss_data, "CDR", function(cdr_data){ + df = calc_df(cdr_data) + }) + }) + grouped_data = sort(grouped_data, by = ~ sample_source + CDR) + + save_tables(self, + grouped_data, paste("cdr_recovery_by_sample_source", type, sep="_"), sample_sources, output_dir, output_formats, + caption=paste("CDR", type, "recovery", sep=" "), caption.placement="top", quot_strings=F) + + grouped_data = sort(grouped_data, by= ~ recovery +total_rec) + + save_tables(self, + grouped_data, paste("cdr_recovery_by_sample_source_best",type, sep="_"), sample_sources, output_dir, output_formats, + caption=paste("CDR", type, "recovery", sep=" "), caption.placement="top", quote_strings=F) + + + + #Recovery by CDR: + grouped_data = ddply(all_data, "CDR", function(cdr_data){ + df = calc_df(cdr_data) + }) + + grouped_data = sort(grouped_data, by= ~ CDR + recovery) + + save_tables(self, + grouped_data, paste("overall_cdr_recovery",type, sep="_"), sample_sources, output_dir, output_formats, + caption=paste("CDR", type, "recovery", sep=" "), caption.placement="top", quote_strings=F) + + + #Reovery by Native: + grouped_data = ddply(all_data, "native", function(cdr_data) { + df = calc_df(cdr_data) + }) + + grouped_data = sort(grouped_data, by=~ native + recovery) + save_tables(self, + grouped_data, paste("overall_native_recovery",type, sep="_"), sample_sources, output_dir, output_formats, + caption=paste("CDR", type, "recovery", sep=" "), caption.placement="top", quote_strings=F) + + + #Recovery by Source: + grouped_data = ddply(all_data, "sample_source", function(cdr_data) { + df = calc_df(cdr_data) + }) + grouped_data = sort(grouped_data, by= ~ 
sample_source + recovery) + save_tables(self, + grouped_data, paste("overall_recovery_by_sample_source",type, sep="_"), sample_sources, output_dir, output_formats, + caption=paste("CDR", type, "recovery", sep=" "), caption.placement="top", quote_strings=F) + } + + get_and_write_recovery(len_sele, "length") + get_and_write_recovery(clu_sele, "cluster") + + #DIR: cdr_cluster_recovery + #5 Tables?: + #cdr_by_pdb, cdr_by_source, by_cdr, by_pdb, by_source + + +})) # end FeaturesAnalysis + + \ No newline at end of file From 58264ce44be67f9519282e163aa93d20dc957448 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 14 Jun 2016 17:23:59 -0500 Subject: [PATCH 02/55] Fix all interface scripts. Remove some non-useful plots that I have been meaning to remove. --- .../plots/interfaces/SASA/int_SASA_den.R | 8 +- .../SASA/int_SASA_residue_avgs_den.R | 5 +- .../interfaces/SASA/int_SASA_residue_den.R | 9 +- .../interfaces/SASA/int_SASA_residue_vs.R | 5 +- .../plots/interfaces/SASA/int_dSASA_vs.R | 10 +- ...nt_composition-dG_dSASA_stats_by_restype.R | 92 ++++++++++--------- .../int_composition-hbond_stats_by_restype.R | 5 +- .../composition/int_composition_den.R | 46 +--------- .../plots/interfaces/energies/int_dG_vs.R | 6 +- .../interfaces/energies/int_energies_den.R | 6 +- .../energies/int_energies_residue_den.R | 4 +- .../plots/interfaces/hbonds/int_hbonds_den.R | 7 +- .../interfaces/hbonds/int_unsat_polars_den.R | 5 +- .../interfaces/hbonds/int_unsat_polars_vs.R | 18 ++-- .../interfaces/packing/int_packing_den.R | 5 +- .../interfaces/packing/int_packstat_vs.R | 7 +- .../interfaces/packing/int_sc_value_vs.R | 5 +- .../secondary_structure/int_ss_den.R | 5 +- 18 files changed, 122 insertions(+), 126 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index fe3f515..3efec0d 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ 
b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic dSASA and SASA information", @@ -116,7 +117,10 @@ run=function(self, sample_sources, output_dir, output_formats){ #print(data) #Backbone SASA may not be interesting, but I want I still want to know for now. + #JAB - Commenting out hydrophibic sasa. Not very useful from my experience and it makes too many plots. fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") + fields = c("dSASA", "dSASA_bb", "dSASA_sc") + for (field in fields){ parts = list(plot_parts, scale_x_continuous("SASA")) diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R index a9bb62a..05435f3 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-by_residue_avgs_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic dSASA and SASA information", diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R index 8cb1c7c..49908ce 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-by_residue_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs all information of individual interface residues. @@ -55,7 +56,10 @@ run=function(self, sample_sources, output_dir, output_formats){ #Densities #dSASA + #JAB - Comment out hydrophobic component as I don't think its very useful and it creates too man plots. fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") + fields = c("dSASA", "dSASA_bb", "dSASA_sc") + for (field in fields){ group = c("sample_source") dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) @@ -122,5 +126,4 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) - #Per residue data. This may get crazy. 
})) \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R index 918f748..1316139 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-by_residue_vs", author = "Jared Adolf-Bryfogle", brief_description = "Graphs all information of individual interface residues. diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R index 274c030..071a7f5 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-dSASA_vs", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic dSASA and SASA information", @@ -80,8 +81,11 @@ run=function(self, sample_sources, output_dir, output_formats){ #stat_density2d(aes(fill = ..level..), geom="polygon"), #stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE), theme_bw()) + #JAB - commenting out the hydrophobic dSASA. Unclear if this is useful or not. 
I don't think it is very much. + #fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") + + fields = c("dSASA", "dSASA_bb", "dSASA_sc") - fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") for (f in fields){ #dSASA vs dG diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R index 5db8b91..58f6b0d 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_composition-dG_dSASA_stats_by_restype", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic composition of the interfaces, restypes, etc", @@ -119,8 +120,11 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- p + facet_wrap(~ restype, ncol=4) save_plots(self, "dSASA_fraction_by_restype_den_>0dSASA_by_all", sample_sources, output_dir, output_formats) + #JAB - commenting out original fields. It certainly works, but the hydrophobic dSASA and the relative by charge is + # interesting, but not entirely useful. Uncomment this if you find otherwise. 
+ #fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") - fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") + fields = c("dSASA", "dSASA_bb", "dSASA_sc") for (field in fields) { group = c("sample_source", "restype") dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field) @@ -148,33 +152,35 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- p + facet_wrap(~ restype, ncol=4) save_plots(self, "dG_by_restype_den_by_all", sample_sources, output_dir, output_formats) - group = c("sample_source", "restype") - field = "dG" - dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle("dG per restype") + - xlab("REU") - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - p <- p + facet_wrap(~ restype, ncol=4) - save_plots(self, "dG_by_restype_den_>0dSASA_by_all", sample_sources, output_dir, output_formats) + #JAB - works, but not really useful.. 
+ #group = c("sample_source", "restype") + #field = "dG" + #dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field) + #p <- ggplot(data=dens, na.rm=T) + parts + + # geom_line(aes(x, y, colour=sample_source), size=1.2) + + # ggtitle("dG per restype") + + # xlab("REU") + #if(nrow(sample_sources) <= 3){ + # p <- p + theme(legend.position="bottom", legend.direction="horizontal") + #} + #p <- p + facet_wrap(~ restype, ncol=4) + #save_plots(self, "dG_by_restype_den_>0dSASA_by_all", sample_sources, output_dir, output_formats) - group = c("sample_source", "restype") - field = "dG" - dens <- estimate_density_1d(res_data[res_data$dSASA == 0,], group, field) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle("dG per restype") + - xlab("REU") - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - p <- p + facet_wrap(~ restype, ncol=4) - save_plots(self, "dG_by_restype_den_@0dSASA_by_all", sample_sources, output_dir, output_formats) + #JAB - works, but not really useful.. 
+ #group = c("sample_source", "restype") + #field = "dG" + #dens <- estimate_density_1d(res_data[res_data$dSASA == 0,], group, field) + #p <- ggplot(data=dens, na.rm=T) + parts + + # geom_line(aes(x, y, colour=sample_source), size=1.2) + + # ggtitle("dG per restype") + + # xlab("REU") + #if(nrow(sample_sources) <= 3){ + # p <- p + theme(legend.position="bottom", legend.direction="horizontal") + #} + #p <- p + facet_wrap(~ restype, ncol=4) + #save_plots(self, "dG_by_restype_den_@0dSASA_by_all", sample_sources, output_dir, output_formats) - #Hard to see: + #JAB - Works, but this is hard to see and interpret: #group = c("sample_source", "restype") #field = "dSASA_fraction" #dens <- estimate_density_1d(res_data[res_data$dSASA_fraction > 0,], group, field) @@ -196,20 +202,22 @@ run=function(self, sample_sources, output_dir, output_formats){ #dG vs dSASA by restype - p <- ggplot(data=res_data[res_data$dSASA > 0,], aes(x = dG, y=dSASA, color=sample_source)) + - #geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + - geom_point(size=.5, pch="o") + - stat_smooth(method=lm) + - geom_density2d() + - theme_bw() + - ggtitle("dG vs dSASA per restype") + - facet_wrap(~ restype, ncol=4) + - xlab("REU") + - ylab("dSASA") - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - save_plots(self, "dSASA_vs_dG_by_restype_>0dSASA_by_all", sample_sources, output_dir, output_formats) + #JAB - This works, but it is not really useful. Commenting it out. 
+ + #p <- ggplot(data=res_data[res_data$dSASA > 0,], aes(x = dG, y=dSASA, color=sample_source)) + + # #geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + # geom_point(size=.5, pch="o") + + # stat_smooth(method=lm) + + # geom_density2d() + + # theme_bw() + + # ggtitle("dG vs dSASA per restype") + + # facet_wrap(~ restype, ncol=4) + + # xlab("REU") + + # ylab("dSASA") + #if(nrow(sample_sources) <= 3){ + # p <- p + theme(legend.position="bottom", legend.direction="horizontal") + #} + #save_plots(self, "dSASA_vs_dG_by_restype_>0dSASA_by_all", sample_sources, output_dir, output_formats) diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R index bab562c..c12fa33 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_composition-hbond_stats_by_restype", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic hbond densities for interface - interface hbonds", diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index 7a0db5e..aa4f940 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_composition_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic composition of the interfaces, restypes, etc", @@ -148,47 +149,6 @@ run=function(self, sample_sources, output_dir, output_formats){ ylab("% of Sample Source") plot_field(p, "restype_composition_by_all") plot_field(p, "restype_composition_by_interface", grid=interface ~ .) - - #Need to know how this compares relative to the overall restype composition! 
-# fields = c("dSASA", "dSASA_bb", "dSASA_sc", "SASA_int", "dSASA_fraction") -# for (field in fields){ -# -# -# p <- ggplot(data=get_percent(res_data[res_data[field] == 0,]), aes(x = restype1, fill=sample_source)) + -# geom_bar(position="dodge", stat="identity", aes(y=perc))+ -# theme_bw() + -# scale_y_continuous(label=percent) + -# ggtitle(paste("Interface ResType Composition @0", field)) -# #scale_x_continuous("restype") + -# -# plot_field(p, paste("restype_composition_@_0",field, "by_all", sep="_")) -# plot_field(p, paste("restype_composition_@_0",field, "by_interface", sep="_"), grid=interface ~ .) -# -# v = 0 -# p <- ggplot(data=get_percent(res_data[res_data[field] > v,]), aes(x = restype1, fill=sample_source)) + -# geom_bar(position="dodge", stat="identity", aes(y=perc))+ -# theme_bw() + -# scale_y_continuous(label=percent) + -# ggtitle(paste("Interface ResType Composition >", v, field)) -# #scale_x_continuous("restype") + -# -# plot_field(p, paste("restype_composition_>",v, field, "by_all", sep="_")) -# plot_field(p, paste("restype_composition_>",v, field, "by_interface", sep="_"), grid=interface ~ .) -# -# } - -# field = "dSASA_fraction" -# for (v in c(.25, .75)){ -# -# p <- ggplot(data=get_percent(res_data[res_data[field] > v,]), aes(x = restype1, fill=sample_source)) + -# geom_bar(position="dodge", stat="identity", aes(y=perc))+ -# theme_bw() + -# scale_y_continuous(label=percent) + -# ggtitle(paste("Interface ResType Composition >", v, field)) -# #scale_x_continuous("restype") + -# -# plot_field(p, paste("restype_composition_>",v, field, "by_all", sep="_")) -# plot_field(p, paste("restype_composition_>",v, field, "by_interface", sep="_"), grid=interface ~ .) 
diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index 7758a79..df71f1f 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -7,9 +7,11 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +library(ggplot2) +library(plyr) + +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_energies-dG_vs", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic interface energy information", diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 042de0a..747d7a0 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -7,9 +7,11 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", + +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_energies_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic interface energy information", diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index b91cfc5..5fa2292 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -7,7 +7,9 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) + feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", id = "int_energies-by_residue_den", diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index b08c009..a9d0453 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -7,16 +7,17 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_hbonds_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic hbond densities for interface - interface hbonds", feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures", "HBondFeatures"), run=function(self, sample_sources, output_dir, output_formats){ - #Thanks to Matt O'Meara's help for this query - Very slow: + #Thanks to Matt O'Meara's help for this query - VERY slow: sele = " diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R index 6f0f07b..966d7cf 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_hbonds-unsat_polars_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic interface energy information", diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R index 93e2727..24452a8 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_hbonds-unsat_polars_vs", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic interface energy information", @@ -85,12 +86,13 @@ run=function(self, sample_sources, output_dir, output_formats){ #unsat hbonds vs hbond E fraction #unsat hbonds vs dG_Cross - p <- ggplot(data = data, aes(y=dG_cross, x=delta_unsatHbonds)) + parts + - ggtitle("Delta unsatisfied Polar Atoms vs Crossterm dG") + - ylab("REU") + - xlab("atoms") - plot_field(p, "delta_unsat_polars_vs_dG_cross_by_all", grid=sample_source ~ .) - plot_field(p, "delta_unsat_polars_vs_dG_cross_by_interface", grid=interface ~ sample_source) + #JAB - commenting out. dG_Cross is actually pretty useless + #p <- ggplot(data = data, aes(y=dG_cross, x=delta_unsatHbonds)) + parts + + # ggtitle("Delta unsatisfied Polar Atoms vs Crossterm dG") + + # ylab("REU") + + # xlab("atoms") + #plot_field(p, "delta_unsat_polars_vs_dG_cross_by_all", grid=sample_source ~ .) + #plot_field(p, "delta_unsat_polars_vs_dG_cross_by_interface", grid=interface ~ sample_source) })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R index 39c0e3b..26e8eba 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_packing-den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs Interface metrics such as packstat and shape complementarity scores", diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R index ee00a1e..2e99c90 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R @@ -7,12 +7,13 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_packing-packstat_vs", author = "Jared Adolf-Bryfogle", -brief_description = "Graphs Interface metrics such as packstat and shape complementarity scores", +brief_description = "Graphs Interface metrics such as packstat vs other metrics. Packstat not very useful in general.", feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), run=function(self, sample_sources, output_dir, output_formats){ diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R index 800e827..172b4da 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R @@ -7,12 +7,13 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", id = "int_packing-sc_value_vs", author = "Jared Adolf-Bryfogle", -brief_description = "Graphs Interface metrics such as packstat and shape complementarity scores", +brief_description = "Graphs Interface metrics such as packstat vs other metrics", feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), run=function(self, sample_sources, output_dir, output_formats){ diff --git a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R index 90730d8..b10abd3 100644 --- a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R +++ b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_ss_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic composition of the interfaces, restypes, etc", From 4e6c58a0fcf0cb5dd057a25961daa6ac8bb645e0 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 15 Jun 2016 11:36:06 -0500 Subject: [PATCH 03/55] Fix all antibody scripts. 
--- .../analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R | 5 +++-- .../plots/antibodies/SASA/ab_cdr_cluster_SASA_den.R | 0 .../analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R | 5 +++-- .../analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R | 5 +++-- .../antibodies/anchor_distances/ab_cdr_anchor_dis_den.R | 5 +++-- .../plots/antibodies/charge/ab_cdr_cluster_charge_den.R | 5 +++-- .../analysis/plots/antibodies/charge/ab_charge_den.R | 5 +++-- .../antibodies/composition/ab_cdr_length_correlations.R | 5 +++-- .../plots/antibodies/composition/ab_composition_den.R | 5 +++-- .../antibodies/contacts/ag_ab_cdr_cluster_contact_den.R | 0 .../analysis/plots/antibodies/contacts/ag_ab_contact_den.R | 5 +++-- .../analysis/plots/antibodies/energies/ab_cdr_energy_den.R | 5 +++-- .../scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R | 5 +++-- .../plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R | 5 +++-- .../plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R | 5 +++-- .../plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R | 5 +++-- .../plots/antibodies/hbonds/ag_ab_cdr_cluster_hbonds_den.R | 0 .../analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R | 5 +++-- .../plots/antibodies/packing_angle/ab_packing_angle_den.R | 5 +++-- inst/scripts/analysis/plots/loops/alpha_vm_estimates.R | 3 +++ inst/scripts/analysis/plots/scores/total_score.R | 6 ++++-- .../analysis/statistics/antibodies/cdr_cluster_recovery.R | 6 ++++-- 22 files changed, 59 insertions(+), 36 deletions(-) delete mode 100644 inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_cluster_SASA_den.R delete mode 100644 inst/scripts/analysis/plots/antibodies/contacts/ag_ab_cdr_cluster_contact_den.R delete mode 100644 inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_cluster_hbonds_den.R diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R index 98e294a..8efcd76 100644 --- 
a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_SASA-CDR_den", author = "Jared Adolf-Bryfogle", brief_description = "CDR Sasas", diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_cluster_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_cluster_SASA_den.R deleted file mode 100644 index e69de29..0000000 diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index f7027f0..d439aef 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_SASA-paratope_den", author = "Jared Adolf-Bryfogle", brief_description = "Various statistics on the H3 Kink", diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R index 2d58854..715c6cc 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_dSASA-ab_cdr_den", author = "Jared Adolf-Bryfogle", brief_description = "CDR Sasas", diff --git a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R index 55e9abe..13377a3 100644 --- a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R +++ b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_anchor_dis", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic antibody composition densities", diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R index d143a9a..0e9d4a8 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_charge-clusters_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic antibody composition densities", diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R index 70e2545..051a78a 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_charge_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic antibody composition densities", diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R index 4a23f02..3a82589 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_composition-length_correlations", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic antibody composition densities", diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R index 0763c4e..8303435 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_composition_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic antibody composition densities", diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_cdr_cluster_contact_den.R b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_cdr_cluster_contact_den.R deleted file mode 100644 index e69de29..0000000 diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R index 7d1eee3..c49d5aa 100644 --- a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R +++ b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ag_ab_contact_den", author = "Jared Adolf-Bryfogle", brief_description = "VL VH packing angle metrics", diff --git a/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R index ae07ed0..e3c79b2 100644 --- a/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R +++ b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_energies-CDR_den", author = "Jared Adolf-Bryfogle", brief_description = "CDR Energies", diff --git a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R index 348d572..8bb171d 100644 --- a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R +++ b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "H3_kink_den", author = "Jared Adolf-Bryfogle", brief_description = "Various statistics on the H3 Kink", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R index 1cc20b9..960e089 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-cdr_cdr_den", author = "Jared Adolf-Bryfogle", brief_description = "CDR - CDR Hbonds", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R index bef7d30..f05c2f1 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-cdr_framework_den", author = "Jared Adolf-Bryfogle", brief_description = "CDR -> Framework Hbonds", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R index 0ad66f4..e1580a0 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
-check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-intra_cdr_den", author = "Jared Adolf-Bryfogle", brief_description = "Self-CDR Hbonds Excluding those arising from BB-BB beta sheet", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_cluster_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_cluster_hbonds_den.R deleted file mode 100644 index e69de29..0000000 diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R index 1a696e4..e00719f 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-cdr_ag_den", author = "Jared Adolf-Bryfogle", brief_description = "CDR - Antigen Hbonds. Must have LH_A analyzed by features reporter for this to work", diff --git a/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R index 8f990f0..b094a77 100644 --- a/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R +++ b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R @@ -7,9 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. 
Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() +library(ggplot2) +library(plyr) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_packing_angle_den", author = "Jared Adolf-Bryfogle", brief_description = "VL VH packing angle metrics", diff --git a/inst/scripts/analysis/plots/loops/alpha_vm_estimates.R b/inst/scripts/analysis/plots/loops/alpha_vm_estimates.R index edd5a0a..e76cc67 100644 --- a/inst/scripts/analysis/plots/loops/alpha_vm_estimates.R +++ b/inst/scripts/analysis/plots/loops/alpha_vm_estimates.R @@ -1,4 +1,7 @@ library(RSQLite) +library(ggplot2) +library(plyr) + db_path <- "/scratch/weitzner/loop_features_test/antibodies_f33ffc7_130904/features_antibodies_f33ffc7_130904.db3" diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index a6813fc..c366547 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -8,8 +8,10 @@ # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +library(ggplot2) +library(plyr) + +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "total_score", author = "Jared Adolf-Bryfogle", brief_description = "", diff --git a/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R b/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R index 692900c..6683e20 100644 --- a/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R +++ b/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R @@ -7,8 +7,10 @@ # (c) For more information, see http://www.rosettacommons.org. 
Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. -check_setup() -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +library(ggplot2) +library(plyr) + +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "cdr_cluster_recovery", author = "Jared Adolf-Bryfogle", brief_description = "Used for length and cluster recovery of CDRs. Mainly for AbDesign program based on North Clusters,", From dd2a3bc9d3852776227f76949bcc3bbdfbdee378 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 15 Jun 2016 12:31:41 -0500 Subject: [PATCH 04/55] add script for local installation from dunbrack repo for now. --- DESCRIPTION | 2 +- install_from_dunbrack_github.R | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 install_from_dunbrack_github.R diff --git a/DESCRIPTION b/DESCRIPTION index 82dcdf8..f91625d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Title: Tools for analyzing macromolecular feature distributions with Rosetta Description: A Stela is a slab, such as the Rosetta stone, that illustrates small symbols or diagrams showing rules or patterns. This package supports the analysis of molecular energy - function by comparing distributions of local geometric features, + function or other molecular characteristics by comparing distributions of local geometric features, often obtained from native or Rosetta-simulated macromolecular conformations. 
Authors@R: person("Matthew", "O'Meara", email = "mattjomeara@gmail.com", diff --git a/install_from_dunbrack_github.R b/install_from_dunbrack_github.R new file mode 100644 index 0000000..4db8535 --- /dev/null +++ b/install_from_dunbrack_github.R @@ -0,0 +1,4 @@ +#!/usr/bin/env Rscript + +#install.packages("devtools", repos="http://cran.rstudio.com/") +devtools::install_github("DunbrackLab/RosettaFeatures") \ No newline at end of file From edefc3e7d2d83f924db9a11e7f7503f0eec02311 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 15 Jun 2016 12:40:06 -0500 Subject: [PATCH 05/55] add a script to run the features by passing a configuration file. --- install_from_dunbrack_github.R | 3 +++ run_features.R | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 run_features.R diff --git a/install_from_dunbrack_github.R b/install_from_dunbrack_github.R index 4db8535..403357a 100644 --- a/install_from_dunbrack_github.R +++ b/install_from_dunbrack_github.R @@ -1,4 +1,7 @@ #!/usr/bin/env Rscript +#NOTE: You must run this script with SUDO!! + + #install.packages("devtools", repos="http://cran.rstudio.com/") devtools::install_github("DunbrackLab/RosettaFeatures") \ No newline at end of file diff --git a/run_features.R b/run_features.R new file mode 100644 index 0000000..22f18c4 --- /dev/null +++ b/run_features.R @@ -0,0 +1,19 @@ +#!/usr/bin/env Rscript +#Jared Adolf-Bryfogle + +#This script accepts a single command-line argument with a JSON configuration file that specifies how to run the analysis. +#You must install the RosettaFeatures library first! 
+ +library(RosettaFeatures) + + +args = commandArgs(trailingOnly=TRUE) + +# test that exactly one argument was supplied: if not, return an error +if (length(args)!=1) { + stop("At single argument, the JSON configuration file, must be supplied (analysis_configuration).json", call.=FALSE) +} + +#Run the analysis +compare_sample_sources( config_filename=args[1] ) + From d0290984c0e980c91e3ff1df36c52b43466018b6 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 15 Jun 2016 17:11:19 -0500 Subject: [PATCH 06/55] Update README.md --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3741ee3..e83d905 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,13 @@ To install this package, in R: } devtools::install_github("momeara/RosettaFeatures") +To install locally, run the ```install_local.R``` script or run the following: + + devtools::document() # if you changed function signatures + devtools::build() + + devtools::install_local(PATH) + Generate features databases following the features_benchmark protocol capture https://github.com/RosettaCommons/demos/tree/master/protocol_capture/features_benchmark/README.md @@ -45,10 +52,11 @@ Generate features databases following the features_benchmark protocol capture Then to report features, in R: library(RosettaFeatures) + library(methods) compare_sample_sources( config_filename="analysis_configuration.json") -Where the `analysis_configuration.json` looks like: +Where the `analysis_configuration.json` looks like (note the removal of the compare_sample_sources main dictionary that the previous, pre-library version required): { "output_dir" : "native_vs_relax_native", From cca80cf2bc1ee89fade402fc00bf40ba50a2d9ed Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 15 Jun 2016 17:26:43 -0500 Subject: [PATCH 07/55] re-implement char_as_factor in query_sample_sources. 
--- R/compare_sample_sources.R | 2 -- R/support-query_sample_sources.R | 24 ++++++++++++++++++++++-- install_from_dunbrack_github.R | 0 install_local.R | 10 ++++++++++ run_features.R | 0 5 files changed, 32 insertions(+), 4 deletions(-) mode change 100644 => 100755 install_from_dunbrack_github.R create mode 100755 install_local.R mode change 100644 => 100755 run_features.R diff --git a/R/compare_sample_sources.R b/R/compare_sample_sources.R index e4b228e..c5ec4af 100644 --- a/R/compare_sample_sources.R +++ b/R/compare_sample_sources.R @@ -8,8 +8,6 @@ # (c) For more information, see http://www.rosettacommons.org. Questions about this can be # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. - - load_config_file <- function(config_filename, verbose=F){ if(!file.exists(config_filename)){ cat("ERROR: Config file '", config_filename, "' does not exist.\n", sep="") diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index 8650524..53909e4 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -22,7 +22,8 @@ query_sample_sources <- function( sample_sources, sele, bind.data = NULL, - warn_zero_rows=T + warn_zero_rows=T, + char_as_factor=T ){ tryCatch(sele,error=function(e){ cat("ERROR: The select statement is not defined.\n") @@ -62,6 +63,16 @@ query_sample_sources <- function( cat("WARNING: The following query returned no rows:\n") cat(sele) } + + if(char_as_factor){ + for(col in names(features)){ + if(is.character(features[,col])){ + features[,col] <- factor(features[,col]) + } + } + } + + features } @@ -70,7 +81,8 @@ query_sample_sources_against_ref <- function( sample_sources, sele, sele_args_frame = NULL, - warn_zero_rows=T + warn_zero_rows=T, + char_as_factor=T ){ tryCatch(sele,error=function(e){ cat("ERROR: The select statement ", sele, " is not defined.\n") @@ -139,6 +151,14 @@ In the returned data.frame the there will be the following columns: cat(sele) 
return(features) } + + if(char_as_factor){ + for(col in names(features)){ + if(is.character(features[,col])){ + features[,col] <- factor(features[,col]) + } + } + } data.frame( ref_sample_source = factor(ref_ss$sample_source[1]), new_sample_source = factor(features$sample_source), diff --git a/install_from_dunbrack_github.R b/install_from_dunbrack_github.R old mode 100644 new mode 100755 diff --git a/install_local.R b/install_local.R new file mode 100755 index 0000000..81a8af9 --- /dev/null +++ b/install_local.R @@ -0,0 +1,10 @@ +#!/usr/bin/env Rscript + +#NOTE: You must run this script with SUDO!! + + +#install.packages("devtools", repos="http://cran.rstudio.com/") +devtools::document() # if you changed function signatures +devtools:build() + +devtools::install_local(".") \ No newline at end of file diff --git a/run_features.R b/run_features.R old mode 100644 new mode 100755 From f0a3ee4077bf3c34b0e84d24209f3ef404a72245 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 12:47:51 -0500 Subject: [PATCH 08/55] fixes. --- DESCRIPTION | 1 + R/support-save_plots.R | 1 + .../plots/antibodies/contacts/ag_ab_contact_den.R | 4 ++-- .../analysis/plots/interfaces/hbonds/int_hbonds_den.R | 4 ++-- inst/scripts/analysis/plots/scores/total_score.R | 1 + install_local.R | 10 ++++++++-- 6 files changed, 15 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f91625d..803e30a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Imports: optparse, reshape2, proto, + grid, ggplot2 (>= 1.0.1), RSQLite, logspline, diff --git a/R/support-save_plots.R b/R/support-save_plots.R index ddccae9..501a099 100644 --- a/R/support-save_plots.R +++ b/R/support-save_plots.R @@ -10,6 +10,7 @@ # Save the last ggplot() object created. 
For each output format, # generate a plot and put in the output directory + #' @export save_plots <- function( features_analysis, diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R index c49d5aa..f96ff5e 100644 --- a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R +++ b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R @@ -58,7 +58,7 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- ggplot(data=binary_data, na.rm=T, aes(x=CDR)) + geom_bar(position="dodge", aes(y=percent, fill=sample_source), stat='identity') + ggtitle("CDR Makes Antigen Contact") + - scale_y_continuous(label=percent) + + scale_y_continuous(label="percent") + ylab("% of sample source") plot_field(p, "cdr_makes_contact_hist") save_tables(self, binary_data, "cdr_makes_contact_table", sample_sources, output_dir, output_formats, @@ -117,7 +117,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("Average Percent of total contacts") + xlab("CDR") + ylab("Avg %") + - scale_y_continuous(label=percent) + scale_y_continuous(label="percent") plot_field(p, "avg_perc_total_contacts_hist") save_tables(self, avg_perc, "avg_perc_total_contacts_table", sample_sources, output_dir, output_formats, caption="Avg Percent of total contacts", caption.placement="top", quote_strings=F) diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index a9d0453..a6c9a72 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -138,14 +138,14 @@ run=function(self, sample_sources, output_dir, output_formats){ #Histogram p <- ggplot(data=hbond_counts, na.rm=T) + geom_bar(aes(x=n, y = ..density.. 
, fill=sample_source), position="dodge", binwidth=1) + - scale_y_continuous(label=percent) + + scale_y_continuous(label="percent") + xlab("hbonds") + ggtitle("Average Cross Interface Hydrogen Bonds") plot_field(p, "hbond_hist_by_all") p <- ggplot(data=hbond_counts, na.rm=T) + geom_bar(aes(x=n, y = ..density.. , fill=sample_source), position="dodge", binwidth=1) + - scale_y_continuous(label=percent) + + scale_y_continuous(label="percent") + xlab("hbonds") + ggtitle("Average Cross Interface Hydrogen Bonds") plot_field(p, "hbond_hist_by_interface", grid=~interface) diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index c366547..96cc803 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -10,6 +10,7 @@ library(ggplot2) library(plyr) +#library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "total_score", diff --git a/install_local.R b/install_local.R index 81a8af9..1d29dbd 100755 --- a/install_local.R +++ b/install_local.R @@ -2,9 +2,15 @@ #NOTE: You must run this script with SUDO!! - #install.packages("devtools", repos="http://cran.rstudio.com/") + +required_packages <- c("devtools", "roxygen2") + +if (length(setdiff(required_packages, rownames(installed.packages()))) > 0) { + install.packages(setdiff(required_packages, rownames(installed.packages())), repos="http://cran.rstudio.com/") +} + devtools::document() # if you changed function signatures devtools:build() -devtools::install_local(".") \ No newline at end of file +devtools::install_local(".") From b3afa42e110ccc599a5008d0b105879ba8b6af44 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 12:52:08 -0500 Subject: [PATCH 09/55] fixes. 
--- inst/scripts/analysis/plots/scores/total_score.R | 2 +- install_from_dunbrack_github.R | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 96cc803..96625d3 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -10,7 +10,7 @@ library(ggplot2) library(plyr) -#library(grid) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "total_score", diff --git a/install_from_dunbrack_github.R b/install_from_dunbrack_github.R index 403357a..f77d06f 100755 --- a/install_from_dunbrack_github.R +++ b/install_from_dunbrack_github.R @@ -3,5 +3,11 @@ #NOTE: You must run this script with SUDO!! -#install.packages("devtools", repos="http://cran.rstudio.com/") +required_packages <- c("devtools", "roxygen2") + +if (length(setdiff(required_packages, rownames(installed.packages()))) > 0) { + install.packages(setdiff(required_packages, rownames(installed.packages())), repos="http://cran.rstudio.com/") +} + + devtools::install_github("DunbrackLab/RosettaFeatures") \ No newline at end of file From e6bef23ca128b8938cba769868a020b42180bfae Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 12:58:53 -0500 Subject: [PATCH 10/55] fixes. --- R/support-save_plots.R | 2 ++ .../analysis/plots/antibodies/contacts/ag_ab_contact_den.R | 6 +++--- inst/scripts/analysis/plots/scores/total_score.R | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/support-save_plots.R b/R/support-save_plots.R index 501a099..cbabd53 100644 --- a/R/support-save_plots.R +++ b/R/support-save_plots.R @@ -10,6 +10,8 @@ # Save the last ggplot() object created. 
For each output format, # generate a plot and put in the output directory +library(grid) + #' @export save_plots <- function( diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R index f96ff5e..edeaf3a 100644 --- a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R +++ b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R @@ -100,17 +100,17 @@ run=function(self, sample_sources, output_dir, output_formats){ avgs = ddply(data2[data2$total_contacts > 0,], .(sample_source, struct_id, CDR), function(d2){ contacts = d2$ag_ab_contacts_total[1]/d2$total_contacts - print(paste(contacts, d2$total_contacts)) + #print(paste(contacts, d2$total_contacts)) perc = contacts data.frame(perc = perc) }) - print(head(avgs)) + #print(head(avgs)) avg_perc = ddply(avgs, .(sample_source, CDR), function(d2){ data.frame(m_perc = mean(d2$perc)) }) - print(head(avg_perc)) + #print(head(avg_perc)) p <- ggplot(data=avg_perc, na.rm = T, aes(x=CDR)) + geom_bar(position="dodge", aes(y=m_perc, fill=sample_source), stat='identity') + diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 96625d3..c366547 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -10,7 +10,6 @@ library(ggplot2) library(plyr) -library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "total_score", From 683d04c93bfcfc8745aba37c748e04874cd939f6 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 13:09:31 -0500 Subject: [PATCH 11/55] fixes. 
--- R/support-save_plots.R | 2 -- .../plots/antibodies/SASA/ab_cdr_SASA_den.R | 1 + .../plots/antibodies/SASA/ab_paratope_SASA_den.R | 1 + .../antibodies/SASA/ag_ab_cdr_cluster_dSASA_den.R | 0 .../plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R | 1 + .../anchor_distances/ab_cdr_anchor_dis_den.R | 1 + .../antibodies/charge/ab_cdr_cluster_charge_den.R | 1 + .../plots/antibodies/charge/ab_charge_den.R | 1 + .../composition/ab_cdr_length_correlations.R | 1 + .../antibodies/composition/ab_composition_den.R | 1 + .../plots/antibodies/energies/ab_cdr_energy_den.R | 1 + .../analysis/plots/antibodies/h3_kink/h3_kink_den.R | 1 + .../plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R | 1 + .../antibodies/hbonds/ab_cdr_framework_hbonds_den.R | 1 + .../antibodies/hbonds/ab_intra_cdr_hbonds_den.R | 1 + .../plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R | 2 ++ .../antibodies/packing_angle/ab_packing_angle_den.R | 1 + .../analysis/plots/interfaces/SASA/int_SASA_den.R | 1 + .../interfaces/SASA/int_SASA_residue_avgs_den.R | 1 + .../plots/interfaces/SASA/int_SASA_residue_den.R | 1 + .../plots/interfaces/SASA/int_SASA_residue_vs.R | 1 + .../analysis/plots/interfaces/SASA/int_dSASA_vs.R | 1 + .../int_composition-dG_dSASA_stats_by_restype.R | 1 + .../int_composition-hbond_stats_by_restype.R | 1 + .../interfaces/composition/int_composition_den.R | 13 +++++++++++++ .../analysis/plots/interfaces/energies/int_dG_vs.R | 1 + .../plots/interfaces/energies/int_energies_den.R | 1 + .../interfaces/energies/int_energies_residue_den.R | 1 + .../plots/interfaces/hbonds/int_hbonds_den.R | 1 + .../plots/interfaces/hbonds/int_unsat_polars_den.R | 1 + .../plots/interfaces/hbonds/int_unsat_polars_vs.R | 1 + .../plots/interfaces/packing/int_packing_den.R | 1 + .../plots/interfaces/packing/int_packstat_vs.R | 1 + .../plots/interfaces/packing/int_sc_value_vs.R | 1 + .../interfaces/secondary_structure/int_ss_den.R | 1 + inst/scripts/analysis/plots/scores/total_score.R | 1 + 36 files changed, 47 insertions(+), 2 
deletions(-) delete mode 100644 inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_cluster_dSASA_den.R diff --git a/R/support-save_plots.R b/R/support-save_plots.R index cbabd53..501a099 100644 --- a/R/support-save_plots.R +++ b/R/support-save_plots.R @@ -10,8 +10,6 @@ # Save the last ggplot() object created. For each output format, # generate a plot and put in the output directory -library(grid) - #' @export save_plots <- function( diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R index 8efcd76..5d0cb5d 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_SASA-CDR_den", diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index d439aef..218bd33 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_SASA-paratope_den", diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_cluster_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_cluster_dSASA_den.R deleted file mode 100644 index e69de29..0000000 diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R index 715c6cc..d3e6e63 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) 
feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_dSASA-ab_cdr_den", diff --git a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R index 13377a3..488d982 100644 --- a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R +++ b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_anchor_dis", diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R index 0e9d4a8..5cdf2fe 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_charge-clusters_den", diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R index 051a78a..01d2274 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_charge_den", diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R index 3a82589..e9359b3 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R @@ 
-9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_composition-length_correlations", diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R index 8303435..2fca0ce 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_composition_den", diff --git a/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R index e3c79b2..adc786f 100644 --- a/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R +++ b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_energies-CDR_den", diff --git a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R index 8bb171d..7e7ff66 100644 --- a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R +++ b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "H3_kink_den", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R index 960e089..2dd6cc2 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R @@ -9,6 +9,7 @@ library(ggplot2) 
library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-cdr_cdr_den", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R index f05c2f1..0803ea2 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-cdr_framework_den", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R index e1580a0..7afb09c 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-intra_cdr_den", diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R index e00719f..90fa46c 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R @@ -9,6 +9,8 @@ library(ggplot2) library(plyr) +library(grid) + feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_hbonds-cdr_ag_den", diff --git a/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R index b094a77..96509ea 100644 --- a/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R +++ 
b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_packing_angle_den", diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index 3efec0d..cc01e44 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA_den", diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R index 05435f3..9f5d9e2 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-by_residue_avgs_den", diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R index 49908ce..a96aa9e 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-by_residue_den", diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R index 1316139..31dad44 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R +++ 
b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-by_residue_vs", diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R index 071a7f5..ca0f902 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_SASA-dSASA_vs", diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R index 58f6b0d..c458f5b 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_composition-dG_dSASA_stats_by_restype", diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R index c12fa33..63e9616 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_composition-hbond_stats_by_restype", diff --git 
a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index aa4f940..7882133 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_composition_den", @@ -16,6 +17,18 @@ author = "Jared Adolf-Bryfogle", brief_description = "Graphs basic composition of the interfaces, restypes, etc", feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures","ResidueFeatures", "ResidueTypesFeatures", "PdbDataFeatures"), +#Because R is stupid and can't find this function. + +capwords <- function(s, strict = FALSE) +{ + cap <- function(s) paste(toupper(substring(s, 1, 1)), { + s <- substring(s, 2) + if (strict) + tolower(s) + else s + }, sep = "", collapse = " ") + sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) +} run=function(self, sample_sources, output_dir, output_formats){ diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index df71f1f..24d8542 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -10,6 +10,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_energies-dG_vs", diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 747d7a0..04b8fa4 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) 
+library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index 5fa2292..fb11699 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index a6c9a72..5864537 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_hbonds_den", diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R index 966d7cf..5137fb3 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_hbonds-unsat_polars_den", diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R index 24452a8..4b19904 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_unsat_polars_vs.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, 
methods::new("FeaturesAnalysis", id = "int_hbonds-unsat_polars_vs", diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R index 26e8eba..25b63e8 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_packing-den", diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R index 2e99c90..cdad54d 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_packing-packstat_vs", diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R index 172b4da..e1b757c 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", id = "int_packing-sc_value_vs", diff --git a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R index b10abd3..1fc576f 100644 --- a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R +++ b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = 
"int_ss_den", diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index c366547..96625d3 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -10,6 +10,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "total_score", From d7df97ad4bfaf87ca3104d868b9c4239e222729d Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 13:10:31 -0500 Subject: [PATCH 12/55] fixes. --- .../composition/int_composition_den.R | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index 7882133..ffd93f9 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -19,16 +19,7 @@ feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures","ResidueF #Because R is stupid and can't find this function. 
-capwords <- function(s, strict = FALSE) -{ - cap <- function(s) paste(toupper(substring(s, 1, 1)), { - s <- substring(s, 2) - if (strict) - tolower(s) - else s - }, sep = "", collapse = " ") - sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) -} + run=function(self, sample_sources, output_dir, output_formats){ @@ -42,6 +33,19 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM interface_sides " + + capwords <- function(s, strict = FALSE) + { + cap <- function(s) paste(toupper(substring(s, 1, 1)), { + s <- substring(s, 2) + if (strict) + tolower(s) + else s + }, sep = "", collapse = " ") + sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) + } + + plot_parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), scale_y_continuous("Feature Density"), From 12d1c24bf69153eaeba4854ed2bdb3367dd8eccf Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 13:23:35 -0500 Subject: [PATCH 13/55] fixes. 
--- .../analysis/plots/antibodies/charge/ab_charge_den.R | 6 +++--- .../analysis/plots/antibodies/h3_kink/h3_kink_den.R | 2 +- .../plots/interfaces/SASA/int_SASA_residue_avgs_den.R | 11 ++++++++++- .../interfaces/composition/int_composition_den.R | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R index 01d2274..ed4f17d 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R @@ -82,7 +82,7 @@ run=function(self, sample_sources, output_dir, output_formats){ scale_x_continuous("charge", breaks = seq(min(perc$net_charge), max(perc$net_charge), 2)) + ggtitle("Antibody Net Charge") + ylab("% of Sample Source") + - scale_y_continuous(label=percent) + scale_y_continuous(label="percent") #scale_x_continuous("restype") + #scale_y_continuous("n") plot_field(p, "net_charge_hist") @@ -127,7 +127,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("Paratope Net Charge") + scale_x_continuous("charge", breaks = seq(min(perc$paratope_charge), max(perc$paratope_charge), 2)) + ylab("% of Sample Source") + - scale_y_continuous(label=percent) + scale_y_continuous(label="percent") #scale_x_continuous("restype") + #scale_y_continuous("n") plot_field(p, "paratope_charge_hist") @@ -204,7 +204,7 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + #scale_x_continuous("charge", breaks = get_charge_seq(perc, 1)) + ggtitle("CDR Charge") + - scale_y_continuous("charge", label=percent) + + scale_y_continuous("charge", label="percent") + ylab("% of Sample Source") plot_field_wrap(p, "cdr_charge_hist", ~ CDR) diff --git a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R index 7e7ff66..dbfea6b 100644 --- 
a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R +++ b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R @@ -65,7 +65,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat="identity", aes(x=kink_type, y= percent, fill=sample_source)) + theme_bw() + ggtitle("Kink Type Comparison") + - scale_y_continuous(label = percent) + + scale_y_continuous(label = "percent") + xlab("kink type") plot_field(p, "kink_type_hist") diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R index 9f5d9e2..4dda551 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R @@ -20,7 +20,16 @@ run=function(self, sample_sources, output_dir, output_formats){ #First we run on all the interfaces in the database - + capwords <- function(s, strict = FALSE) + { + cap <- function(s) paste(toupper(substring(s, 1, 1)), { + s <- substring(s, 2) + if (strict) + tolower(s) + else s + }, sep = "", collapse = " ") + sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) + } diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index ffd93f9..1fc93a1 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -162,7 +162,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat="identity", aes(y=perc, fill=sample_source))+ theme_bw() + ggtitle("Interface ResType Composition") + - scale_y_continuous(label=percent) + + scale_y_continuous(label="percent") + ylab("% of Sample Source") plot_field(p, "restype_composition_by_all") plot_field(p, 
"restype_composition_by_interface", grid=interface ~ .) From 1b4d17bc69b8a9811468479a41894849e53e78d1 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 14:05:06 -0500 Subject: [PATCH 14/55] fixes. --- R/support-ggplot2_geom_indicator.R | 2 +- inst/scripts/analysis/plots/scores/total_score.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/support-ggplot2_geom_indicator.R b/R/support-ggplot2_geom_indicator.R index 590e9c8..411cb6d 100644 --- a/R/support-ggplot2_geom_indicator.R +++ b/R/support-ggplot2_geom_indicator.R @@ -114,7 +114,7 @@ GeomIndicator <- ggplot2::ggproto( size <- data$size[1] level <- data$group[1] - 1 - textGrob( + grid::textGrob( indicator_display_value, unit(xpos, "npc"), unit(ypos, "npc") - unit(level, "line"), diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 96625d3..96cc803 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -10,7 +10,7 @@ library(ggplot2) library(plyr) -library(grid) +#library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "total_score", From e69a250dec6341f2e1338bf6b9ee1393160a8df6 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 14:07:53 -0500 Subject: [PATCH 15/55] fixes. 
--- R/support-ggplot2_geom_indicator.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/support-ggplot2_geom_indicator.R b/R/support-ggplot2_geom_indicator.R index 411cb6d..4c4751a 100644 --- a/R/support-ggplot2_geom_indicator.R +++ b/R/support-ggplot2_geom_indicator.R @@ -119,7 +119,7 @@ GeomIndicator <- ggplot2::ggproto( unit(xpos, "npc"), unit(ypos, "npc") - unit(level, "line"), just=c(xjust, yjust), - gp=gpar( + gp=grid::gpar( col=alpha(data$colour[1], data$alpha[1]), fontsize=size*12/5, fontfamily=data$family[1], From 58758ea43a4bd0642e229cf95f7641568c9afb86 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 16:57:42 -0500 Subject: [PATCH 16/55] remove char_as_factor --- R/support-query_sample_sources.R | 38 +++++++++---------- .../plots/antibodies/SASA/ab_cdr_SASA_den.R | 2 +- .../antibodies/SASA/ab_paratope_SASA_den.R | 2 +- .../antibodies/SASA/ag_ab_cdr_dSASA_den.R | 2 +- .../anchor_distances/ab_cdr_anchor_dis_den.R | 2 +- .../charge/ab_cdr_cluster_charge_den.R | 2 +- .../plots/antibodies/charge/ab_charge_den.R | 4 +- .../composition/ab_cdr_length_correlations.R | 2 +- .../composition/ab_composition_den.R | 4 +- .../antibodies/contacts/ag_ab_contact_den.R | 3 +- .../antibodies/energies/ab_cdr_energy_den.R | 2 +- .../plots/antibodies/h3_kink/h3_kink_den.R | 2 +- .../antibodies/hbonds/ab_cdr_cdr_hbonds_den.R | 6 +-- .../hbonds/ab_cdr_framework_hbonds_den.R | 4 +- .../hbonds/ab_intra_cdr_hbonds_den.R | 2 +- .../antibodies/hbonds/ag_ab_cdr_hbonds_den.R | 6 +-- .../packing_angle/ab_packing_angle_den.R | 2 +- .../plots/interfaces/SASA/int_SASA_den.R | 4 +- .../SASA/int_SASA_residue_avgs_den.R | 2 +- .../interfaces/SASA/int_SASA_residue_den.R | 2 +- .../interfaces/SASA/int_SASA_residue_vs.R | 2 +- .../plots/interfaces/SASA/int_dSASA_vs.R | 4 +- ...nt_composition-dG_dSASA_stats_by_restype.R | 2 +- .../int_composition-hbond_stats_by_restype.R | 2 +- .../composition/int_composition_den.R | 4 +- 
.../energies/int_energies_residue_den.R | 2 +- .../plots/interfaces/hbonds/int_hbonds_den.R | 2 +- .../interfaces/packing/int_packing_den.R | 2 +- .../secondary_structure/int_ss_den.R | 2 +- .../antibodies/cdr_cluster_recovery.R | 8 ++-- 30 files changed, 62 insertions(+), 61 deletions(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index 53909e4..a41677c 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -22,8 +22,7 @@ query_sample_sources <- function( sample_sources, sele, bind.data = NULL, - warn_zero_rows=T, - char_as_factor=T + warn_zero_rows=T ){ tryCatch(sele,error=function(e){ cat("ERROR: The select statement is not defined.\n") @@ -64,13 +63,14 @@ query_sample_sources <- function( cat(sele) } - if(char_as_factor){ - for(col in names(features)){ - if(is.character(features[,col])){ - features[,col] <- factor(features[,col]) - } - } - } + + #if(char_as_factor){ + # for(col in names(features)){ + # if(is.character(features[,col])){ + # features[,col] <- factor(features[,col]) + # } + # } + #} features @@ -81,8 +81,7 @@ query_sample_sources_against_ref <- function( sample_sources, sele, sele_args_frame = NULL, - warn_zero_rows=T, - char_as_factor=T + warn_zero_rows=T ){ tryCatch(sele,error=function(e){ cat("ERROR: The select statement ", sele, " is not defined.\n") @@ -151,14 +150,15 @@ In the returned data.frame the there will be the following columns: cat(sele) return(features) } - - if(char_as_factor){ - for(col in names(features)){ - if(is.character(features[,col])){ - features[,col] <- factor(features[,col]) - } - } - } + + + #if(char_as_factor){ + # for(col in names(features)){ + # if(is.character(features[,col])){ + # features[,col] <- factor(features[,col]) + # } + # } + #} data.frame( ref_sample_source = factor(ref_ss$sample_source[1]), new_sample_source = factor(features$sample_source), diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R 
b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R index 5d0cb5d..fa53d0d 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R @@ -56,7 +56,7 @@ run=function(self, sample_sources, output_dir, output_formats){ CDR LIKE '%Proto%'" } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index 218bd33..8ad248b 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -31,7 +31,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ab_metrics " - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R index d3e6e63..a479579 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R @@ -77,7 +77,7 @@ run=function(self, sample_sources, output_dir, output_formats){ " } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R index 
488d982..c999d9d 100644 --- a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R +++ b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R @@ -50,7 +50,7 @@ run=function(self, sample_sources, output_dir, output_formats){ cdr_metrics where CDR LIKE '%Proto%'" } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) plot_parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R index 5cdf2fe..f70d3b8 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R @@ -62,7 +62,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #CDR Charge Histogram - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) data$cdr_length = paste(data$CDR, data$length, sep="_") avgs = ddply(data, .(sample_source, cdr_length, cluster), function(data) { diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R index ed4f17d..1bf0da7 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R @@ -27,7 +27,7 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM ab_metrics" - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) plot_parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), @@ -180,7 +180,7 @@ run=function(self, sample_sources, output_dir, output_formats){ } #CDR Charge Density - data = 
query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) dens <- estimate_density_1d(data, c("sample_source", "CDR"), c("charge")) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size = 1.2) + diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R index e9359b3..83aeb94 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_cdr_length_correlations.R @@ -30,7 +30,7 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM cdr_metrics" - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) plot_field = function(p, plot_id, grid = NULL){ diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R index 2fca0ce..3ca2a11 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R @@ -51,7 +51,7 @@ run=function(self, sample_sources, output_dir, output_formats){ cdr_metrics WHERE CDR LIKE '%Proto%'" } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) plot_parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), @@ -160,7 +160,7 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM ab_metrics" - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) #Total CDR residues density diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R 
b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R index edeaf3a..1c57252 100644 --- a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R +++ b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R @@ -9,6 +9,7 @@ library(ggplot2) library(plyr) +library(grid) feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ag_ab_contact_den", @@ -32,7 +33,7 @@ run=function(self, sample_sources, output_dir, output_formats){ cdr_metrics " - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R index adc786f..87c8841 100644 --- a/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R +++ b/inst/scripts/analysis/plots/antibodies/energies/ab_cdr_energy_den.R @@ -30,7 +30,7 @@ run=function(self, sample_sources, output_dir, output_formats){ cdr_metrics " - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R index dbfea6b..070576c 100644 --- a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R +++ b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R @@ -34,7 +34,7 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM ab_h3_kink_metrics" - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff 
--git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R index 2dd6cc2..1a9aa02 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R @@ -326,9 +326,9 @@ SELECT #NOT (don.HBChemType == 'hbdon_PBA' AND acc.HBChemType == 'hbacc_PBA') - don_data = query_sample_sources(sample_sources, sele_don, char_as_factor=F) - acc_data = query_sample_sources(sample_sources, sele_acc, char_as_factor=F) - total_cdrs = query_sample_sources(sample_sources, sele_total_cdrs, char_as_factor=F) + don_data = query_sample_sources(sample_sources, sele_don) + acc_data = query_sample_sources(sample_sources, sele_acc) + total_cdrs = query_sample_sources(sample_sources, sele_total_cdrs) #print(sum(data$struct_id==1)) #print(sum(data$struct_id==2)) diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R index 0803ea2..6b2371c 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_framework_hbonds_den.R @@ -85,8 +85,8 @@ SELECT don_res.side == 'side2' " - don_data = query_sample_sources(sample_sources, sele_don, char_as_factor=F) - acc_data = query_sample_sources(sample_sources, sele_acc, char_as_factor=F) + don_data = query_sample_sources(sample_sources, sele_don) + acc_data = query_sample_sources(sample_sources, sele_acc) #print(sum(data$struct_id==1)) diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R index 7afb09c..4c5220e 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R @@ -157,7 
+157,7 @@ SELECT } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) #print(sum(data$struct_id==1)) #print(sum(data$struct_id==2)) diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R index 90fa46c..8afdb67 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R @@ -220,9 +220,9 @@ run=function(self, sample_sources, output_dir, output_formats){ } - don_data = query_sample_sources(sample_sources, sele_don, char_as_factor=F) - acc_data = query_sample_sources(sample_sources, sele_acc, char_as_factor=F) - total_data = query_sample_sources(sample_sources, sele_total_cdrs, char_as_factor=F) + don_data = query_sample_sources(sample_sources, sele_don) + acc_data = query_sample_sources(sample_sources, sele_acc) + total_data = query_sample_sources(sample_sources, sele_total_cdrs) #print(sum(data$struct_id==1)) #print(sum(data$struct_id==2)) diff --git a/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R index 96509ea..c022de3 100644 --- a/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R +++ b/inst/scripts/analysis/plots/antibodies/packing_angle/ab_packing_angle_den.R @@ -32,7 +32,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ab_metrics " - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index cc01e44..a931ad9 100644 --- 
a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -31,7 +31,7 @@ run=function(self, sample_sources, output_dir, output_formats){ # FROM # interfaces" # -# data = query_sample_sources(sample_sources, sele, char_as_factor=F) +# data = query_sample_sources(sample_sources, sele) # # fields = c("dSASA", "dSASA_hphobic", "dSASA_polar") @@ -94,7 +94,7 @@ run=function(self, sample_sources, output_dir, output_formats){ parts = list(plot_parts, xlab("SASA")) - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) data$polar_fraction = (data$dSASA - data$dhSASA)/data$dSASA field = "polar_fraction" diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R index 4dda551..63ae299 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R @@ -61,7 +61,7 @@ run=function(self, sample_sources, output_dir, output_formats){ save_plots(self, plot_id, sample_sources, output_dir, output_formats) } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) #AvgFields fields = c("avg_per_residue_dSASA", diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R index a96aa9e..2148ef5 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R @@ -51,7 +51,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #Density plots - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) ##Overall plots for all residues: Add Side data 
once we have this. #Densities diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R index 31dad44..1522c88 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R @@ -48,7 +48,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #Density plots - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) ##Overall plots for all residues: Add Side data once we have this. #Scatterplots diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R index ca0f902..c07a27d 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R @@ -32,7 +32,7 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM interfaces" - int_data = query_sample_sources(sample_sources, sele, char_as_factor=F) + int_data = query_sample_sources(sample_sources, sele) @@ -66,7 +66,7 @@ run=function(self, sample_sources, output_dir, output_formats){ save_plots(self, plot_id, sample_sources, output_dir, output_formats) } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) #print(data) diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R index c458f5b..83f40c9 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R @@ -61,7 +61,7 @@ run=function(self, sample_sources, output_dir, output_formats){ 
interface_residues.struct_id == residues.struct_id and interface_residues.resNum == residues.resNum" - res_data = query_sample_sources(sample_sources, sele, char_as_factor=F) + res_data = query_sample_sources(sample_sources, sele) #Scatterplot of dSASA vs dSASA fraction for each restype. #All interfaces - Coloring by Sample Source may be too damn confusing, so change this if need be. diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R index 63e9616..c0a052e 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-hbond_stats_by_restype.R @@ -55,7 +55,7 @@ run=function(self, sample_sources, output_dir, output_formats){ " - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) data$pair_name = paste(data$don_name3, data$acc_name3, sep="-") plot_parts <- list( diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index 1fc93a1..a7120f7 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -65,7 +65,7 @@ run=function(self, sample_sources, output_dir, output_formats){ } fields = c("aromatic_fraction") - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) for(field in fields){ fieldSP = unlist(strsplit(field, split="_")) @@ -127,7 +127,7 @@ run=function(self, sample_sources, output_dir, output_formats){ interface_residues.resNum == residues.resNum and residues.name3==residue_type.name3 " - res_data = query_sample_sources(sample_sources, sele, char_as_factor=T) 
+ res_data = query_sample_sources(sample_sources, sele) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index fb11699..9d07377 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -48,7 +48,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #Density plots - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) ##Overall plots for all residues: Add Side data once we have this. #Densities diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index 5864537..3e130b6 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -52,7 +52,7 @@ run=function(self, sample_sources, output_dir, output_formats){ hb.struct_id == hb_geom.struct_id " - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) #print(sum(data$struct_id==1)) #print(sum(data$struct_id==2)) #print(sum(data$struct_id==3)) diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R index 25b63e8..8a9f84d 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R @@ -29,7 +29,7 @@ run=function(self, sample_sources, output_dir, output_formats){ - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) plot_field = function(p, plot_id, grid = NULL){ diff --git 
a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R index 1fc576f..0c29ae0 100644 --- a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R +++ b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R @@ -50,7 +50,7 @@ run=function(self, sample_sources, output_dir, output_formats){ } fields = c("ss_sheet_fraction", "ss_helix_fraction", "ss_loop_fraction") - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) for(field in fields){ fieldSP = unlist(strsplit(field, split="_")) diff --git a/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R b/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R index 6683e20..6517d98 100644 --- a/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R +++ b/inst/scripts/analysis/statistics/antibodies/cdr_cluster_recovery.R @@ -73,7 +73,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ss = sample_sources[i,] summary(ss) ss_id = as.character(ss$sample_source) - result = query_sample_source(ss, sele, char_as_factor=F) + result = query_sample_source(ss, sele) for (cdr in result$CDR){ if (! 
any(current_data$CDR==cdr & current_data$sample_source == ss_id)){ new_df = data.frame(CDR=as.character(cdr), sample_source = ss_id, normDis_deg=0, fullcluster="NA", stringsAsFactors=F) @@ -84,7 +84,7 @@ run=function(self, sample_sources, output_dir, output_formats){ return(current_data) } #Get input_tags to match natives - result = query_sample_source(sample_sources[1,], "SELECT input_tag from structures", char_as_factor=F) + result = query_sample_source(sample_sources[1,], "SELECT input_tag from structures") native_tags = result$input_tag get_and_write_recovery = function(sele, type){ all_data = adply(native_tags, 1, function(native_tag) { @@ -98,7 +98,7 @@ run=function(self, sample_sources, output_dir, output_formats){ match = paste("%", tag, "%", sep="") tag_frame = data.frame(like_tag=match, tag=native_tag) - data = query_sample_sources_against_ref(sample_sources, sele, sele_args_frame=tag_frame, char_as_factor=F) + data = query_sample_sources_against_ref(sample_sources, sele, sele_args_frame=tag_frame) data = create_zero_data(data) #Type is length or cluster here: @@ -108,7 +108,7 @@ run=function(self, sample_sources, output_dir, output_formats){ res_by_cdr = ddply(data_by_ss, "CDR", function(data_by_cdr, type) { cdr = data_by_cdr$CDR[1] - total_data = query_sample_sources(sample_sources, total_sele, sele_args_frame=data.frame(like_tag=match, c=cdr), char_as_factor=F) + total_data = query_sample_sources(sample_sources, total_sele, sele_args_frame=data.frame(like_tag=match, c=cdr)) total_decoys = length(total_data$fullcluster[total_data$sample_source == as.character(data_by_ss$sample_source[1])]) if (length(data_by_cdr$normDis_deg[data_by_cdr$normDis_deg > 0]) == 0){ From a131d5a3de021ed97629aeb7dddf253594e40db7 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 16 Jun 2016 17:33:24 -0500 Subject: [PATCH 17/55] comment out cdr4 option or now, until it can be set via json. 
--- .../plots/antibodies/SASA/ab_cdr_SASA_den.R | 44 +- .../antibodies/SASA/ag_ab_cdr_dSASA_den.R | 78 ++-- .../anchor_distances/ab_cdr_anchor_dis_den.R | 44 +- .../plots/antibodies/charge/ab_charge_den.R | 48 +-- .../composition/ab_composition_den.R | 45 +- .../antibodies/hbonds/ab_cdr_cdr_hbonds_den.R | 406 +++++++++--------- .../hbonds/ab_intra_cdr_hbonds_den.R | 182 ++++---- .../antibodies/hbonds/ag_ab_cdr_hbonds_den.R | 266 ++++++------ install_from_momeara_github.R | 13 + install_local.R | 4 +- 10 files changed, 573 insertions(+), 557 deletions(-) create mode 100755 install_from_momeara_github.R diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R index fa53d0d..95b25cc 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_cdr_SASA_den.R @@ -21,7 +21,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #First we run on all the interfaces in the database - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele = " SELECT SASA, @@ -32,29 +32,29 @@ run=function(self, sample_sources, output_dir, output_formats){ WHERE CDR NOT LIKE '%Proto%' " - } + #} - if ("TRUE" %in% opt$options$include_cdr4){ - sele = " - SELECT - SASA, - CDR, - length - FROM - cdr_metrics" - } + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele = " + # SELECT + # SASA, + # CDR, + # length + # FROM + # cdr_metrics" + # } - if ("TRUE" %in% opt$options$cdr4_only){ - sele = " - SELECT - SASA, - CDR, - length - FROM - cdr_metrics - WHERE - CDR LIKE '%Proto%'" - } + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele = " + # SELECT + # SASA, + # CDR, + # length + # FROM + # cdr_metrics + # WHERE + # CDR LIKE '%Proto%'" + # } data = query_sample_sources(sample_sources, sele) diff --git 
a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R index a479579..b65a72d 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R @@ -21,7 +21,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #First we run on all the interfaces in the database - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele = " SELECT ag_ab_dSASA as dSASA, @@ -38,44 +38,44 @@ run=function(self, sample_sources, output_dir, output_formats){ dSASA > 0 and CDR NOT LIKE '%Proto%' " - } - - if ("TRUE" %in% opt$options$include_cdr4){ - sele = " - SELECT - ag_ab_dSASA as dSASA, - ag_ab_dSASA_sc as dSASA_sc, - ag_ab_dhSASA as dhSASA, - ag_ab_dhSASA_sc as dhSASA_sc, - ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge, - struct_id, - CDR, - length - FROM - cdr_metrics - WHERE - dSASA > 0 - " - } - - if ("TRUE" %in% opt$options$cdr4_only){ - sele = " - SELECT - ag_ab_dSASA as dSASA, - ag_ab_dSASA_sc as dSASA_sc, - ag_ab_dhSASA as dhSASA, - ag_ab_dhSASA_sc as dhSASA_sc, - ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge, - struct_id, - CDR, - length - FROM - cdr_metrics - WHERE - dSASA > 0 and - CDR LIKE '%Proto%' - " - } + #} + + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele = " + # SELECT + # ag_ab_dSASA as dSASA, + # ag_ab_dSASA_sc as dSASA_sc, + # ag_ab_dhSASA as dhSASA, + # ag_ab_dhSASA_sc as dhSASA_sc, + # ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge, + # struct_id, + # CDR, + # length + # FROM + # cdr_metrics + # WHERE + # dSASA > 0 + # " + # } + # + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele = " + # SELECT + # ag_ab_dSASA as dSASA, + # ag_ab_dSASA_sc as dSASA_sc, + # ag_ab_dhSASA as dhSASA, + # ag_ab_dhSASA_sc as dhSASA_sc, + # ag_ab_dhSASA_rel_by_charge as 
dhSASA_rel_by_charge, + # struct_id, + # CDR, + # length + # FROM + # cdr_metrics + # WHERE + # dSASA > 0 and + # CDR LIKE '%Proto%' + # " + # } data = query_sample_sources(sample_sources, sele) diff --git a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R index c999d9d..d0d60e9 100644 --- a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R +++ b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R @@ -9,7 +9,7 @@ library(ggplot2) library(plyr) -library(grid) + feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "ab_anchor_dis", @@ -20,7 +20,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #First we run on all the interfaces in the database - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele = " SELECT anchor_CN_distance, @@ -28,29 +28,29 @@ run=function(self, sample_sources, output_dir, output_formats){ length FROM cdr_metrics where CDR NOT LIKE '%Proto%'" - } + #} - if ("TRUE" %in% opt$options$include_cdr4){ - sele = " - SELECT - anchor_CN_distance, - CDR, - length - FROM - cdr_metrics" - } + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele = " + # SELECT + # anchor_CN_distance, + # CDR, + # length + # FROM + # cdr_metrics" + # } - if ("TRUE" %in% opt$options$cdr4_only){ - sele = " - SELECT - anchor_CN_distance, - CDR, - length - FROM - cdr_metrics where CDR LIKE '%Proto%'" - } + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele = " + # SELECT + # anchor_CN_distance, + # CDR, + # length + # FROM + # cdr_metrics where CDR LIKE '%Proto%'" + # } - data = query_sample_sources(sample_sources, sele) + data = query_sample_sources(sample_sources, sele, char_as_factor=F) plot_parts <- list( geom_indicator(aes(indicator=counts, 
colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R index 1bf0da7..9bfd37c 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R @@ -145,7 +145,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #scale_y_continuous("n") plot_field(p, "avg_paratope_charge_hist") - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele = " SELECT charge, @@ -155,29 +155,29 @@ run=function(self, sample_sources, output_dir, output_formats){ cdr_metrics WHERE CDR NOT LIKE '%Proto%'" - } - - if ("TRUE" %in% opt$options$include_cdr4){ - sele = " - SELECT - charge, - CDR, - length - FROM - cdr_metrics" - } - - if ("TRUE" %in% opt$options$cdr4_only){ - sele = " - SELECT - charge, - CDR, - length - FROM - cdr_metrics - WHERE - CDR LIKE '%Proto%'" - } + #} + + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele = " + # SELECT + # charge, + # CDR, + # length + # FROM + # cdr_metrics" + # } + + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele = " + # SELECT + # charge, + # CDR, + # length + # FROM + # cdr_metrics + # WHERE + # CDR LIKE '%Proto%'" + # } #CDR Charge Density data = query_sample_sources(sample_sources, sele) diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R index 3ca2a11..e03511c 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R @@ -20,7 +20,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #First we run on all the interfaces in the database - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" 
%in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele = " SELECT length, @@ -29,28 +29,29 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM cdr_metrics WHERE CDR NOT LIKE '%Proto%'" - } - - if ("TRUE" %in% opt$options$include_cdr4){ - sele = " - SELECT - length, - CDR, - aromatic_nres/length as aromatic_makeup - FROM - cdr_metrics" - } + #} + + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele = " + # SELECT + # length, + # CDR, + # aromatic_nres/length as aromatic_makeup + # FROM + # cdr_metrics" + # } + + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele = " + # SELECT + # length, + # CDR, + # aromatic_nres/length as aromatic_makeup + # FROM + # cdr_metrics + # WHERE CDR LIKE '%Proto%'" + # } - if ("TRUE" %in% opt$options$cdr4_only){ - sele = " - SELECT - length, - CDR, - aromatic_nres/length as aromatic_makeup - FROM - cdr_metrics - WHERE CDR LIKE '%Proto%'" - } data = query_sample_sources(sample_sources, sele) plot_parts <- list( diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R index 1a9aa02..21ce7c3 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_cdr_cdr_hbonds_den.R @@ -18,7 +18,7 @@ brief_description = "CDR - CDR Hbonds", feature_reporter_dependencies = c("AntibodyFeatures", "HBondFeatures"), run=function(self, sample_sources, output_dir, output_formats){ - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele_don = " SELECT hb.energy as energy, @@ -120,209 +120,209 @@ SELECT WHERE CDR NOT LIKE '%Proto%' " - } - - if ("TRUE" %in% opt$options$include_cdr4){ - sele_don = " - SELECT - hb.energy as energy, - don.struct_id as struct_id, - don.resNum as resnum, - 
hb_geom.AHdist as distance, - don_c.CDR as CDR1, - acc_c.CDR as CDR2, - don.atmType as don_atm, - acc.atmType as acc_atm, - don.HBChemType as don_type, - acc.HBChemType as acc_type, - don_ss.dssp as don_dssp, - acc_ss.dssp as acc_dssp - FROM - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues as don_c, - cdr_residues as acc_c, - residue_secondary_structure as don_ss, - residue_secondary_structure as acc_ss - WHERE - acc.struct_id == don.struct_id AND - don.struct_id == hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - don_c.struct_id = hb.struct_id AND - acc_c.struct_id = hb.struct_id AND - don_ss.struct_id = hb.struct_id AND - acc_ss.struct_id = hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - don_c.resNum = don.resNum AND - acc_c.resNum = acc.resNum AND - acc_ss.resNum = acc.resNum AND - don_ss.resNum = don.resNum AND - don_c.CDR != acc_c.CDR AND - NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND - don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') - " - - sele_acc = " - SELECT - hb.energy as energy, - don.struct_id as struct_id, - don.resNum as resnum, - hb_geom.AHdist as distance, - acc_c.CDR as CDR1, - don_c.CDR as CDR2, - don.atmType as don_atm, - acc.atmType as acc_atm, - don.HBChemType as don_type, - acc.HBChemType as acc_type, - don_ss.dssp as don_dssp, - acc_ss.dssp as acc_dssp - FROM - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues as don_c, - cdr_residues as acc_c, - residue_secondary_structure as don_ss, - residue_secondary_structure as acc_ss - WHERE - acc.struct_id == don.struct_id AND - don.struct_id == hb.struct_id AND - hb.struct_id == hb_geom.struct_id AND - don_c.struct_id = hb.struct_id AND - acc_c.struct_id = hb.struct_id AND - don_ss.struct_id = hb.struct_id AND - acc_ss.struct_id = hb.struct_id AND - hb.don_id == 
don.site_id AND - hb.acc_id == acc.site_id AND - hb.hbond_id == hb_geom.hbond_id AND - don_c.resNum = don.resNum AND - acc_c.resNum = acc.resNum AND - don.resNum = don_ss.resNum AND - acc.resNum = acc_ss.resNum AND - acc_ss.resNum = acc.resNum AND - don_ss.resNum = don.resNum AND - don_c.CDR != acc_c.CDR AND - NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND - don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') - " - - sele_total_cdrs = " - SELECT - struct_id, - CDR - FROM - cdr_residues - " - } + #} - if ("TRUE" %in% opt$options$cdr4_only){ - sele_don = " - SELECT - hb.energy as energy, - don.struct_id as struct_id, - don.resNum as resnum, - hb_geom.AHdist as distance, - don_c.CDR as CDR1, - acc_c.CDR as CDR2, - don.atmType as don_atm, - acc.atmType as acc_atm, - don.HBChemType as don_type, - acc.HBChemType as acc_type, - don_ss.dssp as don_dssp, - acc_ss.dssp as acc_dssp - FROM - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues as don_c, - cdr_residues as acc_c, - residue_secondary_structure as don_ss, - residue_secondary_structure as acc_ss - WHERE - acc.struct_id == don.struct_id AND - don.struct_id == hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - don_c.struct_id = hb.struct_id AND - acc_c.struct_id = hb.struct_id AND - don_ss.struct_id = hb.struct_id AND - acc_ss.struct_id = hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - don_c.resNum = don.resNum AND - acc_c.resNum = acc.resNum AND - acc_ss.resNum = acc.resNum AND - don_ss.resNum = don.resNum AND - don_c.CDR != acc_c.CDR AND - NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND - don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') AND - don_c.CDR LIKE '%Proto%' AND - acc_c.CDR LIKE '%Proto%' - " - - sele_acc = " - SELECT - hb.energy as energy, - don.struct_id as struct_id, - don.resNum as resnum, - 
hb_geom.AHdist as distance, - acc_c.CDR as CDR1, - don_c.CDR as CDR2, - don.atmType as don_atm, - acc.atmType as acc_atm, - don.HBChemType as don_type, - acc.HBChemType as acc_type, - don_ss.dssp as don_dssp, - acc_ss.dssp as acc_dssp - FROM - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues as don_c, - cdr_residues as acc_c, - residue_secondary_structure as don_ss, - residue_secondary_structure as acc_ss - WHERE - acc.struct_id == don.struct_id AND - don.struct_id == hb.struct_id AND - hb.struct_id == hb_geom.struct_id AND - don_c.struct_id = hb.struct_id AND - acc_c.struct_id = hb.struct_id AND - don_ss.struct_id = hb.struct_id AND - acc_ss.struct_id = hb.struct_id AND - hb.don_id == don.site_id AND - hb.acc_id == acc.site_id AND - hb.hbond_id == hb_geom.hbond_id AND - don_c.resNum = don.resNum AND - acc_c.resNum = acc.resNum AND - don.resNum = don_ss.resNum AND - acc.resNum = acc_ss.resNum AND - acc_ss.resNum = acc.resNum AND - don_ss.resNum = don.resNum AND - don_c.CDR != acc_c.CDR AND - NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND - don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') AND - don_c.CDR LIKE '%Proto%' AND - acc_c.CDR LIKE '%Proto%' - " - - sele_total_cdrs = " - SELECT - struct_id, - CDR - FROM - cdr_residues - WHERE - CDR LIKE '%Proto%' - " - } + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele_don = " + # SELECT + # hb.energy as energy, + # don.struct_id as struct_id, + # don.resNum as resnum, + # hb_geom.AHdist as distance, + # don_c.CDR as CDR1, + # acc_c.CDR as CDR2, + # don.atmType as don_atm, + # acc.atmType as acc_atm, + # don.HBChemType as don_type, + # acc.HBChemType as acc_type, + # don_ss.dssp as don_dssp, + # acc_ss.dssp as acc_dssp + # FROM + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues as don_c, + # cdr_residues as acc_c, + # residue_secondary_structure as don_ss, 
+ # residue_secondary_structure as acc_ss + # WHERE + # acc.struct_id == don.struct_id AND + # don.struct_id == hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # don_c.struct_id = hb.struct_id AND + # acc_c.struct_id = hb.struct_id AND + # don_ss.struct_id = hb.struct_id AND + # acc_ss.struct_id = hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # don_c.resNum = don.resNum AND + # acc_c.resNum = acc.resNum AND + # acc_ss.resNum = acc.resNum AND + # don_ss.resNum = don.resNum AND + # don_c.CDR != acc_c.CDR AND + # NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND + # don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') + # " + # + # sele_acc = " + # SELECT + # hb.energy as energy, + # don.struct_id as struct_id, + # don.resNum as resnum, + # hb_geom.AHdist as distance, + # acc_c.CDR as CDR1, + # don_c.CDR as CDR2, + # don.atmType as don_atm, + # acc.atmType as acc_atm, + # don.HBChemType as don_type, + # acc.HBChemType as acc_type, + # don_ss.dssp as don_dssp, + # acc_ss.dssp as acc_dssp + # FROM + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues as don_c, + # cdr_residues as acc_c, + # residue_secondary_structure as don_ss, + # residue_secondary_structure as acc_ss + # WHERE + # acc.struct_id == don.struct_id AND + # don.struct_id == hb.struct_id AND + # hb.struct_id == hb_geom.struct_id AND + # don_c.struct_id = hb.struct_id AND + # acc_c.struct_id = hb.struct_id AND + # don_ss.struct_id = hb.struct_id AND + # acc_ss.struct_id = hb.struct_id AND + # hb.don_id == don.site_id AND + # hb.acc_id == acc.site_id AND + # hb.hbond_id == hb_geom.hbond_id AND + # don_c.resNum = don.resNum AND + # acc_c.resNum = acc.resNum AND + # don.resNum = don_ss.resNum AND + # acc.resNum = acc_ss.resNum AND + # acc_ss.resNum = acc.resNum AND + # don_ss.resNum = don.resNum AND + # don_c.CDR != acc_c.CDR 
AND + # NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND + # don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') + # " + # + # sele_total_cdrs = " + # SELECT + # struct_id, + # CDR + # FROM + # cdr_residues + # " + # } + # + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele_don = " + # SELECT + # hb.energy as energy, + # don.struct_id as struct_id, + # don.resNum as resnum, + # hb_geom.AHdist as distance, + # don_c.CDR as CDR1, + # acc_c.CDR as CDR2, + # don.atmType as don_atm, + # acc.atmType as acc_atm, + # don.HBChemType as don_type, + # acc.HBChemType as acc_type, + # don_ss.dssp as don_dssp, + # acc_ss.dssp as acc_dssp + # FROM + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues as don_c, + # cdr_residues as acc_c, + # residue_secondary_structure as don_ss, + # residue_secondary_structure as acc_ss + # WHERE + # acc.struct_id == don.struct_id AND + # don.struct_id == hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # don_c.struct_id = hb.struct_id AND + # acc_c.struct_id = hb.struct_id AND + # don_ss.struct_id = hb.struct_id AND + # acc_ss.struct_id = hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # don_c.resNum = don.resNum AND + # acc_c.resNum = acc.resNum AND + # acc_ss.resNum = acc.resNum AND + # don_ss.resNum = don.resNum AND + # don_c.CDR != acc_c.CDR AND + # NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND + # don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') AND + # don_c.CDR LIKE '%Proto%' AND + # acc_c.CDR LIKE '%Proto%' + # " + # + # sele_acc = " + # SELECT + # hb.energy as energy, + # don.struct_id as struct_id, + # don.resNum as resnum, + # hb_geom.AHdist as distance, + # acc_c.CDR as CDR1, + # don_c.CDR as CDR2, + # don.atmType as don_atm, + # acc.atmType as acc_atm, + # don.HBChemType as don_type, + # acc.HBChemType as 
acc_type, + # don_ss.dssp as don_dssp, + # acc_ss.dssp as acc_dssp + # FROM + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues as don_c, + # cdr_residues as acc_c, + # residue_secondary_structure as don_ss, + # residue_secondary_structure as acc_ss + # WHERE + # acc.struct_id == don.struct_id AND + # don.struct_id == hb.struct_id AND + # hb.struct_id == hb_geom.struct_id AND + # don_c.struct_id = hb.struct_id AND + # acc_c.struct_id = hb.struct_id AND + # don_ss.struct_id = hb.struct_id AND + # acc_ss.struct_id = hb.struct_id AND + # hb.don_id == don.site_id AND + # hb.acc_id == acc.site_id AND + # hb.hbond_id == hb_geom.hbond_id AND + # don_c.resNum = don.resNum AND + # acc_c.resNum = acc.resNum AND + # don.resNum = don_ss.resNum AND + # acc.resNum = acc_ss.resNum AND + # acc_ss.resNum = acc.resNum AND + # don_ss.resNum = don.resNum AND + # don_c.CDR != acc_c.CDR AND + # NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND + # don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') AND + # don_c.CDR LIKE '%Proto%' AND + # acc_c.CDR LIKE '%Proto%' + # " + # + # sele_total_cdrs = " + # SELECT + # struct_id, + # CDR + # FROM + # cdr_residues + # WHERE + # CDR LIKE '%Proto%' + # " + # } #NOT (don.HBChemType == 'hbdon_PBA' AND acc.HBChemType == 'hbacc_PBA') diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R index 4c5220e..68520df 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ab_intra_cdr_hbonds_den.R @@ -20,7 +20,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #Checked, works perfectly fine: - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele 
= " SELECT DISTINCT @@ -64,97 +64,97 @@ SELECT don_c.CDR NOT LIKE '%Proto%' AND acc_c.CDR NOT LIKE '%Proto%' " - } - - if ("TRUE" %in% opt$options$include_cdr4){ - sele = " - SELECT - DISTINCT - hb.energy as energy, - don.struct_id as struct_id, - don.resNum as resnum1, - acc.resNum as resnum2, - hb_geom.AHdist as distance, - don_c.CDR as CDR, - don.atmType as don_atm, - acc.atmType as acc_atm, - don.HBChemType as don_type, - acc.HBChemType as acc_type - FROM - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues as don_c, - cdr_residues as acc_c, - residue_secondary_structure as don_ss, - residue_secondary_structure as acc_ss - WHERE - acc.struct_id == don.struct_id AND - don.struct_id == hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - don_c.struct_id = hb.struct_id AND - acc_c.struct_id = hb.struct_id AND - don_ss.struct_id = hb.struct_id AND - acc_ss.struct_id = hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - don_c.resNum = don.resNum AND - acc_c.resNum = acc.resNum AND - don_c.CDR == acc_c.CDR AND - acc_ss.resNum = acc.resNum AND - don_ss.resNum = don.resNum AND - NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND - don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') - " - } + #} - if ("TRUE" %in% opt$options$cdr4_only){ - sele = " - SELECT - DISTINCT - hb.energy as energy, - don.struct_id as struct_id, - don.resNum as resnum1, - acc.resNum as resnum2, - hb_geom.AHdist as distance, - don_c.CDR as CDR, - don.atmType as don_atm, - acc.atmType as acc_atm, - don.HBChemType as don_type, - acc.HBChemType as acc_type - FROM - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues as don_c, - cdr_residues as acc_c, - residue_secondary_structure as don_ss, - residue_secondary_structure as acc_ss - WHERE - acc.struct_id == don.struct_id AND - don.struct_id == 
hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - don_c.struct_id = hb.struct_id AND - acc_c.struct_id = hb.struct_id AND - don_ss.struct_id = hb.struct_id AND - acc_ss.struct_id = hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - don_c.resNum = don.resNum AND - acc_c.resNum = acc.resNum AND - don_c.CDR == acc_c.CDR AND - acc_ss.resNum = acc.resNum AND - don_ss.resNum = don.resNum AND - NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND - don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') AND - don_c.CDR LIKE '%Proto%' AND - acc_c.CDR LIKE '%Proto%' - " - } + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele = " + # SELECT + # DISTINCT + # hb.energy as energy, + # don.struct_id as struct_id, + # don.resNum as resnum1, + # acc.resNum as resnum2, + # hb_geom.AHdist as distance, + # don_c.CDR as CDR, + # don.atmType as don_atm, + # acc.atmType as acc_atm, + # don.HBChemType as don_type, + # acc.HBChemType as acc_type + # FROM + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues as don_c, + # cdr_residues as acc_c, + # residue_secondary_structure as don_ss, + # residue_secondary_structure as acc_ss + # WHERE + # acc.struct_id == don.struct_id AND + # don.struct_id == hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # don_c.struct_id = hb.struct_id AND + # acc_c.struct_id = hb.struct_id AND + # don_ss.struct_id = hb.struct_id AND + # acc_ss.struct_id = hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # don_c.resNum = don.resNum AND + # acc_c.resNum = acc.resNum AND + # don_c.CDR == acc_c.CDR AND + # acc_ss.resNum = acc.resNum AND + # don_ss.resNum = don.resNum AND + # NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND + # don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') + # " 
+ # } + # + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele = " + # SELECT + # DISTINCT + # hb.energy as energy, + # don.struct_id as struct_id, + # don.resNum as resnum1, + # acc.resNum as resnum2, + # hb_geom.AHdist as distance, + # don_c.CDR as CDR, + # don.atmType as don_atm, + # acc.atmType as acc_atm, + # don.HBChemType as don_type, + # acc.HBChemType as acc_type + # FROM + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues as don_c, + # cdr_residues as acc_c, + # residue_secondary_structure as don_ss, + # residue_secondary_structure as acc_ss + # WHERE + # acc.struct_id == don.struct_id AND + # don.struct_id == hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # don_c.struct_id = hb.struct_id AND + # acc_c.struct_id = hb.struct_id AND + # don_ss.struct_id = hb.struct_id AND + # acc_ss.struct_id = hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # don_c.resNum = don.resNum AND + # acc_c.resNum = acc.resNum AND + # don_c.CDR == acc_c.CDR AND + # acc_ss.resNum = acc.resNum AND + # don_ss.resNum = don.resNum AND + # NOT (don_ss.dssp = acc_ss.dssp AND don_ss.dssp = 'E' AND acc_ss.dssp='E' AND + # don.HBChemType = 'hbdon_PBA' and acc.HBChemType='hbacc_PBA') AND + # don_c.CDR LIKE '%Proto%' AND + # acc_c.CDR LIKE '%Proto%' + # " + # } data = query_sample_sources(sample_sources, sele) diff --git a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R index 8afdb67..fa8c132 100644 --- a/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R +++ b/inst/scripts/analysis/plots/antibodies/hbonds/ag_ab_cdr_hbonds_den.R @@ -19,7 +19,7 @@ brief_description = "CDR - Antigen Hbonds. 
Must have LH_A analyzed by features feature_reporter_dependencies = c("AntibodyFeatures", "HBondFeatures"), run=function(self, sample_sources, output_dir, output_formats){ - if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ + #if ("FALSE" %in% opt$options$include_cdr4 & "FALSE" %in% opt$options$cdr4_only){ sele_don = " SELECT hb.energy as energy, @@ -86,139 +86,139 @@ run=function(self, sample_sources, output_dir, output_formats){ FROM cdr_residues where CDR NOT LIKE '%Proto%'" - } - - if ("TRUE" %in% opt$options$include_cdr4){ - sele_don = " - SELECT - hb.energy as energy, - hb.struct_id as struct_id, - don.resNum as resnum, - hb_geom.AHdist as distance, - cdr_residues.CDR - FROM - interface_residues as acc_res, - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues - WHERE - acc.struct_id == hb.struct_id AND - don.struct_id == hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - cdr_residues.struct_id = hb.struct_id AND - acc_res.struct_id = hb.struct_id AND - acc.struct_id == hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - cdr_residues.resNum = don.resNum AND - acc_res.resNum = acc.resNum AND - acc_res.side == 'side2'" - - sele_acc = " - SELECT - hb.energy as energy, - hb.struct_id as struct_id, - acc.resNum as resnum, - hb_geom.AHdist as distance, - cdr_residues.CDR - FROM - interface_residues as don_res, - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues - WHERE - acc.struct_id = hb.struct_id AND - don.struct_id = hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - cdr_residues.struct_id = hb.struct_id AND - don_res.struct_id = hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - cdr_residues.resNum = acc.resNum AND - don_res.resNum = don.resNum AND - don_res.side = 'side2'" - - 
sele_total_cdrs = " - SELECT - struct_id, - CDR - FROM - cdr_residues" - - } + #} - if ("TRUE" %in% opt$options$cdr4_only){ - sele_don = " - SELECT - hb.energy as energy, - hb.struct_id as struct_id, - don.resNum as resnum, - hb_geom.AHdist as distance, - cdr_residues.CDR - FROM - interface_residues as acc_res, - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues - WHERE - acc.struct_id == hb.struct_id AND - don.struct_id == hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - cdr_residues.struct_id = hb.struct_id AND - acc_res.struct_id = hb.struct_id AND - acc.struct_id == hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - cdr_residues.resNum = don.resNum AND - acc_res.resNum = acc.resNum AND - acc_res.side == 'side2' AND - CDR LIKE '%Proto%'" - - sele_acc = " - SELECT - hb.energy as energy, - hb.struct_id as struct_id, - acc.resNum as resnum, - hb_geom.AHdist as distance, - cdr_residues.CDR - FROM - interface_residues as don_res, - hbond_sites AS don, - hbond_sites AS acc, - hbonds AS hb, - hbond_geom_coords as hb_geom, - cdr_residues - WHERE - acc.struct_id = hb.struct_id AND - don.struct_id = hb.struct_id AND - hb_geom.struct_id = hb.struct_id AND - cdr_residues.struct_id = hb.struct_id AND - don_res.struct_id = hb.struct_id AND - don.site_id = hb.don_id AND - acc.site_id = hb.acc_id AND - hb_geom.hbond_id = hb.hbond_id AND - cdr_residues.resNum = acc.resNum AND - don_res.resNum = don.resNum AND - don_res.side = 'side2' AND - CDR LIKE '%Proto%'" - - sele_total_cdrs = " - SELECT - struct_id, - CDR - FROM - cdr_residues where CDR = 'Proto_H4' or CDR = 'Proto_L4'" - - } + # if ("TRUE" %in% opt$options$include_cdr4){ + # sele_don = " + # SELECT + # hb.energy as energy, + # hb.struct_id as struct_id, + # don.resNum as resnum, + # hb_geom.AHdist as distance, + # cdr_residues.CDR + # FROM + # interface_residues as acc_res, + # hbond_sites AS 
don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues + # WHERE + # acc.struct_id == hb.struct_id AND + # don.struct_id == hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # cdr_residues.struct_id = hb.struct_id AND + # acc_res.struct_id = hb.struct_id AND + # acc.struct_id == hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # cdr_residues.resNum = don.resNum AND + # acc_res.resNum = acc.resNum AND + # acc_res.side == 'side2'" + # + # sele_acc = " + # SELECT + # hb.energy as energy, + # hb.struct_id as struct_id, + # acc.resNum as resnum, + # hb_geom.AHdist as distance, + # cdr_residues.CDR + # FROM + # interface_residues as don_res, + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues + # WHERE + # acc.struct_id = hb.struct_id AND + # don.struct_id = hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # cdr_residues.struct_id = hb.struct_id AND + # don_res.struct_id = hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # cdr_residues.resNum = acc.resNum AND + # don_res.resNum = don.resNum AND + # don_res.side = 'side2'" + # + # sele_total_cdrs = " + # SELECT + # struct_id, + # CDR + # FROM + # cdr_residues" + # + # } + # + # if ("TRUE" %in% opt$options$cdr4_only){ + # sele_don = " + # SELECT + # hb.energy as energy, + # hb.struct_id as struct_id, + # don.resNum as resnum, + # hb_geom.AHdist as distance, + # cdr_residues.CDR + # FROM + # interface_residues as acc_res, + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues + # WHERE + # acc.struct_id == hb.struct_id AND + # don.struct_id == hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # cdr_residues.struct_id = hb.struct_id AND + # acc_res.struct_id = hb.struct_id AND + # 
acc.struct_id == hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # cdr_residues.resNum = don.resNum AND + # acc_res.resNum = acc.resNum AND + # acc_res.side == 'side2' AND + # CDR LIKE '%Proto%'" + # + # sele_acc = " + # SELECT + # hb.energy as energy, + # hb.struct_id as struct_id, + # acc.resNum as resnum, + # hb_geom.AHdist as distance, + # cdr_residues.CDR + # FROM + # interface_residues as don_res, + # hbond_sites AS don, + # hbond_sites AS acc, + # hbonds AS hb, + # hbond_geom_coords as hb_geom, + # cdr_residues + # WHERE + # acc.struct_id = hb.struct_id AND + # don.struct_id = hb.struct_id AND + # hb_geom.struct_id = hb.struct_id AND + # cdr_residues.struct_id = hb.struct_id AND + # don_res.struct_id = hb.struct_id AND + # don.site_id = hb.don_id AND + # acc.site_id = hb.acc_id AND + # hb_geom.hbond_id = hb.hbond_id AND + # cdr_residues.resNum = acc.resNum AND + # don_res.resNum = don.resNum AND + # don_res.side = 'side2' AND + # CDR LIKE '%Proto%'" + # + # sele_total_cdrs = " + # SELECT + # struct_id, + # CDR + # FROM + # cdr_residues where CDR = 'Proto_H4' or CDR = 'Proto_L4'" + # + # } don_data = query_sample_sources(sample_sources, sele_don) acc_data = query_sample_sources(sample_sources, sele_acc) diff --git a/install_from_momeara_github.R b/install_from_momeara_github.R new file mode 100755 index 0000000..0478348 --- /dev/null +++ b/install_from_momeara_github.R @@ -0,0 +1,13 @@ +#!/usr/bin/env Rscript + +#NOTE: You must run this script with SUDO!! 
+ + +required_packages <- c("devtools", "roxygen2") + +if (length(setdiff(required_packages, rownames(installed.packages()))) > 0) { + install.packages(setdiff(required_packages, rownames(installed.packages())), repos="http://cran.rstudio.com/") +} + + +devtools::install_github("momeara/RosettaFeatures") \ No newline at end of file diff --git a/install_local.R b/install_local.R index 1d29dbd..ec9be15 100755 --- a/install_local.R +++ b/install_local.R @@ -10,7 +10,9 @@ if (length(setdiff(required_packages, rownames(installed.packages()))) > 0) { install.packages(setdiff(required_packages, rownames(installed.packages())), repos="http://cran.rstudio.com/") } +library(methods) + devtools::document() # if you changed function signatures -devtools:build() +devtools::build() devtools::install_local(".") From 4fba537cb7b3c4fdaef65d486f3fd5f4242b96aa Mon Sep 17 00:00:00 2001 From: Lab Administrator Date: Tue, 28 Jun 2016 13:28:50 -0400 Subject: [PATCH 18/55] changes to local install. Please update .Renviron with your path-to-r-library --- .Renviron | 1 + .Rprofile | 5 +++++ install_local_test.R | 3 +++ 3 files changed, 9 insertions(+) create mode 100644 .Renviron create mode 100644 .Rprofile create mode 100755 install_local_test.R diff --git a/.Renviron b/.Renviron new file mode 100644 index 0000000..f910411 --- /dev/null +++ b/.Renviron @@ -0,0 +1 @@ +R_LIBS=/Library/Frameworks/R.framework/Versions/3.3/Resources/library/ diff --git a/.Rprofile b/.Rprofile new file mode 100644 index 0000000..eba35c7 --- /dev/null +++ b/.Rprofile @@ -0,0 +1,5 @@ +cat(".Rprofile: Setting UK repositoryn") +r = getOption("repos") # hard code the UK repo for CRAN +r["CRAN"] = "http://cran.uk.r-project.org" +options(repos = r) +rm(r) diff --git a/install_local_test.R b/install_local_test.R new file mode 100755 index 0000000..57c149f --- /dev/null +++ b/install_local_test.R @@ -0,0 +1,3 @@ +#!/usr/bin/env Rscript + +install.packages("devtools", repos="http://cran.rstudio.com/") From 
912b36c03ea53613d2dc2b55e27832f326937ce4 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 12:01:51 -0500 Subject: [PATCH 19/55] update gitignore. remove Rprofile and Renviron from PR. --- .Renviron | 1 - .Rprofile | 5 ----- .gitignore | 2 ++ install_local.R | 3 ++- 4 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 .Renviron delete mode 100644 .Rprofile diff --git a/.Renviron b/.Renviron deleted file mode 100644 index f910411..0000000 --- a/.Renviron +++ /dev/null @@ -1 +0,0 @@ -R_LIBS=/Library/Frameworks/R.framework/Versions/3.3/Resources/library/ diff --git a/.Rprofile b/.Rprofile deleted file mode 100644 index eba35c7..0000000 --- a/.Rprofile +++ /dev/null @@ -1,5 +0,0 @@ -cat(".Rprofile: Setting UK repositoryn") -r = getOption("repos") # hard code the UK repo for CRAN -r["CRAN"] = "http://cran.uk.r-project.org" -options(repos = r) -rm(r) diff --git a/.gitignore b/.gitignore index 52eb83e..4c486a8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ inst/doc *.Rds Rplots.pdf *~ +.Rprofile +.Renviron diff --git a/install_local.R b/install_local.R index ec9be15..1ad2442 100755 --- a/install_local.R +++ b/install_local.R @@ -1,5 +1,6 @@ #!/usr/bin/env Rscript + #NOTE: You must run this script with SUDO!! #install.packages("devtools", repos="http://cran.rstudio.com/") @@ -12,7 +13,7 @@ if (length(setdiff(required_packages, rownames(installed.packages()))) > 0) { library(methods) -devtools::document() # if you changed function signatures +#devtools::document() # if you changed function signatures devtools::build() devtools::install_local(".") From 894a0bcfab1c7987eec43d78939f8b31b6003e77 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 15:32:44 -0500 Subject: [PATCH 20/55] attempt ordering fix by using adply instead of ddply. 
--- R/support-query_sample_sources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index a41677c..c0e10a1 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -27,7 +27,7 @@ query_sample_sources <- function( tryCatch(sele,error=function(e){ cat("ERROR: The select statement is not defined.\n") }) - features <- plyr::ddply(sample_sources, c("sample_source"), function(ss){ + features <- plyr::adply(sample_sources, 1, function(ss){ tryCatch(c(ss),error=function(e){ cat("ERROR: The specified sample source is not defined.\n") }) From 84a02e1665118700f9d96db389c31adb5630e85c Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 15:53:05 -0500 Subject: [PATCH 21/55] print features for now during debugging. --- R/support-query_sample_sources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index c0e10a1..6640a90 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -72,7 +72,7 @@ query_sample_sources <- function( # } #} - + print(features) features } From 9e1b75229aee8bb5a540699c5f58e75dd8802b92 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 15:59:32 -0500 Subject: [PATCH 22/55] print features for now during debugging. 
--- R/support-query_sample_sources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index 6640a90..1accaa3 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -72,7 +72,7 @@ query_sample_sources <- function( # } #} - print(features) + tail(features) features } From 42f99afcb6ab94bcc46fbbd8fe5df2912d51fd0e Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:02:20 -0500 Subject: [PATCH 23/55] print features for now during debugging. --- R/support-query_sample_sources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index 1accaa3..abdc453 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -72,7 +72,7 @@ query_sample_sources <- function( # } #} - tail(features) + cat(tail(features)) features } From c85723a06408803f82a84ea7233e3b6f6e836b28 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:03:56 -0500 Subject: [PATCH 24/55] print features for now during debugging. --- R/support-query_sample_sources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index abdc453..6d5cf3f 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -72,7 +72,7 @@ query_sample_sources <- function( # } #} - cat(tail(features)) + print(tail(features)) features } From 2407512d94b3570abf779a83a71979eb59b9f62b Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:07:32 -0500 Subject: [PATCH 25/55] print features for now during debugging. 
--- R/support-query_sample_sources.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index 6d5cf3f..c9fa5d7 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -71,8 +71,11 @@ query_sample_sources <- function( # } # } #} + cat("ordering: ") + plyr::a_ply(features$sample_source, 1, function( ss )){ + cat(ss) + } - print(tail(features)) features } From cb7035db0848536f10645744416f6c6ef041c785 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:08:54 -0500 Subject: [PATCH 26/55] print features for now during debugging. --- R/support-query_sample_sources.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index c9fa5d7..de738a0 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -72,9 +72,9 @@ query_sample_sources <- function( # } #} cat("ordering: ") - plyr::a_ply(features$sample_source, 1, function( ss )){ + plyr::a_ply(features$sample_source, 1, function( ss ){ cat(ss) - } + )} features } From fddc3238b92d3f1e3458eac3eadc2af4d973ec92 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:09:24 -0500 Subject: [PATCH 27/55] print features for now during debugging. 
--- R/support-query_sample_sources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index de738a0..740c21a 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -74,7 +74,7 @@ query_sample_sources <- function( cat("ordering: ") plyr::a_ply(features$sample_source, 1, function( ss ){ cat(ss) - )} + }) features } From b2d93c0e546b132f34c4c7ab8f1d40c37d444319 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:11:34 -0500 Subject: [PATCH 28/55] print features for now during debugging. --- R/support-query_sample_sources.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index 740c21a..cac6956 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -71,7 +71,6 @@ query_sample_sources <- function( # } # } #} - cat("ordering: ") plyr::a_ply(features$sample_source, 1, function( ss ){ cat(ss) }) From 06299bb1aaaf639d8d683e0c17ded20e10e2841f Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:27:55 -0500 Subject: [PATCH 29/55] remove debugging info. 
--- R/support-query_sample_sources.R | 4 ---- inst/scripts/analysis/plots/scores/total_score.R | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/R/support-query_sample_sources.R b/R/support-query_sample_sources.R index cac6956..16146a9 100644 --- a/R/support-query_sample_sources.R +++ b/R/support-query_sample_sources.R @@ -71,10 +71,6 @@ query_sample_sources <- function( # } # } #} - plyr::a_ply(features$sample_source, 1, function( ss ){ - cat(ss) - }) - features } diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 96cc803..df8230b 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -56,6 +56,7 @@ f <- ddply(data, .(sample_source), function(d2){ }) dens <- estimate_density_1d(f, c("sample_source"), c("total_score")) +print(dens) plot_id <- "total_score_top_10" p <- ggplot(data=dens) + theme_bw() + From 50e23c0d4265204ec119a08b203cd1707a34466a Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:46:53 -0500 Subject: [PATCH 30/55] make abbreviations less severe. I wish this was automatic. 
--- .../analysis/plots/interfaces/SASA/int_SASA_den.R | 12 ++++++------ .../plots/interfaces/energies/int_energies_den.R | 6 +++--- inst/scripts/analysis/plots/scores/total_score.R | 7 +++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index a931ad9..cde8143 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -170,7 +170,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field,"Average", sep=" "))+ - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) plot_field(p, "avg_sides_by_all", grid=side ~ .) #Average Top 10 @@ -178,7 +178,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "Average Best 10",sep=" ")) + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) plot_field(p, "avg_sides_top_10_by_all", grid=side ~ .) #Best @@ -186,7 +186,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "top", sep=" ")) + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) plot_field(p, "sides_top_by_all", grid=side ~ .) 
avgs <- ddply(data, .(sample_source, side, field, interface), function(d2){ @@ -197,7 +197,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field,"Average", sep=" ")) + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) plot_field(p, "avg_sides_by_interface", grid=side ~ interface) #Average Top 10 @@ -205,7 +205,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "Average Best 10",sep=" ")) + - scale_x_discrete(labels = abbreviate) + + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + ylab(field) plot_field(p, "avg_sides_top_10_by_interface", grid=side ~ interface) @@ -214,7 +214,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "top", sep=" ")) + - scale_x_discrete(labels = abbreviate) + + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + ylab(field) plot_field(p, "sides_top_by_interface", grid=side ~ interface) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 04b8fa4..959b6de 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -107,7 +107,7 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + ggtitle("Average Interface dG") + ylab("REU") + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) plot_field(p, 
"avg_dG_by_interface", grid=interface ~ .) #Average Top 10 @@ -116,7 +116,7 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + ggtitle("Average Best 10 Interface dG") + ylab("REU") + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) plot_field(p, "avg_dG_top_10_by_interface", grid=interface ~ .) #Best @@ -125,6 +125,6 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + ggtitle("Top Interface dG") + ylab("REU") + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) plot_field(p, "dG_top_by_interface", grid=interface ~ .) })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index df8230b..26f683d 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -56,7 +56,6 @@ f <- ddply(data, .(sample_source), function(d2){ }) dens <- estimate_density_1d(f, c("sample_source"), c("total_score")) -print(dens) plot_id <- "total_score_top_10" p <- ggplot(data=dens) + theme_bw() + @@ -78,7 +77,7 @@ p <- ggplot(data=avgs ) + theme_bw() + ggtitle("Average Score") + ylab("REU") + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) save_plots(self, "avg_total_score", sample_sources, output_dir, output_formats) #Avg Top 10 Scoring @@ -87,7 +86,7 @@ p <- ggplot(data=avgs ) + theme_bw() + ggtitle("Average Best 10 Score") + ylab("REU") + - scale_x_discrete(labels = abbreviate) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) save_plots(self, "avg_top_10_total_score", sample_sources, output_dir, output_formats) #Top Scoring @@ -96,7 +95,7 @@ p <- ggplot(data=avgs ) + theme_bw() + ggtitle("Best Score") + ylab("REU") + - scale_x_discrete(labels = abbreviate) + 
scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) save_plots(self, "best_total_score", sample_sources, output_dir, output_formats) })) # end FeaturesAnalysis From ba956a9bb8acda642877e40282258e8f41877471 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 16:50:46 -0500 Subject: [PATCH 31/55] increase length for abbreviations. --- .../analysis/plots/interfaces/energies/int_energies_den.R | 6 +++--- inst/scripts/analysis/plots/scores/total_score.R | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 959b6de..af9cd40 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -107,7 +107,7 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + ggtitle("Average Interface dG") + ylab("REU") + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) plot_field(p, "avg_dG_by_interface", grid=interface ~ .) #Average Top 10 @@ -116,7 +116,7 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + ggtitle("Average Best 10 Interface dG") + ylab("REU") + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) plot_field(p, "avg_dG_top_10_by_interface", grid=interface ~ .) #Best @@ -125,6 +125,6 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + ggtitle("Top Interface dG") + ylab("REU") + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) plot_field(p, "dG_top_by_interface", grid=interface ~ .) 
})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 26f683d..af2bc4e 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -77,7 +77,7 @@ p <- ggplot(data=avgs ) + theme_bw() + ggtitle("Average Score") + ylab("REU") + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) save_plots(self, "avg_total_score", sample_sources, output_dir, output_formats) #Avg Top 10 Scoring @@ -86,7 +86,7 @@ p <- ggplot(data=avgs ) + theme_bw() + ggtitle("Average Best 10 Score") + ylab("REU") + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) save_plots(self, "avg_top_10_total_score", sample_sources, output_dir, output_formats) #Top Scoring @@ -95,7 +95,7 @@ p <- ggplot(data=avgs ) + theme_bw() + ggtitle("Best Score") + ylab("REU") + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) save_plots(self, "best_total_score", sample_sources, output_dir, output_formats) })) # end FeaturesAnalysis From 0377b85b61c0fd982ac2887f955193fb4e7ef274 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 14 Sep 2016 17:08:22 -0500 Subject: [PATCH 32/55] fix sasa plots. 
--- .../plots/interfaces/SASA/int_SASA_den.R | 28 ++++++++++--------- .../analysis/plots/scores/total_score.R | 2 +- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index cde8143..ed9937e 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -170,24 +170,24 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field,"Average", sep=" "))+ - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) - plot_field(p, "avg_sides_by_all", grid=side ~ .) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) + plot_field(p, paste("avg_sides_by_all", field, sep = "_"), grid=side ~ .) #Average Top 10 p <- ggplot(data=avgs ) + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "Average Best 10",sep=" ")) + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) - plot_field(p, "avg_sides_top_10_by_all", grid=side ~ .) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) + plot_field(p, paste("avg_sides_top_10_by_all", field, sep="_"), grid=side ~ .) #Best p <- ggplot(data=avgs ) + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "top", sep=" ")) + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) - plot_field(p, "sides_top_by_all", grid=side ~ .) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) + plot_field(p, paste("sides_top_by_all", field, sep = "_"), grid=side ~ .) 
avgs <- ddply(data, .(sample_source, side, field, interface), function(d2){ data.frame(m = mean(d2[,field]), std_dev = sd(d2[,field]), m_top10 = mean(d2[1:10,field]), std_dev_top_10 = sd(d2[1:10,field]), top = d2[1,field]) @@ -197,28 +197,30 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field,"Average", sep=" ")) + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) - plot_field(p, "avg_sides_by_interface", grid=side ~ interface) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) + plot_field(p, paste("avg_sides_by_interface", field, sep="_"), grid=side ~ interface) #Average Top 10 p <- ggplot(data=avgs ) + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m_top10 , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "Average Best 10",sep=" ")) + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) + ylab(field) - plot_field(p, "avg_sides_top_10_by_interface", grid=side ~ interface) + plot_field(p, paste("avg_sides_top_10_by_interface", field, sep="_"), grid=side ~ interface) #Best p <- ggplot(data=avgs ) + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= top , fill=sample_source)) + theme_bw() + ggtitle(paste("Buried", field, "top", sep=" ")) + - scale_x_discrete(labels=function(x) abbreviate(x, minlength=12)) + + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) + ylab(field) - plot_field(p, "sides_top_by_interface", grid=side ~ interface) + plot_field(p, paste("sides_top_by_interface", field, sep = "_"), grid=side ~ interface) - } + } #End each side + + #Fractions field = "aromatic_dSASA_fraction" parts = list(plot_parts, scale_x_continuous("fraction", limit=c(0, 1.0))) diff --git 
a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index af2bc4e..02b0491 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -75,7 +75,7 @@ p <- ggplot(data=avgs ) + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + #geom_errorbar(aes(ymin = m-std_dev, ymax=m+std_dev) + theme_bw() + - ggtitle("Average Score") + + ggtitle("Average Total Score") + ylab("REU") + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) save_plots(self, "avg_total_score", sample_sources, output_dir, output_formats) From c59614603772dcc86119976f736e34be433813be Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 18 Jan 2017 12:56:49 -0600 Subject: [PATCH 33/55] debugging interface hbond scripts. --- inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index 3e130b6..4c5cba1 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -117,7 +117,7 @@ run=function(self, sample_sources, output_dir, output_formats){ df = data.frame(n=n) }) - #print(head(hbond_counts)) + print(head(hbond_counts)) field = "n" group = c("sample_source") dens <- estimate_density_1d(hbond_counts, group, field) From 920a6a293dc082ef9df5259dcf7edf3124acdd68 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 18 Jan 2017 19:00:56 -0600 Subject: [PATCH 34/55] fix a bunch more plots with ggplot2 upgrade. fun stuff. 
--- .../anchor_distances/ab_cdr_anchor_dis_den.R | 2 +- .../charge/ab_cdr_cluster_charge_den.R | 4 +- .../plots/antibodies/charge/ab_charge_den.R | 10 ++-- .../composition/ab_composition_den.R | 2 +- .../plots/antibodies/h3_kink/h3_kink_den.R | 2 +- .../plots/interfaces/SASA/int_dSASA_vs.R | 10 +--- .../composition/int_composition_den.R | 22 +++++---- .../plots/interfaces/hbonds/int_hbonds_den.R | 47 +++++++++++-------- .../secondary_structure/int_ss_den.R | 7 +++ .../analysis/plots/scores/total_score.R | 18 +++---- 10 files changed, 68 insertions(+), 56 deletions(-) diff --git a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R index d0d60e9..eeb8222 100644 --- a/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R +++ b/inst/scripts/analysis/plots/antibodies/anchor_distances/ab_cdr_anchor_dis_den.R @@ -50,7 +50,7 @@ run=function(self, sample_sources, output_dir, output_formats){ # cdr_metrics where CDR LIKE '%Proto%'" # } - data = query_sample_sources(sample_sources, sele, char_as_factor=F) + data = query_sample_sources(sample_sources, sele) plot_parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R index f70d3b8..acfd1ec 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_cdr_cluster_charge_den.R @@ -94,7 +94,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + ggtitle(paste("CDR Charge", cluster)) + xlab("Charge") + - scale_y_continuous("Feature Density") + #Zscale_y_continuous("Feature Density") plot_field(p, paste("cdr_charge_den", 
cluster, sep="_")) } @@ -103,7 +103,7 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- ggplot(data=clus_data) + theme_bw() + ggtitle(paste("CDR Charge", cluster)) + - scale_y_continuous(label=percent) + + #scale_y_continuous(label="percent") + ylab("% of Sample Source") plot_field(p, paste("cdr_charge_hist", cluster, sep="_")) diff --git a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R index 9bfd37c..2ab072b 100644 --- a/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R +++ b/inst/scripts/analysis/plots/antibodies/charge/ab_charge_den.R @@ -66,7 +66,7 @@ run=function(self, sample_sources, output_dir, output_formats){ dens <- estimate_density_1d(data, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + - scale_x_continuous("charge", breaks = seq(min(data$net_charge), max(data$net_charge), 2)) + + #scale_x_continuous("charge", breaks = seq(min(data$net_charge), max(data$net_charge), 2)) + ggtitle("Antibody Net Charge") plot_field(p, "net_charge_den") @@ -79,10 +79,10 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- ggplot(data=perc ) + geom_bar(position="dodge", stat='identity', aes(x = net_charge, y= perc , fill=sample_source)) + theme_bw() + - scale_x_continuous("charge", breaks = seq(min(perc$net_charge), max(perc$net_charge), 2)) + + #scale_x_continuous("charge", breaks = seq(min(perc$net_charge), max(perc$net_charge), 2)) + ggtitle("Antibody Net Charge") + ylab("% of Sample Source") + - scale_y_continuous(label="percent") + #scale_y_continuous(label="percent") #scale_x_continuous("restype") + #scale_y_continuous("n") plot_field(p, "net_charge_hist") @@ -111,7 +111,7 @@ run=function(self, sample_sources, output_dir, output_formats){ dens <- estimate_density_1d(data, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), 
size=1.2) + - scale_x_continuous("charge", breaks = seq(min(data$paratope_charge), max(data$paratope_charge), 2)) + + #scale_x_continuous("charge", breaks = seq(min(data$paratope_charge), max(data$paratope_charge), 2)) + ggtitle("Paratope Net Charge") plot_field(p, "paratope_charge_den") @@ -204,7 +204,7 @@ run=function(self, sample_sources, output_dir, output_formats){ theme_bw() + #scale_x_continuous("charge", breaks = get_charge_seq(perc, 1)) + ggtitle("CDR Charge") + - scale_y_continuous("charge", label="percent") + + #scale_y_continuous("charge", label="percent") + ylab("% of Sample Source") plot_field_wrap(p, "cdr_charge_hist", ~ CDR) diff --git a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R index e03511c..b6ffb97 100644 --- a/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R +++ b/inst/scripts/analysis/plots/antibodies/composition/ab_composition_den.R @@ -86,7 +86,7 @@ run=function(self, sample_sources, output_dir, output_formats){ dens <- estimate_density_1d(data, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + - scale_x_continuous("", breaks = seq(min(data$length), max(data$length), 4)) + #scale_x_continuous("", breaks = seq(min(data$length), max(data$length), 4)) ggtitle("CDR Lengths") plot_field_wrap(p, "cdr_lengths_den",grid= ~ CDR) diff --git a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R index 070576c..780ed77 100644 --- a/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R +++ b/inst/scripts/analysis/plots/antibodies/h3_kink/h3_kink_den.R @@ -65,7 +65,7 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_bar(position="dodge", stat="identity", aes(x=kink_type, y= percent, fill=sample_source)) + theme_bw() + ggtitle("Kink Type Comparison") + - 
scale_y_continuous(label = "percent") + + #scale_y_continuous(label = "percent") + xlab("kink type") plot_field(p, "kink_type_hist") diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R index c07a27d..73762f0 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R @@ -91,9 +91,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #dSASA vs dG p <- ggplot(data=int_data[int_data$dG<=5000 & int_data$dG>-5000,], aes(x = f, y = dG, colour=sample_source)) + parts + - ggtitle(paste(field,"vs dG")) + - scale_x_continuous("SASA") + - scale_y_continuous("REU") + ggtitle(paste(f,"vs dG")) + plot_field(p, paste(f, "vs_dG_by_all", sep="_")) plot_field(p, paste(f, "vs_dG_by_interface", sep="_"), grid=interface ~ .) @@ -119,8 +117,6 @@ run=function(self, sample_sources, output_dir, output_formats){ field = c("dSASA") p <- ggplot(data = data, aes(x = dSASA, y = e_density, colour=sample_source)) + parts + ggtitle(paste(field, "vs Interface energy density")) + - scale_x_continuous("dSASA") + - scale_y_continuous("dG/dSASA") plot_field(p, paste("control", field, "vs_energy_density", sep="_"), grid=side ~ .) # #dhSASA vs dpSASA @@ -133,13 +129,9 @@ run=function(self, sample_sources, output_dir, output_formats){ #Control - Should be flat? p <- ggplot(data = data, aes(y = dSASA_bb, x = dSASA_sc, colour=sample_source)) + parts + ggtitle("dSASA_sc vs dSASA_bb") + - scale_x_continuous("Sidechain dSASA") + - scale_y_continuous("Backbone dSASA") plot_field(p, "control_dSASA_sc_vs_dSASA_bb", grid=side ~ .) p <- ggplot(data = data, aes(y = dhSASA_sc, x = dhSASA_bb, colour=sample_source)) + parts + ggtitle("dhSASA_sc vs dhSASA_bb") + - scale_y_continuous("Sidechain dhSASA") + - scale_x_continuous("Backbone dhSASA") plot_field(p, "control_dhSASA_sc_vs_dhSASA_bb", grid=side ~ .) 
})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index a7120f7..10c1a5a 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -117,15 +117,18 @@ run=function(self, sample_sources, output_dir, output_formats){ interface_residues.dG as dG, interface_residues.relative_dSASA_fraction as dSASA_fraction, - interface_residues.struct_id as struct_id + interface_residues.struct_id as struct_id, + structures.input_tag as input_tag FROM residues, interface_residues, - residue_type + residue_type, + structures WHERE interface_residues.struct_id == residues.struct_id and interface_residues.resNum == residues.resNum and - residues.name3==residue_type.name3 + residues.name3==residue_type.name3 and + interface_residues.struct_id == structures.struct_id " res_data = query_sample_sources(sample_sources, sele) @@ -145,24 +148,25 @@ run=function(self, sample_sources, output_dir, output_formats){ # plot_field(p, "restype_composition_by_interface_test", grid=interface ~ .) 
get_percent <- function(d) { - d_per <- ddply(d, .(sample_source, interface, struct_id), function(per_struct_id){ + d_per <- ddply(d, .(sample_source, interface, input_tag), function(per_struct_id){ d_per_restype <- ddply(per_struct_id, .(restype1), function(per_restype){ #print(head(per_restype)) - perc = length(per_restype$restype1)/length(per_struct_id$struct_id) + perc = length(per_restype$restype1)/length(per_struct_id$input_tag) df = data.frame(perc = perc) }) }) d_per } - + print.default(d) + #Restype Composition - p <- ggplot(data=get_percent(res_data), aes(x=restype1)) + - geom_bar(position="dodge", stat="identity", aes(y=perc, fill=sample_source))+ + p <- ggplot(data=d, aes(x=restype1)) + + geom_bar(position="dodge", stat="identity", aes(y=perc, x=restype1,fill=sample_source))+ theme_bw() + ggtitle("Interface ResType Composition") + - scale_y_continuous(label="percent") + + #scale_y_continuous(label="percent") + ylab("% of Sample Source") plot_field(p, "restype_composition_by_all") plot_field(p, "restype_composition_by_interface", grid=interface ~ .) 
diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index 4c5cba1..872bc7d 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -26,6 +26,7 @@ run=function(self, sample_sources, output_dir, output_formats){ hb.energy as energy, don_res.interface as interface, don_res.struct_id as struct_id, + structures.input_tag as input_tag, hb_geom.AHdist as dis FROM interface_residues AS don_res, @@ -33,7 +34,8 @@ run=function(self, sample_sources, output_dir, output_formats){ hbond_sites AS don, hbond_sites AS acc, hbonds AS hb, - hbond_geom_coords as hb_geom + hbond_geom_coords as hb_geom, + structures WHERE ((don_res.side== 'side1' AND acc_res.side == 'side2') OR @@ -49,10 +51,12 @@ run=function(self, sample_sources, output_dir, output_formats){ acc_res.struct_id == acc.struct_id AND acc.struct_id == don.struct_id AND don.struct_id == hb.struct_id AND - hb.struct_id == hb_geom.struct_id + hb.struct_id == hb_geom.struct_id AND + don_res.struct_id == structures.struct_id " data = query_sample_sources(sample_sources, sele) + #print(sum(data$struct_id==1)) #print(sum(data$struct_id==2)) #print(sum(data$struct_id==3)) @@ -111,13 +115,12 @@ run=function(self, sample_sources, output_dir, output_formats){ #Hbonds/model or per interface #There is probably a better way to do this. 
- - hbond_counts <- ddply(data, .(interface, sample_source, struct_id), function(int_data){ - n = length(int_data$energy > 0) - df = data.frame(n=n) + #print.default(data) + hbond_counts <- ddply(data, .(interface, sample_source, input_tag), function(int_data){ + data.frame(n = length(int_data$energy > 0)) + #df = data.frame(n=n) }) - print(head(hbond_counts)) field = "n" group = c("sample_source") dens <- estimate_density_1d(hbond_counts, group, field) @@ -135,19 +138,25 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("Cross Interface Hydrogen Bonds") plot_field(p, "hbond_den_by_interface",grid=~interface) + + avgs <- ddply(hbond_counts, .(sample_source), function(d2){ + std_dev = sd(d2$n) + m = mean(d2$n) + data.frame(m = m, std_dev = std_dev, ymin=m-std_dev, ymax=m+std_dev) + }) + + dodge <- position_dodge(width=0.9) - #Histogram - p <- ggplot(data=hbond_counts, na.rm=T) + - geom_bar(aes(x=n, y = ..density.. , fill=sample_source), position="dodge", binwidth=1) + - scale_y_continuous(label="percent") + - xlab("hbonds") + - ggtitle("Average Cross Interface Hydrogen Bonds") - plot_field(p, "hbond_hist_by_all") - p <- ggplot(data=hbond_counts, na.rm=T) + - geom_bar(aes(x=n, y = ..density.. 
, fill=sample_source), position="dodge", binwidth=1) + - scale_y_continuous(label="percent") + + #Averages + p <- ggplot(data=avgs, (x = sample_source, y= m , fill=sample_source, ymax=ymax, ymin=ymin) ) + + geom_bar(position="dodge", stat='identity', ) + + geom_errorbar(position=dodge, width=0.25) + theme_bw() + + ggtitle("Average Cross Interface Hydrogen Bonds") + xlab("hbonds") + - ggtitle("Average Cross Interface Hydrogen Bonds") - plot_field(p, "hbond_hist_by_interface", grid=~interface) + #scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) + save_plots(self, "avg_hbonds_by_all", sample_sources, output_dir, output_formats) + + })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R index 0c29ae0..6c9675f 100644 --- a/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R +++ b/inst/scripts/analysis/plots/interfaces/secondary_structure/int_ss_den.R @@ -36,6 +36,13 @@ run=function(self, sample_sources, output_dir, output_formats){ scale_y_continuous("Feature Density"), theme_bw()) + capwords <- function(s, strict = FALSE) { + cap <- function(s) paste(toupper(substring(s,1,1)), + {s <- substring(s,2); if(strict) tolower(s) else s}, + sep = "", collapse = " " ) + sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) + } + plot_field = function(p, plot_id, grid = NULL, ssLegend=T){ if (! 
is.null(grid)){ diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 02b0491..1ce99fc 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -33,9 +33,8 @@ WHERE structure_scores.score_type_id = score_types.score_type_id ORDER BY score_value;" -data <- query_sample_sources(sample_sources, sele) -print(summary(data)) +data <- query_sample_sources(sample_sources, sele) dens <- estimate_density_1d( data = data, @@ -44,7 +43,7 @@ dens <- estimate_density_1d( plot_id <- "total_score" p <- ggplot(data=dens) + theme_bw() + - geom_line(aes(x=x, y=y, colour=sample_source), size=1.4) + + geom_line(aes(x, y, colour=sample_source), size=1.4) + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + ggtitle("Rosetta Structure Score") + labs(x="Rosetta Energy Units") + @@ -52,16 +51,16 @@ p <- ggplot(data=dens) + theme_bw() + save_plots(self, plot_id, sample_sources, output_dir, output_formats) f <- ddply(data, .(sample_source), function(d2){ - data.frame(total_score = d2[1:10,]$total_score) + data.frame(total_score = d2[1:20,]$total_score) }) - -dens <- estimate_density_1d(f, c("sample_source"), c("total_score")) -plot_id <- "total_score_top_10" +dens <- estimate_density_1d(f, ids = c("sample_source"), variable = "total_score") + +plot_id <- "total_score_top_20" p <- ggplot(data=dens) + theme_bw() + - geom_line(aes(x=x, y=y, colour=sample_source), size=1.4) + + geom_line(aes(x, y, colour=sample_source), size=1.4) + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + - ggtitle("Rosetta Structure Score - Top 10") + + ggtitle("Rosetta Structure Score - Top 20") + labs(x="Rosetta Energy Units") + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) save_plots(self, plot_id, sample_sources, output_dir, output_formats) @@ -70,6 +69,7 @@ save_plots(self, plot_id, sample_sources, output_dir, 
output_formats) avgs <- ddply(data, .(sample_source), function(d2){ data.frame(m = mean(d2$total_score), std_dev = sd(d2$total_score), m_top10 = mean(d2[1:10,]$total_score), std_dev_top_10 = sd(d2[1:10,]$total_score), top = d2[1,]$total_score) }) + print(avgs) p <- ggplot(data=avgs ) + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + From f11e4e9549a5b4a9c53d03bd6512b52816934e66 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 18 Jan 2017 19:14:08 -0600 Subject: [PATCH 35/55] fix more crap that the new ggplot2 broke. --- .../analysis/plots/antibodies/contacts/ag_ab_contact_den.R | 4 ++-- .../plots/interfaces/composition/int_composition_den.R | 3 ++- .../scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R index 1c57252..4dc5a45 100644 --- a/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R +++ b/inst/scripts/analysis/plots/antibodies/contacts/ag_ab_contact_den.R @@ -59,7 +59,7 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- ggplot(data=binary_data, na.rm=T, aes(x=CDR)) + geom_bar(position="dodge", aes(y=percent, fill=sample_source), stat='identity') + ggtitle("CDR Makes Antigen Contact") + - scale_y_continuous(label="percent") + + #scale_y_continuous(label="percent") + ylab("% of sample source") plot_field(p, "cdr_makes_contact_hist") save_tables(self, binary_data, "cdr_makes_contact_table", sample_sources, output_dir, output_formats, @@ -118,7 +118,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("Average Percent of total contacts") + xlab("CDR") + ylab("Avg %") + - scale_y_continuous(label="percent") + #scale_y_continuous(label="percent") plot_field(p, "avg_perc_total_contacts_hist") save_tables(self, avg_perc, 
"avg_perc_total_contacts_table", sample_sources, output_dir, output_formats, caption="Avg Percent of total contacts", caption.placement="top", quote_strings=F) diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index 10c1a5a..cf4f5b3 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -157,7 +157,8 @@ run=function(self, sample_sources, output_dir, output_formats){ }) d_per } - + + d = get_percent(data) print.default(d) #Restype Composition diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index 872bc7d..23520b5 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -149,8 +149,8 @@ run=function(self, sample_sources, output_dir, output_formats){ #Averages - p <- ggplot(data=avgs, (x = sample_source, y= m , fill=sample_source, ymax=ymax, ymin=ymin) ) + - geom_bar(position="dodge", stat='identity', ) + + p <- ggplot(data=avgs, aes(x = sample_source, y= m , fill=sample_source, ymax=ymax, ymin=ymin) ) + + geom_bar(position="dodge", stat='identity') + geom_errorbar(position=dodge, width=0.25) theme_bw() + ggtitle("Average Cross Interface Hydrogen Bonds") + From e015f63455709fd56250e108f0b3396323f121bb Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 18 Jan 2017 19:17:55 -0600 Subject: [PATCH 36/55] fix more crap that the new ggplot2 broke. 
--- inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R | 3 ++- inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R index 73762f0..231a415 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R @@ -91,7 +91,8 @@ run=function(self, sample_sources, output_dir, output_formats){ #dSASA vs dG p <- ggplot(data=int_data[int_data$dG<=5000 & int_data$dG>-5000,], aes(x = f, y = dG, colour=sample_source)) + parts + - ggtitle(paste(f,"vs dG")) + + ggtitle(paste(f,"vs dG")) + plot_field(p, paste(f, "vs_dG_by_all", sep="_")) plot_field(p, paste(f, "vs_dG_by_interface", sep="_"), grid=interface ~ .) diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index 23520b5..ae20412 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -151,7 +151,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #Averages p <- ggplot(data=avgs, aes(x = sample_source, y= m , fill=sample_source, ymax=ymax, ymin=ymin) ) + geom_bar(position="dodge", stat='identity') + - geom_errorbar(position=dodge, width=0.25) + geom_errorbar(position=dodge, width=0.25) + theme_bw() + ggtitle("Average Cross Interface Hydrogen Bonds") + xlab("hbonds") + From 5357ee770ab39efe5e941c799874320de1087bce Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Wed, 18 Jan 2017 22:01:57 -0600 Subject: [PATCH 37/55] more fixes. 
--- .../composition/int_composition_den.R | 39 +++---------------- .../plots/interfaces/hbonds/int_hbonds_den.R | 2 +- 2 files changed, 6 insertions(+), 35 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index cf4f5b3..3e8a5cc 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -134,43 +134,14 @@ run=function(self, sample_sources, output_dir, output_formats){ - ##Histogram - only plot residues that have a dSASA fraction > 5 % - change this for sidechains once we have that data - - ##### Typical way is not working, so we will have to do it manually. ##### - - #Restype Composition - Classical, not working! -# p <- ggplot(data=res_data, aes(x=restype1)) + -# geom_bar(position="dodge", aes(y = ..density.., fill=sample_source), binwidth=1)+ -# theme_bw() + -# ggtitle("Interface ResType Composition") + -# scale_y_continuous(label=percent) -# plot_field(p, "restype_composition_by_all_test") -# plot_field(p, "restype_composition_by_interface_test", grid=interface ~ .) 
- - get_percent <- function(d) { - d_per <- ddply(d, .(sample_source, interface, input_tag), function(per_struct_id){ - d_per_restype <- ddply(per_struct_id, .(restype1), function(per_restype){ - #print(head(per_restype)) - perc = length(per_restype$restype1)/length(per_struct_id$input_tag) - df = data.frame(perc = perc) - }) - }) - d_per - } - - d = get_percent(data) - print.default(d) - - #Restype Composition - - p <- ggplot(data=d, aes(x=restype1)) + - geom_bar(position="dodge", stat="identity", aes(y=perc, x=restype1,fill=sample_source))+ + p <- ggplot(data=res_data, aes(x=restype1, y = ..density.., fill=sample_source)) + + geom_histogram(position="dodge", binwidth=1)+ theme_bw() + - ggtitle("Interface ResType Composition") + - #scale_y_continuous(label="percent") + - ylab("% of Sample Source") + ggtitle("Interface ResType Composition") + plot_field(p, "restype_composition_by_all") plot_field(p, "restype_composition_by_interface", grid=interface ~ .) + diff --git a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R index ae20412..dd42f95 100644 --- a/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R +++ b/inst/scripts/analysis/plots/interfaces/hbonds/int_hbonds_den.R @@ -151,7 +151,7 @@ run=function(self, sample_sources, output_dir, output_formats){ #Averages p <- ggplot(data=avgs, aes(x = sample_source, y= m , fill=sample_source, ymax=ymax, ymin=ymin) ) + geom_bar(position="dodge", stat='identity') + - geom_errorbar(position=dodge, width=0.25) + + #geom_errorbar(position=dodge, width=0.25) + theme_bw() + ggtitle("Average Cross Interface Hydrogen Bonds") + xlab("hbonds") + From 1d41ab43b2d40600ab8c754c0b4a36b4d5ab045e Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 14:19:11 -0600 Subject: [PATCH 38/55] some trimming to scripts. Better filtering of low data. 
--- .../plots/interfaces/SASA/int_dSASA_vs.R | 63 +++++----- ...nt_composition-dG_dSASA_stats_by_restype.R | 108 +----------------- .../composition/int_composition_den.R | 39 ------- .../plots/interfaces/energies/int_dG_vs.R | 34 ++---- .../interfaces/energies/int_energies_den.R | 24 +++- .../energies/int_energies_residue_den.R | 67 ++++------- .../interfaces/packing/int_packstat_vs.R | 86 -------------- .../interfaces/packing/int_sc_value_vs.R | 14 +-- 8 files changed, 88 insertions(+), 347 deletions(-) delete mode 100644 inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R index 231a415..70a87b0 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R @@ -85,54 +85,43 @@ run=function(self, sample_sources, output_dir, output_formats){ #JAB - commenting out the hydrophobic dSASA. Unclear if this is useful or not. I don't think it is very much. #fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") - fields = c("dSASA", "dSASA_bb", "dSASA_sc") + #fields = c("dSASA", "dSASA_bb", "dSASA_sc") + fields = c("dSASA") + + data_rm_out = subset(int_data, subset=(int_data$dG <= quantile(int_data$dG, .90))) #Remove high energy outliers + data_top = subset(int_data, subset=(int_data$dG <= quantile(int_data$dG, .10))) #Top 10 percent for (f in fields){ #dSASA vs dG - p <- ggplot(data=int_data[int_data$dG<=5000 & int_data$dG>-5000,], aes(x = f, y = dG, colour=sample_source)) + parts + + + + + p <- ggplot(data=data_rm_out, aes(x = f, y = dG, colour=sample_source)) + parts + ggtitle(paste(f,"vs dG")) plot_field(p, paste(f, "vs_dG_by_all", sep="_")) plot_field(p, paste(f, "vs_dG_by_interface", sep="_"), grid=interface ~ .) -# #dSASA vs aromatic dSASA fraction. 
Shouldn't increase, but worth a plot -# p <- ggplot(data= data, aes(x = dSASA, y = aromatic_dSASA_fraction, colour=sample_source)) + parts + -# ggtitle(paste(field,"vs Aromatic dSASA Fraction")) + -# scale_x_continuous("SASA") + -# scale_y_continuous("fraction", limit=c(0, 1.0)) -# plot_field(p, paste(field, "vs_aromatic_dSASA_fraction", sep="_"), grid= side ~ .) - -# #dSASA vs interface nres. Again, shouldn't be interesting. Mainly a control. -# p <- ggplot(data = data, aes(y = interface_nres, x = dSASA, colour=sample_source)) + parts + -# ggtitle(paste(field, "vs Interface nres")) + -# scale_y_continuous("n") + -# scale_x_continuous("SASA") -# plot_field(p, paste("control", field, "vs_interface_nres", sep="_"), grid= side ~ .) - - #dSASA vs 'energy density' from Ben Strange's paper - Should be pretty much flat for natives - - } - - data$e_density = data$dG/data$dSASA + data_rm_out$e_density = data_rm_out$dG/data_rm_out$dSASA field = c("dSASA") - p <- ggplot(data = data, aes(x = dSASA, y = e_density, colour=sample_source)) + parts + + p <- ggplot(data = data_rm_out, aes(x = dSASA, y = e_density, colour=sample_source)) + parts + ggtitle(paste(field, "vs Interface energy density")) + plot_field(p, paste("control", field, "vs_energy_density", sep="_"), grid=side ~ .) -# #dhSASA vs dpSASA -# p <- ggplot(data = data, aes(y = dpSASA, x = dhSASA, colour=sample_source)) + parts + -# ggtitle("dhSASA vs dpSASA") + -# scale_y_continuous("dSASA") + -# scale_x_continuous("dSASA") -# plot_field(p, "dhSASA_vs_dpSASA", grid=side ~ .) - - #Control - Should be flat? - p <- ggplot(data = data, aes(y = dSASA_bb, x = dSASA_sc, colour=sample_source)) + parts + - ggtitle("dSASA_sc vs dSASA_bb") + - plot_field(p, "control_dSASA_sc_vs_dSASA_bb", grid=side ~ .) - - p <- ggplot(data = data, aes(y = dhSASA_sc, x = dhSASA_bb, colour=sample_source)) + parts + - ggtitle("dhSASA_sc vs dhSASA_bb") + - plot_field(p, "control_dhSASA_sc_vs_dhSASA_bb", grid=side ~ .) 
+ #Top 10 Percent + p <- ggplot(data=data_top, aes(x = f, y = dG, colour=sample_source)) + parts + + ggtitle(paste(f,"vs dG")) + + plot_field(p, paste(f, "vs_dG_top_10_percent_by_all", sep="_")) + plot_field(p, paste(f, "vs_dG_top_10_percent_by_interface", sep="_"), grid=interface ~ .) + + data_top$e_density = data_top$dG/data_top$dSASA + field = c("dSASA") + p <- ggplot(data = data_top, aes(x = dSASA, y = e_density, colour=sample_source)) + parts + + ggtitle(paste(field, "vs Interface energy density")) + + plot_field(p, paste("control", field, "vs_energy_density_top_10_percent", sep="_"), grid=side ~ .) + + } + })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R index 83f40c9..9e5a58a 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition-dG_dSASA_stats_by_restype.R @@ -125,7 +125,7 @@ run=function(self, sample_sources, output_dir, output_formats){ # interesting, but not entirely useful. Uncomment this if you find otherwise. #fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") - fields = c("dSASA", "dSASA_bb", "dSASA_sc") + fields = c("dSASA") for (field in fields) { group = c("sample_source", "restype") dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field) @@ -153,111 +153,5 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- p + facet_wrap(~ restype, ncol=4) save_plots(self, "dG_by_restype_den_by_all", sample_sources, output_dir, output_formats) - #JAB - works, but not really useful.. 
- #group = c("sample_source", "restype") - #field = "dG" - #dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field) - #p <- ggplot(data=dens, na.rm=T) + parts + - # geom_line(aes(x, y, colour=sample_source), size=1.2) + - # ggtitle("dG per restype") + - # xlab("REU") - #if(nrow(sample_sources) <= 3){ - # p <- p + theme(legend.position="bottom", legend.direction="horizontal") - #} - #p <- p + facet_wrap(~ restype, ncol=4) - #save_plots(self, "dG_by_restype_den_>0dSASA_by_all", sample_sources, output_dir, output_formats) - - #JAB - works, but not really useful.. - #group = c("sample_source", "restype") - #field = "dG" - #dens <- estimate_density_1d(res_data[res_data$dSASA == 0,], group, field) - #p <- ggplot(data=dens, na.rm=T) + parts + - # geom_line(aes(x, y, colour=sample_source), size=1.2) + - # ggtitle("dG per restype") + - # xlab("REU") - #if(nrow(sample_sources) <= 3){ - # p <- p + theme(legend.position="bottom", legend.direction="horizontal") - #} - #p <- p + facet_wrap(~ restype, ncol=4) - #save_plots(self, "dG_by_restype_den_@0dSASA_by_all", sample_sources, output_dir, output_formats) - - #JAB - Works, but this is hard to see and interpret: - #group = c("sample_source", "restype") - #field = "dSASA_fraction" - #dens <- estimate_density_1d(res_data[res_data$dSASA_fraction > 0,], group, field) - #p <- ggplot(data=dens, na.rm=T) + parts + - # geom_line(aes(x, y, color=restype)) + - # ggtitle("dSASA fraction per restype") + - # facet_grid(sample_source ~ .) - #save_plots(self, "dSASA_fraction_per_restype_den_combined", sample_sources, output_dir, output_formats) - - #group = c("sample_source", "restype") - #field = "dSASA" - #dens <- estimate_density_1d(res_data[res_data$dSASA > 0,], group, field) - #p <- ggplot(data=dens, na.rm=T) + parts + - # geom_line(aes(x, y, color=restype)) + - # ggtitle("dSASA per restype") + - # facet_grid(sample_source ~ .) 
- #save_plots(self, "dSASA_per_restype_den_combined", sample_sources, output_dir, output_formats) - - - - #dG vs dSASA by restype - #JAB - This works, but it is not really useful. Commenting it out. - - #p <- ggplot(data=res_data[res_data$dSASA > 0,], aes(x = dG, y=dSASA, color=sample_source)) + - # #geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + - # geom_point(size=.5, pch="o") + - # stat_smooth(method=lm) + - # geom_density2d() + - # theme_bw() + - # ggtitle("dG vs dSASA per restype") + - # facet_wrap(~ restype, ncol=4) + - # xlab("REU") + - # ylab("dSASA") - #if(nrow(sample_sources) <= 3){ - # p <- p + theme(legend.position="bottom", legend.direction="horizontal") - #} - #save_plots(self, "dSASA_vs_dG_by_restype_>0dSASA_by_all", sample_sources, output_dir, output_formats) - - - - - - #group = c("sample_source", "restype", "interface") - #field = "dSASA_fraction" - #dens <- estimate_density_1d(res_data[res_data$dSASA_fraction > 0,], group, field) - #p <- ggplot(data=dens, na.rm=T, fill=restype) + parts + - # geom_line(aes(x, y, color=restype)) + - # ggtitle("Hotspot dSASA density") + - # facet_grid(sample_source ~ interface) + - #save_plots(self, "dSASA_fraction_per_restype_den_combined_by_interface", sample_sources, output_dir, output_formats) - - - #group = c("sample_source", "restype", "interface") - #field = "dSASA_fraction" - #dens <- estimate_density_1d(res_data, group, field) - #p <- ggplot(data=dens, na.rm=T) + parts + - #geom_line(aes(x, y, colour=sample_source), size=1.2) + - #ggtitle("Hotspot dSASA density of Interface residue") - #plot_field(p, "dSASA_fraction_per_restype_by_interface", grid=restype ~ interface) - #p <- ggplot(data=res_data[res_data$dSASA_fraction > .05,], fill=sample_source,weight=dSASA_fraction) + - # geom_histogram(aes(x=restype), position="dodge")+ - # theme_bw() + - # ggtitle("Interface ResType Composition") - #scale_x_continuous("restype") + - #scale_y_continuous("n") - #plot_field(p, 
"restype_composition_weighted_by_dSASA_frac") - #plot_field(p, "restype_composition_weighted_by_dSASA_frac_by_interface", grid=interface ~ .) - - #restype vs avg dSASA - #p <- ggplot(data=res_data) + - #geom_histogram(aes(x=mean(restype)), position="dodge") + - # theme_bw() + - # ggtitle("Interface ResType Composition") - #scale_x_continuous("restype") + - #scale_y_continuous("n") - #plot_field(p, "restype_vs_avg_dSASA_fraction") - #plot_field(p, "restype_vs_avg_dSASA_fraction_by_interface", grid=interface ~ .) })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R index 3e8a5cc..2ae1d76 100644 --- a/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R +++ b/inst/scripts/analysis/plots/interfaces/composition/int_composition_den.R @@ -102,45 +102,6 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle("Interface nres") plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface) - - #Restype composition - Overall interface - sele <-" - SELECT - interface_residues.interface as interface, - residues.name3 as restype, - residue_type.name1 as restype1, - interface_residues.SASA_int as SASA_int, - interface_residues.dSASA as dSASA, - interface_residues.dSASA - interface_residues.dSASA_sc as dSASA_bb, - interface_residues.dSASA_sc as dSASA_sc, - interface_residues.dhSASA as dhSASA, - interface_residues.dG as dG, - - interface_residues.relative_dSASA_fraction as dSASA_fraction, - interface_residues.struct_id as struct_id, - structures.input_tag as input_tag - FROM - residues, - interface_residues, - residue_type, - structures - WHERE - interface_residues.struct_id == residues.struct_id and - interface_residues.resNum == residues.resNum and - residues.name3==residue_type.name3 and - 
interface_residues.struct_id == structures.struct_id - " - res_data = query_sample_sources(sample_sources, sele) - - - - p <- ggplot(data=res_data, aes(x=restype1, y = ..density.., fill=sample_source)) + - geom_histogram(position="dodge", binwidth=1)+ - theme_bw() + - ggtitle("Interface ResType Composition") - - plot_field(p, "restype_composition_by_all") - plot_field(p, "restype_composition_by_interface", grid=interface ~ .) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index 24d8542..beb4142 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -56,7 +56,10 @@ run=function(self, sample_sources, output_dir, output_formats){ data = query_sample_sources(sample_sources, sele) - data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + + data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent parts = list( geom_point(size=1.0, pch="o"), @@ -89,29 +92,12 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, "dG_vs_total_score_by_all") plot_field(p, "dG_vs_total_score_by_interface", grid=~ interface) - #dG vs dG_cross - p <- ggplot(data = data_rm_out, aes(x=dG, y=dG_cross, colour=sample_source)) + parts_no_density + - ggtitle("dG vs Crossterm dG") + - xlab("REU") + - ylab("REU") - plot_field(p, "dG_vs_dG_cross_by_all") - plot_field(p, "dG_vs_dG_cross_by_interface", grid= ~ interface) - - p <- ggplot(data = data_rm_out, aes(x=dG, y=dG_cross, color=dSASA)) + parts_no_density + - ggtitle("dG vs Crossterm dG") + - xlab("REU") + + #dG vs Total Energy + p <- ggplot(data=data_top, aes(y = total_score, x = dG, colour=sample_source)) + 
parts_no_density + + ggtitle("dG vs total_score") + ylab("REU") + - scale_fill_hue(l=40) - plot_field(p, "dG_vs_dG_cross_col_by_dSASA_by_all", grid=sample_source ~ .) - plot_field(p, "dG_vs_dG_cross_col_by_dSASA_by_interface", grid=sample_source ~ interface) - - #dG_cross vs dSASA - p <- ggplot(data = data_rm_out, aes(x=dG_cross, y=dSASA, colour=sample_source)) + parts_no_density + - ggtitle("dG_cross vs dSASA") + - xlab("REU") + - ylab("SASA") - plot_field(p, "dg_cross_vs_dSASA" ) - plot_field(p, "dG_cross_vs_dSASA_by_interface", grid= ~ interface) - + xlab("REU") + plot_field(p, "dG_vs_total_score_top_10_percent_by_all") + plot_field(p, "dG_vs_total_score_top_10_percent_by_interface", grid=~ interface) })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index af9cd40..2aa8cf4 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -57,24 +57,42 @@ run=function(self, sample_sources, output_dir, output_formats){ data = query_sample_sources(sample_sources, sele) #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + #Basic Densities - fields = c("dG", "dG_cross") + fields = c("dG" ) for(field in fields){ parts = list(plot_parts, scale_x_continuous("Rosetta Energy")) group = c("sample_source") - dens <- estimate_density_1d(data, group, field) + dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(field) plot_field(p, paste(field, "den_by_all", sep="_"), ) group = c("sample_source", "interface") - dens <- 
estimate_density_1d(data, group, field) + dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(field) plot_field(p, paste(field, "den_by_interface", sep="_"), grid=interface ~ .) + + group = c("sample_source") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_10_percent_den_by_all", sep="_"), ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_10_percent_den_by_interface", sep="_"), grid=interface ~ .) + } diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index 9d07377..62c213d 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -53,12 +53,14 @@ run=function(self, sample_sources, output_dir, output_formats){ #Densities + data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent #Energies - fields = c("dG", "energy_int", "energy_sep") + fields = c("dG") for(field in fields){ group = c("sample_source") - dens <- estimate_density_1d(data, group, field) + dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data = dens, na.rm=T) + plot_parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(paste("Residue", field, sep=" ")) + @@ -67,56 +69,33 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, 
paste(field, "residue_dens_by_all", sep="_")) group = c("sample_source", "interface") - dens <- estimate_density_1d(data, group, field) + dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data = dens, na.rm=T) + plot_parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(paste("Residue", field, sep=" ")) + xlab("REU") #scale_x_continuous("REU", limit = c(-15, 15)) plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) + + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "top_10_percent_residue_dens_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "top_10_percent_residue_dens_by_interface", sep="_"), grid=interface ~ .) 
+ } - #dG where dSASA is 0: - #data[-15 < data[field] & data[field] < 15,] - - field = "dG" - group = c("sample_source") - dens <- estimate_density_1d(data[data$dSASA == 0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab("REU") - #scale_x_continuous(field, limit=c(-15, 15)) - plot_field(p, paste(field, "residue_@0dSASA_dens", sep="_")) - - group = c("sample_source", "interface") - dens <- estimate_density_1d(data[data$dSASA == 0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab("REU") - #scale_x_continuous(field, limit=c(-15, 15)) - plot_field(p, paste(field, "residue_@0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) - - #dG where dSASA > 0 : - field = "dG" - group = c("sample_source") - dens <- estimate_density_1d(data[data$dSASA >0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab("REU") - #scale_x_continuous(field, limit=c(-15, 15)) - plot_field(p, paste(field, "residue_>0dSASA_dens", sep="_")) - - group = c("sample_source", "interface") - dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab("REU") - #scale_x_continuous(field, limit=c(-15, 15)) - plot_field(p, paste(field, "residue_>0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) #Per residue data. This may get crazy. 
})) \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R deleted file mode 100644 index cdad54d..0000000 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packstat_vs.R +++ /dev/null @@ -1,86 +0,0 @@ -# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- -# vi: set ts=2 noet: -# -# (c) Copyright Rosetta Commons Member Institutions. -# (c) This file is part of the Rosetta software suite and is made available under license. -# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. -# (c) For more information, see http://www.rosettacommons.org. Questions about this can be -# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. - -library(ggplot2) -library(plyr) -library(grid) - -feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", -id = "int_packing-packstat_vs", -author = "Jared Adolf-Bryfogle", -brief_description = "Graphs Interface metrics such as packstat vs other metrics. Packstat not very useful in general.", -feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), -run=function(self, sample_sources, output_dir, output_formats){ - - sele <- " - SELECT - packstat, - dSASA, - delta_unsatHbonds, - interface - FROM - interfaces - " - - plot_field = function(p, plot_id, grid = NULL){ - - if (! 
is.null(grid)){ - p <- p+ facet_grid(facets=grid) - } - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - save_plots(self, plot_id, sample_sources, output_dir, output_formats) - } - - plot_parts <- list( - geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), - scale_y_continuous("Feature Density"), - theme_bw()) - - - data = query_sample_sources(sample_sources, sele) - - - #Scatterplots - #sc_value vs packstat - parts = list( - geom_point(size=1.5, pch="o"), - #stat_smooth(color="grey"), - stat_smooth(method=lm), - geom_density2d(), - #stat_density2d(aes(fill = ..level..), geom="polygon"), - #stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE), - theme_bw()) - - #packstat vs dSASA - p <- ggplot(data = data, aes(x=packstat, y=dSASA)) + parts + - ggtitle("packstat vs dSASA") + - scale_x_continuous("packstat", limit=c(0,1.0)) + - scale_y_continuous("Buried SASA") - plot_field(p, "packstat_vs_dSASA_by_all", grid = sample_source ~.) - plot_field(p, "packstat_vs_dSASA_by_interface", grid = interface ~ sample_source) - - - #deltaUnsatHbonds vs packstat - p <- ggplot(data = data, aes(x = delta_unsatHbonds, y=packstat)) + parts + - ggtitle("packstat vs interface unsatisfied polar atoms ") + - scale_x_continuous("n") + - scale_y_continuous("packstat", limit = c(0, 1.0)) - plot_field(p, "packstat_vs_delta_unsat_polars_by_all", grid=sample_source ~ .) 
- plot_field(p, "packstat_vs_delta_unsat_polars_by_interface", grid = interface ~ sample_source) - #3D Plots - - #sc_value vs dG vs dSASA - - #Sides: - - #sc_value vs interface_energy - -})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R index e1b757c..596f8cc 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R @@ -50,7 +50,6 @@ run=function(self, sample_sources, output_dir, output_formats){ data = query_sample_sources(sample_sources, sele) - #Scatterplots #sc_value vs packstat parts = list( @@ -77,22 +76,23 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, "sc_value_vs dSASA_all", grid = sample_source ~ .) plot_field(p, "sc_value_vs_dSASA_by_interface", grid=interface ~ sample_source) + data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent #sc_value vs dG - p <- ggplot(data = data[data$dG<5000,], aes(x=sc_value, y=dG)) + parts + + p <- ggplot(data = data_rm_out, aes(x=sc_value, y=dG)) + parts + ggtitle("sc_value_vs_dG") + scale_x_continuous("sc_value", limit = c(0, 1.0)) + scale_y_continuous("REU") plot_field(p, "sc_value_vs_dG_by_all", grid=sample_source ~ .) plot_field(p, "sc_value_vs_dG_by_interface", grid=interface ~ sample_source) - #sc_value vs crossterm - p <- ggplot(data = data[data$dG<5000,], aes(x=sc_value, y=dG_cross)) + parts + - ggtitle("sc_value vs dG_cross") + + p <- ggplot(data = data_top, aes(x=sc_value, y=dG)) + parts + + ggtitle("sc_value_vs_dG") + scale_x_continuous("sc_value", limit = c(0, 1.0)) + scale_y_continuous("REU") - plot_field(p, "sc_value_vs_dG_cross_by_all", grid=sample_source ~ .) 
- plot_field(p, "sc_value_vs_dG_cross_by_interface", grid=interface ~ sample_source) + plot_field(p, "top_10_percent_dG-sc_value_vs_dG_by_all", grid=sample_source ~ .) + plot_field(p, "top_10_percent_dG-sc_value_vs_dG_by_interface", grid=interface ~ sample_source) #deltaUnsatHbonds vs sc_value p <- ggplot(data = data, aes(x=sc_value, y=delta_unsatHbonds)) + parts + From 1b5054dec6572ebb50da5ed24509be44dc51f2da Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 14:39:20 -0600 Subject: [PATCH 39/55] remove some unused interface features scripts. fix up. --- .../plots/interfaces/SASA/int_SASA_den.R | 25 ++- .../SASA/int_SASA_residue_avgs_den.R | 97 ------------ .../interfaces/SASA/int_SASA_residue_den.R | 130 ---------------- .../interfaces/SASA/int_SASA_residue_vs.R | 142 ------------------ .../plots/interfaces/SASA/int_dSASA_vs.R | 127 ---------------- .../plots/interfaces/energies/int_dG_vs.R | 17 ++- .../interfaces/energies/int_energies_den.R | 20 --- .../energies/int_energies_residue_den.R | 2 +- 8 files changed, 34 insertions(+), 526 deletions(-) delete mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R delete mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R delete mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R delete mode 100644 inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index ed9937e..7ca37c6 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -120,25 +120,42 @@ run=function(self, sample_sources, output_dir, output_formats){ #Backbone SASA may not be interesting, but I want I still want to know for now. #JAB - Commenting out hydrophibic sasa. Not very useful from my experience and it makes too many plots. 
fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") - fields = c("dSASA", "dSASA_bb", "dSASA_sc") + fields = c("dSASA") + + data_rm_out = subset(data, subset=(data$dSASA <= quantile(data$dSASA, .90))) #Remove high energy outliers + data_top = subset(data, subset=(data$dSASA <= quantile(data$dSASA, .10))) #Top 10 percent for (field in fields){ parts = list(plot_parts, scale_x_continuous("SASA")) group = c("sample_source", "side") - dens <- estimate_density_1d(data, group, field) + dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(paste("Buried", field, sep=" ")) plot_field(p, paste(field, "den_sides_by_all", sep="_"), grid=side ~ .) group = c("sample_source", "interface", "side") - dens <- estimate_density_1d(data, group, field) + dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(paste("Buried", field, sep=" ")) plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface) + parts = list(plot_parts, scale_x_continuous("SASA")) + group = c("sample_source", "side") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Buried", field, sep=" ")) + plot_field(p, paste(field, "top_10_percent_den_sides_by_all", sep="_"), grid=side ~ .) 
+ + group = c("sample_source", "interface", "side") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Buried", field, sep=" ")) + plot_field(p, paste(field, "top_10_percent_den_sides","by_interface", sep="_"), grid=side~interface) } @@ -159,7 +176,7 @@ run=function(self, sample_sources, output_dir, output_formats){ # plot_field(p, paste("dSASA_all", "den_sides","by_interface", sep="_"), grid=side~interface) #### Means ######### - fields = c("dSASA", "dhSASA") + fields = c("dSASA") for (field in fields){ avgs <- ddply(data, .(sample_source, side, field), function(d2){ diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R deleted file mode 100644 index 63ae299..0000000 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_avgs_den.R +++ /dev/null @@ -1,97 +0,0 @@ -# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- -# vi: set ts=2 noet: -# -# (c) Copyright Rosetta Commons Member Institutions. -# (c) This file is part of the Rosetta software suite and is made available under license. -# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. -# (c) For more information, see http://www.rosettacommons.org. Questions about this can be -# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. 
- -library(ggplot2) -library(plyr) -library(grid) - -feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", -id = "int_SASA-by_residue_avgs_den", -author = "Jared Adolf-Bryfogle", -brief_description = "Graphs basic dSASA and SASA information", -feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), -run=function(self, sample_sources, output_dir, output_formats){ - - #First we run on all the interfaces in the database - - capwords <- function(s, strict = FALSE) - { - cap <- function(s) paste(toupper(substring(s, 1, 1)), { - s <- substring(s, 2) - if (strict) - tolower(s) - else s - }, sep = "", collapse = " ") - sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) - } - - - - - plot_parts <- list( - geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), - scale_y_continuous("Feature Density"), - theme_bw()) - - - sele = " - SELECT - avg_per_residue_dSASA, - avg_per_residue_SASA_int, - avg_per_residue_SASA_sep, - interface, - side - FROM - interface_sides - " - plot_field = function(p, plot_id, grid = NULL){ - - if (! 
is.null(grid)){ - p <- p+ facet_grid(facets=grid) - } - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - save_plots(self, plot_id, sample_sources, output_dir, output_formats) - } - - data = query_sample_sources(sample_sources, sele) - - #AvgFields - fields = c("avg_per_residue_dSASA", - "avg_per_residue_SASA_int", - "avg_per_residue_SASA_sep") - - #parts = list(plot_parts, scale_x_continuous("SASA", limit=c(0, 100))) - parts = list(plot_parts, xlab("SASA")) - for(field in fields){ - fieldSP = unlist(strsplit(field, split="_")) - group = c("sample_source", "side") - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste(capwords(fieldSP), collapse=" ")) - plot_field(p, paste(field, "den_sides_all", sep="_"), grid=side ~ .) - - group=c("sample_source", "interface", "side") - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste(capwords(fieldSP), collapse=" ")) - plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface) - - } - - - #aromatic dSASA fraction vs packstat - - #aromatic dSASA fraction vs sc_value - - -})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R deleted file mode 100644 index 2148ef5..0000000 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_den.R +++ /dev/null @@ -1,130 +0,0 @@ -# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- -# vi: set ts=2 noet: -# -# (c) Copyright Rosetta Commons Member Institutions. -# (c) This file is part of the Rosetta software suite and is made available under license. 
-# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. -# (c) For more information, see http://www.rosettacommons.org. Questions about this can be -# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. - -library(ggplot2) -library(plyr) -library(grid) - -feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", -id = "int_SASA-by_residue_den", -author = "Jared Adolf-Bryfogle", -brief_description = "Graphs all information of individual interface residues. -Should be same interface, same numbering scheme / decoy set for this to have any meaning.", -feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), -run=function(self, sample_sources, output_dir, output_formats){ - - plot_parts <- list( - geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), - scale_y_continuous("Feature Density"), - theme_bw()) - - plot_field = function(p, plot_id, grid = NULL){ - - if (! is.null(grid)){ - p <- p+ facet_grid(facets=grid) - } - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - save_plots(self, plot_id, sample_sources, output_dir, output_formats) - } - - sele <-" - SELECT - interface_residues.interface as interface, - interface_residues.relative_dSASA_fraction as dSASA_fraction, - interface_residues.dSASA as dSASA, - interface_residues.dSASA_sc as dSASA_sc, - (dSASA - dSASA_sc) as dSASA_bb, - interface_residues.dhSASA as dhSASA, - interface_residues.dhSASA_sc as dhSASA_sc, - (dhSASA - dhSASA_sc) as dhSASA_bb, - interface_residues.dhSASA_rel_by_charge as dhSASA_rel_by_charge - FROM - interface_residues" - - #Density plots - - data = query_sample_sources(sample_sources, sele) - ##Overall plots for all residues: Add Side data once we have this. - - #Densities - - #dSASA - #JAB - Comment out hydrophobic component as I don't think its very useful and it creates too man plots. 
- fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") - fields = c("dSASA", "dSASA_bb", "dSASA_sc") - - for (field in fields){ - group = c("sample_source") - dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab(field) - plot_field(p, paste(field, "residue_>0dSASA_dens_by_all", sep="_")) - - group = c("sample_source", "interface") - dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab(field) - plot_field(p, paste(field, "residue_>0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) - - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab(field) - plot_field(p, paste(field, "residue_dens_by_all", sep="_")) - - group = c("sample_source", "interface") - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - xlab(field) - plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) 
- } - #dSASA fraction - field = "dSASA_fraction" - group = c("sample_source") - dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - scale_x_continuous("SASA buried/SASA separated", limit = c(0, 1.0)) - plot_field(p, paste(field, "residue_>0dSASA_dens_by_all", sep="_")) - - group = c("sample_source", "interface") - dens <- estimate_density_1d(data[data$dSASA > 0,], group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - scale_x_continuous("SASA buried/SASA separated", limit = c(0, 1.0)) - plot_field(p, paste(field, "residue_>0dSASA_dens_by_interface", sep="_"), grid=interface ~ .) - - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - scale_x_continuous("SASA buried/SASA separated", limit = c(0, 1.0)) - plot_field(p, paste(field, "residue_dens_by_all", sep="_")) - - group = c("sample_source", "interface") - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data = dens, na.rm=T) + plot_parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(paste("Residue", field, sep=" ")) + - scale_x_continuous(field, limit = c(0, 1.0)) - plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) 
- - -})) \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R deleted file mode 100644 index 1522c88..0000000 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_residue_vs.R +++ /dev/null @@ -1,142 +0,0 @@ -# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- -# vi: set ts=2 noet: -# -# (c) Copyright Rosetta Commons Member Institutions. -# (c) This file is part of the Rosetta software suite and is made available under license. -# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. -# (c) For more information, see http://www.rosettacommons.org. Questions about this can be -# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. - -library(ggplot2) -library(plyr) -library(grid) - -feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", -id = "int_SASA-by_residue_vs", -author = "Jared Adolf-Bryfogle", -brief_description = "Graphs all information of individual interface residues. -Should be same interface, same numbering scheme / decoy set for this to have any meaning.", -feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), -run=function(self, sample_sources, output_dir, output_formats){ - - plot_parts <- list( - geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), - scale_y_continuous("Feature Density"), - theme_bw()) - - plot_field = function(p, plot_id, grid = NULL){ - - if (! 
is.null(grid)){ - p <- p+ facet_grid(facets=grid) - } - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - save_plots(self, plot_id, sample_sources, output_dir, output_formats) - } - - sele <-" - SELECT - interface_residues.interface as interface, - interface_residues.relative_dSASA_fraction as dSASA_fraction, - interface_residues.dSASA as dSASA, - interface_residues.dG as dG, - interface_residues.energy_int as energy_int, - interface_residues.energy_sep as energy_sep - FROM - interface_residues" - - #Density plots - - data = query_sample_sources(sample_sources, sele) - ##Overall plots for all residues: Add Side data once we have this. - - #Scatterplots - - #dSASA vs dSASA fraction - parts = list( - geom_point(size=.5, pch="o"), - stat_smooth(method = lm), - geom_density2d(), - scale_x_continuous("dSASA"), - scale_y_continuous("dSASA fraction", limit = c(0, 1.0)), - theme_bw(), - ggtitle("Residue dSASA vs dSASA Fraction")) - - #[data$dSASA > 0 & data$dSASA_fraction > 0,] - p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction)) + - parts - plot_field(p, "SASA_vs_dSASA_fraction_residue_by_all", grid = sample_source ~ .) - - p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction)) + - parts - plot_field(p, "dSASA_vs_dSASA_fraction_residue_by_interface", grid = sample_source ~ interface) - - #->ss_overlay functions complain cannot coerce type 'symbol' to vector of type 'double' for some reason - - p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction, color=factor(sample_source))) + - parts - plot_field(p, "dSASA_vs_dSASA_fraction_residue_by_all_W_ss_overlay") - - p <- ggplot(data=data, aes(x=dSASA, y=dSASA_fraction, color=factor(sample_source))) + - parts - plot_field(p, "dSASA_vs_dSASA_fraction_residue_by_interface_W_ss_overlay", grid=interface ~ .) 
- - #dSASA vs dG - parts = list( - xlab("dSASA"), - #scale_y_continuous("REU", limit = c(-15, 15)), - ylab("REU"), - geom_point(size=.5, pch="o"), - stat_smooth(method = lm), - stat_density2d(), - theme_bw(), - ggtitle("Residue dSASA vs dG")) - - #[data$dSASA > 0 & -15 < data[field] & data[field] < 15,] - p <- ggplot(data=data, aes(x=dSASA, y=dG)) + - parts - plot_field(p, "dSASA_vs_dG_residue_by_all", grid = sample_source ~ .) - - p <- ggplot(data=data, aes(x=dSASA, y=dG)) + - parts - plot_field(p, "dSASA_vs_dG_residue_by_interface", grid = sample_source ~ interface) - - p <- ggplot(data=data, aes(x=dSASA, y=dG, color=sample_source)) + - parts - plot_field(p, "dSASA_vs_dG_residue_by_all_W_ss_overlay") - - p <- ggplot(data=data, aes(x=dSASA, y=dG, color=sample_source)) + - parts - plot_field(p, "dSASA_vs_dG_by_residue_interface_W_ss_overlay", grid=interface ~ .) - - #dG vs dSASA_fraction - parts = list( - scale_x_continuous("dSASA fraction", limit = c(0, 1.0)), - ylab("REU"), - #scale_y_continuous("REU", limit=c(-15, 15)), - geom_point(size=.5, pch="o"), - stat_smooth(method = lm), - stat_density2d(), - theme_bw(), - ggtitle("Residue dSASA fraction vs dG")) - - #[data$dSASA_fraction > 0 & -10 < data[field] & data[field] < 15,] - p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction, color=sample_source)) + - parts - plot_field(p, "dSASA_fraction_vs_dG_residue_by_all_W_ss_overlay") - - p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction, color=sample_source)) + - parts - plot_field(p, "dSASA_fraction_vs_dG_residue_by_interface_W_ss_overlay", grid=interface ~ .) - - p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction)) + - parts - plot_field(p, "dSASA_fraction_vs_dG_residue_by_all", grid = sample_source ~ .) - - p <- ggplot(data=data, aes(y=dG, x=dSASA_fraction)) + - parts - plot_field(p, "dSASA_fraction_vs_dG_residue_by_interface", grid = sample_source ~ interface) - - #Per residue data. This may get crazy. 
-})) \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R b/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R deleted file mode 100644 index 70a87b0..0000000 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_dSASA_vs.R +++ /dev/null @@ -1,127 +0,0 @@ -# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- -# vi: set ts=2 noet: -# -# (c) Copyright Rosetta Commons Member Institutions. -# (c) This file is part of the Rosetta software suite and is made available under license. -# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. -# (c) For more information, see http://www.rosettacommons.org. Questions about this can be -# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu. - -library(ggplot2) -library(plyr) -library(grid) - -feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", -id = "int_SASA-dSASA_vs", -author = "Jared Adolf-Bryfogle", -brief_description = "Graphs basic dSASA and SASA information", -feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), -run=function(self, sample_sources, output_dir, output_formats){ - - #First we run on all the interfaces in the database - - - - sele = " - SELECT - dSASA, - dSASA_hphobic, - dSASA_polar, - dG, - interface - FROM - interfaces" - - int_data = query_sample_sources(sample_sources, sele) - - - - #dSASA sides - sele = " - SELECT - dG, - dSASA, - dSASA_sc, - dSASA - dSASA_sc as dSASA_bb, - dhSASA, - dhSASA_sc, - dhSASA - dhSASA_sc as dhSASA_bb, - dSASA-dhSASA as dpSASA, - dhSASA_rel_by_charge, - aromatic_dSASA_fraction, - interface_nres, - interface, - side - FROM - interface_sides - " - plot_field = function(p, plot_id, grid = NULL){ - - if (! 
is.null(grid)){ - p <- p+ facet_grid(facets=grid) - } - if(nrow(sample_sources) <= 3){ - p <- p + theme(legend.position="bottom", legend.direction="horizontal") - } - save_plots(self, plot_id, sample_sources, output_dir, output_formats) - } - - data = query_sample_sources(sample_sources, sele) - #print(data) - - - - #ScatterPlots - - - parts = list( - geom_point(size=.75), - #stat_smooth(color="grey"), - stat_smooth(method=lm), - geom_density2d(size=.5), - #stat_density2d(aes(fill = ..level..), geom="polygon"), - #stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE), - theme_bw()) - #JAB - commenting out the hydrophobic dSASA. Unclear if this is useful or not. I don't think it is very much. - #fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") - - #fields = c("dSASA", "dSASA_bb", "dSASA_sc") - fields = c("dSASA") - - data_rm_out = subset(int_data, subset=(int_data$dG <= quantile(int_data$dG, .90))) #Remove high energy outliers - data_top = subset(int_data, subset=(int_data$dG <= quantile(int_data$dG, .10))) #Top 10 percent - - for (f in fields){ - - #dSASA vs dG - - - - p <- ggplot(data=data_rm_out, aes(x = f, y = dG, colour=sample_source)) + parts + - ggtitle(paste(f,"vs dG")) - - plot_field(p, paste(f, "vs_dG_by_all", sep="_")) - plot_field(p, paste(f, "vs_dG_by_interface", sep="_"), grid=interface ~ .) - - data_rm_out$e_density = data_rm_out$dG/data_rm_out$dSASA - field = c("dSASA") - p <- ggplot(data = data_rm_out, aes(x = dSASA, y = e_density, colour=sample_source)) + parts + - ggtitle(paste(field, "vs Interface energy density")) + - plot_field(p, paste("control", field, "vs_energy_density", sep="_"), grid=side ~ .) 
- - #Top 10 Percent - p <- ggplot(data=data_top, aes(x = f, y = dG, colour=sample_source)) + parts + - ggtitle(paste(f,"vs dG")) - - plot_field(p, paste(f, "vs_dG_top_10_percent_by_all", sep="_")) - plot_field(p, paste(f, "vs_dG_top_10_percent_by_interface", sep="_"), grid=interface ~ .) - - data_top$e_density = data_top$dG/data_top$dSASA - field = c("dSASA") - p <- ggplot(data = data_top, aes(x = dSASA, y = e_density, colour=sample_source)) + parts + - ggtitle(paste(field, "vs Interface energy density")) + - plot_field(p, paste("control", field, "vs_energy_density_top_10_percent", sep="_"), grid=side ~ .) - - } - -})) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index beb4142..ea33086 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -80,23 +80,30 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- ggplot(data=data_rm_out, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs dSASA") + ylab("SASA") + - xlab("REU") + xlab("REU (dG)") plot_field(p, "dG_vs_dSASA_by_all") plot_field(p, "dG_vs_dSASA_by_interface", grid=~ interface) + p <- ggplot(data=data_top, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs dSASA") + + ylab("SASA") + + xlab("REU (dG)") + plot_field(p, "dG_vs_dSASA_top_10_percentdG_by_all") + plot_field(p, "dG_vs_dSASA_top_10_percentdG_by_interface", grid=~ interface) + #dG vs Total Energy p <- ggplot(data=data_rm_out, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs total_score") + - ylab("REU") + - xlab("REU") + ylab("REU (dG)") + + xlab("REU (Total Score)") plot_field(p, "dG_vs_total_score_by_all") plot_field(p, "dG_vs_total_score_by_interface", grid=~ interface) #dG vs Total Energy p <- 
ggplot(data=data_top, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs total_score") + - ylab("REU") + - xlab("REU") + ylab("REU (dG)") + + xlab("REU (Total Score") plot_field(p, "dG_vs_total_score_top_10_percent_by_all") plot_field(p, "dG_vs_total_score_top_10_percent_by_interface", grid=~ interface) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 2aa8cf4..dd57916 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -96,26 +96,6 @@ run=function(self, sample_sources, output_dir, output_formats){ } - field = "hbond_E_fraction" - parts = list(plot_parts, xlab("fraction")) - - group = c("sample_source") - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle("Hbond Interface Energy Fraction") - plot_field(p, paste(field, "den_by_all", sep="_") ) - - group = c("sample_source", "interface") - dens <- estimate_density_1d(data, group, field) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle("Hbond Interface Energy Fraction") - plot_field(p, paste(field, "den_by_interface", sep="_"), grid=interface ~ .) - - #Side energies? 
- - #Averages: avgs <- ddply(data, .(sample_source, interface), function(d2){ data.frame(m = mean(d2$dG), std_dev = sd(d2$dG), m_top10 = mean(d2[1:10,]$dG), std_dev_top_10 = sd(d2[1:10,]$dG), top = d2[1,]$dG) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index 62c213d..59dcf33 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -12,7 +12,7 @@ library(plyr) library(grid) -feature_analyses <- c(feature_analyses, new("FeaturesAnalysis", +feature_analyses <- c(feature_analyses, methods::new("FeaturesAnalysis", id = "int_energies-by_residue_den", author = "Jared Adolf-Bryfogle", brief_description = "Graphs all information of individual interface residues. From f853f3b4216470ef9cc73ce1cb29c0633b9f3b72 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 14:46:26 -0600 Subject: [PATCH 40/55] finish fixing features scripts. 
--- .../antibodies/SASA/ab_paratope_SASA_den.R | 23 ++--- .../antibodies/SASA/ag_ab_cdr_dSASA_den.R | 84 ------------------- 2 files changed, 9 insertions(+), 98 deletions(-) diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index 8ad248b..bdfdbdd 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -51,29 +51,24 @@ run=function(self, sample_sources, output_dir, output_formats){ #Paratope SASA + data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + group = c("sample_source") - dens <- estimate_density_1d(data, group, c("paratope_SASA")) + dens <- estimate_density_1d(data_rm_out, group, c("paratope_SASA")) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + xlab("SASA") + ggtitle("CDR Paratope SASA") plot_field(p, "paratope_sasa_den") - - #Paratope hSASA + group = c("sample_source") - dens <- estimate_density_1d(data, group, c("paratope_hSASA")) + dens <- estimate_density_1d(data_top, group, c("paratope_SASA")) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + xlab("SASA") + - ggtitle("CDR Paratope hSASA") - plot_field(p, "paratope_hsasa_den") + ggtitle("CDR Paratope SASA") + plot_field(p, "paratope_sasa__top_10_percent_den") + - #Paratope pSASA - group = c("sample_source") - dens <- estimate_density_1d(data, group, c("paratope_pSASA")) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - xlab("SASA") + - ggtitle("CDR Paratope pSASA") - plot_field(p, "paratope_psasa_den") })) # end FeaturesAnalysis \ No newline at end of file diff --git 
a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R index b65a72d..7e19849 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ag_ab_cdr_dSASA_den.R @@ -38,44 +38,6 @@ run=function(self, sample_sources, output_dir, output_formats){ dSASA > 0 and CDR NOT LIKE '%Proto%' " - #} - - # if ("TRUE" %in% opt$options$include_cdr4){ - # sele = " - # SELECT - # ag_ab_dSASA as dSASA, - # ag_ab_dSASA_sc as dSASA_sc, - # ag_ab_dhSASA as dhSASA, - # ag_ab_dhSASA_sc as dhSASA_sc, - # ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge, - # struct_id, - # CDR, - # length - # FROM - # cdr_metrics - # WHERE - # dSASA > 0 - # " - # } - # - # if ("TRUE" %in% opt$options$cdr4_only){ - # sele = " - # SELECT - # ag_ab_dSASA as dSASA, - # ag_ab_dSASA_sc as dSASA_sc, - # ag_ab_dhSASA as dhSASA, - # ag_ab_dhSASA_sc as dhSASA_sc, - # ag_ab_dhSASA_rel_by_charge as dhSASA_rel_by_charge, - # struct_id, - # CDR, - # length - # FROM - # cdr_metrics - # WHERE - # dSASA > 0 and - # CDR LIKE '%Proto%' - # " - # } data = query_sample_sources(sample_sources, sele) @@ -125,43 +87,6 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, "avg_cdr_dSASA_hist_by_length", ~CDR) - #Avg CDR polar SASA - means <- ddply(data, .(sample_source, CDR), function(data){ - data.frame(sample_source = data$sample_source, CDR = data$CDR, m=mean(data$dSASA-data$dhSASA)) - }) - p <- ggplot(data=means, na.rm=T) + - geom_bar(position="dodge", stat='identity', aes(x=CDR, y=m, fill=sample_source)) + - ggtitle("Average Antigen Buried Polar SASA") + - xlab("CDR") + - ylab("dpSASA") + - theme_bw() - plot_field(p, "avg_cdr_dpSASA_hist") - - #Avg CDR hSASA - means <- ddply(data, .(sample_source, CDR), function(data){ - data.frame(sample_source = data$sample_source, CDR = data$CDR, m=mean(data$dhSASA)) - }) - p <- ggplot(data=means, na.rm=T) + - 
geom_bar(position="dodge", stat='identity', aes(x=CDR, y=m, fill=sample_source)) + - ggtitle("Average Antigen Buried Hydrophobic SASA") + - xlab("CDR") + - ylab("dhSASA") + - theme_bw() - plot_field(p, "avg_cdr_dhSASA_hist") - - #Avg CDR polar fraction - means <- ddply(data, .(sample_source, CDR), function(data){ - m_d = mean(data$dSASA) - m_p = mean(data$dSASA-data$dhSASA) - data.frame(sample_source = data$sample_source, CDR = data$CDR, m=m_p/m_d) - }) - p <- ggplot(data=means, na.rm=T) + - geom_bar(position="dodge", stat='identity', aes(x=CDR, y=m, fill=sample_source)) + - ggtitle("Average Antigen Buried Polar SASA Fraction") + - xlab("CDR") + - ylab("dSASA (polar) /dSASA") + - theme_bw() - plot_field(p, "avg_cdr_polar_fraction_hist") @@ -174,13 +99,4 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("Antigen Buried Solvent Accessible Surface Area") plot_field(p, "cdr_dSASA_den", ~CDR) - #CDR polar density - group = c("sample_source", "CDR") - has_dsasa_data = data[data$dSASA != 0,] - dens <- estimate_density_1d(has_dsasa_data, group, c("polar_fraction")) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x =x, y= y, colour=sample_source), size=1.2) + - scale_x_continuous("dSASA (polar) /dSASA", limit = c(0, 1.0)) + - ggtitle("Antigen Buried Polar SASA Fraction") - plot_field(p, "cdr_polar_fraction_den", ~CDR) })) # end FeaturesAnalysis \ No newline at end of file From 72274f35e5f1add7fdc8f2f274ba24152fc10188 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 15:35:19 -0600 Subject: [PATCH 41/55] testing. 
--- .../plots/interfaces/energies/int_dG_vs.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index ea33086..7049120 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -58,8 +58,13 @@ run=function(self, sample_sources, output_dir, output_formats){ data = query_sample_sources(sample_sources, sele) #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers - data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers - data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + }) + + data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + }) parts = list( geom_point(size=1.0, pch="o"), @@ -81,8 +86,8 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("dG vs dSASA") + ylab("SASA") + xlab("REU (dG)") - plot_field(p, "dG_vs_dSASA_by_all") - plot_field(p, "dG_vs_dSASA_by_interface", grid=~ interface) + plot_field(p, "dG_vs_dSASA_top_90_percentdG_by_all") + plot_field(p, "dG_vs_dSASA_top_90_percentdG_by_interface", grid=~ interface) p <- ggplot(data=data_top, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs dSASA") + @@ -96,8 +101,8 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("dG vs total_score") + ylab("REU (dG)") + xlab("REU (Total Score)") - plot_field(p, "dG_vs_total_score_by_all") - plot_field(p, "dG_vs_total_score_by_interface", grid=~ interface) + plot_field(p, "dG_vs_total_score_top_90_percentdG_by_all") + plot_field(p, 
"dG_vs_total_score_top_90_percentdG_by_interface", grid=~ interface) #dG vs Total Energy p <- ggplot(data=data_top, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + From fd6d31114cafa781612a30234322e3cfde0c9d63 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 15:49:43 -0600 Subject: [PATCH 42/55] fix top n percent plots to do ddplyr to correctly select top models. --- .../antibodies/SASA/ab_paratope_SASA_den.R | 11 ++++-- .../plots/interfaces/SASA/int_SASA_den.R | 13 +++++-- .../plots/interfaces/energies/int_dG_vs.R | 2 +- .../interfaces/energies/int_energies_den.R | 31 +++++++++++++-- .../energies/int_energies_residue_den.R | 15 ++++--- .../interfaces/packing/int_sc_value_vs.R | 13 +++++-- .../analysis/plots/scores/total_score.R | 39 ++++++++++++++++++- 7 files changed, 101 insertions(+), 23 deletions(-) diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index bdfdbdd..fb7781f 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -51,8 +51,13 @@ run=function(self, sample_sources, output_dir, output_formats){ #Paratope SASA - data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers - data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$paratope_SASA <= quantile(data$paratope_SASA, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$paratope_SASA <= quantile(data$paratope_SASA, .10))) #Top 10 percent + }) group = c("sample_source") dens <- estimate_density_1d(data_rm_out, group, c("paratope_SASA")) @@ -60,7 +65,7 @@ run=function(self, sample_sources, output_dir, output_formats){ 
geom_line(aes(x, y, colour=sample_source), size=1.2) + xlab("SASA") + ggtitle("CDR Paratope SASA") - plot_field(p, "paratope_sasa_den") + plot_field(p, "top_90_percent_paratope_sasa_den") group = c("sample_source") dens <- estimate_density_1d(data_top, group, c("paratope_SASA")) diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index 7ca37c6..6912710 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -122,8 +122,13 @@ run=function(self, sample_sources, output_dir, output_formats){ fields = c("dSASA", "dSASA_bb", "dSASA_sc", "dhSASA", "dhSASA_bb", "dhSASA_sc", "dhSASA_rel_by_charge") fields = c("dSASA") - data_rm_out = subset(data, subset=(data$dSASA <= quantile(data$dSASA, .90))) #Remove high energy outliers - data_top = subset(data, subset=(data$dSASA <= quantile(data$dSASA, .10))) #Top 10 percent + data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dSASA <= quantile(data$dSASA, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dSASA <= quantile(data$dSASA, .10))) #Top 10 percent + }) for (field in fields){ @@ -133,14 +138,14 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(paste("Buried", field, sep=" ")) - plot_field(p, paste(field, "den_sides_by_all", sep="_"), grid=side ~ .) + plot_field(p, paste(field, "top_90_percent_den_sides_by_all", sep="_"), grid=side ~ .) 
group = c("sample_source", "interface", "side") dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(paste("Buried", field, sep=" ")) - plot_field(p, paste(field, "den_sides","by_interface", sep="_"), grid=side~interface) + plot_field(p, paste(field, "top_90_percent_den_sides","by_interface", sep="_"), grid=side~interface) parts = list(plot_parts, scale_x_continuous("SASA")) group = c("sample_source", "side") diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index 7049120..032f318 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -62,7 +62,7 @@ run=function(self, sample_sources, output_dir, output_formats){ subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers }) - data_rm_out <- ddply(data, .(sample_source), function(d2){ + data_top <- ddply(data, .(sample_source), function(d2){ subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent }) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index dd57916..4e21421 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -57,8 +57,17 @@ run=function(self, sample_sources, output_dir, output_formats){ data = query_sample_sources(sample_sources, sele) #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers - data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers - data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, 
subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + }) + + f <- ddply(data, .(sample_source), function(d2){ + data.frame(total_score = d2[1:20,]$dG) + }) #Basic Densities fields = c("dG" ) @@ -70,14 +79,14 @@ run=function(self, sample_sources, output_dir, output_formats){ p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(field) - plot_field(p, paste(field, "den_by_all", sep="_"), ) + plot_field(p, paste(field, "top_90_percent_den_by_all", sep="_"), ) group = c("sample_source", "interface") dens <- estimate_density_1d(data_rm_out, group, field) p <- ggplot(data=dens, na.rm=T) + parts + geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(field) - plot_field(p, paste(field, "den_by_interface", sep="_"), grid=interface ~ .) + plot_field(p, paste(field, "top_90_percent_den_by_interface", sep="_"), grid=interface ~ .) group = c("sample_source") dens <- estimate_density_1d(data_top, group, field) @@ -92,6 +101,20 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(field) plot_field(p, paste(field, "top_10_percent_den_by_interface", sep="_"), grid=interface ~ .) + + group = c("sample_source") + dens <- estimate_density_1d(f, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_20_den_by_all", sep="_"), ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(f, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_20_den_by_interface", sep="_"), grid=interface ~ .) 
} diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index 59dcf33..88acebc 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -53,8 +53,13 @@ run=function(self, sample_sources, output_dir, output_formats){ #Densities - data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers - data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + }) #Energies fields = c("dG") @@ -66,7 +71,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle(paste("Residue", field, sep=" ")) + xlab("REU") #scale_x_continuous("REU", limit = c(-15, 15)) - plot_field(p, paste(field, "residue_dens_by_all", sep="_")) + plot_field(p, paste(field, "residue_dens_top_90_percent_by_all", sep="_")) group = c("sample_source", "interface") dens <- estimate_density_1d(data_rm_out, group, field) @@ -75,7 +80,7 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle(paste("Residue", field, sep=" ")) + xlab("REU") #scale_x_continuous("REU", limit = c(-15, 15)) - plot_field(p, paste(field, "residue_dens_by_interface", sep="_"), grid=interface ~ .) + plot_field(p, paste(field, "residue_dens_top_90_percent_by_interface", sep="_"), grid=interface ~ .) dens <- estimate_density_1d(data_top, group, field) p <- ggplot(data = dens, na.rm=T) + plot_parts + @@ -96,6 +101,4 @@ run=function(self, sample_sources, output_dir, output_formats){ } - - #Per residue data. This may get crazy. 
})) \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R index 596f8cc..818e443 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R @@ -76,16 +76,21 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, "sc_value_vs dSASA_all", grid = sample_source ~ .) plot_field(p, "sc_value_vs_dSASA_by_interface", grid=interface ~ sample_source) - data_rm_out = subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers - data_top = subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + }) #sc_value vs dG p <- ggplot(data = data_rm_out, aes(x=sc_value, y=dG)) + parts + ggtitle("sc_value_vs_dG") + scale_x_continuous("sc_value", limit = c(0, 1.0)) + scale_y_continuous("REU") - plot_field(p, "sc_value_vs_dG_by_all", grid=sample_source ~ .) - plot_field(p, "sc_value_vs_dG_by_interface", grid=interface ~ sample_source) + plot_field(p, "sc_value_vs_dG-(top_90_percent)-by_all", grid=sample_source ~ .) 
+ plot_field(p, "sc_value_vs_dG_(top_90_percent)-by_interface", grid=interface ~ sample_source) p <- ggplot(data = data_top, aes(x=sc_value, y=dG)) + parts + ggtitle("sc_value_vs_dG") + diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 1ce99fc..364bb36 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -54,6 +54,14 @@ f <- ddply(data, .(sample_source), function(d2){ data.frame(total_score = d2[1:20,]$total_score) }) +data_rm_out <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$total_score <= quantile(data$total_score, .90))) #Remove high energy outliers +}) + +data_top <- ddply(data, .(sample_source), function(d2){ + subset(data, subset=(data$total_score <= quantile(data$total_score, .10))) #Top 10 percent +}) + dens <- estimate_density_1d(f, ids = c("sample_source"), variable = "total_score") plot_id <- "total_score_top_20" @@ -65,12 +73,41 @@ p <- ggplot(data=dens) + theme_bw() + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) save_plots(self, plot_id, sample_sources, output_dir, output_formats) + +dens <- estimate_density_1d( + data = data_rm_out, + ids = c("sample_source"), + variable = "total_score") + +plot_id <- "total_score_top_90_percent" +p <- ggplot(data=dens) + theme_bw() + + geom_line(aes(x, y, colour=sample_source), size=1.4) + + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + ggtitle("Rosetta Structure Score") + + labs(x="Rosetta Energy Units") + + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) +save_plots(self, plot_id, sample_sources, output_dir, output_formats) + +dens <- estimate_density_1d( + data = data_top, + ids = c("sample_source"), + variable = "total_score") + +plot_id <- "total_score_top_10_percent" +p <- ggplot(data=dens) + theme_bw() + + geom_line(aes(x, y, colour=sample_source), size=1.4) + + 
geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + ggtitle("Rosetta Structure Score") + + labs(x="Rosetta Energy Units") + + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) +save_plots(self, plot_id, sample_sources, output_dir, output_formats) + + #Averages Scoring avgs <- ddply(data, .(sample_source), function(d2){ data.frame(m = mean(d2$total_score), std_dev = sd(d2$total_score), m_top10 = mean(d2[1:10,]$total_score), std_dev_top_10 = sd(d2[1:10,]$total_score), top = d2[1,]$total_score) }) -print(avgs) p <- ggplot(data=avgs ) + geom_bar(position="dodge", stat='identity', aes(x = sample_source, y= m , fill=sample_source)) + #geom_errorbar(aes(ymin = m-std_dev, ymax=m+std_dev) + From 55cc8dba292a83756d2565b69fcc218cea98bd6b Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 15:51:12 -0600 Subject: [PATCH 43/55] fix top n percent plots to do ddplyr to correctly select top models. --- .../analysis/plots/interfaces/energies/int_energies_den.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 4e21421..2ea4ef0 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -66,7 +66,7 @@ run=function(self, sample_sources, output_dir, output_formats){ }) f <- ddply(data, .(sample_source), function(d2){ - data.frame(total_score = d2[1:20,]$dG) + data.frame(dG = d2[1:20,]$dG) }) #Basic Densities From cbb123855b351e6c3862679b6ff791500618bb29 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 15:59:50 -0600 Subject: [PATCH 44/55] fix top n percent plots to do ddplyr to correctly select top models. 
--- .../analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R | 4 ++-- inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R | 4 ++-- .../scripts/analysis/plots/interfaces/energies/int_dG_vs.R | 6 +++--- .../analysis/plots/interfaces/energies/int_energies_den.R | 7 ++++--- .../plots/interfaces/energies/int_energies_residue_den.R | 4 ++-- .../analysis/plots/interfaces/packing/int_sc_value_vs.R | 4 ++-- inst/scripts/analysis/plots/scores/total_score.R | 4 ++-- 7 files changed, 17 insertions(+), 16 deletions(-) diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index fb7781f..f920700 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -52,11 +52,11 @@ run=function(self, sample_sources, output_dir, output_formats){ #Paratope SASA data_rm_out <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$paratope_SASA <= quantile(data$paratope_SASA, .90))) #Remove high energy outliers + subset(d2, subset=(d2$paratope_SASA <= quantile(d2$paratope_SASA, .90))) #Remove high energy outliers }) data_top <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$paratope_SASA <= quantile(data$paratope_SASA, .10))) #Top 10 percent + subset(d2, subset=(d2$paratope_SASA <= quantile(d2$paratope_SASA, .10))) #Top 10 percent }) group = c("sample_source") diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index 6912710..8983195 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -123,11 +123,11 @@ run=function(self, sample_sources, output_dir, output_formats){ fields = c("dSASA") data_rm_out <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dSASA <= 
quantile(data$dSASA, .90))) #Remove high energy outliers + subset(d2, subset=(d2$dSASA <= quantile(d2$dSASA, .90))) #Remove high energy outliers }) data_top <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dSASA <= quantile(data$dSASA, .10))) #Top 10 percent + subset(d2, subset=(d2$dSASA <= quantile(d2$dSASA, .10))) #Top 10 percent }) for (field in fields){ diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index 032f318..c4beb16 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -59,11 +59,11 @@ run=function(self, sample_sources, output_dir, output_formats){ #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers data_rm_out <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove high energy outliers }) - + data_top <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent }) parts = list( diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 2ea4ef0..a18ba39 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -58,15 +58,15 @@ run=function(self, sample_sources, output_dir, output_formats){ #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers data_rm_out <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove 
high energy outliers }) data_top <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent }) f <- ddply(data, .(sample_source), function(d2){ - data.frame(dG = d2[1:20,]$dG) + subset(data,subset=(d2[1:20,]) ) }) #Basic Densities @@ -148,4 +148,5 @@ run=function(self, sample_sources, output_dir, output_formats){ ylab("REU") + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) plot_field(p, "dG_top_by_interface", grid=interface ~ .) + })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index 88acebc..228eef6 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -54,11 +54,11 @@ run=function(self, sample_sources, output_dir, output_formats){ #Densities data_rm_out <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove high energy outliers }) data_top <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent }) #Energies diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R index 818e443..f7d4a1f 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R @@ -77,11 +77,11 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, "sc_value_vs_dSASA_by_interface", 
grid=interface ~ sample_source) data_rm_out <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .90))) #Remove high energy outliers + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove high energy outliers }) data_top <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$dG <= quantile(data$dG, .10))) #Top 10 percent + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent }) #sc_value vs dG diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 364bb36..4389b64 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -55,11 +55,11 @@ f <- ddply(data, .(sample_source), function(d2){ }) data_rm_out <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$total_score <= quantile(data$total_score, .90))) #Remove high energy outliers + subset(d2, subset=(d2$total_score <= quantile(d2$total_score, .90))) #Remove high energy outliers }) data_top <- ddply(data, .(sample_source), function(d2){ - subset(data, subset=(data$total_score <= quantile(data$total_score, .10))) #Top 10 percent + subset(d2, subset=(d2$total_score <= quantile(d2$total_score, .10))) #Top 10 percent }) dens <- estimate_density_1d(f, ids = c("sample_source"), variable = "total_score") From acb4a28778f466c40862cf3d6ef84588541fe731 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 16:02:38 -0600 Subject: [PATCH 45/55] fix top n percent plots to do ddplyr to correctly select top models. 
--- .../plots/interfaces/energies/int_energies_den.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index a18ba39..1cf0ec3 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -66,7 +66,7 @@ run=function(self, sample_sources, output_dir, output_formats){ }) f <- ddply(data, .(sample_source), function(d2){ - subset(data,subset=(d2[1:20,]) ) + data.frame(dG = d2[1:20,]$dG) }) #Basic Densities @@ -109,13 +109,6 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle(field) plot_field(p, paste(field, "top_20_den_by_all", sep="_"), ) - group = c("sample_source", "interface") - dens <- estimate_density_1d(f, group, field) - p <- ggplot(data=dens, na.rm=T) + parts + - geom_line(aes(x, y, colour=sample_source), size=1.2) + - ggtitle(field) - plot_field(p, paste(field, "top_20_den_by_interface", sep="_"), grid=interface ~ .) - } From 2b27ecea843324e68ec3e4e5bfd15a79ac9a5931 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 16:09:28 -0600 Subject: [PATCH 46/55] fix top n percent plots to do ddplyr to correctly select top models. 
--- .../analysis/plots/interfaces/energies/int_dG_vs.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index c4beb16..c5d079f 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -99,16 +99,16 @@ run=function(self, sample_sources, output_dir, output_formats){ #dG vs Total Energy p <- ggplot(data=data_rm_out, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs total_score") + - ylab("REU (dG)") + - xlab("REU (Total Score)") + xlab("REU (dG)") + + ylab("REU (Total Score)") plot_field(p, "dG_vs_total_score_top_90_percentdG_by_all") plot_field(p, "dG_vs_total_score_top_90_percentdG_by_interface", grid=~ interface) #dG vs Total Energy p <- ggplot(data=data_top, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs total_score") + - ylab("REU (dG)") + - xlab("REU (Total Score") + xlab("REU (dG)") + + ylab("REU (Total Score") plot_field(p, "dG_vs_total_score_top_10_percent_by_all") plot_field(p, "dG_vs_total_score_top_10_percent_by_interface", grid=~ interface) From 6f0b3a91d77a8e01a29f7bb7610465a27e3ce1e5 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Tue, 6 Feb 2018 17:00:38 -0600 Subject: [PATCH 47/55] fix top n percent plots to do ddplyr to correctly select top models. 
--- inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R | 6 +++++- inst/scripts/analysis/plots/scores/total_score.R | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index c5d079f..734e738 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -38,8 +38,12 @@ run=function(self, sample_sources, output_dir, output_formats){ structure_scores.struct_id = interfaces.struct_id " + #plot_parts <- list( + # geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), + # scale_y_continuous("Feature Density"), + # theme_bw()) + plot_parts <- list( - geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), scale_y_continuous("Feature Density"), theme_bw()) diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 4389b64..8c22006 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -45,7 +45,7 @@ plot_id <- "total_score" p <- ggplot(data=dens) + theme_bw() + geom_line(aes(x, y, colour=sample_source), size=1.4) + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + - ggtitle("Rosetta Structure Score") + + ggtitle("Total Rosetta Energy") + labs(x="Rosetta Energy Units") + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) save_plots(self, plot_id, sample_sources, output_dir, output_formats) From b00c382e10929576f2f5b95a6e3d2b925bd51f64 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 12:12:23 -0600 Subject: [PATCH 48/55] remove lm plot from dG vs. 
--- .../analysis/plots/interfaces/energies/int_dG_vs.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index 734e738..75dd35f 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -81,11 +81,18 @@ run=function(self, sample_sources, output_dir, output_formats){ parts_no_density = list( geom_point(size=1.2, pch="o"), - stat_smooth(method=lm), theme_bw() ) #dG vs dSASA + + p <- ggplot(data=data, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs dSASA") + + ylab("SASA") + + xlab("REU (dG)") + plot_field(p, "dG_vs_dSASA_by_all") + plot_field(p, "dG_vs_dSASA_by_interface", grid=~ interface) + p <- ggplot(data=data_rm_out, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs dSASA") + ylab("SASA") + From e677412780d5d8cd1098807937dd9ab0f99658aa Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 13:24:38 -0600 Subject: [PATCH 49/55] add plots for total score testing by native. 
--- .../analysis/plots/scores/total_score.R | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index 8c22006..a8377c2 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -135,4 +135,82 @@ p <- ggplot(data=avgs ) + scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) save_plots(self, "best_total_score", sample_sources, output_dir, output_formats) + +#By Native + +sele <-" +SELECT +structure_scores.struct_id as struct_id, +natives.native as native, +structure_scores.score_value as total_score, +structure_scores.score_type_id as score_type +FROM +structure_scores, +score_types, +natives + +WHERE +score_types.score_type_name='total_score' AND +structure_scores.score_type_id = score_types.score_type_id AND +natives.struct_id = structure_scores.struct_id +ORDER BY score_value;" + + +data <- query_sample_sources(sample_sources, sele) + +data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + + subset(d2, subset=(d2$total_score <= quantile(d2$total_score, .90))) #Remove high energy outliers +}) + +data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$total_score <= quantile(d2$total_score, .10))) #Top 10 percent +}) + +f <- ddply(data, .(sample_source, native), function(d2){ + data.frame(total_score = d2[1:20,]$total_score) +}) + + + +dens <- estimate_density_1d(f, ids = c("sample_source"), variable = "total_score") + +plot_id <- "total_score_top_20_by_native" +p <- ggplot(data=dens) + theme_bw() + + geom_line(aes(x, y, colour=sample_source), size=1.4) + + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + ggtitle("Rosetta Structure Score - Top 20") + + labs(x="Rosetta Energy Units") + + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) +save_plots(self, plot_id, sample_sources, 
output_dir, output_formats) + + +dens <- estimate_density_1d( + data = data_rm_out, + ids = c("sample_source"), + variable = "total_score") + +plot_id <- "total_score_top_90_percent_by_native" +p <- ggplot(data=dens) + theme_bw() + + geom_line(aes(x, y, colour=sample_source), size=1.4) + + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + ggtitle("Rosetta Structure Score") + + labs(x="Rosetta Energy Units") + + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) +save_plots(self, plot_id, sample_sources, output_dir, output_formats) + +dens <- estimate_density_1d( + data = data_top, + ids = c("sample_source"), + variable = "total_score") + +plot_id <- "total_score_top_10_percent_by_native" +p <- ggplot(data=dens) + theme_bw() + + geom_line(aes(x, y, colour=sample_source), size=1.4) + + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + + ggtitle("Rosetta Structure Score") + + labs(x="Rosetta Energy Units") + + scale_y_continuous("FeatureDensity", breaks=c(0, .3, .6)) +save_plots(self, plot_id, sample_sources, output_dir, output_formats) + })) # end FeaturesAnalysis From e3709586f3e5a9442a8c43973ba653c9cef617f0 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 13:28:37 -0600 Subject: [PATCH 50/55] add plots for total score testing by native. 
--- inst/scripts/analysis/plots/scores/total_score.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/scripts/analysis/plots/scores/total_score.R b/inst/scripts/analysis/plots/scores/total_score.R index a8377c2..d356de1 100644 --- a/inst/scripts/analysis/plots/scores/total_score.R +++ b/inst/scripts/analysis/plots/scores/total_score.R @@ -168,14 +168,14 @@ data_top <- ddply(data, .(sample_source, native), function(d2){ }) f <- ddply(data, .(sample_source, native), function(d2){ - data.frame(total_score = d2[1:20,]$total_score) + data.frame(total_score = d2[1:5,]$total_score) }) dens <- estimate_density_1d(f, ids = c("sample_source"), variable = "total_score") -plot_id <- "total_score_top_20_by_native" +plot_id <- "total_score_top_5_by_native" p <- ggplot(data=dens) + theme_bw() + geom_line(aes(x, y, colour=sample_source), size=1.4) + geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)) + From b9414f90c36785a666d4d768f6ec30e3a59ce31f Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 14:14:07 -0600 Subject: [PATCH 51/55] add native plots. 
--- .../antibodies/SASA/ab_paratope_SASA_den.R | 42 +++++- .../plots/interfaces/SASA/int_SASA_den.R | 120 ++++++++++-------- .../plots/interfaces/energies/int_dG_vs.R | 68 ++++++++++ .../interfaces/energies/int_energies_den.R | 81 ++++++++++++ .../energies/int_energies_residue_den.R | 70 ++++++++++ .../interfaces/packing/int_packing_den.R | 49 +++++++ .../interfaces/packing/int_sc_value_vs.R | 44 ++++++- 7 files changed, 417 insertions(+), 57 deletions(-) diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index f920700..a638df2 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -73,7 +73,47 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_line(aes(x, y, colour=sample_source), size=1.2) + xlab("SASA") + ggtitle("CDR Paratope SASA") - plot_field(p, "paratope_sasa__top_10_percent_den") + plot_field(p, "paratope_sasa_top_10_percent_den") + #Natives + + sele = " + SELECT + paratope_SASA, + paratope_hSASA, + paratope_SASA - paratope_hSASA as paratope_pSASA, + natives.native as native + FROM + ab_metrics, + natives + WHERE + ab_metrics.struct_id = natives.struct_id + " + + data = query_sample_sources(sample_sources, sele) + + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$paratope_SASA <= quantile(d2$paratope_SASA, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$paratope_SASA <= quantile(d2$paratope_SASA, .10))) #Top 10 percent + }) + + group = c("sample_source") + dens <- estimate_density_1d(data_rm_out, group, c("paratope_SASA")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("SASA") + + ggtitle("CDR Paratope SASA") + plot_field(p, 
"top_90_percent_paratope_sasa_den_by_native") + + group = c("sample_source") + dens <- estimate_density_1d(data_top, group, c("paratope_SASA")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("SASA") + + ggtitle("CDR Paratope SASA") + plot_field(p, "paratope_sasa_top_10_percent_den_by_native") })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R index 8983195..6d0c390 100644 --- a/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R +++ b/inst/scripts/analysis/plots/interfaces/SASA/int_SASA_den.R @@ -19,42 +19,6 @@ feature_reporter_dependencies = c("InterfaceFeatures/AntibodyFeatures"), run=function(self, sample_sources, output_dir, output_formats){ #First we run on all the interfaces in the database - - - -# sele = " -# SELECT -# dSASA, -# dSASA_hphobic, -# dSASA_polar, -# interface -# FROM -# interfaces" -# -# data = query_sample_sources(sample_sources, sele) -# - -# fields = c("dSASA", "dSASA_hphobic", "dSASA_polar") -# for(field in fields){ -# -# group = c("sample_source") -# dens <- estimate_density_1d(data, group, field) -# p <- ggplot(data=dens, na.rm=T) + parts + -# geom_line(aes(x, y, colour=sample_source), size=1.2) + -# ggtitle(field) -# plot_field(p, paste(field, "den_by_all", sep="_")) -# -# group = c("sample_source", "interface") -# dens <- estimate_density_1d(data, group, field) -# p <- ggplot(data=dens, na.rm=T) + parts + -# geom_line(aes(x, y, colour=sample_source), size=1.2) + -# ggtitle(field) -# plot_field(p, paste(field, "den_by_interface", sep="_"),grid=~interface) -# } -# -# -# #dSASA sides -# int_data = data plot_parts <- list( geom_indicator(aes(indicator=counts, colour=sample_source, group=sample_source)), @@ -164,21 +128,6 @@ run=function(self, sample_sources, output_dir, output_formats){ } - -# Plotting all together - Might look like crap, 
but lets try it. -# group = c("sample_source", "interface", "side") -# dens_dsasa <- estimate_density_1d(data, group, c("dSASA")) -# dens_dsasa_bb <- estimate_density_1d(data, group, c("dSASA_bb")) -# dens_dsasa_sc <- estimate_density_1d(data, group, c("dSASA_sc")) -# -# p <- ggplot(data=dens_dsasa, na.rm=T) + parts + -# geom_line(aes(x, y, colour=sample_source), size=1.2) + -# #geom_point(data=dens_dsasa, aes(x, y, colour=sample_source, size=.5, pch="o")) + -# geom_line(data=dens_dsasa_bb, aes(x, y, colour=sample_source, linetype= "dotted"), size=1.2) + -# geom_line(data=dens_dsasa_sc, aes(x, y, colour=sample_source, linetype= "dotdash"), size= 1.2) + -# ggtitle("dSASA Density") - -# plot_field(p, paste("dSASA_all", "den_sides","by_interface", sep="_"), grid=side~interface) #### Means ######### fields = c("dSASA") @@ -260,6 +209,73 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("Aromatic dSASA Fraction") plot_field(p, "dSASA_aromatic_fraction_den_by_interface", grid=side~interface) - + #Natives + + sele = " + SELECT + dSASA, + dSASA_sc, + dSASA - dSASA_sc as dSASA_bb, + dhSASA, + dhSASA_sc, + dhSASA - dhSASA_sc as dhSASA_bb, + dhSASA_rel_by_charge, + aromatic_dSASA_fraction, + interface, + side, + natives.native as native + FROM + interface_sides, + natives + WHERE + interface_sides.struct_id = natives.struct_id + ORDER BY dSASA DESC + " + data = query_sample_sources(sample_sources, sele) + + fields = c("dSASA") + + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dSASA <= quantile(d2$dSASA, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dSASA <= quantile(d2$dSASA, .10))) #Top 10 percent + }) + + for (field in fields){ + + parts = list(plot_parts, scale_x_continuous("SASA")) + group = c("sample_source", "side") + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + 
+ geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Buried", field, sep=" ")) + plot_field(p, paste(field, "top_90_percent_den_sides_by_native_by_all", sep="_"), grid=side ~ .) + + group = c("sample_source", "interface", "side") + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Buried", field, sep=" ")) + plot_field(p, paste(field, "top_90_percent_den_sides_by_native","by_interface", sep="_"), grid=side~interface) + + parts = list(plot_parts, scale_x_continuous("SASA")) + group = c("sample_source", "side") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Buried", field, sep=" ")) + plot_field(p, paste(field, "top_10_percent_den_sides_by_native_by_all", sep="_"), grid=side ~ .) + + group = c("sample_source", "interface", "side") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Buried", field, sep=" ")) + plot_field(p, paste(field, "top_10_percent_den_sides_by_native","by_interface", sep="_"), grid=side~interface) + + } + })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index 75dd35f..2793563 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -123,4 +123,72 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, "dG_vs_total_score_top_10_percent_by_all") plot_field(p, "dG_vs_total_score_top_10_percent_by_interface", grid=~ interface) + +#Native Comparisons + + sele <- " + SELECT + interfaces.dG as dG, + interfaces.dG_cross as 
dG_cross, + interfaces.delta_unsatHbonds as delta_unsatHbonds, + interfaces.hbond_E_fraction as hbond_E_fraction, + interfaces.dSASA as dSASA, + interfaces.interface as interface, + structure_scores.score_value as total_score, + natives.native as native + FROM + interfaces, + score_types, + structure_scores, + natives + WHERE + score_types.score_type_name='total_score' AND + structure_scores.score_type_id = score_types.score_type_id AND + structure_scores.struct_id = interfaces.struct_id AND + structure_scores.struct_id = natives.struct_id + " + + + + data = query_sample_sources(sample_sources, sele) + #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent + }) + + p <- ggplot(data=data_rm_out, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs dSASA") + + ylab("SASA") + + xlab("REU (dG)") + plot_field(p, "dG_vs_dSASA_top_90_percentdG_by_all") + plot_field(p, "dG_vs_dSASA_top_90_percentdG_by_interface", grid=~ interface) + + p <- ggplot(data=data_top, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs dSASA") + + ylab("SASA") + + xlab("REU (dG)") + plot_field(p, "dG_vs_dSASA_top_10_percentdG_by_native_by_all") + plot_field(p, "dG_vs_dSASA_top_10_percentdG_by_native_by_interface", grid=~ interface) + + #dG vs Total Energy + p <- ggplot(data=data_rm_out, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs total_score") + + xlab("REU (dG)") + + ylab("REU (Total Score)") + plot_field(p, "dG_vs_total_score_top_90_percentdG_by_native_by_all") + plot_field(p, "dG_vs_total_score_top_90_percentdG_by_native_by_interface", grid=~ interface) + + #dG vs Total 
Energy + p <- ggplot(data=data_top, aes(y = total_score, x = dG, colour=sample_source)) + parts_no_density + + ggtitle("dG vs total_score") + + xlab("REU (dG)") + + ylab("REU (Total Score") + plot_field(p, "dG_vs_total_score_top_10_percent_by_native_by_all") + plot_field(p, "dG_vs_total_score_top_10_percent_by_native_by_interface", grid=~ interface) + })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R index 1cf0ec3..6d12916 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_den.R @@ -142,4 +142,85 @@ run=function(self, sample_sources, output_dir, output_formats){ scale_x_discrete(labels=function(x) abbreviate(x, minlength=17)) plot_field(p, "dG_top_by_interface", grid=interface ~ .) + + #By Natives + + sele <- " + SELECT + interfaces.dG as dG, + interfaces.dG_cross as dG_cross, + interfaces.hbond_E_fraction as hbond_E_fraction, + interfaces.interface as interface, + structure_scores.score_value as total_score, + natives.native as native + FROM + interfaces, + score_types, + structure_scores, + natives + WHERE + score_types.score_type_name='total_score' AND + structure_scores.score_type_id = score_types.score_type_id AND + structure_scores.struct_id = interfaces.struct_id AND + structure_scores.struct_id = natives.struct_id + ORDER BY dG; + " + + data = query_sample_sources(sample_sources, sele) + #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent + }) + + f <- ddply(data, .(sample_source, native), 
function(d2){ + data.frame(dG = d2[1:5,]$dG) + }) + + #Basic Densities + fields = c("dG" ) + for(field in fields){ + parts = list(plot_parts, scale_x_continuous("Rosetta Energy")) + + group = c("sample_source") + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_90_percent_den_by_native_by_all", sep="_"), ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_90_percent_den_by_native_by_interface", sep="_"), grid=interface ~ .) + + group = c("sample_source") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_10_percent_den_by_native_by_all", sep="_"), ) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_10_percent_den_by_native_by_interface", sep="_"), grid=interface ~ .) 
+ + group = c("sample_source") + dens <- estimate_density_1d(f, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "top_5_den_by_native_by_all", sep="_"), ) + + } + })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R index 228eef6..1f0e4a9 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_energies_residue_den.R @@ -101,4 +101,74 @@ run=function(self, sample_sources, output_dir, output_formats){ } + #By Natives + sele <-" + SELECT + interface_residues.interface as interface, + interface_residues.dG as dG, + interface_residues.dSASA as dSASA, + interface_residues.energy_int as energy_int, + interface_residues.energy_sep as energy_sep, + natives.native as native + FROM + interface_residues, + natives + WHERE + interface_residues.struct_id = natives.struct_id" + + #Density plots + + data = query_sample_sources(sample_sources, sele) + ##Overall plots for all residues: Add Side data once we have this. 
+ + #Densities + + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent + }) + + #Energies + fields = c("dG") + for(field in fields){ + group = c("sample_source") + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "residue_dens_top_90_percent_by_native_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "residue_dens_top_90_percent_by_native_by_interface", sep="_"), grid=interface ~ .) + + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "top_10_percent_residue_by_native_dens_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data = dens, na.rm=T) + plot_parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(paste("Residue", field, sep=" ")) + + xlab("REU") + #scale_x_continuous("REU", limit = c(-15, 15)) + plot_field(p, paste(field, "top_10_percent_residue_dens_by_native_by_interface", sep="_"), grid=interface ~ .) 
+ + } + })) \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R index 8a9f84d..509977d 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R @@ -67,4 +67,53 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, paste(field, "den_by_interface", sep="_"), grid=interface ~ .) } + sele <- " + SELECT + sc_value, + packstat, + interface, + natives.native + FROM + interfaces, + natives + WHERE + interfaces.struct_id = natives.struct_id + ORDER BY sc_value + " + + #By Native. + data = query_sample_sources(sample_sources, sele) + #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers + + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$sc_value >= quantile(d2$sc_value, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$sc_value >= quantile(d2$sc_value, .10))) #Top 10 percent + }) + + f <- ddply(data, .(sample_source, native), function(d2){ + data.frame(sc_value = d2[1:5,]$sc_value) + }) + + fields = c("sc_value") + for(field in fields){ + parts = list(plot_parts, scale_x_continuous("value", limit = c(0, 1.0))) + + group = c("sample_source") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_native_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_native_by_interface", sep="_"), grid=interface ~ .) 
+ } + })) # end FeaturesAnalysis \ No newline at end of file diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R index f7d4a1f..d2607d0 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_sc_value_vs.R @@ -108,12 +108,48 @@ run=function(self, sample_sources, output_dir, output_formats){ plot_field(p, "sc_value_vs_delta_unsat_polars_by_interface", grid=interface ~ sample_source) - #3D Plots + #By Native - #sc_value vs dG vs dSASA + sele <- " + SELECT + sc_value, + packstat, + dSASA, + dG, + dG_cross, + delta_unsatHbonds, + interface, + natives.native as native + FROM + interfaces, + natives, + WHERE + interfaces.struct_id = natives.struct_id + " + + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .90))) #Remove high energy outliers + }) + + data_top <- ddply(data, .(sample_source, native), function(d2){ + subset(d2, subset=(d2$dG <= quantile(d2$dG, .10))) #Top 10 percent + }) + + #sc_value vs dG + p <- ggplot(data = data_rm_out, aes(x=sc_value, y=dG)) + parts + + ggtitle("sc_value_vs_dG") + + scale_x_continuous("sc_value", limit = c(0, 1.0)) + + scale_y_continuous("REU") + plot_field(p, "sc_value_vs_dG(top_90_percent)-by_native_by_all", grid=sample_source ~ .) + plot_field(p, "sc_value_vs_dG(top_90_percent)-by_native_interface", grid=interface ~ sample_source) + + p <- ggplot(data = data_top, aes(x=sc_value, y=dG)) + parts + + ggtitle("sc_value_vs_dG") + + scale_x_continuous("sc_value", limit = c(0, 1.0)) + + scale_y_continuous("REU") + plot_field(p, "top_10_percent_dG-sc_value_vs_dG_by_native_by_all", grid=sample_source ~ .) 
+ plot_field(p, "top_10_percent_dG-sc_value_vs_dG_by_native_by_interface", grid=interface ~ sample_source) - #Sides: - #sc_value vs interface_energy })) # end FeaturesAnalysis \ No newline at end of file From 1b122577157c99ca0b9e3179d4c107cef2b5016e Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 14:26:38 -0600 Subject: [PATCH 52/55] add native plots. --- .../interfaces/packing/int_packing_den.R | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R index 509977d..5ad58a5 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R @@ -114,6 +114,56 @@ run=function(self, sample_sources, output_dir, output_formats){ geom_line(aes(x, y, colour=sample_source), size=1.2) + ggtitle(field) plot_field(p, paste(field, "den_by_native_by_interface", sep="_"), grid=interface ~ .) + + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_native_by_all", sep="_")) + + group = c("sample_source", "interface") + dens <- estimate_density_1d(data, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_by_native_by_interface", sep="_"), grid=interface ~ .) 
+ + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_top_90_percent_by_native_by_all", sep="_")) + + dens <- estimate_density_1d(data_rm_out, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_top_90_percent_by_native_by_interface", sep="_"), grid=interface ~ .) + + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_top_10_percent_by_native_by_all", sep="_")) + + dens <- estimate_density_1d(data_top, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_top_10_percent_by_native_by_interface", sep="_"), grid=interface ~ .) + + dens <- estimate_density_1d(f, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_top_5_by_native_by_all", sep="_")) + + dens <- estimate_density_1d(f, group, field) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + ggtitle(field) + plot_field(p, paste(field, "den_top_5_by_native_by_interface", sep="_"), grid=interface ~ .) + } })) # end FeaturesAnalysis \ No newline at end of file From d03a9ab58fd195123d6dfb5df443aebad3af1fa0 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 14:36:58 -0600 Subject: [PATCH 53/55] add native plots. 
--- .../analysis/plots/interfaces/packing/int_packing_den.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R index 5ad58a5..5abc983 100644 --- a/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R +++ b/inst/scripts/analysis/plots/interfaces/packing/int_packing_den.R @@ -85,11 +85,11 @@ run=function(self, sample_sources, output_dir, output_formats){ data = query_sample_sources(sample_sources, sele) #data_rm_out = data[data$dG<=5000 & data$dG>-5000,]#Remove high energy outliers - data_rm_out <- ddply(data, .(sample_source, native), function(d2){ + data_top <- ddply(data, .(sample_source, native), function(d2){ subset(d2, subset=(d2$sc_value >= quantile(d2$sc_value, .90))) #Remove high energy outliers }) - data_top <- ddply(data, .(sample_source, native), function(d2){ + data_rm_out <- ddply(data, .(sample_source, native), function(d2){ subset(d2, subset=(d2$sc_value >= quantile(d2$sc_value, .10))) #Top 10 percent }) From 67075445c7137d46f38d97327b31afc12d07b5f7 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 14:43:40 -0600 Subject: [PATCH 54/55] add native plots. 
--- inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R index 2793563..5ba7d46 100644 --- a/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R +++ b/inst/scripts/analysis/plots/interfaces/energies/int_dG_vs.R @@ -165,8 +165,8 @@ run=function(self, sample_sources, output_dir, output_formats){ ggtitle("dG vs dSASA") + ylab("SASA") + xlab("REU (dG)") - plot_field(p, "dG_vs_dSASA_top_90_percentdG_by_all") - plot_field(p, "dG_vs_dSASA_top_90_percentdG_by_interface", grid=~ interface) + plot_field(p, "dG_vs_dSASA_top_90_percentdG_by_native_by_all") + plot_field(p, "dG_vs_dSASA_top_90_percentdG_native_by_interface", grid=~ interface) p <- ggplot(data=data_top, aes(y = dSASA, x = dG, colour=sample_source)) + parts_no_density + ggtitle("dG vs dSASA") + From 036a1e57e5aa60ba40a2dbc481af15135617f8db Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Thu, 8 Feb 2018 14:49:18 -0600 Subject: [PATCH 55/55] add native plots. 
--- .../analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R index a638df2..db9a0b4 100644 --- a/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R +++ b/inst/scripts/analysis/plots/antibodies/SASA/ab_paratope_SASA_den.R @@ -59,6 +59,14 @@ run=function(self, sample_sources, output_dir, output_formats){ subset(d2, subset=(d2$paratope_SASA <= quantile(d2$paratope_SASA, .10))) #Top 10 percent }) + group = c("sample_source") + dens <- estimate_density_1d(data, group, c("paratope_SASA")) + p <- ggplot(data=dens, na.rm=T) + parts + + geom_line(aes(x, y, colour=sample_source), size=1.2) + + xlab("SASA") + + ggtitle("CDR Paratope SASA") + plot_field(p, "paratope_sasa_den") + group = c("sample_source") dens <- estimate_density_1d(data_rm_out, group, c("paratope_SASA")) p <- ggplot(data=dens, na.rm=T) + parts +