diff --git a/ot_tractability_pipeline_v2/buckets_sm.py b/ot_tractability_pipeline_v2/buckets_sm.py index e643c22..b22b292 100755 --- a/ot_tractability_pipeline_v2/buckets_sm.py +++ b/ot_tractability_pipeline_v2/buckets_sm.py @@ -223,7 +223,7 @@ def set_strings2(x): self._search_chembl_clinical() self._process_protein_complexes() - self.gene_xref = self.id_xref[['accession', 'ensembl_gene_id', 'symbol', 'Entry name', 'Protein names', + self.gene_xref = self.id_xref[['accession', 'ensembl_gene_id', 'symbol', 'Entry Name', 'Protein names', 'IDG_family', 'IDG_dtoclass', 'IDG_tdl', 'IDG_fam', 'GO_BioProcess', 'GO_MolFunction']] @@ -251,7 +251,7 @@ def set_strings2(x): # ], as_index=False).agg(f0).replace({'drug_chembl_id':{'tmp': np.nan}}) self.clinical_evidence = self.clinical_evidence.groupby(['ensembl_gene_id', 'drug_chembl_id'], as_index=False).agg(f0) - self.out_df = self.gene_xref.merge(self.clinical_evidence.drop(['accession', 'symbol', 'Entry name', 'Protein names', + self.out_df = self.gene_xref.merge(self.clinical_evidence.drop(['accession', 'symbol', 'Entry Name', 'Protein names', 'IDG_family', 'IDG_dtoclass', 'IDG_tdl', 'IDG_fam', 'GO_BioProcess', 'GO_MolFunction'], axis=1), how='outer', on='ensembl_gene_id') @@ -682,7 +682,7 @@ def assign_buckets(self): print(self.out_df.columns) # Add extra buckets to the list below - self.out_df = self.out_df[['symbol', 'accession', 'Entry name', 'Protein names', + self.out_df = self.out_df[['symbol', 'accession', 'Entry Name', 'Protein names', 'IDG_family', 'IDG_dtoclass', 'IDG_tdl', 'IDG_fam', 'GO_BioProcess', 'GO_MolFunction', 'Bucket_1_sm', 'Bucket_2_sm', 'Bucket_3_sm',