30
30
from pytorch_lightning .loggers import TensorBoardLogger , WandbLogger
31
31
32
32
from project .utils .deepinteract_constants import FEAT_COLS , ALLOWABLE_FEATS , D3TO1
33
- from project .utils .dips_plus_utils import postprocess_pruned_pairs
33
+ from project .utils .dips_plus_utils import postprocess_pruned_pairs , impute_postprocessed_missing_feature_values
34
34
from project .utils .graph_utils import prot_df_to_dgl_graph_feats
35
35
from project .utils .protein_feature_utils import GeometricProteinFeatures
36
36
@@ -573,9 +573,11 @@ def create_input_dir_struct(input_dataset_dir: str, pdb_code: str):
573
573
_ , _ = dir_struct_create_proc .communicate () # Wait until the directory structure creation cmd is finished
574
574
575
575
576
def copy_input_to_raw_dir(input_dataset_dir: str, pdb_filepath: str, pdb_code: str, chain_indic: str):
    """Make a copy of the input PDB file in the newly-created raw directory.

    The copy is placed at ``<input_dataset_dir>/raw/<pdb_code>/<filename>``.

    Parameters
    ----------
    input_dataset_dir : str
        Root directory of the input dataset; the copy goes beneath its ``raw`` subdirectory.
    pdb_filepath : str
        Path of the PDB file to copy.
    pdb_code : str
        Name of the subdirectory of ``raw`` that receives the copy.
    chain_indic : str
        Chain indicator (callers pass ``'l_u'`` or ``'r_u'``) that the copied file's name should carry.

    Notes
    -----
    The exit status of ``cp`` is not inspected, so a failed copy is not reported to the caller.
    """
    # Look for the indicator in the file's own name only: matching against the whole path would
    # misfire whenever a parent directory happens to contain it (e.g. 'l_u' inside 'paul_user').
    if chain_indic in os.path.basename(pdb_filepath):
        # NOTE(review): presumably get_pdb_name() yields the file's basename -- confirm in the db module
        filename = db.get_pdb_name(pdb_filepath)
    else:
        # NOTE(review): presumably get_pdb_code() yields the bare PDB code -- confirm in the db module
        filename = db.get_pdb_code(pdb_filepath) + f'_{chain_indic}.pdb'
    destination = os.path.join(input_dataset_dir, 'raw', pdb_code, filename)
    # Hand cp an argument list rather than splitting a formatted string, so that paths
    # containing whitespace are passed through as single arguments
    input_copy_cmd = ['cp', pdb_filepath, destination]
    input_copy_proc = subprocess.Popen(input_copy_cmd, stdout=subprocess.PIPE, cwd=os.getcwd())
    _, _ = input_copy_proc.communicate()  # Wait until the input copy cmd is finished
581
583
@@ -590,6 +592,7 @@ def make_dataset(input_dataset_dir='datasets/Input/raw', output_dir='datasets/In
590
592
pa .parse_all (input_dataset_dir , parsed_dir , num_cpus )
591
593
592
594
complexes_dill = os .path .join (output_dir , 'complexes/complexes.dill' )
595
+ os .remove (complexes_dill ) # Ensure that pairs are made everytime this function is called
593
596
comp .complexes (parsed_dir , complexes_dill , source_type )
594
597
complexes = comp .read_complexes (complexes_dill )
595
598
pairs_dir = os .path .join (output_dir , 'pairs' )
@@ -697,7 +700,7 @@ def impute_missing_feature_values(output_dir='datasets/Input/final/raw',
697
700
inputs = [(pair_filename .as_posix (), pair_filename .as_posix (), impute_atom_features , advanced_logging )
698
701
for pair_filename in Path (output_dir ).rglob ('*.dill' )]
699
702
# Without impute_atom_features set to True, non-CA atoms will be filtered out after writing updated pairs
700
- par .submit_jobs (impute_missing_feature_values , inputs , num_cpus )
703
+ par .submit_jobs (impute_postprocessed_missing_feature_values , inputs , num_cpus )
701
704
702
705
703
706
def convert_input_pdb_files_to_pair (left_pdb_filepath : str , right_pdb_filepath : str , input_dataset_dir : str ,
@@ -707,8 +710,8 @@ def convert_input_pdb_files_to_pair(left_pdb_filepath: str, right_pdb_filepath:
707
710
pdb_code = db .get_pdb_group (list (ca .get_complex_pdb_codes ([left_pdb_filepath , right_pdb_filepath ]))[0 ])
708
711
# Iteratively execute the PDB file feature generation process
709
712
create_input_dir_struct (input_dataset_dir , pdb_code )
710
- copy_input_to_raw_dir (input_dataset_dir , left_pdb_filepath , pdb_code )
711
- copy_input_to_raw_dir (input_dataset_dir , right_pdb_filepath , pdb_code )
713
+ copy_input_to_raw_dir (input_dataset_dir , left_pdb_filepath , pdb_code , 'l_u' )
714
+ copy_input_to_raw_dir (input_dataset_dir , right_pdb_filepath , pdb_code , 'r_u' )
712
715
make_dataset (os .path .join (input_dataset_dir , 'raw' ), os .path .join (input_dataset_dir , 'interim' ))
713
716
generate_psaia_features (psaia_dir = psaia_dir ,
714
717
psaia_config = psaia_config ,
0 commit comments