@@ -630,6 +630,7 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
630
630
orig_pdb_name = db .get_pdb_name (orig_pdb_filepath )
631
631
orig_pdb_df = PandasPdb ().read_pdb (new_pdb_filepath ).df ['ATOM' ]
632
632
unique_chain_ids = np .unique (orig_pdb_df ['chain_id' ].values )
633
+
633
634
"""Ascertain the chain ID corresponding to the original PDB file, using one of two available methods.
634
635
Method 1: Used with datasets such as EVCoupling adopting .atom filename extensions (e.g., 4DI3C.atom)
635
636
Method 2: Used with datasets such as DeepHomo adopting regular .pdb filename extensions (e.g., 2FNUA.pdb)"""
@@ -643,9 +644,24 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
643
644
else : # Method 2: Try to use unique chain IDs
644
645
# Assume the first unique chain ID is the one to use (e.g., 'A'); if it is empty, fall back to the second
645
646
orig_pdb_chain_id = unique_chain_ids [0 ] if (unique_chain_ids [0 ] != '' ) else unique_chain_ids [1 ]
647
+
648
+ # Update existing parsed chains to contain the newly-recovered chain ID
649
+ parsed_dir = os .path .join (interim_dataset_dir , 'parsed' , pdb_code )
650
+ parsed_filenames = [
651
+ os .path .join (parsed_dir , filename ) for filename in os .listdir (parsed_dir ) if new_pdb_code in filename
652
+ ]
653
+ parsed_filenames .sort ()
654
+ # Load in the existing parsed chain
655
+ chain_df = pd .read_pickle (parsed_filenames [chain_number - 1 ])
656
+ # Update the corresponding chain ID
657
+ chain_df .chain = orig_pdb_chain_id
658
+ # Save the updated parsed chain
659
+ chain_df .to_pickle (parsed_filenames [chain_number - 1 ])
660
+
646
661
# Update the existing Pair to contain the newly-recovered chain ID
647
662
pair_dir = os .path .join (interim_dataset_dir , 'pairs' , pdb_code )
648
663
pair_filenames = [os .path .join (pair_dir , filename ) for filename in os .listdir (pair_dir ) if new_pdb_code in filename ]
664
+ pair_filenames .sort ()
649
665
# Load in the existing Pair
650
666
with open (pair_filenames [0 ], 'rb' ) as f :
651
667
pair = dill .load (f )
0 commit comments