Skip to content

Commit 6d885b4

Browse files
committed
Restore original chain IDs to EVCoupling input chain DataFrames
1 parent f301dcf commit 6d885b4

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

project/utils/deepinteract_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,7 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
630630
orig_pdb_name = db.get_pdb_name(orig_pdb_filepath)
631631
orig_pdb_df = PandasPdb().read_pdb(new_pdb_filepath).df['ATOM']
632632
unique_chain_ids = np.unique(orig_pdb_df['chain_id'].values)
633+
633634
"""Ascertain the chain ID corresponding to the original PDB file, using one of two available methods.
634635
Method 1: Used with datasets such as EVCoupling adopting .atom filename extensions (e.g., 4DI3C.atom)
635636
Method 2: Used with datasets such as DeepHomo adopting regular .pdb filename extensions (e.g., 2FNUA.pdb)"""
@@ -643,9 +644,24 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
643644
else: # Method 2: Try to use unique chain IDs
644645
# Assume the first/second index is the first non-empty chain ID (e.g., 'A')
645646
orig_pdb_chain_id = unique_chain_ids[0] if (unique_chain_ids[0] != '') else unique_chain_ids[1]
647+
648+
# Update existing parsed chains to contain the newly-recovered chain ID
649+
parsed_dir = os.path.join(interim_dataset_dir, 'parsed', pdb_code)
650+
parsed_filenames = [
651+
os.path.join(parsed_dir, filename) for filename in os.listdir(parsed_dir) if new_pdb_code in filename
652+
]
653+
parsed_filenames.sort()
654+
# Load in the existing parsed chain DataFrame
655+
chain_df = pd.read_pickle(parsed_filenames[chain_number - 1])
656+
# Update the corresponding chain ID
657+
chain_df.chain = orig_pdb_chain_id
658+
# Save the updated chain DataFrame back to the same pickle file
659+
chain_df.to_pickle(parsed_filenames[chain_number - 1])
660+
646661
# Update the existing Pair to contain the newly-recovered chain ID
647662
pair_dir = os.path.join(interim_dataset_dir, 'pairs', pdb_code)
648663
pair_filenames = [os.path.join(pair_dir, filename) for filename in os.listdir(pair_dir) if new_pdb_code in filename]
664+
pair_filenames.sort()
649665
# Load in the existing Pair
650666
with open(pair_filenames[0], 'rb') as f:
651667
pair = dill.load(f)

0 commit comments

Comments
 (0)