Skip to content

Commit

Permalink
Move the packed pdb files to the files/pdb directory, with a better name, id and description
Browse files Browse the repository at this point in the history
  • Loading branch information
Jgmedina95 committed Jan 30, 2024
1 parent 13dc325 commit d2358ac
Showing 1 changed file with 42 additions and 15 deletions.
57 changes: 42 additions & 15 deletions mdagent/tools/base_tools/preprocess_tools/pdb_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,9 @@ def summarize_errors(errors):


class Molecule:
def __init__(self, filename, number_of_molecules=1, instructions=None):
def __init__(self, filename, file_id, number_of_molecules=1, instructions=None):
self.filename = filename
self.id = file_id
self.number_of_molecules = number_of_molecules
self.instructions = instructions if instructions else []
self.load()
Expand All @@ -276,6 +277,7 @@ def __init__(
self.molecules = []
self.file_number = 1
self.file_description = file_description
self.final_name = None

def add_molecule(self, molecule):
self.molecules.append(molecule)
Expand All @@ -284,16 +286,33 @@ def add_molecule(self, molecule):

def generate_input_header(self):
# Generate the header of the input file in .inp format
orig_pdbs_ids = [
f"{molecule.number_of_molecules + molecule.id}"
for molecule in self.molecules
]

_final_name = f'{"_".join(orig_pdbs_ids)}'

while os.path.exists(f"packed_structures_v{self.file_number}.pdb"):
self.file_description = (
"Packed Structures of the following molecules:\n"
+ "\n".join(
[
f"Molecule ID: {molecule.id}, "
f"Number of Molecules: {molecule.number_of_molecules}"
for molecule in self.molecules
]
)
)
while os.path.exists(f"{_final_name}_v{self.file_number}.pdb"):
self.file_number += 1

self.final_name = f"{_final_name}_v{self.file_number}.pdb"
with open("packmol.inp", "w") as out:
out.write("##Automatically generated by LangChain\n")
out.write("tolerance 2.0\n")
out.write("filetype pdb\n")
out.write(
f"output packed_structures_v{self.file_number}.pdb\n"
f"output {self.final_name}\n"
) # this is the name of the final file
out.close()

Expand Down Expand Up @@ -327,19 +346,23 @@ def run_packmol(self, PathRegistry):
"Packmol failed to run. Please check the input file and try again."
)

PathRegistry.map_path(
f"packed_structures_v{self.file_number}.pdb",
f"packed_structures_v{self.file_number}.pdb",
self.file_description,
)
# validate final pdb
pdb_validation = validate_pdb_format(f"packed_structures{self.file_number}.pdb")
pdb_validation = validate_pdb_format(f"{self.file_name}")
if pdb_validation[0] == 0:
# delete .inp files
os.remove("packmol.inp")
for molecule in self.molecules:
os.remove(molecule.filename)
return "PDB file validated successfully"
# name of packed pdb file
time_stamp = PathRegistry.get_timestamp()
PathRegistry.map_path(
f"PACKED_{time_stamp}",
f"{self.final_name}",
self.file_description,
)
# move file to files/pdb
os.rename(self.final_name, f"files/pdb/{self.final_name}")
return f"PDB file validated successfully. FileID: PACKED_{time_stamp}"
elif pdb_validation[0] == 1:
# format pdb_validation[1] list of errors
errors = summarize_errors(pdb_validation[1])
Expand All @@ -356,6 +379,7 @@ def run_packmol(self, PathRegistry):
def packmol_wrapper(
PathRegistry,
pdbfiles: List,
files_id: List,
number_of_molecules: List,
instructions: List[List],
):
Expand All @@ -365,10 +389,10 @@ def packmol_wrapper(
# create a box
box = PackmolBox()
# add molecules to the box
for pdbfile, number_of_molecules, instructions in zip(
pdbfiles, number_of_molecules, instructions
for pdbfile, file_id, number_of_molecules, instructions in zip(
pdbfiles, files_id, number_of_molecules, instructions
):
molecule = Molecule(pdbfile, number_of_molecules, instructions)
molecule = Molecule(pdbfile, file_id, number_of_molecules, instructions)
box.add_molecule(molecule)
# generate input header
box.generate_input_header()
Expand Down Expand Up @@ -441,8 +465,10 @@ def _run(self, **values) -> str:
except ValidationError as e:
return str(e)
error_msg = values.get("error", None)
pdbfiles = values.get("pdbfiles_id", [])
pdbfiles = [self.path_registry.get_mapped_path(pdbfile) for pdbfile in pdbfiles]
pdbfile_ids = values.get("pdbfiles_id", [])
pdbfiles = [
self.path_registry.get_mapped_path(pdbfile) for pdbfile in pdbfile_ids
]
pdbfile_names = [pdbfile.split("/")[-1] for pdbfile in pdbfiles]
# copy them to the current directory with temp_ names
for pdbfile, pdbfile_name in zip(pdbfiles, pdbfile_names):
Expand All @@ -468,6 +494,7 @@ def _run(self, **values) -> str:
return packmol_wrapper(
self.path_registry,
pdbfiles=pdbfile_names,
files_id=pdbfile_ids,
number_of_molecules=number_of_molecules,
instructions=instructions,
)
Expand Down

0 comments on commit d2358ac

Please sign in to comment.