Skip to content

Commit

Permalink
Typo check and minor grammar corrections. (#173)
Browse files Browse the repository at this point in the history
  • Loading branch information
brittyscience authored Feb 21, 2025
1 parent 144f016 commit b5a7029
Show file tree
Hide file tree
Showing 13 changed files with 55 additions and 55 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ It's built using Langchain and uses a collection of tools to set up and execute


## Environment Setup
To use the OpenMM features in the agent, please set up a conda environment, following these steps.
To use the OpenMM features in the agent, please set up a conda environment following these steps.
```
conda env create -n mdcrow -f environment.yaml
conda activate mdcrow
```

If you already have a conda environment, you can install dependencies before you activate it with the following step.
If you already have a conda environment, you can install dependencies before you activate it with the following steps.
- Install the necessary conda dependencies: `conda env update -n <YOUR_CONDA_ENV_HERE> -f environment.yaml`


Expand All @@ -25,14 +25,14 @@ Other tools require API keys, such as paper-qa for literature searches. We recom
1. Copy the `.env.example` file and rename it to `.env`: `cp .env.example .env`
2. Replace the placeholder values in `.env` with your actual keys

You can ask MDCrow to conduct molecular dynamics tasks using OpenAI's GPT model
You can ask MDCrow to conduct molecular dynamics tasks using OpenAI's GPT model.
```
from mdcrow import MDCrow
agent = MDCrow(model="gpt-3.5-turbo")
agent.run("Simulate protein 1ZNI at 300 K for 0.1 ps and calculate the RMSD over time.")
```
Note: to distinguish Together models from the rest, you'll need to add "together\" prefix in model flag, such as `agent = MDCrow(model="together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")`
Note: To distinguish Together models from the rest, you'll need to add the "together/" prefix in the model flag, such as `agent = MDCrow(model="together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")`.

## LLM Providers
By default, we support LLMs through the OpenAI API. However, feel free to use other LLM providers. Make sure to install the necessary package for it. Here's a list of packages required for alternative LLM providers we support:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@


class HydrogenBondTool(BaseTool):
"""Note that this tool only usees the Baker-Hubbard method for identifying hydrogen bonds.
"""Note that this tool only uses the Baker-Hubbard method for identifying hydrogen bonds.
Other methods (kabsch-sander, wernet-nilsson) can be implemented later, if desired.
"""

name = "hydrogen_bond_tool"
description = (
"Identifies hydrogen bonds and plots the results from the"
"Identifies hydrogen bonds and plots the results from the "
"provided trajectory data."
"Input the File ID for the trajectory file and optionally the topology file. "
"The tool will output the file ID of the results and plot."
Expand Down
4 changes: 2 additions & 2 deletions mdcrow/tools/base_tools/analysis_tools/inertia.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def calculate_moment_of_inertia(self):

def plot_moi(self):
"""
Analyze and visualize the principal moments of inertia.
Analyzes and visualizes the principal moments of inertia.
"""
message = ""
if self.moments_of_inertia is None:
Expand All @@ -60,7 +60,7 @@ def plot_moi(self):
if self.traj.n_frames == 1: # only one frame
moi_string = ", ".join(f"{moi:.2f}" for moi in self.moments_of_inertia[0])
message += (
"Only one frame in trajectory, no plot generated. \n"
"Only one frame in the trajectory, no plot generated. \n"
f"Principal Moments of Inertia: {moi_string}. \n"
)
return message
Expand Down
10 changes: 5 additions & 5 deletions mdcrow/tools/base_tools/analysis_tools/rdf_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class RDFToolInput(BaseModel):
trajectory_fileid: str = Field(
None, description="Trajectory file. Either dcd, hdf5, xtc oe xyz"
None, description="Trajectory file. Either dcd, hdf5, xtc or xyz"
)

topology_fileid: Optional[str] = Field(None, description="Topology file")
Expand Down Expand Up @@ -166,17 +166,17 @@ def validate_input(self, input):
raise ValueError(
"Incorrect Inputs: "
"Stride must be a positive integer "
"or None for default value of 1"
"or None for default value of 1."
)
except ValueError:
raise ValueError(
"Incorrect Inputs: Stride must be an integer "
"or None for default value of 1"
"or None for default value of 1."
)
else:
if stride <= 0:
raise ValueError(
"Incorrect Inputs: " "Stride must be a positive integer"
"Incorrect Inputs: " "Stride must be a positive integer."
)

if atom_indices:
Expand All @@ -185,7 +185,7 @@ def validate_input(self, input):
except ValueError:
raise ValueError(
"Incorrect Inputs: Atom indices must be a comma "
"separated list of integers or None for all atoms"
"separated list of integers or None for all atoms."
)
inputs = {
"trajectory_fileid": trajectory_id,
Expand Down
2 changes: 1 addition & 1 deletion mdcrow/tools/base_tools/analysis_tools/rmsd_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def rmsd(path_registry, traj, ref_traj, mol_name, select="protein"):
"""
Calculate the root mean square deviation (RMSD) of each selected atom.
Can be used for either protions or small molecules.
Can be used for either proteins or small molecules.
"""
print("Calculating RMSD...")
msg = ""
Expand Down
14 changes: 7 additions & 7 deletions mdcrow/tools/base_tools/analysis_tools/sasa.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
class SASAFunctions:
def __init__(self, path_registry, top_fileid, traj_fileid=None, mol_name=None):
"""
Initialize the SASAFunctions class with topology and/or trajectory files.
Initializes the SASAFunctions class with topology and/or trajectory files.
Parameters:
path_registry (PathRegistry): mapping file IDs to file paths.
Expand All @@ -29,14 +29,14 @@ def __init__(self, path_registry, top_fileid, traj_fileid=None, mol_name=None):

def calculate_sasa(self, probe_radius=0.14):
"""
Calculate the Solvent Accessible Surface Area (SASA) for each
frame in the trajectory using Shrake-Rupley algorithm.
Calculates the Solvent Accessible Surface Area (SASA) for each
frame in the trajectory using the Shrake-Rupley algorithm.
Parameters:
probe_radius (float, optional): The radius of the probe used to calculate SASA.
Default is 0.14 nm (1.4 Å).
The default is 0.14 nm (1.4 Å).
"""
print("Calcuating SASA ...")
print("Calculating SASA ...")
self.residue_sasa = md.shrake_rupley(
self.traj, probe_radius=probe_radius, mode="residue"
)
Expand All @@ -58,7 +58,7 @@ def calculate_sasa(self, probe_radius=0.14):

def plot_sasa(self):
"""
Plot the total SASA and per-residue SASA over time.
Plots the total SASA and per-residue SASA over time.
"""
message = ""
if self.total_sasa is None or self.residue_sasa is None:
Expand Down Expand Up @@ -110,7 +110,7 @@ def plot_sasa(self):
class SolventAccessibleSurfaceArea(BaseTool):
name = "SolventAccessibleSurfaceArea"
description = (
"Compute the Solvent Accessible Surface Area (SASA) for a molecule or protein."
"Computes the Solvent Accessible Surface Area (SASA) for a molecule or protein."
"\nInputs: \n"
"\t(str) File ID for the topology file. \n"
"\t(str, optional) File ID for the trajectory file. \n"
Expand Down
26 changes: 13 additions & 13 deletions mdcrow/tools/base_tools/analysis_tools/vis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def _find_png(self):
return png_files

def run_molrender(self, cif_path: str) -> str:
"""Function to run molrender,
it requires node.js to be installed
"""Function to run molrender.
It requires Node.js
and the molrender package to be
installed globally.
This will save .png
Expand All @@ -43,8 +43,8 @@ def run_molrender(self, cif_path: str) -> str:
f"mol_render_{self.cif_file_name}",
f"{self.path_registry.ckpt_figures}/{file_name[0]}",
(
f"Visualization of cif file {self.cif_file_name}"
"as png file. using molrender."
f"Visualization of CIF file {self.cif_file_name}"
"as a PNG file using molrender."
),
)

Expand All @@ -62,8 +62,8 @@ def create_notebook(self, cif_file: str) -> str:
tool, it will create
a notebook
with the code to
install nglview and
display the cif/pdb file."""
install NGLView and
display the CIF/PDB file."""
self.cif_file_name = os.path.basename(cif_file)

# Create a new notebook
Expand Down Expand Up @@ -97,21 +97,21 @@ def create_notebook(self, cif_file: str) -> str:
self.path_registry.map_path(
notebook_name,
notebook_name,
f"Notebook to visualize cif/pdb file {self.cif_file_name} using nglview.",
f"Notebook to visualize CIF/PDB file {self.cif_file_name} using NGLView.",
)
return "Visualization Complete"


class VisualizeProtein(BaseTool):
"""To get a png, you must install molrender
https://github.com/molstar/molrender/tree/master
"""To get a PNG, you must install molrender
https://github.com/molstar/molrender/tree/master.
Otherwise, you will get a notebook where you
can visualize the protein."""

name = "PDBVisualization"
description = """This tool will create
a visualization of a cif
file as a png file OR
a visualization of a CIF
file as a PNG file OR
it will create
a .ipynb file with the
visualization of the
Expand All @@ -130,7 +130,7 @@ def __init__(self, path_registry: Optional[PathRegistry]):
def _run(self, cif_file_name: str) -> str:
"""use the tool."""
if not self.path_registry:
return "Failed. Error: Path registry is not set"
return "Failed. Error: Path registry is not set."
cif_path = self.path_registry.get_mapped_path(cif_file_name)
if not cif_path:
return f"Failed. File not found: {cif_file_name}"
Expand All @@ -141,7 +141,7 @@ def _run(self, cif_file_name: str) -> str:
print(f"Error running molrender: {str(e)}. Using NGLView instead.")
try:
vis.create_notebook(cif_path)
return "Succeeded. Visualization created as notebook"
return "Succeeded. Visualization created as notebook."
except Exception as e:
return f"Failed. {type(e).__name__}: {e}"

Expand Down
22 changes: 11 additions & 11 deletions mdcrow/tools/base_tools/preprocess_tools/clean_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
class CleaningToolFunctionInput(BaseModel):
"""Input model for CleaningToolFunction"""

pdb_id: str = Field(..., description="ID of the pdb/cif file in the path registry")
pdb_id: str = Field(..., description="ID of the PDB/CIF file in the path registry.")
replace_nonstandard_residues: bool = Field(
True, description="Whether to replace nonstandard residues with standard ones. "
True, description="Whether to replace nonstandard residues with standard ones."
)
add_missing_atoms: bool = Field(
True,
Expand All @@ -36,8 +36,8 @@ class CleaningToolFunctionInput(BaseModel):
class CleaningToolFunction(BaseTool):
name = "CleaningToolFunction"
description = """
This tool performs various cleaning operations on a PDB or CIF file.
Operations can include removing heterogens,
This tool performs various cleaning operations on a PDB or CIF file,
including removing heterogens,
adding missing atoms and hydrogens,
replacing nonstandard residues, and/or removing water.
Expand All @@ -62,7 +62,7 @@ def _run(self, **input_args) -> str:
pdbfile_id = input_args.get("pdb_id", None)
if pdbfile_id is None:
return """Failed. No file was provided.
The input has to be a dictionary with the key 'pdb_id'"""
The input must be a dictionary containing the key 'pdb_id'"""
remove_heterogens = input_args.get("remove_heterogens", True)
remove_water = input_args.get("remove_water", True)
add_hydrogens = input_args.get("add_hydrogens", True)
Expand Down Expand Up @@ -105,32 +105,32 @@ def _run(self, **input_args) -> str:
fixer.removeHeterogens(True)
file_description += " Removed Heterogens, and Water Kept. "
except Exception:
print("error at removeHeterogens")
print("Error at removeHeterogens")

try:
if replace_nonstandard_residues:
fixer.replaceNonstandardResidues()
file_description += " Replaced Nonstandard Residues. "
except Exception:
print("error at replaceNonstandardResidues")
print("Error at replaceNonstandardResidues")
try:
fixer.findMissingAtoms()
except Exception:
print("error at findMissingAtoms")
print("Error at findMissingAtoms")
try:
if add_missing_atoms:
fixer.addMissingAtoms()
except Exception:
print("error at addMissingAtoms")
print("Error at addMissingAtoms")
try:
if add_hydrogens:
fixer.addMissingHydrogens(add_hydrogens_ph)
file_description += f"Added Hydrogens at pH {add_hydrogens_ph}. "
except Exception:
print("error at addMissingHydrogens")
print("Error at addMissingHydrogens")

file_description += (
"Missing Atoms Added and replaces nonstandard residues. "
"Missing Atoms added and nonstandard residues replaced. "
)
file_mode = "w" if add_hydrogens else "a"
file_name = self.path_registry.write_file_name(
Expand Down
12 changes: 6 additions & 6 deletions mdcrow/tools/base_tools/preprocess_tools/packing.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ def __init__(self, filename, file_id, number_of_molecules=1, instructions=None):
self.load()

def load(self):
# load the molecule data (optional)
# Load the molecule data (optional)
pass

def get_number_of_atoms(self):
# return the number of atoms in this molecule
# Return the number of atoms in this molecule
pass


Expand Down Expand Up @@ -103,7 +103,7 @@ def generate_input_header(self) -> None:

self.final_name = f"{_final_name}_v{self.file_number}.pdb"
with open("packmol.inp", "w") as out:
out.write("##Automatically generated by LangChain\n")
out.write("## Automatically generated by LangChain\n")
out.write("tolerance 2.0\n")
out.write("filetype pdb\n")
out.write(
Expand Down Expand Up @@ -135,7 +135,7 @@ def run_packmol(self):
cmd = "packmol < packmol.inp"
result = subprocess.run(cmd, shell=True, text=True, capture_output=True)
if result.returncode != 0:
print("Packmol failed to run with 'packmol < packmol.inp' command")
print("Packmol failed to run using the command: 'packmol < packmol.inp'.")
result = subprocess.run(
"./" + cmd, shell=True, text=True, capture_output=True
)
Expand Down Expand Up @@ -164,7 +164,7 @@ def run_packmol(self):
self.file_description,
)
# move file to files/pdb
print("successfull!")
print("Successful!")
return f"PDB file validated successfully. FileID: PACKED_{time_stamp}"
elif pdb_validation[0] == 1:
# format pdb_validation[1] list of errors
Expand Down Expand Up @@ -338,7 +338,7 @@ def _run(self, **values) -> str:
if result.returncode != 0:
return (
"Failed. Packmol is not installed. Please install"
"packmol at "
"Packmol from "
"'https://m3g.github.io/packmol/download.shtml'"
"and try again."
)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_analysis/test_inertia.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_plot_moi_one_frame(moi_functions):
# Simulate a single frame of inertia tensor data
moi_functions.moments_of_inertia = np.array([[1.0, 2.0, 3.0]])
result = moi_functions.plot_moi()
assert "Only one frame in trajectory, no plot generated." in result
assert "Only one frame in the trajectory, no plot generated." in result


@patch("mdcrow.tools.base_tools.analysis_tools.inertia.plt.savefig")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sim/test_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_parse_cutoff(setupandrun, input_cutoff, expected_result):
def test_parse_cutoff_unknown_unit(setupandrun):
with pytest.raises(ValueError) as e:
setupandrun._parse_cutoff("2pc")
assert "Unknown unit" in str(e.value)
assert "Unknown unit" in str(e.value)


def test_parse_temperature(setupandrun):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sim/test_setupandrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_parse_cutoff(setupandrun, input_cutoff, expected_result):
def test_parse_cutoff_unknown_unit(setupandrun):
with pytest.raises(ValueError) as e:
setupandrun._parse_cutoff("2pc")
assert "Unknown unit" in str(e.value)
assert "Unknown unit" in str(e.value)


def test_parse_temperature(setupandrun):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sim/test_write_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def script_content_2(script_content_vars_2, openmm_sim):
)


def est_construct_script_content_script2(script_content_2, script_content_vars_2):
def test_construct_script_content_script2(script_content_2, script_content_vars_2):
assert (
f"ewaldErrorTolerance = {script_content_vars_2.ewald_error_tolerance}"
in script_content_2
Expand Down

0 comments on commit b5a7029

Please sign in to comment.