Typo check and minor grammar corrections. (#173)

ur-whitelab · Feb 21, 2025 · b5a7029 · b5a7029
1 parent 144f016
commit b5a7029
Show file tree

Hide file tree

Showing 13 changed files with 55 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -3,13 +3,13 @@ It's built using Langchain and uses a collection of tools to set up and execute
 
 
 ## Environment Setup
-To use the OpenMM features in the agent, please set up a conda environment, following these steps.
+To use the OpenMM features in the agent, please set up a conda environment following these steps.
 ```
 conda env create -n mdcrow -f environment.yaml
 conda activate mdcrow
 ```
 
-If you already have a conda environment, you can install dependencies before you activate it with the following step.
+If you already have a conda environment, you can install dependencies before you activate it with the following steps.
 - Install the necessary conda dependencies: `conda env update -n <YOUR_CONDA_ENV_HERE> -f environment.yaml`
 
 
@@ -25,14 +25,14 @@ Other tools require API keys, such as paper-qa for literature searches. We recom
 1. Copy the `.env.example` file and rename it to `.env`: `cp .env.example .env`
 2. Replace the placeholder values in `.env` with your actual keys
 
-You can ask MDCrow to conduct molecular dynamics tasks using OpenAI's GPT model
+You can ask MDCrow to conduct molecular dynamics tasks using OpenAI's GPT model.
 ```
 from mdcrow import MDCrow
 
 agent = MDCrow(model="gpt-3.5-turbo")
 agent.run("Simulate protein 1ZNI at 300 K for 0.1 ps and calculate the RMSD over time.")
 ```
-Note: to distinguish Together models from the rest, you'll need to add "together\" prefix in model flag, such as `agent = MDCrow(model="together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")`
+Note: To distinguish Together models from the rest, you'll need to add the "together/" prefix in the model flag, such as `agent = MDCrow(model="together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")`.
 
 ## LLM Providers
 By default, we support LLMs through the OpenAI API. However, feel free to use other LLM providers. Make sure to install the necessary package for the provider you choose. Here's a list of packages required for alternative LLM providers we support:

diff --git a/mdcrow/tools/base_tools/analysis_tools/hydrogen_bonding_tools.py b/mdcrow/tools/base_tools/analysis_tools/hydrogen_bonding_tools.py
@@ -6,13 +6,13 @@
 
 
 class HydrogenBondTool(BaseTool):
-    """Note that this tool only usees the Baker-Hubbard method for identifying hydrogen bonds.
+    """Note that this tool only uses the Baker-Hubbard method for identifying hydrogen bonds.
     Other methods (kabsch-sander, wernet-nilsson) can be implemented later, if desired.
     """
 
     name = "hydrogen_bond_tool"
     description = (
-        "Identifies hydrogen bonds and plots the results from the"
+        "Identifies hydrogen bonds and plots the results from the "
         "provided trajectory data."
         "Input the File ID for the trajectory file and optionally the topology file. "
         "The tool will output the file ID of the results and plot."

diff --git a/mdcrow/tools/base_tools/analysis_tools/inertia.py b/mdcrow/tools/base_tools/analysis_tools/inertia.py
@@ -51,7 +51,7 @@ def calculate_moment_of_inertia(self):
 
     def plot_moi(self):
         """
-        Analyze and visualize the principal moments of inertia.
+        Analyzes and visualizes the principal moments of inertia.
         """
         message = ""
         if self.moments_of_inertia is None:
@@ -60,7 +60,7 @@ def plot_moi(self):
         if self.traj.n_frames == 1:  # only one frame
             moi_string = ", ".join(f"{moi:.2f}" for moi in self.moments_of_inertia[0])
             message += (
-                "Only one frame in trajectory, no plot generated. \n"
+                "Only one frame in the trajectory, no plot generated. \n"
                 f"Principal Moments of Inertia: {moi_string}. \n"
             )
             return message

diff --git a/mdcrow/tools/base_tools/analysis_tools/rdf_tool.py b/mdcrow/tools/base_tools/analysis_tools/rdf_tool.py
@@ -10,7 +10,7 @@
 
 class RDFToolInput(BaseModel):
     trajectory_fileid: str = Field(
-        None, description="Trajectory file. Either dcd, hdf5, xtc oe xyz"
+        None, description="Trajectory file. Either dcd, hdf5, xtc or xyz"
     )
 
     topology_fileid: Optional[str] = Field(None, description="Topology file")
@@ -166,17 +166,17 @@ def validate_input(self, input):
                         raise ValueError(
                             "Incorrect Inputs: "
                             "Stride must be a positive integer "
-                            "or None for default value of 1"
+                            "or None for default value of 1."
                         )
                 except ValueError:
                     raise ValueError(
                         "Incorrect Inputs: Stride must be an integer "
-                        "or None for default value of 1"
+                        "or None for default value of 1."
                     )
             else:
                 if stride <= 0:
                     raise ValueError(
-                        "Incorrect Inputs: " "Stride must be a positive integer"
+                        "Incorrect Inputs: " "Stride must be a positive integer."
                     )
 
         if atom_indices:
@@ -185,7 +185,7 @@ def validate_input(self, input):
             except ValueError:
                 raise ValueError(
                     "Incorrect Inputs: Atom indices must be a comma "
-                    "separated list of integers or None for all atoms"
+                    "separated list of integers or None for all atoms."
                 )
         inputs = {
             "trajectory_fileid": trajectory_id,

diff --git a/mdcrow/tools/base_tools/analysis_tools/rmsd_tools.py b/mdcrow/tools/base_tools/analysis_tools/rmsd_tools.py
@@ -11,7 +11,7 @@
 def rmsd(path_registry, traj, ref_traj, mol_name, select="protein"):
     """
     Calculate the root mean square deviation (RMSD) of each selected atom.
-    Can be used for either protions or small molecules.
+    Can be used for either proteins or small molecules.
     """
     print("Calculating RMSD...")
     msg = ""

diff --git a/mdcrow/tools/base_tools/analysis_tools/sasa.py b/mdcrow/tools/base_tools/analysis_tools/sasa.py
@@ -11,7 +11,7 @@
 class SASAFunctions:
     def __init__(self, path_registry, top_fileid, traj_fileid=None, mol_name=None):
         """
-        Initialize the SASAFunctions class with topology and/or trajectory files.
+        Initializes the SASAFunctions class with topology and/or trajectory files.
 
         Parameters:
         path_registry (PathRegistry): mapping file IDs to file paths.
@@ -29,14 +29,14 @@ def __init__(self, path_registry, top_fileid, traj_fileid=None, mol_name=None):
 
     def calculate_sasa(self, probe_radius=0.14):
         """
-        Calculate the Solvent Accessible Surface Area (SASA) for each
-        frame in the trajectory using Shrake-Rupley algorithm.
+        Calculates the Solvent Accessible Surface Area (SASA) for each
+        frame in the trajectory using the Shrake-Rupley algorithm.
 
         Parameters:
         probe_radius (float, optional): The radius of the probe used to calculate SASA.
-            Default is 0.14 nm (1.4 Å).
+            The default is 0.14 nm (1.4 Å).
         """
-        print("Calcuating SASA ...")
+        print("Calculating SASA ...")
         self.residue_sasa = md.shrake_rupley(
             self.traj, probe_radius=probe_radius, mode="residue"
         )
@@ -58,7 +58,7 @@ def calculate_sasa(self, probe_radius=0.14):
 
     def plot_sasa(self):
         """
-        Plot the total SASA and per-residue SASA over time.
+        Plots the total SASA and per-residue SASA over time.
         """
         message = ""
         if self.total_sasa is None or self.residue_sasa is None:
@@ -110,7 +110,7 @@ def plot_sasa(self):
 class SolventAccessibleSurfaceArea(BaseTool):
     name = "SolventAccessibleSurfaceArea"
     description = (
-        "Compute the Solvent Accessible Surface Area (SASA) for a molecule or protein."
+        "Computes the Solvent Accessible Surface Area (SASA) for a molecule or protein."
         "\nInputs: \n"
         "\t(str) File ID for the topology file. \n"
         "\t(str, optional) File ID for the trajectory file. \n"

diff --git a/mdcrow/tools/base_tools/analysis_tools/vis_tools.py b/mdcrow/tools/base_tools/analysis_tools/vis_tools.py
@@ -21,8 +21,8 @@ def _find_png(self):
         return png_files
 
     def run_molrender(self, cif_path: str) -> str:
-        """Function to run molrender,
-        it requires node.js to be installed
+        """Function to run molrender.
+        It requires Node.js
         and the molrender package to be
         installed globally.
         This will save .png
@@ -43,8 +43,8 @@ def run_molrender(self, cif_path: str) -> str:
             f"mol_render_{self.cif_file_name}",
             f"{self.path_registry.ckpt_figures}/{file_name[0]}",
             (
-                f"Visualization of cif file {self.cif_file_name}"
-                "as png file. using molrender."
+                f"Visualization of CIF file {self.cif_file_name}"
+                "as a PNG file using molrender."
             ),
         )
 
@@ -62,8 +62,8 @@ def create_notebook(self, cif_file: str) -> str:
         tool, it will create
         a notebook
         with the code to
-        install nglview and
-        display the cif/pdb file."""
+        install NGLView and
+        display the CIF/PDB file."""
         self.cif_file_name = os.path.basename(cif_file)
 
         # Create a new notebook
@@ -97,21 +97,21 @@ def create_notebook(self, cif_file: str) -> str:
         self.path_registry.map_path(
             notebook_name,
             notebook_name,
-            f"Notebook to visualize cif/pdb file {self.cif_file_name} using nglview.",
+            f"Notebook to visualize CIF/PDB file {self.cif_file_name} using NGLView.",
         )
         return "Visualization Complete"
 
 
 class VisualizeProtein(BaseTool):
-    """To get a png, you must install molrender
-    https://github.com/molstar/molrender/tree/master
+    """To get a PNG, you must install molrender
+    https://github.com/molstar/molrender/tree/master.
     Otherwise, you will get a notebook where you
     can visualize the protein."""
 
     name = "PDBVisualization"
     description = """This tool will create
-                    a visualization of a cif
-                    file as a png file OR
+                    a visualization of a CIF
+                    file as a PNG file OR
                     it will create
                     a .ipynb file with the
                     visualization of the
@@ -130,7 +130,7 @@ def __init__(self, path_registry: Optional[PathRegistry]):
     def _run(self, cif_file_name: str) -> str:
         """use the tool."""
         if not self.path_registry:
-            return "Failed. Error: Path registry is not set"
+            return "Failed. Error: Path registry is not set."
         cif_path = self.path_registry.get_mapped_path(cif_file_name)
         if not cif_path:
             return f"Failed. File not found: {cif_file_name}"
@@ -141,7 +141,7 @@ def _run(self, cif_file_name: str) -> str:
             print(f"Error running molrender: {str(e)}. Using NGLView instead.")
             try:
                 vis.create_notebook(cif_path)
-                return "Succeeded. Visualization created as notebook"
+                return "Succeeded. Visualization created as notebook."
             except Exception as e:
                 return f"Failed. {type(e).__name__}: {e}"
 

diff --git a/mdcrow/tools/base_tools/preprocess_tools/clean_tools.py b/mdcrow/tools/base_tools/preprocess_tools/clean_tools.py
@@ -11,9 +11,9 @@
 class CleaningToolFunctionInput(BaseModel):
     """Input model for CleaningToolFunction"""
 
-    pdb_id: str = Field(..., description="ID of the pdb/cif file in the path registry")
+    pdb_id: str = Field(..., description="ID of the PDB/CIF file in the path registry.")
     replace_nonstandard_residues: bool = Field(
-        True, description="Whether to replace nonstandard residues with standard ones. "
+        True, description="Whether to replace nonstandard residues with standard ones."
     )
     add_missing_atoms: bool = Field(
         True,
@@ -36,8 +36,8 @@ class CleaningToolFunctionInput(BaseModel):
 class CleaningToolFunction(BaseTool):
     name = "CleaningToolFunction"
     description = """
-    This tool performs various cleaning operations on a PDB or CIF file.
-    Operations can include removing heterogens,
+    This tool performs various cleaning operations on a PDB or CIF file,
+    including removing heterogens,
     adding missing atoms and hydrogens,
     replacing nonstandard residues, and/or removing water.
 
@@ -62,7 +62,7 @@ def _run(self, **input_args) -> str:
             pdbfile_id = input_args.get("pdb_id", None)
             if pdbfile_id is None:
                 return """Failed. No file was provided.
-                The input has to be a dictionary with the key 'pdb_id'"""
+                The input must be a dictionary containing the key 'pdb_id'"""
             remove_heterogens = input_args.get("remove_heterogens", True)
             remove_water = input_args.get("remove_water", True)
             add_hydrogens = input_args.get("add_hydrogens", True)
@@ -105,32 +105,32 @@ def _run(self, **input_args) -> str:
                     fixer.removeHeterogens(True)
                     file_description += " Removed Heterogens, and Water Kept. "
             except Exception:
-                print("error at removeHeterogens")
+                print("Error at removeHeterogens")
 
             try:
                 if replace_nonstandard_residues:
                     fixer.replaceNonstandardResidues()
                     file_description += " Replaced Nonstandard Residues. "
             except Exception:
-                print("error at replaceNonstandardResidues")
+                print("Error at replaceNonstandardResidues")
             try:
                 fixer.findMissingAtoms()
             except Exception:
-                print("error at findMissingAtoms")
+                print("Error at findMissingAtoms")
             try:
                 if add_missing_atoms:
                     fixer.addMissingAtoms()
             except Exception:
-                print("error at addMissingAtoms")
+                print("Error at addMissingAtoms")
             try:
                 if add_hydrogens:
                     fixer.addMissingHydrogens(add_hydrogens_ph)
                     file_description += f"Added Hydrogens at pH {add_hydrogens_ph}. "
             except Exception:
-                print("error at addMissingHydrogens")
+                print("Error at addMissingHydrogens")
 
             file_description += (
-                "Missing Atoms Added and replaces nonstandard residues. "
+                "Missing Atoms added and nonstandard residues replaced. "
             )
             file_mode = "w" if add_hydrogens else "a"
             file_name = self.path_registry.write_file_name(

diff --git a/mdcrow/tools/base_tools/preprocess_tools/packing.py b/mdcrow/tools/base_tools/preprocess_tools/packing.py
@@ -50,11 +50,11 @@ def __init__(self, filename, file_id, number_of_molecules=1, instructions=None):
         self.load()
 
     def load(self):
-        # load the molecule data (optional)
+        # Load the molecule data (optional)
         pass
 
     def get_number_of_atoms(self):
-        # return the number of atoms in this molecule
+        # Return the number of atoms in this molecule
         pass
 
 
@@ -103,7 +103,7 @@ def generate_input_header(self) -> None:
 
         self.final_name = f"{_final_name}_v{self.file_number}.pdb"
         with open("packmol.inp", "w") as out:
-            out.write("##Automatically generated by LangChain\n")
+            out.write("## Automatically generated by LangChain\n")
             out.write("tolerance 2.0\n")
             out.write("filetype pdb\n")
             out.write(
@@ -135,7 +135,7 @@ def run_packmol(self):
         cmd = "packmol < packmol.inp"
         result = subprocess.run(cmd, shell=True, text=True, capture_output=True)
         if result.returncode != 0:
-            print("Packmol failed to run with 'packmol < packmol.inp' command")
+            print("Packmol failed to run using the command: 'packmol < packmol.inp'.")
             result = subprocess.run(
                 "./" + cmd, shell=True, text=True, capture_output=True
             )
@@ -164,7 +164,7 @@ def run_packmol(self):
                 self.file_description,
             )
             # move file to files/pdb
-            print("successfull!")
+            print("Successful!")
             return f"PDB file validated successfully. FileID: PACKED_{time_stamp}"
         elif pdb_validation[0] == 1:
             # format pdb_validation[1] list of errors
@@ -338,7 +338,7 @@ def _run(self, **values) -> str:
             if result.returncode != 0:
                 return (
                     "Failed. Packmol is not installed. Please install"
-                    "packmol at "
+                    "Packmol from "
                     "'https://m3g.github.io/packmol/download.shtml'"
                     "and try again."
                 )

diff --git a/tests/test_analysis/test_inertia.py b/tests/test_analysis/test_inertia.py
@@ -40,7 +40,7 @@ def test_plot_moi_one_frame(moi_functions):
     # Simulate a single frame of inertia tensor data
     moi_functions.moments_of_inertia = np.array([[1.0, 2.0, 3.0]])
     result = moi_functions.plot_moi()
-    assert "Only one frame in trajectory, no plot generated." in result
+    assert "Only one frame in the trajectory, no plot generated." in result
 
 
 @patch("mdcrow.tools.base_tools.analysis_tools.inertia.plt.savefig")

diff --git a/tests/test_sim/test_setup.py b/tests/test_sim/test_setup.py
@@ -26,7 +26,7 @@ def test_parse_cutoff(setupandrun, input_cutoff, expected_result):
 def test_parse_cutoff_unknown_unit(setupandrun):
     with pytest.raises(ValueError) as e:
         setupandrun._parse_cutoff("2pc")
-        assert "Unknown unit" in str(e.value)
+    assert "Unknown unit" in str(e.value)
 
 
 def test_parse_temperature(setupandrun):

diff --git a/tests/test_sim/test_setupandrun.py b/tests/test_sim/test_setupandrun.py
@@ -128,7 +128,7 @@ def test_parse_cutoff(setupandrun, input_cutoff, expected_result):
 def test_parse_cutoff_unknown_unit(setupandrun):
     with pytest.raises(ValueError) as e:
         setupandrun._parse_cutoff("2pc")
-        assert "Unknown unit" in str(e.value)
+    assert "Unknown unit" in str(e.value)
 
 
 def test_parse_temperature(setupandrun):

diff --git a/tests/test_sim/test_write_script.py b/tests/test_sim/test_write_script.py
@@ -219,7 +219,7 @@ def script_content_2(script_content_vars_2, openmm_sim):
     )
 
 
-def est_construct_script_content_script2(script_content_2, script_content_vars_2):
+def test_construct_script_content_script2(script_content_2, script_content_vars_2):
     assert (
         f"ewaldErrorTolerance = {script_content_vars_2.ewald_error_tolerance}"
         in script_content_2