Commit bfcef09

Merge pull request #536 from ReactionMechanismGenerator/xtb10

Added xTB and xTB-GSM

kfir4444 authored Aug 7, 2022
2 parents 020acc5 + dd9a1f5
Showing 72 changed files with 9,480 additions and 277 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/cont_int.yml
@@ -33,7 +33,9 @@ jobs:
use-mamba: true

- name: Update environment
run: mamba env update -n arc_env -f environment.yml
run: |
mamba env update -n arc_env -f environment.yml
conda list
- name: Install codecov
run: mamba install -y -c conda-forge codecov
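The added conda list step presumably serves to record the exact resolved package versions in the CI log, which helps diagnose dependency problems when the environment update pulls in new packages.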
6 changes: 6 additions & 0 deletions Makefile
@@ -24,6 +24,12 @@ install-gcn-cpu:
install-kinbot:
	bash devtools/install_kinbot.sh

install-sella:
	bash devtools/install_sella.sh

install-xtb:
	bash devtools/install_xtb.sh

clean:
	find -type d -name __pycache__ -exec rm -rf {} +
	rm -rf testing
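The new targets wrap the corresponding devtools scripts, so running make install-xtb is equivalent to bash devtools/install_xtb.sh, mirroring the existing install-kinbot target.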
2 changes: 2 additions & 0 deletions arc/checks/ts.py
@@ -240,6 +240,8 @@ def check_normal_mode_displacement(reaction: 'ARCReaction',
and spc.mol.atoms[0].element.symbol == 'H' for spc in reaction.r_species + reaction.p_species)
# bond_lone_hs = False
xyz = parser.parse_xyz_from_file(job.local_path_to_output_file)
if not xyz['coords']:
xyz = reaction.ts_species.get_xyz()

done = False
for amplitude in amplitudes:
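The new fallback means that when the parsed output file contains no coordinates, the normal-mode check proceeds with the geometry already stored on reaction.ts_species rather than failing on an empty xyz.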
35 changes: 31 additions & 4 deletions arc/common.py
@@ -178,9 +178,8 @@ def check_ess_settings(ess_settings: Optional[dict] = None) -> dict:
# run checks:
for ess, server_list in settings_dict.items():
if ess.lower() not in ['gaussian', 'qchem', 'molpro', 'orca', 'terachem', 'onedmin', 'psi4',
'gcn', 'heuristics', 'autotst', 'kinbot']:
raise SettingsError(f'ESS software are Gaussian, QChem, Molpro, Orca, TeraChem, Psi4, '
f'or OneDMin. Got: {ess}')
'gcn', 'heuristics', 'autotst', 'kinbot', 'xtb', 'xtb_gsm']:
raise SettingsError(f'Got an unrecognized software in ESS settings: {ess}')
for server in server_list:
if not isinstance(server, bool) and server.lower() not in [s.lower() for s in servers.keys()]:
server_names = [name for name in servers.keys()]
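With this change, xtb and xtb_gsm become valid ESS names. A minimal sketch of a settings dictionary that would now pass this check; the server names are hypothetical placeholders:

ess_settings = {
    'gaussian': ['server1'],  # hypothetical server name
    'xtb': ['local'],         # new in this PR
    'xtb_gsm': ['local'],     # new in this PR
}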
@@ -937,7 +936,8 @@ def almost_equal_lists(iter1: Union[list, tuple, np.ndarray],
if not almost_equal_lists(iter1=entry1, iter2=entry2, rtol=rtol, atol=atol):
return False
else:
if isinstance(entry1, (int, float)) and isinstance(entry2, (int, float)):
if isinstance(entry1, (int, float, np.float32, np.float64)) \
and isinstance(entry2, (int, float, np.float32, np.float64)):
if not np.isclose([entry1], [entry2], rtol=rtol, atol=atol):
return False
else:
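The widened type check matters because NumPy's 32-bit scalars are not instances of Python's float, so values parsed as np.float32 previously fell through to the else branch. A quick illustration:

import numpy as np

isinstance(np.float64(1.0), float)    # True: np.float64 subclasses Python's float
isinstance(np.float32(1.0), float)    # False: np.float32 does not
np.isclose([np.float32(1.0)], [1.0])  # array([ True]): comparison works once the type is accepted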
@@ -1589,3 +1589,30 @@ def _check_r_n_p_symbols_between_rmg_and_arc_rxns(arc_reaction: 'ARCReaction',
print(rmg_p_symbols)
result = False
return result


def safe_copy_file(source: str,
                   destination: str,
                   wait: int = 10,
                   max_cycles: int = 100,
                   ):
    """
    Copy a file safely.

    Args:
        source (str): The full path to the file to be copied.
        destination (str): The full path to the file destination.
        wait (int, optional): The number of seconds to wait between cycles.
        max_cycles (int, optional): The maximal number of cycles to try.
    """
    for _ in range(max_cycles):
        try:
            shutil.copyfile(src=source, dst=destination)
        except shutil.SameFileError:
            # SameFileError subclasses OSError, so it must be caught first;
            # source and destination are the same file, nothing to copy.
            break
        except OSError:
            # The destination may be temporarily unavailable; wait and retry.
            time.sleep(wait)
        else:
            break
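A minimal usage sketch for the new helper; the paths and numbers are hypothetical:

from arc.common import safe_copy_file

# Retry for up to max_cycles * wait seconds if the destination is busy,
# e.g., on a flaky network mount.
safe_copy_file(source='/tmp/input.in', destination='/scratch/input.in', wait=5, max_cycles=20)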
5 changes: 3 additions & 2 deletions arc/imports.py
@@ -10,7 +10,7 @@
from arc.settings.submit import incore_commands, pipe_submit, submit_scripts


# Common imports where the user can optionally put a modified copy of an ARC file un their ~/.arc folder
# Common imports where the user can optionally put a modified copy of settings.py or submit.py file under ~/.arc
home = os.getenv("HOME") or os.path.expanduser("~")
local_arc_path = os.path.join(home, '.arc')

@@ -28,7 +28,8 @@
local_settings_dict = {key: val for key, val in vars(local_settings).items() if '__' not in key}
settings.update(local_settings_dict)
# Set global_ess_settings to None if using a local settings file (ARC's defaults are dummies)
settings['global_ess_settings'] = local_settings_dict['global_ess_settings'] or None
settings['global_ess_settings'] = local_settings_dict['global_ess_settings'] \
if 'global_ess_settings' in local_settings_dict and local_settings_dict['global_ess_settings'] else None

local_arc_submit_path = os.path.join(local_arc_path, 'submit.py')
if os.path.isfile(local_arc_submit_path):
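For context, the override file read here is ~/.arc/settings.py. A hypothetical minimal override that the new guard handles gracefully, since it defines no global_ess_settings at all:

# ~/.arc/settings.py (hypothetical content; the entry shown is illustrative)
servers = {
    'local': {'cluster_soft': 'HTCondor'},
}
# global_ess_settings is deliberately absent; the guarded lookup above now
# falls back to None instead of raising a KeyError.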
106 changes: 68 additions & 38 deletions arc/job/adapter.py
@@ -20,9 +20,10 @@
from enum import Enum
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

import numpy as np
import pandas as pd

from arc.common import ARC_PATH, get_logger, torsions_to_scans
from arc.common import ARC_PATH, get_logger, read_yaml_file, save_yaml_file, torsions_to_scans
from arc.exceptions import JobError
from arc.imports import local_arc_path, pipe_submit, settings, submit_scripts
from arc.job.local import (change_mode,
@@ -65,7 +66,6 @@ class JobEnum(str, Enum):
- openbabel
- rdkit
- terachem
- torchani
- AIMNet (https://github.com/aiqm/aimnet)
- turbomol
- xtb
@@ -89,13 +89,16 @@
psi4 = 'psi4'
qchem = 'qchem'
terachem = 'terachem'
torchani = 'torchani'
xtb = 'xtb'

# TS search methods
autotst = 'autotst' # AutoTST, 10.1021/acs.jpca.7b07361, 10.26434/chemrxiv.13277870.v2
heuristics = 'heuristics' # ARC's heuristics
kinbot = 'kinbot' # KinBot, 10.1016/j.cpc.2019.106947
gcn = 'gcn' # Graph neural network for isomerization, https://doi.org/10.1021/acs.jpclett.0c00500
user = 'user' # user guesses
xtb_gsm = 'xtb_gsm' # Double ended growing string method (DE-GSM), [10.1021/ct400319w, 10.1063/1.4804162] via xTB


class JobTypeEnum(str, Enum):
@@ -288,6 +291,42 @@ def execute_queue(self):
"""
pass

def execute(self):
"""
Execute a job.
The execution type could be 'incore', 'queue', or 'pipe'.
An 'incore' execution type assumes a single job (if more are given, only the first one will be executed),
and executes the job in the same CPU process as ARC (i.e., Python waits for the response).
A 'queue' execution type assumes a single job (if more are given, only the first one will be executed),
and submits that single job to the server queue. The server could be either remote (accessed via SSH) or local.
A 'pipe' execution type assumes an array of jobs and submits several ARC instances (workers)
with an HDF5 file that contains specific directions.
The output is returned within the HDF5 file.
The new ARC instance, representing a single worker, will run all of its jobs incore.
"""
self.upload_files()
execution_type = JobExecutionTypeEnum(self.execution_type)
if execution_type == JobExecutionTypeEnum.incore:
self.initial_time = datetime.datetime.now()
self.job_status[0] = 'running'
self.execute_incore()
self.job_status[0] = 'done'
self.job_status[1]['status'] = 'done'
self.final_time = datetime.datetime.now()
self.determine_run_time()
elif execution_type == JobExecutionTypeEnum.queue:
self.execute_queue()
elif execution_type == JobExecutionTypeEnum.pipe:
# Todo:
# - Check that the HDF5 file is available, else raise an error.
# - Submit ARC workers with the HDF5 file.
self.execute_queue() # This is temporary until pipe is fully functional.
if not self.restarted:
self._write_initiated_job_to_csv_file()

def legacy_queue_execution(self):
"""
Execute a job to the server's queue.
@@ -324,39 +363,6 @@ def set_job_shell_file_to_upload(self) -> dict:
change_mode(mode='+x', file_name=file_name, path=self.local_path)
return self.get_file_property_dictionary(file_name=file_name, make_x=True)

def execute(self):
"""
Execute a job.
The execution type could be 'incore', 'queue', or 'pipe'.
An 'incore' execution type assumes a single job (if more are given, only the first one will be executed),
and executes the job in the same CPU process as ARC (i.e., Python waits for the response).
A 'queue' execution type assumes a single job (if more are given, only the first one will be executed),
and submits that single job to the server queue. The server could be either remote (accessed via SSH) or local.
A 'pipe' execution type assumes an array of jobs and submits several ARC instances (workers)
with an HDF5 file that contains specific directions.
The output is returned within the HDF5 file.
The new ARC instance, representing a single worker, will run all of its jobs incore.
"""
self.upload_files()
execution_type = JobExecutionTypeEnum(self.execution_type)
if execution_type == JobExecutionTypeEnum.incore:
self.job_status[0] = 'running'
self.execute_incore()
self.job_status[0] = 'done'
self.job_status[1]['status'] = 'done'
elif execution_type == JobExecutionTypeEnum.queue:
self.execute_queue()
elif execution_type == JobExecutionTypeEnum.pipe:
# Todo:
# - Check that the HDF5 file is available, else raise an error.
# - Submit ARC workers with the HDF5 file.
self.execute_queue() # This is temporary until pipe is fully functional.
if not self.restarted:
self._write_initiated_job_to_csv_file()

def determine_job_array_parameters(self):
"""
Determine the number of processes to use in a job array
@@ -475,6 +481,8 @@ def write_submit_script(self) -> None:
"""
Write a submit script to execute the job.
"""
if self.server is None:
return
if self.max_job_time > 9999 or self.max_job_time <= 0:
self.max_job_time = 120
architecture = ''
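The new early return presumably covers server-less jobs, such as incore xTB runs where self.server is None, which need no submit script at all.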
@@ -736,8 +744,8 @@ def set_cpu_and_mem(self):
if max_cpu is not None and job_cpu_cores > max_cpu:
job_cpu_cores = max_cpu
self.cpu_cores = self.cpu_cores or job_cpu_cores
max_mem = servers[self.server].get('memory', None) if self.server is not None else 16 # Max memory per node in GB.
job_max_server_node_memory_allocation = default_job_settings.get('job_max_server_node_memory_allocation', 0.8)
max_mem = servers[self.server].get('memory', None) if self.server is not None else 32.0 # Max memory per node in GB.
job_max_server_node_memory_allocation = default_job_settings.get('job_max_server_node_memory_allocation', 0.95)
if max_mem is not None and self.job_memory_gb > max_mem * job_max_server_node_memory_allocation:
logger.warning(f'The memory for job {self.job_name} using {self.job_adapter} ({self.job_memory_gb} GB) '
f'exceeds {100 * job_max_server_node_memory_allocation}% of the maximum node memory on '
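With the new defaults, a job without an assigned server is checked against a 32.0 * 0.95 = 30.4 GB ceiling, up from 16 * 0.8 = 12.8 GB before this change.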
@@ -1001,7 +1009,7 @@ def rename_output_file(self):
The renaming should happen automatically, this method functions to troubleshoot
cases where renaming wasn't successful the first time.
"""
if not os.path.isfile(self.local_path_to_output_file) and self.yml_out_path is None:
if not os.path.isfile(self.local_path_to_output_file) and not self.local_path_to_output_file.endswith('.yml'):
rename_output(local_file_path=self.local_path_to_output_file, software=self.job_adapter)

def add_to_args(self,
@@ -1304,3 +1312,25 @@ def troubleshoot_server(self):
if run_job:
# resubmit job
self.execute()

def save_output_file(self,
key: Optional[str] = None,
val: Optional[Union[float, dict, np.ndarray]] = None,
content_dict: Optional[dict] = None,
):
"""
Save the output of a job to the YAML output file.

Args:
key (str, optional): The key for the YAML output file.
val (Union[float, dict, np.ndarray], optional): The value to be stored.
content_dict (dict, optional): A dictionary to store.
"""
yml_out_path = os.path.join(self.local_path, 'output.yml')
content = read_yaml_file(yml_out_path) if os.path.isfile(yml_out_path) else dict()
if content_dict is not None:
content.update(content_dict)
if key is not None:
content[key] = val
save_yaml_file(path=yml_out_path, content=content)
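A minimal usage sketch from within a job adapter; the key names are hypothetical:

# Store a single scalar entry, then merge in several more; successive calls
# update the same output.yml rather than overwriting it.
self.save_output_file(key='sp', val=-1234.5678)
self.save_output_file(content_dict={'T1': 0.0123, 'successful': True})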
17 changes: 10 additions & 7 deletions arc/job/adapterTest.py
@@ -38,11 +38,14 @@ def test_job_enum(self):
self.assertEqual(JobEnum('psi4').value, 'psi4')
self.assertEqual(JobEnum('qchem').value, 'qchem')
self.assertEqual(JobEnum('terachem').value, 'terachem')
self.assertEqual(JobEnum('torchani').value, 'torchani')
self.assertEqual(JobEnum('xtb').value, 'xtb')
self.assertEqual(JobEnum('autotst').value, 'autotst')
self.assertEqual(JobEnum('heuristics').value, 'heuristics')
self.assertEqual(JobEnum('kinbot').value, 'kinbot')
self.assertEqual(JobEnum('gcn').value, 'gcn')
self.assertEqual(JobEnum('user').value, 'user')
self.assertEqual(JobEnum('xtb_gsm').value, 'xtb_gsm')
with self.assertRaises(ValueError):
JobEnum('wrong')

@@ -284,9 +287,9 @@ def test_write_array_submit_script(self):

def test_write_queue_submit_script(self):
"""Test writing a queue submit script"""
self.job_2.number_of_processes, self.job_2.workers = 1, None
self.job_2.write_submit_script()
with open(os.path.join(self.job_2.local_path, submit_filenames[servers[self.job_2.server]['cluster_soft']]),
self.job_4.number_of_processes, self.job_4.workers = 1, None
self.job_4.write_submit_script()
with open(os.path.join(self.job_4.local_path, submit_filenames[servers[self.job_4.server]['cluster_soft']]),
'r') as f:
lines = f.readlines()
array, hdf5, g16 = False, False, False
@@ -329,10 +332,10 @@ def test_set_cpu_and_mem(self):

def test_set_file_paths(self):
"""Test setting up the job's paths"""
self.assertEqual(self.job_2.local_path, os.path.join(self.job_2.project_directory, 'calcs', 'Species',
self.job_2.species_label, self.job_2.job_name))
self.assertEqual(self.job_2.remote_path, os.path.join('runs', 'ARC_Projects', self.job_2.project,
self.job_2.species_label, self.job_2.job_name))
self.assertEqual(self.job_1.local_path, os.path.join(self.job_1.project_directory, 'calcs', 'Species',
self.job_1.species_label, self.job_1.job_name))
self.assertEqual(self.job_1.remote_path, os.path.join('runs', 'ARC_Projects', self.job_1.project,
self.job_1.species_label, self.job_1.job_name))

def test_format_max_job_time(self):
"""Test that the maximum job time can be formatted properly, including days, minutes, and seconds"""
1 change: 1 addition & 0 deletions arc/job/adapters/__init__.py
@@ -5,3 +5,4 @@
import arc.job.adapters.qchem
import arc.job.adapters.terachem
import arc.job.adapters.ts
import arc.job.adapters.xtb_adapter