Commit bfcef09

Merge pull request #536 from ReactionMechanismGenerator/xtb10

Added xTB and xTB-GSM

kfir4444 authored Aug 7, 2022
2 parents 020acc5 + dd9a1f5
Showing 72 changed files with 9,480 additions and 277 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/cont_int.yml
@@ -33,7 +33,9 @@ jobs:
use-mamba: true

- name: Update environment
run: mamba env update -n arc_env -f environment.yml
run: |
mamba env update -n arc_env -f environment.yml
conda list
- name: Install codecov
run: mamba install -y -c conda-forge codecov
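The added conda list step presumably serves to record the exact resolved package versions in the CI log, which helps diagnose dependency problems when the environment update pulls in new packages.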
6 changes: 6 additions & 0 deletions Makefile
@@ -24,6 +24,12 @@ install-gcn-cpu:
install-kinbot:
	bash devtools/install_kinbot.sh

install-sella:
	bash devtools/install_sella.sh

install-xtb:
	bash devtools/install_xtb.sh

clean:
	find -type d -name __pycache__ -exec rm -rf {} +
	rm -rf testing
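The new targets wrap the corresponding devtools scripts, so running make install-xtb is equivalent to bash devtools/install_xtb.sh, mirroring the existing install-kinbot target.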
2 changes: 2 additions & 0 deletions arc/checks/ts.py
@@ -240,6 +240,8 @@ def check_normal_mode_displacement(reaction: 'ARCReaction',
and spc.mol.atoms[0].element.symbol == 'H' for spc in reaction.r_species + reaction.p_species)
# bond_lone_hs = False
xyz = parser.parse_xyz_from_file(job.local_path_to_output_file)
if not xyz['coords']:
xyz = reaction.ts_species.get_xyz()

done = False
for amplitude in amplitudes:
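The new fallback means that when the parsed output file contains no coordinates, the normal-mode check proceeds with the geometry already stored on reaction.ts_species rather than failing on an empty xyz.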
35 changes: 31 additions & 4 deletions arc/common.py
@@ -178,9 +178,8 @@ def check_ess_settings(ess_settings: Optional[dict] = None) -> dict:
# run checks:
for ess, server_list in settings_dict.items():
if ess.lower() not in ['gaussian', 'qchem', 'molpro', 'orca', 'terachem', 'onedmin', 'psi4',
'gcn', 'heuristics', 'autotst', 'kinbot']:
raise SettingsError(f'ESS software are Gaussian, QChem, Molpro, Orca, TeraChem, Psi4, '
f'or OneDMin. Got: {ess}')
'gcn', 'heuristics', 'autotst', 'kinbot', 'xtb', 'xtb_gsm']:
raise SettingsError(f'Got an unrecognized software in ESS settings: {ess}')
for server in server_list:
if not isinstance(server, bool) and server.lower() not in [s.lower() for s in servers.keys()]:
server_names = [name for name in servers.keys()]
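With this change, xtb and xtb_gsm become valid ESS names. A minimal sketch of a settings dictionary that would now pass this check; the server names are hypothetical placeholders:

ess_settings = {
    'gaussian': ['server1'],  # hypothetical server name
    'xtb': ['local'],         # new in this PR
    'xtb_gsm': ['local'],     # new in this PR
}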
@@ -937,7 +936,8 @@ def almost_equal_lists(iter1: Union[list, tuple, np.ndarray],
if not almost_equal_lists(iter1=entry1, iter2=entry2, rtol=rtol, atol=atol):
return False
else:
if isinstance(entry1, (int, float)) and isinstance(entry2, (int, float)):
if isinstance(entry1, (int, float, np.float32, np.float64)) \
and isinstance(entry2, (int, float, np.float32, np.float64)):
if not np.isclose([entry1], [entry2], rtol=rtol, atol=atol):
return False
else:
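The widened type check matters because NumPy's 32-bit scalars are not instances of Python's float, so values parsed as np.float32 previously fell through to the else branch. A quick illustration:

import numpy as np

isinstance(np.float64(1.0), float)    # True: np.float64 subclasses Python's float
isinstance(np.float32(1.0), float)    # False: np.float32 does not
np.isclose([np.float32(1.0)], [1.0])  # array([ True]): comparison works once the type is accepted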
@@ -1589,3 +1589,30 @@ def _check_r_n_p_symbols_between_rmg_and_arc_rxns(arc_reaction: 'ARCReaction',
print(rmg_p_symbols)
result = False
return result


def safe_copy_file(source: str,
                   destination: str,
                   wait: int = 10,
                   max_cycles: int = 100,
                   ):
    """
    Copy a file safely.

    Args:
        source (str): The full path to the file to be copied.
        destination (str): The full path to the file destination.
        wait (int, optional): The number of seconds to wait between cycles.
        max_cycles (int, optional): The maximal number of cycles to try.
    """
    for _ in range(max_cycles):
        try:
            shutil.copyfile(src=source, dst=destination)
        except shutil.SameFileError:
            # SameFileError subclasses OSError, so it must be caught first;
            # source and destination are the same file, nothing to copy.
            break
        except OSError:
            # The destination may be temporarily unavailable; wait and retry.
            time.sleep(wait)
        else:
            break
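A minimal usage sketch for the new helper; the paths and numbers are hypothetical:

from arc.common import safe_copy_file

# Retry for up to max_cycles * wait seconds if the destination is busy,
# e.g., on a flaky network mount.
safe_copy_file(source='/tmp/input.in', destination='/scratch/input.in', wait=5, max_cycles=20)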
5 changes: 3 additions & 2 deletions arc/imports.py
@@ -10,7 +10,7 @@
from arc.settings.submit import incore_commands, pipe_submit, submit_scripts


# Common imports where the user can optionally put a modified copy of an ARC file un their ~/.arc folder
# Common imports where the user can optionally put a modified copy of settings.py or submit.py file under ~/.arc
home = os.getenv("HOME") or os.path.expanduser("~")
local_arc_path = os.path.join(home, '.arc')

@@ -28,7 +28,8 @@
local_settings_dict = {key: val for key, val in vars(local_settings).items() if '__' not in key}
settings.update(local_settings_dict)
# Set global_ess_settings to None if using a local settings file (ARC's defaults are dummies)
settings['global_ess_settings'] = local_settings_dict['global_ess_settings'] or None
settings['global_ess_settings'] = local_settings_dict['global_ess_settings'] \
if 'global_ess_settings' in local_settings_dict and local_settings_dict['global_ess_settings'] else None

local_arc_submit_path = os.path.join(local_arc_path, 'submit.py')
if os.path.isfile(local_arc_submit_path):
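For context, the override file read here is ~/.arc/settings.py. A hypothetical minimal override that the new guard handles gracefully, since it defines no global_ess_settings at all:

# ~/.arc/settings.py (hypothetical content; the entry shown is illustrative)
servers = {
    'local': {'cluster_soft': 'HTCondor'},
}
# global_ess_settings is deliberately absent; the guarded lookup above now
# falls back to None instead of raising a KeyError.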
106 changes: 68 additions & 38 deletions arc/job/adapter.py
@@ -20,9 +20,10 @@
from enum import Enum
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

import numpy as np
import pandas as pd

from arc.common import ARC_PATH, get_logger, torsions_to_scans
from arc.common import ARC_PATH, get_logger, read_yaml_file, save_yaml_file, torsions_to_scans
from arc.exceptions import JobError
from arc.imports import local_arc_path, pipe_submit, settings, submit_scripts
from arc.job.local import (change_mode,
@@ -65,7 +66,6 @@ class JobEnum(str, Enum):
- openbabel
- rdkit
- terachem
- torchani
- AIMNet (https://github.com/aiqm/aimnet)
- turbomol
- xtb
@@ -89,13 +89,16 @@
psi4 = 'psi4'
qchem = 'qchem'
terachem = 'terachem'
torchani = 'torchani'
xtb = 'xtb'

# TS search methods
autotst = 'autotst' # AutoTST, 10.1021/acs.jpca.7b07361, 10.26434/chemrxiv.13277870.v2
heuristics = 'heuristics' # ARC's heuristics
kinbot = 'kinbot' # KinBot, 10.1016/j.cpc.2019.106947
gcn = 'gcn' # Graph neural network for isomerization, https://doi.org/10.1021/acs.jpclett.0c00500
user = 'user' # user guesses
xtb_gsm = 'xtb_gsm' # Double ended growing string method (DE-GSM), [10.1021/ct400319w, 10.1063/1.4804162] via xTB


class JobTypeEnum(str, Enum):
@@ -288,6 +291,42 @@ def execute_queue(self):
"""
pass

def execute(self):
"""
Execute a job.
The execution type could be 'incore', 'queue', or 'pipe'.
An 'incore' execution type assumes a single job (if more are given, only the first one will be executed),
and executes the job in the same CPU process as ARC (i.e., Python waits for the response).
A 'queue' execution type assumes a single job (if more are given, only the first one will be executed),
and submits that single job to the server queue. The server could be either remote (accessed via SSH) or local.
A 'pipe' execution type assumes an array of jobs and submits several ARC instances (workers)
with an HDF5 file that contains specific directions.
The output is returned within the HDF5 file.
The new ARC instance, representing a single worker, will run all of its jobs incore.
"""
self.upload_files()
execution_type = JobExecutionTypeEnum(self.execution_type)
if execution_type == JobExecutionTypeEnum.incore:
self.initial_time = datetime.datetime.now()
self.job_status[0] = 'running'
self.execute_incore()
self.job_status[0] = 'done'
self.job_status[1]['status'] = 'done'
self.final_time = datetime.datetime.now()
self.determine_run_time()
elif execution_type == JobExecutionTypeEnum.queue:
self.execute_queue()
elif execution_type == JobExecutionTypeEnum.pipe:
# Todo:
# - Check that the HDF5 file is available, else raise an error.
# - Submit ARC workers with the HDF5 file.
self.execute_queue() # This is temporary until pipe is fully functional.
if not self.restarted:
self._write_initiated_job_to_csv_file()

def legacy_queue_execution(self):
"""
Execute a job to the server's queue.
@@ -324,39 +363,6 @@ def set_job_shell_file_to_upload(self) -> dict:
change_mode(mode='+x', file_name=file_name, path=self.local_path)
return self.get_file_property_dictionary(file_name=file_name, make_x=True)

def execute(self):
"""
Execute a job.
The execution type could be 'incore', 'queue', or 'pipe'.
An 'incore' execution type assumes a single job (if more are given, only the first one will be executed),
and executes the job in the same CPU process as ARC (i.e., Python waits for the response).
A 'queue' execution type assumes a single job (if more are given, only the first one will be executed),
and submits that single job to the server queue. The server could be either remote (accessed via SSH) or local.
A 'pipe' execution type assumes an array of jobs and submits several ARC instances (workers)
with an HDF5 file that contains specific directions.
The output is returned within the HDF5 file.
The new ARC instance, representing a single worker, will run all of its jobs incore.
"""
self.upload_files()
execution_type = JobExecutionTypeEnum(self.execution_type)
if execution_type == JobExecutionTypeEnum.incore:
self.job_status[0] = 'running'
self.execute_incore()
self.job_status[0] = 'done'
self.job_status[1]['status'] = 'done'
elif execution_type == JobExecutionTypeEnum.queue:
self.execute_queue()
elif execution_type == JobExecutionTypeEnum.pipe:
# Todo:
# - Check that the HDF5 file is available, else raise an error.
# - Submit ARC workers with the HDF5 file.
self.execute_queue() # This is temporary until pipe is fully functional.
if not self.restarted:
self._write_initiated_job_to_csv_file()

def determine_job_array_parameters(self):
"""
Determine the number of processes to use in a job array
@@ -475,6 +481,8 @@ def write_submit_script(self) -> None:
"""
Write a submit script to execute the job.
"""
if self.server is None:
return
if self.max_job_time > 9999 or self.max_job_time <= 0:
self.max_job_time = 120
architecture = ''
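The new early return presumably covers server-less jobs, such as incore xTB runs where self.server is None, which need no submit script at all.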
@@ -736,8 +744,8 @@ def set_cpu_and_mem(self):
if max_cpu is not None and job_cpu_cores > max_cpu:
job_cpu_cores = max_cpu
self.cpu_cores = self.cpu_cores or job_cpu_cores
max_mem = servers[self.server].get('memory', None) if self.server is not None else 16 # Max memory per node in GB.
job_max_server_node_memory_allocation = default_job_settings.get('job_max_server_node_memory_allocation', 0.8)
max_mem = servers[self.server].get('memory', None) if self.server is not None else 32.0 # Max memory per node in GB.
job_max_server_node_memory_allocation = default_job_settings.get('job_max_server_node_memory_allocation', 0.95)
if max_mem is not None and self.job_memory_gb > max_mem * job_max_server_node_memory_allocation:
logger.warning(f'The memory for job {self.job_name} using {self.job_adapter} ({self.job_memory_gb} GB) '
f'exceeds {100 * job_max_server_node_memory_allocation}% of the maximum node memory on '
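With the new defaults, a job without an assigned server is checked against a 32.0 * 0.95 = 30.4 GB ceiling, up from 16 * 0.8 = 12.8 GB before this change.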
@@ -1001,7 +1009,7 @@ def rename_output_file(self):
The renaming should happen automatically, this method functions to troubleshoot
cases where renaming wasn't successful the first time.
"""
if not os.path.isfile(self.local_path_to_output_file) and self.yml_out_path is None:
if not os.path.isfile(self.local_path_to_output_file) and not self.local_path_to_output_file.endswith('.yml'):
rename_output(local_file_path=self.local_path_to_output_file, software=self.job_adapter)

def add_to_args(self,
@@ -1304,3 +1312,25 @@ def troubleshoot_server(self):
if run_job:
# resubmit job
self.execute()

def save_output_file(self,
key: Optional[str] = None,
val: Optional[Union[float, dict, np.ndarray]] = None,
content_dict: Optional[dict] = None,
):
"""
Save the output of a job to the YAML output file.

Args:
key (str, optional): The key for the YAML output file.
val (Union[float, dict, np.ndarray], optional): The value to be stored.
content_dict (dict, optional): A dictionary to store.
"""
yml_out_path = os.path.join(self.local_path, 'output.yml')
content = read_yaml_file(yml_out_path) if os.path.isfile(yml_out_path) else dict()
if content_dict is not None:
content.update(content_dict)
if key is not None:
content[key] = val
save_yaml_file(path=yml_out_path, content=content)
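A minimal usage sketch from within a job adapter; the key names are hypothetical:

# Store a single scalar entry, then merge in several more; successive calls
# update the same output.yml rather than overwriting it.
self.save_output_file(key='sp', val=-1234.5678)
self.save_output_file(content_dict={'T1': 0.0123, 'successful': True})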
17 changes: 10 additions & 7 deletions arc/job/adapterTest.py
@@ -38,11 +38,14 @@ def test_job_enum(self):
self.assertEqual(JobEnum('psi4').value, 'psi4')
self.assertEqual(JobEnum('qchem').value, 'qchem')
self.assertEqual(JobEnum('terachem').value, 'terachem')
self.assertEqual(JobEnum('torchani').value, 'torchani')
self.assertEqual(JobEnum('xtb').value, 'xtb')
self.assertEqual(JobEnum('autotst').value, 'autotst')
self.assertEqual(JobEnum('heuristics').value, 'heuristics')
self.assertEqual(JobEnum('kinbot').value, 'kinbot')
self.assertEqual(JobEnum('gcn').value, 'gcn')
self.assertEqual(JobEnum('user').value, 'user')
self.assertEqual(JobEnum('xtb_gsm').value, 'xtb_gsm')
with self.assertRaises(ValueError):
JobEnum('wrong')

@@ -284,9 +287,9 @@ def test_write_array_submit_script(self):

def test_write_queue_submit_script(self):
"""Test writing a queue submit script"""
self.job_2.number_of_processes, self.job_2.workers = 1, None
self.job_2.write_submit_script()
with open(os.path.join(self.job_2.local_path, submit_filenames[servers[self.job_2.server]['cluster_soft']]),
self.job_4.number_of_processes, self.job_4.workers = 1, None
self.job_4.write_submit_script()
with open(os.path.join(self.job_4.local_path, submit_filenames[servers[self.job_4.server]['cluster_soft']]),
'r') as f:
lines = f.readlines()
array, hdf5, g16 = False, False, False
@@ -329,10 +332,10 @@ def test_set_cpu_and_mem(self):

def test_set_file_paths(self):
"""Test setting up the job's paths"""
self.assertEqual(self.job_2.local_path, os.path.join(self.job_2.project_directory, 'calcs', 'Species',
self.job_2.species_label, self.job_2.job_name))
self.assertEqual(self.job_2.remote_path, os.path.join('runs', 'ARC_Projects', self.job_2.project,
self.job_2.species_label, self.job_2.job_name))
self.assertEqual(self.job_1.local_path, os.path.join(self.job_1.project_directory, 'calcs', 'Species',
self.job_1.species_label, self.job_1.job_name))
self.assertEqual(self.job_1.remote_path, os.path.join('runs', 'ARC_Projects', self.job_1.project,
self.job_1.species_label, self.job_1.job_name))

def test_format_max_job_time(self):
"""Test that the maximum job time can be formatted properly, including days, minutes, and seconds"""
1 change: 1 addition & 0 deletions arc/job/adapters/__init__.py
@@ -5,3 +5,4 @@
import arc.job.adapters.qchem
import arc.job.adapters.terachem
import arc.job.adapters.ts
import arc.job.adapters.xtb_adapter